git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/convtest.cpp

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 2003-2010, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: convtest.cpp
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2003jul15
	14	* created by: Markus W. Scherer
	15	*
	16	* Test file for data-driven conversion tests.
	17	*/
	18
	19	#include "unicode/utypes.h"
	20
	21	#if !UCONFIG_NO_LEGACY_CONVERSION
	22	/*
	23	* Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
	24	* is slightly unnecessary - it removes tests for Unicode charsets
	25	* like UTF-8 that should work.
	26	* However, there is no easy way for the test to detect whether a test case
	27	* is for a Unicode charset, so it would be difficult to only exclude those.
	28	* Also, regular testing of ICU is done with all modules on, therefore
	29	* not testing conversion for a custom configuration like this should be ok.
	30	*/
	31
	32	#include "unicode/ucnv.h"
	33	#include "unicode/unistr.h"
	34	#include "unicode/parsepos.h"
	35	#include "unicode/uniset.h"
	36	#include "unicode/ustring.h"
	37	#include "unicode/ures.h"
	38	#include "convtest.h"
	39	#include "unicode/tstdtmod.h"
	40	#include <string.h>
	41	#include <stdlib.h>
	42
	43	#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
	44
	45	enum {
	46	// characters used in test data for callbacks
	47	SUB_CB='?',
	48	SKIP_CB='0',
	49	STOP_CB='.',
	50	ESC_CB='&'
	51	};
	52
	53	ConversionTest::ConversionTest() {
	54	UErrorCode errorCode=U_ZERO_ERROR;
	55	utf8Cnv=ucnv_open("UTF-8", &errorCode);
	56	ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
	57	if(U_FAILURE(errorCode)) {
	58	errln("unable to open UTF-8 converter");
	59	}
	60	}
	61
	62	ConversionTest::~ConversionTest() {
	63	ucnv_close(utf8Cnv);
	64	}
	65
	66	void
	67	ConversionTest::runIndexedTest(int32_t index, UBool exec, const char &name, char /par/) {
	68	if (exec) logln("TestSuite ConversionTest: ");
	69	switch (index) {
	70	#if !UCONFIG_NO_FILE_IO
	71	case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
	72	case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
	73	case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
	74	#else
	75	case 0:
	76	case 1:
	77	case 2: name="skip"; break;
	78	#endif
	79	case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
	80	default: name=""; break; //needed to end loop
	81	}
	82	}
	83
	84	// test data interface ----------------------------------------------------- ***
	85
	86	void
	87	ConversionTest::TestToUnicode() {
	88	ConversionCase cc;
	89	char charset[100], cbopt[4];
	90	const char *option;
	91	UnicodeString s, unicode;
	92	int32_t offsetsLength;
	93	UConverterToUCallback callback;
	94
	95	TestDataModule *dataModule;
	96	TestData *testData;
	97	const DataMap *testCase;
	98	UErrorCode errorCode;
	99	int32_t i;
	100
	101	errorCode=U_ZERO_ERROR;
	102	dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
	103	if(U_SUCCESS(errorCode)) {
	104	testData=dataModule->createTestData("toUnicode", errorCode);
	105	if(U_SUCCESS(errorCode)) {
	106	for(i=0; testData->nextCase(testCase, errorCode); ++i) {
	107	if(U_FAILURE(errorCode)) {
	108	errln("error retrieving conversion/toUnicode test case %d - %s",
	109	i, u_errorName(errorCode));
	110	errorCode=U_ZERO_ERROR;
	111	continue;
	112	}
	113
	114	cc.caseNr=i;
	115
	116	s=testCase->getString("charset", errorCode);
	117	s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
	118	cc.charset=charset;
	119
	120	cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
	121	unicode=testCase->getString("unicode", errorCode);
	122	cc.unicode=unicode.getBuffer();
	123	cc.unicodeLength=unicode.length();
	124
	125	offsetsLength=0;
	126	cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
	127	if(offsetsLength==0) {
	128	cc.offsets=NULL;
	129	} else if(offsetsLength!=unicode.length()) {
	130	errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
	131	i, unicode.length(), offsetsLength);
	132	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	133	}
	134
	135	cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
	136	cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
	137
	138	s=testCase->getString("errorCode", errorCode);
	139	if(s==UNICODE_STRING("invalid", 7)) {
	140	cc.outErrorCode=U_INVALID_CHAR_FOUND;
	141	} else if(s==UNICODE_STRING("illegal", 7)) {
	142	cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
	143	} else if(s==UNICODE_STRING("truncated", 9)) {
	144	cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
	145	} else if(s==UNICODE_STRING("illesc", 6)) {
	146	cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
	147	} else if(s==UNICODE_STRING("unsuppesc", 9)) {
	148	cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
	149	} else {
	150	cc.outErrorCode=U_ZERO_ERROR;
	151	}
	152
	153	s=testCase->getString("callback", errorCode);
	154	s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
	155	cc.cbopt=cbopt;
	156	switch(cbopt[0]) {
	157	case SUB_CB:
	158	callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
	159	break;
	160	case SKIP_CB:
	161	callback=UCNV_TO_U_CALLBACK_SKIP;
	162	break;
	163	case STOP_CB:
	164	callback=UCNV_TO_U_CALLBACK_STOP;
	165	break;
	166	case ESC_CB:
	167	callback=UCNV_TO_U_CALLBACK_ESCAPE;
	168	break;
	169	default:
	170	callback=NULL;
	171	break;
	172	}
	173	option=callback==NULL ? cbopt : cbopt+1;
	174	if(*option==0) {
	175	option=NULL;
	176	}
	177
	178	cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
	179
	180	if(U_FAILURE(errorCode)) {
	181	errln("error parsing conversion/toUnicode test case %d - %s",
	182	i, u_errorName(errorCode));
	183	errorCode=U_ZERO_ERROR;
	184	} else {
	185	logln("TestToUnicode[%d] %s", i, charset);
	186	ToUnicodeCase(cc, callback, option);
	187	}
	188	}
	189	delete testData;
	190	}
	191	delete dataModule;
	192	}
	193	else {
	194	dataerrln("Could not load test conversion data");
	195	}
	196	}
	197
	198	void
	199	ConversionTest::TestFromUnicode() {
	200	ConversionCase cc;
	201	char charset[100], cbopt[4];
	202	const char *option;
	203	UnicodeString s, unicode, invalidUChars;
	204	int32_t offsetsLength, index;
	205	UConverterFromUCallback callback;
	206
	207	TestDataModule *dataModule;
	208	TestData *testData;
	209	const DataMap *testCase;
	210	const UChar *p;
	211	UErrorCode errorCode;
	212	int32_t i, length;
	213
	214	errorCode=U_ZERO_ERROR;
	215	dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
	216	if(U_SUCCESS(errorCode)) {
	217	testData=dataModule->createTestData("fromUnicode", errorCode);
	218	if(U_SUCCESS(errorCode)) {
	219	for(i=0; testData->nextCase(testCase, errorCode); ++i) {
	220	if(U_FAILURE(errorCode)) {
	221	errln("error retrieving conversion/fromUnicode test case %d - %s",
	222	i, u_errorName(errorCode));
	223	errorCode=U_ZERO_ERROR;
	224	continue;
	225	}
	226
	227	cc.caseNr=i;
	228
	229	s=testCase->getString("charset", errorCode);
	230	s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
	231	cc.charset=charset;
	232
	233	unicode=testCase->getString("unicode", errorCode);
	234	cc.unicode=unicode.getBuffer();
	235	cc.unicodeLength=unicode.length();
	236	cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
	237
	238	offsetsLength=0;
	239	cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
	240	if(offsetsLength==0) {
	241	cc.offsets=NULL;
	242	} else if(offsetsLength!=cc.bytesLength) {
	243	errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
	244	i, cc.bytesLength, offsetsLength);
	245	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	246	}
	247
	248	cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
	249	cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
	250
	251	s=testCase->getString("errorCode", errorCode);
	252	if(s==UNICODE_STRING("invalid", 7)) {
	253	cc.outErrorCode=U_INVALID_CHAR_FOUND;
	254	} else if(s==UNICODE_STRING("illegal", 7)) {
	255	cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
	256	} else if(s==UNICODE_STRING("truncated", 9)) {
	257	cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
	258	} else {
	259	cc.outErrorCode=U_ZERO_ERROR;
	260	}
	261
	262	s=testCase->getString("callback", errorCode);
	263	cc.setSub=0; // default: no subchar
	264
	265	if((index=s.indexOf((UChar)0))>0) {
	266	// read NUL-separated subchar first, if any
	267	// copy the subchar from Latin-1 characters
	268	// start after the NUL
	269	p=s.getTerminatedBuffer();
	270	length=index+1;
	271	p+=length;
	272	length=s.length()-length;
	273	if(length<=0 \|\| length>=(int32_t)sizeof(cc.subchar)) {
	274	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	275	} else {
	276	int32_t j;
	277
	278	for(j=0; j<length; ++j) {
	279	cc.subchar[j]=(char)p[j];
	280	}
	281	// NUL-terminate the subchar
	282	cc.subchar[j]=0;
	283	cc.setSub=1;
	284	}
	285
	286	// remove the NUL and subchar from s
	287	s.truncate(index);
	288	} else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
	289	// read a substitution string, separated by an equal sign
	290	p=s.getBuffer()+index+1;
	291	length=s.length()-(index+1);
	292	if(length<0 \|\| length>=LENGTHOF(cc.subString)) {
	293	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	294	} else {
	295	u_memcpy(cc.subString, p, length);
	296	// NUL-terminate the subString
	297	cc.subString[length]=0;
	298	cc.setSub=-1;
	299	}
	300
	301	// remove the equal sign and subString from s
	302	s.truncate(index);
	303	}
	304
	305	s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
	306	cc.cbopt=cbopt;
	307	switch(cbopt[0]) {
	308	case SUB_CB:
	309	callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
	310	break;
	311	case SKIP_CB:
	312	callback=UCNV_FROM_U_CALLBACK_SKIP;
	313	break;
	314	case STOP_CB:
	315	callback=UCNV_FROM_U_CALLBACK_STOP;
	316	break;
	317	case ESC_CB:
	318	callback=UCNV_FROM_U_CALLBACK_ESCAPE;
	319	break;
	320	default:
	321	callback=NULL;
	322	break;
	323	}
	324	option=callback==NULL ? cbopt : cbopt+1;
	325	if(*option==0) {
	326	option=NULL;
	327	}
	328
	329	invalidUChars=testCase->getString("invalidUChars", errorCode);
	330	cc.invalidUChars=invalidUChars.getBuffer();
	331	cc.invalidLength=invalidUChars.length();
	332
	333	if(U_FAILURE(errorCode)) {
	334	errln("error parsing conversion/fromUnicode test case %d - %s",
	335	i, u_errorName(errorCode));
	336	errorCode=U_ZERO_ERROR;
	337	} else {
	338	logln("TestFromUnicode[%d] %s", i, charset);
	339	FromUnicodeCase(cc, callback, option);
	340	}
	341	}
	342	delete testData;
	343	}
	344	delete dataModule;
	345	}
	346	else {
	347	dataerrln("Could not load test conversion data");
	348	}
	349	}
	350
	351	static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
	352
	353	void
	354	ConversionTest::TestGetUnicodeSet() {
	355	char charset[100];
	356	UnicodeString s, map, mapnot;
	357	int32_t which;
	358
	359	ParsePosition pos;
	360	UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
	361	UnicodeSet *cnvSetPtr = &cnvSet;
	362	LocalUConverterPointer cnv;
	363
	364	TestDataModule *dataModule;
	365	TestData *testData;
	366	const DataMap *testCase;
	367	UErrorCode errorCode;
	368	int32_t i;
	369
	370	errorCode=U_ZERO_ERROR;
	371	dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
	372	if(U_SUCCESS(errorCode)) {
	373	testData=dataModule->createTestData("getUnicodeSet", errorCode);
	374	if(U_SUCCESS(errorCode)) {
	375	for(i=0; testData->nextCase(testCase, errorCode); ++i) {
	376	if(U_FAILURE(errorCode)) {
	377	errln("error retrieving conversion/getUnicodeSet test case %d - %s",
	378	i, u_errorName(errorCode));
	379	errorCode=U_ZERO_ERROR;
	380	continue;
	381	}
	382
	383	s=testCase->getString("charset", errorCode);
	384	s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
	385
	386	map=testCase->getString("map", errorCode);
	387	mapnot=testCase->getString("mapnot", errorCode);
	388
	389	which=testCase->getInt28("which", errorCode);
	390
	391	if(U_FAILURE(errorCode)) {
	392	errln("error parsing conversion/getUnicodeSet test case %d - %s",
	393	i, u_errorName(errorCode));
	394	errorCode=U_ZERO_ERROR;
	395	continue;
	396	}
	397
	398	// test this test case
	399	mapSet.clear();
	400	mapnotSet.clear();
	401
	402	pos.setIndex(0);
	403	mapSet.applyPattern(map, pos, 0, NULL, errorCode);
	404	if(U_FAILURE(errorCode) \|\| pos.getIndex()!=map.length()) {
	405	errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
	406	" error index %d index %d U+%04x",
	407	i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
	408	errorCode=U_ZERO_ERROR;
	409	continue;
	410	}
	411
	412	pos.setIndex(0);
	413	mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
	414	if(U_FAILURE(errorCode) \|\| pos.getIndex()!=mapnot.length()) {
	415	errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
	416	" error index %d index %d U+%04x",
	417	i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
	418	errorCode=U_ZERO_ERROR;
	419	continue;
	420	}
	421
	422	logln("TestGetUnicodeSet[%d] %s", i, charset);
	423
	424	cnv.adoptInstead(cnv_open(charset, errorCode));
	425	if(U_FAILURE(errorCode)) {
	426	errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
	427	charset, i, u_errorName(errorCode));
	428	errorCode=U_ZERO_ERROR;
	429	continue;
	430	}
	431
	432	ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
	433
	434	if(U_FAILURE(errorCode)) {
	435	errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
	436	charset, i, u_errorName(errorCode));
	437	errorCode=U_ZERO_ERROR;
	438	continue;
	439	}
	440
	441	// are there items that must be in cnvSet but are not?
	442	(diffSet=mapSet).removeAll(cnvSet);
	443	if(!diffSet.isEmpty()) {
	444	diffSet.toPattern(s, TRUE);
	445	if(s.length()>100) {
	446	s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
	447	}
	448	errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
	449	charset, i);
	450	errln(s);
	451	}
	452
	453	// are there items that must not be in cnvSet but are?
	454	(diffSet=mapnotSet).retainAll(cnvSet);
	455	if(!diffSet.isEmpty()) {
	456	diffSet.toPattern(s, TRUE);
	457	if(s.length()>100) {
	458	s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
	459	}
	460	errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
	461	charset, i);
	462	errln(s);
	463	}
	464	}
	465	delete testData;
	466	}
	467	delete dataModule;
	468	}
	469	else {
	470	dataerrln("Could not load test conversion data");
	471	}
	472	}
	473
	474	U_CDECL_BEGIN
	475	static void U_CALLCONV
	476	getUnicodeSetCallback(const void *context,
	477	UConverterFromUnicodeArgs * /fromUArgs/,
	478	const UChar* /codeUnits/,
	479	int32_t /length/,
	480	UChar32 codePoint,
	481	UConverterCallbackReason reason,
	482	UErrorCode *pErrorCode) {
	483	if(reason<=UCNV_IRREGULAR) {
	484	((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point
	485	*pErrorCode=U_ZERO_ERROR; // skip
	486	} // else ignore the reset, close and clone calls.
	487	}
	488	U_CDECL_END
	489
	490	// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
	491	void
	492	ConversionTest::TestGetUnicodeSet2() {
	493	// Build a string with all code points.
	494	UChar32 cpLimit;
	495	int32_t s0Length;
	496	if(quick) {
	497	cpLimit=s0Length=0x10000; // BMP only
	498	} else {
	499	cpLimit=0x110000;
	500	s0Length=0x10000+0x200000; // BMP + surrogate pairs
	501	}
	502	UChar *s0=new UChar[s0Length];
	503	if(s0==NULL) {
	504	return;
	505	}
	506	UChar *s=s0;
	507	UChar32 c;
	508	UChar c2;
	509	// low BMP
	510	for(c=0; c<=0xd7ff; ++c) {
	511	*s++=(UChar)c;
	512	}
	513	// trail surrogates
	514	for(c=0xdc00; c<=0xdfff; ++c) {
	515	*s++=(UChar)c;
	516	}
	517	// lead surrogates
	518	// (after trails so that there is not even one surrogate pair in between)
	519	for(c=0xd800; c<=0xdbff; ++c) {
	520	*s++=(UChar)c;
	521	}
	522	// high BMP
	523	for(c=0xe000; c<=0xffff; ++c) {
	524	*s++=(UChar)c;
	525	}
	526	// supplementary code points = surrogate pairs
	527	if(cpLimit==0x110000) {
	528	for(c=0xd800; c<=0xdbff; ++c) {
	529	for(c2=0xdc00; c2<=0xdfff; ++c2) {
	530	*s++=(UChar)c;
	531	*s++=c2;
	532	}
	533	}
	534	}
	535
	536	static const char *const cnvNames[]={
	537	"UTF-8",
	538	"UTF-7",
	539	"UTF-16",
	540	"US-ASCII",
	541	"ISO-8859-1",
	542	"windows-1252",
	543	"Shift-JIS",
	544	"ibm-1390", // EBCDIC_STATEFUL table
	545	"ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table
	546	"HZ",
	547	"ISO-2022-JP",
	548	"JIS7",
	549	"ISO-2022-CN",
	550	"ISO-2022-CN-EXT",
	551	"LMBCS"
	552	};
	553	LocalUConverterPointer cnv;
	554	char buffer[1024];
	555	int32_t i;
	556	for(i=0; i<LENGTHOF(cnvNames); ++i) {
	557	UErrorCode errorCode=U_ZERO_ERROR;
	558	cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
	559	if(U_FAILURE(errorCode)) {
	560	errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
	561	continue;
	562	}
	563	UnicodeSet expected;
	564	ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
	565	if(U_FAILURE(errorCode)) {
	566	errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
	567	continue;
	568	}
	569	UConverterUnicodeSet which;
	570	for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
	571	if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
	572	ucnv_setFallback(cnv.getAlias(), TRUE);
	573	}
	574	expected.add(0, cpLimit-1);
	575	s=s0;
	576	UBool flush;
	577	do {
	578	char *t=buffer;
	579	flush=(UBool)(s==s0+s0Length);
	580	ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
	581	if(U_FAILURE(errorCode)) {
	582	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
	583	errorCode=U_ZERO_ERROR;
	584	continue;
	585	} else {
	586	break; // unexpected error, should not occur
	587	}
	588	}
	589	} while(!flush);
	590	UnicodeSet set;
	591	ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
	592	if(cpLimit<0x110000) {
	593	set.remove(cpLimit, 0x10ffff);
	594	}
	595	if(which==UCNV_ROUNDTRIP_SET) {
	596	// ignore PUA code points because they will be converted even if they
	597	// are fallbacks and when other fallbacks are turned off,
	598	// but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
	599	expected.remove(0xe000, 0xf8ff);
	600	expected.remove(0xf0000, 0xffffd);
	601	expected.remove(0x100000, 0x10fffd);
	602	set.remove(0xe000, 0xf8ff);
	603	set.remove(0xf0000, 0xffffd);
	604	set.remove(0x100000, 0x10fffd);
	605	}
	606	if(set!=expected) {
	607	// First try to see if we have different sets because ucnv_getUnicodeSet()
	608	// added strings: The above conversion method does not tell us what strings might be convertible.
	609	// Remove strings from the set and compare again.
	610	// Unfortunately, there are no good, direct set methods for finding out whether there are strings
	611	// in the set, nor for enumerating or removing just them.
	612	// Intersect all code points with the set. The intersection will not contain strings.
	613	UnicodeSet temp(0, 0x10ffff);
	614	temp.retainAll(set);
	615	set=temp;
	616	}
	617	if(set!=expected) {
	618	UnicodeSet diffSet;
	619	UnicodeString out;
	620
	621	// are there items that must be in the set but are not?
	622	(diffSet=expected).removeAll(set);
	623	if(!diffSet.isEmpty()) {
	624	diffSet.toPattern(out, TRUE);
	625	if(out.length()>100) {
	626	out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
	627	}
	628	errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
	629	cnvNames[i], which);
	630	errln(out);
	631	}
	632
	633	// are there items that must not be in the set but are?
	634	(diffSet=set).removeAll(expected);
	635	if(!diffSet.isEmpty()) {
	636	diffSet.toPattern(out, TRUE);
	637	if(out.length()>100) {
	638	out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
	639	}
	640	errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
	641	cnvNames[i], which);
	642	errln(out);
	643	}
	644	}
	645	}
	646	}
	647
	648	delete [] s0;
	649	}
	650
	651	// open testdata or ICU data converter ------------------------------------- ***
	652
	653	UConverter *
	654	ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
	655	if(name!=NULL && name=='') {
	656	/* loadTestData(): set the data directory */
	657	return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
	658	} else if(name!=NULL && *name=='+') {
	659	return ucnv_open((name+1), &errorCode);
	660	} else {
	661	return ucnv_open(name, &errorCode);
	662	}
	663	}
	664
	665	// output helpers ---------------------------------------------------------- ***
	666
	667	static inline char
	668	hexDigit(uint8_t digit) {
	669	return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
	670	}
	671
	672	static char *
	673	printBytes(const uint8_t bytes, int32_t length, char out) {
	674	uint8_t b;
	675
	676	if(length>0) {
	677	b=*bytes++;
	678	--length;
	679	*out++=hexDigit((uint8_t)(b>>4));
	680	*out++=hexDigit((uint8_t)(b&0xf));
	681	}
	682
	683	while(length>0) {
	684	b=*bytes++;
	685	--length;
	686	*out++=' ';
	687	*out++=hexDigit((uint8_t)(b>>4));
	688	*out++=hexDigit((uint8_t)(b&0xf));
	689	}
	690	*out++=0;
	691	return out;
	692	}
	693
	694	static char *
	695	printUnicode(const UChar unicode, int32_t length, char out) {
	696	UChar32 c;
	697	int32_t i;
	698
	699	for(i=0; i<length;) {
	700	if(i>0) {
	701	*out++=' ';
	702	}
	703	U16_NEXT(unicode, i, length, c);
	704	// write 4..6 digits
	705	if(c>=0x100000) {
	706	*out++='1';
	707	}
	708	if(c>=0x10000) {
	709	*out++=hexDigit((uint8_t)((c>>16)&0xf));
	710	}
	711	*out++=hexDigit((uint8_t)((c>>12)&0xf));
	712	*out++=hexDigit((uint8_t)((c>>8)&0xf));
	713	*out++=hexDigit((uint8_t)((c>>4)&0xf));
	714	*out++=hexDigit((uint8_t)(c&0xf));
	715	}
	716	*out++=0;
	717	return out;
	718	}
	719
	720	static char *
	721	printOffsets(const int32_t offsets, int32_t length, char out) {
	722	int32_t i, o, d;
	723
	724	if(offsets==NULL) {
	725	length=0;
	726	}
	727
	728	for(i=0; i<length; ++i) {
	729	if(i>0) {
	730	*out++=' ';
	731	}
	732	o=offsets[i];
	733
	734	// print all offsets with 2 characters each (-x, -9..99, xx)
	735	if(o<-9) {
	736	*out++='-';
	737	*out++='x';
	738	} else if(o<0) {
	739	*out++='-';
	740	*out++=(char)('0'-o);
	741	} else if(o<=99) {
	742	*out++=(d=o/10)==0 ? ' ' : (char)('0'+d);
	743	*out++=(char)('0'+o%10);
	744	} else /* o>99 */ {
	745	*out++='x';
	746	*out++='x';
	747	}
	748	}
	749	*out++=0;
	750	return out;
	751	}
	752
	753	// toUnicode test worker functions ----------------------------------------- ***
	754
	755	static int32_t
	756	stepToUnicode(ConversionCase &cc, UConverter *cnv,
	757	UChar *result, int32_t resultCapacity,
	758	int32_t resultOffsets, / also resultCapacity */
	759	int32_t step,
	760	UErrorCode *pErrorCode) {
	761	const char source, sourceLimit, *bytesLimit;
	762	UChar target, targetLimit, *resultLimit;
	763	UBool flush;
	764
	765	source=(const char *)cc.bytes;
	766	target=result;
	767	bytesLimit=source+cc.bytesLength;
	768	resultLimit=result+resultCapacity;
	769
	770	if(step>=0) {
	771	// call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
	772	// move only one buffer (in vs. out) at a time to be extra mean
	773	// step==0 performs bulk conversion and generates offsets
	774
	775	// initialize the partial limits for the loop
	776	if(step==0) {
	777	// use the entire buffers
	778	sourceLimit=bytesLimit;
	779	targetLimit=resultLimit;
	780	flush=cc.finalFlush;
	781	} else {
	782	// start with empty partial buffers
	783	sourceLimit=source;
	784	targetLimit=target;
	785	flush=FALSE;
	786
	787	// output offsets only for bulk conversion
	788	resultOffsets=NULL;
	789	}
	790
	791	for(;;) {
	792	// resetting the opposite conversion direction must not affect this one
	793	ucnv_resetFromUnicode(cnv);
	794
	795	// convert
	796	ucnv_toUnicode(cnv,
	797	&target, targetLimit,
	798	&source, sourceLimit,
	799	resultOffsets,
	800	flush, pErrorCode);
	801
	802	// check pointers and errors
	803	if(source>sourceLimit \|\| target>targetLimit) {
	804	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	805	break;
	806	} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
	807	if(target!=targetLimit) {
	808	// buffer overflow must only be set when the target is filled
	809	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	810	break;
	811	} else if(targetLimit==resultLimit) {
	812	// not just a partial overflow
	813	break;
	814	}
	815
	816	// the partial target is filled, set a new limit, reset the error and continue
	817	targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
	818	*pErrorCode=U_ZERO_ERROR;
	819	} else if(U_FAILURE(*pErrorCode)) {
	820	// some other error occurred, done
	821	break;
	822	} else {
	823	if(source!=sourceLimit) {
	824	// when no error occurs, then the input must be consumed
	825	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	826	break;
	827	}
	828
	829	if(sourceLimit==bytesLimit) {
	830	// we are done
	831	break;
	832	}
	833
	834	// the partial conversion succeeded, set a new limit and continue
	835	sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
	836	flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
	837	}
	838	}
	839	} else /* step<0 */ {
	840	/*
	841	* step==-1: call only ucnv_getNextUChar()
	842	* otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
	843	* if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
	844	* else give it at most (-step-2)/2 bytes
	845	*/
	846	UChar32 c;
	847
	848	// end the loop by getting an index out of bounds error
	849	for(;;) {
	850	// resetting the opposite conversion direction must not affect this one
	851	ucnv_resetFromUnicode(cnv);
	852
	853	// convert
	854	if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
	855	sourceLimit=source; // use sourceLimit not as a real limit
	856	// but to remember the pre-getNextUChar source pointer
	857	c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
	858
	859	// check pointers and errors
	860	if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
	861	if(source!=bytesLimit) {
	862	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	863	} else {
	864	*pErrorCode=U_ZERO_ERROR;
	865	}
	866	break;
	867	} else if(U_FAILURE(*pErrorCode)) {
	868	break;
	869	}
	870	// source may not move if c is from previous overflow
	871
	872	if(target==resultLimit) {
	873	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	874	break;
	875	}
	876	if(c<=0xffff) {
	877	*target++=(UChar)c;
	878	} else {
	879	*target++=U16_LEAD(c);
	880	if(target==resultLimit) {
	881	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	882	break;
	883	}
	884	*target++=U16_TRAIL(c);
	885	}
	886
	887	// alternate between -n-1 and -n but leave -1 alone
	888	if(step<-1) {
	889	++step;
	890	}
	891	} else /* step is even */ {
	892	// allow only one UChar output
	893	targetLimit=target<resultLimit ? target+1 : resultLimit;
	894
	895	// as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
	896	// and never output offsets
	897	if(step==-2) {
	898	sourceLimit=bytesLimit;
	899	} else {
	900	sourceLimit=source+(-step-2)/2;
	901	if(sourceLimit>bytesLimit) {
	902	sourceLimit=bytesLimit;
	903	}
	904	}
	905
	906	ucnv_toUnicode(cnv,
	907	&target, targetLimit,
	908	&source, sourceLimit,
	909	NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
	910
	911	// check pointers and errors
	912	if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
	913	if(target!=targetLimit) {
	914	// buffer overflow must only be set when the target is filled
	915	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	916	break;
	917	} else if(targetLimit==resultLimit) {
	918	// not just a partial overflow
	919	break;
	920	}
	921
	922	// the partial target is filled, set a new limit and continue
	923	*pErrorCode=U_ZERO_ERROR;
	924	} else if(U_FAILURE(*pErrorCode)) {
	925	// some other error occurred, done
	926	break;
	927	} else {
	928	if(source!=sourceLimit) {
	929	// when no error occurs, then the input must be consumed
	930	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	931	break;
	932	}
	933
	934	// we are done (flush==TRUE) but we continue, to get the index out of bounds error above
	935	}
	936
	937	--step;
	938	}
	939	}
	940	}
	941
	942	return (int32_t)(target-result);
	943	}
	944
	945	UBool
	946	ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
	947	// open the converter
	948	IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
	949	LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
	950	if(errorCode.isFailure()) {
	951	errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
	952	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
	953	errorCode.reset();
	954	return FALSE;
	955	}
	956
	957	// set the callback
	958	if(callback!=NULL) {
	959	ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
	960	if(U_FAILURE(errorCode)) {
	961	errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
	962	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
	963	return FALSE;
	964	}
	965	}
	966
	967	int32_t resultOffsets[256];
	968	UChar result[256];
	969	int32_t resultLength;
	970	UBool ok;
	971
	972	static const struct {
	973	int32_t step;
	974	const char *name;
	975	} steps[]={
	976	{ 0, "bulk" }, // must be first for offsets to be checked
	977	{ 1, "step=1" },
	978	{ 3, "step=3" },
	979	{ 7, "step=7" },
	980	{ -1, "getNext" },
	981	{ -2, "toU(bulk)+getNext" },
	982	{ -3, "getNext+toU(bulk)" },
	983	{ -4, "toU(1)+getNext" },
	984	{ -5, "getNext+toU(1)" },
	985	{ -12, "toU(5)+getNext" },
	986	{ -13, "getNext+toU(5)" },
	987	};
	988	int32_t i, step;
	989
	990	ok=TRUE;
	991	for(i=0; i<LENGTHOF(steps) && ok; ++i) {
	992	step=steps[i].step;
	993	if(step<0 && !cc.finalFlush) {
	994	// skip ucnv_getNextUChar() if !finalFlush because
	995	// ucnv_getNextUChar() always implies flush
	996	continue;
	997	}
	998	if(step!=0) {
	999	// bulk test is first, then offsets are not checked any more
	1000	cc.offsets=NULL;
	1001	}
	1002	else {
	1003	memset(resultOffsets, -1, LENGTHOF(resultOffsets));
	1004	}
	1005	memset(result, -1, LENGTHOF(result));
	1006	errorCode.reset();
	1007	resultLength=stepToUnicode(cc, cnv.getAlias(),
	1008	result, LENGTHOF(result),
	1009	step==0 ? resultOffsets : NULL,
	1010	step, errorCode);
	1011	ok=checkToUnicode(
	1012	cc, cnv.getAlias(), steps[i].name,
	1013	result, resultLength,
	1014	cc.offsets!=NULL ? resultOffsets : NULL,
	1015	errorCode);
	1016	if(errorCode.isFailure() \|\| !cc.finalFlush) {
	1017	// reset if an error occurred or we did not flush
	1018	// otherwise do nothing to make sure that flushing resets
	1019	ucnv_resetToUnicode(cnv.getAlias());
	1020	}
	1021	if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
	1022	errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
	1023	cc.caseNr, cc.charset, resultLength);
	1024	}
	1025	if (result[resultLength] != (UChar)-1) {
	1026	errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
	1027	cc.caseNr, cc.charset, resultLength);
	1028	}
	1029	}
	1030
	1031	// not a real loop, just a convenience for breaking out of the block
	1032	while(ok && cc.finalFlush) {
	1033	// test ucnv_toUChars()
	1034	memset(result, 0, sizeof(result));
	1035
	1036	errorCode.reset();
	1037	resultLength=ucnv_toUChars(cnv.getAlias(),
	1038	result, LENGTHOF(result),
	1039	(const char *)cc.bytes, cc.bytesLength,
	1040	errorCode);
	1041	ok=checkToUnicode(
	1042	cc, cnv.getAlias(), "toUChars",
	1043	result, resultLength,
	1044	NULL,
	1045	errorCode);
	1046	if(!ok) {
	1047	break;
	1048	}
	1049
	1050	// test preflighting
	1051	// keep the correct result for simple checking
	1052	errorCode.reset();
	1053	resultLength=ucnv_toUChars(cnv.getAlias(),
	1054	NULL, 0,
	1055	(const char *)cc.bytes, cc.bytesLength,
	1056	errorCode);
	1057	if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
	1058	errorCode.reset();
	1059	}
	1060	ok=checkToUnicode(
	1061	cc, cnv.getAlias(), "preflight toUChars",
	1062	result, resultLength,
	1063	NULL,
	1064	errorCode);
	1065	break;
	1066	}
	1067
	1068	errorCode.reset(); // all errors have already been reported
	1069	return ok;
	1070	}
	1071
	1072	UBool
	1073	ConversionTest::checkToUnicode(ConversionCase &cc, UConverter cnv, const char name,
	1074	const UChar *result, int32_t resultLength,
	1075	const int32_t *resultOffsets,
	1076	UErrorCode resultErrorCode) {
	1077	char resultInvalidChars[8];
	1078	int8_t resultInvalidLength;
	1079	UErrorCode errorCode;
	1080
	1081	const char *msg;
	1082
	1083	// reset the message; NULL will mean "ok"
	1084	msg=NULL;
	1085
	1086	errorCode=U_ZERO_ERROR;
	1087	resultInvalidLength=sizeof(resultInvalidChars);
	1088	ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
	1089	if(U_FAILURE(errorCode)) {
	1090	errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
	1091	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
	1092	return FALSE;
	1093	}
	1094
	1095	// check everything that might have gone wrong
	1096	if(cc.unicodeLength!=resultLength) {
	1097	msg="wrong result length";
	1098	} else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
	1099	msg="wrong result string";
	1100	} else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLengthsizeof(cc.offsets))) {
	1101	msg="wrong offsets";
	1102	} else if(cc.outErrorCode!=resultErrorCode) {
	1103	msg="wrong error code";
	1104	} else if(cc.invalidLength!=resultInvalidLength) {
	1105	msg="wrong length of last invalid input";
	1106	} else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
	1107	msg="wrong last invalid input";
	1108	}
	1109
	1110	if(msg==NULL) {
	1111	return TRUE;
	1112	} else {
	1113	char buffer[2000]; // one buffer for all strings
	1114	char s, bytesString, unicodeString, resultString,
	1115	offsetsString, resultOffsetsString,
	1116	invalidCharsString, resultInvalidCharsString;
	1117
	1118	bytesString=s=buffer;
	1119	s=printBytes(cc.bytes, cc.bytesLength, bytesString);
	1120	s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
	1121	s=printUnicode(result, resultLength, resultString=s);
	1122	s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
	1123	s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
	1124	s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
	1125	s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
	1126
	1127	if((s-buffer)>(int32_t)sizeof(buffer)) {
	1128	errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
	1129	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
	1130	exit(1);
	1131	}
	1132
	1133	errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
	1134	" bytes <%s>[%d]\n"
	1135	" expected <%s>[%d]\n"
	1136	" result <%s>[%d]\n"
	1137	" offsets <%s>\n"
	1138	" result offsets <%s>\n"
	1139	" error code expected %s got %s\n"
	1140	" invalidChars expected <%s> got <%s>\n",
	1141	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
	1142	bytesString, cc.bytesLength,
	1143	unicodeString, cc.unicodeLength,
	1144	resultString, resultLength,
	1145	offsetsString,
	1146	resultOffsetsString,
	1147	u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
	1148	invalidCharsString, resultInvalidCharsString);
	1149
	1150	return FALSE;
	1151	}
	1152	}
	1153
	1154	// fromUnicode test worker functions --------------------------------------- ***
	1155
	1156	static int32_t
	1157	stepFromUTF8(ConversionCase &cc,
	1158	UConverter utf8Cnv, UConverter cnv,
	1159	char *result, int32_t resultCapacity,
	1160	int32_t step,
	1161	UErrorCode *pErrorCode) {
	1162	const char source, sourceLimit, *utf8Limit;
	1163	UChar pivotBuffer[32];
	1164	UChar pivotSource, pivotTarget, *pivotLimit;
	1165	char target, targetLimit, *resultLimit;
	1166	UBool flush;
	1167
	1168	source=cc.utf8;
	1169	pivotSource=pivotTarget=pivotBuffer;
	1170	target=result;
	1171	utf8Limit=source+cc.utf8Length;
	1172	resultLimit=result+resultCapacity;
	1173
	1174	// call ucnv_convertEx() with in/out buffers no larger than (step) at a time
	1175	// move only one buffer (in vs. out) at a time to be extra mean
	1176	// step==0 performs bulk conversion
	1177
	1178	// initialize the partial limits for the loop
	1179	if(step==0) {
	1180	// use the entire buffers
	1181	sourceLimit=utf8Limit;
	1182	targetLimit=resultLimit;
	1183	flush=cc.finalFlush;
	1184
	1185	pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
	1186	} else {
	1187	// start with empty partial buffers
	1188	sourceLimit=source;
	1189	targetLimit=target;
	1190	flush=FALSE;
	1191
	1192	// empty pivot is not allowed, make it of length step
	1193	pivotLimit=pivotBuffer+step;
	1194	}
	1195
	1196	for(;;) {
	1197	// resetting the opposite conversion direction must not affect this one
	1198	ucnv_resetFromUnicode(utf8Cnv);
	1199	ucnv_resetToUnicode(cnv);
	1200
	1201	// convert
	1202	ucnv_convertEx(cnv, utf8Cnv,
	1203	&target, targetLimit,
	1204	&source, sourceLimit,
	1205	pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
	1206	FALSE, flush, pErrorCode);
	1207
	1208	// check pointers and errors
	1209	if(source>sourceLimit \|\| target>targetLimit) {
	1210	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1211	break;
	1212	} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
	1213	if(target!=targetLimit) {
	1214	// buffer overflow must only be set when the target is filled
	1215	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1216	break;
	1217	} else if(targetLimit==resultLimit) {
	1218	// not just a partial overflow
	1219	break;
	1220	}
	1221
	1222	// the partial target is filled, set a new limit, reset the error and continue
	1223	targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
	1224	*pErrorCode=U_ZERO_ERROR;
	1225	} else if(U_FAILURE(*pErrorCode)) {
	1226	if(pivotSource==pivotBuffer) {
	1227	// toUnicode error, should not occur
	1228	// toUnicode errors are tested in cintltst TestConvertExFromUTF8()
	1229	break;
	1230	} else {
	1231	// fromUnicode error
	1232	// some other error occurred, done
	1233	break;
	1234	}
	1235	} else {
	1236	if(source!=sourceLimit) {
	1237	// when no error occurs, then the input must be consumed
	1238	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1239	break;
	1240	}
	1241
	1242	if(sourceLimit==utf8Limit) {
	1243	// we are done
	1244	if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
	1245	// ucnv_convertEx() warns about not terminating the output
	1246	// but ucnv_fromUnicode() does not and so
	1247	// checkFromUnicode() does not expect it
	1248	*pErrorCode=U_ZERO_ERROR;
	1249	}
	1250	break;
	1251	}
	1252
	1253	// the partial conversion succeeded, set a new limit and continue
	1254	sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
	1255	flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
	1256	}
	1257	}
	1258
	1259	return (int32_t)(target-result);
	1260	}
	1261
	1262	static int32_t
	1263	stepFromUnicode(ConversionCase &cc, UConverter *cnv,
	1264	char *result, int32_t resultCapacity,
	1265	int32_t resultOffsets, / also resultCapacity */
	1266	int32_t step,
	1267	UErrorCode *pErrorCode) {
	1268	const UChar source, sourceLimit, *unicodeLimit;
	1269	char target, targetLimit, *resultLimit;
	1270	UBool flush;
	1271
	1272	source=cc.unicode;
	1273	target=result;
	1274	unicodeLimit=source+cc.unicodeLength;
	1275	resultLimit=result+resultCapacity;
	1276
	1277	// call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
	1278	// move only one buffer (in vs. out) at a time to be extra mean
	1279	// step==0 performs bulk conversion and generates offsets
	1280
	1281	// initialize the partial limits for the loop
	1282	if(step==0) {
	1283	// use the entire buffers
	1284	sourceLimit=unicodeLimit;
	1285	targetLimit=resultLimit;
	1286	flush=cc.finalFlush;
	1287	} else {
	1288	// start with empty partial buffers
	1289	sourceLimit=source;
	1290	targetLimit=target;
	1291	flush=FALSE;
	1292
	1293	// output offsets only for bulk conversion
	1294	resultOffsets=NULL;
	1295	}
	1296
	1297	for(;;) {
	1298	// resetting the opposite conversion direction must not affect this one
	1299	ucnv_resetToUnicode(cnv);
	1300
	1301	// convert
	1302	ucnv_fromUnicode(cnv,
	1303	&target, targetLimit,
	1304	&source, sourceLimit,
	1305	resultOffsets,
	1306	flush, pErrorCode);
	1307
	1308	// check pointers and errors
	1309	if(source>sourceLimit \|\| target>targetLimit) {
	1310	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1311	break;
	1312	} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
	1313	if(target!=targetLimit) {
	1314	// buffer overflow must only be set when the target is filled
	1315	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1316	break;
	1317	} else if(targetLimit==resultLimit) {
	1318	// not just a partial overflow
	1319	break;
	1320	}
	1321
	1322	// the partial target is filled, set a new limit, reset the error and continue
	1323	targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
	1324	*pErrorCode=U_ZERO_ERROR;
	1325	} else if(U_FAILURE(*pErrorCode)) {
	1326	// some other error occurred, done
	1327	break;
	1328	} else {
	1329	if(source!=sourceLimit) {
	1330	// when no error occurs, then the input must be consumed
	1331	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1332	break;
	1333	}
	1334
	1335	if(sourceLimit==unicodeLimit) {
	1336	// we are done
	1337	break;
	1338	}
	1339
	1340	// the partial conversion succeeded, set a new limit and continue
	1341	sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
	1342	flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
	1343	}
	1344	}
	1345
	1346	return (int32_t)(target-result);
	1347	}
	1348
	1349	UBool
	1350	ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
	1351	UConverter *cnv;
	1352	UErrorCode errorCode;
	1353
	1354	// open the converter
	1355	errorCode=U_ZERO_ERROR;
	1356	cnv=cnv_open(cc.charset, errorCode);
	1357	if(U_FAILURE(errorCode)) {
	1358	errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
	1359	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
	1360	return FALSE;
	1361	}
	1362	ucnv_resetToUnicode(utf8Cnv);
	1363
	1364	// set the callback
	1365	if(callback!=NULL) {
	1366	ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
	1367	if(U_FAILURE(errorCode)) {
	1368	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
	1369	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
	1370	ucnv_close(cnv);
	1371	return FALSE;
	1372	}
	1373	}
	1374
	1375	// set the fallbacks flag
	1376	// TODO change with Jitterbug 2401, then add a similar call for toUnicode too
	1377	ucnv_setFallback(cnv, cc.fallbacks);
	1378
	1379	// set the subchar
	1380	int32_t length;
	1381
	1382	if(cc.setSub>0) {
	1383	length=(int32_t)strlen(cc.subchar);
	1384	ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
	1385	if(U_FAILURE(errorCode)) {
	1386	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
	1387	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
	1388	ucnv_close(cnv);
	1389	return FALSE;
	1390	}
	1391	} else if(cc.setSub<0) {
	1392	ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
	1393	if(U_FAILURE(errorCode)) {
	1394	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
	1395	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
	1396	ucnv_close(cnv);
	1397	return FALSE;
	1398	}
	1399	}
	1400
	1401	// convert unicode to utf8
	1402	char utf8[256];
	1403	cc.utf8=utf8;
	1404	u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
	1405	cc.unicode, cc.unicodeLength,
	1406	&errorCode);
	1407	if(U_FAILURE(errorCode)) {
	1408	// skip UTF-8 testing of a string with an unpaired surrogate,
	1409	// or of one that's too long
	1410	// toUnicode errors are tested in cintltst TestConvertExFromUTF8()
	1411	cc.utf8Length=-1;
	1412	}
	1413
	1414	int32_t resultOffsets[256];
	1415	char result[256];
	1416	int32_t resultLength;
	1417	UBool ok;
	1418
	1419	static const struct {
	1420	int32_t step;
	1421	const char name, utf8Name;
	1422	} steps[]={
	1423	{ 0, "bulk", "utf8" }, // must be first for offsets to be checked
	1424	{ 1, "step=1", "utf8 step=1" },
	1425	{ 3, "step=3", "utf8 step=3" },
	1426	{ 7, "step=7", "utf8 step=7" }
	1427	};
	1428	int32_t i, step;
	1429
	1430	ok=TRUE;
	1431	for(i=0; i<LENGTHOF(steps) && ok; ++i) {
	1432	step=steps[i].step;
	1433	memset(resultOffsets, -1, LENGTHOF(resultOffsets));
	1434	memset(result, -1, LENGTHOF(result));
	1435	errorCode=U_ZERO_ERROR;
	1436	resultLength=stepFromUnicode(cc, cnv,
	1437	result, LENGTHOF(result),
	1438	step==0 ? resultOffsets : NULL,
	1439	step, &errorCode);
	1440	ok=checkFromUnicode(
	1441	cc, cnv, steps[i].name,
	1442	(uint8_t *)result, resultLength,
	1443	cc.offsets!=NULL ? resultOffsets : NULL,
	1444	errorCode);
	1445	if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {
	1446	// reset if an error occurred or we did not flush
	1447	// otherwise do nothing to make sure that flushing resets
	1448	ucnv_resetFromUnicode(cnv);
	1449	}
	1450	if (resultOffsets[resultLength] != -1) {
	1451	errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
	1452	cc.caseNr, cc.charset, resultLength);
	1453	}
	1454	if (result[resultLength] != (char)-1) {
	1455	errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
	1456	cc.caseNr, cc.charset, resultLength);
	1457	}
	1458
	1459	// bulk test is first, then offsets are not checked any more
	1460	cc.offsets=NULL;
	1461
	1462	// test direct conversion from UTF-8
	1463	if(cc.utf8Length>=0) {
	1464	errorCode=U_ZERO_ERROR;
	1465	resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
	1466	result, LENGTHOF(result),
	1467	step, &errorCode);
	1468	ok=checkFromUnicode(
	1469	cc, cnv, steps[i].utf8Name,
	1470	(uint8_t *)result, resultLength,
	1471	NULL,
	1472	errorCode);
	1473	if(U_FAILURE(errorCode) \|\| !cc.finalFlush) {
	1474	// reset if an error occurred or we did not flush
	1475	// otherwise do nothing to make sure that flushing resets
	1476	ucnv_resetToUnicode(utf8Cnv);
	1477	ucnv_resetFromUnicode(cnv);
	1478	}
	1479	}
	1480	}
	1481
	1482	// not a real loop, just a convenience for breaking out of the block
	1483	while(ok && cc.finalFlush) {
	1484	// test ucnv_fromUChars()
	1485	memset(result, 0, sizeof(result));
	1486
	1487	errorCode=U_ZERO_ERROR;
	1488	resultLength=ucnv_fromUChars(cnv,
	1489	result, LENGTHOF(result),
	1490	cc.unicode, cc.unicodeLength,
	1491	&errorCode);
	1492	ok=checkFromUnicode(
	1493	cc, cnv, "fromUChars",
	1494	(uint8_t *)result, resultLength,
	1495	NULL,
	1496	errorCode);
	1497	if(!ok) {
	1498	break;
	1499	}
	1500
	1501	// test preflighting
	1502	// keep the correct result for simple checking
	1503	errorCode=U_ZERO_ERROR;
	1504	resultLength=ucnv_fromUChars(cnv,
	1505	NULL, 0,
	1506	cc.unicode, cc.unicodeLength,
	1507	&errorCode);
	1508	if(errorCode==U_STRING_NOT_TERMINATED_WARNING \|\| errorCode==U_BUFFER_OVERFLOW_ERROR) {
	1509	errorCode=U_ZERO_ERROR;
	1510	}
	1511	ok=checkFromUnicode(
	1512	cc, cnv, "preflight fromUChars",
	1513	(uint8_t *)result, resultLength,
	1514	NULL,
	1515	errorCode);
	1516	break;
	1517	}
	1518
	1519	ucnv_close(cnv);
	1520	return ok;
	1521	}
	1522
	1523	UBool
	1524	ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter cnv, const char name,
	1525	const uint8_t *result, int32_t resultLength,
	1526	const int32_t *resultOffsets,
	1527	UErrorCode resultErrorCode) {
	1528	UChar resultInvalidUChars[8];
	1529	int8_t resultInvalidLength;
	1530	UErrorCode errorCode;
	1531
	1532	const char *msg;
	1533
	1534	// reset the message; NULL will mean "ok"
	1535	msg=NULL;
	1536
	1537	errorCode=U_ZERO_ERROR;
	1538	resultInvalidLength=LENGTHOF(resultInvalidUChars);
	1539	ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
	1540	if(U_FAILURE(errorCode)) {
	1541	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
	1542	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
	1543	return FALSE;
	1544	}
	1545
	1546	// check everything that might have gone wrong
	1547	if(cc.bytesLength!=resultLength) {
	1548	msg="wrong result length";
	1549	} else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
	1550	msg="wrong result string";
	1551	} else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLengthsizeof(cc.offsets))) {
	1552	msg="wrong offsets";
	1553	} else if(cc.outErrorCode!=resultErrorCode) {
	1554	msg="wrong error code";
	1555	} else if(cc.invalidLength!=resultInvalidLength) {
	1556	msg="wrong length of last invalid input";
	1557	} else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
	1558	msg="wrong last invalid input";
	1559	}
	1560
	1561	if(msg==NULL) {
	1562	return TRUE;
	1563	} else {
	1564	char buffer[2000]; // one buffer for all strings
	1565	char s, unicodeString, bytesString, resultString,
	1566	offsetsString, resultOffsetsString,
	1567	invalidCharsString, resultInvalidUCharsString;
	1568
	1569	unicodeString=s=buffer;
	1570	s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
	1571	s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
	1572	s=printBytes(result, resultLength, resultString=s);
	1573	s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
	1574	s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
	1575	s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
	1576	s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
	1577
	1578	if((s-buffer)>(int32_t)sizeof(buffer)) {
	1579	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
	1580	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
	1581	exit(1);
	1582	}
	1583
	1584	errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
	1585	" unicode <%s>[%d]\n"
	1586	" expected <%s>[%d]\n"
	1587	" result <%s>[%d]\n"
	1588	" offsets <%s>\n"
	1589	" result offsets <%s>\n"
	1590	" error code expected %s got %s\n"
	1591	" invalidChars expected <%s> got <%s>\n",
	1592	cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
	1593	unicodeString, cc.unicodeLength,
	1594	bytesString, cc.bytesLength,
	1595	resultString, resultLength,
	1596	offsetsString,
	1597	resultOffsetsString,
	1598	u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
	1599	invalidCharsString, resultInvalidUCharsString);
	1600
	1601	return FALSE;
	1602	}
	1603	}
	1604
	1605	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */