git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 2002-2010, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* file name: ucnv_u16.c
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2002jul01
	12	* created by: Markus W. Scherer
	13	*
	14	* UTF-16 converter implementation. Used to be in ucnv_utf.c.
	15	*/
	16
	17	#include "unicode/utypes.h"
	18
	19	#if !UCONFIG_NO_CONVERSION
	20
	21	#include "unicode/ucnv.h"
	22	#include "ucnv_bld.h"
	23	#include "ucnv_cnv.h"
	24	#include "cmemory.h"
	25
		/*
		 * Value of cnv->fromUnicodeStatus that tells the fromUnicode functions
		 * in this file to emit a BOM before any other output; they set the
		 * status back to 0 once the BOM has been written.
		 */
	26	enum {
	27	UCNV_NEED_TO_WRITE_BOM=1
	28	};
	29
	30	/*
	31	* The UTF-16 toUnicode implementation is also used for the Java-specific
	32	* "with BOM" variants of UTF-16BE and UTF-16LE.
	33	*/
		/*
		 * Forward declaration only; the definition is not part of this section.
		 * The UTF-16BE and UTF-16LE toUnicode functions below hand the whole
		 * call over to this function while converter->mode<8.
		 */
	34	static void
	35	_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	36	UErrorCode *pErrorCode);
	37
	38	/* UTF-16BE ----------------------------------------------------------------- */
	39
		/*
		 * _UTF16PEFromUnicodeWithOffsets is an alias for the fromUnicode function
		 * whose byte order matches U_IS_BIG_ENDIAN ("PE" presumably stands for
		 * platform endianness -- confirm against the rest of the file).
		 */
	40	#if U_IS_BIG_ENDIAN
	41	# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets
	42	#else
	43	# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets
	44	#endif
	45
	46
		/*
		 * Convert the UChars in [pArgs->source, pArgs->sourceLimit) to UTF-16BE
		 * bytes (high byte first) at pArgs->target.
		 *
		 * - If cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM, the bytes FE FF are
		 *   written first and the status is cleared.
		 * - A surrogate pair is always written as one 4-byte unit. A lead surrogate
		 *   at the very end of the input is kept in cnv->fromUChar32 and completed
		 *   by the next call.
		 * - An unmatched surrogate sets U_ILLEGAL_CHAR_FOUND and is left in
		 *   cnv->fromUChar32.
		 * - When the next character does not fit completely, its bytes are passed
		 *   to ucnv_fromUWriteBytes() via overflow[]; U_BUFFER_OVERFLOW_ERROR is
		 *   set when input remains and no target capacity is left.
		 * - If pArgs->offsets is not NULL, one source index is stored per output
		 *   byte (-1 for the BOM and for a pair completed from the previous call).
		 *
		 * On a normal return pArgs->source, pArgs->target and pArgs->offsets are
		 * advanced past what was consumed and produced.
		 */
	47	static void
	48	_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	49	UErrorCode *pErrorCode) {
	50	UConverter *cnv;
	51	const UChar *source;
	52	char *target;
	53	int32_t *offsets;
	54
	55	uint32_t targetCapacity, length, sourceIndex;
	56	UChar c, trail;
	57	char overflow[4];
	58
	59	source=pArgs->source;
		/* length is unsigned, so the test below is effectively length==0 */
	60	length=(int32_t)(pArgs->sourceLimit-source);
	61	if(length<=0) {
	62	/* no input, nothing to do */
	63	return;
	64	}
	65
	66	cnv=pArgs->converter;
	67
	68	/* write the BOM if necessary */
	69	if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	70	static const char bom[]={ (char)0xfe, (char)0xff };
	71	ucnv_fromUWriteBytes(cnv,
	72	bom, 2,
	73	&pArgs->target, pArgs->targetLimit,
	74	&pArgs->offsets, -1,
	75	pErrorCode);
	76	cnv->fromUnicodeStatus=0;
	77	}
	78
		/* read target only now: the BOM write above may have advanced pArgs->target */
	79	target=pArgs->target;
	80	if(target >= pArgs->targetLimit) {
	81	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	82	return;
	83	}
	84
	85	targetCapacity=(uint32_t)(pArgs->targetLimit-target);
	86	offsets=pArgs->offsets;
	87	sourceIndex=0;
	88
	89	/* c!=0 indicates in several places outside the main loops that a surrogate was found */
	90
	91	if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
	92	/* the last buffer ended with a lead surrogate, output the surrogate pair */
	93	++source;
	94	--length;
	95	target[0]=(uint8_t)(c>>8);
	96	target[1]=(uint8_t)c;
	97	target[2]=(uint8_t)(trail>>8);
	98	target[3]=(uint8_t)trail;
	99	target+=4;
	100	targetCapacity-=4;
	101	if(offsets!=NULL) {
	102	*offsets++=-1;
	103	*offsets++=-1;
	104	*offsets++=-1;
	105	*offsets++=-1;
	106	}
	107	sourceIndex=1;
	108	cnv->fromUChar32=c=0;
	109	}
	110
	111	if(c==0) {
	112	/* copy an even number of bytes for complete UChars */
	113	uint32_t count=2*length;
	114	if(count>targetCapacity) {
	115	count=targetCapacity&~1;
	116	}
	117	/* count is even */
		/* from here on, count is the number of UChars the loops may convert */
	118	targetCapacity-=count;
	119	count>>=1;
	120	length-=count;
	121
		/* two copies of the same loop: without and with offsets bookkeeping */
	122	if(offsets==NULL) {
	123	while(count>0) {
	124	c=*source++;
	125	if(U16_IS_SINGLE(c)) {
	126	target[0]=(uint8_t)(c>>8);
	127	target[1]=(uint8_t)c;
	128	target+=2;
	129	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
	130	++source;
	131	--count;
	132	target[0]=(uint8_t)(c>>8);
	133	target[1]=(uint8_t)c;
	134	target[2]=(uint8_t)(trail>>8);
	135	target[3]=(uint8_t)trail;
	136	target+=4;
	137	} else {
	138	break;
	139	}
	140	--count;
	141	}
	142	} else {
	143	while(count>0) {
	144	c=*source++;
	145	if(U16_IS_SINGLE(c)) {
	146	target[0]=(uint8_t)(c>>8);
	147	target[1]=(uint8_t)c;
	148	target+=2;
	149	*offsets++=sourceIndex;
	150	*offsets++=sourceIndex++;
	151	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
	152	++source;
	153	--count;
	154	target[0]=(uint8_t)(c>>8);
	155	target[1]=(uint8_t)c;
	156	target[2]=(uint8_t)(trail>>8);
	157	target[3]=(uint8_t)trail;
	158	target+=4;
	159	*offsets++=sourceIndex;
	160	*offsets++=sourceIndex;
	161	*offsets++=sourceIndex;
	162	*offsets++=sourceIndex;
	163	sourceIndex+=2;
	164	} else {
	165	break;
	166	}
	167	--count;
	168	}
	169	}
	170
	171	if(count==0) {
	172	/* done with the loop for complete UChars */
	173	if(length>0 && targetCapacity>0) {
	174	/*
	175	* there is more input and some target capacity -
	176	* it must be targetCapacity==1 because otherwise
	177	* the above would have copied more;
	178	* prepare for overflow output
	179	*/
	180	if(U16_IS_SINGLE(c=*source++)) {
	181	overflow[0]=(char)(c>>8);
	182	overflow[1]=(char)c;
	183	length=2; /* 2 bytes to output */
	184	c=0;
	185	/* } else { keep c for surrogate handling, length will be set there */
	186	}
	187	} else {
	188	length=0;
	189	c=0;
	190	}
	191	} else {
	192	/* keep c for surrogate handling, length will be set there */
		/* the loop stopped early: give back the capacity reserved for the unconverted UChars */
	193	targetCapacity+=2*count;
	194	}
	195	} else {
	196	length=0; /* from here on, length counts the bytes in overflow[] */
	197	}
	198
	199	if(c!=0) {
	200	/*
	201	* c is a surrogate, and
	202	* - source or target too short
	203	* - or the surrogate is unmatched
	204	*/
	205	length=0;
	206	if(U16_IS_SURROGATE_LEAD(c)) {
	207	if(source<pArgs->sourceLimit) {
	208	if(U16_IS_TRAIL(trail=*source)) {
	209	/* output the surrogate pair, will overflow (see conditions comment above) */
	210	++source;
	211	overflow[0]=(char)(c>>8);
	212	overflow[1]=(char)c;
	213	overflow[2]=(char)(trail>>8);
	214	overflow[3]=(char)trail;
	215	length=4; /* 4 bytes to output */
	216	c=0;
	217	} else {
	218	/* unmatched lead surrogate */
	219	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	220	}
	221	} else {
	222	/* see if the trail surrogate is in the next buffer */
	223	}
	224	} else {
	225	/* unmatched trail surrogate */
	226	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	227	}
		/* c is 0 after a completed pair, otherwise the pending or offending surrogate */
	228	cnv->fromUChar32=c;
	229	}
	230
	231	if(length>0) {
	232	/* output length bytes with overflow (length>targetCapacity>0) */
	233	ucnv_fromUWriteBytes(cnv,
	234	overflow, length,
	235	(char **)&target, pArgs->targetLimit,
	236	&offsets, sourceIndex,
	237	pErrorCode);
	238	targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
	239	}
	240
	241	if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
	242	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	243	}
	244
	245	/* write back the updated pointers */
	246	pArgs->source=source;
	247	pArgs->target=(char *)target;
	248	pArgs->offsets=offsets;
	249	}
	250
	251	static void
	252	_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	253	UErrorCode *pErrorCode) {
	254	UConverter *cnv;
	255	const uint8_t *source;
	256	UChar *target;
	257	int32_t *offsets;
	258
	259	uint32_t targetCapacity, length, count, sourceIndex;
	260	UChar c, trail;
	261
	262	if(pArgs->converter->mode<8) {
	263	_UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
	264	return;
	265	}
	266
	267	cnv=pArgs->converter;
	268	source=(const uint8_t *)pArgs->source;
	269	length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
	270	if(length<=0 && cnv->toUnicodeStatus==0) {
	271	/* no input, nothing to do */
	272	return;
	273	}
	274
	275	target=pArgs->target;
	276	if(target >= pArgs->targetLimit) {
	277	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	278	return;
	279	}
	280
	281	targetCapacity=(uint32_t)(pArgs->targetLimit-target);
	282	offsets=pArgs->offsets;
	283	sourceIndex=0;
	284	c=0;
	285
	286	/* complete a partial UChar or pair from the last call */
	287	if(cnv->toUnicodeStatus!=0) {
	288	/*
	289	* special case: single byte from a previous buffer,
	290	* where the byte turned out not to belong to a trail surrogate
	291	* and the preceding, unmatched lead surrogate was put into toUBytes[]
	292	* for error handling
	293	*/
	294	cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
	295	cnv->toULength=1;
	296	cnv->toUnicodeStatus=0;
	297	}
	298	if((count=cnv->toULength)!=0) {
	299	uint8_t *p=cnv->toUBytes;
	300	do {
	301	p[count++]=*source++;
	302	++sourceIndex;
	303	--length;
	304	if(count==2) {
	305	c=((UChar)p[0]<<8)\|p[1];
	306	if(U16_IS_SINGLE(c)) {
	307	/* output the BMP code point */
	308	*target++=c;
	309	if(offsets!=NULL) {
	310	*offsets++=-1;
	311	}
	312	--targetCapacity;
	313	count=0;
	314	c=0;
	315	break;
	316	} else if(U16_IS_SURROGATE_LEAD(c)) {
	317	/* continue collecting bytes for the trail surrogate */
	318	c=0; /* avoid unnecessary surrogate handling below */
	319	} else {
	320	/* fall through to error handling for an unmatched trail surrogate */
	321	break;
	322	}
	323	} else if(count==4) {
	324	c=((UChar)p[0]<<8)\|p[1];
	325	trail=((UChar)p[2]<<8)\|p[3];
	326	if(U16_IS_TRAIL(trail)) {
	327	/* output the surrogate pair */
	328	*target++=c;
	329	if(targetCapacity>=2) {
	330	*target++=trail;
	331	if(offsets!=NULL) {
	332	*offsets++=-1;
	333	*offsets++=-1;
	334	}
	335	targetCapacity-=2;
	336	} else /* targetCapacity==1 */ {
	337	targetCapacity=0;
	338	cnv->UCharErrorBuffer[0]=trail;
	339	cnv->UCharErrorBufferLength=1;
	340	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	341	}
	342	count=0;
	343	c=0;
	344	break;
	345	} else {
	346	/* unmatched lead surrogate, handle here for consistent toUBytes[] */
	347	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	348
	349	/* back out reading the code unit after it */
	350	if(((const uint8_t *)pArgs->source-source)>=2) {
	351	source-=2;
	352	} else {
	353	/*
	354	* if the trail unit's first byte was in a previous buffer, then
	355	* we need to put it into a special place because toUBytes[] will be
	356	* used for the lead unit's bytes
	357	*/
	358	cnv->toUnicodeStatus=0x100\|p[2];
	359	--source;
	360	}
	361	cnv->toULength=2;
	362
	363	/* write back the updated pointers */
	364	pArgs->source=(const char *)source;
	365	pArgs->target=target;
	366	pArgs->offsets=offsets;
	367	return;
	368	}
	369	}
	370	} while(length>0);
	371	cnv->toULength=(int8_t)count;
	372	}
	373
	374	/* copy an even number of bytes for complete UChars */
	375	count=2*targetCapacity;
	376	if(count>length) {
	377	count=length&~1;
	378	}
	379	if(c==0 && count>0) {
	380	length-=count;
	381	count>>=1;
	382	targetCapacity-=count;
	383	if(offsets==NULL) {
	384	do {
	385	c=((UChar)source[0]<<8)\|source[1];
	386	source+=2;
	387	if(U16_IS_SINGLE(c)) {
	388	*target++=c;
	389	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
	390	U16_IS_TRAIL(trail=((UChar)source[0]<<8)\|source[1])
	391	) {
	392	source+=2;
	393	--count;
	394	*target++=c;
	395	*target++=trail;
	396	} else {
	397	break;
	398	}
	399	} while(--count>0);
	400	} else {
	401	do {
	402	c=((UChar)source[0]<<8)\|source[1];
	403	source+=2;
	404	if(U16_IS_SINGLE(c)) {
	405	*target++=c;
	406	*offsets++=sourceIndex;
	407	sourceIndex+=2;
	408	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
	409	U16_IS_TRAIL(trail=((UChar)source[0]<<8)\|source[1])
	410	) {
	411	source+=2;
	412	--count;
	413	*target++=c;
	414	*target++=trail;
	415	*offsets++=sourceIndex;
	416	*offsets++=sourceIndex;
	417	sourceIndex+=4;
	418	} else {
	419	break;
	420	}
	421	} while(--count>0);
	422	}
	423
	424	if(count==0) {
	425	/* done with the loop for complete UChars */
	426	c=0;
	427	} else {
	428	/* keep c for surrogate handling, trail will be set there */
	429	length+=2(count-1); / one more byte pair was consumed than count decremented */
	430	targetCapacity+=count;
	431	}
	432	}
	433
	434	if(c!=0) {
	435	/*
	436	* c is a surrogate, and
	437	* - source or target too short
	438	* - or the surrogate is unmatched
	439	*/
	440	cnv->toUBytes[0]=(uint8_t)(c>>8);
	441	cnv->toUBytes[1]=(uint8_t)c;
	442	cnv->toULength=2;
	443
	444	if(U16_IS_SURROGATE_LEAD(c)) {
	445	if(length>=2) {
	446	if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)\|source[1])) {
	447	/* output the surrogate pair, will overflow (see conditions comment above) */
	448	source+=2;
	449	length-=2;
	450	*target++=c;
	451	if(offsets!=NULL) {
	452	*offsets++=sourceIndex;
	453	}
	454	cnv->UCharErrorBuffer[0]=trail;
	455	cnv->UCharErrorBufferLength=1;
	456	cnv->toULength=0;
	457	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	458	} else {
	459	/* unmatched lead surrogate */
	460	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	461	}
	462	} else {
	463	/* see if the trail surrogate is in the next buffer */
	464	}
	465	} else {
	466	/* unmatched trail surrogate */
	467	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	468	}
	469	}
	470
	471	if(U_SUCCESS(*pErrorCode)) {
	472	/* check for a remaining source byte */
	473	if(length>0) {
	474	if(targetCapacity==0) {
	475	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	476	} else {
	477	/* it must be length==1 because otherwise the above would have copied more */
	478	cnv->toUBytes[cnv->toULength++]=*source++;
	479	}
	480	}
	481	}
	482
	483	/* write back the updated pointers */
	484	pArgs->source=(const char *)source;
	485	pArgs->target=target;
	486	pArgs->offsets=offsets;
	487	}
	488
	489	static UChar32
	490	_UTF16BEGetNextUChar(UConverterToUnicodeArgs pArgs, UErrorCode err) {
	491	const uint8_t s, sourceLimit;
	492	UChar32 c;
	493
	494	if(pArgs->converter->mode<8) {
	495	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	496	}
	497
	498	s=(const uint8_t *)pArgs->source;
	499	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	500
	501	if(s>=sourceLimit) {
	502	/* no input */
	503	*err=U_INDEX_OUTOFBOUNDS_ERROR;
	504	return 0xffff;
	505	}
	506
	507	if(s+2>sourceLimit) {
	508	/* only one byte: truncated UChar */
	509	pArgs->converter->toUBytes[0]=*s++;
	510	pArgs->converter->toULength=1;
	511	pArgs->source=(const char *)s;
	512	*err = U_TRUNCATED_CHAR_FOUND;
	513	return 0xffff;
	514	}
	515
	516	/* get one UChar */
	517	c=((UChar32)*s<<8)\|s[1];
	518	s+=2;
	519
	520	/* check for a surrogate pair */
	521	if(U_IS_SURROGATE(c)) {
	522	if(U16_IS_SURROGATE_LEAD(c)) {
	523	if(s+2<=sourceLimit) {
	524	UChar trail;
	525
	526	/* get a second UChar and see if it is a trail surrogate */
	527	trail=((UChar)*s<<8)\|s[1];
	528	if(U16_IS_TRAIL(trail)) {
	529	c=U16_GET_SUPPLEMENTARY(c, trail);
	530	s+=2;
	531	} else {
	532	/* unmatched lead surrogate */
	533	c=-2;
	534	}
	535	} else {
	536	/* too few (2 or 3) bytes for a surrogate pair: truncated code point */
	537	uint8_t *bytes=pArgs->converter->toUBytes;
	538	s-=2;
	539	pArgs->converter->toULength=(int8_t)(sourceLimit-s);
	540	do {
	541	bytes++=s++;
	542	} while(s<sourceLimit);
	543
	544	c=0xffff;
	545	*err=U_TRUNCATED_CHAR_FOUND;
	546	}
	547	} else {
	548	/* unmatched trail surrogate */
	549	c=-2;
	550	}
	551
	552	if(c<0) {
	553	/* write the unmatched surrogate */
	554	uint8_t *bytes=pArgs->converter->toUBytes;
	555	pArgs->converter->toULength=2;
	556	bytes=(s-2);
	557	bytes[1]=*(s-1);
	558
	559	c=0xffff;
	560	*err=U_ILLEGAL_CHAR_FOUND;
	561	}
	562	}
	563
	564	pArgs->source=(const char *)s;
	565	return c;
	566	}
	567
	568	static void
	569	_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) {
	570	if(choice<=UCNV_RESET_TO_UNICODE) {
	571	/* reset toUnicode state */
	572	if(UCNV_GET_VERSION(cnv)==0) {
	573	cnv->mode=8; /* no BOM handling */
	574	} else {
	575	cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */
	576	}
	577	}
	578	if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
	579	/* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */
	580	cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
	581	}
	582	}
	583
	584	static void
	585	_UTF16BEOpen(UConverter *cnv,
	586	UConverterLoadArgs *pArgs,
	587	UErrorCode *pErrorCode) {
	588	if(UCNV_GET_VERSION(cnv)<=1) {
	589	_UTF16BEReset(cnv, UCNV_RESET_BOTH);
	590	} else {
	591	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	592	}
	593	}
	594
	595	static const char *
	596	_UTF16BEGetName(const UConverter *cnv) {
	597	if(UCNV_GET_VERSION(cnv)==0) {
	598	return "UTF-16BE";
	599	} else {
	600	return "UTF-16BE,version=1";
	601	}
	602	}
	603
		/*
		 * Implementation table for the UTF-16BE converter, wiring up the functions
		 * defined above. The initializer is positional; the meaning of each slot
		 * comes from the UConverterImpl declaration, which is not visible here.
		 * Each WithOffsets function is listed twice -- presumably filling both the
		 * plain and the with-offsets slot for its direction (confirm in ucnv_cnv.h).
		 */
	604	static const UConverterImpl _UTF16BEImpl={
	605	UCNV_UTF16_BigEndian,
	606
	607	NULL,
	608	NULL,
	609
	610	_UTF16BEOpen,
	611	NULL,
	612	_UTF16BEReset,
	613
	614	_UTF16BEToUnicodeWithOffsets,
	615	_UTF16BEToUnicodeWithOffsets,
	616	_UTF16BEFromUnicodeWithOffsets,
	617	_UTF16BEFromUnicodeWithOffsets,
	618	_UTF16BEGetNextUChar,
	619
	620	NULL,
	621	_UTF16BEGetName,
	622	NULL,
	623	NULL,
	624	ucnv_getNonSurrogateUnicodeSet
	625	};
	626
		/*
		 * Static description of the UTF-16BE converter (positional initializer).
		 * Field meanings are defined by UConverterStaticData, which is not visible
		 * here. NOTE(review): 1200 looks like the code page number, "2, 2" like the
		 * minimum/maximum bytes per character, and { 0xff, 0xfd } with length 2
		 * like the substitution character U+FFFD in big-endian byte order --
		 * confirm against ucnv_bld.h.
		 */
	627	static const UConverterStaticData _UTF16BEStaticData={
	628	sizeof(UConverterStaticData),
	629	"UTF-16BE",
	630	1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
	631	{ 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,
	632	0,
	633	0,
	634	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	635	};
	636
	637
		/*
		 * Shared-data record for UTF-16BE; it ties together the static data and
		 * the implementation table defined above. It is not declared static, so
		 * it has external linkage. The remaining positional fields are defined by
		 * UConverterSharedData, which is not visible here.
		 */
	638	const UConverterSharedData _UTF16BEData={
	639	sizeof(UConverterSharedData), ~((uint32_t) 0),
	640	NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl,
	641	0
	642	};
	643
	644	/* UTF-16LE ----------------------------------------------------------------- */
	645
		/*
		 * Convert the UChars in [pArgs->source, pArgs->sourceLimit) to UTF-16LE
		 * bytes (low byte first) at pArgs->target.
		 *
		 * - If cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM, the bytes FF FE are
		 *   written first and the status is cleared.
		 * - A surrogate pair is always written as one 4-byte unit. A lead surrogate
		 *   at the very end of the input is kept in cnv->fromUChar32 and completed
		 *   by the next call.
		 * - An unmatched surrogate sets U_ILLEGAL_CHAR_FOUND and is left in
		 *   cnv->fromUChar32.
		 * - When the next character does not fit completely, its bytes are passed
		 *   to ucnv_fromUWriteBytes() via overflow[]; U_BUFFER_OVERFLOW_ERROR is
		 *   set when input remains and no target capacity is left.
		 * - If pArgs->offsets is not NULL, one source index is stored per output
		 *   byte (-1 for the BOM and for a pair completed from the previous call).
		 *
		 * On a normal return pArgs->source, pArgs->target and pArgs->offsets are
		 * advanced past what was consumed and produced.
		 */
	646	static void
	647	_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	648	UErrorCode *pErrorCode) {
	649	UConverter *cnv;
	650	const UChar *source;
	651	char *target;
	652	int32_t *offsets;
	653
	654	uint32_t targetCapacity, length, sourceIndex;
	655	UChar c, trail;
	656	char overflow[4];
	657
	658	source=pArgs->source;
		/* length is unsigned, so the test below is effectively length==0 */
	659	length=(int32_t)(pArgs->sourceLimit-source);
	660	if(length<=0) {
	661	/* no input, nothing to do */
	662	return;
	663	}
	664
	665	cnv=pArgs->converter;
	666
	667	/* write the BOM if necessary */
	668	if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	669	static const char bom[]={ (char)0xff, (char)0xfe };
	670	ucnv_fromUWriteBytes(cnv,
	671	bom, 2,
	672	&pArgs->target, pArgs->targetLimit,
	673	&pArgs->offsets, -1,
	674	pErrorCode);
	675	cnv->fromUnicodeStatus=0;
	676	}
	677
		/* read target only now: the BOM write above may have advanced pArgs->target */
	678	target=pArgs->target;
	679	if(target >= pArgs->targetLimit) {
	680	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	681	return;
	682	}
	683
	684	targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
	685	offsets=pArgs->offsets;
	686	sourceIndex=0;
	687
	688	/* c!=0 indicates in several places outside the main loops that a surrogate was found */
	689
	690	if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
	691	/* the last buffer ended with a lead surrogate, output the surrogate pair */
	692	++source;
	693	--length;
	694	target[0]=(uint8_t)c;
	695	target[1]=(uint8_t)(c>>8);
	696	target[2]=(uint8_t)trail;
	697	target[3]=(uint8_t)(trail>>8);
	698	target+=4;
	699	targetCapacity-=4;
	700	if(offsets!=NULL) {
	701	*offsets++=-1;
	702	*offsets++=-1;
	703	*offsets++=-1;
	704	*offsets++=-1;
	705	}
	706	sourceIndex=1;
	707	cnv->fromUChar32=c=0;
	708	}
	709
	710	if(c==0) {
	711	/* copy an even number of bytes for complete UChars */
	712	uint32_t count=2*length;
	713	if(count>targetCapacity) {
	714	count=targetCapacity&~1;
	715	}
	716	/* count is even */
		/* from here on, count is the number of UChars the loops may convert */
	717	targetCapacity-=count;
	718	count>>=1;
	719	length-=count;
	720
		/* two copies of the same loop: without and with offsets bookkeeping */
	721	if(offsets==NULL) {
	722	while(count>0) {
	723	c=*source++;
	724	if(U16_IS_SINGLE(c)) {
	725	target[0]=(uint8_t)c;
	726	target[1]=(uint8_t)(c>>8);
	727	target+=2;
	728	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
	729	++source;
	730	--count;
	731	target[0]=(uint8_t)c;
	732	target[1]=(uint8_t)(c>>8);
	733	target[2]=(uint8_t)trail;
	734	target[3]=(uint8_t)(trail>>8);
	735	target+=4;
	736	} else {
	737	break;
	738	}
	739	--count;
	740	}
	741	} else {
	742	while(count>0) {
	743	c=*source++;
	744	if(U16_IS_SINGLE(c)) {
	745	target[0]=(uint8_t)c;
	746	target[1]=(uint8_t)(c>>8);
	747	target+=2;
	748	*offsets++=sourceIndex;
	749	*offsets++=sourceIndex++;
	750	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
	751	++source;
	752	--count;
	753	target[0]=(uint8_t)c;
	754	target[1]=(uint8_t)(c>>8);
	755	target[2]=(uint8_t)trail;
	756	target[3]=(uint8_t)(trail>>8);
	757	target+=4;
	758	*offsets++=sourceIndex;
	759	*offsets++=sourceIndex;
	760	*offsets++=sourceIndex;
	761	*offsets++=sourceIndex;
	762	sourceIndex+=2;
	763	} else {
	764	break;
	765	}
	766	--count;
	767	}
	768	}
	769
	770	if(count==0) {
	771	/* done with the loop for complete UChars */
	772	if(length>0 && targetCapacity>0) {
	773	/*
	774	* there is more input and some target capacity -
	775	* it must be targetCapacity==1 because otherwise
	776	* the above would have copied more;
	777	* prepare for overflow output
	778	*/
	779	if(U16_IS_SINGLE(c=*source++)) {
	780	overflow[0]=(char)c;
	781	overflow[1]=(char)(c>>8);
	782	length=2; /* 2 bytes to output */
	783	c=0;
	784	/* } else { keep c for surrogate handling, length will be set there */
	785	}
	786	} else {
	787	length=0;
	788	c=0;
	789	}
	790	} else {
	791	/* keep c for surrogate handling, length will be set there */
		/* the loop stopped early: give back the capacity reserved for the unconverted UChars */
	792	targetCapacity+=2*count;
	793	}
	794	} else {
	795	length=0; /* from here on, length counts the bytes in overflow[] */
	796	}
	797
	798	if(c!=0) {
	799	/*
	800	* c is a surrogate, and
	801	* - source or target too short
	802	* - or the surrogate is unmatched
	803	*/
	804	length=0;
	805	if(U16_IS_SURROGATE_LEAD(c)) {
	806	if(source<pArgs->sourceLimit) {
	807	if(U16_IS_TRAIL(trail=*source)) {
	808	/* output the surrogate pair, will overflow (see conditions comment above) */
	809	++source;
	810	overflow[0]=(char)c;
	811	overflow[1]=(char)(c>>8);
	812	overflow[2]=(char)trail;
	813	overflow[3]=(char)(trail>>8);
	814	length=4; /* 4 bytes to output */
	815	c=0;
	816	} else {
	817	/* unmatched lead surrogate */
	818	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	819	}
	820	} else {
	821	/* see if the trail surrogate is in the next buffer */
	822	}
	823	} else {
	824	/* unmatched trail surrogate */
	825	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	826	}
		/* c is 0 after a completed pair, otherwise the pending or offending surrogate */
	827	cnv->fromUChar32=c;
	828	}
	829
	830	if(length>0) {
	831	/* output length bytes with overflow (length>targetCapacity>0) */
	832	ucnv_fromUWriteBytes(cnv,
	833	overflow, length,
	834	&target, pArgs->targetLimit,
	835	&offsets, sourceIndex,
	836	pErrorCode);
	837	targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
	838	}
	839
	840	if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
	841	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	842	}
	843
	844	/* write back the updated pointers */
	845	pArgs->source=source;
	846	pArgs->target=target;
	847	pArgs->offsets=offsets;
	848	}
	849
	850	static void
	851	_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	852	UErrorCode *pErrorCode) {
	853	UConverter *cnv;
	854	const uint8_t *source;
	855	UChar *target;
	856	int32_t *offsets;
	857
	858	uint32_t targetCapacity, length, count, sourceIndex;
	859	UChar c, trail;
	860
	861	if(pArgs->converter->mode<8) {
	862	_UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
	863	return;
	864	}
	865
	866	cnv=pArgs->converter;
	867	source=(const uint8_t *)pArgs->source;
	868	length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
	869	if(length<=0 && cnv->toUnicodeStatus==0) {
	870	/* no input, nothing to do */
	871	return;
	872	}
	873
	874	target=pArgs->target;
	875	if(target >= pArgs->targetLimit) {
	876	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	877	return;
	878	}
	879
	880	targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
	881	offsets=pArgs->offsets;
	882	sourceIndex=0;
	883	c=0;
	884
	885	/* complete a partial UChar or pair from the last call */
	886	if(cnv->toUnicodeStatus!=0) {
	887	/*
	888	* special case: single byte from a previous buffer,
	889	* where the byte turned out not to belong to a trail surrogate
	890	* and the preceding, unmatched lead surrogate was put into toUBytes[]
	891	* for error handling
	892	*/
	893	cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
	894	cnv->toULength=1;
	895	cnv->toUnicodeStatus=0;
	896	}
	897	if((count=cnv->toULength)!=0) {
	898	uint8_t *p=cnv->toUBytes;
	899	do {
	900	p[count++]=*source++;
	901	++sourceIndex;
	902	--length;
	903	if(count==2) {
	904	c=((UChar)p[1]<<8)\|p[0];
	905	if(U16_IS_SINGLE(c)) {
	906	/* output the BMP code point */
	907	*target++=c;
	908	if(offsets!=NULL) {
	909	*offsets++=-1;
	910	}
	911	--targetCapacity;
	912	count=0;
	913	c=0;
	914	break;
	915	} else if(U16_IS_SURROGATE_LEAD(c)) {
	916	/* continue collecting bytes for the trail surrogate */
	917	c=0; /* avoid unnecessary surrogate handling below */
	918	} else {
	919	/* fall through to error handling for an unmatched trail surrogate */
	920	break;
	921	}
	922	} else if(count==4) {
	923	c=((UChar)p[1]<<8)\|p[0];
	924	trail=((UChar)p[3]<<8)\|p[2];
	925	if(U16_IS_TRAIL(trail)) {
	926	/* output the surrogate pair */
	927	*target++=c;
	928	if(targetCapacity>=2) {
	929	*target++=trail;
	930	if(offsets!=NULL) {
	931	*offsets++=-1;
	932	*offsets++=-1;
	933	}
	934	targetCapacity-=2;
	935	} else /* targetCapacity==1 */ {
	936	targetCapacity=0;
	937	cnv->UCharErrorBuffer[0]=trail;
	938	cnv->UCharErrorBufferLength=1;
	939	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	940	}
	941	count=0;
	942	c=0;
	943	break;
	944	} else {
	945	/* unmatched lead surrogate, handle here for consistent toUBytes[] */
	946	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	947
	948	/* back out reading the code unit after it */
	949	if(((const uint8_t *)pArgs->source-source)>=2) {
	950	source-=2;
	951	} else {
	952	/*
	953	* if the trail unit's first byte was in a previous buffer, then
	954	* we need to put it into a special place because toUBytes[] will be
	955	* used for the lead unit's bytes
	956	*/
	957	cnv->toUnicodeStatus=0x100\|p[2];
	958	--source;
	959	}
	960	cnv->toULength=2;
	961
	962	/* write back the updated pointers */
	963	pArgs->source=(const char *)source;
	964	pArgs->target=target;
	965	pArgs->offsets=offsets;
	966	return;
	967	}
	968	}
	969	} while(length>0);
	970	cnv->toULength=(int8_t)count;
	971	}
	972
	973	/* copy an even number of bytes for complete UChars */
	974	count=2*targetCapacity;
	975	if(count>length) {
	976	count=length&~1;
	977	}
	978	if(c==0 && count>0) {
	979	length-=count;
	980	count>>=1;
	981	targetCapacity-=count;
	982	if(offsets==NULL) {
	983	do {
	984	c=((UChar)source[1]<<8)\|source[0];
	985	source+=2;
	986	if(U16_IS_SINGLE(c)) {
	987	*target++=c;
	988	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
	989	U16_IS_TRAIL(trail=((UChar)source[1]<<8)\|source[0])
	990	) {
	991	source+=2;
	992	--count;
	993	*target++=c;
	994	*target++=trail;
	995	} else {
	996	break;
	997	}
	998	} while(--count>0);
	999	} else {
	1000	do {
	1001	c=((UChar)source[1]<<8)\|source[0];
	1002	source+=2;
	1003	if(U16_IS_SINGLE(c)) {
	1004	*target++=c;
	1005	*offsets++=sourceIndex;
	1006	sourceIndex+=2;
	1007	} else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
	1008	U16_IS_TRAIL(trail=((UChar)source[1]<<8)\|source[0])
	1009	) {
	1010	source+=2;
	1011	--count;
	1012	*target++=c;
	1013	*target++=trail;
	1014	*offsets++=sourceIndex;
	1015	*offsets++=sourceIndex;
	1016	sourceIndex+=4;
	1017	} else {
	1018	break;
	1019	}
	1020	} while(--count>0);
	1021	}
	1022
	1023	if(count==0) {
	1024	/* done with the loop for complete UChars */
	1025	c=0;
	1026	} else {
	1027	/* keep c for surrogate handling, trail will be set there */
	1028	length+=2(count-1); / one more byte pair was consumed than count decremented */
	1029	targetCapacity+=count;
	1030	}
	1031	}
	1032
	1033	if(c!=0) {
	1034	/*
	1035	* c is a surrogate, and
	1036	* - source or target too short
	1037	* - or the surrogate is unmatched
	1038	*/
	1039	cnv->toUBytes[0]=(uint8_t)c;
	1040	cnv->toUBytes[1]=(uint8_t)(c>>8);
	1041	cnv->toULength=2;
	1042
	1043	if(U16_IS_SURROGATE_LEAD(c)) {
	1044	if(length>=2) {
	1045	if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)\|source[0])) {
	1046	/* output the surrogate pair, will overflow (see conditions comment above) */
	1047	source+=2;
	1048	length-=2;
	1049	*target++=c;
	1050	if(offsets!=NULL) {
	1051	*offsets++=sourceIndex;
	1052	}
	1053	cnv->UCharErrorBuffer[0]=trail;
	1054	cnv->UCharErrorBufferLength=1;
	1055	cnv->toULength=0;
	1056	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1057	} else {
	1058	/* unmatched lead surrogate */
	1059	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1060	}
	1061	} else {
	1062	/* see if the trail surrogate is in the next buffer */
	1063	}
	1064	} else {
	1065	/* unmatched trail surrogate */
	1066	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1067	}
	1068	}
	1069
	1070	if(U_SUCCESS(*pErrorCode)) {
	1071	/* check for a remaining source byte */
	1072	if(length>0) {
	1073	if(targetCapacity==0) {
	1074	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1075	} else {
	1076	/* it must be length==1 because otherwise the above would have copied more */
	1077	cnv->toUBytes[cnv->toULength++]=*source++;
	1078	}
	1079	}
	1080	}
	1081
	1082	/* write back the updated pointers */
	1083	pArgs->source=(const char *)source;
	1084	pArgs->target=target;
	1085	pArgs->offsets=offsets;
	1086	}
	1087
	1088	static UChar32
	1089	_UTF16LEGetNextUChar(UConverterToUnicodeArgs pArgs, UErrorCode err) {
	1090	const uint8_t s, sourceLimit;
	1091	UChar32 c;
	1092
	1093	if(pArgs->converter->mode<8) {
	1094	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	1095	}
	1096
	1097	s=(const uint8_t *)pArgs->source;
	1098	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	1099
	1100	if(s>=sourceLimit) {
	1101	/* no input */
	1102	*err=U_INDEX_OUTOFBOUNDS_ERROR;
	1103	return 0xffff;
	1104	}
	1105
	1106	if(s+2>sourceLimit) {
	1107	/* only one byte: truncated UChar */
	1108	pArgs->converter->toUBytes[0]=*s++;
	1109	pArgs->converter->toULength=1;
	1110	pArgs->source=(const char *)s;
	1111	*err = U_TRUNCATED_CHAR_FOUND;
	1112	return 0xffff;
	1113	}
	1114
	1115	/* get one UChar */
	1116	c=((UChar32)s[1]<<8)\|*s;
	1117	s+=2;
	1118
	1119	/* check for a surrogate pair */
	1120	if(U_IS_SURROGATE(c)) {
	1121	if(U16_IS_SURROGATE_LEAD(c)) {
	1122	if(s+2<=sourceLimit) {
	1123	UChar trail;
	1124
	1125	/* get a second UChar and see if it is a trail surrogate */
	1126	trail=((UChar)s[1]<<8)\|*s;
	1127	if(U16_IS_TRAIL(trail)) {
	1128	c=U16_GET_SUPPLEMENTARY(c, trail);
	1129	s+=2;
	1130	} else {
	1131	/* unmatched lead surrogate */
	1132	c=-2;
	1133	}
	1134	} else {
	1135	/* too few (2 or 3) bytes for a surrogate pair: truncated code point */
	1136	uint8_t *bytes=pArgs->converter->toUBytes;
	1137	s-=2;
	1138	pArgs->converter->toULength=(int8_t)(sourceLimit-s);
	1139	do {
	1140	bytes++=s++;
	1141	} while(s<sourceLimit);
	1142
	1143	c=0xffff;
	1144	*err=U_TRUNCATED_CHAR_FOUND;
	1145	}
	1146	} else {
	1147	/* unmatched trail surrogate */
	1148	c=-2;
	1149	}
	1150
	1151	if(c<0) {
	1152	/* write the unmatched surrogate */
	1153	uint8_t *bytes=pArgs->converter->toUBytes;
	1154	pArgs->converter->toULength=2;
	1155	bytes=(s-2);
	1156	bytes[1]=*(s-1);
	1157
	1158	c=0xffff;
	1159	*err=U_ILLEGAL_CHAR_FOUND;
	1160	}
	1161	}
	1162
	1163	pArgs->source=(const char *)s;
	1164	return c;
	1165	}
	1166
	1167	static void
	1168	_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) {
	1169	if(choice<=UCNV_RESET_TO_UNICODE) {
	1170	/* reset toUnicode state */
	1171	if(UCNV_GET_VERSION(cnv)==0) {
	1172	cnv->mode=8; /* no BOM handling */
	1173	} else {
	1174	cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */
	1175	}
	1176	}
	1177	if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
	1178	/* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */
	1179	cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
	1180	}
	1181	}
	1182
	1183	static void
	1184	_UTF16LEOpen(UConverter *cnv,
	1185	UConverterLoadArgs *pArgs,
	1186	UErrorCode *pErrorCode) {
	1187	if(UCNV_GET_VERSION(cnv)<=1) {
	1188	_UTF16LEReset(cnv, UCNV_RESET_BOTH);
	1189	} else {
	1190	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1191	}
	1192	}
	1193
	1194	static const char *
	1195	_UTF16LEGetName(const UConverter *cnv) {
	1196	if(UCNV_GET_VERSION(cnv)==0) {
	1197	return "UTF-16LE";
	1198	} else {
	1199	return "UTF-16LE,version=1";
	1200	}
	1201	}
	1202
		/*
		 * Implementation table for the UTF-16LE converter, wiring up the functions
		 * defined above. The initializer is positional; the meaning of each slot
		 * comes from the UConverterImpl declaration, which is not visible here.
		 * Each WithOffsets function is listed twice -- presumably filling both the
		 * plain and the with-offsets slot for its direction (confirm in ucnv_cnv.h).
		 */
	1203	static const UConverterImpl _UTF16LEImpl={
	1204	UCNV_UTF16_LittleEndian,
	1205
	1206	NULL,
	1207	NULL,
	1208
	1209	_UTF16LEOpen,
	1210	NULL,
	1211	_UTF16LEReset,
	1212
	1213	_UTF16LEToUnicodeWithOffsets,
	1214	_UTF16LEToUnicodeWithOffsets,
	1215	_UTF16LEFromUnicodeWithOffsets,
	1216	_UTF16LEFromUnicodeWithOffsets,
	1217	_UTF16LEGetNextUChar,
	1218
	1219	NULL,
	1220	_UTF16LEGetName,
	1221	NULL,
	1222	NULL,
	1223	ucnv_getNonSurrogateUnicodeSet
	1224	};
	1225
	1226
	1227	static const UConverterStaticData _UTF16LEStaticData={
	1228	sizeof(UConverterStaticData),
	1229	"UTF-16LE",
	1230	1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
	1231	{ 0xfd, 0xff, 0, 0 },2,FALSE,FALSE,
	1232	0,
	1233	0,
	1234	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	1235	};
	1236
	1237
	1238	const UConverterSharedData _UTF16LEData={
	1239	sizeof(UConverterSharedData), ~((uint32_t) 0),
	1240	NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl,
	1241	0
	1242	};
	1243
	1244	/* UTF-16 (Detect BOM) ------------------------------------------------------ */
	1245
	1246	/*
	1247	* Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
	1248	* accordingly.
	1249	* This is a simpler version of the UTF-32 converter, with
	1250	* fewer states for shorter BOMs.
	1251	*
	1252	* State values:
	1253	* 0 initial state
	1254	* 1 saw first byte
	1255	* 2..5 -
	1256	* 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1
	1257	* 8 UTF-16BE mode
	1258	* 9 UTF-16LE mode
	1259	*
	1260	* During detection: state==number of initial bytes seen so far.
	1261	*
	1262	* On output, emit U+FEFF as the first code point.
	1263	*
	1264	* Variants:
	1265	* - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error.
	1266	* - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and
	1267	* UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error.
	1268	*/
	1269
	1270	static void
	1271	_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
	1272	if(choice<=UCNV_RESET_TO_UNICODE) {
	1273	/* reset toUnicode: state=0 */
	1274	cnv->mode=0;
	1275	}
	1276	if(choice!=UCNV_RESET_TO_UNICODE) {
	1277	/* reset fromUnicode: prepare to output the UTF-16PE BOM */
	1278	cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
	1279	}
	1280	}
	1281
	1282	static const UConverterSharedData _UTF16v2Data;
	1283
	1284	static void
	1285	_UTF16Open(UConverter *cnv,
	1286	UConverterLoadArgs *pArgs,
	1287	UErrorCode *pErrorCode) {
	1288	if(UCNV_GET_VERSION(cnv)<=2) {
	1289	if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) {
	1290	/*
	1291	* Switch implementation, and switch the staticData that's different
	1292	* and was copied into the UConverter.
	1293	* (See ucnv_createConverterFromSharedData() in ucnv_bld.c.)
	1294	* UTF-16,version=2 fromUnicode() always writes a big-endian byte stream.
	1295	*/
	1296	cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data;
	1297	uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN);
	1298	}
	1299	_UTF16Reset(cnv, UCNV_RESET_BOTH);
	1300	} else {
	1301	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1302	}
	1303	}
	1304
	1305	static const char *
	1306	_UTF16GetName(const UConverter *cnv) {
	1307	if(UCNV_GET_VERSION(cnv)==0) {
	1308	return "UTF-16";
	1309	} else if(UCNV_GET_VERSION(cnv)==1) {
	1310	return "UTF-16,version=1";
	1311	} else {
	1312	return "UTF-16,version=2";
	1313	}
	1314	}
	1315
	1316	const UConverterSharedData _UTF16Data;
	1317
	1318	#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData)
	1319	#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData)
	1320	#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data \|\| (cnv)->sharedData==&_UTF16v2Data)
	1321
	1322	static void
	1323	_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	1324	UErrorCode *pErrorCode) {
	1325	UConverter *cnv=pArgs->converter;
	1326	const char *source=pArgs->source;
	1327	const char *sourceLimit=pArgs->sourceLimit;
	1328	int32_t *offsets=pArgs->offsets;
	1329
	1330	int32_t state, offsetDelta;
	1331	uint8_t b;
	1332
	1333	state=cnv->mode;
	1334
	1335	/*
	1336	* If we detect a BOM in this buffer, then we must add the BOM size to the
	1337	* offsets because the actual converter function will not see and count the BOM.
	1338	* offsetDelta will have the number of the BOM bytes that are in the current buffer.
	1339	*/
	1340	offsetDelta=0;
	1341
	1342	while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
	1343	switch(state) {
	1344	case 0:
	1345	cnv->toUBytes[0]=(uint8_t)*source++;
	1346	cnv->toULength=1;
	1347	state=1;
	1348	break;
	1349	case 1:
	1350	/*
	1351	* Only inside this switch case can the state variable
	1352	* temporarily take two additional values:
	1353	* 6: BOM error, continue with BE
	1354	* 7: BOM error, continue with LE
	1355	*/
	1356	b=*source;
	1357	if(cnv->toUBytes[0]==0xfe && b==0xff) {
	1358	if(IS_UTF16LE(cnv)) {
	1359	state=7; /* illegal reverse BOM for Java "UnicodeLittle" */
	1360	} else {
	1361	state=8; /* detect UTF-16BE */
	1362	}
	1363	} else if(cnv->toUBytes[0]==0xff && b==0xfe) {
	1364	if(IS_UTF16BE(cnv)) {
	1365	state=6; /* illegal reverse BOM for Java "UnicodeBig" */
	1366	} else {
	1367	state=9; /* detect UTF-16LE */
	1368	}
	1369	} else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) {
	1370	state=6; /* illegal missing BOM for Java "Unicode" */
	1371	}
	1372	if(state>=8) {
	1373	/* BOM detected, consume it */
	1374	++source;
	1375	cnv->toULength=0;
	1376	offsetDelta=(int32_t)(source-pArgs->source);
	1377	} else if(state<6) {
	1378	/* ok: no BOM, and not a reverse BOM */
	1379	if(source!=pArgs->source) {
	1380	/* reset the source for a correct first offset */
	1381	source=pArgs->source;
	1382	cnv->toULength=0;
	1383	}
	1384	if(IS_UTF16LE(cnv)) {
	1385	/* Make Java "UnicodeLittle" default to LE. */
	1386	state=9;
	1387	} else {
	1388	/* Make standard UTF-16 and Java "UnicodeBig" default to BE. */
	1389	state=8;
	1390	}
	1391	} else {
	1392	/*
	1393	* error: missing BOM, or reverse BOM
	1394	* UTF-16,version=1: Java-specific "Unicode" requires a BOM.
	1395	* UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM.
	1396	* UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM.
	1397	*/
	1398	/* report the non-BOM or reverse BOM as an illegal sequence */
	1399	cnv->toUBytes[1]=b;
	1400	cnv->toULength=2;
	1401	pArgs->source=source+1;
	1402	/* continue with conversion if the callback resets the error */
	1403	/*
	1404	* Make Java "Unicode" default to BE like standard UTF-16.
	1405	* Make Java "UnicodeBig" and "UnicodeLittle" default
	1406	* to their normal endiannesses.
	1407	*/
	1408	cnv->mode=state+2;
	1409	*pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
	1410	return;
	1411	}
	1412	/* convert the rest of the stream */
	1413	cnv->mode=state;
	1414	continue;
	1415	case 8:
	1416	/* call UTF-16BE */
	1417	pArgs->source=source;
	1418	_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
	1419	source=pArgs->source;
	1420	break;
	1421	case 9:
	1422	/* call UTF-16LE */
	1423	pArgs->source=source;
	1424	_UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
	1425	source=pArgs->source;
	1426	break;
	1427	default:
	1428	break; /* does not occur */
	1429	}
	1430	}
	1431
	1432	/* add BOM size to offsets - see comment at offsetDelta declaration */
	1433	if(offsets!=NULL && offsetDelta!=0) {
	1434	int32_t *offsetsLimit=pArgs->offsets;
	1435	while(offsets<offsetsLimit) {
	1436	*offsets++ += offsetDelta;
	1437	}
	1438	}
	1439
	1440	pArgs->source=source;
	1441
	1442	if(source==sourceLimit && pArgs->flush) {
	1443	/* handle truncated input */
	1444	switch(state) {
	1445	case 0:
	1446	break; /* no input at all, nothing to do */
	1447	case 8:
	1448	_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
	1449	break;
	1450	case 9:
	1451	_UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
	1452	break;
	1453	default:
	1454	/* 0<state<8: framework will report truncation, nothing to do here */
	1455	break;
	1456	}
	1457	}
	1458
	1459	cnv->mode=state;
	1460	}
	1461
	1462	static UChar32
	1463	_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
	1464	UErrorCode *pErrorCode) {
	1465	switch(pArgs->converter->mode) {
	1466	case 8:
	1467	return _UTF16BEGetNextUChar(pArgs, pErrorCode);
	1468	case 9:
	1469	return _UTF16LEGetNextUChar(pArgs, pErrorCode);
	1470	default:
	1471	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	1472	}
	1473	}
	1474
	1475	static const UConverterImpl _UTF16Impl = {
	1476	UCNV_UTF16,
	1477
	1478	NULL,
	1479	NULL,
	1480
	1481	_UTF16Open,
	1482	NULL,
	1483	_UTF16Reset,
	1484
	1485	_UTF16ToUnicodeWithOffsets,
	1486	_UTF16ToUnicodeWithOffsets,
	1487	_UTF16PEFromUnicodeWithOffsets,
	1488	_UTF16PEFromUnicodeWithOffsets,
	1489	_UTF16GetNextUChar,
	1490
	1491	NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
	1492	_UTF16GetName,
	1493	NULL,
	1494	NULL,
	1495	ucnv_getNonSurrogateUnicodeSet
	1496	};
	1497
	1498	static const UConverterStaticData _UTF16StaticData = {
	1499	sizeof(UConverterStaticData),
	1500	"UTF-16",
	1501	1204, /* CCSID for BOM sensitive UTF-16 */
	1502	UCNV_IBM, UCNV_UTF16, 2, 2,
	1503	#if U_IS_BIG_ENDIAN
	1504	{ 0xff, 0xfd, 0, 0 }, 2,
	1505	#else
	1506	{ 0xfd, 0xff, 0, 0 }, 2,
	1507	#endif
	1508	FALSE, FALSE,
	1509	0,
	1510	0,
	1511	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	1512	};
	1513
	1514	const UConverterSharedData _UTF16Data = {
	1515	sizeof(UConverterSharedData), ~((uint32_t) 0),
	1516	NULL, NULL, &_UTF16StaticData, FALSE, &_UTF16Impl,
	1517	0
	1518	};
	1519
	1520	static const UConverterImpl _UTF16v2Impl = {
	1521	UCNV_UTF16,
	1522
	1523	NULL,
	1524	NULL,
	1525
	1526	_UTF16Open,
	1527	NULL,
	1528	_UTF16Reset,
	1529
	1530	_UTF16ToUnicodeWithOffsets,
	1531	_UTF16ToUnicodeWithOffsets,
	1532	_UTF16BEFromUnicodeWithOffsets,
	1533	_UTF16BEFromUnicodeWithOffsets,
	1534	_UTF16GetNextUChar,
	1535
	1536	NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
	1537	_UTF16GetName,
	1538	NULL,
	1539	NULL,
	1540	ucnv_getNonSurrogateUnicodeSet
	1541	};
	1542
	1543	static const UConverterStaticData _UTF16v2StaticData = {
	1544	sizeof(UConverterStaticData),
	1545	"UTF-16,version=2",
	1546	1204, /* CCSID for BOM sensitive UTF-16 */
	1547	UCNV_IBM, UCNV_UTF16, 2, 2,
	1548	{ 0xff, 0xfd, 0, 0 }, 2,
	1549	FALSE, FALSE,
	1550	0,
	1551	0,
	1552	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	1553	};
	1554
	1555	static const UConverterSharedData _UTF16v2Data = {
	1556	sizeof(UConverterSharedData), ~((uint32_t) 0),
	1557	NULL, NULL, &_UTF16v2StaticData, FALSE, &_UTF16v2Impl,
	1558	0
	1559	};
	1560
	1561	#endif