git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	******************************************************************************
	3	*
	4	* Copyright (C) 2002-2011, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	******************************************************************************
	8	* file name: ucnvbocu.cpp
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2002mar27
	14	* created by: Markus W. Scherer
	15	*
	16	* This is an implementation of the Binary Ordered Compression for Unicode,
	17	* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
	18	*/
	19
	20	#include "unicode/utypes.h"
	21
	22	#if !UCONFIG_NO_CONVERSION
	23
	24	#include "unicode/ucnv.h"
	25	#include "unicode/ucnv_cb.h"
	26	#include "unicode/utf16.h"
	27	#include "putilimp.h"
	28	#include "ucnv_bld.h"
	29	#include "ucnv_cnv.h"
	30	#include "uassert.h"
	31
	32	/* BOCU-1 constants and macros ---------------------------------------------- */
	33
	34	/*
	35	* BOCU-1 encodes the code points of a Unicode string as
	36	* a sequence of byte-encoded differences (slope detection),
	37	* preserving lexical order.
	38	*
	39	* Optimize the difference-taking for runs of Unicode text within
	40	* small scripts:
	41	*
	42	* Most small scripts are allocated within aligned 128-blocks of Unicode
	43	* code points. Lexical order is preserved if the "previous code point" state
	44	* is always moved into the middle of such a block.
	45	*
	46	* Additionally, "prev" is moved from anywhere in the Unihan and Hangul
	47	* areas into the middle of those areas.
	48	*
	49	* C0 control codes and space are encoded with their US-ASCII bytes.
	50	* "prev" is reset for C0 controls but not for space.
	51	*/
	52
	53	/* initial value for "prev": middle of the ASCII range */
	54	#define BOCU1_ASCII_PREV 0x40
	55
	56	/* bounding byte values for differences */
	57	#define BOCU1_MIN 0x21
	58	#define BOCU1_MIDDLE 0x90
	59	#define BOCU1_MAX_LEAD 0xfe
	60	#define BOCU1_MAX_TRAIL 0xff
	61	#define BOCU1_RESET 0xff
	62
	63	/* number of lead bytes */
	64	#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1)
	65
	66	/* adjust trail byte counts for the use of some C0 control byte values */
	67	#define BOCU1_TRAIL_CONTROLS_COUNT 20
	68	#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
	69
	70	/* number of trail bytes */
	71	#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
	72
	73	/*
	74	* number of positive and negative single-byte codes
	75	* (counting 0==BOCU1_MIDDLE among the positive ones)
	76	*/
	77	#define BOCU1_SINGLE 64
	78
	79	/* number of lead bytes for positive and negative 2/3/4-byte sequences */
	80	#define BOCU1_LEAD_2 43
	81	#define BOCU1_LEAD_3 3
	82	#define BOCU1_LEAD_4 1
	83
	84	/* The difference value range for single-byters. */
	85	#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1)
	86	#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE)
	87
	88	/* The difference value range for double-byters. */
	89	#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
	90	#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
	91
	92	/* The difference value range for 3-byters. */
	93	#define BOCU1_REACH_POS_3 \
	94	(BOCU1_REACH_POS_2+BOCU1_LEAD_3BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT)
	95
	96	#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT)
	97
	98	/* The lead byte start values. */
	99	#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
	100	#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2)
	101	#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3)
	102	/* ==BOCU1_MAX_LEAD */
	103
	104	#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
	105	#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2)
	106	#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3)
	107	/* ==BOCU1_MIN+1 */
	108
	109	/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
	110	#define BOCU1_LENGTH_FROM_LEAD(lead) \
	111	((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
	112	(BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
	113	(BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
	114
	115	/* The length of a byte sequence, according to its packed form. */
	116	#define BOCU1_LENGTH_FROM_PACKED(packed) \
	117	((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
	118
	119	/*
	120	* 12 commonly used C0 control codes (and space) are only used to encode
	121	* themselves directly,
	122	* which makes BOCU-1 MIME-usable and reasonably safe for
	123	* ASCII-oriented software.
	124	*
	125	* These controls are
	126	* 0 NUL
	127	*
	128	* 7 BEL
	129	* 8 BS
	130	*
	131	* 9 TAB
	132	* a LF
	133	* b VT
	134	* c FF
	135	* d CR
	136	*
	137	* e SO
	138	* f SI
	139	*
	140	* 1a SUB
	141	* 1b ESC
	142	*
	143	* The other 20 C0 controls are also encoded directly (to preserve order)
	144	* but are also used as trail bytes in difference encoding
	145	* (for better compression).
	146	*/
	147	#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
	148
	149	/*
	150	* Byte value map for control codes,
	151	* from external byte values 0x00..0x20
	152	* to trail byte values 0..19 (0..0x13) as used in the difference calculation.
	153	* External byte values that are illegal as trail bytes are mapped to -1.
	154	*/
	155	static const int8_t
	156	bocu1ByteToTrail[BOCU1_MIN]={
	157	/* 0 1 2 3 4 5 6 7 */
	158	-1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
	159
	160	/* 8 9 a b c d e f */
	161	-1, -1, -1, -1, -1, -1, -1, -1,
	162
	163	/* 10 11 12 13 14 15 16 17 */
	164	0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
	165
	166	/* 18 19 1a 1b 1c 1d 1e 1f */
	167	0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
	168
	169	/* 20 */
	170	-1
	171	};
	172
	173	/*
	174	* Byte value map for control codes,
	175	* from trail byte values 0..19 (0..0x13) as used in the difference calculation
	176	* to external byte values 0x00..0x20.
	177	*/
	178	static const int8_t
	179	bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
	180	/* 0 1 2 3 4 5 6 7 */
	181	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
	182
	183	/* 8 9 a b c d e f */
	184	0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
	185
	186	/* 10 11 12 13 */
	187	0x1c, 0x1d, 0x1e, 0x1f
	188	};
	189
	190	/**
	191	* Integer division and modulo with negative numerators
	192	* yields negative modulo results and quotients that are one more than
	193	* what we need here.
	194	* This macro adjust the results so that the modulo-value m is always >=0.
	195	*
	196	* For positive n, the if() condition is always FALSE.
	197	*
	198	* @param n Number to be split into quotient and rest.
	199	* Will be modified to contain the quotient.
	200	* @param d Divisor.
	201	* @param m Output variable for the rest (modulo result).
	202	*/
	203	#define NEGDIVMOD(n, d, m) { \
	204	(m)=(n)%(d); \
	205	(n)/=(d); \
	206	if((m)<0) { \
	207	--(n); \
	208	(m)+=(d); \
	209	} \
	210	}
	211
	212	/* Faster versions of packDiff() for single-byte-encoded diff values. */
	213
	214	/** Is a diff value encodable in a single byte? */
	215	#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
	216
	217	/** Encode a diff value in a single byte. */
	218	#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
	219
	220	/** Is a diff value encodable in two bytes? */
	221	#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
	222
	223	/* BOCU-1 implementation functions ------------------------------------------ */
	224
	225	#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
	226
	227	/**
	228	* Compute the next "previous" value for differencing
	229	* from the current code point.
	230	*
	231	* @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
	232	* @return "previous code point" state value
	233	*/
	234	static inline int32_t
	235	bocu1Prev(int32_t c) {
	236	/* compute new prev */
	237	if(/* 0x3040<=c && */ c<=0x309f) {
	238	/* Hiragana is not 128-aligned */
	239	return 0x3070;
	240	} else if(0x4e00<=c && c<=0x9fa5) {
	241	/* CJK Unihan */
	242	return 0x4e00-BOCU1_REACH_NEG_2;
	243	} else if(0xac00<=c /* && c<=0xd7a3 */) {
	244	/* Korean Hangul */
	245	return (0xd7a3+0xac00)/2;
	246	} else {
	247	/* mostly small scripts */
	248	return BOCU1_SIMPLE_PREV(c);
	249	}
	250	}
	251
	252	/** Fast version of bocu1Prev() for most scripts. */
	253	#define BOCU1_PREV(c) ((c)<0x3040 \|\| (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
	254
/*
 * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
 * The UConverter fields are used as follows:
 *
 * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
 *
 * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
 * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
 */
	264
	265	/* BOCU-1-from-Unicode conversion functions --------------------------------- */
	266
	267	/**
	268	* Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
	269	* and return a packed integer with them.
	270	*
	271	* The encoding favors small absolute differences with short encodings
	272	* to compress runs of same-script characters.
	273	*
	274	* Optimized version with unrolled loops and fewer floating-point operations
	275	* than the standard packDiff().
	276	*
	277	* @param diff difference value -0x10ffff..0x10ffff
	278	* @return
	279	* 0x010000zz for 1-byte sequence zz
	280	* 0x0200yyzz for 2-byte sequence yy zz
	281	* 0x03xxyyzz for 3-byte sequence xx yy zz
	282	* 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
	283	*/
	284	static int32_t
	285	packDiff(int32_t diff) {
	286	int32_t result, m;
	287
	288	U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
	289	if(diff>=BOCU1_REACH_NEG_1) {
	290	/* mostly positive differences, and single-byte negative ones */
	291	#if 0 /* single-byte case handled in macros, see below */
	292	if(diff<=BOCU1_REACH_POS_1) {
	293	/* single byte */
	294	return 0x01000000\|(BOCU1_MIDDLE+diff);
	295	} else
	296	#endif
	297	if(diff<=BOCU1_REACH_POS_2) {
	298	/* two bytes */
	299	diff-=BOCU1_REACH_POS_1+1;
	300	result=0x02000000;
	301
	302	m=diff%BOCU1_TRAIL_COUNT;
	303	diff/=BOCU1_TRAIL_COUNT;
	304	result\|=BOCU1_TRAIL_TO_BYTE(m);
	305
	306	result\|=(BOCU1_START_POS_2+diff)<<8;
	307	} else if(diff<=BOCU1_REACH_POS_3) {
	308	/* three bytes */
	309	diff-=BOCU1_REACH_POS_2+1;
	310	result=0x03000000;
	311
	312	m=diff%BOCU1_TRAIL_COUNT;
	313	diff/=BOCU1_TRAIL_COUNT;
	314	result\|=BOCU1_TRAIL_TO_BYTE(m);
	315
	316	m=diff%BOCU1_TRAIL_COUNT;
	317	diff/=BOCU1_TRAIL_COUNT;
	318	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
	319
	320	result\|=(BOCU1_START_POS_3+diff)<<16;
	321	} else {
	322	/* four bytes */
	323	diff-=BOCU1_REACH_POS_3+1;
	324
	325	m=diff%BOCU1_TRAIL_COUNT;
	326	diff/=BOCU1_TRAIL_COUNT;
	327	result=BOCU1_TRAIL_TO_BYTE(m);
	328
	329	m=diff%BOCU1_TRAIL_COUNT;
	330	diff/=BOCU1_TRAIL_COUNT;
	331	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
	332
	333	/*
	334	* We know that / and % would deliver quotient 0 and rest=diff.
	335	* Avoid division and modulo for performance.
	336	*/
	337	result\|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
	338
	339	result\|=((uint32_t)BOCU1_START_POS_4)<<24;
	340	}
	341	} else {
	342	/* two- to four-byte negative differences */
	343	if(diff>=BOCU1_REACH_NEG_2) {
	344	/* two bytes */
	345	diff-=BOCU1_REACH_NEG_1;
	346	result=0x02000000;
	347
	348	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	349	result\|=BOCU1_TRAIL_TO_BYTE(m);
	350
	351	result\|=(BOCU1_START_NEG_2+diff)<<8;
	352	} else if(diff>=BOCU1_REACH_NEG_3) {
	353	/* three bytes */
	354	diff-=BOCU1_REACH_NEG_2;
	355	result=0x03000000;
	356
	357	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	358	result\|=BOCU1_TRAIL_TO_BYTE(m);
	359
	360	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	361	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
	362
	363	result\|=(BOCU1_START_NEG_3+diff)<<16;
	364	} else {
	365	/* four bytes */
	366	diff-=BOCU1_REACH_NEG_3;
	367
	368	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	369	result=BOCU1_TRAIL_TO_BYTE(m);
	370
	371	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	372	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
	373
	374	/*
	375	* We know that NEGDIVMOD would deliver
	376	* quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
	377	* Avoid division and modulo for performance.
	378	*/
	379	m=diff+BOCU1_TRAIL_COUNT;
	380	result\|=BOCU1_TRAIL_TO_BYTE(m)<<16;
	381
	382	result\|=BOCU1_MIN<<24;
	383	}
	384	}
	385	return result;
	386	}
	387
	388
	389	static void
	390	_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	391	UErrorCode *pErrorCode) {
	392	UConverter *cnv;
	393	const UChar source, sourceLimit;
	394	uint8_t *target;
	395	int32_t targetCapacity;
	396	int32_t *offsets;
	397
	398	int32_t prev, c, diff;
	399
	400	int32_t sourceIndex, nextSourceIndex;
	401
	402	U_ALIGN_CODE(16)
	403
	404	/* set up the local pointers */
	405	cnv=pArgs->converter;
	406	source=pArgs->source;
	407	sourceLimit=pArgs->sourceLimit;
	408	target=(uint8_t *)pArgs->target;
	409	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	410	offsets=pArgs->offsets;
	411
	412	/* get the converter state from UConverter */
	413	c=cnv->fromUChar32;
	414	prev=(int32_t)cnv->fromUnicodeStatus;
	415	if(prev==0) {
	416	prev=BOCU1_ASCII_PREV;
	417	}
	418
	419	/* sourceIndex=-1 if the current character began in the previous buffer */
	420	sourceIndex= c==0 ? 0 : -1;
	421	nextSourceIndex=0;
	422
	423	/* conversion loop */
	424	if(c!=0 && targetCapacity>0) {
	425	goto getTrail;
	426	}
	427
	428	fastSingle:
	429	/* fast loop for single-byte differences */
	430	/* use only one loop counter variable, targetCapacity, not also source */
	431	diff=(int32_t)(sourceLimit-source);
	432	if(targetCapacity>diff) {
	433	targetCapacity=diff;
	434	}
	435	while(targetCapacity>0 && (c=*source)<0x3000) {
	436	if(c<=0x20) {
	437	if(c!=0x20) {
	438	prev=BOCU1_ASCII_PREV;
	439	}
	440	*target++=(uint8_t)c;
	441	*offsets++=nextSourceIndex++;
	442	++source;
	443	--targetCapacity;
	444	} else {
	445	diff=c-prev;
	446	if(DIFF_IS_SINGLE(diff)) {
	447	prev=BOCU1_SIMPLE_PREV(c);
	448	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
	449	*offsets++=nextSourceIndex++;
	450	++source;
	451	--targetCapacity;
	452	} else {
	453	break;
	454	}
	455	}
	456	}
	457	/* restore real values */
	458	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
	459	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
	460
	461	/* regular loop for all cases */
	462	while(source<sourceLimit) {
	463	if(targetCapacity>0) {
	464	c=*source++;
	465	++nextSourceIndex;
	466
	467	if(c<=0x20) {
	468	/*
	469	* ISO C0 control & space:
	470	* Encode directly for MIME compatibility,
	471	* and reset state except for space, to not disrupt compression.
	472	*/
	473	if(c!=0x20) {
	474	prev=BOCU1_ASCII_PREV;
	475	}
	476	*target++=(uint8_t)c;
	477	*offsets++=sourceIndex;
	478	--targetCapacity;
	479
	480	sourceIndex=nextSourceIndex;
	481	continue;
	482	}
	483
	484	if(U16_IS_LEAD(c)) {
	485	getTrail:
	486	if(source<sourceLimit) {
	487	/* test the following code unit */
	488	UChar trail=*source;
	489	if(U16_IS_TRAIL(trail)) {
	490	++source;
	491	++nextSourceIndex;
	492	c=U16_GET_SUPPLEMENTARY(c, trail);
	493	}
	494	} else {
	495	/* no more input */
	496	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
	497	break;
	498	}
	499	}
	500
	501	/*
	502	* all other Unicode code points c==U+0021..U+10ffff
	503	* are encoded with the difference c-prev
	504	*
	505	* a new prev is computed from c,
	506	* placed in the middle of a 0x80-block (for most small scripts) or
	507	* in the middle of the Unihan and Hangul blocks
	508	* to statistically minimize the following difference
	509	*/
	510	diff=c-prev;
	511	prev=BOCU1_PREV(c);
	512	if(DIFF_IS_SINGLE(diff)) {
	513	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
	514	*offsets++=sourceIndex;
	515	--targetCapacity;
	516	sourceIndex=nextSourceIndex;
	517	if(c<0x3000) {
	518	goto fastSingle;
	519	}
	520	} else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
	521	/* optimize 2-byte case */
	522	int32_t m;
	523
	524	if(diff>=0) {
	525	diff-=BOCU1_REACH_POS_1+1;
	526	m=diff%BOCU1_TRAIL_COUNT;
	527	diff/=BOCU1_TRAIL_COUNT;
	528	diff+=BOCU1_START_POS_2;
	529	} else {
	530	diff-=BOCU1_REACH_NEG_1;
	531	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	532	diff+=BOCU1_START_NEG_2;
	533	}
	534	*target++=(uint8_t)diff;
	535	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
	536	*offsets++=sourceIndex;
	537	*offsets++=sourceIndex;
	538	targetCapacity-=2;
	539	sourceIndex=nextSourceIndex;
	540	} else {
	541	int32_t length; /* will be 2..4 */
	542
	543	diff=packDiff(diff);
	544	length=BOCU1_LENGTH_FROM_PACKED(diff);
	545
	546	/* write the output character bytes from diff and length */
	547	/* from the first if in the loop we know that targetCapacity>0 */
	548	if(length<=targetCapacity) {
	549	switch(length) {
	550	/* each branch falls through to the next one */
	551	case 4:
	552	*target++=(uint8_t)(diff>>24);
	553	*offsets++=sourceIndex;
	554	case 3: /fall through/
	555	*target++=(uint8_t)(diff>>16);
	556	*offsets++=sourceIndex;
	557	case 2: /fall through/
	558	*target++=(uint8_t)(diff>>8);
	559	*offsets++=sourceIndex;
	560	/* case 1: handled above */
	561	*target++=(uint8_t)diff;
	562	*offsets++=sourceIndex;
	563	default:
	564	/* will never occur */
	565	break;
	566	}
	567	targetCapacity-=length;
	568	sourceIndex=nextSourceIndex;
	569	} else {
	570	uint8_t *charErrorBuffer;
	571
	572	/*
	573	* We actually do this backwards here:
	574	* In order to save an intermediate variable, we output
	575	* first to the overflow buffer what does not fit into the
	576	* regular target.
	577	*/
	578	/* we know that 1<=targetCapacity<length<=4 */
	579	length-=targetCapacity;
	580	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
	581	switch(length) {
	582	/* each branch falls through to the next one */
	583	case 3:
	584	*charErrorBuffer++=(uint8_t)(diff>>16);
	585	case 2: /fall through/
	586	*charErrorBuffer++=(uint8_t)(diff>>8);
	587	case 1: /fall through/
	588	*charErrorBuffer=(uint8_t)diff;
	589	default:
	590	/* will never occur */
	591	break;
	592	}
	593	cnv->charErrorBufferLength=(int8_t)length;
	594
	595	/* now output what fits into the regular target */
	596	diff>>=8length; / length was reduced by targetCapacity */
	597	switch(targetCapacity) {
	598	/* each branch falls through to the next one */
	599	case 3:
	600	*target++=(uint8_t)(diff>>16);
	601	*offsets++=sourceIndex;
	602	case 2: /fall through/
	603	*target++=(uint8_t)(diff>>8);
	604	*offsets++=sourceIndex;
	605	case 1: /fall through/
	606	*target++=(uint8_t)diff;
	607	*offsets++=sourceIndex;
	608	default:
	609	/* will never occur */
	610	break;
	611	}
	612
	613	/* target overflow */
	614	targetCapacity=0;
	615	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	616	break;
	617	}
	618	}
	619	} else {
	620	/* target is full */
	621	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	622	break;
	623	}
	624	}
	625
	626	/* set the converter state back into UConverter */
	627	cnv->fromUChar32= c<0 ? -c : 0;
	628	cnv->fromUnicodeStatus=(uint32_t)prev;
	629
	630	/* write back the updated pointers */
	631	pArgs->source=source;
	632	pArgs->target=(char *)target;
	633	pArgs->offsets=offsets;
	634	}
	635
	636	/*
	637	* Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
	638	* If a change is made in the original function, then either
	639	* change this function the same way or
	640	* re-copy the original function and remove the variables
	641	* offsets, sourceIndex, and nextSourceIndex.
	642	*/
	643	static void
	644	_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
	645	UErrorCode *pErrorCode) {
	646	UConverter *cnv;
	647	const UChar source, sourceLimit;
	648	uint8_t *target;
	649	int32_t targetCapacity;
	650
	651	int32_t prev, c, diff;
	652
	653	/* set up the local pointers */
	654	cnv=pArgs->converter;
	655	source=pArgs->source;
	656	sourceLimit=pArgs->sourceLimit;
	657	target=(uint8_t *)pArgs->target;
	658	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	659
	660	/* get the converter state from UConverter */
	661	c=cnv->fromUChar32;
	662	prev=(int32_t)cnv->fromUnicodeStatus;
	663	if(prev==0) {
	664	prev=BOCU1_ASCII_PREV;
	665	}
	666
	667	/* conversion loop */
	668	if(c!=0 && targetCapacity>0) {
	669	goto getTrail;
	670	}
	671
	672	fastSingle:
	673	/* fast loop for single-byte differences */
	674	/* use only one loop counter variable, targetCapacity, not also source */
	675	diff=(int32_t)(sourceLimit-source);
	676	if(targetCapacity>diff) {
	677	targetCapacity=diff;
	678	}
	679	while(targetCapacity>0 && (c=*source)<0x3000) {
	680	if(c<=0x20) {
	681	if(c!=0x20) {
	682	prev=BOCU1_ASCII_PREV;
	683	}
	684	*target++=(uint8_t)c;
	685	} else {
	686	diff=c-prev;
	687	if(DIFF_IS_SINGLE(diff)) {
	688	prev=BOCU1_SIMPLE_PREV(c);
	689	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
	690	} else {
	691	break;
	692	}
	693	}
	694	++source;
	695	--targetCapacity;
	696	}
	697	/* restore real values */
	698	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
	699
	700	/* regular loop for all cases */
	701	while(source<sourceLimit) {
	702	if(targetCapacity>0) {
	703	c=*source++;
	704
	705	if(c<=0x20) {
	706	/*
	707	* ISO C0 control & space:
	708	* Encode directly for MIME compatibility,
	709	* and reset state except for space, to not disrupt compression.
	710	*/
	711	if(c!=0x20) {
	712	prev=BOCU1_ASCII_PREV;
	713	}
	714	*target++=(uint8_t)c;
	715	--targetCapacity;
	716	continue;
	717	}
	718
	719	if(U16_IS_LEAD(c)) {
	720	getTrail:
	721	if(source<sourceLimit) {
	722	/* test the following code unit */
	723	UChar trail=*source;
	724	if(U16_IS_TRAIL(trail)) {
	725	++source;
	726	c=U16_GET_SUPPLEMENTARY(c, trail);
	727	}
	728	} else {
	729	/* no more input */
	730	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
	731	break;
	732	}
	733	}
	734
	735	/*
	736	* all other Unicode code points c==U+0021..U+10ffff
	737	* are encoded with the difference c-prev
	738	*
	739	* a new prev is computed from c,
	740	* placed in the middle of a 0x80-block (for most small scripts) or
	741	* in the middle of the Unihan and Hangul blocks
	742	* to statistically minimize the following difference
	743	*/
	744	diff=c-prev;
	745	prev=BOCU1_PREV(c);
	746	if(DIFF_IS_SINGLE(diff)) {
	747	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
	748	--targetCapacity;
	749	if(c<0x3000) {
	750	goto fastSingle;
	751	}
	752	} else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
	753	/* optimize 2-byte case */
	754	int32_t m;
	755
	756	if(diff>=0) {
	757	diff-=BOCU1_REACH_POS_1+1;
	758	m=diff%BOCU1_TRAIL_COUNT;
	759	diff/=BOCU1_TRAIL_COUNT;
	760	diff+=BOCU1_START_POS_2;
	761	} else {
	762	diff-=BOCU1_REACH_NEG_1;
	763	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
	764	diff+=BOCU1_START_NEG_2;
	765	}
	766	*target++=(uint8_t)diff;
	767	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
	768	targetCapacity-=2;
	769	} else {
	770	int32_t length; /* will be 2..4 */
	771
	772	diff=packDiff(diff);
	773	length=BOCU1_LENGTH_FROM_PACKED(diff);
	774
	775	/* write the output character bytes from diff and length */
	776	/* from the first if in the loop we know that targetCapacity>0 */
	777	if(length<=targetCapacity) {
	778	switch(length) {
	779	/* each branch falls through to the next one */
	780	case 4:
	781	*target++=(uint8_t)(diff>>24);
	782	case 3: /fall through/
	783	*target++=(uint8_t)(diff>>16);
	784	/* case 2: handled above */
	785	*target++=(uint8_t)(diff>>8);
	786	/* case 1: handled above */
	787	*target++=(uint8_t)diff;
	788	default:
	789	/* will never occur */
	790	break;
	791	}
	792	targetCapacity-=length;
	793	} else {
	794	uint8_t *charErrorBuffer;
	795
	796	/*
	797	* We actually do this backwards here:
	798	* In order to save an intermediate variable, we output
	799	* first to the overflow buffer what does not fit into the
	800	* regular target.
	801	*/
	802	/* we know that 1<=targetCapacity<length<=4 */
	803	length-=targetCapacity;
	804	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
	805	switch(length) {
	806	/* each branch falls through to the next one */
	807	case 3:
	808	*charErrorBuffer++=(uint8_t)(diff>>16);
	809	case 2: /fall through/
	810	*charErrorBuffer++=(uint8_t)(diff>>8);
	811	case 1: /fall through/
	812	*charErrorBuffer=(uint8_t)diff;
	813	default:
	814	/* will never occur */
	815	break;
	816	}
	817	cnv->charErrorBufferLength=(int8_t)length;
	818
	819	/* now output what fits into the regular target */
	820	diff>>=8length; / length was reduced by targetCapacity */
	821	switch(targetCapacity) {
	822	/* each branch falls through to the next one */
	823	case 3:
	824	*target++=(uint8_t)(diff>>16);
	825	case 2: /fall through/
	826	*target++=(uint8_t)(diff>>8);
	827	case 1: /fall through/
	828	*target++=(uint8_t)diff;
	829	default:
	830	/* will never occur */
	831	break;
	832	}
	833
	834	/* target overflow */
	835	targetCapacity=0;
	836	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	837	break;
	838	}
	839	}
	840	} else {
	841	/* target is full */
	842	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	843	break;
	844	}
	845	}
	846
	847	/* set the converter state back into UConverter */
	848	cnv->fromUChar32= c<0 ? -c : 0;
	849	cnv->fromUnicodeStatus=(uint32_t)prev;
	850
	851	/* write back the updated pointers */
	852	pArgs->source=source;
	853	pArgs->target=(char *)target;
	854	}
	855
	856	/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
	857
	858	/**
	859	* Function for BOCU-1 decoder; handles multi-byte lead bytes.
	860	*
	861	* @param b lead byte;
	862	* BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
	863	* @return (diff<<2)\|count
	864	*/
	865	static inline int32_t
	866	decodeBocu1LeadByte(int32_t b) {
	867	int32_t diff, count;
	868
	869	if(b>=BOCU1_START_NEG_2) {
	870	/* positive difference */
	871	if(b<BOCU1_START_POS_3) {
	872	/* two bytes */
	873	diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
	874	count=1;
	875	} else if(b<BOCU1_START_POS_4) {
	876	/* three bytes */
	877	diff=((int32_t)b-BOCU1_START_POS_3)BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
	878	count=2;
	879	} else {
	880	/* four bytes */
	881	diff=BOCU1_REACH_POS_3+1;
	882	count=3;
	883	}
	884	} else {
	885	/* negative difference */
	886	if(b>=BOCU1_START_NEG_3) {
	887	/* two bytes */
	888	diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
	889	count=1;
	890	} else if(b>BOCU1_MIN) {
	891	/* three bytes */
	892	diff=((int32_t)b-BOCU1_START_NEG_3)BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
	893	count=2;
	894	} else {
	895	/* four bytes */
	896	diff=-BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
	897	count=3;
	898	}
	899	}
	900
	901	/* return the state for decoding the trail byte(s) */
	902	return (diff<<2)\|count;
	903	}
	904
	905	/**
	906	* Function for BOCU-1 decoder; handles multi-byte trail bytes.
	907	*
	908	* @param count number of remaining trail bytes including this one
	909	* @param b trail byte
	910	* @return new delta for diff including b - <0 indicates an error
	911	*
	912	* @see decodeBocu1
	913	*/
	914	static inline int32_t
	915	decodeBocu1TrailByte(int32_t count, int32_t b) {
	916	if(b<=0x20) {
	917	/* skip some C0 controls and make the trail byte range contiguous */
	918	b=bocu1ByteToTrail[b];
	919	/* b<0 for an illegal trail byte value will result in return<0 below */
	920	#if BOCU1_MAX_TRAIL<0xff
	921	} else if(b>BOCU1_MAX_TRAIL) {
	922	return -99;
	923	#endif
	924	} else {
	925	b-=BOCU1_TRAIL_BYTE_OFFSET;
	926	}
	927
	928	/* add trail byte into difference and decrement count */
	929	if(count==1) {
	930	return b;
	931	} else if(count==2) {
	932	return b*BOCU1_TRAIL_COUNT;
	933	} else /* count==3 */ {
	934	return b(BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT);
	935	}
	936	}
	937
	938	static void
	939	_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	940	UErrorCode *pErrorCode) {
	941	UConverter *cnv;
	942	const uint8_t source, sourceLimit;
	943	UChar *target;
	944	const UChar *targetLimit;
	945	int32_t *offsets;
	946
	947	int32_t prev, count, diff, c;
	948
	949	int8_t byteIndex;
	950	uint8_t *bytes;
	951
	952	int32_t sourceIndex, nextSourceIndex;
	953
	954	/* set up the local pointers */
	955	cnv=pArgs->converter;
	956	source=(const uint8_t *)pArgs->source;
	957	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	958	target=pArgs->target;
	959	targetLimit=pArgs->targetLimit;
	960	offsets=pArgs->offsets;
	961
	962	/* get the converter state from UConverter */
	963	prev=(int32_t)cnv->toUnicodeStatus;
	964	if(prev==0) {
	965	prev=BOCU1_ASCII_PREV;
	966	}
	967	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
	968	count=diff&3;
	969	diff>>=2;
	970
	971	byteIndex=cnv->toULength;
	972	bytes=cnv->toUBytes;
	973
	974	/* sourceIndex=-1 if the current character began in the previous buffer */
	975	sourceIndex=byteIndex==0 ? 0 : -1;
	976	nextSourceIndex=0;
	977
	978	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
	979	if(count>0 && byteIndex>0 && target<targetLimit) {
	980	goto getTrail;
	981	}
	982
	983	fastSingle:
	984	/* fast loop for single-byte differences */
	985	/* use count as the only loop counter variable */
	986	diff=(int32_t)(sourceLimit-source);
	987	count=(int32_t)(pArgs->targetLimit-target);
	988	if(count>diff) {
	989	count=diff;
	990	}
	991	while(count>0) {
	992	if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
	993	c=prev+(c-BOCU1_MIDDLE);
	994	if(c<0x3000) {
	995	*target++=(UChar)c;
	996	*offsets++=nextSourceIndex++;
	997	prev=BOCU1_SIMPLE_PREV(c);
	998	} else {
	999	break;
	1000	}
	1001	} else if(c<=0x20) {
	1002	if(c!=0x20) {
	1003	prev=BOCU1_ASCII_PREV;
	1004	}
	1005	*target++=(UChar)c;
	1006	*offsets++=nextSourceIndex++;
	1007	} else {
	1008	break;
	1009	}
	1010	++source;
	1011	--count;
	1012	}
	1013	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
	1014
	1015	/* decode a sequence of single and lead bytes */
	1016	while(source<sourceLimit) {
	1017	if(target>=targetLimit) {
	1018	/* target is full */
	1019	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1020	break;
	1021	}
	1022
	1023	++nextSourceIndex;
	1024	c=*source++;
	1025	if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
	1026	/* Write a code point directly from a single-byte difference. */
	1027	c=prev+(c-BOCU1_MIDDLE);
	1028	if(c<0x3000) {
	1029	*target++=(UChar)c;
	1030	*offsets++=sourceIndex;
	1031	prev=BOCU1_SIMPLE_PREV(c);
	1032	sourceIndex=nextSourceIndex;
	1033	goto fastSingle;
	1034	}
	1035	} else if(c<=0x20) {
	1036	/*
	1037	* Direct-encoded C0 control code or space.
	1038	* Reset prev for C0 control codes but not for space.
	1039	*/
	1040	if(c!=0x20) {
	1041	prev=BOCU1_ASCII_PREV;
	1042	}
	1043	*target++=(UChar)c;
	1044	*offsets++=sourceIndex;
	1045	sourceIndex=nextSourceIndex;
	1046	continue;
	1047	} else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
	1048	/* Optimize two-byte case. */
	1049	if(c>=BOCU1_MIDDLE) {
	1050	diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
	1051	} else {
	1052	diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
	1053	}
	1054
	1055	/* trail byte */
	1056	++nextSourceIndex;
	1057	c=decodeBocu1TrailByte(1, *source++);
	1058	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
	1059	bytes[0]=source[-2];
	1060	bytes[1]=source[-1];
	1061	byteIndex=2;
	1062	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1063	break;
	1064	}
	1065	} else if(c==BOCU1_RESET) {
	1066	/* only reset the state, no code point */
	1067	prev=BOCU1_ASCII_PREV;
	1068	sourceIndex=nextSourceIndex;
	1069	continue;
	1070	} else {
	1071	/*
	1072	* For multi-byte difference lead bytes, set the decoder state
	1073	* with the partial difference value from the lead byte and
	1074	* with the number of trail bytes.
	1075	*/
	1076	bytes[0]=(uint8_t)c;
	1077	byteIndex=1;
	1078
	1079	diff=decodeBocu1LeadByte(c);
	1080	count=diff&3;
	1081	diff>>=2;
	1082	getTrail:
	1083	for(;;) {
	1084	if(source>=sourceLimit) {
	1085	goto endloop;
	1086	}
	1087	++nextSourceIndex;
	1088	c=bytes[byteIndex++]=*source++;
	1089
	1090	/* trail byte in any position */
	1091	c=decodeBocu1TrailByte(count, c);
	1092	if(c<0) {
	1093	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1094	goto endloop;
	1095	}
	1096
	1097	diff+=c;
	1098	if(--count==0) {
	1099	/* final trail byte, deliver a code point */
	1100	byteIndex=0;
	1101	c=prev+diff;
	1102	if((uint32_t)c>0x10ffff) {
	1103	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1104	goto endloop;
	1105	}
	1106	break;
	1107	}
	1108	}
	1109	}
	1110
	1111	/* calculate the next prev and output c */
	1112	prev=BOCU1_PREV(c);
	1113	if(c<=0xffff) {
	1114	*target++=(UChar)c;
	1115	*offsets++=sourceIndex;
	1116	} else {
	1117	/* output surrogate pair */
	1118	*target++=U16_LEAD(c);
	1119	if(target<targetLimit) {
	1120	*target++=U16_TRAIL(c);
	1121	*offsets++=sourceIndex;
	1122	*offsets++=sourceIndex;
	1123	} else {
	1124	/* target overflow */
	1125	*offsets++=sourceIndex;
	1126	cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
	1127	cnv->UCharErrorBufferLength=1;
	1128	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1129	break;
	1130	}
	1131	}
	1132	sourceIndex=nextSourceIndex;
	1133	}
	1134	endloop:
	1135
	1136	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
	1137	/* set the converter state in UConverter to deal with the next character */
	1138	cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
	1139	cnv->mode=0;
	1140	} else {
	1141	/* set the converter state back into UConverter */
	1142	cnv->toUnicodeStatus=(uint32_t)prev;
	1143	cnv->mode=(diff<<2)\|count;
	1144	}
	1145	cnv->toULength=byteIndex;
	1146
	1147	/* write back the updated pointers */
	1148	pArgs->source=(const char *)source;
	1149	pArgs->target=target;
	1150	pArgs->offsets=offsets;
	1151	return;
	1152	}
	1153
	1154	/*
	1155	* Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
	1156	* If a change is made in the original function, then either
	1157	* change this function the same way or
	1158	* re-copy the original function and remove the variables
	1159	* offsets, sourceIndex, and nextSourceIndex.
	1160	*/
	1161	static void
	1162	_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
	1163	UErrorCode *pErrorCode) {
	1164	UConverter *cnv;
	1165	const uint8_t source, sourceLimit;
	1166	UChar *target;
	1167	const UChar *targetLimit;
	1168
	1169	int32_t prev, count, diff, c;
	1170
	1171	int8_t byteIndex;
	1172	uint8_t *bytes;
	1173
	1174	U_ALIGN_CODE(16)
	1175
	1176	/* set up the local pointers */
	1177	cnv=pArgs->converter;
	1178	source=(const uint8_t *)pArgs->source;
	1179	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	1180	target=pArgs->target;
	1181	targetLimit=pArgs->targetLimit;
	1182
	1183	/* get the converter state from UConverter */
	1184	prev=(int32_t)cnv->toUnicodeStatus;
	1185	if(prev==0) {
	1186	prev=BOCU1_ASCII_PREV;
	1187	}
	1188	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
	1189	count=diff&3;
	1190	diff>>=2;
	1191
	1192	byteIndex=cnv->toULength;
	1193	bytes=cnv->toUBytes;
	1194
	1195	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
	1196	if(count>0 && byteIndex>0 && target<targetLimit) {
	1197	goto getTrail;
	1198	}
	1199
	1200	fastSingle:
	1201	/* fast loop for single-byte differences */
	1202	/* use count as the only loop counter variable */
	1203	diff=(int32_t)(sourceLimit-source);
	1204	count=(int32_t)(pArgs->targetLimit-target);
	1205	if(count>diff) {
	1206	count=diff;
	1207	}
	1208	while(count>0) {
	1209	if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
	1210	c=prev+(c-BOCU1_MIDDLE);
	1211	if(c<0x3000) {
	1212	*target++=(UChar)c;
	1213	prev=BOCU1_SIMPLE_PREV(c);
	1214	} else {
	1215	break;
	1216	}
	1217	} else if(c<=0x20) {
	1218	if(c!=0x20) {
	1219	prev=BOCU1_ASCII_PREV;
	1220	}
	1221	*target++=(UChar)c;
	1222	} else {
	1223	break;
	1224	}
	1225	++source;
	1226	--count;
	1227	}
	1228
	1229	/* decode a sequence of single and lead bytes */
	1230	while(source<sourceLimit) {
	1231	if(target>=targetLimit) {
	1232	/* target is full */
	1233	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1234	break;
	1235	}
	1236
	1237	c=*source++;
	1238	if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
	1239	/* Write a code point directly from a single-byte difference. */
	1240	c=prev+(c-BOCU1_MIDDLE);
	1241	if(c<0x3000) {
	1242	*target++=(UChar)c;
	1243	prev=BOCU1_SIMPLE_PREV(c);
	1244	goto fastSingle;
	1245	}
	1246	} else if(c<=0x20) {
	1247	/*
	1248	* Direct-encoded C0 control code or space.
	1249	* Reset prev for C0 control codes but not for space.
	1250	*/
	1251	if(c!=0x20) {
	1252	prev=BOCU1_ASCII_PREV;
	1253	}
	1254	*target++=(UChar)c;
	1255	continue;
	1256	} else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
	1257	/* Optimize two-byte case. */
	1258	if(c>=BOCU1_MIDDLE) {
	1259	diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
	1260	} else {
	1261	diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
	1262	}
	1263
	1264	/* trail byte */
	1265	c=decodeBocu1TrailByte(1, *source++);
	1266	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
	1267	bytes[0]=source[-2];
	1268	bytes[1]=source[-1];
	1269	byteIndex=2;
	1270	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1271	break;
	1272	}
	1273	} else if(c==BOCU1_RESET) {
	1274	/* only reset the state, no code point */
	1275	prev=BOCU1_ASCII_PREV;
	1276	continue;
	1277	} else {
	1278	/*
	1279	* For multi-byte difference lead bytes, set the decoder state
	1280	* with the partial difference value from the lead byte and
	1281	* with the number of trail bytes.
	1282	*/
	1283	bytes[0]=(uint8_t)c;
	1284	byteIndex=1;
	1285
	1286	diff=decodeBocu1LeadByte(c);
	1287	count=diff&3;
	1288	diff>>=2;
	1289	getTrail:
	1290	for(;;) {
	1291	if(source>=sourceLimit) {
	1292	goto endloop;
	1293	}
	1294	c=bytes[byteIndex++]=*source++;
	1295
	1296	/* trail byte in any position */
	1297	c=decodeBocu1TrailByte(count, c);
	1298	if(c<0) {
	1299	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1300	goto endloop;
	1301	}
	1302
	1303	diff+=c;
	1304	if(--count==0) {
	1305	/* final trail byte, deliver a code point */
	1306	byteIndex=0;
	1307	c=prev+diff;
	1308	if((uint32_t)c>0x10ffff) {
	1309	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1310	goto endloop;
	1311	}
	1312	break;
	1313	}
	1314	}
	1315	}
	1316
	1317	/* calculate the next prev and output c */
	1318	prev=BOCU1_PREV(c);
	1319	if(c<=0xffff) {
	1320	*target++=(UChar)c;
	1321	} else {
	1322	/* output surrogate pair */
	1323	*target++=U16_LEAD(c);
	1324	if(target<targetLimit) {
	1325	*target++=U16_TRAIL(c);
	1326	} else {
	1327	/* target overflow */
	1328	cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
	1329	cnv->UCharErrorBufferLength=1;
	1330	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1331	break;
	1332	}
	1333	}
	1334	}
	1335	endloop:
	1336
	1337	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
	1338	/* set the converter state in UConverter to deal with the next character */
	1339	cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
	1340	cnv->mode=0;
	1341	} else {
	1342	/* set the converter state back into UConverter */
	1343	cnv->toUnicodeStatus=(uint32_t)prev;
	1344	cnv->mode=(diff<<2)\|count;
	1345	}
	1346	cnv->toULength=byteIndex;
	1347
	1348	/* write back the updated pointers */
	1349	pArgs->source=(const char *)source;
	1350	pArgs->target=target;
	1351	return;
	1352	}
	1353
	1354	/* miscellaneous ------------------------------------------------------------ */
	1355
	1356	static const UConverterImpl _Bocu1Impl={
	1357	UCNV_BOCU1,
	1358
	1359	NULL,
	1360	NULL,
	1361
	1362	NULL,
	1363	NULL,
	1364	NULL,
	1365
	1366	_Bocu1ToUnicode,
	1367	_Bocu1ToUnicodeWithOffsets,
	1368	_Bocu1FromUnicode,
	1369	_Bocu1FromUnicodeWithOffsets,
	1370	NULL,
	1371
	1372	NULL,
	1373	NULL,
	1374	NULL,
	1375	NULL,
	1376	ucnv_getCompleteUnicodeSet,
	1377
	1378	NULL,
	1379	NULL
	1380	};
	1381
	1382	static const UConverterStaticData _Bocu1StaticData={
	1383	sizeof(UConverterStaticData),
	1384	"BOCU-1",
	1385	1214, /* CCSID for BOCU-1 */
	1386	UCNV_IBM, UCNV_BOCU1,
	1387	1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
	1388	{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
	1389	FALSE, FALSE,
	1390	0,
	1391	0,
	1392	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	1393	};
	1394
	1395	const UConverterSharedData _Bocu1Data={
	1396	sizeof(UConverterSharedData), ~((uint32_t)0),
	1397	NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl,
	1398	0,
	1399	UCNV_MBCS_TABLE_INITIALIZER
	1400	};
	1401
	1402	#endif