git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 2002-2011, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* file name: ucnv_u32.c
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2002jul01
	12	* created by: Markus W. Scherer
	13	*
	14	* UTF-32 converter implementation. Used to be in ucnv_utf.c.
	15	*/
	16
	17	#include "unicode/utypes.h"
	18
	19	#if !UCONFIG_NO_CONVERSION
	20
	21	#include "unicode/ucnv.h"
	22	#include "unicode/utf.h"
	23	#include "ucnv_bld.h"
	24	#include "ucnv_cnv.h"
	25	#include "cmemory.h"
	26
	27	#define MAXIMUM_UCS2 0x0000FFFF
	28	#define MAXIMUM_UTF 0x0010FFFF
	29	#define HALF_SHIFT 10
	30	#define HALF_BASE 0x0010000
	31	#define HALF_MASK 0x3FF
	32	#define SURROGATE_HIGH_START 0xD800
	33	#define SURROGATE_LOW_START 0xDC00
	34
	35	/* -SURROGATE_LOW_START + HALF_BASE */
	36	#define SURROGATE_LOW_BASE 9216
	37
	38	enum {
	39	UCNV_NEED_TO_WRITE_BOM=1
	40	};
	41
	42	/* UTF-32BE ----------------------------------------------------------------- */
	43
	44	static void
	45	T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
	46	UErrorCode * err)
	47	{
	48	const unsigned char mySource = (unsigned char ) args->source;
	49	UChar *myTarget = args->target;
	50	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
	51	const UChar *targetLimit = args->targetLimit;
	52	unsigned char *toUBytes = args->converter->toUBytes;
	53	uint32_t ch, i;
	54
	55	/* Restore state of current sequence */
	56	if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
	57	i = args->converter->toULength; /* restore # of bytes consumed */
	58	args->converter->toULength = 0;
	59
	60	ch = args->converter->toUnicodeStatus - 1;/Stores the previously calculated ch from a previous call/
	61	args->converter->toUnicodeStatus = 0;
	62	goto morebytes;
	63	}
	64
	65	while (mySource < sourceLimit && myTarget < targetLimit) {
	66	i = 0;
	67	ch = 0;
	68	morebytes:
	69	while (i < sizeof(uint32_t)) {
	70	if (mySource < sourceLimit) {
	71	ch = (ch << 8) \| (uint8_t)(*mySource);
	72	toUBytes[i++] = (char) *(mySource++);
	73	}
	74	else {
	75	/* stores a partially calculated target*/
	76	/* + 1 to make 0 a valid character */
	77	args->converter->toUnicodeStatus = ch + 1;
	78	args->converter->toULength = (int8_t) i;
	79	goto donefornow;
	80	}
	81	}
	82
	83	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
	84	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
	85	if (ch <= MAXIMUM_UCS2)
	86	{
	87	/* fits in 16 bits */
	88	*(myTarget++) = (UChar) ch;
	89	}
	90	else {
	91	/* write out the surrogates */
	92	*(myTarget++) = U16_LEAD(ch);
	93	ch = U16_TRAIL(ch);
	94	if (myTarget < targetLimit) {
	95	*(myTarget++) = (UChar)ch;
	96	}
	97	else {
	98	/* Put in overflow buffer (not handled here) */
	99	args->converter->UCharErrorBuffer[0] = (UChar) ch;
	100	args->converter->UCharErrorBufferLength = 1;
	101	*err = U_BUFFER_OVERFLOW_ERROR;
	102	break;
	103	}
	104	}
	105	}
	106	else {
	107	args->converter->toULength = (int8_t)i;
	108	*err = U_ILLEGAL_CHAR_FOUND;
	109	break;
	110	}
	111	}
	112
	113	donefornow:
	114	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
	115	/* End of target buffer */
	116	*err = U_BUFFER_OVERFLOW_ERROR;
	117	}
	118
	119	args->target = myTarget;
	120	args->source = (const char *) mySource;
	121	}
	122
	123	static void
	124	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
	125	UErrorCode * err)
	126	{
	127	const unsigned char mySource = (unsigned char ) args->source;
	128	UChar *myTarget = args->target;
	129	int32_t *myOffsets = args->offsets;
	130	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
	131	const UChar *targetLimit = args->targetLimit;
	132	unsigned char *toUBytes = args->converter->toUBytes;
	133	uint32_t ch, i;
	134	int32_t offsetNum = 0;
	135
	136	/* Restore state of current sequence */
	137	if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
	138	i = args->converter->toULength; /* restore # of bytes consumed */
	139	args->converter->toULength = 0;
	140
	141	ch = args->converter->toUnicodeStatus - 1;/Stores the previously calculated ch from a previous call/
	142	args->converter->toUnicodeStatus = 0;
	143	goto morebytes;
	144	}
	145
	146	while (mySource < sourceLimit && myTarget < targetLimit) {
	147	i = 0;
	148	ch = 0;
	149	morebytes:
	150	while (i < sizeof(uint32_t)) {
	151	if (mySource < sourceLimit) {
	152	ch = (ch << 8) \| (uint8_t)(*mySource);
	153	toUBytes[i++] = (char) *(mySource++);
	154	}
	155	else {
	156	/* stores a partially calculated target*/
	157	/* + 1 to make 0 a valid character */
	158	args->converter->toUnicodeStatus = ch + 1;
	159	args->converter->toULength = (int8_t) i;
	160	goto donefornow;
	161	}
	162	}
	163
	164	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
	165	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
	166	if (ch <= MAXIMUM_UCS2) {
	167	/* fits in 16 bits */
	168	*(myTarget++) = (UChar) ch;
	169	*(myOffsets++) = offsetNum;
	170	}
	171	else {
	172	/* write out the surrogates */
	173	*(myTarget++) = U16_LEAD(ch);
	174	*myOffsets++ = offsetNum;
	175	ch = U16_TRAIL(ch);
	176	if (myTarget < targetLimit)
	177	{
	178	*(myTarget++) = (UChar)ch;
	179	*(myOffsets++) = offsetNum;
	180	}
	181	else {
	182	/* Put in overflow buffer (not handled here) */
	183	args->converter->UCharErrorBuffer[0] = (UChar) ch;
	184	args->converter->UCharErrorBufferLength = 1;
	185	*err = U_BUFFER_OVERFLOW_ERROR;
	186	break;
	187	}
	188	}
	189	}
	190	else {
	191	args->converter->toULength = (int8_t)i;
	192	*err = U_ILLEGAL_CHAR_FOUND;
	193	break;
	194	}
	195	offsetNum += i;
	196	}
	197
	198	donefornow:
	199	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
	200	{
	201	/* End of target buffer */
	202	*err = U_BUFFER_OVERFLOW_ERROR;
	203	}
	204
	205	args->target = myTarget;
	206	args->source = (const char *) mySource;
	207	args->offsets = myOffsets;
	208	}
	209
	210	static void
	211	T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
	212	UErrorCode * err)
	213	{
	214	const UChar *mySource = args->source;
	215	unsigned char *myTarget;
	216	const UChar *sourceLimit = args->sourceLimit;
	217	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
	218	UChar32 ch, ch2;
	219	unsigned int indexToWrite;
	220	unsigned char temp[sizeof(uint32_t)];
	221
	222	if(mySource >= sourceLimit) {
	223	/* no input, nothing to do */
	224	return;
	225	}
	226
	227	/* write the BOM if necessary */
	228	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	229	static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
	230	ucnv_fromUWriteBytes(args->converter,
	231	bom, 4,
	232	&args->target, args->targetLimit,
	233	&args->offsets, -1,
	234	err);
	235	args->converter->fromUnicodeStatus=0;
	236	}
	237
	238	myTarget = (unsigned char *) args->target;
	239	temp[0] = 0;
	240
	241	if (args->converter->fromUChar32) {
	242	ch = args->converter->fromUChar32;
	243	args->converter->fromUChar32 = 0;
	244	goto lowsurogate;
	245	}
	246
	247	while (mySource < sourceLimit && myTarget < targetLimit) {
	248	ch = *(mySource++);
	249
	250	if (U_IS_SURROGATE(ch)) {
	251	if (U_IS_LEAD(ch)) {
	252	lowsurogate:
	253	if (mySource < sourceLimit) {
	254	ch2 = *mySource;
	255	if (U_IS_TRAIL(ch2)) {
	256	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
	257	mySource++;
	258	}
	259	else {
	260	/* this is an unmatched trail code unit (2nd surrogate) */
	261	/* callback(illegal) */
	262	args->converter->fromUChar32 = ch;
	263	*err = U_ILLEGAL_CHAR_FOUND;
	264	break;
	265	}
	266	}
	267	else {
	268	/* ran out of source */
	269	args->converter->fromUChar32 = ch;
	270	if (args->flush) {
	271	/* this is an unmatched trail code unit (2nd surrogate) */
	272	/* callback(illegal) */
	273	*err = U_ILLEGAL_CHAR_FOUND;
	274	}
	275	break;
	276	}
	277	}
	278	else {
	279	/* this is an unmatched trail code unit (2nd surrogate) */
	280	/* callback(illegal) */
	281	args->converter->fromUChar32 = ch;
	282	*err = U_ILLEGAL_CHAR_FOUND;
	283	break;
	284	}
	285	}
	286
	287	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
	288	temp[1] = (uint8_t) (ch >> 16 & 0x1F);
	289	temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
	290	temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
	291
	292	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
	293	if (myTarget < targetLimit) {
	294	*(myTarget++) = temp[indexToWrite];
	295	}
	296	else {
	297	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
	298	*err = U_BUFFER_OVERFLOW_ERROR;
	299	}
	300	}
	301	}
	302
	303	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
	304	*err = U_BUFFER_OVERFLOW_ERROR;
	305	}
	306
	307	args->target = (char *) myTarget;
	308	args->source = mySource;
	309	}
	310
	311	static void
	312	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
	313	UErrorCode * err)
	314	{
	315	const UChar *mySource = args->source;
	316	unsigned char *myTarget;
	317	int32_t *myOffsets;
	318	const UChar *sourceLimit = args->sourceLimit;
	319	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
	320	UChar32 ch, ch2;
	321	int32_t offsetNum = 0;
	322	unsigned int indexToWrite;
	323	unsigned char temp[sizeof(uint32_t)];
	324
	325	if(mySource >= sourceLimit) {
	326	/* no input, nothing to do */
	327	return;
	328	}
	329
	330	/* write the BOM if necessary */
	331	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	332	static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
	333	ucnv_fromUWriteBytes(args->converter,
	334	bom, 4,
	335	&args->target, args->targetLimit,
	336	&args->offsets, -1,
	337	err);
	338	args->converter->fromUnicodeStatus=0;
	339	}
	340
	341	myTarget = (unsigned char *) args->target;
	342	myOffsets = args->offsets;
	343	temp[0] = 0;
	344
	345	if (args->converter->fromUChar32) {
	346	ch = args->converter->fromUChar32;
	347	args->converter->fromUChar32 = 0;
	348	goto lowsurogate;
	349	}
	350
	351	while (mySource < sourceLimit && myTarget < targetLimit) {
	352	ch = *(mySource++);
	353
	354	if (U_IS_SURROGATE(ch)) {
	355	if (U_IS_LEAD(ch)) {
	356	lowsurogate:
	357	if (mySource < sourceLimit) {
	358	ch2 = *mySource;
	359	if (U_IS_TRAIL(ch2)) {
	360	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
	361	mySource++;
	362	}
	363	else {
	364	/* this is an unmatched trail code unit (2nd surrogate) */
	365	/* callback(illegal) */
	366	args->converter->fromUChar32 = ch;
	367	*err = U_ILLEGAL_CHAR_FOUND;
	368	break;
	369	}
	370	}
	371	else {
	372	/* ran out of source */
	373	args->converter->fromUChar32 = ch;
	374	if (args->flush) {
	375	/* this is an unmatched trail code unit (2nd surrogate) */
	376	/* callback(illegal) */
	377	*err = U_ILLEGAL_CHAR_FOUND;
	378	}
	379	break;
	380	}
	381	}
	382	else {
	383	/* this is an unmatched trail code unit (2nd surrogate) */
	384	/* callback(illegal) */
	385	args->converter->fromUChar32 = ch;
	386	*err = U_ILLEGAL_CHAR_FOUND;
	387	break;
	388	}
	389	}
	390
	391	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
	392	temp[1] = (uint8_t) (ch >> 16 & 0x1F);
	393	temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
	394	temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
	395
	396	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
	397	if (myTarget < targetLimit) {
	398	*(myTarget++) = temp[indexToWrite];
	399	*(myOffsets++) = offsetNum;
	400	}
	401	else {
	402	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
	403	*err = U_BUFFER_OVERFLOW_ERROR;
	404	}
	405	}
	406	offsetNum = offsetNum + 1 + (temp[1] != 0);
	407	}
	408
	409	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
	410	*err = U_BUFFER_OVERFLOW_ERROR;
	411	}
	412
	413	args->target = (char *) myTarget;
	414	args->source = mySource;
	415	args->offsets = myOffsets;
	416	}
	417
	418	static UChar32
	419	T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
	420	UErrorCode* err)
	421	{
	422	const uint8_t *mySource;
	423	UChar32 myUChar;
	424	int32_t length;
	425
	426	mySource = (const uint8_t *)args->source;
	427	if (mySource >= (const uint8_t *)args->sourceLimit)
	428	{
	429	/* no input */
	430	*err = U_INDEX_OUTOFBOUNDS_ERROR;
	431	return 0xffff;
	432	}
	433
	434	length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
	435	if (length < 4)
	436	{
	437	/* got a partial character */
	438	uprv_memcpy(args->converter->toUBytes, mySource, length);
	439	args->converter->toULength = (int8_t)length;
	440	args->source = (const char *)(mySource + length);
	441	*err = U_TRUNCATED_CHAR_FOUND;
	442	return 0xffff;
	443	}
	444
	445	/* Don't even try to do a direct cast because the value may be on an odd address. */
	446	myUChar = ((UChar32)mySource[0] << 24)
	447	\| ((UChar32)mySource[1] << 16)
	448	\| ((UChar32)mySource[2] << 8)
	449	\| ((UChar32)mySource[3]);
	450
	451	args->source = (const char *)(mySource + 4);
	452	if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
	453	return myUChar;
	454	}
	455
	456	uprv_memcpy(args->converter->toUBytes, mySource, 4);
	457	args->converter->toULength = 4;
	458
	459	*err = U_ILLEGAL_CHAR_FOUND;
	460	return 0xffff;
	461	}
	462
	463	static const UConverterImpl _UTF32BEImpl = {
	464	UCNV_UTF32_BigEndian,
	465
	466	NULL,
	467	NULL,
	468
	469	NULL,
	470	NULL,
	471	NULL,
	472
	473	T_UConverter_toUnicode_UTF32_BE,
	474	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
	475	T_UConverter_fromUnicode_UTF32_BE,
	476	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
	477	T_UConverter_getNextUChar_UTF32_BE,
	478
	479	NULL,
	480	NULL,
	481	NULL,
	482	NULL,
	483	ucnv_getNonSurrogateUnicodeSet
	484	};
	485
	486	/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
	487	static const UConverterStaticData _UTF32BEStaticData = {
	488	sizeof(UConverterStaticData),
	489	"UTF-32BE",
	490	1232,
	491	UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
	492	{ 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE,
	493	0,
	494	0,
	495	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	496	};
	497
	498	const UConverterSharedData _UTF32BEData = {
	499	sizeof(UConverterSharedData), ~((uint32_t) 0),
	500	NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl,
	501	0
	502	};
	503
	504	/* UTF-32LE ---------------------------------------------------------- */
	505
	506	static void
	507	T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
	508	UErrorCode * err)
	509	{
	510	const unsigned char mySource = (unsigned char ) args->source;
	511	UChar *myTarget = args->target;
	512	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
	513	const UChar *targetLimit = args->targetLimit;
	514	unsigned char *toUBytes = args->converter->toUBytes;
	515	uint32_t ch, i;
	516
	517	/* Restore state of current sequence */
	518	if (args->converter->toUnicodeStatus && myTarget < targetLimit)
	519	{
	520	i = args->converter->toULength; /* restore # of bytes consumed */
	521	args->converter->toULength = 0;
	522
	523	/* Stores the previously calculated ch from a previous call*/
	524	ch = args->converter->toUnicodeStatus - 1;
	525	args->converter->toUnicodeStatus = 0;
	526	goto morebytes;
	527	}
	528
	529	while (mySource < sourceLimit && myTarget < targetLimit)
	530	{
	531	i = 0;
	532	ch = 0;
	533	morebytes:
	534	while (i < sizeof(uint32_t))
	535	{
	536	if (mySource < sourceLimit)
	537	{
	538	ch \|= ((uint8_t)(mySource)) << (i 8);
	539	toUBytes[i++] = (char) *(mySource++);
	540	}
	541	else
	542	{
	543	/* stores a partially calculated target*/
	544	/* + 1 to make 0 a valid character */
	545	args->converter->toUnicodeStatus = ch + 1;
	546	args->converter->toULength = (int8_t) i;
	547	goto donefornow;
	548	}
	549	}
	550
	551	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
	552	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
	553	if (ch <= MAXIMUM_UCS2) {
	554	/* fits in 16 bits */
	555	*(myTarget++) = (UChar) ch;
	556	}
	557	else {
	558	/* write out the surrogates */
	559	*(myTarget++) = U16_LEAD(ch);
	560	ch = U16_TRAIL(ch);
	561	if (myTarget < targetLimit) {
	562	*(myTarget++) = (UChar)ch;
	563	}
	564	else {
	565	/* Put in overflow buffer (not handled here) */
	566	args->converter->UCharErrorBuffer[0] = (UChar) ch;
	567	args->converter->UCharErrorBufferLength = 1;
	568	*err = U_BUFFER_OVERFLOW_ERROR;
	569	break;
	570	}
	571	}
	572	}
	573	else {
	574	args->converter->toULength = (int8_t)i;
	575	*err = U_ILLEGAL_CHAR_FOUND;
	576	break;
	577	}
	578	}
	579
	580	donefornow:
	581	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
	582	{
	583	/* End of target buffer */
	584	*err = U_BUFFER_OVERFLOW_ERROR;
	585	}
	586
	587	args->target = myTarget;
	588	args->source = (const char *) mySource;
	589	}
	590
	591	static void
	592	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
	593	UErrorCode * err)
	594	{
	595	const unsigned char mySource = (unsigned char ) args->source;
	596	UChar *myTarget = args->target;
	597	int32_t *myOffsets = args->offsets;
	598	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
	599	const UChar *targetLimit = args->targetLimit;
	600	unsigned char *toUBytes = args->converter->toUBytes;
	601	uint32_t ch, i;
	602	int32_t offsetNum = 0;
	603
	604	/* Restore state of current sequence */
	605	if (args->converter->toUnicodeStatus && myTarget < targetLimit)
	606	{
	607	i = args->converter->toULength; /* restore # of bytes consumed */
	608	args->converter->toULength = 0;
	609
	610	/* Stores the previously calculated ch from a previous call*/
	611	ch = args->converter->toUnicodeStatus - 1;
	612	args->converter->toUnicodeStatus = 0;
	613	goto morebytes;
	614	}
	615
	616	while (mySource < sourceLimit && myTarget < targetLimit)
	617	{
	618	i = 0;
	619	ch = 0;
	620	morebytes:
	621	while (i < sizeof(uint32_t))
	622	{
	623	if (mySource < sourceLimit)
	624	{
	625	ch \|= ((uint8_t)(mySource)) << (i 8);
	626	toUBytes[i++] = (char) *(mySource++);
	627	}
	628	else
	629	{
	630	/* stores a partially calculated target*/
	631	/* + 1 to make 0 a valid character */
	632	args->converter->toUnicodeStatus = ch + 1;
	633	args->converter->toULength = (int8_t) i;
	634	goto donefornow;
	635	}
	636	}
	637
	638	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
	639	{
	640	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
	641	if (ch <= MAXIMUM_UCS2)
	642	{
	643	/* fits in 16 bits */
	644	*(myTarget++) = (UChar) ch;
	645	*(myOffsets++) = offsetNum;
	646	}
	647	else {
	648	/* write out the surrogates */
	649	*(myTarget++) = U16_LEAD(ch);
	650	*(myOffsets++) = offsetNum;
	651	ch = U16_TRAIL(ch);
	652	if (myTarget < targetLimit)
	653	{
	654	*(myTarget++) = (UChar)ch;
	655	*(myOffsets++) = offsetNum;
	656	}
	657	else
	658	{
	659	/* Put in overflow buffer (not handled here) */
	660	args->converter->UCharErrorBuffer[0] = (UChar) ch;
	661	args->converter->UCharErrorBufferLength = 1;
	662	*err = U_BUFFER_OVERFLOW_ERROR;
	663	break;
	664	}
	665	}
	666	}
	667	else
	668	{
	669	args->converter->toULength = (int8_t)i;
	670	*err = U_ILLEGAL_CHAR_FOUND;
	671	break;
	672	}
	673	offsetNum += i;
	674	}
	675
	676	donefornow:
	677	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
	678	{
	679	/* End of target buffer */
	680	*err = U_BUFFER_OVERFLOW_ERROR;
	681	}
	682
	683	args->target = myTarget;
	684	args->source = (const char *) mySource;
	685	args->offsets = myOffsets;
	686	}
	687
	688	static void
	689	T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
	690	UErrorCode * err)
	691	{
	692	const UChar *mySource = args->source;
	693	unsigned char *myTarget;
	694	const UChar *sourceLimit = args->sourceLimit;
	695	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
	696	UChar32 ch, ch2;
	697	unsigned int indexToWrite;
	698	unsigned char temp[sizeof(uint32_t)];
	699
	700	if(mySource >= sourceLimit) {
	701	/* no input, nothing to do */
	702	return;
	703	}
	704
	705	/* write the BOM if necessary */
	706	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	707	static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
	708	ucnv_fromUWriteBytes(args->converter,
	709	bom, 4,
	710	&args->target, args->targetLimit,
	711	&args->offsets, -1,
	712	err);
	713	args->converter->fromUnicodeStatus=0;
	714	}
	715
	716	myTarget = (unsigned char *) args->target;
	717	temp[3] = 0;
	718
	719	if (args->converter->fromUChar32)
	720	{
	721	ch = args->converter->fromUChar32;
	722	args->converter->fromUChar32 = 0;
	723	goto lowsurogate;
	724	}
	725
	726	while (mySource < sourceLimit && myTarget < targetLimit)
	727	{
	728	ch = *(mySource++);
	729
	730	if (U16_IS_SURROGATE(ch)) {
	731	if (U16_IS_LEAD(ch))
	732	{
	733	lowsurogate:
	734	if (mySource < sourceLimit)
	735	{
	736	ch2 = *mySource;
	737	if (U16_IS_TRAIL(ch2)) {
	738	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
	739	mySource++;
	740	}
	741	else {
	742	/* this is an unmatched trail code unit (2nd surrogate) */
	743	/* callback(illegal) */
	744	args->converter->fromUChar32 = ch;
	745	*err = U_ILLEGAL_CHAR_FOUND;
	746	break;
	747	}
	748	}
	749	else {
	750	/* ran out of source */
	751	args->converter->fromUChar32 = ch;
	752	if (args->flush) {
	753	/* this is an unmatched trail code unit (2nd surrogate) */
	754	/* callback(illegal) */
	755	*err = U_ILLEGAL_CHAR_FOUND;
	756	}
	757	break;
	758	}
	759	}
	760	else {
	761	/* this is an unmatched trail code unit (2nd surrogate) */
	762	/* callback(illegal) */
	763	args->converter->fromUChar32 = ch;
	764	*err = U_ILLEGAL_CHAR_FOUND;
	765	break;
	766	}
	767	}
	768
	769	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
	770	temp[2] = (uint8_t) (ch >> 16 & 0x1F);
	771	temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
	772	temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
	773
	774	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
	775	{
	776	if (myTarget < targetLimit)
	777	{
	778	*(myTarget++) = temp[indexToWrite];
	779	}
	780	else
	781	{
	782	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
	783	*err = U_BUFFER_OVERFLOW_ERROR;
	784	}
	785	}
	786	}
	787
	788	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
	789	{
	790	*err = U_BUFFER_OVERFLOW_ERROR;
	791	}
	792
	793	args->target = (char *) myTarget;
	794	args->source = mySource;
	795	}
	796
	797	static void
	798	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
	799	UErrorCode * err)
	800	{
	801	const UChar *mySource = args->source;
	802	unsigned char *myTarget;
	803	int32_t *myOffsets;
	804	const UChar *sourceLimit = args->sourceLimit;
	805	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
	806	UChar32 ch, ch2;
	807	unsigned int indexToWrite;
	808	unsigned char temp[sizeof(uint32_t)];
	809	int32_t offsetNum = 0;
	810
	811	if(mySource >= sourceLimit) {
	812	/* no input, nothing to do */
	813	return;
	814	}
	815
	816	/* write the BOM if necessary */
	817	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
	818	static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
	819	ucnv_fromUWriteBytes(args->converter,
	820	bom, 4,
	821	&args->target, args->targetLimit,
	822	&args->offsets, -1,
	823	err);
	824	args->converter->fromUnicodeStatus=0;
	825	}
	826
	827	myTarget = (unsigned char *) args->target;
	828	myOffsets = args->offsets;
	829	temp[3] = 0;
	830
	831	if (args->converter->fromUChar32)
	832	{
	833	ch = args->converter->fromUChar32;
	834	args->converter->fromUChar32 = 0;
	835	goto lowsurogate;
	836	}
	837
	838	while (mySource < sourceLimit && myTarget < targetLimit)
	839	{
	840	ch = *(mySource++);
	841
	842	if (U16_IS_SURROGATE(ch)) {
	843	if (U16_IS_LEAD(ch))
	844	{
	845	lowsurogate:
	846	if (mySource < sourceLimit)
	847	{
	848	ch2 = *mySource;
	849	if (U16_IS_TRAIL(ch2))
	850	{
	851	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
	852	mySource++;
	853	}
	854	else {
	855	/* this is an unmatched trail code unit (2nd surrogate) */
	856	/* callback(illegal) */
	857	args->converter->fromUChar32 = ch;
	858	*err = U_ILLEGAL_CHAR_FOUND;
	859	break;
	860	}
	861	}
	862	else {
	863	/* ran out of source */
	864	args->converter->fromUChar32 = ch;
	865	if (args->flush) {
	866	/* this is an unmatched trail code unit (2nd surrogate) */
	867	/* callback(illegal) */
	868	*err = U_ILLEGAL_CHAR_FOUND;
	869	}
	870	break;
	871	}
	872	}
	873	else {
	874	/* this is an unmatched trail code unit (2nd surrogate) */
	875	/* callback(illegal) */
	876	args->converter->fromUChar32 = ch;
	877	*err = U_ILLEGAL_CHAR_FOUND;
	878	break;
	879	}
	880	}
	881
	882	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
	883	temp[2] = (uint8_t) (ch >> 16 & 0x1F);
	884	temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
	885	temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
	886
	887	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
	888	{
	889	if (myTarget < targetLimit)
	890	{
	891	*(myTarget++) = temp[indexToWrite];
	892	*(myOffsets++) = offsetNum;
	893	}
	894	else
	895	{
	896	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
	897	*err = U_BUFFER_OVERFLOW_ERROR;
	898	}
	899	}
	900	offsetNum = offsetNum + 1 + (temp[2] != 0);
	901	}
	902
	903	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
	904	{
	905	*err = U_BUFFER_OVERFLOW_ERROR;
	906	}
	907
	908	args->target = (char *) myTarget;
	909	args->source = mySource;
	910	args->offsets = myOffsets;
	911	}
	912
	913	static UChar32
	914	T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
	915	UErrorCode* err)
	916	{
	917	const uint8_t *mySource;
	918	UChar32 myUChar;
	919	int32_t length;
	920
	921	mySource = (const uint8_t *)args->source;
	922	if (mySource >= (const uint8_t *)args->sourceLimit)
	923	{
	924	/* no input */
	925	*err = U_INDEX_OUTOFBOUNDS_ERROR;
	926	return 0xffff;
	927	}
	928
	929	length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
	930	if (length < 4)
	931	{
	932	/* got a partial character */
	933	uprv_memcpy(args->converter->toUBytes, mySource, length);
	934	args->converter->toULength = (int8_t)length;
	935	args->source = (const char *)(mySource + length);
	936	*err = U_TRUNCATED_CHAR_FOUND;
	937	return 0xffff;
	938	}
	939
	940	/* Don't even try to do a direct cast because the value may be on an odd address. */
	941	myUChar = ((UChar32)mySource[3] << 24)
	942	\| ((UChar32)mySource[2] << 16)
	943	\| ((UChar32)mySource[1] << 8)
	944	\| ((UChar32)mySource[0]);
	945
	946	args->source = (const char *)(mySource + 4);
	947	if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
	948	return myUChar;
	949	}
	950
	951	uprv_memcpy(args->converter->toUBytes, mySource, 4);
	952	args->converter->toULength = 4;
	953
	954	*err = U_ILLEGAL_CHAR_FOUND;
	955	return 0xffff;
	956	}
	957
	958	static const UConverterImpl _UTF32LEImpl = {
	959	UCNV_UTF32_LittleEndian,
	960
	961	NULL,
	962	NULL,
	963
	964	NULL,
	965	NULL,
	966	NULL,
	967
	968	T_UConverter_toUnicode_UTF32_LE,
	969	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
	970	T_UConverter_fromUnicode_UTF32_LE,
	971	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
	972	T_UConverter_getNextUChar_UTF32_LE,
	973
	974	NULL,
	975	NULL,
	976	NULL,
	977	NULL,
	978	ucnv_getNonSurrogateUnicodeSet
	979	};
	980
	981	/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
	982	static const UConverterStaticData _UTF32LEStaticData = {
	983	sizeof(UConverterStaticData),
	984	"UTF-32LE",
	985	1234,
	986	UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
	987	{ 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
	988	0,
	989	0,
	990	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	991	};
	992
	993
	994	const UConverterSharedData _UTF32LEData = {
	995	sizeof(UConverterSharedData), ~((uint32_t) 0),
	996	NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl,
	997	0
	998	};
	999
	1000	/* UTF-32 (Detect BOM) ------------------------------------------------------ */
	1001
	1002	/*
	1003	* Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
	1004	* accordingly.
	1005	*
	1006	* State values:
	1007	* 0 initial state
	1008	* 1 saw 00
	1009	* 2 saw 00 00
	1010	* 3 saw 00 00 FE
	1011	* 4 -
	1012	* 5 saw FF
	1013	* 6 saw FF FE
	1014	* 7 saw FF FE 00
	1015	* 8 UTF-32BE mode
	1016	* 9 UTF-32LE mode
	1017	*
	1018	* During detection: state&3==number of matching bytes so far.
	1019	*
	1020	* On output, emit U+FEFF as the first code point.
	1021	*/
	1022
	1023	static void
	1024	_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
	1025	if(choice<=UCNV_RESET_TO_UNICODE) {
	1026	/* reset toUnicode: state=0 */
	1027	cnv->mode=0;
	1028	}
	1029	if(choice!=UCNV_RESET_TO_UNICODE) {
	1030	/* reset fromUnicode: prepare to output the UTF-32PE BOM */
	1031	cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
	1032	}
	1033	}
	1034
	1035	static void
	1036	_UTF32Open(UConverter *cnv,
	1037	UConverterLoadArgs *pArgs,
	1038	UErrorCode *pErrorCode) {
	1039	_UTF32Reset(cnv, UCNV_RESET_BOTH);
	1040	}
	1041
	1042	static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 };
	1043
	1044	static void
	1045	_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	1046	UErrorCode *pErrorCode) {
	1047	UConverter *cnv=pArgs->converter;
	1048	const char *source=pArgs->source;
	1049	const char *sourceLimit=pArgs->sourceLimit;
	1050	int32_t *offsets=pArgs->offsets;
	1051
	1052	int32_t state, offsetDelta;
	1053	char b;
	1054
	1055	state=cnv->mode;
	1056
	1057	/*
	1058	* If we detect a BOM in this buffer, then we must add the BOM size to the
	1059	* offsets because the actual converter function will not see and count the BOM.
	1060	* offsetDelta will have the number of the BOM bytes that are in the current buffer.
	1061	*/
	1062	offsetDelta=0;
	1063
	1064	while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
	1065	switch(state) {
	1066	case 0:
	1067	b=*source;
	1068	if(b==0) {
	1069	state=1; /* could be 00 00 FE FF */
	1070	} else if(b==(char)0xff) {
	1071	state=5; /* could be FF FE 00 00 */
	1072	} else {
	1073	state=8; /* default to UTF-32BE */
	1074	continue;
	1075	}
	1076	++source;
	1077	break;
	1078	case 1:
	1079	case 2:
	1080	case 3:
	1081	case 5:
	1082	case 6:
	1083	case 7:
	1084	if(*source==utf32BOM[state]) {
	1085	++state;
	1086	++source;
	1087	if(state==4) {
	1088	state=8; /* detect UTF-32BE */
	1089	offsetDelta=(int32_t)(source-pArgs->source);
	1090	} else if(state==8) {
	1091	state=9; /* detect UTF-32LE */
	1092	offsetDelta=(int32_t)(source-pArgs->source);
	1093	}
	1094	} else {
	1095	/* switch to UTF-32BE and pass the previous bytes */
	1096	int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
	1097
	1098	/* reset the source */
	1099	source=pArgs->source;
	1100
	1101	if(count==(state&3)) {
	1102	/* simple: all in the same buffer, just reset source */
	1103	} else {
	1104	UBool oldFlush=pArgs->flush;
	1105
	1106	/* some of the bytes are from a previous buffer, replay those first */
	1107	pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
	1108	pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
	1109	pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
	1110
	1111	/* no offsets: bytes from previous buffer, and not enough for output */
	1112	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
	1113
	1114	/* restore real pointers; pArgs->source will be set in case 8/9 */
	1115	pArgs->sourceLimit=sourceLimit;
	1116	pArgs->flush=oldFlush;
	1117	}
	1118	state=8;
	1119	continue;
	1120	}
	1121	break;
	1122	case 8:
	1123	/* call UTF-32BE */
	1124	pArgs->source=source;
	1125	if(offsets==NULL) {
	1126	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
	1127	} else {
	1128	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
	1129	}
	1130	source=pArgs->source;
	1131	break;
	1132	case 9:
	1133	/* call UTF-32LE */
	1134	pArgs->source=source;
	1135	if(offsets==NULL) {
	1136	T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
	1137	} else {
	1138	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
	1139	}
	1140	source=pArgs->source;
	1141	break;
	1142	default:
	1143	break; /* does not occur */
	1144	}
	1145	}
	1146
	1147	/* add BOM size to offsets - see comment at offsetDelta declaration */
	1148	if(offsets!=NULL && offsetDelta!=0) {
	1149	int32_t *offsetsLimit=pArgs->offsets;
	1150	while(offsets<offsetsLimit) {
	1151	*offsets++ += offsetDelta;
	1152	}
	1153	}
	1154
	1155	pArgs->source=source;
	1156
	1157	if(source==sourceLimit && pArgs->flush) {
	1158	/* handle truncated input */
	1159	switch(state) {
	1160	case 0:
	1161	break; /* no input at all, nothing to do */
	1162	case 8:
	1163	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
	1164	break;
	1165	case 9:
	1166	T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
	1167	break;
	1168	default:
	1169	/* handle 0<state<8: call UTF-32BE with too-short input */
	1170	pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
	1171	pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
	1172
	1173	/* no offsets: not enough for output */
	1174	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
	1175	pArgs->source=source;
	1176	pArgs->sourceLimit=sourceLimit;
	1177	state=8;
	1178	break;
	1179	}
	1180	}
	1181
	1182	cnv->mode=state;
	1183	}
	1184
	1185	static UChar32
	1186	_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
	1187	UErrorCode *pErrorCode) {
	1188	switch(pArgs->converter->mode) {
	1189	case 8:
	1190	return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
	1191	case 9:
	1192	return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
	1193	default:
	1194	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	1195	}
	1196	}
	1197
	1198	static const UConverterImpl _UTF32Impl = {
	1199	UCNV_UTF32,
	1200
	1201	NULL,
	1202	NULL,
	1203
	1204	_UTF32Open,
	1205	NULL,
	1206	_UTF32Reset,
	1207
	1208	_UTF32ToUnicodeWithOffsets,
	1209	_UTF32ToUnicodeWithOffsets,
	1210	#if U_IS_BIG_ENDIAN
	1211	T_UConverter_fromUnicode_UTF32_BE,
	1212	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
	1213	#else
	1214	T_UConverter_fromUnicode_UTF32_LE,
	1215	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
	1216	#endif
	1217	_UTF32GetNextUChar,
	1218
	1219	NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
	1220	NULL,
	1221	NULL,
	1222	NULL,
	1223	ucnv_getNonSurrogateUnicodeSet
	1224	};
	1225
	1226	/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */
	1227	static const UConverterStaticData _UTF32StaticData = {
	1228	sizeof(UConverterStaticData),
	1229	"UTF-32",
	1230	1236,
	1231	UCNV_IBM, UCNV_UTF32, 4, 4,
	1232	#if U_IS_BIG_ENDIAN
	1233	{ 0, 0, 0xff, 0xfd }, 4,
	1234	#else
	1235	{ 0xfd, 0xff, 0, 0 }, 4,
	1236	#endif
	1237	FALSE, FALSE,
	1238	0,
	1239	0,
	1240	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	1241	};
	1242
	1243	const UConverterSharedData _UTF32Data = {
	1244	sizeof(UConverterSharedData), ~((uint32_t) 0),
	1245	NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl,
	1246	0
	1247	};
	1248
	1249	#endif