[apple/icu.git] / icuSources / common / ucnvlat1.c

/* 
**********************************************************************
*   Copyright (C) 2000-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvlat1.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb07
*   created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ucnv.h"
#include "unicode/uset.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"

/*
 * Control optimizations according to the platform.
 * Each macro, when non-zero, compiles in an unrolled fast path that handles
 * 16 code units per iteration ahead of the simple per-unit loop:
 *   LATIN1_UNROLL_TO_UNICODE   - in _Latin1ToUnicodeWithOffsets()
 *   LATIN1_UNROLL_FROM_UNICODE - in _Latin1FromUnicodeWithOffsets()
 *   ASCII_UNROLL_TO_UNICODE    - in _ASCIIToUnicodeWithOffsets()
 * Setting a macro to 0 leaves only the simple loop in that function.
 */
#define LATIN1_UNROLL_TO_UNICODE 1
#define LATIN1_UNROLL_FROM_UNICODE 1
#define ASCII_UNROLL_TO_UNICODE 1

/* ISO 8859-1 --------------------------------------------------------------- */

/*
 * ISO 8859-1 to Unicode, with optional offsets.
 * This is a table-less and callback-less version of
 * ucnv_MBCSSingleToBMPWithOffsets().
 *
 * Every source byte becomes the UChar with the same numeric value, so exactly
 * min(source length, target capacity) units are converted.
 * If the source is longer than the target capacity, *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR and only as many bytes as fit are consumed.
 * pArgs->source and pArgs->target are advanced past the converted units.
 * If pArgs->offsets is not NULL, one source index (counting from 0) is stored
 * per UChar written and pArgs->offsets is advanced as well.
 */
static void
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    const uint8_t *in=(const uint8_t *)pArgs->source;
    UChar *out=pArgs->target;
    int32_t *offs=pArgs->offsets;
    int32_t room=(int32_t)(pArgs->targetLimit-pArgs->target);
    int32_t avail=(int32_t)((const uint8_t *)pArgs->sourceLimit-in);
    int32_t inIndex=0;
    int32_t tail, i;

    /* convert min(avail, room) units; leftover input means the target is full */
    if(avail>room) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    } else {
        room=avail;
    }

    /* number of units handled by the simple loops at the end */
    tail=room;

#if LATIN1_UNROLL_TO_UNICODE
    if(room>=16) {
        /* whole groups of 16 go through the unrolled code, the rest is the tail */
        int32_t blocks=room>>4;

        tail=room&0xf;

        for(i=blocks; i>0; --i) {
            out[0]=in[0];
            out[1]=in[1];
            out[2]=in[2];
            out[3]=in[3];
            out[4]=in[4];
            out[5]=in[5];
            out[6]=in[6];
            out[7]=in[7];
            out[8]=in[8];
            out[9]=in[9];
            out[10]=in[10];
            out[11]=in[11];
            out[12]=in[12];
            out[13]=in[13];
            out[14]=in[14];
            out[15]=in[15];
            out+=16;
            in+=16;
        }

        if(offs!=NULL) {
            for(i=blocks; i>0; --i) {
                offs[0]=inIndex;
                offs[1]=inIndex+1;
                offs[2]=inIndex+2;
                offs[3]=inIndex+3;
                offs[4]=inIndex+4;
                offs[5]=inIndex+5;
                offs[6]=inIndex+6;
                offs[7]=inIndex+7;
                offs[8]=inIndex+8;
                offs[9]=inIndex+9;
                offs[10]=inIndex+10;
                offs[11]=inIndex+11;
                offs[12]=inIndex+12;
                offs[13]=inIndex+13;
                offs[14]=inIndex+14;
                offs[15]=inIndex+15;
                offs+=16;
                inIndex+=16;
            }
        }
    }
#endif

    /* simple conversion loop for the remaining units */
    for(i=tail; i>0; --i) {
        *out++=*in++;
    }

    /* write back the updated pointers */
    pArgs->source=(const char *)in;
    pArgs->target=out;

    /* offsets for the remaining units */
    if(offs!=NULL) {
        for(i=tail; i>0; --i) {
            *offs++=inIndex++;
        }
        pArgs->offsets=offs;
    }
}

/*
 * Returns the next ISO 8859-1 byte as a code point and advances pArgs->source
 * by one byte.
 * This is a table-less and callback-less version of
 * ucnv_MBCSSingleGetNextUChar().
 *
 * If there is no input left (pArgs->source has reached pArgs->sourceLimit),
 * sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff without
 * moving pArgs->source.
 */
static UChar32
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
                    UErrorCode *pErrorCode) {
    const uint8_t *next=(const uint8_t *)pArgs->source;
    const uint8_t *limit=(const uint8_t *)pArgs->sourceLimit;

    if(next>=limit) {
        /* no output because of empty input */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* consume one byte; its value is the code point */
    pArgs->source=(const char *)(next+1);
    return *next;
}

/*
 * This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets().
 *
 * Converts UTF-16 code units from pArgs->source into single bytes at
 * pArgs->target. The function is installed in both _Latin1Impl and _ASCIIImpl:
 * code units up to max are stored as-is, where max is 0xff if the converter's
 * sharedData is &_Latin1Data and 0x7f otherwise.
 *
 * State carried across calls: cnv->fromUChar32. A non-zero value on entry is
 * treated as a lead surrogate left over from the previous buffer.
 *
 * Results in *pErrorCode:
 * - U_INVALID_CHAR_FOUND: a non-surrogate code unit above max, or a complete
 *   surrogate pair (supplementary code point), was found.
 * - U_ILLEGAL_CHAR_FOUND: an unpaired surrogate was found.
 *   In both cases the offending code point is stored in cnv->fromUChar32 and
 *   pArgs->source points past it.
 * - U_BUFFER_OVERFLOW_ERROR: no other error occurred, input remains and the
 *   target is full.
 * A lead surrogate at the very end of the input is stored in
 * cnv->fromUChar32 without setting an error.
 *
 * pArgs->source, pArgs->target and pArgs->offsets are written back on return.
 * If pArgs->offsets is not NULL, one source index is stored per byte written.
 *
 * NOTE(review): this function never resets cnv->fromUChar32 to 0; presumably
 * the caller does so after handling the error -- confirm.
 */
static void
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit;
    uint8_t *target, *oldTarget;
    int32_t targetCapacity, length;
    int32_t *offsets;

    UChar32 cp;
    UChar c, max;

    int32_t sourceIndex;

    /* set up the local pointers */
    cnv=pArgs->converter;
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    /* oldTarget marks the first output byte that has not been given an offset yet */
    target=oldTarget=(uint8_t *)pArgs->target;
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    offsets=pArgs->offsets;

    /* choose the highest convertible code unit for the converter in use */
    if(cnv->sharedData==&_Latin1Data) {
        max=0xff; /* Latin-1 */
    } else {
        max=0x7f; /* US-ASCII */
    }

    /* get the converter state from UConverter */
    cp=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= cp==0 ? 0 : -1;

    /*
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
     * for the minimum of the sourceLength and targetCapacity
     */
    length=(int32_t)(sourceLimit-source);
    if(length<targetCapacity) {
        targetCapacity=length;
    }

    /*
     * resume a pending lead surrogate from the previous buffer;
     * this jumps into the surrogate handling below and skips both loops
     */
    if(cp!=0 && targetCapacity>0) {
        goto getTrail;
    }

#if LATIN1_UNROLL_FROM_UNICODE
    /* unroll the loop with the most common case */
    if(targetCapacity>=16) {
        int32_t count, loops;
        UChar u, oredChars;

        /*
         * Each iteration stores 16 bytes unconditionally and ORs the 16 code
         * units together; a single comparison afterwards tells whether any of
         * them exceeded max.
         */
        loops=count=targetCapacity>>4;
        do {
            oredChars=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;
            oredChars|=u=*source++;
            *target++=(uint8_t)u;

            /* were all 16 entries really valid? */
            if(oredChars>max) {
                /*
                 * no, return to the first of these 16;
                 * the simple loop below converts them again one by one
                 * and stops at the offending code unit
                 */
                source-=16;
                target-=16;
                break;
            }
        } while(--count>0);
        /* count becomes the number of groups of 16 that were fully converted */
        count=loops-count;
        targetCapacity-=16*count;

        if(offsets!=NULL) {
            /* these bytes get their offsets here, so exclude them from the final offsets loop */
            oldTarget+=16*count;
            while(count>0) {
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                --count;
            }
        }
    }
#endif

    /*
     * conversion loop;
     * c stays 0 if the loop body never reads a code unit, so the c>max test
     * below is false in that case
     */
    c=0;
    while(targetCapacity>0 && (c=*source++)<=max) {
        /* convert the Unicode code point */
        *target++=(uint8_t)c;
        --targetCapacity;
    }

    if(c>max) {
        /* the loop stopped at an unconvertible code unit; source already points past it */
        cp=c;
        if(!U_IS_SURROGATE(cp)) {
            /* callback(unassigned) */
        } else if(U_IS_SURROGATE_LEAD(cp)) {
getTrail:
            /* also reached via goto with cp holding the lead surrogate from the previous buffer */
            if(source<sourceLimit) {
                /* test the following code unit */
                UChar trail=*source;
                if(U16_IS_TRAIL(trail)) {
                    ++source;
                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
                    /* this codepage does not map supplementary code points */
                    /* callback(unassigned) */
                } else {
                    /* this is an unmatched lead code unit (1st surrogate) */
                    /* callback(illegal) */
                }
            } else {
                /* no more input: keep the lead surrogate for the next call, without an error */
                cnv->fromUChar32=cp;
                goto noMoreInput;
            }
        } else {
            /* this is an unmatched trail code unit (2nd surrogate) */
            /* callback(illegal) */
        }

        /* cp is still a surrogate only if it was unpaired; a combined pair is a supplementary code point */
        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
        cnv->fromUChar32=cp;
    }
noMoreInput:

    /* set offsets since the start (or since the end of the unrolled groups) */
    if(offsets!=NULL) {
        size_t count=target-oldTarget;
        while(count>0) {
            *offsets++=sourceIndex++;
            --count;
        }
    }

    /* report overflow only if nothing else went wrong and input is left over */
    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
        /* target is full */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
}

/*
 * Adds the range 0..0xff to the set behind sa by calling sa->addRange()
 * with sa->set.
 * cnv, which and pErrorCode are not used by this implementation.
 */
static void
_Latin1GetUnicodeSet(const UConverter *cnv,
                     const USetAdder *sa,
                     UConverterUnicodeSet which,
                     UErrorCode *pErrorCode) {
    /* the set is fixed, so the remaining arguments are deliberately ignored */
    (void)cnv;
    (void)which;
    (void)pErrorCode;

    sa->addRange(sa->set, 0x00, 0xff);
}

/*
 * Implementation table for the ISO 8859-1 converter; _Latin1Data points to it.
 * The initializer is positional, so the order of entries must match the
 * UConverterImpl declaration (not visible in this file -- presumably in
 * ucnv_cnv.h; confirm before adding or reordering entries).
 * NULL entries mean that this converter supplies no function for that slot.
 */
static const UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    /*
     * The same function fills both slots of each pair below; the functions
     * themselves check whether pArgs->offsets is NULL.
     */
    _Latin1ToUnicodeWithOffsets,
    _Latin1ToUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1GetNextUChar,

    NULL,
    NULL,
    NULL,
    NULL,
    _Latin1GetUnicodeSet
};

/*
 * Static description of the ISO 8859-1 converter; _Latin1Data points to it.
 * The initializer is positional and follows the UConverterStaticData
 * declaration, which is not visible in this file.
 * NOTE(review): 819 looks like the IBM code page number for ISO-8859-1 and
 * { 0x1a, ... } like the substitution byte sequence -- confirm against the
 * struct declaration.
 */
static const UConverterStaticData _Latin1StaticData={
    sizeof(UConverterStaticData),
    "ISO-8859-1",
    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};

/*
 * Shared data object for the ISO 8859-1 converter. It ties together
 * _Latin1StaticData and _Latin1Impl and has external linkage.
 * _Latin1FromUnicodeWithOffsets() compares cnv->sharedData against the address
 * of this object to tell Latin-1 (max 0xff) from US-ASCII (max 0x7f).
 * The initializer is positional; see the UConverterSharedData declaration
 * (not visible in this file) for the meaning of the remaining values.
 */
const UConverterSharedData _Latin1Data={
    sizeof(UConverterSharedData), ~((uint32_t) 0),
    NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl, 
    0
};

/* US-ASCII ----------------------------------------------------------------- */

/*
 * US-ASCII to Unicode, with optional offsets.
 * This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets().
 *
 * Bytes 0..0x7f become the UChar with the same value. Conversion stops at the
 * first byte above 0x7f: that byte is consumed, stored in
 * pArgs->converter->toUBytes[0] with toULength=1, and *pErrorCode is set to
 * U_ILLEGAL_CHAR_FOUND. Otherwise, if input remains and the target is full,
 * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * pArgs->source, pArgs->target and pArgs->offsets are written back on return.
 * If pArgs->offsets is not NULL, one source index (counting from 0) is stored
 * per UChar written.
 */
static void
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                           UErrorCode *pErrorCode) {
    const uint8_t *in=(const uint8_t *)pArgs->source;
    const uint8_t *inLimit=(const uint8_t *)pArgs->sourceLimit;
    UChar *out=pArgs->target;
    UChar *offsetBase=out; /* first UChar that has not been given an offset yet */
    int32_t *offs=pArgs->offsets;
    int32_t room=(int32_t)(pArgs->targetLimit-pArgs->target);
    int32_t avail=(int32_t)(inLimit-in);
    int32_t inIndex=0;
    uint8_t b=0;

    /*
     * the conversion is 1:1 uint8_t:UChar, so one counter for
     * min(source length, target capacity) is enough
     */
    if(avail<room) {
        room=avail;
    }

#if ASCII_UNROLL_TO_UNICODE
    /* fast path for the most common case: whole groups of 16 ASCII bytes */
    if(room>=16) {
        int32_t blocks=room>>4;
        int32_t done=0;

        while(done<blocks) {
            UChar seen;

            /* store all 16 units first and collect their bits for one combined check */
            seen =out[0]=in[0];
            seen|=out[1]=in[1];
            seen|=out[2]=in[2];
            seen|=out[3]=in[3];
            seen|=out[4]=in[4];
            seen|=out[5]=in[5];
            seen|=out[6]=in[6];
            seen|=out[7]=in[7];
            seen|=out[8]=in[8];
            seen|=out[9]=in[9];
            seen|=out[10]=in[10];
            seen|=out[11]=in[11];
            seen|=out[12]=in[12];
            seen|=out[13]=in[13];
            seen|=out[14]=in[14];
            seen|=out[15]=in[15];

            if(seen>0x7f) {
                /* at least one byte is not ASCII: leave this group to the simple loop */
                break;
            }

            in+=16;
            out+=16;
            ++done;
        }
        room-=16*done;

        if(offs!=NULL) {
            /* these units get their offsets here, not in the final offsets loop */
            offsetBase+=16*done;
            for(; done>0; --done) {
                offs[0]=inIndex;
                offs[1]=inIndex+1;
                offs[2]=inIndex+2;
                offs[3]=inIndex+3;
                offs[4]=inIndex+4;
                offs[5]=inIndex+5;
                offs[6]=inIndex+6;
                offs[7]=inIndex+7;
                offs[8]=inIndex+8;
                offs[9]=inIndex+9;
                offs[10]=inIndex+10;
                offs[11]=inIndex+11;
                offs[12]=inIndex+12;
                offs[13]=inIndex+13;
                offs[14]=inIndex+14;
                offs[15]=inIndex+15;
                offs+=16;
                inIndex+=16;
            }
        }
    }
#endif

    /* simple conversion loop; b keeps the last byte read (0 if none was read) */
    while(room>0) {
        b=*in++;
        if(b>0x7f) {
            break;
        }
        *out++=b;
        --room;
    }

    if(b>0x7f) {
        /* callback(illegal); copy the current bytes to toUBytes[] */
        UConverter *cnv=pArgs->converter;
        cnv->toUBytes[0]=b;
        cnv->toULength=1;
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    } else if(in<inLimit && out>=pArgs->targetLimit) {
        /* target is full */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    /* offsets for everything written since offsetBase */
    if(offs!=NULL) {
        while(offsetBase<out) {
            *offs++=inIndex++;
            ++offsetBase;
        }
    }

    /* write back the updated pointers */
    pArgs->source=(const char *)in;
    pArgs->target=out;
    pArgs->offsets=offs;
}

/*
 * Returns the next US-ASCII byte as a code point and advances pArgs->source
 * by one byte.
 * This is a table-less version of ucnv_MBCSSingleGetNextUChar().
 *
 * - No input left: sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns
 *   0xffff without moving pArgs->source.
 * - Byte above 0x7f: the byte is consumed and stored in
 *   pArgs->converter->toUBytes[0] with toULength=1; sets *pErrorCode to
 *   U_ILLEGAL_CHAR_FOUND and returns 0xffff.
 */
static UChar32
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
                   UErrorCode *pErrorCode) {
    const uint8_t *cursor=(const uint8_t *)pArgs->source;
    UConverter *cnv;
    uint8_t byte;

    if(cursor>=(const uint8_t *)pArgs->sourceLimit) {
        /* no output because of empty input */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* consume one byte whether or not it turns out to be valid */
    byte=*cursor;
    pArgs->source=(const char *)(cursor+1);

    if(byte<=0x7f) {
        return byte;
    }

    /* not ASCII: hand the byte to the error handling via toUBytes[] */
    cnv=pArgs->converter;
    cnv->toUBytes[0]=byte;
    cnv->toULength=1;
    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    return 0xffff;
}

/*
 * Adds the range 0..0x7f to the set behind sa by calling sa->addRange()
 * with sa->set.
 * cnv, which and pErrorCode are not used by this implementation.
 */
static void
_ASCIIGetUnicodeSet(const UConverter *cnv,
                    const USetAdder *sa,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode) {
    /* the set is fixed, so the remaining arguments are deliberately ignored */
    (void)cnv;
    (void)which;
    (void)pErrorCode;

    sa->addRange(sa->set, 0x00, 0x7f);
}

/*
 * Implementation table for the US-ASCII converter; _ASCIIData points to it.
 * The initializer is positional, so the order of entries must match the
 * UConverterImpl declaration (not visible in this file -- presumably in
 * ucnv_cnv.h; confirm before adding or reordering entries).
 * NULL entries mean that this converter supplies no function for that slot.
 */
static const UConverterImpl _ASCIIImpl={
    UCNV_US_ASCII,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    /*
     * To-Unicode uses the ASCII-specific function. From-Unicode reuses
     * _Latin1FromUnicodeWithOffsets(), which limits output to 0x7f whenever
     * the converter's sharedData is not &_Latin1Data.
     */
    _ASCIIToUnicodeWithOffsets,
    _ASCIIToUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _ASCIIGetNextUChar,

    NULL,
    NULL,
    NULL,
    NULL,
    _ASCIIGetUnicodeSet
};

/*
 * Static description of the US-ASCII converter; _ASCIIData points to it.
 * The initializer is positional and follows the UConverterStaticData
 * declaration, which is not visible in this file.
 * NOTE(review): 367 looks like the IBM code page number for US-ASCII and
 * { 0x1a, ... } like the substitution byte sequence -- confirm against the
 * struct declaration.
 */
static const UConverterStaticData _ASCIIStaticData={
    sizeof(UConverterStaticData),
    "US-ASCII",
    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};

/*
 * Shared data object for the US-ASCII converter. It ties together
 * _ASCIIStaticData and _ASCIIImpl and has external linkage.
 * The initializer is positional; see the UConverterSharedData declaration
 * (not visible in this file) for the meaning of the remaining values.
 */
const UConverterSharedData _ASCIIData={
    sizeof(UConverterSharedData), ~((uint32_t) 0),
    NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl, 
    0
};

#endif
Commit	Line	Data
b75a7d8f A	1	/*
b75a7d8f A	2	**********************************************************************
374ca955	3	* Copyright (C) 2000-2004, International Business Machines
b75a7d8f A	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* file name: ucnvlat1.cpp
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2000feb07
	12	* created by: Markus W. Scherer
	13	*/
	14
	15	#include "unicode/utypes.h"
374ca955 A	16
	17	#if !UCONFIG_NO_CONVERSION
	18
b75a7d8f	19	#include "unicode/ucnv.h"
b75a7d8f A	20	#include "unicode/uset.h"
	21	#include "ucnv_bld.h"
	22	#include "ucnv_cnv.h"
	23
	24	/* control optimizations according to the platform */
	25	#define LATIN1_UNROLL_TO_UNICODE 1
	26	#define LATIN1_UNROLL_FROM_UNICODE 1
	27	#define ASCII_UNROLL_TO_UNICODE 1
	28
	29	/* ISO 8859-1 --------------------------------------------------------------- */
	30
374ca955	31	/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
b75a7d8f A	32	static void
	33	_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	34	UErrorCode *pErrorCode) {
	35	const uint8_t *source;
	36	UChar *target;
	37	int32_t targetCapacity, length;
	38	int32_t *offsets;
	39
	40	int32_t sourceIndex;
	41
	42	/* set up the local pointers */
	43	source=(const uint8_t *)pArgs->source;
	44	target=pArgs->target;
73c04bcf	45	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f A	46	offsets=pArgs->offsets;
	47
	48	sourceIndex=0;
	49
	50	/*
	51	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	52	* for the minimum of the sourceLength and targetCapacity
	53	*/
73c04bcf	54	length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
b75a7d8f A	55	if(length<=targetCapacity) {
	56	targetCapacity=length;
	57	} else {
	58	/* target will be full */
	59	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	60	length=targetCapacity;
	61	}
	62
	63	#if LATIN1_UNROLL_TO_UNICODE
	64	if(targetCapacity>=16) {
	65	int32_t count, loops;
	66
	67	loops=count=targetCapacity>>4;
	68	length=targetCapacity&=0xf;
	69	do {
	70	target++=source++;
	71	target++=source++;
	72	target++=source++;
	73	target++=source++;
	74	target++=source++;
	75	target++=source++;
	76	target++=source++;
	77	target++=source++;
	78	target++=source++;
	79	target++=source++;
	80	target++=source++;
	81	target++=source++;
	82	target++=source++;
	83	target++=source++;
	84	target++=source++;
	85	target++=source++;
	86	} while(--count>0);
	87
	88	if(offsets!=NULL) {
	89	do {
	90	*offsets++=sourceIndex++;
	91	*offsets++=sourceIndex++;
	92	*offsets++=sourceIndex++;
	93	*offsets++=sourceIndex++;
	94	*offsets++=sourceIndex++;
	95	*offsets++=sourceIndex++;
	96	*offsets++=sourceIndex++;
	97	*offsets++=sourceIndex++;
	98	*offsets++=sourceIndex++;
	99	*offsets++=sourceIndex++;
	100	*offsets++=sourceIndex++;
	101	*offsets++=sourceIndex++;
	102	*offsets++=sourceIndex++;
	103	*offsets++=sourceIndex++;
	104	*offsets++=sourceIndex++;
	105	*offsets++=sourceIndex++;
	106	} while(--loops>0);
	107	}
	108	}
	109	#endif
	110
	111	/* conversion loop */
	112	while(targetCapacity>0) {
	113	target++=source++;
	114	--targetCapacity;
	115	}
	116
	117	/* write back the updated pointers */
	118	pArgs->source=(const char *)source;
119	pArgs->target=target;
120
121	/* set offsets */
122	if(offsets!=NULL) {
123	while(length>0) {
124	*offsets++=sourceIndex++;
125	--length;
126	}
127	pArgs->offsets=offsets;
128	}
129	}
130
374ca955	131	/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
b75a7d8f A	132	static UChar32
	133	_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
	134	UErrorCode *pErrorCode) {
	135	const uint8_t source=(const uint8_t )pArgs->source;
	136	if(source<(const uint8_t *)pArgs->sourceLimit) {
	137	pArgs->source=(const char *)(source+1);
	138	return *source;
	139	}
	140
	141	/* no output because of empty input */
	142	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	143	return 0xffff;
	144	}
	145
374ca955	146	/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
b75a7d8f A	147	static void
	148	_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	149	UErrorCode *pErrorCode) {
	150	UConverter *cnv;
374ca955 A	151	const UChar source, sourceLimit;
374ca955 A	152	uint8_t target, oldTarget;
b75a7d8f A	153	int32_t targetCapacity, length;
	154	int32_t *offsets;
	155
374ca955 A	156	UChar32 cp;
374ca955 A	157	UChar c, max;
b75a7d8f A	158
	159	int32_t sourceIndex;
	160
b75a7d8f A	161	/* set up the local pointers */
	162	cnv=pArgs->converter;
	163	source=pArgs->source;
	164	sourceLimit=pArgs->sourceLimit;
374ca955	165	target=oldTarget=(uint8_t *)pArgs->target;
73c04bcf	166	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f A	167	offsets=pArgs->offsets;
	168
	169	if(cnv->sharedData==&_Latin1Data) {
	170	max=0xff; /* Latin-1 */
	171	} else {
	172	max=0x7f; /* US-ASCII */
	173	}
	174
	175	/* get the converter state from UConverter */
374ca955	176	cp=cnv->fromUChar32;
b75a7d8f A	177
b75a7d8f A	178	/* sourceIndex=-1 if the current character began in the previous buffer */
374ca955	179	sourceIndex= cp==0 ? 0 : -1;
b75a7d8f A	180
	181	/*
	182	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	183	* for the minimum of the sourceLength and targetCapacity
	184	*/
73c04bcf	185	length=(int32_t)(sourceLimit-source);
b75a7d8f A	186	if(length<targetCapacity) {
	187	targetCapacity=length;
	188	}
	189
	190	/* conversion loop */
374ca955	191	if(cp!=0 && targetCapacity>0) {
b75a7d8f A	192	goto getTrail;
	193	}
	194
	195	#if LATIN1_UNROLL_FROM_UNICODE
	196	/* unroll the loop with the most common case */
b75a7d8f A	197	if(targetCapacity>=16) {
	198	int32_t count, loops;
	199	UChar u, oredChars;
	200
	201	loops=count=targetCapacity>>4;
	202	do {
	203	oredChars=u=*source++;
	204	*target++=(uint8_t)u;
	205	oredChars\|=u=*source++;
	206	*target++=(uint8_t)u;
	207	oredChars\|=u=*source++;
	208	*target++=(uint8_t)u;
	209	oredChars\|=u=*source++;
	210	*target++=(uint8_t)u;
	211	oredChars\|=u=*source++;
	212	*target++=(uint8_t)u;
	213	oredChars\|=u=*source++;
	214	*target++=(uint8_t)u;
	215	oredChars\|=u=*source++;
	216	*target++=(uint8_t)u;
	217	oredChars\|=u=*source++;
	218	*target++=(uint8_t)u;
	219	oredChars\|=u=*source++;
	220	*target++=(uint8_t)u;
	221	oredChars\|=u=*source++;
	222	*target++=(uint8_t)u;
	223	oredChars\|=u=*source++;
	224	*target++=(uint8_t)u;
	225	oredChars\|=u=*source++;
	226	*target++=(uint8_t)u;
	227	oredChars\|=u=*source++;
	228	*target++=(uint8_t)u;
	229	oredChars\|=u=*source++;
	230	*target++=(uint8_t)u;
	231	oredChars\|=u=*source++;
	232	*target++=(uint8_t)u;
	233	oredChars\|=u=*source++;
	234	*target++=(uint8_t)u;
	235
	236	/* were all 16 entries really valid? */
	237	if(oredChars>max) {
	238	/* no, return to the first of these 16 */
	239	source-=16;
	240	target-=16;
	241	break;
	242	}
	243	} while(--count>0);
	244	count=loops-count;
	245	targetCapacity-=16*count;
	246
	247	if(offsets!=NULL) {
374ca955	248	oldTarget+=16*count;
b75a7d8f A	249	while(count>0) {
	250	*offsets++=sourceIndex++;
	251	*offsets++=sourceIndex++;
	252	*offsets++=sourceIndex++;
	253	*offsets++=sourceIndex++;
	254	*offsets++=sourceIndex++;
	255	*offsets++=sourceIndex++;
	256	*offsets++=sourceIndex++;
	257	*offsets++=sourceIndex++;
	258	*offsets++=sourceIndex++;
	259	*offsets++=sourceIndex++;
	260	*offsets++=sourceIndex++;
	261	*offsets++=sourceIndex++;
	262	*offsets++=sourceIndex++;
	263	*offsets++=sourceIndex++;
	264	*offsets++=sourceIndex++;
	265	*offsets++=sourceIndex++;
	266	--count;
	267	}
	268	}
b75a7d8f A	269	}
	270	#endif
	271
374ca955 A	272	/* conversion loop */
	273	c=0;
	274	while(targetCapacity>0 && (c=*source++)<=max) {
	275	/* convert the Unicode code point */
	276	*target++=(uint8_t)c;
	277	--targetCapacity;
	278	}
	279
	280	if(c>max) {
	281	cp=c;
	282	if(!U_IS_SURROGATE(cp)) {
	283	/* callback(unassigned) */
	284	} else if(U_IS_SURROGATE_LEAD(cp)) {
b75a7d8f	285	getTrail:
374ca955 A	286	if(source<sourceLimit) {
	287	/* test the following code unit */
	288	UChar trail=*source;
	289	if(U16_IS_TRAIL(trail)) {
	290	++source;
	291	cp=U16_GET_SUPPLEMENTARY(cp, trail);
	292	/* this codepage does not map supplementary code points */
	293	/* callback(unassigned) */
b75a7d8f	294	} else {
374ca955 A	295	/* this is an unmatched lead code unit (1st surrogate) */
374ca955 A	296	/* callback(illegal) */
b75a7d8f A	297	}
b75a7d8f A	298	} else {
374ca955 A	299	/* no more input */
	300	cnv->fromUChar32=cp;
	301	goto noMoreInput;
b75a7d8f	302	}
374ca955 A	303	} else {
	304	/* this is an unmatched trail code unit (2nd surrogate) */
	305	/* callback(illegal) */
b75a7d8f	306	}
b75a7d8f	307
374ca955 A	308	*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
374ca955 A	309	cnv->fromUChar32=cp;
b75a7d8f	310	}
374ca955	311	noMoreInput:
b75a7d8f	312
374ca955	313	/* set offsets since the start */
b75a7d8f	314	if(offsets!=NULL) {
374ca955	315	size_t count=target-oldTarget;
b75a7d8f A	316	while(count>0) {
	317	*offsets++=sourceIndex++;
	318	--count;
	319	}
	320	}
	321
374ca955 A	322	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
	323	/* target is full */
	324	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
b75a7d8f A	325	}
	326
	327	/* write back the updated pointers */
	328	pArgs->source=source;
	329	pArgs->target=(char *)target;
	330	pArgs->offsets=offsets;
	331	}
	332
	333	static void
	334	_Latin1GetUnicodeSet(const UConverter *cnv,
73c04bcf	335	const USetAdder *sa,
b75a7d8f A	336	UConverterUnicodeSet which,
b75a7d8f A	337	UErrorCode *pErrorCode) {
374ca955	338	sa->addRange(sa->set, 0, 0xff);
b75a7d8f A	339	}
	340
	341	static const UConverterImpl _Latin1Impl={
	342	UCNV_LATIN_1,
	343
	344	NULL,
	345	NULL,
	346
	347	NULL,
	348	NULL,
	349	NULL,
	350
	351	_Latin1ToUnicodeWithOffsets,
	352	_Latin1ToUnicodeWithOffsets,
	353	_Latin1FromUnicodeWithOffsets,
	354	_Latin1FromUnicodeWithOffsets,
	355	_Latin1GetNextUChar,
	356
	357	NULL,
	358	NULL,
	359	NULL,
	360	NULL,
	361	_Latin1GetUnicodeSet
	362	};
	363
	364	static const UConverterStaticData _Latin1StaticData={
	365	sizeof(UConverterStaticData),
	366	"ISO-8859-1",
	367	819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
	368	{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
	369	0,
	370	0,
	371	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	372	};
	373
	374	const UConverterSharedData _Latin1Data={
	375	sizeof(UConverterSharedData), ~((uint32_t) 0),
	376	NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,
	377	0
	378	};
	379
	380	/* US-ASCII ----------------------------------------------------------------- */
	381
374ca955	382	/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
b75a7d8f A	383	static void
	384	_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	385	UErrorCode *pErrorCode) {
374ca955 A	386	const uint8_t source, sourceLimit;
374ca955 A	387	UChar target, oldTarget;
b75a7d8f A	388	int32_t targetCapacity, length;
	389	int32_t *offsets;
	390
	391	int32_t sourceIndex;
	392
374ca955 A	393	uint8_t c;
374ca955 A	394
b75a7d8f A	395	/* set up the local pointers */
	396	source=(const uint8_t *)pArgs->source;
	397	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
374ca955	398	target=oldTarget=pArgs->target;
73c04bcf	399	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f A	400	offsets=pArgs->offsets;
	401
	402	/* sourceIndex=-1 if the current character began in the previous buffer */
	403	sourceIndex=0;
b75a7d8f A	404
	405	/*
	406	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	407	* for the minimum of the sourceLength and targetCapacity
	408	*/
73c04bcf	409	length=(int32_t)(sourceLimit-source);
b75a7d8f A	410	if(length<targetCapacity) {
	411	targetCapacity=length;
	412	}
	413
	414	#if ASCII_UNROLL_TO_UNICODE
	415	/* unroll the loop with the most common case */
b75a7d8f A	416	if(targetCapacity>=16) {
	417	int32_t count, loops;
	418	UChar oredChars;
	419
	420	loops=count=targetCapacity>>4;
	421	do {
	422	oredChars=target++=source++;
	423	oredChars\|=target++=source++;
	424	oredChars\|=target++=source++;
	425	oredChars\|=target++=source++;
	426	oredChars\|=target++=source++;
	427	oredChars\|=target++=source++;
	428	oredChars\|=target++=source++;
	429	oredChars\|=target++=source++;
	430	oredChars\|=target++=source++;
	431	oredChars\|=target++=source++;
	432	oredChars\|=target++=source++;
	433	oredChars\|=target++=source++;
	434	oredChars\|=target++=source++;
	435	oredChars\|=target++=source++;
	436	oredChars\|=target++=source++;
	437	oredChars\|=target++=source++;
	438
	439	/* were all 16 entries really valid? */
	440	if(oredChars>0x7f) {
	441	/* no, return to the first of these 16 */
	442	source-=16;
	443	target-=16;
	444	break;
	445	}
	446	} while(--count>0);
	447	count=loops-count;
	448	targetCapacity-=16*count;
	449
	450	if(offsets!=NULL) {
374ca955	451	oldTarget+=16*count;
b75a7d8f A	452	while(count>0) {
	453	*offsets++=sourceIndex++;
	454	*offsets++=sourceIndex++;
	455	*offsets++=sourceIndex++;
	456	*offsets++=sourceIndex++;
	457	*offsets++=sourceIndex++;
	458	*offsets++=sourceIndex++;
	459	*offsets++=sourceIndex++;
	460	*offsets++=sourceIndex++;
	461	*offsets++=sourceIndex++;
	462	*offsets++=sourceIndex++;
	463	*offsets++=sourceIndex++;
	464	*offsets++=sourceIndex++;
	465	*offsets++=sourceIndex++;
	466	*offsets++=sourceIndex++;
	467	*offsets++=sourceIndex++;
	468	*offsets++=sourceIndex++;
	469	--count;
	470	}
	471	}
	472	}
	473	#endif
	474
	475	/* conversion loop */
374ca955 A	476	c=0;
	477	while(targetCapacity>0 && (c=*source++)<=0x7f) {
	478	*target++=c;
	479	--targetCapacity;
b75a7d8f A	480	}
b75a7d8f A	481
374ca955 A	482	if(c>0x7f) {
	483	/* callback(illegal); copy the current bytes to toUBytes[] */
	484	UConverter *cnv=pArgs->converter;
	485	cnv->toUBytes[0]=c;
	486	cnv->toULength=1;
	487	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	488	} else if(source<sourceLimit && target>=pArgs->targetLimit) {
b75a7d8f A	489	/* target is full */
	490	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	491	}
	492
374ca955	493	/* set offsets since the start */
b75a7d8f	494	if(offsets!=NULL) {
374ca955	495	size_t count=target-oldTarget;
b75a7d8f A	496	while(count>0) {
	497	*offsets++=sourceIndex++;
	498	--count;
	499	}
	500	}
	501
	502	/* write back the updated pointers */
	503	pArgs->source=(const char *)source;
	504	pArgs->target=target;
	505	pArgs->offsets=offsets;
	506	}
	507
374ca955	508	/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
b75a7d8f A	509	static UChar32
	510	_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
	511	UErrorCode *pErrorCode) {
b75a7d8f A	512	const uint8_t *source;
	513	uint8_t b;
	514
b75a7d8f	515	source=(const uint8_t *)pArgs->source;
374ca955	516	if(source<(const uint8_t *)pArgs->sourceLimit) {
b75a7d8f A	517	b=*source++;
	518	pArgs->source=(const char *)source;
	519	if(b<=0x7f) {
	520	return b;
	521	} else {
b75a7d8f	522	UConverter *cnv=pArgs->converter;
374ca955 A	523	cnv->toUBytes[0]=b;
374ca955 A	524	cnv->toULength=1;
b75a7d8f	525	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
374ca955	526	return 0xffff;
b75a7d8f A	527	}
	528	}
	529
374ca955	530	/* no output because of empty input */
b75a7d8f A	531	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	532	return 0xffff;
	533	}
	534
	535	static void
	536	_ASCIIGetUnicodeSet(const UConverter *cnv,
73c04bcf	537	const USetAdder *sa,
b75a7d8f A	538	UConverterUnicodeSet which,
b75a7d8f A	539	UErrorCode *pErrorCode) {
374ca955	540	sa->addRange(sa->set, 0, 0x7f);
b75a7d8f A	541	}
	542
	543	static const UConverterImpl _ASCIIImpl={
	544	UCNV_US_ASCII,
	545
	546	NULL,
	547	NULL,
	548
	549	NULL,
	550	NULL,
	551	NULL,
	552
	553	_ASCIIToUnicodeWithOffsets,
	554	_ASCIIToUnicodeWithOffsets,
	555	_Latin1FromUnicodeWithOffsets,
	556	_Latin1FromUnicodeWithOffsets,
	557	_ASCIIGetNextUChar,
	558
	559	NULL,
	560	NULL,
	561	NULL,
	562	NULL,
	563	_ASCIIGetUnicodeSet
	564	};
	565
	566	static const UConverterStaticData _ASCIIStaticData={
	567	sizeof(UConverterStaticData),
	568	"US-ASCII",
	569	367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
	570	{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
	571	0,
	572	0,
	573	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	574	};
	575
	576	const UConverterSharedData _ASCIIData={
	577	sizeof(UConverterSharedData), ~((uint32_t) 0),
	578	NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
	579	0
	580	};
374ca955 A	581
374ca955 A	582	#endif