[apple/icu.git] / icuSources / common / ucnv_cb.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2000-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *  ucnv_cb.c:
 *  External APIs for the ICU's codeset conversion library
 *  Helena Shih
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   7/28/2000   srl         Implementation
 */

/**
 * @name Character Conversion C API
 *
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ucnv_cb.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"

/* need to update the offsets when the target moves. */
/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
the same call stack if the complexity arises. */
U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
                       const char* source,
                       int32_t length,
                       int32_t offsetIndex,
                       UErrorCode * err)
{
    /*
     * Callback-side helper for emitting already-encoded bytes.
     *
     * args        - from-Unicode callback arguments; supplies the converter,
     *               the target/targetLimit window and the offsets pointer.
     * source      - bytes to hand to ucnv_fromUWriteBytes().
     * length      - number of bytes at source.
     * offsetIndex - offset value passed through for the written bytes.
     * err         - in/out error code; if it already holds a failure on
     *               entry, nothing is written.
     */
    UConverter *cnv;

    if(!U_FAILURE(*err)) {
        cnv = args->converter;

        /* The target and offsets pointers are passed by address so the
           callee can advance them in place inside args. */
        ucnv_fromUWriteBytes(cnv,
                             source, length,
                             &args->target, args->targetLimit,
                             &args->offsets, offsetIndex,
                             err);
    }
}

/*
 * Convert a run of UChars through the callback's own converter and write the
 * resulting bytes to the callback's target buffer.
 *
 * args        - from-Unicode callback arguments; args->converter performs the
 *               conversion, args->target/args->targetLimit bound the output,
 *               and args->offsets (if non-NULL) receives one entry per byte
 *               written to the target.
 * source      - in/out pointer to the first UChar to convert; it is handed to
 *               ucnv_fromUnicode() by address, so it reflects how much input
 *               was consumed.
 * sourceLimit - one past the last UChar to convert.
 * offsetIndex - value stored into args->offsets for every target byte
 *               produced by this call.
 * err         - in/out error code. If it already holds a failure on entry the
 *               function returns immediately. On return it may hold
 *               U_BUFFER_OVERFLOW_ERROR (target full, remainder spilled into
 *               the converter's charErrorBuffer) or U_INTERNAL_PROGRAM_ERROR
 *               (charErrorBuffer had no room for the remainder).
 */
U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
                             const UChar** source,
                             const UChar*  sourceLimit,
                             int32_t offsetIndex,
                             UErrorCode * err)
{
    /*
    Recursion can occur here: the data is simply pushed back through the
    same converter. If you got here because of some kind of invalid
    sequence, you should probably emit a reset sequence of some kind and/or
    call ucnv_reset(). Since this IS an actual conversion, make sure you have
    changed the callback or the data beforehand, or you will get an infinite
    loop.

    Set the err value to something reasonable before calling into this.
    */

    /* Target position before the conversion; used to count bytes written. */
    char *oldTarget;

    if(U_FAILURE(*err))
    {
        return;
    }

    oldTarget = args->target;

    /* First attempt: convert straight into the caller's target buffer. */
    ucnv_fromUnicode(args->converter,
        &args->target,
        args->targetLimit,
        source,
        sourceLimit,
        NULL, /* no offsets */
        FALSE, /* no flush */
        err);

    /* The conversion above was run without offsets, so fill them in here:
    one offsetIndex entry for every byte the target pointer advanced. */
    if(args->offsets)
    {
        while (args->target != oldTarget)  /* if it moved at all.. */
        {
            *(args->offsets)++ = offsetIndex;
            oldTarget++;
        }
    }

    /*
    Note: if something like a Stop subcallback were in use, things would get
    interesting. This is the point where the partially consumed input
    (*source) is handed back to the caller.
    */
    if(*err == U_BUFFER_OVERFLOW_ERROR)
    /* Disabled extra condition, kept for reference:
    && (*source < sourceLimit && args->target >= args->targetLimit)
    -- S. Hrcek */
    {
        /* The target overflowed. Write the rest into the converter's
        charErrorBuffer instead. That buffer has a fixed size; running out of
        it is reported as U_INTERNAL_PROGRAM_ERROR below. */
        char *newTarget;
        const char *newTargetLimit;
        /* Error code for the second conversion, kept separate so that *err
        stays U_BUFFER_OVERFLOW_ERROR unless the error buffer itself fills. */
        UErrorCode err2 = U_ZERO_ERROR;

        int8_t errBuffLen;

        errBuffLen  = args->converter->charErrorBufferLength;

        /* start the new target at the first free slot in the errbuff.. */
        newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);

        /* ..and end it at the end of the charErrorBuffer array. */
        newTargetLimit = (char *)(args->converter->charErrorBuffer +
            sizeof(args->converter->charErrorBuffer));

        /* No free slot left at all: give up before touching converter state. */
        if(newTarget >= newTargetLimit)
        {
            *err = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        /* Tell the converter that the errbuff is empty for the duration of
        the next call. This prevents the existing errbuff contents from being
        'flushed' out onto itself. If the converter needs the errbuff itself
        this time, we are out of space. */

        args->converter->charErrorBufferLength = 0;

        /* Second attempt: convert the remaining input into the free tail of
        the error buffer. */
        ucnv_fromUnicode(args->converter,
                         &newTarget,
                         newTargetLimit,
                         source,
                         sourceLimit,
                         NULL,
                         FALSE,
                         &err2);

        /* The length can simply be overwritten here: it is the distance from
        the start of the error buffer to the new write position, which covers
        both the previous contents and the bytes just added. */

        args->converter->charErrorBufferLength = (int8_t)(
            newTarget - (char*)args->converter->charErrorBuffer);

        /* Note the >= : an error buffer that ends up exactly full is treated
        the same as one that overflowed. */
        if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
        {
            /* Internal program error - the callback should not have written
            this much data. */
            *err = U_INTERNAL_PROGRAM_ERROR;
            return;
        }
        /* Otherwise err2 is deliberately not propagated. Sub-errors could be
        invalid/truncated/illegal characters and might deserve to be passed
        up, but *err already has to carry U_BUFFER_OVERFLOW_ERROR, which
        overrides them. Handling of U_FAILURE(err2) is an open question. */
    }
}

U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
                           int32_t offsetIndex,
                           UErrorCode * err)
{
    /*
     * Write the converter's substitution for an unconvertible input.
     *
     * args        - from-Unicode callback arguments; args->converter holds
     *               the substitution settings consulted here.
     * offsetIndex - offset value passed through to the write helpers.
     * err         - in/out error code; nothing happens if it already holds a
     *               failure on entry.
     *
     * converter->subCharLen selects the behavior:
     *   == 0  nothing is written;
     *   <  0  subChars holds a UChar string of length -subCharLen that is
     *         converted and written;
     *   >  0  substitution bytes are written, either by the converter
     *         implementation's writeSub hook or directly from
     *         subChar1 / subChars.
     */
    UConverter *cnv;
    int32_t subLen;

    if(U_FAILURE(*err)) {
        return;
    }

    cnv = args->converter;
    subLen = cnv->subCharLen;

    if(subLen < 0) {
        /*
         * Write/convert the substitution string. Its real length is -subLen.
         * Unlike the escape callback, the converter's callback function need
         * not be changed because ucnv_setSubstString() verified that the
         * string can be converted, so no conversion error and no recursion
         * is expected. At worst we should get a U_BUFFER_OVERFLOW_ERROR.
         */
        const UChar *subString = (const UChar *)cnv->subChars;
        const UChar *subStringLimit = subString - subLen;

        ucnv_cbFromUWriteUChars(args, &subString, subStringLimit, offsetIndex, err);
    }
    else if(subLen > 0) {
        if(cnv->sharedData->impl->writeSub!=NULL) {
            /* The converter implementation supplies its own substitution writer. */
            cnv->sharedData->impl->writeSub(args, offsetIndex, err);
        }
        else {
            const char *subBytes;
            int32_t subByteCount;

            if(cnv->subChar1!=0 && (uint16_t)cnv->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
                /*
                TODO: Is this untestable because the MBCS converter has a writeSub function to call
                and the other converters don't use subChar1?
                */
                subBytes = (const char *)&cnv->subChar1;
                subByteCount = 1;
            }
            else {
                subBytes = (const char *)cnv->subChars;
                subByteCount = subLen;
            }

            ucnv_cbFromUWriteBytes(args, subBytes, subByteCount, offsetIndex, err);
        }
    }
    /* subLen == 0: there is no substitution to write. */
}

U_CAPI void  U_EXPORT2
ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
                            const UChar* source,
                            int32_t length,
                            int32_t offsetIndex,
                            UErrorCode * err)
{
    /*
     * Callback-side helper for emitting UChars.
     *
     * args        - to-Unicode callback arguments; supplies the converter,
     *               the target/targetLimit window and the offsets pointer.
     * source      - UChars to hand to ucnv_toUWriteUChars().
     * length      - number of UChars at source.
     * offsetIndex - offset value passed through for the written UChars.
     * err         - in/out error code; if it already holds a failure on
     *               entry, nothing is written.
     */
    UConverter *cnv;

    if(!U_FAILURE(*err)) {
        cnv = args->converter;

        /* The target and offsets pointers are passed by address so the
           callee can advance them in place inside args. */
        ucnv_toUWriteUChars(cnv,
                            source, length,
                            &args->target, args->targetLimit,
                            &args->offsets, offsetIndex,
                            err);
    }
}

U_CAPI void  U_EXPORT2
ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
                         int32_t offsetIndex,
                       UErrorCode * err)
{
    /*
     * Write one substitution UChar for an unconvertible input sequence.
     *
     * args        - to-Unicode callback arguments; args->converter is
     *               consulted to choose the substitution character.
     * offsetIndex - offset value passed through to ucnv_cbToUWriteUChars().
     * err         - in/out error code; checked by ucnv_cbToUWriteUChars().
     *
     * U+001A is written when the invalid input was a single unit and the
     * converter has a non-zero subChar1; otherwise U+FFFD is written.
     */
    static const UChar kSubstituteChar1 = 0x1A;
    static const UChar kSubstituteChar = 0xFFFD;

    const UChar *substitute = &kSubstituteChar;

    if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
        substitute = &kSubstituteChar1;
    }

    /* Exactly one UChar is written in either case. */
    ucnv_cbToUWriteUChars(args, substitute, 1, offsetIndex, err);
}

#endif
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f A	3	/*
b75a7d8f A	4	**********************************************************************
73c04bcf	5	* Copyright (C) 2000-2006, International Business Machines
b75a7d8f A	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* ucnv_cb.c:
	9	* External APIs for the ICU's codeset conversion library
	10	* Helena Shih
	11	*
	12	* Modification History:
	13	*
	14	* Date Name Description
	15	* 7/28/2000 srl Implementation
	16	*/
	17
	18	/**
	19	* @name Character Conversion C API
	20	*
	21	*/
	22
	23	#include "unicode/utypes.h"
374ca955 A	24
	25	#if !UCONFIG_NO_CONVERSION
	26
b75a7d8f A	27	#include "unicode/ucnv_cb.h"
	28	#include "ucnv_bld.h"
	29	#include "ucnv_cnv.h"
	30	#include "cmemory.h"
	31
	32	/* need to update the offsets when the target moves. */
	33	/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
	34	if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within
	35	the same call stack if the complexity arises. */
	36	U_CAPI void U_EXPORT2
	37	ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
	38	const char* source,
	39	int32_t length,
	40	int32_t offsetIndex,
	41	UErrorCode * err)
	42	{
374ca955 A	43	if(U_FAILURE(*err)) {
374ca955 A	44	return;
b75a7d8f	45	}
b75a7d8f	46
374ca955 A	47	ucnv_fromUWriteBytes(
	48	args->converter,
	49	source, length,
	50	&args->target, args->targetLimit,
	51	&args->offsets, offsetIndex,
	52	err);
b75a7d8f A	53	}
	54
	55	U_CAPI void U_EXPORT2
	56	ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
	57	const UChar** source,
	58	const UChar* sourceLimit,
	59	int32_t offsetIndex,
	60	UErrorCode * err)
	61	{
	62	/*
	63	This is a fun one. Recursion can occur - we're basically going to
	64	just retry shoving data through the same converter. Note, if you got
	65	here through some kind of invalid sequence, you maybe should emit a
	66	reset sequence of some kind and/or call ucnv_reset(). Since this
	67	IS an actual conversion, take care that you've changed the callback
	68	or the data, or you'll get an infinite loop.
	69
	70	Please set the err value to something reasonable before calling
	71	into this.
	72	*/
	73
	74	char *oldTarget;
	75
	76	if(U_FAILURE(*err))
	77	{
	78	return;
	79	}
	80
	81	oldTarget = args->target;
	82
	83	ucnv_fromUnicode(args->converter,
	84	&args->target,
	85	args->targetLimit,
	86	source,
	87	sourceLimit,
	88	NULL, /* no offsets */
	89	FALSE, /* no flush */
	90	err);
	91
	92	if(args->offsets)
	93	{
	94	while (args->target != oldTarget) /* if it moved at all.. */
	95	{
	96	*(args->offsets)++ = offsetIndex;
	97	oldTarget++;
	98	}
	99	}
	100
	101	/*
	102	Note, if you did something like used a Stop subcallback, things would get interesting.
	103	In fact, here's where we want to return the partially consumed in-source!
	104	*/
	105	if(*err == U_BUFFER_OVERFLOW_ERROR)
	106	/* && (*source < sourceLimit && args->target >= args->targetLimit)
	107	-- S. Hrcek */
	108	{
73c04bcf	109	/* Overflowed the target. Now, we'll write into the charErrorBuffer.
b75a7d8f A	110	It's a fixed size. If we overflow it... Hmm */
	111	char *newTarget;
	112	const char *newTargetLimit;
	113	UErrorCode err2 = U_ZERO_ERROR;
	114
	115	int8_t errBuffLen;
	116
	117	errBuffLen = args->converter->charErrorBufferLength;
	118
	119	/* start the new target at the first free slot in the errbuff.. */
	120	newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
	121
	122	newTargetLimit = (char *)(args->converter->charErrorBuffer +
	123	sizeof(args->converter->charErrorBuffer));
	124
	125	if(newTarget >= newTargetLimit)
	126	{
	127	*err = U_INTERNAL_PROGRAM_ERROR;
	128	return;
	129	}
	130
73c04bcf A	131	/* We're going to tell the converter that the errbuff len is empty.
	132	This prevents the existing errbuff from being 'flushed' out onto
	133	itself. If the errbuff is needed by the converter this time,
	134	we're hosed - we're out of space! */
	135
	136	args->converter->charErrorBufferLength = 0;
	137
	138	ucnv_fromUnicode(args->converter,
	139	&newTarget,
	140	newTargetLimit,
	141	source,
	142	sourceLimit,
	143	NULL,
	144	FALSE,
	145	&err2);
	146
	147	/* We can go ahead and overwrite the length here. We know just how
	148	to recalculate it. */
	149
	150	args->converter->charErrorBufferLength = (int8_t)(
	151	newTarget - (char*)args->converter->charErrorBuffer);
	152
	153	if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
	154	{
	155	/* now we're REALLY in trouble.
	156	Internal program error - callback shouldn't have written this much
	157	data!
	158	*/
	159	*err = U_INTERNAL_PROGRAM_ERROR;
	160	return;
	161	}
	162	/*else {*/
	163	/* sub errs could be invalid/truncated/illegal chars or w/e.
	164	These might want to be passed on up.. But the problem is, we already
	165	need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
	166	other errs.. */
	167
	168	/*
	169	if(U_FAILURE(err2))
	170	??
	171	*/
	172	/*}*/
b75a7d8f A	173	}
	174	}
	175
	176	U_CAPI void U_EXPORT2
	177	ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
	178	int32_t offsetIndex,
	179	UErrorCode * err)
	180	{
73c04bcf A	181	UConverter *converter;
	182	int32_t length;
	183
b75a7d8f A	184	if(U_FAILURE(*err)) {
	185	return;
	186	}
73c04bcf A	187	converter = args->converter;
73c04bcf A	188	length = converter->subCharLen;
b75a7d8f	189
73c04bcf A	190	if(length == 0) {
	191	return;
	192	}
	193
	194	if(length < 0) {
	195	/*
	196	* Write/convert the substitution string. Its real length is -length.
	197	* Unlike the escape callback, we need not change the converter's
	198	* callback function because ucnv_setSubstString() verified that
	199	* the string can be converted, so we will not get a conversion error
	200	* and will not recurse.
	201	* At worst we should get a U_BUFFER_OVERFLOW_ERROR.
	202	*/
	203	const UChar *source = (const UChar *)converter->subChars;
	204	ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
	205	return;
	206	}
	207
	208	if(converter->sharedData->impl->writeSub!=NULL) {
	209	converter->sharedData->impl->writeSub(args, offsetIndex, err);
	210	}
	211	else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
	212	/*
	213	TODO: Is this untestable because the MBCS converter has a writeSub function to call
	214	and the other converters don't use subChar1?
	215	*/
b75a7d8f	216	ucnv_cbFromUWriteBytes(args,
73c04bcf	217	(const char *)&converter->subChar1, 1,
b75a7d8f	218	offsetIndex, err);
73c04bcf A	219	}
73c04bcf A	220	else {
b75a7d8f	221	ucnv_cbFromUWriteBytes(args,
73c04bcf	222	(const char *)converter->subChars, length,
b75a7d8f A	223	offsetIndex, err);
	224	}
	225	}
	226
	227	U_CAPI void U_EXPORT2
	228	ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
	229	const UChar* source,
	230	int32_t length,
	231	int32_t offsetIndex,
	232	UErrorCode * err)
	233	{
374ca955	234	if(U_FAILURE(*err)) {
b75a7d8f A	235	return;
	236	}
	237
374ca955 A	238	ucnv_toUWriteUChars(
	239	args->converter,
	240	source, length,
	241	&args->target, args->targetLimit,
	242	&args->offsets, offsetIndex,
	243	err);
b75a7d8f A	244	}
	245
	246	U_CAPI void U_EXPORT2
	247	ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
	248	int32_t offsetIndex,
	249	UErrorCode * err)
	250	{
	251	static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
	252
	253	/* could optimize this case, just one uchar */
	254	if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
	255	ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
	256	} else {
	257	ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
	258	}
	259	}
374ca955 A	260
374ca955 A	261	#endif