[apple/icu.git] / icuSources / common / unistr_cnv.cpp

/*
*******************************************************************************
*
*   Copyright (C) 1999-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  unistr_cnv.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004aug19
*   created by: Markus W. Scherer
*
*   Character conversion functions moved here from unistr.cpp
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/putil.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/ucnv.h"
#include "ucnv_imp.h"
#include "putilimp.h"
#include "ustr_cnv.h"
#include "ustr_imp.h"

U_NAMESPACE_BEGIN

//========================================
// Constructors
//========================================

#if !U_CHARSET_IS_UTF8

// Constructs a string from NUL-terminated codepage bytes.
// The codepage argument passed on is 0, which doCodepageCreate() treats as
// "use the default converter". A null input leaves the string empty.
UnicodeString::UnicodeString(const char *codepageData)
  : fShortLength(0),
    fFlags(kShortString)
{
    if(codepageData == 0) {
        return;
    }

    // The input is NUL-terminated, so measure it here.
    const int32_t byteCount = (int32_t)uprv_strlen(codepageData);
    doCodepageCreate(codepageData, byteCount, 0);
}

// Constructs a string from dataLength codepage bytes.
// The codepage argument passed on is 0, which doCodepageCreate() treats as
// "use the default converter". A null input leaves the string empty;
// length validation (including the -1 "NUL-terminated" value) is done by
// doCodepageCreate().
UnicodeString::UnicodeString(const char *codepageData,
                             int32_t dataLength)
  : fShortLength(0),
    fFlags(kShortString)
{
    if(codepageData == 0) {
        return;
    }
    doCodepageCreate(codepageData, dataLength, 0);
}

// else see unistr.cpp
#endif

// Constructs a string from NUL-terminated bytes in the named codepage.
// codepage is handed to doCodepageCreate() unchanged: 0 selects the default
// converter and "" selects the invariant-character conversion.
// A null input leaves the string empty.
UnicodeString::UnicodeString(const char *codepageData,
                             const char *codepage)
  : fShortLength(0),
    fFlags(kShortString)
{
    if(codepageData == 0) {
        return;
    }

    // The input is NUL-terminated, so measure it here.
    const int32_t byteCount = (int32_t)uprv_strlen(codepageData);
    doCodepageCreate(codepageData, byteCount, codepage);
}

// Constructs a string from dataLength bytes in the named codepage.
// Both dataLength and codepage are handed to doCodepageCreate() unchanged,
// which validates the length and picks the converter.
// A null input leaves the string empty.
UnicodeString::UnicodeString(const char *codepageData,
                             int32_t dataLength,
                             const char *codepage)
  : fShortLength(0),
    fFlags(kShortString)
{
    if(codepageData == 0) {
        return;
    }
    doCodepageCreate(codepageData, dataLength, codepage);
}

// Constructs a string by converting srcLength bytes of src with cnv.
//
//   src        input bytes; NULL yields an empty string
//   srcLength  number of bytes, or -1 if src is NUL-terminated;
//              values below -1 set U_ILLEGAL_ARGUMENT_ERROR
//   cnv        converter to use (it is reset first); 0 borrows the default
//              converter for the duration of the call
//   errorCode  in/out ICU error code; if it already indicates failure on
//              entry the constructor does nothing
//
// If errorCode indicates failure after the work is done, the string is set
// to bogus.
UnicodeString::UnicodeString(const char *src, int32_t srcLength,
                             UConverter *cnv,
                             UErrorCode &errorCode)
  : fShortLength(0),
    fFlags(kShortString)
{
    // A prior failure means: leave the (empty) string alone.
    if(U_FAILURE(errorCode)) {
        return;
    }

    if(src!=NULL) {
        if(srcLength<-1) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            // resolve the "NUL-terminated" length
            const int32_t byteCount=
                (srcLength==-1) ? (int32_t)uprv_strlen(src) : srcLength;

            if(byteCount>0) {
                if(cnv==0) {
                    // borrow the default converter and hand it back afterwards
                    UConverter *defaultCnv=u_getDefaultConverter(&errorCode);
                    doCodepageCreate(src, byteCount, defaultCnv, errorCode);
                    u_releaseDefaultConverter(defaultCnv);
                } else {
                    // caller-supplied converter: start from a clean state
                    ucnv_resetToUnicode(cnv);
                    doCodepageCreate(src, byteCount, cnv, errorCode);
                }
            }
        }
    }

    // Any failure raised above invalidates the string.
    if(U_FAILURE(errorCode)) {
        setToBogus();
    }
}

//========================================
// Codeset conversion
//========================================

#if !U_CHARSET_IS_UTF8

// Convenience overload: forwards to the five-argument extract() with a null
// codepage, which that overload treats as "use the default codepage".
int32_t
UnicodeString::extract(int32_t start,
                       int32_t length,
                       char *target,
                       uint32_t dstSize) const {
    const char *const defaultCodepage = 0;
    return extract(start, length, target, dstSize, defaultCodepage);
}

// else see unistr.cpp
#endif

// Converts the substring [start, start+length) to bytes in the given codepage.
//
//   start, length  substring to convert; pinned to legal values first
//   target         destination buffer; may be null only if dstSize is 0
//   dstSize        size of target in bytes; values >= 0x7fffffff are pinned
//                  so that a limit pointer cannot wrap around
//   codepage       0  -> default codepage (UTF-8 fast path when applicable)
//                  "" -> "invariant characters" conversion
//                  otherwise the name is passed to ucnv_open()
//
// Returns 0 for illegal arguments; otherwise the value produced by the
// terminating/conversion helper that handled the request.
int32_t
UnicodeString::extract(int32_t start,
                       int32_t length,
                       char *target,
                       uint32_t dstSize,
                       const char *codepage) const
{
    // A non-empty destination without a buffer is illegal: do nothing.
    if(target == 0 && dstSize > 0) {
        return 0;
    }

    // pin the indices to legal values
    pinIndices(start, length);

    // The API takes uint32_t but everything below works with int32_t.
    // dstSize==0xffffffff means "unlimited"; target+dstSize could wrap around
    // and compare less-than target, so large sizes are pinned instead.
    int32_t capacity;
    if(dstSize >= 0x7fffffff) {
        // U_MAX_PTR(target) is at most 0x7fffffff past target and does not
        // wrap around the top of the address space.
        char *pinnedLimit = (char *)U_MAX_PTR(target);
        capacity = (int32_t)(pinnedLimit - target);
    } else {
        // Assume the capacity is real and a limit pointer won't wrap around.
        capacity = (int32_t)dstSize;
    }

    UErrorCode status = U_ZERO_ERROR;

    // Empty substring: only the terminator needs to be handled.
    if(length == 0) {
        return u_terminateChars(target, capacity, 0, &status);
    }

    // Empty codepage name: "invariant characters" conversion, no converter.
    if(codepage != 0 && *codepage == 0) {
        const int32_t copyLength = (length <= capacity) ? length : capacity;
        u_UCharsToChars(getArrayStart() + start, target, copyLength);
        return u_terminateChars(target, capacity, length, &status);
    }

    // Otherwise pick a converter: the cached default one or a named one.
    UConverter *converter;
    if(codepage == 0) {
        const char *defaultName = ucnv_getDefaultName();
        if(UCNV_FAST_IS_UTF8(defaultName)) {
            // default codepage is UTF-8: skip the converter entirely
            return toUTF8(start, length, target, capacity);
        }
        converter = u_getDefaultConverter(&status);
    } else {
        converter = ucnv_open(codepage, &status);
    }

    // doExtract() checks status itself, so a failed open is handled there.
    length = doExtract(start, length, target, capacity, converter, status);

    // give the converter back the same way it was obtained
    if(codepage == 0) {
        u_releaseDefaultConverter(converter);
    } else {
        ucnv_close(converter);
    }

    return length;
}

// Converts the whole string to bytes using cnv.
//
//   dest          destination buffer; may be null only if destCapacity is 0
//   destCapacity  size of dest in bytes; must not be negative
//   cnv           converter to use (it is reset first); 0 borrows the default
//                 converter for the duration of the call
//   errorCode     in/out ICU error code; nothing happens if it already
//                 indicates failure
//
// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a bogus string or illegal
// buffer arguments; otherwise returns what doExtract()/u_terminateChars()
// report.
int32_t
UnicodeString::extract(char *dest, int32_t destCapacity,
                       UConverter *cnv,
                       UErrorCode &errorCode) const
{
    if(U_FAILURE(errorCode)) {
        return 0;
    }

    // argument checks
    if(isBogus() || destCapacity<0 || (dest==0 && destCapacity>0)) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // an empty string only needs its terminator handled
    if(isEmpty()) {
        return u_terminateChars(dest, destCapacity, 0, &errorCode);
    }

    // Remember whether the converter is borrowed so it can be returned later.
    const UBool borrowedDefault=(cnv==0);
    if(borrowedDefault) {
        cnv=u_getDefaultConverter(&errorCode);
        if(U_FAILURE(errorCode)) {
            return 0;
        }
    } else {
        ucnv_resetFromUnicode(cnv);
    }

    // convert the full contents
    const int32_t written=doExtract(0, length(), dest, destCapacity, cnv, errorCode);

    if(borrowedDefault) {
        u_releaseDefaultConverter(cnv);
    }

    return written;
}

// Converts the substring [start, start+length) to bytes with cnv and
// NUL-terminates the result if it fits.
//
//   start, length  substring to convert; assumed already pinned by the caller
//   dest           destination buffer
//   destCapacity   size of dest in bytes; 0 means pure preflighting (dest is
//                  never written) and -1 is the "magic" unlimited capacity
//   cnv            converter to use
//   errorCode      in/out ICU error code
//
// On a prior failure, writes an empty string into dest (if there is any
// capacity) and returns 0. Otherwise returns the value of
// u_terminateChars() for the total number of bytes the conversion produces,
// including bytes that did not fit into dest.
int32_t
UnicodeString::doExtract(int32_t start, int32_t length,
                         char *dest, int32_t destCapacity,
                         UConverter *cnv,
                         UErrorCode &errorCode) const
{
    if(U_FAILURE(errorCode)) {
        if(destCapacity!=0) {
            *dest=0;
        }
        return 0;
    }

    const UChar *src=getArrayStart()+start, *srcLimit=src+length;
    char *originalDest=dest;
    const char *destLimit;

    if(destCapacity==0) {
        // preflighting: give the converter an empty target
        destLimit=dest=0;
    } else if(destCapacity==-1) {
        // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
        destLimit=(char*)U_MAX_PTR(dest);
        // for NUL-termination, translate into highest int32_t
        destCapacity=0x7fffffff;
    } else {
        destLimit=dest+destCapacity;
    }

    // Count output bytes from the pointer that is actually handed to the
    // converter. With destCapacity==0 that pointer is null even when the
    // caller passed a non-null buffer, so measuring from originalDest would
    // subtract unrelated pointers and yield a garbage length.
    char *const convStart=dest;

    // perform the conversion
    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
    length=(int32_t)(dest-convStart);

    // if an overflow occurs, then get the preflighting length
    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        // scratch space: the remaining output is only counted, not kept
        char buffer[1024];

        destLimit=buffer+sizeof(buffer);
        do {
            dest=buffer;
            errorCode=U_ZERO_ERROR;
            ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
            length+=(int32_t)(dest-buffer);
        } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
    }

    // terminate (or report overflow) against the caller's real buffer
    return u_terminateChars(originalDest, destCapacity, length, &errorCode);
}

// Fills this string by converting codepage bytes to Unicode.
//
//   codepageData  input bytes; null means nothing to do
//   dataLength    number of bytes, or -1 if codepageData is NUL-terminated;
//                 0 and values below -1 mean nothing to do
//   codepage      0  -> default codepage (UTF-8 fast path when applicable)
//                 "" -> "invariant characters" conversion
//                 otherwise the name is passed to ucnv_open()
//
// The string is set to bogus if a buffer cannot be obtained, the converter
// cannot be opened, or the conversion fails.
void
UnicodeString::doCodepageCreate(const char *codepageData,
                                int32_t dataLength,
                                const char *codepage)
{
    // nothing to convert?
    if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
        return;
    }
    if(dataLength == -1) {
        dataLength = (int32_t)uprv_strlen(codepageData);
    }

    // Empty codepage name: "invariant characters" conversion, no converter.
    if(codepage != 0 && *codepage == 0) {
        if(!cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
            setToBogus();
        } else {
            u_charsToUChars(codepageData, getArrayStart(), dataLength);
            setLength(dataLength);
        }
        return;
    }

    UErrorCode status = U_ZERO_ERROR;

    // Obtain a converter: the cached default one or a freshly opened one.
    UConverter *converter;
    if(codepage == 0) {
        const char *defaultName = ucnv_getDefaultName();
        if(UCNV_FAST_IS_UTF8(defaultName)) {
            // default codepage is UTF-8: skip the converter entirely
            setToUTF8(StringPiece(codepageData, dataLength));
            return;
        }
        converter = u_getDefaultConverter(&status);
    } else {
        converter = ucnv_open(codepage, &status);
    }

    // no converter, no string
    if(U_FAILURE(status)) {
        setToBogus();
        return;
    }

    // run the conversion; a failure invalidates the string
    doCodepageCreate(codepageData, dataLength, converter, status);
    if(U_FAILURE(status)) {
        setToBogus();
    }

    // give the converter back the same way it was obtained
    if(codepage == 0) {
        u_releaseDefaultConverter(converter);
    } else {
        ucnv_close(converter);
    }
}

// Converts dataLength bytes of codepageData with the given converter and
// appends the result to this string's buffer, growing the buffer as needed.
//
//   codepageData  input bytes
//   dataLength    number of input bytes
//   converter     converter to use
//   status        in/out ICU error code; nothing happens if it already
//                 indicates failure. U_BUFFER_OVERFLOW_ERROR from the
//                 converter is handled here by growing and retrying; any
//                 other result is left in status for the caller.
//
// The string is set to bogus if the buffer cannot be (re)allocated.
void
UnicodeString::doCodepageCreate(const char *codepageData,
                                int32_t dataLength,
                                UConverter *converter,
                                UErrorCode &status)
{
    if(U_FAILURE(status)) {
        return;
    }

    // set up the conversion parameters
    const char *mySource     = codepageData;
    const char *mySourceEnd  = mySource + dataLength;
    UChar *array, *myTarget;

    // estimate the size needed:
    int32_t arraySize;
    if(dataLength <= US_STACKBUF_SIZE) {
        // try to use the stack buffer
        arraySize = US_STACKBUF_SIZE;
    } else {
        // 1.25 UChar's per source byte should cover most cases
        arraySize = dataLength + (dataLength >> 2);
    }

    // we do not care about the current contents
    UBool doCopyArray = FALSE;
    for(;;) {
        if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
            setToBogus();
            break;
        }

        // perform the conversion, continuing after what is already there
        array = getArrayStart();
        myTarget = array + length();
        ucnv_toUnicode(converter, &myTarget,  array + getCapacity(),
            &mySource, mySourceEnd, 0, TRUE, &status);

        // update the conversion parameters
        setLength((int32_t)(myTarget - array));

        // done unless the target buffer was too small
        if(status != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }

        // reset the error code
        status = U_ZERO_ERROR;

        // keep the previous conversion results
        doCopyArray = TRUE;

        // estimate the new size needed, larger than before
        // try 2 UChar's per remaining source byte
        arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));

        // The estimate is based on the remaining *source* bytes only. It does
        // not exceed the current capacity when no source bytes remain but the
        // converter still reported overflow (presumably output pending inside
        // the converter -- NOTE(review): confirm against ucnv_toUnicode docs),
        // or when the sum wrapped in the int32_t cast. Retrying with an
        // unchanged buffer could not make progress, so force real growth.
        if(arraySize <= getCapacity()) {
            if(getCapacity() > 0x7fffffff - US_STACKBUF_SIZE) {
                // cannot grow any further
                setToBogus();
                break;
            }
            arraySize = getCapacity() + US_STACKBUF_SIZE;
        }
    }
}

U_NAMESPACE_END

#endif
Commit	Line	Data
374ca955 A	1	/*
	2	*******************************************************************************
	3	*
729e4ab9	4	* Copyright (C) 1999-2010, International Business Machines
374ca955 A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: unistr_cnv.cpp
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:2
	12	*
	13	* created on: 2004aug19
	14	* created by: Markus W. Scherer
	15	*
	16	* Character conversion functions moved here from unistr.cpp
	17	*/
	18
	19	#include "unicode/utypes.h"
	20
	21	#if !UCONFIG_NO_CONVERSION
	22
	23	#include "unicode/putil.h"
	24	#include "cstring.h"
	25	#include "cmemory.h"
	26	#include "unicode/ustring.h"
	27	#include "unicode/unistr.h"
	28	#include "unicode/ucnv.h"
729e4ab9	29	#include "ucnv_imp.h"
374ca955 A	30	#include "putilimp.h"
	31	#include "ustr_cnv.h"
	32	#include "ustr_imp.h"
	33
	34	U_NAMESPACE_BEGIN
	35
	36	//========================================
	37	// Constructors
	38	//========================================
	39
729e4ab9 A	40	#if !U_CHARSET_IS_UTF8
	41
	42	UnicodeString::UnicodeString(const char *codepageData)
	43	: fShortLength(0),
	44	fFlags(kShortString)
	45	{
	46	if(codepageData != 0) {
	47	doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
	48	}
	49	}
	50
	51	UnicodeString::UnicodeString(const char *codepageData,
	52	int32_t dataLength)
	53	: fShortLength(0),
	54	fFlags(kShortString)
	55	{
	56	if(codepageData != 0) {
	57	doCodepageCreate(codepageData, dataLength, 0);
	58	}
	59	}
	60
	61	// else see unistr.cpp
	62	#endif
	63
374ca955 A	64	UnicodeString::UnicodeString(const char *codepageData,
374ca955 A	65	const char *codepage)
46f4442e	66	: fShortLength(0),
374ca955 A	67	fFlags(kShortString)
374ca955 A	68	{
73c04bcf A	69	if(codepageData != 0) {
	70	doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
	71	}
374ca955 A	72	}
374ca955 A	73
374ca955 A	74	UnicodeString::UnicodeString(const char *codepageData,
	75	int32_t dataLength,
	76	const char *codepage)
46f4442e	77	: fShortLength(0),
374ca955 A	78	fFlags(kShortString)
374ca955 A	79	{
73c04bcf A	80	if(codepageData != 0) {
	81	doCodepageCreate(codepageData, dataLength, codepage);
	82	}
374ca955 A	83	}
	84
	85	UnicodeString::UnicodeString(const char *src, int32_t srcLength,
	86	UConverter *cnv,
	87	UErrorCode &errorCode)
46f4442e	88	: fShortLength(0),
374ca955 A	89	fFlags(kShortString)
374ca955 A	90	{
73c04bcf A	91	if(U_SUCCESS(errorCode)) {
	92	// check arguments
	93	if(src==NULL) {
	94	// treat as an empty string, do nothing more
	95	} else if(srcLength<-1) {
	96	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
374ca955	97	} else {
73c04bcf A	98	// get input length
	99	if(srcLength==-1) {
	100	srcLength=(int32_t)uprv_strlen(src);
	101	}
	102	if(srcLength>0) {
	103	if(cnv!=0) {
	104	// use the provided converter
	105	ucnv_resetToUnicode(cnv);
	106	doCodepageCreate(src, srcLength, cnv, errorCode);
	107	} else {
	108	// use the default converter
	109	cnv=u_getDefaultConverter(&errorCode);
	110	doCodepageCreate(src, srcLength, cnv, errorCode);
	111	u_releaseDefaultConverter(cnv);
	112	}
	113	}
374ca955	114	}
374ca955	115
73c04bcf A	116	if(U_FAILURE(errorCode)) {
	117	setToBogus();
	118	}
374ca955	119	}
374ca955 A	120	}
	121
	122	//========================================
	123	// Codeset conversion
	124	//========================================
729e4ab9 A	125
	126	#if !U_CHARSET_IS_UTF8
	127
	128	int32_t
	129	UnicodeString::extract(int32_t start,
	130	int32_t length,
	131	char *target,
	132	uint32_t dstSize) const {
	133	return extract(start, length, target, dstSize, 0);
	134	}
	135
	136	// else see unistr.cpp
	137	#endif
	138
374ca955 A	139	int32_t
	140	UnicodeString::extract(int32_t start,
	141	int32_t length,
	142	char *target,
	143	uint32_t dstSize,
	144	const char *codepage) const
	145	{
73c04bcf A	146	// if the arguments are illegal, then do nothing
	147	if(/dstSize < 0 \|\| /(dstSize > 0 && target == 0)) {
	148	return 0;
	149	}
	150
	151	// pin the indices to legal values
	152	pinIndices(start, length);
	153
729e4ab9 A	154	// We need to cast dstSize to int32_t for all subsequent code.
	155	// I don't know why the API was defined with uint32_t but we are stuck with it.
	156	// Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
	157	// as a limit in some functions, it may wrap around and yield a pointer
	158	// that compares less-than target.
	159	int32_t capacity;
	160	if(dstSize < 0x7fffffff) {
	161	// Assume that the capacity is real and a limit pointer won't wrap around.
	162	capacity = (int32_t)dstSize;
	163	} else {
	164	// Pin the capacity so that a limit pointer does not wrap around.
	165	char targetLimit = (char )U_MAX_PTR(target);
	166	// U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
	167	// greater than target and does not wrap around the top of the address space.
	168	capacity = (int32_t)(targetLimit - target);
	169	}
	170
73c04bcf A	171	// create the converter
	172	UConverter *converter;
	173	UErrorCode status = U_ZERO_ERROR;
	174
	175	// just write the NUL if the string length is 0
	176	if(length == 0) {
729e4ab9	177	return u_terminateChars(target, capacity, 0, &status);
73c04bcf A	178	}
	179
	180	// if the codepage is the default, use our cache
	181	// if it is an empty string, then use the "invariant character" conversion
	182	if (codepage == 0) {
729e4ab9 A	183	const char *defaultName = ucnv_getDefaultName();
	184	if(UCNV_FAST_IS_UTF8(defaultName)) {
	185	return toUTF8(start, length, target, capacity);
	186	}
73c04bcf A	187	converter = u_getDefaultConverter(&status);
	188	} else if (*codepage == 0) {
	189	// use the "invariant characters" conversion
	190	int32_t destLength;
729e4ab9	191	if(length <= capacity) {
73c04bcf A	192	destLength = length;
73c04bcf A	193	} else {
729e4ab9	194	destLength = capacity;
73c04bcf A	195	}
73c04bcf A	196	u_UCharsToChars(getArrayStart() + start, target, destLength);
729e4ab9	197	return u_terminateChars(target, capacity, length, &status);
374ca955	198	} else {
73c04bcf	199	converter = ucnv_open(codepage, &status);
374ca955	200	}
73c04bcf	201
729e4ab9	202	length = doExtract(start, length, target, capacity, converter, status);
73c04bcf A	203
	204	// close the converter
	205	if (codepage == 0) {
	206	u_releaseDefaultConverter(converter);
	207	} else {
	208	ucnv_close(converter);
	209	}
	210
	211	return length;
374ca955 A	212	}
	213
	214	int32_t
	215	UnicodeString::extract(char *dest, int32_t destCapacity,
	216	UConverter *cnv,
73c04bcf A	217	UErrorCode &errorCode) const
73c04bcf A	218	{
374ca955	219	if(U_FAILURE(errorCode)) {
73c04bcf	220	return 0;
374ca955	221	}
374ca955	222
73c04bcf A	223	if(isBogus() \|\| destCapacity<0 \|\| (destCapacity>0 && dest==0)) {
	224	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	225	return 0;
	226	}
374ca955	227
73c04bcf	228	// nothing to do?
46f4442e	229	if(isEmpty()) {
73c04bcf A	230	return u_terminateChars(dest, destCapacity, 0, &errorCode);
73c04bcf A	231	}
374ca955	232
73c04bcf A	233	// get the converter
	234	UBool isDefaultConverter;
	235	if(cnv==0) {
	236	isDefaultConverter=TRUE;
	237	cnv=u_getDefaultConverter(&errorCode);
	238	if(U_FAILURE(errorCode)) {
	239	return 0;
	240	}
	241	} else {
	242	isDefaultConverter=FALSE;
	243	ucnv_resetFromUnicode(cnv);
	244	}
	245
	246	// convert
46f4442e	247	int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
73c04bcf A	248
	249	// release the converter
	250	if(isDefaultConverter) {
	251	u_releaseDefaultConverter(cnv);
	252	}
	253
46f4442e	254	return len;
374ca955 A	255	}
	256
	257	int32_t
	258	UnicodeString::doExtract(int32_t start, int32_t length,
	259	char *dest, int32_t destCapacity,
	260	UConverter *cnv,
73c04bcf A	261	UErrorCode &errorCode) const
	262	{
	263	if(U_FAILURE(errorCode)) {
	264	if(destCapacity!=0) {
	265	*dest=0;
	266	}
	267	return 0;
	268	}
	269
46f4442e	270	const UChar src=getArrayStart()+start, srcLimit=src+length;
73c04bcf A	271	char *originalDest=dest;
	272	const char *destLimit;
	273
	274	if(destCapacity==0) {
	275	destLimit=dest=0;
	276	} else if(destCapacity==-1) {
	277	// Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
	278	destLimit=(char*)U_MAX_PTR(dest);
	279	// for NUL-termination, translate into highest int32_t
	280	destCapacity=0x7fffffff;
	281	} else {
	282	destLimit=dest+destCapacity;
	283	}
	284
	285	// perform the conversion
	286	ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
	287	length=(int32_t)(dest-originalDest);
	288
	289	// if an overflow occurs, then get the preflighting length
	290	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
	291	char buffer[1024];
	292
	293	destLimit=buffer+sizeof(buffer);
	294	do {
	295	dest=buffer;
	296	errorCode=U_ZERO_ERROR;
	297	ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
	298	length+=(int32_t)(dest-buffer);
	299	} while(errorCode==U_BUFFER_OVERFLOW_ERROR);
374ca955	300	}
73c04bcf A	301
73c04bcf A	302	return u_terminateChars(originalDest, destCapacity, length, &errorCode);
374ca955 A	303	}
	304
	305	void
	306	UnicodeString::doCodepageCreate(const char *codepageData,
73c04bcf A	307	int32_t dataLength,
73c04bcf A	308	const char *codepage)
374ca955	309	{
73c04bcf A	310	// if there's nothing to convert, do nothing
	311	if(codepageData == 0 \|\| dataLength == 0 \|\| dataLength < -1) {
	312	return;
	313	}
	314	if(dataLength == -1) {
	315	dataLength = (int32_t)uprv_strlen(codepageData);
	316	}
	317
	318	UErrorCode status = U_ZERO_ERROR;
	319
	320	// create the converter
	321	// if the codepage is the default, use our cache
	322	// if it is an empty string, then use the "invariant character" conversion
729e4ab9 A	323	UConverter *converter;
	324	if (codepage == 0) {
	325	const char *defaultName = ucnv_getDefaultName();
	326	if(UCNV_FAST_IS_UTF8(defaultName)) {
	327	setToUTF8(StringPiece(codepageData, dataLength));
	328	return;
	329	}
	330	converter = u_getDefaultConverter(&status);
	331	} else if(*codepage == 0) {
73c04bcf A	332	// use the "invariant characters" conversion
	333	if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
	334	u_charsToUChars(codepageData, getArrayStart(), dataLength);
46f4442e	335	setLength(dataLength);
73c04bcf A	336	} else {
	337	setToBogus();
	338	}
	339	return;
729e4ab9 A	340	} else {
729e4ab9 A	341	converter = ucnv_open(codepage, &status);
73c04bcf A	342	}
73c04bcf A	343
729e4ab9 A	344	// if we failed, set the appropriate flags and return
	345	if(U_FAILURE(status)) {
	346	setToBogus();
	347	return;
	348	}
	349
	350	// perform the conversion
73c04bcf A	351	doCodepageCreate(codepageData, dataLength, converter, status);
	352	if(U_FAILURE(status)) {
	353	setToBogus();
	354	}
	355
	356	// close the converter
	357	if(codepage == 0) {
	358	u_releaseDefaultConverter(converter);
374ca955	359	} else {
73c04bcf	360	ucnv_close(converter);
374ca955	361	}
374ca955 A	362	}
	363
	364	void
	365	UnicodeString::doCodepageCreate(const char *codepageData,
	366	int32_t dataLength,
	367	UConverter *converter,
73c04bcf A	368	UErrorCode &status)
	369	{
	370	if(U_FAILURE(status)) {
	371	return;
374ca955 A	372	}
374ca955 A	373
73c04bcf A	374	// set up the conversion parameters
	375	const char *mySource = codepageData;
	376	const char *mySourceEnd = mySource + dataLength;
46f4442e	377	UChar array, myTarget;
73c04bcf A	378
73c04bcf A	379	// estimate the size needed:
46f4442e A	380	int32_t arraySize;
	381	if(dataLength <= US_STACKBUF_SIZE) {
	382	// try to use the stack buffer
	383	arraySize = US_STACKBUF_SIZE;
	384	} else {
	385	// 1.25 UChar's per source byte should cover most cases
	386	arraySize = dataLength + (dataLength >> 2);
	387	}
73c04bcf A	388
	389	// we do not care about the current contents
	390	UBool doCopyArray = FALSE;
	391	for(;;) {
	392	if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
	393	setToBogus();
	394	break;
	395	}
374ca955	396
73c04bcf	397	// perform the conversion
46f4442e A	398	array = getArrayStart();
	399	myTarget = array + length();
	400	ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
73c04bcf	401	&mySource, mySourceEnd, 0, TRUE, &status);
374ca955	402
73c04bcf	403	// update the conversion parameters
46f4442e	404	setLength((int32_t)(myTarget - array));
374ca955	405
73c04bcf A	406	// allocate more space and copy data, if needed
	407	if(status == U_BUFFER_OVERFLOW_ERROR) {
	408	// reset the error code
	409	status = U_ZERO_ERROR;
374ca955	410
73c04bcf A	411	// keep the previous conversion results
	412	doCopyArray = TRUE;
	413
	414	// estimate the new size needed, larger than before
	415	// try 2 UChar's per remaining source byte
46f4442e	416	arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
73c04bcf A	417	} else {
	418	break;
	419	}
374ca955	420	}
374ca955 A	421	}
	422
	423	U_NAMESPACE_END
	424
	425	#endif