X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..4162bf987dca731f631c28df29ad400b78d50ea9:/icuSources/common/ucnv.c?ds=sidebyside diff --git a/icuSources/common/ucnv.c b/icuSources/common/ucnv.c index 3cf75768..f764f361 100644 --- a/icuSources/common/ucnv.c +++ b/icuSources/common/ucnv.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1998-2003, International Business Machines +* Copyright (C) 1998-2006,2008 International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -20,78 +20,23 @@ */ #include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + #include "unicode/ustring.h" -#include "unicode/ures.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "unicode/uset.h" +#include "putilimp.h" #include "cmemory.h" #include "cstring.h" +#include "uassert.h" +#include "utracimp.h" #include "ustr_imp.h" #include "ucnv_imp.h" -#include "ucnv_io.h" #include "ucnv_cnv.h" #include "ucnv_bld.h" -#if 0 -/* debugging for converters */ -# include -void UCNV_DEBUG_LOG(const char *what, const char *who, const void *p, int l) -{ - static FILE *f = NULL; - if(f==NULL) - { - /* stderr, or open another file */ - f = stderr; - /* f = fopen("c:\\UCNV_DEBUG_LOG.txt", "w"); */ - } - if (!what) { - what = "(null)"; - } - if (!who) { - who = "(null)"; - } - if (!p) { - p = "(null)"; - } - - fprintf(f, "%p\t:%d\t%-20s\t%-10s\n", - p, l, who, what); - - fflush(f); -} - - -/* dump the contents of a converter */ -static void UCNV_DEBUG_CNV(const UConverter *c, int line) -{ - UErrorCode err = U_ZERO_ERROR; - fprintf(stderr, "%p\t:%d\t", c, line); - if(c!=NULL) { - const char *name = ucnv_getName(c, &err); - if (!name) { - name = "(null)"; - } - fprintf(stderr, "%s\t", name); - - fprintf(stderr, "shr=%p, ref=%x\n", - c->sharedData, - c->sharedData->referenceCounter); - } else { - fprintf(stderr, "DEMISED\n"); - } -} - -# define UCNV_DEBUG 1 -# define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) -# define UCNV_DEBUG_CNV(c) UCNV_DEBUG_CNV(c, __LINE__) -#else -# define UCNV_DEBUG_LOG(x,y,z) -# define UCNV_DEBUG_CNV(c) -#endif - - - /* size of intermediate and preflighting buffers in ucnv_convert() */ #define CHUNK_SIZE 1024 @@ -103,23 +48,13 @@ typedef struct UAmbiguousConverter { static const UAmbiguousConverter ambiguousConverters[]={ { "ibm-942_P120-1999", 0xa5 }, { "ibm-943_P130-1999", 0xa5 }, + { "ibm-897_P100-1995", 0xa5 }, { "ibm-33722_P120-1999", 0xa5 }, { "ibm-949_P110-1999", 0x20a9 }, { "ibm-1363_P110-1997", 0x20a9 }, { "ISO_2022,locale=ko,version=0", 0x20a9 } }; -U_CAPI const char* U_EXPORT2 -ucnv_getDefaultName () -{ - return ucnv_io_getDefaultConverterName(); -} - -U_CAPI void U_EXPORT2 -ucnv_setDefaultName (const char *converterName) -{ - ucnv_io_setDefaultConverterName(converterName); -} /*Calls through createConverter */ U_CAPI UConverter* U_EXPORT2 ucnv_open (const char *name, @@ -128,13 +63,10 @@ ucnv_open (const char *name, UConverter *r; if (err == NULL || U_FAILURE (*err)) { - UCNV_DEBUG_LOG("open", name, NULL); return NULL; } r = ucnv_createConverter(NULL, name, err); - UCNV_DEBUG_LOG("open", name, r); - UCNV_DEBUG_CNV(r); return r; } @@ -163,6 +95,28 @@ ucnv_openU (const UChar * name, return ucnv_open(u_austrcpy(asciiName, name), err); } +/* Copy the string that is represented by the UConverterPlatform enum + * @param platformString An output buffer + * @param platform An enum representing a platform + * @return the length of the copied string. + */ +static int32_t +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) +{ + switch (pltfrm) + { + case UCNV_IBM: + uprv_strcpy(platformString, "ibm-"); + return 4; + case UCNV_UNKNOWN: + break; + } + + /* default to empty string */ + *platformString = 0; + return 0; +} + /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls *through createConverter*/ U_CAPI UConverter* U_EXPORT2 @@ -215,34 +169,26 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U NULL }; + UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); + if (status == NULL || U_FAILURE(*status)){ + UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); return 0; } if (!pBufferSize || !cnv){ - *status = U_ILLEGAL_ARGUMENT_ERROR; + *status = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(*status); return 0; } - UCNV_DEBUG_LOG("cloning FROM", ucnv_getName(cnv,status), cnv); - UCNV_DEBUG_LOG("cloning WITH", "memory", stackBuffer); - UCNV_DEBUG_CNV(cnv); - - /* Pointers on 64-bit platforms need to be aligned - * on a 64-bit boundry in memory. - */ - if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { - int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); - *pBufferSize -= offsetUp; - stackBufferChars += offsetUp; - } + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", + ucnv_getName(cnv, status), cnv, stackBuffer); - stackBuffer = (void *)stackBufferChars; - if (cnv->sharedData->impl->safeClone != NULL) { /* call the custom safeClone function for sizing */ bufferSizeNeeded = 0; - cnv->sharedData->impl->safeClone(cnv, stackBuffer, &bufferSizeNeeded, status); + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); } else { @@ -252,10 +198,27 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ *pBufferSize = bufferSizeNeeded; + UTRACE_EXIT_VALUE(bufferSizeNeeded); return 0; } + /* Pointers on 64-bit platforms need to be aligned + * on a 64-bit boundary in memory. + */ + if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { + int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); + if(*pBufferSize > offsetUp) { + *pBufferSize -= offsetUp; + stackBufferChars += offsetUp; + } else { + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ + *pBufferSize = 1; + } + } + + stackBuffer = (void *)stackBufferChars; + /* Now, see if we must allocate any memory */ if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL) { @@ -264,6 +227,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U if(localConverter == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; + UTRACE_EXIT_STATUS(*status); return NULL; } @@ -279,10 +243,25 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U allocatedConverter = NULL; } + uprv_memset(localConverter, 0, bufferSizeNeeded); + /* Copy initial state */ uprv_memcpy(localConverter, cnv, sizeof(UConverter)); localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; + /* copy the substitution string */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + localConverter->subChars = (uint8_t *)localConverter->subUChars; + } else { + localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (localConverter->subChars == NULL) { + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + /* now either call the safeclone fcn or not */ if (cnv->sharedData->impl->safeClone != NULL) { /* call the custom safeClone function */ @@ -290,7 +269,11 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U } if(localConverter==NULL || U_FAILURE(*status)) { + if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { + uprv_free(allocatedConverter->subChars); + } uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); return NULL; } @@ -307,21 +290,8 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U if(localConverter == (UConverter*)stackBuffer) { /* we're using user provided data - set to not destroy */ localConverter->isCopyLocal = TRUE; -#ifdef UCNV_DEBUG - fprintf(stderr, "%p\t:%d\t\t==stackbuffer %p, isCopyLocal TRUE\n", - localConverter, __LINE__, stackBuffer); -#endif - - } else { -#ifdef UCNV_DEBUG - fprintf(stderr, "%p\t:%d\t\t!=stackbuffer %p, isCopyLocal left at %s\n", - localConverter, __LINE__, stackBuffer, - localConverter->isCopyLocal?"TRUE":"FALSE"); -#endif } - localConverter->isExtraLocal = localConverter->isCopyLocal; - /* allow callback functions to handle any memory allocation */ toUArgs.converter = fromUArgs.converter = localConverter; cbErr = U_ZERO_ERROR; @@ -329,11 +299,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U cbErr = U_ZERO_ERROR; cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); - UCNV_DEBUG_LOG("cloning TO", ucnv_getName(localConverter,status), localConverter); - UCNV_DEBUG_CNV(localConverter); - UCNV_DEBUG_CNV(cnv); - - + UTRACE_EXIT_PTR_STATUS(localConverter, *status); return localConverter; } @@ -345,61 +311,62 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter) { - /* first, notify the callback functions that the converter is closed */ - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; UErrorCode errorCode = U_ZERO_ERROR; + UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); + if (converter == NULL) { + UTRACE_EXIT(); return; } - UCNV_DEBUG_LOG("close", ucnv_getName(converter, &errorCode), converter); - UCNV_DEBUG_CNV(converter); - - toUArgs.converter = fromUArgs.converter = converter; + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", + ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); + /* In order to speed up the close, only call the callbacks when they have been changed. + This performance check will only work when the callbacks are set within a shared library + or from user code that statically links this code. */ + /* first, notify the callback functions that the converter is closed */ + if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); + } + if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); + } - UCNV_DEBUG_CNV(converter); - if (converter->sharedData->impl->close != NULL) { converter->sharedData->impl->close(converter); } -#ifdef UCNV_DEBUG - { - char c[4]; - c[0]='0'+converter->sharedData->referenceCounter; - c[1]=0; - UCNV_DEBUG_LOG("close--", c, converter); - if((converter->sharedData->referenceCounter == 0)&&(converter->sharedData->sharedDataCached == FALSE)) { - UCNV_DEBUG_CNV(converter); - UCNV_DEBUG_LOG("close:delDead", "??", converter); - } + if (converter->subChars != (uint8_t *)converter->subUChars) { + uprv_free(converter->subChars); } -#endif /* Checking whether it's an algorithic converter is okay @@ -411,10 +378,10 @@ ucnv_close (UConverter * converter) } if(!converter->isCopyLocal){ - UCNV_DEBUG_LOG("close:free", "", converter); - uprv_free (converter); + uprv_free(converter); } - return; + + UTRACE_EXIT(); } /*returns a single Name from the list, will return NULL if out of bounds @@ -422,47 +389,21 @@ ucnv_close (UConverter * converter) U_CAPI const char* U_EXPORT2 ucnv_getAvailableName (int32_t n) { - if (0 <= n && n <= 0xffff) { - UErrorCode err = U_ZERO_ERROR; - const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err); - if (U_SUCCESS(err)) { - return name; + if (0 <= n && n <= 0xffff) { + UErrorCode err = U_ZERO_ERROR; + const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); + if (U_SUCCESS(err)) { + return name; + } } - } - return NULL; + return NULL; } U_CAPI int32_t U_EXPORT2 ucnv_countAvailable () { UErrorCode err = U_ZERO_ERROR; - return ucnv_io_countAvailableConverters(&err); -} - -U_CAPI uint16_t U_EXPORT2 -ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) -{ - return ucnv_io_countAliases(alias, pErrorCode); -} - - -U_CAPI const char* U_EXPORT2 -ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) -{ - return ucnv_io_getAlias(alias, n, pErrorCode); -} - -U_CAPI void U_EXPORT2 -ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) -{ - ucnv_io_getAliases(alias, 0, aliases, pErrorCode); -} - -U_CAPI uint16_t U_EXPORT2 -ucnv_countStandards(void) -{ - UErrorCode err = U_ZERO_ERROR; - return ucnv_io_countStandards(&err); + return ucnv_bld_countAvailableConverters(&err); } U_CAPI void U_EXPORT2 @@ -474,15 +415,19 @@ ucnv_getSubstChars (const UConverter * converter, if (U_FAILURE (*err)) return; + if (converter->subCharLen <= 0) { + /* Unicode string or empty string from ucnv_setSubstString(). */ + *len = 0; + return; + } + if (*len < converter->subCharLen) /*not enough space in subChars */ { *err = U_INDEX_OUTOFBOUNDS_ERROR; return; } - uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */ - *len = converter->subCharLen; /*store # of bytes copied to buffer */ - uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */ + uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ *len = converter->subCharLen; /*store # of bytes copied to buffer */ } @@ -503,7 +448,7 @@ ucnv_setSubstChars (UConverter * converter, return; } - uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */ + uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ converter->subCharLen = len; /*sets the new len */ /* @@ -516,130 +461,181 @@ ucnv_setSubstChars (UConverter * converter, return; } -U_CAPI int32_t U_EXPORT2 -ucnv_getDisplayName(const UConverter *cnv, - const char *displayLocale, - UChar *displayName, int32_t displayNameCapacity, - UErrorCode *pErrorCode) { - UResourceBundle *rb; - const UChar *name; - int32_t length; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; +U_DRAFT void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err) { + UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; + char chars[UCNV_ERROR_BUFFER_LENGTH]; + + UConverter *clone; + uint8_t *subChars; + int32_t cloneSize, length8; + + /* Let the following functions check all arguments. */ + cloneSize = sizeof(cloneBuffer); + clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); + ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); + length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); + ucnv_close(clone); + if (U_FAILURE(*err)) { + return; } - if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; + if (cnv->sharedData->impl->writeSub == NULL +#if !UCONFIG_NO_LEGACY_CONVERSION + || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && + ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) +#endif + ) { + /* The converter is not stateful. Store the charset bytes as a fixed string. */ + subChars = (uint8_t *)chars; + } else { + /* + * The converter has a non-default writeSub() function, indicating + * that it is stateful. + * Store the Unicode string for on-the-fly conversion for correct + * state handling. + */ + if (length > UCNV_ERROR_BUFFER_LENGTH) { + /* + * Should not occur. The converter should output at least one byte + * per UChar, which means that ucnv_fromUChars() should catch all + * overflows. + */ + *err = U_BUFFER_OVERFLOW_ERROR; + return; + } + subChars = (uint8_t *)s; + if (length < 0) { + length = u_strlen(s); + } + length8 = length * U_SIZEOF_UCHAR; } - /* open the resource bundle and get the display name string */ - rb=ures_open(NULL, displayLocale, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; + /* + * For storing the substitution string, select either the small buffer inside + * UConverter or allocate a subChars buffer. + */ + if (length8 > UCNV_MAX_SUBCHAR_LEN) { + /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + /* Allocate a new buffer for the string. */ + cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (cnv->subChars == NULL) { + cnv->subChars = (uint8_t *)cnv->subUChars; + *err = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } } - /* use the internal name as the key */ - name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, pErrorCode); - ures_close(rb); - - if(U_SUCCESS(*pErrorCode)) { - /* copy the string */ - u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR); + /* Copy the substitution string into the UConverter or its subChars buffer. */ + if (length8 == 0) { + cnv->subCharLen = 0; } else { - /* convert the internal name into a Unicode string */ - *pErrorCode=U_ZERO_ERROR; - length=uprv_strlen(cnv->sharedData->staticData->name); - u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity)); + uprv_memcpy(cnv->subChars, subChars, length8); + if (subChars == (uint8_t *)chars) { + cnv->subCharLen = (int8_t)length8; + } else /* subChars == s */ { + cnv->subCharLen = (int8_t)-length; + } } - return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode); + + /* See comment in ucnv_setSubstChars(). */ + cnv->subChar1 = 0; } /*resets the internal states of a converter *goal : have the same behaviour than a freshly created converter */ -static void _reset(UConverter *converter, UConverterResetChoice choice) { - /* first, notify the callback functions that the converter is reset */ - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UErrorCode errorCode; - +static void _reset(UConverter *converter, UConverterResetChoice choice, + UBool callCallback) { if(converter == NULL) { return; } - toUArgs.converter = fromUArgs.converter = converter; - if(choice<=UCNV_RESET_TO_UNICODE) { - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); - } - if(choice!=UCNV_RESET_TO_UNICODE) { - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); + if(callCallback) { + /* first, notify the callback functions that the converter is reset */ + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + UErrorCode errorCode; + + toUArgs.converter = fromUArgs.converter = converter; + if(choice<=UCNV_RESET_TO_UNICODE) { + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); + } + if(choice!=UCNV_RESET_TO_UNICODE) { + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); + } } /* now reset the converter itself */ if(choice<=UCNV_RESET_TO_UNICODE) { converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; + converter->mode = 0; converter->toULength = 0; converter->invalidCharLength = converter->UCharErrorBufferLength = 0; + converter->preToULength = 0; } if(choice!=UCNV_RESET_TO_UNICODE) { converter->fromUnicodeStatus = 0; - converter->fromUSurrogateLead = 0; + converter->fromUChar32 = 0; converter->invalidUCharLength = converter->charErrorBufferLength = 0; + converter->preFromUFirstCP = U_SENTINEL; + converter->preFromULength = 0; } if (converter->sharedData->impl->reset != NULL) { /* call the custom reset function */ converter->sharedData->impl->reset(converter, choice); - } else if(choice<=UCNV_RESET_TO_UNICODE) { - converter->mode = UCNV_SI; } } U_CAPI void U_EXPORT2 ucnv_reset(UConverter *converter) { - _reset(converter, UCNV_RESET_BOTH); + _reset(converter, UCNV_RESET_BOTH, TRUE); } U_CAPI void U_EXPORT2 ucnv_resetToUnicode(UConverter *converter) { - _reset(converter, UCNV_RESET_TO_UNICODE); + _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); } U_CAPI void U_EXPORT2 ucnv_resetFromUnicode(UConverter *converter) { - _reset(converter, UCNV_RESET_FROM_UNICODE); + _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); } U_CAPI int8_t U_EXPORT2 ucnv_getMaxCharSize (const UConverter * converter) { - return converter->sharedData->staticData->maxBytesPerChar; + return converter->maxBytesPerUChar; } @@ -663,14 +659,27 @@ ucnv_getName (const UConverter * converter, UErrorCode * err) return converter->sharedData->staticData->name; } -U_CAPI int32_t U_EXPORT2 -ucnv_getCCSID (const UConverter * converter, - UErrorCode * err) +U_CAPI int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter * converter, + UErrorCode * err) { + int32_t ccsid; if (U_FAILURE (*err)) return -1; - return converter->sharedData->staticData->codepage; + ccsid = converter->sharedData->staticData->codepage; + if (ccsid == 0) { + /* Rare case. This is for cases like gb18030, + which doesn't have an IBM cannonical name, but does have an IBM alias. */ + const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); + if (U_SUCCESS(*err) && standardName) { + const char *ccsidStr = uprv_strchr(standardName, '-'); + if (ccsidStr) { + ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ + } + } + } + return ccsid; } @@ -684,33 +693,6 @@ ucnv_getPlatform (const UConverter * converter, return (UConverterPlatform)converter->sharedData->staticData->platform; } -U_CAPI void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *set, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - if(cnv==NULL || set==NULL || whichsharedData->impl->getUnicodeSet==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - - /* empty the set */ - uset_clear(set); - - /* call the converter to add the code points it supports */ - cnv->sharedData->impl->getUnicodeSet(cnv, set, which, pErrorCode); -} - U_CAPI void U_EXPORT2 ucnv_getToUCallBack (const UConverter * converter, UConverterToUCallback *action, @@ -761,203 +743,876 @@ ucnv_setFromUCallBack (UConverter * converter, converter->fromUContext = newContext; } -U_CAPI void U_EXPORT2 -ucnv_fromUnicode (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - UBool flush, - UErrorCode * err) -{ - UConverterFromUnicodeArgs args; - const char *t; +static void +_updateOffsets(int32_t *offsets, int32_t length, + int32_t sourceIndex, int32_t errorInputLength) { + int32_t *limit; + int32_t delta, offset; + + if(sourceIndex>=0) { + /* + * adjust each offset by adding the previous sourceIndex + * minus the length of the input sequence that caused an + * error, if any + */ + delta=sourceIndex-errorInputLength; + } else { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + */ + delta=-1; + } + + limit=offsets+length; + if(delta==0) { + /* most common case, nothing to do */ + } else if(delta>0) { + /* add the delta to each offset (but not if the offset is <0) */ + while(offsets=0) { + *offsets=offset+delta; + } + ++offsets; + } + } else /* delta<0 */ { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + * or the error input sequence started in a previous buffer + */ + while(offsetsconverter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + fromUnicode=cnv->sharedData->impl->fromUnicode; + } else { + fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; + if(fromUnicode==NULL) { + /* there is no WithOffsets implementation */ + fromUnicode=cnv->sharedData->impl->fromUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preFromULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. + */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preFromULength=0; + } /* - * Check parameters in for all conversions - */ - if (err == NULL || U_FAILURE (*err)) { + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + /* convert */ + fromUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preFromULength==0 because a replay (<0) will cause + * sflush && pArgs->source==pArgs->sourceLimit && + cnv->fromUChar32==0); + + /* no callback called yet for this iteration */ + calledCallback=FALSE; + + /* no sourceIndex adjustment for conversion, only for callback output */ + errorInputLength=0; + + /* + * loop for offsets and error handling + * + * iterates at most 3 times: + * 1. to clean up after the conversion function + * 2. after the callback + * 3. after the callback again if there was truncated input + */ + for(;;) { + /* update offsets if we write any */ + if(offsets!=NULL) { + int32_t length=(int32_t)(pArgs->target-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preFromULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preFromULength)<0) { + sourceIndex=-1; + } + + cnv->preFromULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(ssourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->fromUChar32!=0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + return; + } + } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preFromULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); + cnv->preFromULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* callback handling */ + { + UChar32 codePoint; + + /* get and write the code point */ + codePoint=cnv->fromUChar32; + errorInputLength=0; + U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); + cnv->invalidUCharLength=(int8_t)errorInputLength; + + /* set the converter state to deal with the next character */ + cnv->fromUChar32=0; + + /* call the callback function */ + cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, + cnv->invalidUCharBuffer, errorInputLength, codePoint, + *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, + err); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +U_CAPI void U_EXPORT2 +ucnv_fromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + const UChar **source, const UChar *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterFromUnicodeArgs args; + const UChar *s; + char *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { return; } - if (_this == NULL || target == NULL || source == NULL) { - *err = U_ILLEGAL_ARGUMENT_ERROR; + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; return; } - t = *target; - if (targetLimit < t || sourceLimit < *source) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; + s=*source; + t=*target; + if(sourceLimit (size_t)0x7fffffff && targetLimit > t) - { - targetLimit = t + 0x7fffffff; + * Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be targetLimit=t+0x7fffffff; for example. + */ + if( + ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) + ) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; } - /* - * Deal with stored carry over data. This is done in the common location - * to avoid doing it for each conversion. - */ - if (_this->charErrorBufferLength > 0) - { - int32_t myTargetIndex = 0; - - ucnv_flushInternalCharBuffer (_this, - (char *)t, - &myTargetIndex, - targetLimit - *target, - offsets?&offsets:NULL, - err); - *target += myTargetIndex; - if (U_FAILURE (*err)) - return; + /* flush the target overflow buffer */ + if(cnv->charErrorBufferLength>0) { + char *overflow; + int32_t i, length; + + overflow=(char *)cnv->charErrorBuffer; + length=cnv->charErrorBufferLength; + i=0; + do { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(icharErrorBufferLength=(int8_t)j; + *target=t; + *err=U_BUFFER_OVERFLOW_ERROR; + return; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } while(icharErrorBufferLength=0; } - if(!flush && *source == sourceLimit) { + if(!flush && s==sourceLimit && cnv->preFromULength>=0) { /* the overflow buffer is emptied and there is no new input: we are done */ + *target=t; return; } - args.converter = _this; - args.flush = flush; - args.offsets = offsets; - args.source = *source; - args.sourceLimit = sourceLimit; - args.target = *target; - args.targetLimit = targetLimit; - args.size = sizeof(args); - if (offsets) - { - if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL) - { - _this->sharedData->impl->fromUnicodeWithOffsets(&args, err); - *source = args.source; - *target = args.target; - return; + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=t; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _fromUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_toUnicode() --------------------------------------------------------- */ + +static void +_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + UConverterToUnicode toUnicode; + UConverter *cnv; + const char *s; + UChar *t; + int32_t *offsets; + int32_t sourceIndex; + int32_t errorInputLength; + UBool converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + char replay[UCNV_EXT_MAX_BYTES]; + const char *realSource, *realSourceLimit; + int32_t realSourceIndex; + UBool realFlush; + + cnv=pArgs->converter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + toUnicode=cnv->sharedData->impl->toUnicode; + } else { + toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; + if(toUnicode==NULL) { + /* there is no WithOffsets implementation */ + toUnicode=cnv->sharedData->impl->toUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preToULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. + */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preToULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + if(U_SUCCESS(*err)) { + /* convert */ + toUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preToULength==0 because a replay (<0) will cause + * sflush && pArgs->source==pArgs->sourceLimit && + cnv->toULength==0); + } else { + /* handle error from getNextUChar() */ + converterSawEndOfInput=FALSE; } - else { - /* there is no implementation that sets offsets, set them all to -1 */ - int32_t i, targetSize = targetLimit - *target; - - for (i=0; itarget-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preToULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preToULength)<0) { + sourceIndex=-1; + } + + cnv->preToULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(ssourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->toULength>0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + } + + /* done successfully */ + return; + } } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND && + e!=U_ILLEGAL_ESCAPE_SEQUENCE && + e!=U_UNSUPPORTED_ESCAPE_SEQUENCE && + e!=U_PARSE_ERROR) /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE below */ + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preToULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + uprv_memcpy(cnv->preToU, pArgs->source, length); + cnv->preToULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* copy toUBytes[] to invalidCharBuffer[] */ + errorInputLength=cnv->invalidCharLength=cnv->toULength; + if(errorInputLength>0) { + uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); + } + + /* set the converter state to deal with the next character */ + cnv->toULength=0; + + /* call the callback function */ + { + UConverterCallbackReason reason; + if (*err == U_PARSE_ERROR) { /* Here U_PARSE_ERROR indicates empty segment */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + reason = UCNV_IRREGULAR; + } else { + reason = (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ? + UCNV_UNASSIGNED : UCNV_ILLEGAL; + } + cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, + cnv->invalidCharBuffer, errorInputLength, reason, err); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; } } - - /*calls the specific conversion routines */ - _this->sharedData->impl->fromUnicode(&args, err); - *source = args.source; - *target = args.target; } - - -U_CAPI void U_EXPORT2 -ucnv_toUnicode (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - UBool flush, - UErrorCode * err) -{ +U_CAPI void U_EXPORT2 +ucnv_toUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + const char **source, const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { UConverterToUnicodeArgs args; - const UChar *t; + const char *s; + UChar *t; - /* - * Check parameters in for all conversions - */ - if (err == NULL || U_FAILURE (*err)) { + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { return; } - if (_this == NULL || target == NULL || source == NULL) { - *err = U_ILLEGAL_ARGUMENT_ERROR; + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; return; } - t = *target; - if (targetLimit < t || sourceLimit < *source) { - *err = U_ILLEGAL_ARGUMENT_ERROR; + s=*source; + t=*target; + if(sourceLimit (size_t)0x3fffffff && targetLimit > t) { - targetLimit = t + 0x3fffffff; + * Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. + */ + if( + ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) + ) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; } + + /* flush the target overflow buffer */ + if(cnv->UCharErrorBufferLength>0) { + UChar *overflow; + int32_t i, length; + + overflow=cnv->UCharErrorBuffer; + length=cnv->UCharErrorBufferLength; + i=0; + do { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(iUCharErrorBufferLength=(int8_t)j; + *target=t; + *err=U_BUFFER_OVERFLOW_ERROR; + return; + } - /* - * Deal with stored carry over data. This is done in the common location - * to avoid doing it for each conversion. - */ - if (_this->UCharErrorBufferLength > 0) - { - int32_t myTargetIndex = 0; - - ucnv_flushInternalUnicodeBuffer (_this, - (UChar *)t, - &myTargetIndex, - targetLimit - *target, - offsets?&offsets:NULL, - err); - *target += myTargetIndex; - if (U_FAILURE (*err)) - return; + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } while(iUCharErrorBufferLength=0; } - if(!flush && *source == sourceLimit) { + if(!flush && s==sourceLimit && cnv->preToULength>=0) { /* the overflow buffer is emptied and there is no new input: we are done */ + *target=t; return; } - args.converter = _this; - args.flush = flush; - args.offsets = offsets; - args.source = (char *) *source; - args.sourceLimit = sourceLimit; - args.target = *target; - args.targetLimit = targetLimit; - args.size = sizeof(args); - if (offsets) { - if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) { - _this->sharedData->impl->toUnicodeWithOffsets(&args, err); - *source = args.source; - *target = args.target; - return; - } else { - /* there is no implementation that sets offsets, set them all to -1 */ - int32_t i, targetSize = targetLimit - *target; - - for (i=0; isharedData->impl->toUnicode(&args, err); + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=t; + args.targetLimit=targetLimit; + args.size=sizeof(args); - *source = args.source; - *target = args.target; - return; + _toUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; } +/* ucnv_to/fromUChars() ----------------------------------------------------- */ + U_CAPI int32_t U_EXPORT2 ucnv_fromUChars(UConverter *cnv, char *dest, int32_t destCapacity, @@ -1044,7 +1699,7 @@ ucnv_toUChars(UConverter *cnv, ucnv_resetToUnicode(cnv); originalDest=dest; if(srcLength==-1) { - srcLength=uprv_strlen(src); + srcLength=(int32_t)uprv_strlen(src); } if(srcLength>0) { srcLimit=src+srcLength; @@ -1080,65 +1735,213 @@ ucnv_toUChars(UConverter *cnv, return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); } -U_CAPI UChar32 U_EXPORT2 -ucnv_getNextUChar(UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err) -{ +/* ucnv_getNextUChar() ------------------------------------------------------ */ + +U_CAPI UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter *cnv, + const char **source, const char *sourceLimit, + UErrorCode *err) { UConverterToUnicodeArgs args; - UChar32 ch; + UChar buffer[U16_MAX_LENGTH]; + const char *s; + UChar32 c; + int32_t i, length; - if(err == NULL || U_FAILURE(*err)) { + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { return 0xffff; } - if(converter == NULL || source == NULL || sourceLimit < *source) { - *err = U_ILLEGAL_ARGUMENT_ERROR; + if(cnv==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; return 0xffff; } - /* In case internal data had been stored - * we return the first UChar32 in the internal buffer, - * and update the internal state accordingly - */ - if (converter->UCharErrorBufferLength > 0) - { - int32_t i = 0; - UChar32 myUChar; - UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar); - /*In this memmove we update the internal buffer by - *popping the first character. - *Note that in the call itself we decrement - *UCharErrorBufferLength - */ - uprv_memmove (converter->UCharErrorBuffer, - converter->UCharErrorBuffer + i, - (converter->UCharErrorBufferLength - i) * sizeof (UChar)); - converter->UCharErrorBufferLength -= (int8_t)i; - return myUChar; - } - /*calls the specific conversion routines */ - /*as dictated in a code review, avoids a switch statement */ - args.converter = converter; - args.flush = TRUE; - args.offsets = NULL; - args.source = *source; - args.sourceLimit = sourceLimit; - args.target = NULL; - args.targetLimit = NULL; - args.size = sizeof(args); - if (converter->sharedData->impl->getNextUChar != NULL) - { - ch = converter->sharedData->impl->getNextUChar(&args, err); + s=*source; + if(sourceLimit(size_t)0x7fffffff && sourceLimit>s)) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + c=U_SENTINEL; + + /* flush the target overflow buffer */ + if(cnv->UCharErrorBufferLength>0) { + UChar *overflow; + + overflow=cnv->UCharErrorBuffer; + i=0; + length=cnv->UCharErrorBufferLength; + U16_NEXT(overflow, i, length, c); + + /* move the remaining overflow contents up to the beginning */ + if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + + if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { + c=cnv->sharedData->impl->getNextUChar(&args, err); + *source=s=args.source; + if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + return 0xffff; /* no output */ + } else if(U_SUCCESS(*err) && c>=0) { + return c; + /* + * else fall through to use _toUnicode() because + * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all + * U_FAILURE: call _toUnicode() for callback handling (do not output c) + */ + } + } + + /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ + _toUnicodeWithCallback(&args, err); + + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + i=0; + length=(int32_t)(args.target-buffer); + } else { + /* write the lead surrogate from the overflow buffer */ + buffer[0]=(UChar)c; + args.target=buffer+1; + i=0; + length=1; + } + + /* buffer contents starts at i and ends before length */ + + if(U_FAILURE(*err)) { + c=0xffff; /* no output */ + } else if(length==0) { + /* no input or only state changes */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + /* no need to reset explicitly because _toUnicodeWithCallback() did it */ + c=0xffff; /* no output */ } else { - /* default implementation */ - ch = ucnv_getNextUCharFromToUImpl(&args, converter->sharedData->impl->toUnicode, FALSE, err); + c=buffer[0]; + i=1; + if(!U16_IS_LEAD(c)) { + /* consume c=buffer[0], done */ + } else { + /* got a lead surrogate, see if a trail surrogate follows */ + UChar c2; + + if(cnv->UCharErrorBufferLength>0) { + /* got overflow output from the conversion */ + if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + + /* move the remaining overflow contents up to the beginning */ + if((--cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + } else { + /* c is an unpaired lead surrogate, just return it */ + } + } else if(args.sourceUCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, + length*U_SIZEOF_UCHAR); + } + cnv->UCharErrorBufferLength=(int8_t)(length+delta); + + cnv->UCharErrorBuffer[0]=buffer[i++]; + if(delta>1) { + cnv->UCharErrorBuffer[1]=buffer[i]; + } } - *source = args.source; - return ch; + + *source=args.source; + return c; } +/* ucnv_convert() and siblings ---------------------------------------------- */ + U_CAPI void U_EXPORT2 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, char **target, const char *targetLimit, @@ -1164,6 +1967,12 @@ ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, } if(pivotStart==NULL) { + if(!flush) { + /* streaming conversion requires an explicit pivot buffer */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* use the stack pivot buffer */ pivotStart=myPivotSource=myPivotTarget=pivotBuffer; pivotSource=&myPivotSource; @@ -1293,7 +2102,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, FALSE, TRUE, pErrorCode); - targetLength=myTarget-target; + targetLength=(int32_t)(myTarget-target); } /* @@ -1316,7 +2125,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, FALSE, TRUE, pErrorCode); - targetLength+=(myTarget-targetBuffer); + targetLength+=(int32_t)(myTarget-targetBuffer); } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); /* done with preflighting, set warnings and errors as appropriate */ @@ -1463,7 +2272,7 @@ ucnv_getType(const UConverter* converter) int8_t type = converter->sharedData->staticData->conversionType; #if !UCONFIG_NO_LEGACY_CONVERSION if(type == UCNV_MBCS) { - return _MBCSGetType(converter); + return ucnv_MBCSGetType(converter); } #endif return (UConverterType)type; @@ -1631,7 +2440,7 @@ ucnv_detectUnicodeSignature( const char* source, } if(sourceLength==-1){ - sourceLength=uprv_strlen(source); + sourceLength=(int32_t)uprv_strlen(source); } @@ -1682,13 +2491,63 @@ ucnv_detectUnicodeSignature( const char* source, *signatureLength=4; return "UTF-7"; } + }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ + *signatureLength=4; + return "UTF-EBCDIC"; } + /* no known Unicode signature byte sequence recognized */ *signatureLength=0; return NULL; } + U_DRAFT int32_t U_EXPORT2 + ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preFromULength > 0){ + return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; + }else if(cnv->preFromULength < 0){ + return -cnv->preFromULength ; + }else if(cnv->fromUChar32 > 0){ + return 1; + }else if(cnv->preFromUFirstCP >0){ + return U16_LENGTH(cnv->preFromUFirstCP); + } + return 0; + + } + +U_DRAFT int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preToULength > 0){ + return cnv->preToULength ; + }else if(cnv->preToULength < 0){ + return -cnv->preToULength; + }else if(cnv->toULength > 0){ + return cnv->toULength; + } + return 0; +} +#endif + /* * Hey, Emacs, please set the following: *