/*
******************************************************************************
*
-* Copyright (C) 1998-2003, International Business Machines
+* Copyright (C) 1998-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
#include "unicode/ustring.h"
-#include "unicode/ures.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
#include "cmemory.h"
#include "cstring.h"
+#include "uassert.h"
+#include "utracimp.h"
#include "ustr_imp.h"
#include "ucnv_imp.h"
-#include "ucnv_io.h"
#include "ucnv_cnv.h"
#include "ucnv_bld.h"
-#if 0
-/* debugging for converters */
-# include <stdio.h>
-void UCNV_DEBUG_LOG(const char *what, const char *who, const void *p, int l)
-{
- static FILE *f = NULL;
- if(f==NULL)
- {
- /* stderr, or open another file */
- f = stderr;
- /* f = fopen("c:\\UCNV_DEBUG_LOG.txt", "w"); */
- }
- if (!what) {
- what = "(null)";
- }
- if (!who) {
- who = "(null)";
- }
- if (!p) {
- p = "(null)";
- }
-
- fprintf(f, "%p\t:%d\t%-20s\t%-10s\n",
- p, l, who, what);
-
- fflush(f);
-}
-
-
-/* dump the contents of a converter */
-static void UCNV_DEBUG_CNV(const UConverter *c, int line)
-{
- UErrorCode err = U_ZERO_ERROR;
- fprintf(stderr, "%p\t:%d\t", c, line);
- if(c!=NULL) {
- const char *name = ucnv_getName(c, &err);
- if (!name) {
- name = "(null)";
- }
- fprintf(stderr, "%s\t", name);
-
- fprintf(stderr, "shr=%p, ref=%x\n",
- c->sharedData,
- c->sharedData->referenceCounter);
- } else {
- fprintf(stderr, "DEMISED\n");
- }
-}
-
-# define UCNV_DEBUG 1
-# define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
-# define UCNV_DEBUG_CNV(c) UCNV_DEBUG_CNV(c, __LINE__)
-#else
-# define UCNV_DEBUG_LOG(x,y,z)
-# define UCNV_DEBUG_CNV(c)
-#endif
-
-
-
/* size of intermediate and preflighting buffers in ucnv_convert() */
#define CHUNK_SIZE 1024
} UAmbiguousConverter;
static const UAmbiguousConverter ambiguousConverters[]={
+ { "ibm-897_P100-1995", 0xa5 },
{ "ibm-942_P120-1999", 0xa5 },
{ "ibm-943_P130-1999", 0xa5 },
+ { "ibm-946_P100-1995", 0xa5 },
{ "ibm-33722_P120-1999", 0xa5 },
+ { "ibm-1041_P100-1995", 0xa5 },
+ /*{ "ibm-54191_P100-2006", 0xa5 },*/
+ /*{ "ibm-62383_P100-2007", 0xa5 },*/
+ /*{ "ibm-891_P100-1995", 0x20a9 },*/
+ { "ibm-944_P100-1995", 0x20a9 },
{ "ibm-949_P110-1999", 0x20a9 },
{ "ibm-1363_P110-1997", 0x20a9 },
- { "ISO_2022,locale=ko,version=0", 0x20a9 }
+ { "ISO_2022,locale=ko,version=0", 0x20a9 },
+ { "ibm-1088_P100-1995", 0x20a9 }
};
-U_CAPI const char* U_EXPORT2
-ucnv_getDefaultName ()
-{
- return ucnv_io_getDefaultConverterName();
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setDefaultName (const char *converterName)
-{
- ucnv_io_setDefaultConverterName(converterName);
-}
/*Calls through createConverter */
U_CAPI UConverter* U_EXPORT2
ucnv_open (const char *name,
UConverter *r;
if (err == NULL || U_FAILURE (*err)) {
- UCNV_DEBUG_LOG("open", name, NULL);
return NULL;
}
r = ucnv_createConverter(NULL, name, err);
- UCNV_DEBUG_LOG("open", name, r);
- UCNV_DEBUG_CNV(r);
return r;
}
return ucnv_open(u_austrcpy(asciiName, name), err);
}
+/* Copy the string that is represented by the UConverterPlatform enum
+ * @param platformString An output buffer
+ * @param platform An enum representing a platform
+ * @return the length of the copied string.
+ */
+static int32_t
+ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
+{
+ switch (pltfrm)
+ {
+ case UCNV_IBM:
+ uprv_strcpy(platformString, "ibm-");
+ return 4;
+ case UCNV_UNKNOWN:
+ break;
+ }
+
+ /* default to empty string */
+ *platformString = 0;
+ return 0;
+}
+
/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
*through createConverter*/
U_CAPI UConverter* U_EXPORT2
NULL
};
+ UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
+
if (status == NULL || U_FAILURE(*status)){
+ UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
return 0;
}
if (!pBufferSize || !cnv){
- *status = U_ILLEGAL_ARGUMENT_ERROR;
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ UTRACE_EXIT_STATUS(*status);
return 0;
}
- UCNV_DEBUG_LOG("cloning FROM", ucnv_getName(cnv,status), cnv);
- UCNV_DEBUG_LOG("cloning WITH", "memory", stackBuffer);
- UCNV_DEBUG_CNV(cnv);
+ UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
+ ucnv_getName(cnv, status), cnv, stackBuffer);
- /* Pointers on 64-bit platforms need to be aligned
- * on a 64-bit boundry in memory.
- */
- if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
- int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
- *pBufferSize -= offsetUp;
- stackBufferChars += offsetUp;
- }
-
- stackBuffer = (void *)stackBufferChars;
-
if (cnv->sharedData->impl->safeClone != NULL) {
/* call the custom safeClone function for sizing */
bufferSizeNeeded = 0;
- cnv->sharedData->impl->safeClone(cnv, stackBuffer, &bufferSizeNeeded, status);
+ cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
}
else
{
if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
*pBufferSize = bufferSizeNeeded;
+ UTRACE_EXIT_VALUE(bufferSizeNeeded);
return 0;
}
+ /* Pointers on 64-bit platforms need to be aligned
+ * on a 64-bit boundary in memory.
+ */
+ if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
+ int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
+ if(*pBufferSize > offsetUp) {
+ *pBufferSize -= offsetUp;
+ stackBufferChars += offsetUp;
+ } else {
+ /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
+ *pBufferSize = 1;
+ }
+ }
+
+ stackBuffer = (void *)stackBufferChars;
+
/* Now, see if we must allocate any memory */
if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
{
if(localConverter == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
+ UTRACE_EXIT_STATUS(*status);
return NULL;
}
allocatedConverter = NULL;
}
+ uprv_memset(localConverter, 0, bufferSizeNeeded);
+
/* Copy initial state */
uprv_memcpy(localConverter, cnv, sizeof(UConverter));
localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
+ /* copy the substitution string */
+ if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+ localConverter->subChars = (uint8_t *)localConverter->subUChars;
+ } else {
+ localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ if (localConverter->subChars == NULL) {
+ uprv_free(allocatedConverter);
+ UTRACE_EXIT_STATUS(*status);
+ return NULL;
+ }
+ uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ }
+
/* now either call the safeclone fcn or not */
if (cnv->sharedData->impl->safeClone != NULL) {
/* call the custom safeClone function */
}
if(localConverter==NULL || U_FAILURE(*status)) {
+ if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
+ uprv_free(allocatedConverter->subChars);
+ }
uprv_free(allocatedConverter);
+ UTRACE_EXIT_STATUS(*status);
return NULL;
}
if(localConverter == (UConverter*)stackBuffer) {
/* we're using user provided data - set to not destroy */
localConverter->isCopyLocal = TRUE;
-#ifdef UCNV_DEBUG
- fprintf(stderr, "%p\t:%d\t\t==stackbuffer %p, isCopyLocal TRUE\n",
- localConverter, __LINE__, stackBuffer);
-#endif
-
- } else {
-#ifdef UCNV_DEBUG
- fprintf(stderr, "%p\t:%d\t\t!=stackbuffer %p, isCopyLocal left at %s\n",
- localConverter, __LINE__, stackBuffer,
- localConverter->isCopyLocal?"TRUE":"FALSE");
-#endif
}
- localConverter->isExtraLocal = localConverter->isCopyLocal;
-
/* allow callback functions to handle any memory allocation */
toUArgs.converter = fromUArgs.converter = localConverter;
cbErr = U_ZERO_ERROR;
cbErr = U_ZERO_ERROR;
cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
- UCNV_DEBUG_LOG("cloning TO", ucnv_getName(localConverter,status), localConverter);
- UCNV_DEBUG_CNV(localConverter);
- UCNV_DEBUG_CNV(cnv);
-
-
+ UTRACE_EXIT_PTR_STATUS(localConverter, *status);
return localConverter;
}
U_CAPI void U_EXPORT2
ucnv_close (UConverter * converter)
{
- /* first, notify the callback functions that the converter is closed */
- UConverterToUnicodeArgs toUArgs = {
- sizeof(UConverterToUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- UConverterFromUnicodeArgs fromUArgs = {
- sizeof(UConverterFromUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
UErrorCode errorCode = U_ZERO_ERROR;
+ UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
+
if (converter == NULL)
{
+ UTRACE_EXIT();
return;
}
- UCNV_DEBUG_LOG("close", ucnv_getName(converter, &errorCode), converter);
- UCNV_DEBUG_CNV(converter);
+ UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
+ ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
- toUArgs.converter = fromUArgs.converter = converter;
-
- converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
- errorCode = U_ZERO_ERROR;
- converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
+ /* In order to speed up the close, only call the callbacks when they have been changed.
+ This performance check will only work when the callbacks are set within a shared library
+ or from user code that statically links this code. */
+ /* first, notify the callback functions that the converter is closed */
+ if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+ UConverterToUnicodeArgs toUArgs = {
+ sizeof(UConverterToUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ toUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
+ }
+ if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+ UConverterFromUnicodeArgs fromUArgs = {
+ sizeof(UConverterFromUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ fromUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
+ }
- UCNV_DEBUG_CNV(converter);
-
if (converter->sharedData->impl->close != NULL) {
converter->sharedData->impl->close(converter);
}
-#ifdef UCNV_DEBUG
- {
- char c[4];
- c[0]='0'+converter->sharedData->referenceCounter;
- c[1]=0;
- UCNV_DEBUG_LOG("close--", c, converter);
- if((converter->sharedData->referenceCounter == 0)&&(converter->sharedData->sharedDataCached == FALSE)) {
- UCNV_DEBUG_CNV(converter);
- UCNV_DEBUG_LOG("close:delDead", "??", converter);
- }
+ if (converter->subChars != (uint8_t *)converter->subUChars) {
+ uprv_free(converter->subChars);
}
-#endif
/*
Checking whether it's an algorithic converter is okay
}
if(!converter->isCopyLocal){
- UCNV_DEBUG_LOG("close:free", "", converter);
- uprv_free (converter);
+ uprv_free(converter);
}
- return;
+
+ UTRACE_EXIT();
}
/*returns a single Name from the list, will return NULL if out of bounds
U_CAPI const char* U_EXPORT2
ucnv_getAvailableName (int32_t n)
{
- if (0 <= n && n <= 0xffff) {
- UErrorCode err = U_ZERO_ERROR;
- const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err);
- if (U_SUCCESS(err)) {
- return name;
+ if (0 <= n && n <= 0xffff) {
+ UErrorCode err = U_ZERO_ERROR;
+ const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
+ if (U_SUCCESS(err)) {
+ return name;
+ }
}
- }
- return NULL;
+ return NULL;
}
U_CAPI int32_t U_EXPORT2
ucnv_countAvailable ()
{
UErrorCode err = U_ZERO_ERROR;
- return ucnv_io_countAvailableConverters(&err);
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
-{
- return ucnv_io_countAliases(alias, pErrorCode);
-}
-
-
-U_CAPI const char* U_EXPORT2
-ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
-{
- return ucnv_io_getAlias(alias, n, pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
-{
- ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countStandards(void)
-{
- UErrorCode err = U_ZERO_ERROR;
- return ucnv_io_countStandards(&err);
+ return ucnv_bld_countAvailableConverters(&err);
}
U_CAPI void U_EXPORT2
if (U_FAILURE (*err))
return;
+ if (converter->subCharLen <= 0) {
+ /* Unicode string or empty string from ucnv_setSubstString(). */
+ *len = 0;
+ return;
+ }
+
if (*len < converter->subCharLen) /*not enough space in subChars */
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
- uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
- *len = converter->subCharLen; /*store # of bytes copied to buffer */
- uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
+ uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
*len = converter->subCharLen; /*store # of bytes copied to buffer */
}
return;
}
- uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
+ uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
converter->subCharLen = len; /*sets the new len */
/*
return;
}
-U_CAPI int32_t U_EXPORT2
-ucnv_getDisplayName(const UConverter *cnv,
- const char *displayLocale,
- UChar *displayName, int32_t displayNameCapacity,
- UErrorCode *pErrorCode) {
- UResourceBundle *rb;
- const UChar *name;
- int32_t length;
-
- /* check arguments */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
+U_CAPI void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+ const UChar *s,
+ int32_t length,
+ UErrorCode *err) {
+ UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
+ char chars[UCNV_ERROR_BUFFER_LENGTH];
+
+ UConverter *clone;
+ uint8_t *subChars;
+ int32_t cloneSize, length8;
+
+ /* Let the following functions check all arguments. */
+ cloneSize = sizeof(cloneBuffer);
+ clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
+ ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
+ length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
+ ucnv_close(clone);
+ if (U_FAILURE(*err)) {
+ return;
}
- if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
+ if (cnv->sharedData->impl->writeSub == NULL
+#if !UCONFIG_NO_LEGACY_CONVERSION
+ || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
+ ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
+#endif
+ ) {
+ /* The converter is not stateful. Store the charset bytes as a fixed string. */
+ subChars = (uint8_t *)chars;
+ } else {
+ /*
+ * The converter has a non-default writeSub() function, indicating
+ * that it is stateful.
+ * Store the Unicode string for on-the-fly conversion for correct
+ * state handling.
+ */
+ if (length > UCNV_ERROR_BUFFER_LENGTH) {
+ /*
+ * Should not occur. The converter should output at least one byte
+ * per UChar, which means that ucnv_fromUChars() should catch all
+ * overflows.
+ */
+ *err = U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+ subChars = (uint8_t *)s;
+ if (length < 0) {
+ length = u_strlen(s);
+ }
+ length8 = length * U_SIZEOF_UCHAR;
}
- /* open the resource bundle and get the display name string */
- rb=ures_open(NULL, displayLocale, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
+ /*
+ * For storing the substitution string, select either the small buffer inside
+ * UConverter or allocate a subChars buffer.
+ */
+ if (length8 > UCNV_MAX_SUBCHAR_LEN) {
+ /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
+ if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+ /* Allocate a new buffer for the string. */
+ cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ if (cnv->subChars == NULL) {
+ cnv->subChars = (uint8_t *)cnv->subUChars;
+ *err = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+ }
}
- /* use the internal name as the key */
- name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, pErrorCode);
- ures_close(rb);
-
- if(U_SUCCESS(*pErrorCode)) {
- /* copy the string */
- u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR);
+ /* Copy the substitution string into the UConverter or its subChars buffer. */
+ if (length8 == 0) {
+ cnv->subCharLen = 0;
} else {
- /* convert the internal name into a Unicode string */
- *pErrorCode=U_ZERO_ERROR;
- length=uprv_strlen(cnv->sharedData->staticData->name);
- u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity));
+ uprv_memcpy(cnv->subChars, subChars, length8);
+ if (subChars == (uint8_t *)chars) {
+ cnv->subCharLen = (int8_t)length8;
+ } else /* subChars == s */ {
+ cnv->subCharLen = (int8_t)-length;
+ }
}
- return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode);
+
+ /* See comment in ucnv_setSubstChars(). */
+ cnv->subChar1 = 0;
}
/*resets the internal states of a converter
*goal : have the same behaviour than a freshly created converter
*/
-static void _reset(UConverter *converter, UConverterResetChoice choice) {
- /* first, notify the callback functions that the converter is reset */
- UConverterToUnicodeArgs toUArgs = {
- sizeof(UConverterToUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- UConverterFromUnicodeArgs fromUArgs = {
- sizeof(UConverterFromUnicodeArgs),
- TRUE,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
- };
- UErrorCode errorCode;
-
+static void _reset(UConverter *converter, UConverterResetChoice choice,
+ UBool callCallback) {
if(converter == NULL) {
return;
}
- toUArgs.converter = fromUArgs.converter = converter;
- if(choice<=UCNV_RESET_TO_UNICODE) {
- errorCode = U_ZERO_ERROR;
- converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- errorCode = U_ZERO_ERROR;
- converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
+ if(callCallback) {
+ /* first, notify the callback functions that the converter is reset */
+ UErrorCode errorCode;
+
+ if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+ UConverterToUnicodeArgs toUArgs = {
+ sizeof(UConverterToUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ toUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
+ }
+ if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+ UConverterFromUnicodeArgs fromUArgs = {
+ sizeof(UConverterFromUnicodeArgs),
+ TRUE,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+ fromUArgs.converter = converter;
+ errorCode = U_ZERO_ERROR;
+ converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
+ }
}
/* now reset the converter itself */
if(choice<=UCNV_RESET_TO_UNICODE) {
converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
+ converter->mode = 0;
converter->toULength = 0;
converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
+ converter->preToULength = 0;
}
if(choice!=UCNV_RESET_TO_UNICODE) {
converter->fromUnicodeStatus = 0;
- converter->fromUSurrogateLead = 0;
+ converter->fromUChar32 = 0;
converter->invalidUCharLength = converter->charErrorBufferLength = 0;
+ converter->preFromUFirstCP = U_SENTINEL;
+ converter->preFromULength = 0;
}
if (converter->sharedData->impl->reset != NULL) {
/* call the custom reset function */
converter->sharedData->impl->reset(converter, choice);
- } else if(choice<=UCNV_RESET_TO_UNICODE) {
- converter->mode = UCNV_SI;
}
}
U_CAPI void U_EXPORT2
ucnv_reset(UConverter *converter)
{
- _reset(converter, UCNV_RESET_BOTH);
+ _reset(converter, UCNV_RESET_BOTH, TRUE);
}
U_CAPI void U_EXPORT2
ucnv_resetToUnicode(UConverter *converter)
{
- _reset(converter, UCNV_RESET_TO_UNICODE);
+ _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
}
U_CAPI void U_EXPORT2
ucnv_resetFromUnicode(UConverter *converter)
{
- _reset(converter, UCNV_RESET_FROM_UNICODE);
+ _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
}
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize (const UConverter * converter)
{
- return converter->sharedData->staticData->maxBytesPerChar;
+ return converter->maxBytesPerUChar;
}
return converter->sharedData->staticData->name;
}
-U_CAPI int32_t U_EXPORT2
-ucnv_getCCSID (const UConverter * converter,
- UErrorCode * err)
+U_CAPI int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter * converter,
+ UErrorCode * err)
{
+ int32_t ccsid;
if (U_FAILURE (*err))
return -1;
- return converter->sharedData->staticData->codepage;
+ ccsid = converter->sharedData->staticData->codepage;
+ if (ccsid == 0) {
+ /* Rare case. This is for cases like gb18030,
+ which doesn't have an IBM canonical name, but does have an IBM alias. */
+ const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
+ if (U_SUCCESS(*err) && standardName) {
+ const char *ccsidStr = uprv_strchr(standardName, '-');
+ if (ccsidStr) {
+ ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
+ }
+ }
+ }
+ return ccsid;
}
return (UConverterPlatform)converter->sharedData->staticData->platform;
}
-U_CAPI void U_EXPORT2
-ucnv_getUnicodeSet(const UConverter *cnv,
- USet *set,
- UConverterUnicodeSet which,
- UErrorCode *pErrorCode) {
- /* argument checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return;
- }
- if(cnv==NULL || set==NULL || which<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=which) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* does this converter support this function? */
- if(cnv->sharedData->impl->getUnicodeSet==NULL) {
- *pErrorCode=U_UNSUPPORTED_ERROR;
- return;
- }
-
- /* empty the set */
- uset_clear(set);
-
- /* call the converter to add the code points it supports */
- cnv->sharedData->impl->getUnicodeSet(cnv, set, which, pErrorCode);
-}
-
U_CAPI void U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter,
UConverterToUCallback *action,
converter->fromUContext = newContext;
}
-U_CAPI void U_EXPORT2
-ucnv_fromUnicode (UConverter * _this,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t* offsets,
- UBool flush,
- UErrorCode * err)
-{
- UConverterFromUnicodeArgs args;
- const char *t;
+static void
+_updateOffsets(int32_t *offsets, int32_t length,
+ int32_t sourceIndex, int32_t errorInputLength) {
+ int32_t *limit;
+ int32_t delta, offset;
+
+ if(sourceIndex>=0) {
+ /*
+ * adjust each offset by adding the previous sourceIndex
+ * minus the length of the input sequence that caused an
+ * error, if any
+ */
+ delta=sourceIndex-errorInputLength;
+ } else {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ */
+ delta=-1;
+ }
+
+ limit=offsets+length;
+ if(delta==0) {
+ /* most common case, nothing to do */
+ } else if(delta>0) {
+ /* add the delta to each offset (but not if the offset is <0) */
+ while(offsets<limit) {
+ offset=*offsets;
+ if(offset>=0) {
+ *offsets=offset+delta;
+ }
+ ++offsets;
+ }
+ } else /* delta<0 */ {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ * or the error input sequence started in a previous buffer
+ */
+ while(offsets<limit) {
+ *offsets++=-1;
+ }
+ }
+}
+
+/* ucnv_fromUnicode --------------------------------------------------------- */
+
+/*
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ */
+
+static void
+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterFromUnicode fromUnicode;
+ UConverter *cnv;
+ const UChar *s;
+ char *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ UChar replay[UCNV_EXT_MAX_UCHARS];
+ const UChar *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ } else {
+ fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
+ if(fromUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ if(cnv->preFromULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preFromULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ fromUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preFromULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->fromUChar32==0);
+ } else {
+ /* handle error from ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preFromULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preFromULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preFromULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }
+
+ /* update pointers */
+ s=pArgs->source;
+ t=pArgs->target;
+
+ if(U_SUCCESS(*err)) {
+ if(s<pArgs->sourceLimit) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=NULL) {
+ /* switch back from replaying to the real source and continue */
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ sourceIndex=realSourceIndex;
+
+ realSource=NULL;
+ break;
+ } else if(pArgs->flush && cnv->fromUChar32!=0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *err=U_TRUNCATED_CHAR_FOUND;
+ calledCallback=FALSE; /* new error condition */
+ } else {
+ /* input consumed */
+ if(pArgs->flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ return;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+ UErrorCode e;
+
+ if( calledCallback ||
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+ (e!=U_INVALID_CHAR_FOUND &&
+ e!=U_ILLEGAL_CHAR_FOUND &&
+ e!=U_TRUNCATED_CHAR_FOUND)
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=NULL) {
+ int32_t length;
+
+ U_ASSERT(cnv->preFromULength==0);
+
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+ if(length>0) {
+ uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
+ cnv->preFromULength=(int8_t)-length;
+ }
+
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ }
+
+ return;
+ }
+ }
+
+ /* callback handling */
+ {
+ UChar32 codePoint;
+
+ /* get and write the code point */
+ codePoint=cnv->fromUChar32;
+ errorInputLength=0;
+ U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
+ cnv->invalidUCharLength=(int8_t)errorInputLength;
+
+ /* set the converter state to deal with the next character */
+ cnv->fromUChar32=0;
+
+ /* call the callback function */
+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
+ cnv->invalidUCharBuffer, errorInputLength, codePoint,
+ *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
+ err);
+ }
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=TRUE;
+ }
+ }
+}
+
+/*
+ * Output the fromUnicode overflow buffer.
+ * Call this function if(cnv->charErrorBufferLength>0).
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowFromUnicode(UConverter *cnv,
+ char **target, const char *targetLimit,
+ int32_t **pOffsets,
+ UErrorCode *err) {
+ int32_t *offsets;
+ char *overflow, *t;
+ int32_t i, length;
+
+ t=*target;
+ if(pOffsets!=NULL) {
+ offsets=*pOffsets;
+ } else {
+ offsets=NULL;
+ }
+
+ overflow=(char *)cnv->charErrorBuffer;
+ length=cnv->charErrorBufferLength;
+ i=0;
+ while(i<length) {
+ if(t==targetLimit) {
+ /* the overflow buffer contains too much, keep the rest */
+ int32_t j=0;
+
+ do {
+ overflow[j++]=overflow[i++];
+ } while(i<length);
+
+ cnv->charErrorBufferLength=(int8_t)j;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
+ }
+ *err=U_BUFFER_OVERFLOW_ERROR;
+ return TRUE;
+ }
+
+ /* copy the overflow contents to the target */
+ *t++=overflow[i++];
+ if(offsets!=NULL) {
+ *offsets++=-1; /* no source index available for old output */
+ }
+ }
+
+ /* the overflow buffer is completely copied to the target */
+ cnv->charErrorBufferLength=0;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
+ }
+ return FALSE;
+}
+
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode(UConverter *cnv,
+ char **target, const char *targetLimit,
+ const UChar **source, const UChar *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err) {
+ UConverterFromUnicodeArgs args;
+ const UChar *s;
+ char *t;
+
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
+ return;
+ }
+
+ if(cnv==NULL || target==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ s=*source;
+ t=*target;
+
+ if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
+ /*
+ Prevent code from going into an infinite loop in case we do hit this
+ limit. The limit pointer is expected to be on a UChar * boundary.
+ This also prevents the next argument check from failing.
+ */
+ sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
+ }
+
+ /*
+ * All these conditions should never happen.
+ *
+ * 1) Make sure that the limits are >= to the address source or target
+ *
+ * 2) Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be targetLimit=t+0x7fffffff; for example.
+ *
+ * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+ * to a char * pointer and provide an incomplete UChar code unit.
+ */
+ if (sourceLimit<s || targetLimit<t ||
+ ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
+ (((const char *)sourceLimit-(const char *)s) & 1) != 0)
+ {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* output the target overflow buffer */
+ if( cnv->charErrorBufferLength>0 &&
+ ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
+ ) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target may have moved, therefore stop using t */
+
+ if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=flush;
+ args.offsets=offsets;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=*target;
+ args.targetLimit=targetLimit;
+ args.size=sizeof(args);
+
+ _fromUnicodeWithCallback(&args, err);
+
+ *source=args.source;
+ *target=args.target;
+}
+
+/* ucnv_toUnicode() --------------------------------------------------------- */
+
+static void
+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterToUnicode toUnicode;
+ UConverter *cnv;
+ const char *s;
+ UChar *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ char replay[UCNV_EXT_MAX_BYTES];
+ const char *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ } else {
+ toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
+ if(toUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ if(cnv->preToULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preToULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ toUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preToULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->toULength==0);
+ } else {
+ /* handle error from getNextUChar() or ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preToULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preToULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preToULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }
- /*
- * Check parameters in for all conversions
- */
- if (err == NULL || U_FAILURE (*err)) {
- return;
- }
+ /* update pointers */
+ s=pArgs->source;
+ t=pArgs->target;
+
+ if(U_SUCCESS(*err)) {
+ if(s<pArgs->sourceLimit) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=NULL) {
+ /* switch back from replaying to the real source and continue */
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ sourceIndex=realSourceIndex;
+
+ realSource=NULL;
+ break;
+ } else if(pArgs->flush && cnv->toULength>0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *err=U_TRUNCATED_CHAR_FOUND;
+ calledCallback=FALSE; /* new error condition */
+ } else {
+ /* input consumed */
+ if(pArgs->flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ return;
+ }
+ }
- if (_this == NULL || target == NULL || source == NULL) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
+ /* U_FAILURE(*err) */
+ {
+ UErrorCode e;
+
+ if( calledCallback ||
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+ (e!=U_INVALID_CHAR_FOUND &&
+ e!=U_ILLEGAL_CHAR_FOUND &&
+ e!=U_TRUNCATED_CHAR_FOUND &&
+ e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
+ e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=NULL) {
+ int32_t length;
+
+ U_ASSERT(cnv->preToULength==0);
+
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+ if(length>0) {
+ uprv_memcpy(cnv->preToU, pArgs->source, length);
+ cnv->preToULength=(int8_t)-length;
+ }
+
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ }
+
+ return;
+ }
+ }
- t = *target;
- if (targetLimit < t || sourceLimit < *source)
- {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
+ /* copy toUBytes[] to invalidCharBuffer[] */
+ errorInputLength=cnv->invalidCharLength=cnv->toULength;
+ if(errorInputLength>0) {
+ uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
+ }
- /*
- * Make sure that the target buffer size does not exceed the number range for int32_t
- * because some functions use the size rather than comparing pointers.
- * size_t is guaranteed to be unsigned.
- */
- if((size_t)(targetLimit - t) > (size_t)0x7fffffff && targetLimit > t)
- {
- targetLimit = t + 0x7fffffff;
- }
-
- /*
- * Deal with stored carry over data. This is done in the common location
- * to avoid doing it for each conversion.
- */
- if (_this->charErrorBufferLength > 0)
- {
- int32_t myTargetIndex = 0;
-
- ucnv_flushInternalCharBuffer (_this,
- (char *)t,
- &myTargetIndex,
- targetLimit - *target,
- offsets?&offsets:NULL,
+ /* set the converter state to deal with the next character */
+ cnv->toULength=0;
+
+ /* call the callback function */
+ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
+ cnv->toUCallbackReason = UCNV_UNASSIGNED;
+ }
+ cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+ cnv->invalidCharBuffer, errorInputLength,
+ cnv->toUCallbackReason,
err);
- *target += myTargetIndex;
- if (U_FAILURE (*err))
- return;
+ cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=TRUE;
+ }
}
+}
- if(!flush && *source == sourceLimit) {
- /* the overflow buffer is emptied and there is no new input: we are done */
- return;
+/*
+ * Output the toUnicode overflow buffer.
+ * Call this function if(cnv->UCharErrorBufferLength>0).
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowToUnicode(UConverter *cnv,
+ UChar **target, const UChar *targetLimit,
+ int32_t **pOffsets,
+ UErrorCode *err) {
+ int32_t *offsets;
+ UChar *overflow, *t;
+ int32_t i, length;
+
+ t=*target;
+ if(pOffsets!=NULL) {
+ offsets=*pOffsets;
+ } else {
+ offsets=NULL;
}
- args.converter = _this;
- args.flush = flush;
- args.offsets = offsets;
- args.source = *source;
- args.sourceLimit = sourceLimit;
- args.target = *target;
- args.targetLimit = targetLimit;
- args.size = sizeof(args);
- if (offsets)
- {
- if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL)
- {
- _this->sharedData->impl->fromUnicodeWithOffsets(&args, err);
- *source = args.source;
- *target = args.target;
- return;
- }
- else {
- /* there is no implementation that sets offsets, set them all to -1 */
- int32_t i, targetSize = targetLimit - *target;
-
- for (i=0; i<targetSize; i++) {
- offsets[i] = -1;
+ overflow=cnv->UCharErrorBuffer;
+ length=cnv->UCharErrorBufferLength;
+ i=0;
+ while(i<length) {
+ if(t==targetLimit) {
+ /* the overflow buffer contains too much, keep the rest */
+ int32_t j=0;
+
+ do {
+ overflow[j++]=overflow[i++];
+ } while(i<length);
+
+ cnv->UCharErrorBufferLength=(int8_t)j;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
}
+ *err=U_BUFFER_OVERFLOW_ERROR;
+ return TRUE;
}
- }
-
- /*calls the specific conversion routines */
- _this->sharedData->impl->fromUnicode(&args, err);
- *source = args.source;
- *target = args.target;
-}
+ /* copy the overflow contents to the target */
+ *t++=overflow[i++];
+ if(offsets!=NULL) {
+ *offsets++=-1; /* no source index available for old output */
+ }
+ }
+ /* the overflow buffer is completely copied to the target */
+ cnv->UCharErrorBufferLength=0;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
+ }
+ return FALSE;
+}
-U_CAPI void U_EXPORT2
-ucnv_toUnicode (UConverter * _this,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t* offsets,
- UBool flush,
- UErrorCode * err)
-{
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *cnv,
+ UChar **target, const UChar *targetLimit,
+ const char **source, const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err) {
UConverterToUnicodeArgs args;
- const UChar *t;
+ const char *s;
+ UChar *t;
- /*
- * Check parameters in for all conversions
- */
- if (err == NULL || U_FAILURE (*err)) {
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
return;
}
- if (_this == NULL || target == NULL || source == NULL) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
+ if(cnv==NULL || target==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- t = *target;
- if (targetLimit < t || sourceLimit < *source) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
+ s=*source;
+ t=*target;
- /*
- * Make sure that the target buffer size does not exceed the number range for int32_t
- * because some functions use the size rather than comparing pointers.
- * size_t is guaranteed to be unsigned.
- */
- if((size_t)(targetLimit - t) > (size_t)0x3fffffff && targetLimit > t) {
- targetLimit = t + 0x3fffffff;
+ if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
+ /*
+ Prevent code from going into an infinite loop in case we do hit this
+ limit. The limit pointer is expected to be on a UChar * boundary.
+ This also prevents the next argument check from failing.
+ */
+ targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
}
/*
- * Deal with stored carry over data. This is done in the common location
- * to avoid doing it for each conversion.
- */
- if (_this->UCharErrorBufferLength > 0)
- {
- int32_t myTargetIndex = 0;
-
- ucnv_flushInternalUnicodeBuffer (_this,
- (UChar *)t,
- &myTargetIndex,
- targetLimit - *target,
- offsets?&offsets:NULL,
- err);
- *target += myTargetIndex;
- if (U_FAILURE (*err))
- return;
+ * All these conditions should never happen.
+ *
+ * 1) Make sure that the limits are >= to the address source or target
+ *
+ * 2) Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be sourceLimit=t+0x7fffffff; for example.
+ *
+ * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+ * to a char * pointer and provide an incomplete UChar code unit.
+ */
+ if (sourceLimit<s || targetLimit<t ||
+ ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
+ (((const char *)targetLimit-(const char *)t) & 1) != 0
+ ) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* output the target overflow buffer */
+ if( cnv->UCharErrorBufferLength>0 &&
+ ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
+ ) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
}
+ /* *target may have moved, therefore stop using t */
- if(!flush && *source == sourceLimit) {
+ if(!flush && s==sourceLimit && cnv->preToULength>=0) {
/* the overflow buffer is emptied and there is no new input: we are done */
return;
}
- args.converter = _this;
- args.flush = flush;
- args.offsets = offsets;
- args.source = (char *) *source;
- args.sourceLimit = sourceLimit;
- args.target = *target;
- args.targetLimit = targetLimit;
- args.size = sizeof(args);
- if (offsets) {
- if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) {
- _this->sharedData->impl->toUnicodeWithOffsets(&args, err);
- *source = args.source;
- *target = args.target;
- return;
- } else {
- /* there is no implementation that sets offsets, set them all to -1 */
- int32_t i, targetSize = targetLimit - *target;
-
- for (i=0; i<targetSize; i++) {
- offsets[i] = -1;
- }
- }
- }
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
- /*calls the specific conversion routines */
- _this->sharedData->impl->toUnicode(&args, err);
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=flush;
+ args.offsets=offsets;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=*target;
+ args.targetLimit=targetLimit;
+ args.size=sizeof(args);
- *source = args.source;
- *target = args.target;
- return;
+ _toUnicodeWithCallback(&args, err);
+
+ *source=args.source;
+ *target=args.target;
}
+/* ucnv_to/fromUChars() ----------------------------------------------------- */
+
U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
char *dest, int32_t destCapacity,
ucnv_resetToUnicode(cnv);
originalDest=dest;
if(srcLength==-1) {
- srcLength=uprv_strlen(src);
+ srcLength=(int32_t)uprv_strlen(src);
}
if(srcLength>0) {
srcLimit=src+srcLength;
return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
}
-U_CAPI UChar32 U_EXPORT2
-ucnv_getNextUChar(UConverter * converter,
- const char **source,
- const char *sourceLimit,
- UErrorCode * err)
-{
+/* ucnv_getNextUChar() ------------------------------------------------------ */
+
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter *cnv,
+ const char **source, const char *sourceLimit,
+ UErrorCode *err) {
UConverterToUnicodeArgs args;
- UChar32 ch;
+ UChar buffer[U16_MAX_LENGTH];
+ const char *s;
+ UChar32 c;
+ int32_t i, length;
- if(err == NULL || U_FAILURE(*err)) {
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
return 0xffff;
}
- if(converter == NULL || source == NULL || sourceLimit < *source) {
- *err = U_ILLEGAL_ARGUMENT_ERROR;
+ if(cnv==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
return 0xffff;
}
- /* In case internal data had been stored
- * we return the first UChar32 in the internal buffer,
- * and update the internal state accordingly
- */
- if (converter->UCharErrorBufferLength > 0)
- {
- int32_t i = 0;
- UChar32 myUChar;
- UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar);
- /*In this memmove we update the internal buffer by
- *popping the first character.
- *Note that in the call itself we decrement
- *UCharErrorBufferLength
- */
- uprv_memmove (converter->UCharErrorBuffer,
- converter->UCharErrorBuffer + i,
- (converter->UCharErrorBufferLength - i) * sizeof (UChar));
- converter->UCharErrorBufferLength -= (int8_t)i;
- return myUChar;
- }
- /*calls the specific conversion routines */
- /*as dictated in a code review, avoids a switch statement */
- args.converter = converter;
- args.flush = TRUE;
- args.offsets = NULL;
- args.source = *source;
- args.sourceLimit = sourceLimit;
- args.target = NULL;
- args.targetLimit = NULL;
- args.size = sizeof(args);
- if (converter->sharedData->impl->getNextUChar != NULL)
- {
- ch = converter->sharedData->impl->getNextUChar(&args, err);
+ s=*source;
+ if(sourceLimit<s) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be sourceLimit=t+0x7fffffff; for example.
+ */
+ if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0xffff;
+ }
+
+ c=U_SENTINEL;
+
+ /* flush the target overflow buffer */
+ if(cnv->UCharErrorBufferLength>0) {
+ UChar *overflow;
+
+ overflow=cnv->UCharErrorBuffer;
+ i=0;
+ length=cnv->UCharErrorBufferLength;
+ U16_NEXT(overflow, i, length, c);
+
+ /* move the remaining overflow contents up to the beginning */
+ if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+ }
+
+ if(!U16_IS_LEAD(c) || i<length) {
+ return c;
+ }
+ /*
+ * Continue if the overflow buffer contained only a lead surrogate,
+ * in case the converter outputs single surrogates from complete
+ * input sequences.
+ */
+ }
+
+ /*
+ * flush==TRUE is implied for ucnv_getNextUChar()
+ *
+ * do not simply return even if s==sourceLimit because the converter may
+ * not have seen flush==TRUE before
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=TRUE;
+ args.offsets=NULL;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=buffer;
+ args.targetLimit=buffer+1;
+ args.size=sizeof(args);
+
+ if(c<0) {
+ /*
+ * call the native getNextUChar() implementation if we are
+ * at a character boundary (toULength==0)
+ *
+ * unlike with _toUnicode(), getNextUChar() implementations must set
+ * U_TRUNCATED_CHAR_FOUND for truncated input,
+ * in addition to setting toULength/toUBytes[]
+ */
+ if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
+ c=cnv->sharedData->impl->getNextUChar(&args, err);
+ *source=s=args.source;
+ if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+ return 0xffff; /* no output */
+ } else if(U_SUCCESS(*err) && c>=0) {
+ return c;
+ /*
+ * else fall through to use _toUnicode() because
+ * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
+ * U_FAILURE: call _toUnicode() for callback handling (do not output c)
+ */
+ }
+ }
+
+ /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
+ _toUnicodeWithCallback(&args, err);
+
+ if(*err==U_BUFFER_OVERFLOW_ERROR) {
+ *err=U_ZERO_ERROR;
+ }
+
+ i=0;
+ length=(int32_t)(args.target-buffer);
} else {
- /* default implementation */
- ch = ucnv_getNextUCharFromToUImpl(&args, converter->sharedData->impl->toUnicode, FALSE, err);
+ /* write the lead surrogate from the overflow buffer */
+ buffer[0]=(UChar)c;
+ args.target=buffer+1;
+ i=0;
+ length=1;
+ }
+
+ /* buffer contents starts at i and ends before length */
+
+ if(U_FAILURE(*err)) {
+ c=0xffff; /* no output */
+ } else if(length==0) {
+ /* no input or only state changes */
+ *err=U_INDEX_OUTOFBOUNDS_ERROR;
+ /* no need to reset explicitly because _toUnicodeWithCallback() did it */
+ c=0xffff; /* no output */
+ } else {
+ c=buffer[0];
+ i=1;
+ if(!U16_IS_LEAD(c)) {
+ /* consume c=buffer[0], done */
+ } else {
+ /* got a lead surrogate, see if a trail surrogate follows */
+ UChar c2;
+
+ if(cnv->UCharErrorBufferLength>0) {
+ /* got overflow output from the conversion */
+ if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
+ /* got a trail surrogate, too */
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+
+ /* move the remaining overflow contents up to the beginning */
+ if((--cnv->UCharErrorBufferLength)>0) {
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+ }
+ } else {
+ /* c is an unpaired lead surrogate, just return it */
+ }
+ } else if(args.source<sourceLimit) {
+ /* convert once more, to buffer[1] */
+ args.targetLimit=buffer+2;
+ _toUnicodeWithCallback(&args, err);
+ if(*err==U_BUFFER_OVERFLOW_ERROR) {
+ *err=U_ZERO_ERROR;
+ }
+
+ length=(int32_t)(args.target-buffer);
+ if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
+ /* got a trail surrogate, too */
+ c=U16_GET_SUPPLEMENTARY(c, c2);
+ i=2;
+ }
+ }
+ }
+ }
+
+ /*
+ * move leftover output from buffer[i..length[
+ * into the beginning of the overflow buffer
+ */
+ if(i<length) {
+ /* move further overflow back */
+ int32_t delta=length-i;
+ if((length=cnv->UCharErrorBufferLength)>0) {
+ uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
+ length*U_SIZEOF_UCHAR);
+ }
+ cnv->UCharErrorBufferLength=(int8_t)(length+delta);
+
+ cnv->UCharErrorBuffer[0]=buffer[i++];
+ if(delta>1) {
+ cnv->UCharErrorBuffer[1]=buffer[i];
+ }
}
- *source = args.source;
- return ch;
+
+ *source=args.source;
+ return c;
}
+/* ucnv_convert() and siblings ---------------------------------------------- */
+
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
char **target, const char *targetLimit,
UBool reset, UBool flush,
UErrorCode *pErrorCode) {
UChar pivotBuffer[CHUNK_SIZE];
- UChar *myPivotSource, *myPivotTarget;
+ const UChar *myPivotSource;
+ UChar *myPivotTarget;
+ const char *s;
+ char *t;
+
+ UConverterToUnicodeArgs toUArgs;
+ UConverterFromUnicodeArgs fromUArgs;
+ UConverterConvert convert;
/* error checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
+ s=*source;
+ t=*target;
+ if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t. See ucnv_toUnicode() for a more detailed comment.
+ */
+ if(
+ (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
if(pivotStart==NULL) {
+ if(!flush) {
+ /* streaming conversion requires an explicit pivot buffer */
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
/* use the stack pivot buffer */
- pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
- pivotSource=&myPivotSource;
+ myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
+ pivotSource=(UChar **)&myPivotSource;
pivotTarget=&myPivotTarget;
pivotLimit=pivotBuffer+CHUNK_SIZE;
} else if( pivotStart>=pivotLimit ||
if(reset) {
ucnv_resetToUnicode(sourceCnv);
ucnv_resetFromUnicode(targetCnv);
- *pivotTarget=*pivotSource=pivotStart;
+ *pivotSource=*pivotTarget=pivotStart;
+ } else if(targetCnv->charErrorBufferLength>0) {
+ /* output the targetCnv overflow buffer */
+ if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target has moved, therefore stop using t */
+
+ if( !flush &&
+ targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
+ sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
+ ) {
+ /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+ }
+
+ /* Is direct-UTF-8 conversion available? */
+ if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+ targetCnv->sharedData->impl->fromUTF8!=NULL
+ ) {
+ convert=targetCnv->sharedData->impl->fromUTF8;
+ } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+ sourceCnv->sharedData->impl->toUTF8!=NULL
+ ) {
+ convert=sourceCnv->sharedData->impl->toUTF8;
+ } else {
+ convert=NULL;
}
- /* conversion loop */
+ /*
+ * If direct-UTF-8 conversion is available, then we use a smaller
+ * pivot buffer for error handling and partial matches
+ * so that we quickly return to direct conversion.
+ *
+ * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
+ *
+ * We could reduce the pivot buffer size further, at the cost of
+ * buffer overflows from callbacks.
+ * The pivot buffer should not be smaller than the maximum number of
+ * fromUnicode extension table input UChars
+ * (for m:n conversion, see
+ * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
+ * or 2 for surrogate pairs.
+ *
+ * Too small a buffer can cause thrashing between pivoting and direct
+ * conversion, with function call overhead outweighing the benefits
+ * of direct conversion.
+ */
+ if(convert!=NULL && (pivotLimit-pivotStart)>32) {
+ pivotLimit=pivotStart+32;
+ }
+
+ /* prepare the converter arguments */
+ fromUArgs.converter=targetCnv;
+ fromUArgs.flush=FALSE;
+ fromUArgs.offsets=NULL;
+ fromUArgs.target=*target;
+ fromUArgs.targetLimit=targetLimit;
+ fromUArgs.size=sizeof(fromUArgs);
+
+ toUArgs.converter=sourceCnv;
+ toUArgs.flush=flush;
+ toUArgs.offsets=NULL;
+ toUArgs.source=s;
+ toUArgs.sourceLimit=sourceLimit;
+ toUArgs.targetLimit=pivotLimit;
+ toUArgs.size=sizeof(toUArgs);
+
+ /*
+ * TODO: Consider separating this function into two functions,
+ * extracting exactly the conversion loop,
+ * for readability and to reduce the set of visible variables.
+ *
+ * Otherwise stop using s and t from here on.
+ */
+ s=t=NULL;
+
+ /*
+ * conversion loop
+ *
+ * The sequence of steps in the loop may appear backward,
+ * but the principle is simple:
+ * In the chain of
+ * source - sourceCnv overflow - pivot - targetCnv overflow - target
+ * empty out later buffers before refilling them from earlier ones.
+ *
+ * The targetCnv overflow buffer is flushed out only once before the loop.
+ */
for(;;) {
- if(reset) {
+ /*
+ * if(pivot not empty or error or replay or flush fromUnicode) {
+ * fromUnicode(pivot -> target);
+ * }
+ *
+ * For pivoting conversion; and for direct conversion for
+ * error callback handling and flushing the replay buffer.
+ */
+ if( *pivotSource<*pivotTarget ||
+ U_FAILURE(*pErrorCode) ||
+ targetCnv->preFromULength<0 ||
+ fromUArgs.flush
+ ) {
+ fromUArgs.source=*pivotSource;
+ fromUArgs.sourceLimit=*pivotTarget;
+ _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ /* target overflow, or conversion error */
+ *pivotSource=(UChar *)fromUArgs.source;
+ break;
+ }
+
/*
- * if we did a reset in this function, we know that there is nothing
- * to convert to the target yet, so we save a function call
+ * _fromUnicodeWithCallback() must have consumed the pivot contents
+ * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
*/
- reset=FALSE;
- } else {
+ }
+
+ /* The pivot buffer is empty; reset it so we start at pivotStart. */
+ *pivotSource=*pivotTarget=pivotStart;
+
+ /*
+ * if(sourceCnv overflow buffer not empty) {
+ * move(sourceCnv overflow buffer -> pivot);
+ * continue;
+ * }
+ */
+ /* output the sourceCnv overflow buffer */
+ if(sourceCnv->UCharErrorBufferLength>0) {
+ if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ continue;
+ }
+
+ /*
+ * check for end of input and break if done
+ *
+ * Checking both flush and fromUArgs.flush ensures that the converters
+ * have been called with the flush flag set if the ucnv_convertEx()
+ * caller set it.
+ */
+ if( toUArgs.source==sourceLimit &&
+ sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
+ (!flush || fromUArgs.flush)
+ ) {
+ /* done successfully */
+ break;
+ }
+
+ /*
+ * use direct conversion if available
+ * but not if continuing a partial match
+ * or flushing the toUnicode replay buffer
+ */
+ if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
+ if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+ /* remove a warning that may be set by this function */
+ *pErrorCode=U_ZERO_ERROR;
+ }
+ convert(&fromUArgs, &toUArgs, pErrorCode);
+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ } else if(U_FAILURE(*pErrorCode)) {
+ if(sourceCnv->toULength>0) {
+ /*
+ * Fall through to calling _toUnicodeWithCallback()
+ * for callback handling.
+ *
+ * The pivot buffer will be reset with
+ * *pivotSource=*pivotTarget=pivotStart;
+ * which indicates a toUnicode error to the caller
+ * (*pivotSource==pivotStart shows no pivot UChars consumed).
+ */
+ } else {
+ /*
+ * Indicate a fromUnicode error to the caller
+ * (*pivotSource>pivotStart shows some pivot UChars consumed).
+ */
+ *pivotSource=*pivotTarget=pivotStart+1;
+ /*
+ * Loop around to calling _fromUnicodeWithCallbacks()
+ * for callback handling.
+ */
+ continue;
+ }
+ } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+ /*
+ * No error, but the implementation requested to temporarily
+ * fall back to pivoting.
+ */
+ *pErrorCode=U_ZERO_ERROR;
/*
- * convert to the target first in case the pivot is filled at entry
- * or the targetCnv has some output bytes in its state
+ * The following else branches are almost identical to the end-of-input
+ * handling in _toUnicodeWithCallback().
+ * Avoid calling it just for the end of input.
*/
- ucnv_fromUnicode(targetCnv,
- target, targetLimit,
- (const UChar **)pivotSource, *pivotTarget,
- NULL,
- (UBool)(flush && *source==sourceLimit),
- pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
+ } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+ } else {
+ /* input consumed */
+ if(flush) {
+ /* reset the converters without calling the callback functions */
+ _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
+ _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
+ }
+
+ /* done successfully */
break;
}
-
- /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
- *pivotSource=*pivotTarget=pivotStart;
}
-
- /* convert from the source to the pivot */
- ucnv_toUnicode(sourceCnv,
- pivotTarget, pivotLimit,
- source, sourceLimit,
- NULL,
- flush,
- pErrorCode);
+
+ /*
+ * toUnicode(source -> pivot);
+ *
+ * For pivoting conversion; and for direct conversion for
+ * error callback handling, continuing partial matches
+ * and flushing the replay buffer.
+ *
+ * The pivot buffer is empty and reset.
+ */
+ toUArgs.target=pivotStart; /* ==*pivotTarget */
+ /* toUArgs.targetLimit=pivotLimit; already set before the loop */
+ _toUnicodeWithCallback(&toUArgs, pErrorCode);
+ *pivotTarget=toUArgs.target;
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
/* pivot overflow: continue with the conversion loop */
*pErrorCode=U_ZERO_ERROR;
- } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
+ } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
/* conversion error, or there was nothing left to convert */
break;
}
- /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
+ /*
+ * else:
+ * _toUnicodeWithCallback() wrote into the pivot buffer,
+ * continue with fromUnicode conversion.
+ *
+ * Set the fromUnicode flush flag if we flush and if toUnicode has
+ * processed the end of the input.
+ */
+ if( flush && toUArgs.source==sourceLimit &&
+ sourceCnv->preToULength>=0 &&
+ sourceCnv->UCharErrorBufferLength==0
+ ) {
+ fromUArgs.flush=TRUE;
+ }
}
/*
* - a conversion error occurred
*/
+ *source=toUArgs.source;
+ *target=fromUArgs.target;
+
/* terminate the target buffer if possible */
if(flush && U_SUCCESS(*pErrorCode)) {
if(*target!=targetLimit) {
FALSE,
TRUE,
pErrorCode);
- targetLength=myTarget-target;
+ targetLength=(int32_t)(myTarget-target);
}
/*
FALSE,
TRUE,
pErrorCode);
- targetLength+=(myTarget-targetBuffer);
+ targetLength+=(int32_t)(myTarget-targetBuffer);
} while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
/* done with preflighting, set warnings and errors as appropriate */
int8_t type = converter->sharedData->staticData->conversionType;
#if !UCONFIG_NO_LEGACY_CONVERSION
if(type == UCNV_MBCS) {
- return _MBCSGetType(converter);
+ return ucnv_MBCSGetType(converter);
}
#endif
return (UConverterType)type;
}
if(sourceLength==-1){
- sourceLength=uprv_strlen(source);
+ sourceLength=(int32_t)uprv_strlen(source);
}
*signatureLength=4;
return "UTF-7";
}
+ }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
+ *signatureLength=4;
+ return "UTF-EBCDIC";
}
+
/* no known Unicode signature byte sequence recognized */
*signatureLength=0;
return NULL;
}
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
+{
+ if(status == NULL || U_FAILURE(*status)){
+ return -1;
+ }
+ if(cnv == NULL){
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return -1;
+ }
+
+ if(cnv->preFromUFirstCP >= 0){
+ return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
+ }else if(cnv->preFromULength < 0){
+ return -cnv->preFromULength ;
+ }else if(cnv->fromUChar32 > 0){
+ return 1;
+ }
+ return 0;
+
+}
+
+U_CAPI int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
+
+ if(status == NULL || U_FAILURE(*status)){
+ return -1;
+ }
+ if(cnv == NULL){
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return -1;
+ }
+
+ if(cnv->preToULength > 0){
+ return cnv->preToULength ;
+ }else if(cnv->preToULength < 0){
+ return -cnv->preToULength;
+ }else if(cnv->toULength > 0){
+ return cnv->toULength;
+ }
+ return 0;
+}
+
+U_CAPI UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+
+ if (cnv == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+
+ switch (ucnv_getType(cnv)) {
+ case UCNV_SBCS:
+ case UCNV_DBCS:
+ case UCNV_UTF32_BigEndian:
+ case UCNV_UTF32_LittleEndian:
+ case UCNV_UTF32:
+ case UCNV_US_ASCII:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+#endif
+
/*
* Hey, Emacs, please set the following:
*