/*
**********************************************************************
-* Copyright (C) 2000-2003, International Business Machines
+* Copyright (C) 2000-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_lmb.cpp
#include "unicode/utypes.h"
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
-#include "cmemory.h"
#include "unicode/ucnv_err.h"
-#include "ucnv_bld.h"
#include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "ucnv_imp.h"
+#include "ucnv_bld.h"
#include "ucnv_cnv.h"
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* element count of a true array, as int32_t; not meaningful for a pointer */
+
/*
LMBCS
keeps a mapping between optimization groups and IBM character sets, so that
ICU converters can be created and used as needed. */
-static const char * const OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
+/* As the table below shows, even though any byte below 0x20 could be an
+optimization byte, only those at 0x13 or below can map to an actual converter.
+To limit some loops and searches, we define a value for that last group converter:*/
+
+#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
+
+static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = {
/* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
/* 0x0001 */ "ibm-850",
/* 0x0002 */ "ibm-851",
and 0x0019, the 1-2-3 system range control char */
};
-/* As you can see, even though any byte below 0x20 could be an optimization
-byte, only those at 0x13 or below can map to an actual converter. To limit
-some loops and searches, we define a value for that last group converter:*/
-
-#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
-
/* That's approximately all the data that's needed for translating
LMBCS to Unicode.
if (*pTable->LocaleID == *LocaleID) /* Check only first char for speed */
{
/* First char matches - check whole name, for entry-length */
- if (strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0)
+ if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0)
return pTable->OptGroup;
}
else
the definitions of these structures, see unicode\ucnv_bld.h
*/
+typedef struct /* LMBCS state that each converter reaches through UConverter.extraInfo */
+ {
+ UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* shared data of the sub-converter for each optimization group; NULL where the group has no converter */
+ uint8_t OptGroup; /* default optimization group for this LMBCS session */
+ uint8_t localeConverterIndex; /* group picked for the session's locale (value returned by FindLMBCSLocale) */
+ }
+UConverterDataLMBCS;
#define DECLARE_LMBCS_DATA(n) \
_LMBCSToUnicodeWithOffsets,\
_LMBCSFromUnicode,\
_LMBCSFromUnicode,\
- _LMBCSGetNextUChar,\
NULL,\
NULL,\
NULL,\
NULL,\
- ucnv_getCompleteUnicodeSet\
+ _LMBCSSafeClone,\
+ _LMBCSGetUnicodeSet\
};\
static const UConverterStaticData _LMBCSStaticData##n={\
sizeof(UConverterStaticData),\
"LMBCS-" #n,\
- 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 2,\
+ 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\
{ 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
};\
const UConverterSharedData _LMBCSData##n={\
ulmbcs_byte_t OptGroup
)
{
- UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
- if(extraInfo != NULL)
+ UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
+ if(extraInfo != NULL)
{
- ulmbcs_byte_t i;
- ulmbcs_byte_t imax;
- imax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);
-
- for (i=0; i < imax; i++)
- {
- extraInfo->OptGrpConverter[i] =
- (OptGroupByteToCPName[i] != NULL) ?
- ucnv_open(OptGroupByteToCPName[i], err) : NULL;
- }
- extraInfo->OptGroup = OptGroup;
- extraInfo->localeConverterIndex = FindLMBCSLocale(locale);
+ ulmbcs_byte_t i;
+
+ uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS));
+
+ for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++)
+ {
+ if(OptGroupByteToCPName[i] != NULL) {
+ extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], NULL, err);
+ }
+ }
+
+ if(U_SUCCESS(*err)) {
+ extraInfo->OptGroup = OptGroup;
+ extraInfo->localeConverterIndex = FindLMBCSLocale(locale);
+ } else {
+ /* one of the subconverters could not be loaded, unload the previous ones */
+ while(i > 0) {
+ if(extraInfo->OptGrpConverter[--i] != NULL) {
+ ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[i]);
+ extraInfo->OptGrpConverter[i] = NULL;
+ }
+ }
+ }
}
else
{
static void
_LMBCSClose(UConverter * _this)
{
- if (_this->extraInfo != NULL && !_this->isExtraLocal)
+ if (_this->extraInfo != NULL) /* close hook: release every per-group shared data entry, then the LMBCS state block itself when this converter owns it */
{
ulmbcs_byte_t Ix;
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
- for (Ix=0; Ix < ULMBCS_GRP_UNICODE; Ix++)
+ for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) /* visit every slot of OptGrpConverter[] */
{
if (extraInfo->OptGrpConverter[Ix] != NULL)
- ucnv_close (extraInfo->OptGrpConverter[Ix]);
+ ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); /* release the shared data held in this slot; done for clones too, since _LMBCSSafeClone takes its own reference */
+ }
+ if (!_this->isExtraLocal) { /* a safe clone keeps extraInfo inside the clone buffer (isExtraLocal is TRUE), so only the uprv_malloc'ed block is freed here */
+ uprv_free (_this->extraInfo);
}
- uprv_free (_this->extraInfo);
}
}
-/*
-Here's an all-crash stop for debugging, since ICU does not have asserts.
-Turn this on by defining LMBCS_DEBUG, or by changing it to
-#if 1
-*/
-#if LMBCS_DEBUG
-#define MyAssert(b) {if (!(b)) {*(char *)0 = 1;}}
-#else
-#define MyAssert(b)
-#endif
+typedef struct LMBCSClone { /* layout that _LMBCSSafeClone imposes on the caller-supplied clone buffer */
+ UConverter cnv; /* the cloned converter; its extraInfo is pointed at the lmbcs member */
+ UConverterDataLMBCS lmbcs; /* copy of the source converter's LMBCS state */
+} LMBCSClone;
+/* Safe-clone hook for LMBCS converters.
+ *
+ * cnv          converter being cloned; its extraInfo is read as UConverterDataLMBCS
+ * stackBuffer  destination buffer, used as an LMBCSClone
+ * pBufferSize  when *pBufferSize <= 0 the function stores sizeof(LMBCSClone) in it
+ *              and returns NULL without touching stackBuffer (size query)
+ * status       not used by this function
+ *
+ * Returns the address of the UConverter member at the start of stackBuffer,
+ * or NULL for a size query.
+ *
+ * NOTE(review): a positive *pBufferSize is not compared with sizeof(LMBCSClone)
+ * here; presumably the caller guarantees the buffer is large enough - confirm.
+ */
+static UConverter *
+_LMBCSSafeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status) {
+ LMBCSClone *newLMBCS;
+ UConverterDataLMBCS *extraInfo;
+ int32_t i;
+
+ if(*pBufferSize<=0) { /* size query: report the space needed and clone nothing */
+ *pBufferSize=(int32_t)sizeof(LMBCSClone);
+ return NULL;
+ }
+
+ extraInfo=(UConverterDataLMBCS *)cnv->extraInfo;
+ newLMBCS=(LMBCSClone *)stackBuffer;
+
+ /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+
+ uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS));
+
+ /* share the subconverters */
+ for(i = 0; i <= ULMBCS_GRP_LAST; ++i) {
+ if(extraInfo->OptGrpConverter[i] != NULL) {
+ ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); /* the copied pointer is a second holder; _LMBCSClose releases one entry per converter */
+ }
+ }
+
+ newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs;
+ newLMBCS->cnv.isExtraLocal = TRUE; /* tells _LMBCSClose not to uprv_free() extraInfo */
+ return &newLMBCS->cnv;
+}
+/* Unicode-set hook for LMBCS converters.
+ *
+ * Adds two fixed ranges through sa->addRange on sa->set: U+0000..U+F5FF and
+ * U+F700..U+10FFFF, i.e. every code point except U+F600..U+F6FF.
+ *
+ * cnv, which and pErrorCode are not used by this function.
+ */
+static void
+_LMBCSGetUnicodeSet(const UConverter *cnv,
+ const USetAdder *sa,
+ UConverterUnicodeSet which,
+ UErrorCode *pErrorCode) {
+ /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
+ sa->addRange(sa->set, 0, 0xf5ff);
+ sa->addRange(sa->set, 0xf700, 0x10ffff);
+}
/*
Here's the basic helper function that we use when converting from
)
{
ulmbcs_byte_t * pLMBCS = pStartLMBCS;
- UConverter * xcnv = extraInfo->OptGrpConverter[group];
+ UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group];
int bytesConverted;
uint32_t value;
ulmbcs_byte_t firstByte;
- MyAssert(xcnv);
- MyAssert(group<ULMBCS_GRP_UNICODE);
+ U_ASSERT(xcnv);
+ U_ASSERT(group<ULMBCS_GRP_UNICODE);
- bytesConverted = _MBCSFromUChar32(xcnv->sharedData, *pUniChar, &value, FALSE);
+ bytesConverted = ucnv_MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE);
/* get the first result byte */
- switch(bytesConverted)
- {
- case 4:
- firstByte = (ulmbcs_byte_t)(value >> 24);
- break;
- case 3:
- firstByte = (ulmbcs_byte_t)(value >> 16);
- break;
- case 2:
- firstByte = (ulmbcs_byte_t)(value >> 8);
- break;
- case 1:
- firstByte = (ulmbcs_byte_t)value;
- break;
- default:
+ if(bytesConverted > 0) {
+ firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8));
+ } else {
/* most common failure mode is an unassigned character */
groups_tried[group] = TRUE;
return 0;
/* All initial byte values in lower ascii range should have been caught by now,
except with the exception group.
*/
- MyAssert((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
+ U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
/* use converted data: first write 0, 1 or two group bytes */
if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
}
if (!bytes_written) /* the ambiguous group cases (Strategy 3) */
{
- memset(groups_tried, 0, sizeof(groups_tried));
+ uprv_memset(groups_tried, 0, sizeof(groups_tried));
/* check for non-default optimization group (Strategy 3A )*/
if (extraInfo->OptGroup != 1
/* Now, the Unicode from LMBCS section */
-/*
- Special codes for the getNextUnicodeWorker -- usually as the result of
- special error-callback behavior:
- ULMBCS_SKIP To control skipping over LMBCS sequences
- ULMBCS_MULTI To indicate that a single LMBCS char translates to
- multiple uniChars
-*/
-#define ULMBCS_SKIP U_ERROR_LIMIT
-#define ULMBCS_MULTI ULMBCS_SKIP+1
-
/* A function to call when we are looking at the Unicode group byte in LMBCS */
static UChar
GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */
/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index'
- bytes left in source up to sourceLimit.Errors appropriately if not
+ bytes left in source up to sourceLimit. If not, it sets *err to
+ U_TRUNCATED_CHAR_FOUND, moves args->source to args->sourceLimit (consuming
+ all input as required by ICU converter semantics) and returns 0xffff from
+ the calling function.
+ The macro relies on 'args' and 'err' being in scope where it is used, and
+ it expands to a bare if statement, so use it only as a statement of its own.
*/
#define CHECK_SOURCE_LIMIT(index) \
if (args->source+index > args->sourceLimit){\
*err = U_TRUNCATED_CHAR_FOUND;\
- args->source = saveSource;\
+ args->source = args->sourceLimit;\
return 0xffff;}
-/* Return the Unicode representation for the current LMBCS character
-
- This worker function is used by both ucnv_getNextUChar() and ucnv_ToUnicode().
- The last parameter says whether the return value should be treated as UTF-16 or
- UTF-32. The only difference is in surrogate handling
-*/
+/* Return the Unicode representation for the current LMBCS character */
static UChar32
_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
- UErrorCode* err,
- UBool returnUTF32)
+ UErrorCode* err)
{
- UChar32 uniChar = 0; /* an output UNICODE char */
- ulmbcs_byte_t CurByte; /* A byte from the input stream */
- const char * saveSource;
+ UChar32 uniChar = 0; /* an output UNICODE char */
+ ulmbcs_byte_t CurByte; /* A byte from the input stream */
/* error check */
if (args->source >= args->sourceLimit)
return 0xffff;
}
/* Grab first byte & save address for error recovery */
- CurByte = *((ulmbcs_byte_t *) (saveSource = args->source++));
+ CurByte = *((ulmbcs_byte_t *) (args->source++));
/*
* at entry of each if clause:
|| CurByte == ULMBCS_HT || CurByte == ULMBCS_CR
|| CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE)
{
- uniChar = CurByte;
+ uniChar = CurByte;
}
else
{
UConverterDataLMBCS * extraInfo;
ulmbcs_byte_t group;
- UConverter* cnv;
+ UConverterSharedData *cnv;
if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
{
else
if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */
{
- UChar second;
CHECK_SOURCE_LIMIT(2);
- uniChar = GetUniFromLMBCSUni(&(args->source));
-
- /* at this point we are usually done, but we need to make sure we are not in
- a situation where we can successfully put together a surrogate pair */
-
- if(returnUTF32 && UTF_IS_FIRST_SURROGATE(uniChar) && (args->source+3 <= args->sourceLimit)
- && *(args->source)++ == ULMBCS_GRP_UNICODE
- && UTF_IS_SECOND_SURROGATE(second = GetUniFromLMBCSUni(&(args->source))))
- {
- uniChar = UTF16_GET_PAIR_VALUE(uniChar, second);
- }
+ /* don't check for error indicators fffe/ffff below */
+ return GetUniFromLMBCSUni(&(args->source));
}
else if (CurByte <= ULMBCS_CTRLOFFSET)
{
group = CurByte; /* group byte is in the source */
extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
- cnv = extraInfo->OptGrpConverter[group];
- if (!cnv)
+ if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL)
{
/* this is not a valid group byte - no converter*/
*err = U_INVALID_CHAR_FOUND;
if (*args->source == group) {
/* single byte */
++args->source;
- uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE);
+ ++args->source;
} else {
/* double byte */
- const char *newLimit = args->source + 2;
- uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, newLimit, FALSE);
- args->source = newLimit; /* set the correct limit even in case of an error */
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE);
+ args->source += 2;
}
}
else { /* single byte conversion */
if (CurByte >= ULMBCS_C1START)
{
- uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte);
+ uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
}
else
{
/* The non-optimizable oddballs where there is an explicit byte
* AND the second byte is not in the upper ascii range
*/
- const char *s;
char bytes[2];
extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
/* Lookup value must include opt group */
bytes[0] = group;
bytes[1] = CurByte;
- s = bytes;
- uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &s, bytes + 2, FALSE);
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE);
}
}
}
cnv = extraInfo->OptGrpConverter[group];
if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
{
- if (!_MBCSIsLeadByte(cnv->sharedData, CurByte))
+ if (!ucnv_MBCSIsLeadByte(cnv, CurByte))
{
CHECK_SOURCE_LIMIT(0);
/* let the MBCS conversion consume CurByte again */
- --args->source;
- uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE);
}
else
{
CHECK_SOURCE_LIMIT(1);
/* let the MBCS conversion consume CurByte again */
- --args->source;
- /* since we know that we start at a lead byte, args->source _will_ be incremented by 2 */
- uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 2, FALSE);
+ uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE);
+ ++args->source;
}
}
else /* single byte conversion */
{
- uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte);
+ uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
}
}
}
- if (((uint32_t)uniChar - 0xfffe) <= 1) /* 0xfffe<=uniChar<=0xffff */
- {
- UConverterToUnicodeArgs cbArgs = *args;
- UConverterCallbackReason reason;
- UChar UCh;
-
- if (uniChar == 0xfffe)
- {
- reason = UCNV_UNASSIGNED;
- *err = U_INVALID_CHAR_FOUND;
- }
- else
- {
- reason = UCNV_ILLEGAL;
- *err = U_ILLEGAL_CHAR_FOUND;
- }
-
- cbArgs.target = &UCh;
- cbArgs.targetLimit = &UCh + 1;
- cbArgs.converter->fromCharErrorBehaviour(cbArgs.converter->toUContext,
- &cbArgs,
- saveSource,
- args->source - saveSource,
- reason,
- err);
-
- if (cbArgs.target != &UCh)
- {
- uniChar = (UChar32) UCh;
- }
- /* Did error functor skip */
- if (U_SUCCESS(*err) && cbArgs.target == &UCh)
- {
- *err = ULMBCS_SKIP;
- }
- /* Did error functor try to write multiple UChars? */
- else if (*err == U_BUFFER_OVERFLOW_ERROR)
- {
- *err = ULMBCS_MULTI;
- }
- }
return uniChar;
}
-/* The exported function that gets one UTF32 character from a LMBCS stream
-*/
-static UChar32
-_LMBCSGetNextUChar(UConverterToUnicodeArgs* args,
- UErrorCode* err)
-{
- UChar32 nextUChar;
- do {
- nextUChar = _LMBCSGetNextUCharWorker(args, err, TRUE);
- } while (*err == ULMBCS_SKIP);
-
- if (*err == ULMBCS_MULTI)
- {
- *err = U_ZERO_ERROR;
- }
- return nextUChar;
-}
-
/* The exported function that converts lmbcs to one or more
UChars - currently UTF-16
*/
_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
+ char LMBCS [ULMBCS_CHARSIZE_MAX];
UChar uniChar; /* one output UNICODE char */
- const char * saveSource = args->source; /* beginning of current code point */
+ const char * saveSource; /* beginning of current code point */
const char * pStartLMBCS = args->source; /* beginning of whole string */
+ const char * errSource = NULL; /* pointer to actual input in case an error occurs */
+ int8_t savebytes = 0;
- if (args->targetLimit == args->target) /* error check may belong in common code */
- {
- *err = U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
/* Process from source to limit, or until error */
- while (!*err && args->sourceLimit > args->source && args->targetLimit > args->target)
+ while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target)
{
saveSource = args->source; /* beginning of current code point */
- if (args->converter->invalidCharLength) /* reassemble char from previous call */
+ if (args->converter->toULength) /* reassemble char from previous call */
{
- char LMBCS [ULMBCS_CHARSIZE_MAX];
- const char *pLMBCS = LMBCS, *saveSourceLimit;
- size_t size_old = args->converter->invalidCharLength;
+ const char *saveSourceLimit;
+ size_t size_old = args->converter->toULength;
- /* limit from source is either reminder of temp buffer, or user limit on source */
+ /* limit from source is either remainder of temp buffer, or user limit on source */
size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
size_t size_new_maybe_2 = args->sourceLimit - args->source;
size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
- uprv_memcpy(LMBCS, args->converter->invalidCharBuffer, size_old);
+ uprv_memcpy(LMBCS, args->converter->toUBytes, size_old);
uprv_memcpy(LMBCS + size_old, args->source, size_new);
saveSourceLimit = args->sourceLimit;
- args->source = pLMBCS;
- args->sourceLimit = pLMBCS+size_old+size_new;
- uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
- pLMBCS = args->source;
- args->source =saveSource;
+ args->source = errSource = LMBCS;
+ args->sourceLimit = LMBCS+size_old+size_new;
+ savebytes = (int8_t)(size_old+size_new);
+ uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
+ args->source = saveSource + ((args->source - LMBCS) - size_old);
args->sourceLimit = saveSourceLimit;
- args->source += (pLMBCS - LMBCS - size_old);
- if (*err == U_TRUNCATED_CHAR_FOUND && !args->flush)
+ if (*err == U_TRUNCATED_CHAR_FOUND)
{
/* evil special case: source buffers so small a char spans more than 2 buffers */
- int8_t savebytes = (int8_t)(size_old+size_new);
- args->converter->invalidCharLength = savebytes;
- uprv_memcpy(args->converter->invalidCharBuffer, LMBCS, savebytes);
+ args->converter->toULength = savebytes;
+ uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes);
args->source = args->sourceLimit;
*err = U_ZERO_ERROR;
return;
else
{
/* clear the partial-char marker */
- args->converter->invalidCharLength = 0;
+ args->converter->toULength = 0;
}
}
else
{
- uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
+ errSource = saveSource;
+ uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err);
+ savebytes = (int8_t)(args->source - saveSource);
}
if (U_SUCCESS(*err))
{
*err = U_ILLEGAL_CHAR_FOUND;
}
}
- else if (*err == ULMBCS_MULTI)
- {
- UChar * pUChar = args->converter->UCharErrorBuffer;
- int8_t BufferLength = args->converter->UCharErrorBufferLength;
-
- *err = U_ZERO_ERROR;
- do
- { /* error functor wants to write multiple UniChars */
- *(args->target)++ = uniChar;
- if(args->offsets)
- {
- *(args->offsets)++ = saveSource - pStartLMBCS;
- }
- uniChar = *pUChar++;
- }
- while(BufferLength-- && args->targetLimit > args->target);
-
- if (++BufferLength > 0)
- { /* fix up remaining UChars that can't fit in caller's buffer */
- uprv_memmove( args->converter->UCharErrorBuffer,
- args->converter->UCharErrorBuffer + args->converter->UCharErrorBufferLength - BufferLength,
- sizeof(UChar) * BufferLength);
- }
- args->converter->UCharErrorBufferLength = BufferLength;
- }
- else if (*err == ULMBCS_SKIP)
- {
- *err = U_ZERO_ERROR; /* and just go around again..*/
- }
}
/* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */
if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target)
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
-
- /* If character incomplete, store away partial char if more to come */
- if (*err == U_TRUNCATED_CHAR_FOUND)
+ else if (U_FAILURE(*err))
{
- args->source = args->sourceLimit;
- if (!args->flush )
- {
- int8_t savebytes = (int8_t)(args->sourceLimit - saveSource);
- args->converter->invalidCharLength = (int8_t)savebytes;
- uprv_memcpy(args->converter->invalidCharBuffer, saveSource, savebytes);
- *err = U_ZERO_ERROR;
- }
+ /* If character incomplete or unmappable/illegal, store it in toUBytes[] */
+ args->converter->toULength = savebytes;
+ if (savebytes > 0) {
+ uprv_memcpy(args->converter->toUBytes, errSource, savebytes);
+ }
+ if (*err == U_TRUNCATED_CHAR_FOUND) {
+ *err = U_ZERO_ERROR;
+ }
}
}