+static void
+_updateOffsets(int32_t *offsets, int32_t length,
+ int32_t sourceIndex, int32_t errorInputLength) {
+ int32_t *limit;
+ int32_t delta, offset;
+
+ if(sourceIndex>=0) {
+ /*
+ * adjust each offset by adding the previous sourceIndex
+ * minus the length of the input sequence that caused an
+ * error, if any
+ */
+ delta=sourceIndex-errorInputLength;
+ } else {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ */
+ delta=-1;
+ }
+
+ limit=offsets+length;
+ if(delta==0) {
+ /* most common case, nothing to do */
+ } else if(delta>0) {
+ /* add the delta to each offset (but not if the offset is <0) */
+ while(offsets<limit) {
+ offset=*offsets;
+ if(offset>=0) {
+ *offsets=offset+delta;
+ }
+ ++offsets;
+ }
+ } else /* delta<0 */ {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ * or the error input sequence started in a previous buffer
+ */
+ while(offsets<limit) {
+ *offsets++=-1;
+ }
+ }
+}
+
+/* ucnv_fromUnicode --------------------------------------------------------- */
+
+/*
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ */
+
+static void
+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterFromUnicode fromUnicode;
+ UConverter *cnv;
+ const UChar *s;
+ char *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ UChar replay[UCNV_EXT_MAX_UCHARS];
+ const UChar *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ } else {
+ fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
+ if(fromUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ fromUnicode=cnv->sharedData->impl->fromUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ if(cnv->preFromULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preFromULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ fromUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preFromULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->fromUChar32==0);
+ } else {
+ /* handle error from ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preFromULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preFromULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preFromULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preFromULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }
+
+ /* update pointers */
+ s=pArgs->source;
+ t=pArgs->target;
+
+ if(U_SUCCESS(*err)) {
+ if(s<pArgs->sourceLimit) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=NULL) {
+ /* switch back from replaying to the real source and continue */
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ sourceIndex=realSourceIndex;
+
+ realSource=NULL;
+ break;
+ } else if(pArgs->flush && cnv->fromUChar32!=0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ *err=U_TRUNCATED_CHAR_FOUND;
+ calledCallback=FALSE; /* new error condition */
+ } else {
+ /* input consumed */
+ if(pArgs->flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
+ }
+
+ /* done successfully */
+ return;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+ UErrorCode e;
+
+ if( calledCallback ||
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+ (e!=U_INVALID_CHAR_FOUND &&
+ e!=U_ILLEGAL_CHAR_FOUND &&
+ e!=U_TRUNCATED_CHAR_FOUND)
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=NULL) {
+ int32_t length;
+
+ U_ASSERT(cnv->preFromULength==0);
+
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+ if(length>0) {
+ uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
+ cnv->preFromULength=(int8_t)-length;
+ }
+
+ pArgs->source=realSource;
+ pArgs->sourceLimit=realSourceLimit;
+ pArgs->flush=realFlush;
+ }
+
+ return;
+ }
+ }
+
+ /* callback handling */
+ {
+ UChar32 codePoint;
+
+ /* get and write the code point */
+ codePoint=cnv->fromUChar32;
+ errorInputLength=0;
+ U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
+ cnv->invalidUCharLength=(int8_t)errorInputLength;
+
+ /* set the converter state to deal with the next character */
+ cnv->fromUChar32=0;
+
+ /* call the callback function */
+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
+ cnv->invalidUCharBuffer, errorInputLength, codePoint,
+ *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
+ err);
+ }
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=TRUE;
+ }
+ }
+}
+
+/*
+ * Output the fromUnicode overflow buffer.
+ * Call this function if(cnv->charErrorBufferLength>0).
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowFromUnicode(UConverter *cnv,
+ char **target, const char *targetLimit,
+ int32_t **pOffsets,
+ UErrorCode *err) {
+ int32_t *offsets;
+ char *overflow, *t;
+ int32_t i, length;
+
+ t=*target;
+ if(pOffsets!=NULL) {
+ offsets=*pOffsets;
+ } else {
+ offsets=NULL;
+ }
+
+ overflow=(char *)cnv->charErrorBuffer;
+ length=cnv->charErrorBufferLength;
+ i=0;
+ while(i<length) {
+ if(t==targetLimit) {
+ /* the overflow buffer contains too much, keep the rest */
+ int32_t j=0;
+
+ do {
+ overflow[j++]=overflow[i++];
+ } while(i<length);
+
+ cnv->charErrorBufferLength=(int8_t)j;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
+ }
+ *err=U_BUFFER_OVERFLOW_ERROR;
+ return TRUE;
+ }
+
+ /* copy the overflow contents to the target */
+ *t++=overflow[i++];
+ if(offsets!=NULL) {
+ *offsets++=-1; /* no source index available for old output */
+ }
+ }
+
+ /* the overflow buffer is completely copied to the target */
+ cnv->charErrorBufferLength=0;
+ *target=t;
+ if(offsets!=NULL) {
+ *pOffsets=offsets;
+ }
+ return FALSE;
+}
+
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode(UConverter *cnv,
+ char **target, const char *targetLimit,
+ const UChar **source, const UChar *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err) {
+ UConverterFromUnicodeArgs args;
+ const UChar *s;
+ char *t;
+
+ /* check parameters */
+ if(err==NULL || U_FAILURE(*err)) {
+ return;
+ }
+
+ if(cnv==NULL || target==NULL || source==NULL) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ s=*source;
+ t=*target;
+
+ if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
+ /*
+ Prevent code from going into an infinite loop in case we do hit this
+ limit. The limit pointer is expected to be on a UChar * boundary.
+ This also prevents the next argument check from failing.
+ */
+ sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
+ }
+
+ /*
+ * All these conditions should never happen.
+ *
+ * 1) Make sure that the limits are >= to the address source or target
+ *
+ * 2) Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be targetLimit=t+0x7fffffff; for example.
+ *
+ * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+ * to a char * pointer and provide an incomplete UChar code unit.
+ */
+ if (sourceLimit<s || targetLimit<t ||
+ ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
+ (((const char *)sourceLimit-(const char *)s) & 1) != 0)
+ {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ /* output the target overflow buffer */
+ if( cnv->charErrorBufferLength>0 &&
+ ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
+ ) {
+ /* U_BUFFER_OVERFLOW_ERROR */
+ return;
+ }
+ /* *target may have moved, therefore stop using t */
+
+ if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ /* prepare the converter arguments */
+ args.converter=cnv;
+ args.flush=flush;
+ args.offsets=offsets;
+ args.source=s;
+ args.sourceLimit=sourceLimit;
+ args.target=*target;
+ args.targetLimit=targetLimit;
+ args.size=sizeof(args);
+
+ _fromUnicodeWithCallback(&args, err);
+
+ *source=args.source;
+ *target=args.target;
+}
+
+/* ucnv_toUnicode() --------------------------------------------------------- */
+
+static void
+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+ UConverterToUnicode toUnicode;
+ UConverter *cnv;
+ const char *s;
+ UChar *t;
+ int32_t *offsets;
+ int32_t sourceIndex;
+ int32_t errorInputLength;
+ UBool converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ char replay[UCNV_EXT_MAX_BYTES];
+ const char *realSource, *realSourceLimit;
+ int32_t realSourceIndex;
+ UBool realFlush;
+
+ cnv=pArgs->converter;
+ s=pArgs->source;
+ t=pArgs->target;
+ offsets=pArgs->offsets;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+ if(offsets==NULL) {
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ } else {
+ toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
+ if(toUnicode==NULL) {
+ /* there is no WithOffsets implementation */
+ toUnicode=cnv->sharedData->impl->toUnicode;
+ /* we will write -1 for each offset */
+ sourceIndex=-1;
+ }
+ }
+
+ if(cnv->preToULength>=0) {
+ /* normal mode */
+ realSource=NULL;
+
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+ realSourceLimit=NULL;
+ realFlush=FALSE;
+ realSourceIndex=0;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ sourceIndex=-1;
+
+ cnv->preToULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(U_SUCCESS(*err)) {
+ /* convert */
+ toUnicode(pArgs, err);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preToULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput=
+ (UBool)(U_SUCCESS(*err) &&
+ pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+ cnv->toULength==0);
+ } else {
+ /* handle error from getNextUChar() or ucnv_convertEx() */
+ converterSawEndOfInput=FALSE;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=FALSE;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=NULL) {
+ int32_t length=(int32_t)(pArgs->target-t);
+ if(length>0) {
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(int32_t)(pArgs->source-s);
+ }
+ }
+
+ if(cnv->preToULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==NULL) {
+ realSource=pArgs->source;
+ realSourceLimit=pArgs->sourceLimit;
+ realFlush=pArgs->flush;
+ realSourceIndex=sourceIndex;
+
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+ pArgs->source=replay;
+ pArgs->sourceLimit=replay-cnv->preToULength;
+ pArgs->flush=FALSE;
+ if((sourceIndex+=cnv->preToULength)<0) {
+ sourceIndex=-1;
+ }
+
+ cnv->preToULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ U_ASSERT(realSource==NULL);
+ *err=U_INTERNAL_PROGRAM_ERROR;
+ }
+ }