]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnv.c
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / ucnv.c
index 3cf7576803bb9d9b5c5896d6c82632f2ee60bfd1..52d4f6d6267aff89a86d88bb45d141a27c58e01e 100644 (file)
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1998-2003, International Business Machines
+*   Copyright (C) 1998-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 */
 
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
 #include "unicode/ustring.h"
-#include "unicode/ures.h"
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"
 #include "unicode/uset.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "uassert.h"
+#include "utracimp.h"
 #include "ustr_imp.h"
 #include "ucnv_imp.h"
-#include "ucnv_io.h"
 #include "ucnv_cnv.h"
 #include "ucnv_bld.h"
 
-#if 0
-/* debugging for converters */
-# include <stdio.h>
-void UCNV_DEBUG_LOG(const char *what, const char *who, const void *p, int l)
-{
-    static FILE *f = NULL;
-    if(f==NULL)
-    {
-        /* stderr, or open another file */
-        f = stderr;
-        /*  f = fopen("c:\\UCNV_DEBUG_LOG.txt", "w"); */
-    }
-    if (!what) {
-        what = "(null)";
-    }
-    if (!who) {
-        who = "(null)";
-    }
-    if (!p) {
-        p = "(null)";
-    }
-
-    fprintf(f, "%p\t:%d\t%-20s\t%-10s\n",
-        p, l, who, what);
-
-    fflush(f);
-}
-
-
-/* dump the contents of a converter */
-static void UCNV_DEBUG_CNV(const UConverter *c, int line)
-{
-    UErrorCode err = U_ZERO_ERROR;
-    fprintf(stderr, "%p\t:%d\t", c, line);
-    if(c!=NULL) {
-        const char *name = ucnv_getName(c, &err);
-        if (!name) {
-            name = "(null)";
-        }
-        fprintf(stderr, "%s\t", name);
-
-        fprintf(stderr, "shr=%p, ref=%x\n", 
-            c->sharedData,
-            c->sharedData->referenceCounter);
-    } else { 
-        fprintf(stderr, "DEMISED\n");
-    }
-}
-
-# define UCNV_DEBUG 1
-# define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
-# define UCNV_DEBUG_CNV(c) UCNV_DEBUG_CNV(c, __LINE__)
-#else
-# define UCNV_DEBUG_LOG(x,y,z)
-# define UCNV_DEBUG_CNV(c)
-#endif
-
-
-
 /* size of intermediate and preflighting buffers in ucnv_convert() */
 #define CHUNK_SIZE 1024
 
@@ -101,25 +48,22 @@ typedef struct UAmbiguousConverter {
 } UAmbiguousConverter;
 
 static const UAmbiguousConverter ambiguousConverters[]={
+    { "ibm-897_P100-1995", 0xa5 },
     { "ibm-942_P120-1999", 0xa5 },
     { "ibm-943_P130-1999", 0xa5 },
+    { "ibm-946_P100-1995", 0xa5 },
     { "ibm-33722_P120-1999", 0xa5 },
+    { "ibm-1041_P100-1995", 0xa5 },
+    /*{ "ibm-54191_P100-2006", 0xa5 },*/
+    /*{ "ibm-62383_P100-2007", 0xa5 },*/
+    /*{ "ibm-891_P100-1995", 0x20a9 },*/
+    { "ibm-944_P100-1995", 0x20a9 },
     { "ibm-949_P110-1999", 0x20a9 },
     { "ibm-1363_P110-1997", 0x20a9 },
-    { "ISO_2022,locale=ko,version=0", 0x20a9 }
+    { "ISO_2022,locale=ko,version=0", 0x20a9 },
+    { "ibm-1088_P100-1995", 0x20a9 }
 };
 
-U_CAPI const char*  U_EXPORT2
-ucnv_getDefaultName ()
-{
-    return ucnv_io_getDefaultConverterName();
-}
-
-U_CAPI void U_EXPORT2
-ucnv_setDefaultName (const char *converterName)
-{
-  ucnv_io_setDefaultConverterName(converterName);
-}
 /*Calls through createConverter */
 U_CAPI UConverter* U_EXPORT2
 ucnv_open (const char *name,
@@ -128,13 +72,10 @@ ucnv_open (const char *name,
     UConverter *r;
 
     if (err == NULL || U_FAILURE (*err)) {
-        UCNV_DEBUG_LOG("open", name, NULL);
         return NULL;
     }
 
     r =  ucnv_createConverter(NULL, name, err);
-    UCNV_DEBUG_LOG("open", name, r);
-    UCNV_DEBUG_CNV(r);
     return r;
 }
 
@@ -163,6 +104,28 @@ ucnv_openU (const UChar * name,
     return ucnv_open(u_austrcpy(asciiName, name), err);
 }
 
+/* Copy the name prefix that corresponds to a UConverterPlatform value.
+ * @param platformString An output buffer; receives the ibm- prefix or an empty string
+ * @param pltfrm An enum value representing a platform
+ * @return the length of the copied string, not counting the terminator.
+ */
+static int32_t
+ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
+{
+    switch (pltfrm)
+    {
+    case UCNV_IBM:
+        uprv_strcpy(platformString, "ibm-");
+        return 4; /* number of characters in the ibm- prefix */
+    case UCNV_UNKNOWN:
+        break; /* leaves the switch; handled by the empty-string default below */
+    }
+
+    /* default to empty string */
+    *platformString = 0;
+    return 0;
+}
+
 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
  *through createConverter*/
 U_CAPI UConverter*   U_EXPORT2
@@ -215,34 +178,26 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
             NULL
     };
 
+    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
+
     if (status == NULL || U_FAILURE(*status)){
+        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
         return 0;
     }
 
     if (!pBufferSize || !cnv){
-       *status = U_ILLEGAL_ARGUMENT_ERROR;
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        UTRACE_EXIT_STATUS(*status);
         return 0;
     }
 
-    UCNV_DEBUG_LOG("cloning FROM", ucnv_getName(cnv,status), cnv);
-    UCNV_DEBUG_LOG("cloning WITH", "memory", stackBuffer);
-    UCNV_DEBUG_CNV(cnv);
+    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
+                                    ucnv_getName(cnv, status), cnv, stackBuffer);
 
-    /* Pointers on 64-bit platforms need to be aligned
-     * on a 64-bit boundry in memory.
-     */
-    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
-        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
-        *pBufferSize -= offsetUp;
-        stackBufferChars += offsetUp;
-    }
-
-    stackBuffer = (void *)stackBufferChars;
-    
     if (cnv->sharedData->impl->safeClone != NULL) {
         /* call the custom safeClone function for sizing */
         bufferSizeNeeded = 0;
-        cnv->sharedData->impl->safeClone(cnv, stackBuffer, &bufferSizeNeeded, status);
+        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
     }
     else
     {
@@ -252,10 +207,27 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
 
     if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
         *pBufferSize = bufferSizeNeeded;
+        UTRACE_EXIT_VALUE(bufferSizeNeeded);
         return 0;
     }
 
 
+    /* Pointers on 64-bit platforms need to be aligned
+     * on a 64-bit boundary in memory.
+     */
+    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
+        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
+        if(*pBufferSize > offsetUp) {
+            *pBufferSize -= offsetUp;
+            stackBufferChars += offsetUp;
+        } else {
+            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
+            *pBufferSize = 1;
+        }
+    }
+
+    stackBuffer = (void *)stackBufferChars;
+    
     /* Now, see if we must allocate any memory */
     if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
     {
@@ -264,6 +236,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
 
         if(localConverter == NULL) {
             *status = U_MEMORY_ALLOCATION_ERROR;
+            UTRACE_EXIT_STATUS(*status);
             return NULL;
         }
         
@@ -279,10 +252,25 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
         allocatedConverter = NULL;
     }
 
+    uprv_memset(localConverter, 0, bufferSizeNeeded);
+
     /* Copy initial state */
     uprv_memcpy(localConverter, cnv, sizeof(UConverter));
     localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
 
+    /* copy the substitution string */
+    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+        localConverter->subChars = (uint8_t *)localConverter->subUChars;
+    } else {
+        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+        if (localConverter->subChars == NULL) {
+            uprv_free(allocatedConverter);
+            UTRACE_EXIT_STATUS(*status);
+            return NULL;
+        }
+        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+    }
+
     /* now either call the safeclone fcn or not */
     if (cnv->sharedData->impl->safeClone != NULL) {
         /* call the custom safeClone function */
@@ -290,7 +278,11 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
     }
 
     if(localConverter==NULL || U_FAILURE(*status)) {
+        if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
+            uprv_free(allocatedConverter->subChars);
+        }
         uprv_free(allocatedConverter);
+        UTRACE_EXIT_STATUS(*status);
         return NULL;
     }
 
@@ -307,21 +299,8 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
     if(localConverter == (UConverter*)stackBuffer) {
         /* we're using user provided data - set to not destroy */
         localConverter->isCopyLocal = TRUE;
-#ifdef UCNV_DEBUG
-        fprintf(stderr, "%p\t:%d\t\t==stackbuffer %p, isCopyLocal TRUE\n",
-                localConverter, __LINE__, stackBuffer);
-#endif
-
-    } else {
-#ifdef UCNV_DEBUG
-        fprintf(stderr, "%p\t:%d\t\t!=stackbuffer %p, isCopyLocal left at %s\n",
-                localConverter, __LINE__, stackBuffer,
-                localConverter->isCopyLocal?"TRUE":"FALSE");
-#endif
     }
 
-    localConverter->isExtraLocal = localConverter->isCopyLocal;
-
     /* allow callback functions to handle any memory allocation */
     toUArgs.converter = fromUArgs.converter = localConverter;
     cbErr = U_ZERO_ERROR;
@@ -329,11 +308,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
     cbErr = U_ZERO_ERROR;
     cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
 
-    UCNV_DEBUG_LOG("cloning TO", ucnv_getName(localConverter,status), localConverter);
-    UCNV_DEBUG_CNV(localConverter);
-    UCNV_DEBUG_CNV(cnv);
-
-
+    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
     return localConverter;
 }
 
@@ -345,61 +320,62 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
 U_CAPI void  U_EXPORT2
 ucnv_close (UConverter * converter)
 {
-    /* first, notify the callback functions that the converter is closed */
-    UConverterToUnicodeArgs toUArgs = {
-        sizeof(UConverterToUnicodeArgs),
-            TRUE,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL
-    };
-    UConverterFromUnicodeArgs fromUArgs = {
-        sizeof(UConverterFromUnicodeArgs),
-            TRUE,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL
-    };
     UErrorCode errorCode = U_ZERO_ERROR;
 
+    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
+
     if (converter == NULL)
     {
+        UTRACE_EXIT();
         return;
     }
 
-    UCNV_DEBUG_LOG("close", ucnv_getName(converter, &errorCode), converter);
-    UCNV_DEBUG_CNV(converter);
+    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
+        ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
 
-    toUArgs.converter = fromUArgs.converter = converter;
-
-    converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
-    errorCode = U_ZERO_ERROR;
-    converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
+    /* In order to speed up the close, only call the callbacks when they have been changed
+    from the defaults. This pointer comparison will only work when the callbacks are set
+    within a shared library or from user code that statically links this code. */
+    /* first, notify the callback functions that the converter is closed */
+    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+        UConverterToUnicodeArgs toUArgs = {
+            sizeof(UConverterToUnicodeArgs),
+                TRUE,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL
+        };
+
+        toUArgs.converter = converter;
+        errorCode = U_ZERO_ERROR;
+        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
+    }
+    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+        UConverterFromUnicodeArgs fromUArgs = {
+            sizeof(UConverterFromUnicodeArgs),
+                TRUE,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL
+        };
+        fromUArgs.converter = converter;
+        errorCode = U_ZERO_ERROR;
+        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
+    }
 
-    UCNV_DEBUG_CNV(converter);
-        
     if (converter->sharedData->impl->close != NULL) {
         converter->sharedData->impl->close(converter);
     }
 
-#ifdef UCNV_DEBUG
-    {
-        char c[4];
-        c[0]='0'+converter->sharedData->referenceCounter;
-        c[1]=0;
-        UCNV_DEBUG_LOG("close--", c, converter);
-        if((converter->sharedData->referenceCounter == 0)&&(converter->sharedData->sharedDataCached == FALSE)) {
-            UCNV_DEBUG_CNV(converter);
-            UCNV_DEBUG_LOG("close:delDead", "??", converter);
-        }
+    if (converter->subChars != (uint8_t *)converter->subUChars) {
+        uprv_free(converter->subChars);
     }
-#endif
 
     /*
     Checking whether it's an algorithic converter is okay
@@ -411,10 +387,10 @@ ucnv_close (UConverter * converter)
     }
 
     if(!converter->isCopyLocal){
-        UCNV_DEBUG_LOG("close:free", "", converter);
-        uprv_free (converter);
+        uprv_free(converter);
     }
-    return;
+
+    UTRACE_EXIT();
 }
 
 /*returns a single Name from the list, will return NULL if out of bounds
@@ -422,47 +398,21 @@ ucnv_close (UConverter * converter)
 U_CAPI const char*   U_EXPORT2
 ucnv_getAvailableName (int32_t n)
 {
-  if (0 <= n && n <= 0xffff) {
-    UErrorCode err = U_ZERO_ERROR;
-    const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err);
-    if (U_SUCCESS(err)) {
-      return name;
+    if (0 <= n && n <= 0xffff) {
+        UErrorCode err = U_ZERO_ERROR;
+        const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
+        if (U_SUCCESS(err)) {
+            return name;
+        }
     }
-  }
-  return NULL;
+    return NULL;
 }
 
 U_CAPI int32_t   U_EXPORT2
 ucnv_countAvailable ()
 {
     UErrorCode err = U_ZERO_ERROR;
-    return ucnv_io_countAvailableConverters(&err);
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
-{
-    return ucnv_io_countAliases(alias, pErrorCode);
-}
-
-
-U_CAPI const char* U_EXPORT2
-ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
-{
-    return ucnv_io_getAlias(alias, n, pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
-{
-    ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
-}
-
-U_CAPI uint16_t U_EXPORT2
-ucnv_countStandards(void)
-{
-    UErrorCode err = U_ZERO_ERROR;
-    return ucnv_io_countStandards(&err);
+    return ucnv_bld_countAvailableConverters(&err);
 }
 
 U_CAPI void    U_EXPORT2
@@ -474,15 +424,19 @@ ucnv_getSubstChars (const UConverter * converter,
     if (U_FAILURE (*err))
         return;
 
+    if (converter->subCharLen <= 0) {
+        /* Unicode string or empty string from ucnv_setSubstString(). */
+        *len = 0;
+        return;
+    }
+
     if (*len < converter->subCharLen) /*not enough space in subChars */
     {
         *err = U_INDEX_OUTOFBOUNDS_ERROR;
         return;
     }
 
-  uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen);   /*fills in the subchars */
-  *len = converter->subCharLen; /*store # of bytes copied to buffer */
-    uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen);   /*fills in the subchars */
+    uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
     *len = converter->subCharLen; /*store # of bytes copied to buffer */
 }
 
@@ -503,7 +457,7 @@ ucnv_setSubstChars (UConverter * converter,
         return;
     }
     
-    uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
+    uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
     converter->subCharLen = len;  /*sets the new len */
 
     /*
@@ -516,130 +470,182 @@ ucnv_setSubstChars (UConverter * converter,
     return;
 }
 
-U_CAPI int32_t U_EXPORT2
-ucnv_getDisplayName(const UConverter *cnv,
-                    const char *displayLocale,
-                    UChar *displayName, int32_t displayNameCapacity,
-                    UErrorCode *pErrorCode) {
-    UResourceBundle *rb;
-    const UChar *name;
-    int32_t length;
-
-    /* check arguments */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
+U_CAPI void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+                    const UChar *s,
+                    int32_t length,
+                    UErrorCode *err) {
+    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
+    char chars[UCNV_ERROR_BUFFER_LENGTH];
+
+    UConverter *clone;
+    uint8_t *subChars;
+    int32_t cloneSize, length8;
+
+    /* Let the following functions check all arguments. */
+    cloneSize = sizeof(cloneBuffer);
+    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
+    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
+    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
+    ucnv_close(clone);
+    if (U_FAILURE(*err)) {
+        return;
     }
 
-    if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+    if (cnv->sharedData->impl->writeSub == NULL
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
+         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
+#endif
+    ) {
+        /* The converter is not stateful. Store the charset bytes as a fixed string. */
+        subChars = (uint8_t *)chars;
+    } else {
+        /*
+         * The converter has a non-default writeSub() function, indicating
+         * that it is stateful.
+         * Store the Unicode string for on-the-fly conversion for correct
+         * state handling.
+         */
+        if (length > UCNV_ERROR_BUFFER_LENGTH) {
+            /*
+             * Should not occur. The converter should output at least one byte
+             * per UChar, which means that ucnv_fromUChars() should catch all
+             * overflows.
+             */
+            *err = U_BUFFER_OVERFLOW_ERROR;
+            return;
+        }
+        subChars = (uint8_t *)s;
+        if (length < 0) {
+            length = u_strlen(s);
+        }
+        length8 = length * U_SIZEOF_UCHAR;
     }
 
-    /* open the resource bundle and get the display name string */
-    rb=ures_open(NULL, displayLocale, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        return 0;
+    /*
+     * For storing the substitution string, select either the small buffer inside
+     * UConverter or allocate a subChars buffer.
+     */
+    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
+        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
+        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
+            /* Allocate a new buffer for the string. */
+            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+            if (cnv->subChars == NULL) {
+                cnv->subChars = (uint8_t *)cnv->subUChars;
+                *err = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
+        }
     }
 
-    /* use the internal name as the key */
-    name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, pErrorCode);
-    ures_close(rb);
-
-    if(U_SUCCESS(*pErrorCode)) {
-        /* copy the string */
-        u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR);
+    /* Copy the substitution string into the UConverter or its subChars buffer. */
+    if (length8 == 0) {
+        cnv->subCharLen = 0;
     } else {
-        /* convert the internal name into a Unicode string */
-        *pErrorCode=U_ZERO_ERROR;
-        length=uprv_strlen(cnv->sharedData->staticData->name);
-        u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity));
+        uprv_memcpy(cnv->subChars, subChars, length8);
+        if (subChars == (uint8_t *)chars) {
+            cnv->subCharLen = (int8_t)length8;
+        } else /* subChars == s */ {
+            cnv->subCharLen = (int8_t)-length;
+        }
     }
-    return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode);
+
+    /* See comment in ucnv_setSubstChars(). */
+    cnv->subChar1 = 0;
 }
 
 /*resets the internal states of a converter
  *goal : have the same behaviour than a freshly created converter
  */
-static void _reset(UConverter *converter, UConverterResetChoice choice) {
-    /* first, notify the callback functions that the converter is reset */
-    UConverterToUnicodeArgs toUArgs = {
-        sizeof(UConverterToUnicodeArgs),
-            TRUE,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL
-    };
-    UConverterFromUnicodeArgs fromUArgs = {
-        sizeof(UConverterFromUnicodeArgs),
-            TRUE,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL,
-            NULL
-    };
-    UErrorCode errorCode;
-
+static void _reset(UConverter *converter, UConverterResetChoice choice,
+                   UBool callCallback) {
     if(converter == NULL) {
         return;
     }
 
-    toUArgs.converter = fromUArgs.converter = converter;
-    if(choice<=UCNV_RESET_TO_UNICODE) {
-        errorCode = U_ZERO_ERROR;
-        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
-    }
-    if(choice!=UCNV_RESET_TO_UNICODE) {
-        errorCode = U_ZERO_ERROR;
-        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
+    if(callCallback) {
+        /* first, notify the callback functions that the converter is reset */
+        UErrorCode errorCode;
+
+        if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
+            UConverterToUnicodeArgs toUArgs = {
+                sizeof(UConverterToUnicodeArgs),
+                TRUE,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL
+            };
+            toUArgs.converter = converter;
+            errorCode = U_ZERO_ERROR;
+            converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
+        }
+        if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
+            UConverterFromUnicodeArgs fromUArgs = {
+                sizeof(UConverterFromUnicodeArgs),
+                TRUE,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL,
+                NULL
+            };
+            fromUArgs.converter = converter;
+            errorCode = U_ZERO_ERROR;
+            converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
+        }
     }
 
     /* now reset the converter itself */
     if(choice<=UCNV_RESET_TO_UNICODE) {
         converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
+        converter->mode = 0;
         converter->toULength = 0;
         converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
+        converter->preToULength = 0;
     }
     if(choice!=UCNV_RESET_TO_UNICODE) {
         converter->fromUnicodeStatus = 0;
-        converter->fromUSurrogateLead = 0;
+        converter->fromUChar32 = 0;
         converter->invalidUCharLength = converter->charErrorBufferLength = 0;
+        converter->preFromUFirstCP = U_SENTINEL;
+        converter->preFromULength = 0;
     }
 
     if (converter->sharedData->impl->reset != NULL) {
         /* call the custom reset function */
         converter->sharedData->impl->reset(converter, choice);
-    } else if(choice<=UCNV_RESET_TO_UNICODE) {
-        converter->mode = UCNV_SI;
     }
 }
 
 U_CAPI void  U_EXPORT2
 ucnv_reset(UConverter *converter)
 {
-    _reset(converter, UCNV_RESET_BOTH);
+    _reset(converter, UCNV_RESET_BOTH, TRUE);
 }
 
 U_CAPI void  U_EXPORT2
 ucnv_resetToUnicode(UConverter *converter)
 {
-    _reset(converter, UCNV_RESET_TO_UNICODE);
+    _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
 }
 
 U_CAPI void  U_EXPORT2
 ucnv_resetFromUnicode(UConverter *converter)
 {
-    _reset(converter, UCNV_RESET_FROM_UNICODE);
+    _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
 }
 
 U_CAPI int8_t   U_EXPORT2
 ucnv_getMaxCharSize (const UConverter * converter)
 {
-    return converter->sharedData->staticData->maxBytesPerChar;
+    return converter->maxBytesPerUChar;
 }
 
 
@@ -663,14 +669,27 @@ ucnv_getName (const UConverter * converter, UErrorCode * err)
     return converter->sharedData->staticData->name;
 }
 
-U_CAPI int32_t   U_EXPORT2
-ucnv_getCCSID (const UConverter * converter,
-                        UErrorCode * err)
+U_CAPI int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter * converter,
+              UErrorCode * err)
 {
+    int32_t ccsid;
     if (U_FAILURE (*err))
         return -1;
 
-    return converter->sharedData->staticData->codepage;
+    ccsid = converter->sharedData->staticData->codepage;
+    if (ccsid == 0) {
+        /* Rare case. This is for cases like gb18030,
+        which doesn't have an IBM canonical name, but does have an IBM alias. */
+        const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
+        if (U_SUCCESS(*err) && standardName) {
+            const char *ccsidStr = uprv_strchr(standardName, '-');
+            if (ccsidStr) {
+                ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
+            }
+        }
+    }
+    return ccsid;
 }
 
 
@@ -684,33 +703,6 @@ ucnv_getPlatform (const UConverter * converter,
     return (UConverterPlatform)converter->sharedData->staticData->platform;
 }
 
-U_CAPI void U_EXPORT2
-ucnv_getUnicodeSet(const UConverter *cnv,
-                   USet *set,
-                   UConverterUnicodeSet which,
-                   UErrorCode *pErrorCode) {
-    /* argument checking */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return;
-    }
-    if(cnv==NULL || set==NULL || which<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=which) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    /* does this converter support this function? */
-    if(cnv->sharedData->impl->getUnicodeSet==NULL) {
-        *pErrorCode=U_UNSUPPORTED_ERROR;
-        return;
-    }
-
-    /* empty the set */
-    uset_clear(set);
-
-    /* call the converter to add the code points it supports */
-    cnv->sharedData->impl->getUnicodeSet(cnv, set, which, pErrorCode);
-}
-
 U_CAPI void U_EXPORT2
     ucnv_getToUCallBack (const UConverter * converter,
                          UConverterToUCallback *action,
@@ -761,203 +753,966 @@ ucnv_setFromUCallBack (UConverter * converter,
     converter->fromUContext = newContext;
 }
 
-U_CAPI void  U_EXPORT2
-ucnv_fromUnicode (UConverter * _this,
-                       char **target,
-                       const char *targetLimit,
-                       const UChar ** source,
-                       const UChar * sourceLimit,
-                       int32_t* offsets,
-                       UBool flush,
-                       UErrorCode * err)
-{
-    UConverterFromUnicodeArgs args;
-    const char *t;
+/*
+ * Post-process the first "length" entries of offsets[].
+ *
+ * With sourceIndex>=0 the adjustment is delta=sourceIndex-errorInputLength:
+ * - delta==0 leaves all entries unchanged
+ * - delta>0 is added to every entry that is >=0
+ * - delta<0 overwrites every entry with -1
+ * With sourceIndex<0 every entry is overwritten with -1.
+ */
+static void
+_updateOffsets(int32_t *offsets, int32_t length,
+               int32_t sourceIndex, int32_t errorInputLength) {
+    int32_t *limit;
+    int32_t delta, offset;
+
+    if(sourceIndex>=0) {
+        /*
+         * adjust each offset by adding the previous sourceIndex
+         * minus the length of the input sequence that caused an
+         * error, if any
+         */
+        delta=sourceIndex-errorInputLength;
+    } else {
+        /*
+         * set each offset to -1 because this conversion function
+         * does not handle offsets
+         */
+        delta=-1;
+    }
+
+    limit=offsets+length;
+    if(delta==0) {
+        /* most common case, nothing to do */
+    } else if(delta>0) {
+        /* add the delta to each offset (but not if the offset is <0) */
+        while(offsets<limit) {
+            offset=*offsets;
+            if(offset>=0) {
+                *offsets=offset+delta;
+            }
+            ++offsets;
+        }
+    } else /* delta<0 */ {
+        /*
+         * set each offset to -1 because this conversion function
+         * does not handle offsets
+         * or the error input sequence started in a previous buffer
+         */
+        while(offsets<limit) {
+            *offsets++=-1;
+        }
+    }
+}
+
+/* ucnv_fromUnicode --------------------------------------------------------- */
+
+/*
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ */
+
+/*
+ * Core fromUnicode driver.
+ *
+ * Repeatedly calls the converter's fromUnicode (or fromUnicodeWithOffsets)
+ * implementation on pArgs, fixes up the offsets that were written,
+ * replays source units stored in cnv->preFromU, and invokes the
+ * fromUCharErrorBehaviour callback for invalid, illegal and truncated input.
+ *
+ * Returns when the input has been consumed successfully, or when *err holds
+ * an error that the callback did not or cannot resolve
+ * (for example U_BUFFER_OVERFLOW_ERROR).
+ * pArgs->source, pArgs->target and pArgs->offsets are advanced in place.
+ */
+static void
+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
+    UConverterFromUnicode fromUnicode;
+    UConverter *cnv;
+    const UChar *s;
+    char *t;
+    int32_t *offsets;
+    int32_t sourceIndex;
+    int32_t errorInputLength;
+    UBool converterSawEndOfInput, calledCallback;
+
+    /* variables for m:n conversion */
+    UChar replay[UCNV_EXT_MAX_UCHARS];
+    const UChar *realSource, *realSourceLimit;
+    int32_t realSourceIndex;
+    UBool realFlush;
+
+    cnv=pArgs->converter;
+    s=pArgs->source;
+    t=pArgs->target;
+    offsets=pArgs->offsets;
+
+    /* get the converter implementation function */
+    sourceIndex=0;
+    if(offsets==NULL) {
+        fromUnicode=cnv->sharedData->impl->fromUnicode;
+    } else {
+        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
+        if(fromUnicode==NULL) {
+            /* there is no WithOffsets implementation */
+            fromUnicode=cnv->sharedData->impl->fromUnicode;
+            /* we will write -1 for each offset */
+            sourceIndex=-1;
+        }
+    }
+
+    if(cnv->preFromULength>=0) {
+        /* normal mode */
+        realSource=NULL;
+
+        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+        realSourceLimit=NULL;
+        realFlush=FALSE;
+        realSourceIndex=0;
+    } else {
+        /*
+         * Previous m:n conversion stored source units from a partial match
+         * and failed to consume all of them.
+         * We need to "replay" them from a temporary buffer and convert them first.
+         */
+        realSource=pArgs->source;
+        realSourceLimit=pArgs->sourceLimit;
+        realFlush=pArgs->flush;
+        realSourceIndex=sourceIndex;
+
+        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+        pArgs->source=replay;
+        pArgs->sourceLimit=replay-cnv->preFromULength;
+        pArgs->flush=FALSE;
+        sourceIndex=-1;
+
+        cnv->preFromULength=0;
+    }
+
+    /*
+     * loop for conversion and error handling
+     *
+     * loop {
+     *   convert
+     *   loop {
+     *     update offsets
+     *     handle end of input
+     *     handle errors/call callback
+     *   }
+     * }
+     */
+    for(;;) {
+        if(U_SUCCESS(*err)) {
+            /* convert */
+            fromUnicode(pArgs, err);
+
+            /*
+             * set a flag for whether the converter
+             * successfully processed the end of the input
+             *
+             * need not check cnv->preFromULength==0 because a replay (<0) will cause
+             * s<sourceLimit before converterSawEndOfInput is checked
+             */
+            converterSawEndOfInput=
+                (UBool)(U_SUCCESS(*err) &&
+                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+                        cnv->fromUChar32==0);
+        } else {
+            /* handle error from ucnv_convertEx() */
+            converterSawEndOfInput=FALSE;
+        }
+
+        /* no callback called yet for this iteration */
+        calledCallback=FALSE;
+
+        /* no sourceIndex adjustment for conversion, only for callback output */
+        errorInputLength=0;
+
+        /*
+         * loop for offsets and error handling
+         *
+         * iterates at most 3 times:
+         * 1. to clean up after the conversion function
+         * 2. after the callback
+         * 3. after the callback again if there was truncated input
+         */
+        for(;;) {
+            /* update offsets if we write any */
+            if(offsets!=NULL) {
+                int32_t length=(int32_t)(pArgs->target-t);
+                if(length>0) {
+                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+                    /*
+                     * if a converter handles offsets and updates the offsets
+                     * pointer at the end, then pArgs->offsets should not change
+                     * here;
+                     * however, some converters do not handle offsets at all
+                     * (sourceIndex<0) or may not update the offsets pointer
+                     */
+                    pArgs->offsets=offsets+=length;
+                }
+
+                if(sourceIndex>=0) {
+                    sourceIndex+=(int32_t)(pArgs->source-s);
+                }
+            }
+
+            if(cnv->preFromULength<0) {
+                /*
+                 * switch the source to new replay units (cannot occur while replaying)
+                 * after offset handling and before end-of-input and callback handling
+                 */
+                if(realSource==NULL) {
+                    realSource=pArgs->source;
+                    realSourceLimit=pArgs->sourceLimit;
+                    realFlush=pArgs->flush;
+                    realSourceIndex=sourceIndex;
+
+                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
+                    pArgs->source=replay;
+                    pArgs->sourceLimit=replay-cnv->preFromULength;
+                    pArgs->flush=FALSE;
+                    if((sourceIndex+=cnv->preFromULength)<0) {
+                        sourceIndex=-1;
+                    }
+
+                    cnv->preFromULength=0;
+                } else {
+                    /* see implementation note before _fromUnicodeWithCallback() */
+                    U_ASSERT(realSource==NULL);
+                    *err=U_INTERNAL_PROGRAM_ERROR;
+                }
+            }
+
+            /* update pointers */
+            s=pArgs->source;
+            t=pArgs->target;
+
+            if(U_SUCCESS(*err)) {
+                if(s<pArgs->sourceLimit) {
+                    /*
+                     * continue with the conversion loop while there is still input left
+                     * (continue converting by breaking out of only the inner loop)
+                     */
+                    break;
+                } else if(realSource!=NULL) {
+                    /* switch back from replaying to the real source and continue */
+                    pArgs->source=realSource;
+                    pArgs->sourceLimit=realSourceLimit;
+                    pArgs->flush=realFlush;
+                    sourceIndex=realSourceIndex;
+
+                    realSource=NULL;
+                    break;
+                } else if(pArgs->flush && cnv->fromUChar32!=0) {
+                    /*
+                     * the entire input stream is consumed
+                     * and there is a partial, truncated input sequence left
+                     */
+
+                    /* inject an error and continue with callback handling */
+                    *err=U_TRUNCATED_CHAR_FOUND;
+                    calledCallback=FALSE; /* new error condition */
+                } else {
+                    /* input consumed */
+                    if(pArgs->flush) {
+                        /*
+                         * return to the conversion loop once more if the flush
+                         * flag is set and the conversion function has not
+                         * successfully processed the end of the input yet
+                         *
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        if(!converterSawEndOfInput) {
+                            break;
+                        }
+
+                        /* reset the converter without calling the callback function */
+                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
+                    }
+
+                    /* done successfully */
+                    return;
+                }
+            }
+
+            /* U_FAILURE(*err) */
+            {
+                UErrorCode e;
+
+                if( calledCallback ||
+                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+                    (e!=U_INVALID_CHAR_FOUND &&
+                     e!=U_ILLEGAL_CHAR_FOUND &&
+                     e!=U_TRUNCATED_CHAR_FOUND)
+                ) {
+                    /*
+                     * the callback did not or cannot resolve the error:
+                     * set output pointers and return
+                     *
+                     * the check for buffer overflow is redundant but it is
+                     * a high-runner case and hopefully documents the intent
+                     * well
+                     *
+                     * if we were replaying, then the replay buffer must be
+                     * copied back into the UConverter
+                     * and the real arguments must be restored
+                     */
+                    if(realSource!=NULL) {
+                        int32_t length;
+
+                        U_ASSERT(cnv->preFromULength==0);
+
+                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+                        if(length>0) {
+                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
+                            cnv->preFromULength=(int8_t)-length;
+                        }
+
+                        pArgs->source=realSource;
+                        pArgs->sourceLimit=realSourceLimit;
+                        pArgs->flush=realFlush;
+                    }
+
+                    return;
+                }
+            }
+
+            /* callback handling */
+            {
+                UChar32 codePoint;
+
+                /* get and write the code point */
+                codePoint=cnv->fromUChar32;
+                errorInputLength=0;
+                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
+                cnv->invalidUCharLength=(int8_t)errorInputLength;
+
+                /* set the converter state to deal with the next character */
+                cnv->fromUChar32=0;
+
+                /* call the callback function */
+                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
+                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
+                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
+                    err);
+            }
+
+            /*
+             * loop back to the offset handling
+             *
+             * this flag will indicate after offset handling
+             * that a callback was called;
+             * if the callback did not resolve the error, then we return
+             */
+            calledCallback=TRUE;
+        }
+    }
+}
+
+/*
+ * Output the fromUnicode overflow buffer.
+ * Call this function if(cnv->charErrorBufferLength>0).
+ *
+ * Copies bytes from cnv->charErrorBuffer to *target until the overflow
+ * buffer is empty or targetLimit is reached. If pOffsets and *pOffsets are
+ * not NULL, a -1 is written to the offsets for each byte copied.
+ * *target (and *pOffsets, if used) are advanced past what was written.
+ *
+ * If the target fills up first, the bytes that did not fit are moved to the
+ * front of the overflow buffer, cnv->charErrorBufferLength is set to their
+ * count, and *err is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowFromUnicode(UConverter *cnv,
+                               char **target, const char *targetLimit,
+                               int32_t **pOffsets,
+                               UErrorCode *err) {
+    int32_t *offsets;
+    char *overflow, *t;
+    int32_t i, length;
+
+    t=*target;
+    if(pOffsets!=NULL) {
+        offsets=*pOffsets;
+    } else {
+        offsets=NULL;
+    }
+
+    overflow=(char *)cnv->charErrorBuffer;
+    length=cnv->charErrorBufferLength;
+    i=0;
+    while(i<length) {
+        if(t==targetLimit) {
+            /* the overflow buffer contains too much, keep the rest */
+            int32_t j=0;
+
+            do {
+                overflow[j++]=overflow[i++];
+            } while(i<length);
+
+            cnv->charErrorBufferLength=(int8_t)j;
+            *target=t;
+            if(offsets!=NULL) {
+                *pOffsets=offsets;
+            }
+            *err=U_BUFFER_OVERFLOW_ERROR;
+            return TRUE;
+        }
+
+        /* copy the overflow contents to the target */
+        *t++=overflow[i++];
+        if(offsets!=NULL) {
+            *offsets++=-1; /* no source index available for old output */
+        }
+    }
+
+    /* the overflow buffer is completely copied to the target */
+    cnv->charErrorBufferLength=0;
+    *target=t;
+    if(offsets!=NULL) {
+        *pOffsets=offsets;
+    }
+    return FALSE;
+}
+
+/*
+ * Public entry point for converting UChars to bytes.
+ *
+ * Validates the arguments (setting U_ILLEGAL_ARGUMENT_ERROR on failure),
+ * first drains any bytes left in the converter's overflow buffer into the
+ * target, and then hands the remaining work to _fromUnicodeWithCallback().
+ * After that call, *source and *target are set from the converter arguments.
+ * Returns immediately if err is NULL or *err already indicates a failure.
+ */
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode(UConverter *cnv,
+                 char **target, const char *targetLimit,
+                 const UChar **source, const UChar *sourceLimit,
+                 int32_t *offsets,
+                 UBool flush,
+                 UErrorCode *err) {
+    UConverterFromUnicodeArgs args;
+    const UChar *s;
+    char *t;
+
+    /* check parameters */
+    if(err==NULL || U_FAILURE(*err)) {
+        return;
+    }
+
+    if(cnv==NULL || target==NULL || source==NULL) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    s=*source;
+    t=*target;
+
+    if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
+        /*
+        Prevent code from going into an infinite loop in case we do hit this
+        limit. The limit pointer is expected to be on a UChar * boundary.
+        This also prevents the next argument check from failing.
+        */
+        sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
+    }
+
+    /*
+     * All these conditions should never happen.
+     *
+     * 1) Make sure that the limits are >= to the address source or target
+     *
+     * 2) Make sure that the buffer sizes do not exceed the number range for
+     * int32_t because some functions use the size (in units or bytes)
+     * rather than comparing pointers, and because offsets are int32_t values.
+     *
+     * size_t is guaranteed to be unsigned and large enough for the job.
+     *
+     * Return with an error instead of adjusting the limits because we would
+     * not be able to maintain the semantics that either the source must be
+     * consumed or the target filled (unless an error occurs).
+     * An adjustment would be targetLimit=t+0x7fffffff; for example.
+     *
+     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+     * to a char * pointer and provide an incomplete UChar code unit.
+     */
+    if (sourceLimit<s || targetLimit<t ||
+        ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
+        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
+        (((const char *)sourceLimit-(const char *)s) & 1) != 0)
+    {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    
+    /* output the target overflow buffer */
+    if( cnv->charErrorBufferLength>0 &&
+        ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
+    ) {
+        /* U_BUFFER_OVERFLOW_ERROR */
+        return;
+    }
+    /* *target may have moved, therefore stop using t */
+
+    if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
+        /* the overflow buffer is emptied and there is no new input: we are done */
+        return;
+    }
+
+    /*
+     * Do not simply return with a buffer overflow error if
+     * !flush && t==targetLimit
+     * because it is possible that the source will not generate any output.
+     * For example, the skip callback may be called;
+     * it does not output anything.
+     */
+
+    /* prepare the converter arguments */
+    args.converter=cnv;
+    args.flush=flush;
+    args.offsets=offsets;
+    args.source=s;
+    args.sourceLimit=sourceLimit;
+    args.target=*target;
+    args.targetLimit=targetLimit;
+    args.size=sizeof(args);
+
+    _fromUnicodeWithCallback(&args, err);
+
+    *source=args.source;
+    *target=args.target;
+}
+
+/* ucnv_toUnicode() --------------------------------------------------------- */
+
+/*
+ * Core toUnicode driver, the counterpart of _fromUnicodeWithCallback().
+ *
+ * Repeatedly calls the converter's toUnicode (or toUnicodeWithOffsets)
+ * implementation on pArgs, fixes up the offsets that were written,
+ * replays source bytes stored in cnv->preToU, and invokes the
+ * fromCharErrorBehaviour callback for invalid, illegal and truncated input
+ * as well as illegal/unsupported escape sequences.
+ *
+ * Returns when the input has been consumed successfully, or when *err holds
+ * an error that the callback did not or cannot resolve
+ * (for example U_BUFFER_OVERFLOW_ERROR).
+ * pArgs->source, pArgs->target and pArgs->offsets are advanced in place.
+ */
+static void
+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+    UConverterToUnicode toUnicode;
+    UConverter *cnv;
+    const char *s;
+    UChar *t;
+    int32_t *offsets;
+    int32_t sourceIndex;
+    int32_t errorInputLength;
+    UBool converterSawEndOfInput, calledCallback;
+
+    /* variables for m:n conversion */
+    char replay[UCNV_EXT_MAX_BYTES];
+    const char *realSource, *realSourceLimit;
+    int32_t realSourceIndex;
+    UBool realFlush;
+
+    cnv=pArgs->converter;
+    s=pArgs->source;
+    t=pArgs->target;
+    offsets=pArgs->offsets;
+
+    /* get the converter implementation function */
+    sourceIndex=0;
+    if(offsets==NULL) {
+        toUnicode=cnv->sharedData->impl->toUnicode;
+    } else {
+        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
+        if(toUnicode==NULL) {
+            /* there is no WithOffsets implementation */
+            toUnicode=cnv->sharedData->impl->toUnicode;
+            /* we will write -1 for each offset */
+            sourceIndex=-1;
+        }
+    }
+
+    if(cnv->preToULength>=0) {
+        /* normal mode */
+        realSource=NULL;
+
+        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
+        realSourceLimit=NULL;
+        realFlush=FALSE;
+        realSourceIndex=0;
+    } else {
+        /*
+         * Previous m:n conversion stored source units from a partial match
+         * and failed to consume all of them.
+         * We need to "replay" them from a temporary buffer and convert them first.
+         */
+        realSource=pArgs->source;
+        realSourceLimit=pArgs->sourceLimit;
+        realFlush=pArgs->flush;
+        realSourceIndex=sourceIndex;
+
+        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+        pArgs->source=replay;
+        pArgs->sourceLimit=replay-cnv->preToULength;
+        pArgs->flush=FALSE;
+        sourceIndex=-1;
+
+        cnv->preToULength=0;
+    }
+
+    /*
+     * loop for conversion and error handling
+     *
+     * loop {
+     *   convert
+     *   loop {
+     *     update offsets
+     *     handle end of input
+     *     handle errors/call callback
+     *   }
+     * }
+     */
+    for(;;) {
+        if(U_SUCCESS(*err)) {
+            /* convert */
+            toUnicode(pArgs, err);
+
+            /*
+             * set a flag for whether the converter
+             * successfully processed the end of the input
+             *
+             * need not check cnv->preToULength==0 because a replay (<0) will cause
+             * s<sourceLimit before converterSawEndOfInput is checked
+             */
+            converterSawEndOfInput=
+                (UBool)(U_SUCCESS(*err) &&
+                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
+                        cnv->toULength==0);
+        } else {
+            /* handle error from getNextUChar() or ucnv_convertEx() */
+            converterSawEndOfInput=FALSE;
+        }
+
+        /* no callback called yet for this iteration */
+        calledCallback=FALSE;
+
+        /* no sourceIndex adjustment for conversion, only for callback output */
+        errorInputLength=0;
+
+        /*
+         * loop for offsets and error handling
+         *
+         * iterates at most 3 times:
+         * 1. to clean up after the conversion function
+         * 2. after the callback
+         * 3. after the callback again if there was truncated input
+         */
+        for(;;) {
+            /* update offsets if we write any */
+            if(offsets!=NULL) {
+                int32_t length=(int32_t)(pArgs->target-t);
+                if(length>0) {
+                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+                    /*
+                     * if a converter handles offsets and updates the offsets
+                     * pointer at the end, then pArgs->offsets should not change
+                     * here;
+                     * however, some converters do not handle offsets at all
+                     * (sourceIndex<0) or may not update the offsets pointer
+                     */
+                    pArgs->offsets=offsets+=length;
+                }
+
+                if(sourceIndex>=0) {
+                    sourceIndex+=(int32_t)(pArgs->source-s);
+                }
+            }
+
+            if(cnv->preToULength<0) {
+                /*
+                 * switch the source to new replay units (cannot occur while replaying)
+                 * after offset handling and before end-of-input and callback handling
+                 */
+                if(realSource==NULL) {
+                    realSource=pArgs->source;
+                    realSourceLimit=pArgs->sourceLimit;
+                    realFlush=pArgs->flush;
+                    realSourceIndex=sourceIndex;
+
+                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
+                    pArgs->source=replay;
+                    pArgs->sourceLimit=replay-cnv->preToULength;
+                    pArgs->flush=FALSE;
+                    if((sourceIndex+=cnv->preToULength)<0) {
+                        sourceIndex=-1;
+                    }
+
+                    cnv->preToULength=0;
+                } else {
+                    /* see implementation note before _fromUnicodeWithCallback() */
+                    U_ASSERT(realSource==NULL);
+                    *err=U_INTERNAL_PROGRAM_ERROR;
+                }
+            }
 
-    /*
-    * Check parameters in for all conversions
-    */
-    if (err == NULL || U_FAILURE (*err)) {
-        return;
-    }
+            /* update pointers */
+            s=pArgs->source;
+            t=pArgs->target;
+
+            if(U_SUCCESS(*err)) {
+                if(s<pArgs->sourceLimit) {
+                    /*
+                     * continue with the conversion loop while there is still input left
+                     * (continue converting by breaking out of only the inner loop)
+                     */
+                    break;
+                } else if(realSource!=NULL) {
+                    /* switch back from replaying to the real source and continue */
+                    pArgs->source=realSource;
+                    pArgs->sourceLimit=realSourceLimit;
+                    pArgs->flush=realFlush;
+                    sourceIndex=realSourceIndex;
+
+                    realSource=NULL;
+                    break;
+                } else if(pArgs->flush && cnv->toULength>0) {
+                    /*
+                     * the entire input stream is consumed
+                     * and there is a partial, truncated input sequence left
+                     */
+
+                    /* inject an error and continue with callback handling */
+                    *err=U_TRUNCATED_CHAR_FOUND;
+                    calledCallback=FALSE; /* new error condition */
+                } else {
+                    /* input consumed */
+                    if(pArgs->flush) {
+                        /*
+                         * return to the conversion loop once more if the flush
+                         * flag is set and the conversion function has not
+                         * successfully processed the end of the input yet
+                         *
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        if(!converterSawEndOfInput) {
+                            break;
+                        }
+
+                        /* reset the converter without calling the callback function */
+                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+                    }
+
+                    /* done successfully */
+                    return;
+                }
+            }
 
-    if (_this == NULL || target == NULL || source == NULL) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
+            /* U_FAILURE(*err) */
+            {
+                UErrorCode e;
+
+                if( calledCallback ||
+                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
+                    (e!=U_INVALID_CHAR_FOUND &&
+                     e!=U_ILLEGAL_CHAR_FOUND &&
+                     e!=U_TRUNCATED_CHAR_FOUND &&
+                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
+                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
+                ) {
+                    /*
+                     * the callback did not or cannot resolve the error:
+                     * set output pointers and return
+                     *
+                     * the check for buffer overflow is redundant but it is
+                     * a high-runner case and hopefully documents the intent
+                     * well
+                     *
+                     * if we were replaying, then the replay buffer must be
+                     * copied back into the UConverter
+                     * and the real arguments must be restored
+                     */
+                    if(realSource!=NULL) {
+                        int32_t length;
+
+                        U_ASSERT(cnv->preToULength==0);
+
+                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
+                        if(length>0) {
+                            uprv_memcpy(cnv->preToU, pArgs->source, length);
+                            cnv->preToULength=(int8_t)-length;
+                        }
+
+                        pArgs->source=realSource;
+                        pArgs->sourceLimit=realSourceLimit;
+                        pArgs->flush=realFlush;
+                    }
+
+                    return;
+                }
+            }
 
-    t = *target;
-    if (targetLimit < t || sourceLimit < *source)
-    {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
+            /* copy toUBytes[] to invalidCharBuffer[] */
+            errorInputLength=cnv->invalidCharLength=cnv->toULength;
+            if(errorInputLength>0) {
+                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
+            }
 
-    /*
-    * Make sure that the target buffer size does not exceed the number range for int32_t
-    * because some functions use the size rather than comparing pointers.
-    * size_t is guaranteed to be unsigned.
-    */
-    if((size_t)(targetLimit - t) > (size_t)0x7fffffff && targetLimit > t)
-    {
-        targetLimit = t + 0x7fffffff;
-    }
-    
-    /*
-    * Deal with stored carry over data.  This is done in the common location
-    * to avoid doing it for each conversion.
-    */
-    if (_this->charErrorBufferLength > 0)
-    {
-        int32_t myTargetIndex = 0;
-        
-        ucnv_flushInternalCharBuffer (_this, 
-                (char *)t,
-                &myTargetIndex,
-                targetLimit - *target,
-                offsets?&offsets:NULL,
+            /* set the converter state to deal with the next character */
+            cnv->toULength=0;
+
+            /* call the callback function */
+            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
+                cnv->toUCallbackReason = UCNV_UNASSIGNED;
+            }
+            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+                cnv->invalidCharBuffer, errorInputLength,
+                cnv->toUCallbackReason,
                 err);
-        *target += myTargetIndex;
-        if (U_FAILURE (*err))
-            return;
+            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
+
+            /*
+             * loop back to the offset handling
+             *
+             * this flag will indicate after offset handling
+             * that a callback was called;
+             * if the callback did not resolve the error, then we return
+             */
+            calledCallback=TRUE;
+        }
     }
+}
 
-    if(!flush && *source == sourceLimit) {
-        /* the overflow buffer is emptied and there is no new input: we are done */
-        return;
+/*
+ * Output the toUnicode overflow buffer.
+ * Call this function if(cnv->UCharErrorBufferLength>0).
+ *
+ * Copies UChars from cnv->UCharErrorBuffer to *target until the overflow
+ * buffer is empty or targetLimit is reached. If pOffsets and *pOffsets are
+ * not NULL, a -1 is written to the offsets for each UChar copied.
+ * *target (and *pOffsets, if used) are advanced past what was written.
+ *
+ * If the target fills up first, the UChars that did not fit are moved to the
+ * front of the overflow buffer, cnv->UCharErrorBufferLength is set to their
+ * count, and *err is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * @return TRUE if overflow
+ */
+static UBool
+ucnv_outputOverflowToUnicode(UConverter *cnv,
+                             UChar **target, const UChar *targetLimit,
+                             int32_t **pOffsets,
+                             UErrorCode *err) {
+    int32_t *offsets;
+    UChar *overflow, *t;
+    int32_t i, length;
+
+    t=*target;
+    if(pOffsets!=NULL) {
+        offsets=*pOffsets;
+    } else {
+        offsets=NULL;
     }
 
-    args.converter = _this;
-    args.flush = flush;
-    args.offsets = offsets;
-    args.source = *source;
-    args.sourceLimit = sourceLimit;
-    args.target = *target;
-    args.targetLimit = targetLimit;
-    args.size = sizeof(args);
-    if (offsets)
-    {
-        if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL)
-        {
-            _this->sharedData->impl->fromUnicodeWithOffsets(&args, err);
-            *source = args.source;
-            *target = args.target;
-            return;
-        }
-        else {
-            /* there is no implementation that sets offsets, set them all to -1 */
-            int32_t i, targetSize = targetLimit - *target;
-            
-            for (i=0; i<targetSize; i++) {
-                offsets[i] = -1;
+    overflow=cnv->UCharErrorBuffer;
+    length=cnv->UCharErrorBufferLength;
+    i=0;
+    while(i<length) {
+        if(t==targetLimit) {
+            /* the overflow buffer contains too much, keep the rest */
+            int32_t j=0;
+
+            do {
+                overflow[j++]=overflow[i++];
+            } while(i<length);
+
+            cnv->UCharErrorBufferLength=(int8_t)j;
+            *target=t;
+            if(offsets!=NULL) {
+                *pOffsets=offsets;
             }
+            *err=U_BUFFER_OVERFLOW_ERROR;
+            return TRUE;
         }
-    }
-    
-    /*calls the specific conversion routines */
-    _this->sharedData->impl->fromUnicode(&args, err);
-    *source = args.source;
-    *target = args.target;
-}
 
+        /* copy the overflow contents to the target */
+        *t++=overflow[i++];
+        if(offsets!=NULL) {
+            *offsets++=-1; /* no source index available for old output */
+        }
+    }
 
+    /* the overflow buffer is completely copied to the target */
+    cnv->UCharErrorBufferLength=0;
+    *target=t;
+    if(offsets!=NULL) {
+        *pOffsets=offsets;
+    }
+    return FALSE;
+}
 
-U_CAPI void    U_EXPORT2
-ucnv_toUnicode (UConverter * _this,
-                UChar ** target,
-                const UChar * targetLimit,
-                const char **source,
-                const char *sourceLimit,
-                int32_t* offsets,
-                UBool flush,
-                UErrorCode * err)
-{
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *cnv,
+               UChar **target, const UChar *targetLimit,
+               const char **source, const char *sourceLimit,
+               int32_t *offsets,
+               UBool flush,
+               UErrorCode *err) {
     UConverterToUnicodeArgs args;
-    const UChar *t;
+    const char *s;
+    UChar *t;
 
-    /*
-    * Check parameters in for all conversions
-    */
-    if (err == NULL || U_FAILURE (*err)) {
+    /* check parameters */
+    if(err==NULL || U_FAILURE(*err)) {
         return;
     }
 
-    if (_this == NULL || target == NULL || source == NULL) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    if(cnv==NULL || target==NULL || source==NULL) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 
-    t = *target;
-    if (targetLimit < t || sourceLimit < *source) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
+    s=*source;
+    t=*target;
 
-    /*
-    * Make sure that the target buffer size does not exceed the number range for int32_t
-    * because some functions use the size rather than comparing pointers.
-    * size_t is guaranteed to be unsigned.
-    */
-    if((size_t)(targetLimit - t) > (size_t)0x3fffffff && targetLimit > t) {
-        targetLimit = t + 0x3fffffff;
+    if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
+        /*
+        Prevent code from going into an infinite loop in case we do hit this
+        limit. The limit pointer is expected to be on a UChar * boundary.
+        This also prevents the next argument check from failing.
+        */
+        targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
     }
 
     /*
-    * Deal with stored carry over data.  This is done in the common location
-    * to avoid doing it for each conversion.
-    */
-    if (_this->UCharErrorBufferLength > 0)
-    {
-        int32_t myTargetIndex = 0;
-
-        ucnv_flushInternalUnicodeBuffer (_this, 
-                (UChar *)t,
-                &myTargetIndex,
-                targetLimit - *target,
-                offsets?&offsets:NULL,
-                err);
-        *target += myTargetIndex;
-        if (U_FAILURE (*err))
-            return;
+     * All these conditions should never happen.
+     *
+     * 1) Make sure that the limits are >= the source or target address
+     *
+     * 2) Make sure that the buffer sizes do not exceed the number range for
+     * int32_t because some functions use the size (in units or bytes)
+     * rather than comparing pointers, and because offsets are int32_t values.
+     *
+     * size_t is guaranteed to be unsigned and large enough for the job.
+     *
+     * Return with an error instead of adjusting the limits because we would
+     * not be able to maintain the semantics that either the source must be
+     * consumed or the target filled (unless an error occurs).
+     * An adjustment would be sourceLimit=s+0x7fffffff; for example.
+     *
+     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
+     * to a char * pointer and provide an incomplete UChar code unit.
+     */
+    if (sourceLimit<s || targetLimit<t ||
+        ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+        ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
+        (((const char *)targetLimit-(const char *)t) & 1) != 0
+    ) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    
+    /* output the target overflow buffer */
+    if( cnv->UCharErrorBufferLength>0 &&
+        ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
+    ) {
+        /* U_BUFFER_OVERFLOW_ERROR */
+        return;
     }
+    /* *target may have moved, therefore stop using t */
 
-    if(!flush && *source == sourceLimit) {
+    if(!flush && s==sourceLimit && cnv->preToULength>=0) {
         /* the overflow buffer is emptied and there is no new input: we are done */
         return;
     }
 
-    args.converter = _this;
-    args.flush = flush;
-    args.offsets = offsets;
-    args.source = (char *) *source;
-    args.sourceLimit = sourceLimit;
-    args.target =  *target;
-    args.targetLimit = targetLimit;
-    args.size = sizeof(args);
-    if (offsets) {
-        if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) {
-            _this->sharedData->impl->toUnicodeWithOffsets(&args, err);
-            *source = args.source;
-            *target = args.target;
-            return;
-        } else {
-            /* there is no implementation that sets offsets, set them all to -1 */
-            int32_t i, targetSize = targetLimit - *target;
-            
-            for (i=0; i<targetSize; i++) {
-                offsets[i] = -1;
-            }
-        }
-    }
+    /*
+     * Do not simply return with a buffer overflow error if
+     * !flush && t==targetLimit
+     * because it is possible that the source will not generate any output.
+     * For example, the skip callback may be called;
+     * it does not output anything.
+     */
 
-    /*calls the specific conversion routines */
-    _this->sharedData->impl->toUnicode(&args, err); 
+    /* prepare the converter arguments */
+    args.converter=cnv;
+    args.flush=flush;
+    args.offsets=offsets;
+    args.source=s;
+    args.sourceLimit=sourceLimit;
+    args.target=*target;
+    args.targetLimit=targetLimit;
+    args.size=sizeof(args);
 
-    *source = args.source;
-    *target = args.target;
-    return;
+    _toUnicodeWithCallback(&args, err);
+
+    *source=args.source;
+    *target=args.target;
 }
 
+/* ucnv_to/fromUChars() ----------------------------------------------------- */
+
 U_CAPI int32_t U_EXPORT2
 ucnv_fromUChars(UConverter *cnv,
                 char *dest, int32_t destCapacity,
@@ -1044,7 +1799,7 @@ ucnv_toUChars(UConverter *cnv,
     ucnv_resetToUnicode(cnv);
     originalDest=dest;
     if(srcLength==-1) {
-        srcLength=uprv_strlen(src);
+        srcLength=(int32_t)uprv_strlen(src);
     }
     if(srcLength>0) {
         srcLimit=src+srcLength;
@@ -1080,65 +1835,213 @@ ucnv_toUChars(UConverter *cnv,
     return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
 }
 
-U_CAPI UChar32  U_EXPORT2
-ucnv_getNextUChar(UConverter * converter,
-                  const char **source,
-                  const char *sourceLimit,
-                  UErrorCode * err)
-{
+/* ucnv_getNextUChar() ------------------------------------------------------ */
+
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter *cnv,
+                  const char **source, const char *sourceLimit,
+                  UErrorCode *err) {
     UConverterToUnicodeArgs args;
-    UChar32 ch;
+    UChar buffer[U16_MAX_LENGTH];
+    const char *s;
+    UChar32 c;
+    int32_t i, length;
 
-    if(err == NULL || U_FAILURE(*err)) {
+    /* check parameters */
+    if(err==NULL || U_FAILURE(*err)) {
         return 0xffff;
     }
 
-    if(converter == NULL || source == NULL || sourceLimit < *source) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    if(cnv==NULL || source==NULL) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
         return 0xffff;
     }
 
-    /* In case internal data had been stored
-    * we return the first UChar32 in the internal buffer,
-    * and update the internal state accordingly
-    */
-    if (converter->UCharErrorBufferLength > 0)
-    {
-        int32_t i = 0;
-        UChar32 myUChar;
-        UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar);
-        /*In this memmove we update the internal buffer by
-        *popping the first character.
-        *Note that in the call itself we decrement
-        *UCharErrorBufferLength
-        */
-        uprv_memmove (converter->UCharErrorBuffer,
-            converter->UCharErrorBuffer + i,
-            (converter->UCharErrorBufferLength - i) * sizeof (UChar));
-        converter->UCharErrorBufferLength -= (int8_t)i;
-        return myUChar;
-    }
-    /*calls the specific conversion routines */
-    /*as dictated in a code review, avoids a switch statement */
-    args.converter = converter;
-    args.flush = TRUE;
-    args.offsets = NULL;
-    args.source = *source;
-    args.sourceLimit = sourceLimit;
-    args.target = NULL;
-    args.targetLimit = NULL;
-    args.size = sizeof(args);
-    if (converter->sharedData->impl->getNextUChar != NULL)
-    {
-        ch = converter->sharedData->impl->getNextUChar(&args, err);
+    s=*source;
+    if(sourceLimit<s) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0xffff;
+    }
+
+    /*
+     * Make sure that the buffer sizes do not exceed the number range for
+     * int32_t because some functions use the size (in units or bytes)
+     * rather than comparing pointers, and because offsets are int32_t values.
+     *
+     * size_t is guaranteed to be unsigned and large enough for the job.
+     *
+     * Return with an error instead of adjusting the limits because we would
+     * not be able to maintain the semantics that either the source must be
+     * consumed or the target filled (unless an error occurs).
+     * An adjustment would be sourceLimit=s+0x7fffffff; for example.
+     */
+    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
+        *err=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0xffff;
+    }
+
+    c=U_SENTINEL;
+
+    /* flush the target overflow buffer */
+    if(cnv->UCharErrorBufferLength>0) {
+        UChar *overflow;
+
+        overflow=cnv->UCharErrorBuffer;
+        i=0;
+        length=cnv->UCharErrorBufferLength;
+        U16_NEXT(overflow, i, length, c);
+
+        /* move the remaining overflow contents up to the beginning */
+        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
+            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
+                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+        }
+
+        if(!U16_IS_LEAD(c) || i<length) {
+            return c;
+        }
+        /*
+         * Continue if the overflow buffer contained only a lead surrogate,
+         * in case the converter outputs single surrogates from complete
+         * input sequences.
+         */
+    }
+
+    /*
+     * flush==TRUE is implied for ucnv_getNextUChar()
+     *
+     * do not simply return even if s==sourceLimit because the converter may
+     * not have seen flush==TRUE before
+     */
+
+    /* prepare the converter arguments */
+    args.converter=cnv;
+    args.flush=TRUE;
+    args.offsets=NULL;
+    args.source=s;
+    args.sourceLimit=sourceLimit;
+    args.target=buffer;
+    args.targetLimit=buffer+1;
+    args.size=sizeof(args);
+
+    if(c<0) {
+        /*
+         * call the native getNextUChar() implementation if we are
+         * at a character boundary (toULength==0)
+         *
+         * unlike with _toUnicode(), getNextUChar() implementations must set
+         * U_TRUNCATED_CHAR_FOUND for truncated input,
+         * in addition to setting toULength/toUBytes[]
+         */
+        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
+            c=cnv->sharedData->impl->getNextUChar(&args, err);
+            *source=s=args.source;
+            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
+                /* reset the converter without calling the callback function */
+                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
+                return 0xffff; /* no output */
+            } else if(U_SUCCESS(*err) && c>=0) {
+                return c;
+            /*
+             * else fall through to use _toUnicode() because
+             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
+             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
+             */
+            }
+        }
+
+        /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
+        _toUnicodeWithCallback(&args, err);
+
+        if(*err==U_BUFFER_OVERFLOW_ERROR) {
+            *err=U_ZERO_ERROR;
+        }
+
+        i=0;
+        length=(int32_t)(args.target-buffer);
     } else {
-        /* default implementation */
-        ch = ucnv_getNextUCharFromToUImpl(&args, converter->sharedData->impl->toUnicode, FALSE, err);
+        /* write the lead surrogate from the overflow buffer */
+        buffer[0]=(UChar)c;
+        args.target=buffer+1;
+        i=0;
+        length=1;
+    }
+
+    /* buffer contents start at i and end before length */
+
+    if(U_FAILURE(*err)) {
+        c=0xffff; /* no output */
+    } else if(length==0) {
+        /* no input or only state changes */
+        *err=U_INDEX_OUTOFBOUNDS_ERROR;
+        /* no need to reset explicitly because _toUnicodeWithCallback() did it */
+        c=0xffff; /* no output */
+    } else {
+        c=buffer[0];
+        i=1;
+        if(!U16_IS_LEAD(c)) {
+            /* consume c=buffer[0], done */
+        } else {
+            /* got a lead surrogate, see if a trail surrogate follows */
+            UChar c2;
+
+            if(cnv->UCharErrorBufferLength>0) {
+                /* got overflow output from the conversion */
+                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
+                    /* got a trail surrogate, too */
+                    c=U16_GET_SUPPLEMENTARY(c, c2);
+
+                    /* move the remaining overflow contents up to the beginning */
+                    if((--cnv->UCharErrorBufferLength)>0) {
+                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
+                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
+                    }
+                } else {
+                    /* c is an unpaired lead surrogate, just return it */
+                }
+            } else if(args.source<sourceLimit) {
+                /* convert once more, to buffer[1] */
+                args.targetLimit=buffer+2;
+                _toUnicodeWithCallback(&args, err);
+                if(*err==U_BUFFER_OVERFLOW_ERROR) {
+                    *err=U_ZERO_ERROR;
+                }
+
+                length=(int32_t)(args.target-buffer);
+                if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
+                    /* got a trail surrogate, too */
+                    c=U16_GET_SUPPLEMENTARY(c, c2);
+                    i=2;
+                }
+            }
+        }
+    }
+
+    /*
+     * move leftover output from buffer[i..length[
+     * into the beginning of the overflow buffer
+     */
+    if(i<length) {
+        /* move further overflow back */
+        int32_t delta=length-i;
+        if((length=cnv->UCharErrorBufferLength)>0) {
+            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
+                         length*U_SIZEOF_UCHAR);
+        }
+        cnv->UCharErrorBufferLength=(int8_t)(length+delta);
+
+        cnv->UCharErrorBuffer[0]=buffer[i++];
+        if(delta>1) {
+            cnv->UCharErrorBuffer[1]=buffer[i];
+        }
     }
-    *source = args.source;
-    return ch;
+
+    *source=args.source;
+    return c;
 }
 
+/* ucnv_convert() and siblings ---------------------------------------------- */
+
 U_CAPI void U_EXPORT2
 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
                char **target, const char *targetLimit,
@@ -1148,7 +2051,14 @@ ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
                UBool reset, UBool flush,
                UErrorCode *pErrorCode) {
     UChar pivotBuffer[CHUNK_SIZE];
-    UChar *myPivotSource, *myPivotTarget;
+    const UChar *myPivotSource;
+    UChar *myPivotTarget;
+    const char *s;
+    char *t;
+
+    UConverterToUnicodeArgs toUArgs;
+    UConverterFromUnicodeArgs fromUArgs;
+    UConverterConvert convert;
 
     /* error checking */
     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
@@ -1163,10 +2073,35 @@ ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
         return;
     }
 
+    s=*source;
+    t=*target;
+    if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    /*
+     * Make sure that the buffer sizes do not exceed the number range for
+     * int32_t. See ucnv_toUnicode() for a more detailed comment.
+     */
+    if(
+        (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
+        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    
     if(pivotStart==NULL) {
+        if(!flush) {
+            /* streaming conversion requires an explicit pivot buffer */
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+
         /* use the stack pivot buffer */
-        pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
-        pivotSource=&myPivotSource;
+        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
+        pivotSource=(UChar **)&myPivotSource;
         pivotTarget=&myPivotTarget;
         pivotLimit=pivotBuffer+CHUNK_SIZE;
     } else if(  pivotStart>=pivotLimit ||
@@ -1186,51 +2121,260 @@ ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
     if(reset) {
         ucnv_resetToUnicode(sourceCnv);
         ucnv_resetFromUnicode(targetCnv);
-        *pivotTarget=*pivotSource=pivotStart;
+        *pivotSource=*pivotTarget=pivotStart;
+    } else if(targetCnv->charErrorBufferLength>0) {
+        /* output the targetCnv overflow buffer */
+        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
+            /* U_BUFFER_OVERFLOW_ERROR */
+            return;
+        }
+        /* *target has moved, therefore stop using t */
+
+        if( !flush &&
+            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
+            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
+        ) {
+            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
+            return;
+        }
+    }
+
+    /* Is direct-UTF-8 conversion available? */
+    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+        targetCnv->sharedData->impl->fromUTF8!=NULL
+    ) {
+        convert=targetCnv->sharedData->impl->fromUTF8;
+    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
+               sourceCnv->sharedData->impl->toUTF8!=NULL
+    ) {
+        convert=sourceCnv->sharedData->impl->toUTF8;
+    } else {
+        convert=NULL;
     }
 
-    /* conversion loop */
+    /*
+     * If direct-UTF-8 conversion is available, then we use a smaller
+     * pivot buffer for error handling and partial matches
+     * so that we quickly return to direct conversion.
+     *
+     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
+     *
+     * We could reduce the pivot buffer size further, at the cost of
+     * buffer overflows from callbacks.
+     * The pivot buffer should not be smaller than the maximum number of
+     * fromUnicode extension table input UChars
+     * (for m:n conversion, see
+     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
+     * or 2 for surrogate pairs.
+     *
+     * Too small a buffer can cause thrashing between pivoting and direct
+     * conversion, with function call overhead outweighing the benefits
+     * of direct conversion.
+     */
+    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
+        pivotLimit=pivotStart+32;
+    }
+
+    /* prepare the converter arguments */
+    fromUArgs.converter=targetCnv;
+    fromUArgs.flush=FALSE;
+    fromUArgs.offsets=NULL;
+    fromUArgs.target=*target;
+    fromUArgs.targetLimit=targetLimit;
+    fromUArgs.size=sizeof(fromUArgs);
+
+    toUArgs.converter=sourceCnv;
+    toUArgs.flush=flush;
+    toUArgs.offsets=NULL;
+    toUArgs.source=s;
+    toUArgs.sourceLimit=sourceLimit;
+    toUArgs.targetLimit=pivotLimit;
+    toUArgs.size=sizeof(toUArgs);
+
+    /*
+     * TODO: Consider separating this function into two functions,
+     * extracting exactly the conversion loop,
+     * for readability and to reduce the set of visible variables.
+     *
+     * Otherwise stop using s and t from here on.
+     */
+    s=t=NULL;
+
+    /*
+     * conversion loop
+     *
+     * The sequence of steps in the loop may appear backward,
+     * but the principle is simple:
+     * In the chain of
+     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
+     * empty out later buffers before refilling them from earlier ones.
+     *
+     * The targetCnv overflow buffer is flushed out only once before the loop.
+     */
     for(;;) {
-        if(reset) {
+        /*
+         * if(pivot not empty or error or replay or flush fromUnicode) {
+         *   fromUnicode(pivot -> target);
+         * }
+         *
+         * For pivoting conversion; and for direct conversion for
+         * error callback handling and flushing the replay buffer.
+         */
+        if( *pivotSource<*pivotTarget ||
+            U_FAILURE(*pErrorCode) ||
+            targetCnv->preFromULength<0 ||
+            fromUArgs.flush
+        ) {
+            fromUArgs.source=*pivotSource;
+            fromUArgs.sourceLimit=*pivotTarget;
+            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
+            if(U_FAILURE(*pErrorCode)) {
+                /* target overflow, or conversion error */
+                *pivotSource=(UChar *)fromUArgs.source;
+                break;
+            }
+
             /*
-             * if we did a reset in this function, we know that there is nothing
-             * to convert to the target yet, so we save a function call
+             * _fromUnicodeWithCallback() must have consumed the pivot contents
+             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
              */
-            reset=FALSE;
-        } else {
+        }
+
+        /* The pivot buffer is empty; reset it so we start at pivotStart. */
+        *pivotSource=*pivotTarget=pivotStart;
+
+        /*
+         * if(sourceCnv overflow buffer not empty) {
+         *     move(sourceCnv overflow buffer -> pivot);
+         *     continue;
+         * }
+         */
+        /* output the sourceCnv overflow buffer */
+        if(sourceCnv->UCharErrorBufferLength>0) {
+            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
+                /* U_BUFFER_OVERFLOW_ERROR */
+                *pErrorCode=U_ZERO_ERROR;
+            }
+            continue;
+        }
+
+        /*
+         * check for end of input and break if done
+         *
+         * Checking both flush and fromUArgs.flush ensures that the converters
+         * have been called with the flush flag set if the ucnv_convertEx()
+         * caller set it.
+         */
+        if( toUArgs.source==sourceLimit &&
+            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
+            (!flush || fromUArgs.flush)
+        ) {
+            /* done successfully */
+            break;
+        }
+
+        /*
+         * use direct conversion if available
+         * but not if continuing a partial match
+         * or flushing the toUnicode replay buffer
+         */
+        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
+            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+                /* remove a warning that may be set by this function */
+                *pErrorCode=U_ZERO_ERROR;
+            }
+            convert(&fromUArgs, &toUArgs, pErrorCode);
+            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+                break;
+            } else if(U_FAILURE(*pErrorCode)) {
+                if(sourceCnv->toULength>0) {
+                    /*
+                     * Fall through to calling _toUnicodeWithCallback()
+                     * for callback handling.
+                     *
+                     * The pivot buffer will be reset with
+                     *   *pivotSource=*pivotTarget=pivotStart;
+                     * which indicates a toUnicode error to the caller
+                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
+                     */
+                } else {
+                    /*
+                     * Indicate a fromUnicode error to the caller
+                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
+                     */
+                    *pivotSource=*pivotTarget=pivotStart+1;
+                    /*
+                     * Loop around to calling _fromUnicodeWithCallback()
+                     * for callback handling.
+                     */
+                    continue;
+                }
+            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
+                /*
+                 * No error, but the implementation requested to temporarily
+                 * fall back to pivoting.
+                 */
+                *pErrorCode=U_ZERO_ERROR;
             /*
-             * convert to the target first in case the pivot is filled at entry
-             * or the targetCnv has some output bytes in its state
+             * The following else branches are almost identical to the end-of-input
+             * handling in _toUnicodeWithCallback().
+             * Avoid calling it just for the end of input.
              */
-            ucnv_fromUnicode(targetCnv,
-                             target, targetLimit,
-                             (const UChar **)pivotSource, *pivotTarget,
-                             NULL,
-                             (UBool)(flush && *source==sourceLimit),
-                             pErrorCode);
-            if(U_FAILURE(*pErrorCode)) {
+            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
+                /*
+                 * the entire input stream is consumed
+                 * and there is a partial, truncated input sequence left
+                 */
+
+                /* inject an error and continue with callback handling */
+                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+            } else {
+                /* input consumed */
+                if(flush) {
+                    /* reset the converters without calling the callback functions */
+                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
+                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
+                }
+
+                /* done successfully */
                 break;
             }
-
-            /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
-            *pivotSource=*pivotTarget=pivotStart;
         }
-
-        /* convert from the source to the pivot */
-        ucnv_toUnicode(sourceCnv,
-                       pivotTarget, pivotLimit,
-                       source, sourceLimit,
-                       NULL,
-                       flush,
-                       pErrorCode);
+        
+        /*
+         * toUnicode(source -> pivot);
+         *
+         * For pivoting conversion; and for direct conversion for
+         * error callback handling, continuing partial matches
+         * and flushing the replay buffer.
+         *
+         * The pivot buffer is empty and reset.
+         */
+        toUArgs.target=pivotStart; /* ==*pivotTarget */
+        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
+        _toUnicodeWithCallback(&toUArgs, pErrorCode);
+        *pivotTarget=toUArgs.target;
         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
             /* pivot overflow: continue with the conversion loop */
             *pErrorCode=U_ZERO_ERROR;
-        } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
+        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
             /* conversion error, or there was nothing left to convert */
             break;
         }
-        /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
+        /*
+         * else:
+         * _toUnicodeWithCallback() wrote into the pivot buffer,
+         * continue with fromUnicode conversion.
+         *
+         * Set the fromUnicode flush flag if we flush and if toUnicode has
+         * processed the end of the input.
+         */
+        if( flush && toUArgs.source==sourceLimit &&
+            sourceCnv->preToULength>=0 &&
+            sourceCnv->UCharErrorBufferLength==0
+        ) {
+            fromUArgs.flush=TRUE;
+        }
     }
 
     /*
@@ -1240,6 +2384,9 @@ ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
      * - a conversion error occurred
      */
 
+    *source=toUArgs.source;
+    *target=fromUArgs.target;
+
     /* terminate the target buffer if possible */
     if(flush && U_SUCCESS(*pErrorCode)) {
         if(*target!=targetLimit) {
@@ -1293,7 +2440,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
                        FALSE,
                        TRUE,
                        pErrorCode);
-        targetLength=myTarget-target;
+        targetLength=(int32_t)(myTarget-target);
     }
 
     /*
@@ -1316,7 +2463,7 @@ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
                            FALSE,
                            TRUE,
                            pErrorCode);
-            targetLength+=(myTarget-targetBuffer);
+            targetLength+=(int32_t)(myTarget-targetBuffer);
         } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
 
         /* done with preflighting, set warnings and errors as appropriate */
@@ -1463,7 +2610,7 @@ ucnv_getType(const UConverter* converter)
     int8_t type = converter->sharedData->staticData->conversionType;
 #if !UCONFIG_NO_LEGACY_CONVERSION
     if(type == UCNV_MBCS) {
-        return _MBCSGetType(converter);
+        return ucnv_MBCSGetType(converter);
     }
 #endif
     return (UConverterType)type;
@@ -1631,7 +2778,7 @@ ucnv_detectUnicodeSignature( const char* source,
     }
 
     if(sourceLength==-1){
-        sourceLength=uprv_strlen(source);
+        sourceLength=(int32_t)uprv_strlen(source);
     }
 
     
@@ -1682,13 +2829,85 @@ ucnv_detectUnicodeSignature( const char* source,
             *signatureLength=4;
             return "UTF-7";
         }
+    }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
+        *signatureLength=4;
+        return "UTF-EBCDIC";
     }
 
+
     /* no known Unicode signature byte sequence recognized */
     *signatureLength=0;
     return NULL;
 }
 
+/*
+ * Count the UChars that the converter is still holding on the
+ * fromUnicode side.
+ *
+ * Returns -1 when status is NULL or already indicates a failure, and
+ * also when cnv is NULL (in which case *status is set to
+ * U_ILLEGAL_ARGUMENT_ERROR).
+ *
+ * Otherwise the first matching case decides the result:
+ * - preFromUFirstCP >= 0: UTF-16 length of that code point plus
+ *   preFromULength
+ * - preFromULength < 0: the magnitude of preFromULength
+ * - fromUChar32 > 0: 1
+ * - none of the above: 0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
+{
+    int32_t pending = 0;
+
+    if(status == NULL || U_FAILURE(*status)) {
+        return -1;
+    }
+    if(cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    if(cnv->preFromUFirstCP >= 0) {
+        pending = U16_LENGTH(cnv->preFromUFirstCP) + cnv->preFromULength;
+    } else if(cnv->preFromULength < 0) {
+        pending = -cnv->preFromULength;
+    } else if(cnv->fromUChar32 > 0) {
+        pending = 1;
+    }
+
+    return pending;
+}
+
+/*
+ * Count the units that the converter is still holding on the
+ * toUnicode side.
+ *
+ * Returns -1 when status is NULL or already indicates a failure, and
+ * also when cnv is NULL (in which case *status is set to
+ * U_ILLEGAL_ARGUMENT_ERROR).
+ *
+ * Otherwise a non-zero preToULength wins and its magnitude is returned
+ * (the sign is dropped); failing that, a positive toULength is returned;
+ * failing that, 0.
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status)
+{
+    if(status == NULL || U_FAILURE(*status)) {
+        return -1;
+    }
+    if(cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+
+    if(cnv->preToULength != 0) {
+        /* the length may be stored negated; report its magnitude */
+        return cnv->preToULength > 0 ? cnv->preToULength : -cnv->preToULength;
+    }
+
+    return cnv->toULength > 0 ? cnv->toULength : 0;
+}
+
+/*
+ * Tell whether the converter's type, as reported by ucnv_getType(),
+ * is one of the types this function treats as fixed-width.
+ *
+ * Returns FALSE without touching the converter when status is NULL or
+ * already indicates a failure. Returns FALSE and sets *status to
+ * U_ILLEGAL_ARGUMENT_ERROR when cnv is NULL.
+ *
+ * Returns TRUE for UCNV_SBCS, UCNV_DBCS, UCNV_UTF32_BigEndian,
+ * UCNV_UTF32_LittleEndian, UCNV_UTF32 and UCNV_US_ASCII; FALSE for
+ * every other type.
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
+    /* check status for NULL before U_FAILURE() dereferences it */
+    if (status == NULL || U_FAILURE(*status)) {
+        return FALSE;
+    }
+
+    if (cnv == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    }
+
+    switch (ucnv_getType(cnv)) {
+        case UCNV_SBCS:
+        case UCNV_DBCS:
+        case UCNV_UTF32_BigEndian:
+        case UCNV_UTF32_LittleEndian:
+        case UCNV_UTF32:
+        case UCNV_US_ASCII:
+            return TRUE;
+        default:
+            return FALSE;
+    }
+}
+#endif
+
 /*
  * Hey, Emacs, please set the following:
  *