X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..2ca993e82fb37b597a3c73ecd1586a139a6579c5:/icuSources/common/ucnvscsu.c

diff --git a/icuSources/common/ucnvscsu.c b/icuSources/common/ucnvscsu.c
index e02f9a70..15ed9c49 100644
--- a/icuSources/common/ucnvscsu.c
+++ b/icuSources/common/ucnvscsu.c
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2003, International Business Machines
+*   Copyright (C) 2000-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -20,8 +20,12 @@
 */
 
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
 #include "cmemory.h"
@@ -181,16 +185,18 @@ _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
             break;
         }
 
-        cnv->fromUSurrogateLead=0;
+        cnv->fromUChar32=0;
     }
 }
 
 static void
 _SCSUOpen(UConverter *cnv,
-          const char *name,
-          const char *locale,
-          uint32_t options,
+          UConverterLoadArgs *pArgs,
           UErrorCode *pErrorCode) {
+    const char *locale=pArgs->locale;
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
     cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
     if(cnv->extraInfo!=NULL) {
         if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
@@ -202,6 +208,10 @@ _SCSUOpen(UConverter *cnv,
     } else {
         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     }
+
+    /* Set the substitution character U+fffd as a Unicode string. */
+    cnv->subUChars[0]=0xfffd;
+    cnv->subCharLen=-1;
 }
 
 static void
@@ -216,8 +226,6 @@ _SCSUClose(UConverter *cnv) {
 
 /* SCSU-to-Unicode conversion functions ------------------------------------- */
 
-/* ### TODO check operator precedence | << + < */
-
 static void
 _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                           UErrorCode *pErrorCode) {
@@ -272,11 +280,9 @@ _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
      * The end of the input or output buffer is also handled by the slower loop.
      * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
      *
-     * The callback handling is done by jumping (goto) to the callback section at the end
-     * of the function. From there, it either jumps to here to continue or to
-     * the endloop section to clean up and return.
+     * The callback handling is done by returning with an error code.
+     * The conversion framework actually calls the callback function.
      */
-loop:
     if(isSingleByteMode) {
         /* fast path for single-byte mode */
         if(state==readCommand) {
@@ -367,13 +373,20 @@ singleByteMode:
                     goto fastUnicode;
                 } else /* Srs */ {
                     /* callback(illegal) */
-                    cnv->invalidCharBuffer[0]=b;
-                    cnv->invalidCharLength=1;
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
                 }
+
+                /* store the first byte of a multibyte sequence in toUBytes[] */
+                cnv->toUBytes[0]=b;
+                cnv->toULength=1;
                 break;
             case quotePairOne:
                 byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=quotePairTwo;
                 break;
             case quotePairTwo:
@@ -426,6 +439,8 @@ singleByteMode:
             case definePairOne:
                 dynamicWindow=(int8_t)((b>>5)&7);
                 byteOne=(uint8_t)(b&0x1f);
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=definePairTwo;
                 break;
             case definePairTwo:
@@ -436,10 +451,9 @@ singleByteMode:
             case defineOne:
                 if(b==0) {
                     /* callback(illegal): Reserved window offset value 0 */
-                    cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
-                    cnv->invalidCharBuffer[1]=b;
-                    cnv->invalidCharLength=2;
-                    goto callback;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
                 } else if(b<gapThreshold) {
                     scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
                 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
@@ -448,10 +462,9 @@ singleByteMode:
                     scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
                 } else {
                     /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
-                    cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
-                    cnv->invalidCharBuffer[1]=b;
-                    cnv->invalidCharLength=2;
-                    goto callback;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
                 }
                 sourceIndex=nextSourceIndex;
                 state=readCommand;
@@ -487,6 +500,8 @@ fastUnicode:
             case readCommand:
                 if((uint8_t)(b-UC0)>(Urs-UC0)) {
                     byteOne=b;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=quotePairTwo;
                 } else if(/* UC0<=b && */ b<=UC7) {
                     dynamicWindow=(int8_t)(b-UC0);
@@ -496,23 +511,32 @@ fastUnicode:
                 } else if(/* UD0<=b && */ b<=UD7) {
                     dynamicWindow=(int8_t)(b-UD0);
                     isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=defineOne;
                     goto singleByteMode;
                 } else if(b==UDX) {
                     isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=definePairOne;
                     goto singleByteMode;
                 } else if(b==UQU) {
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=quotePairOne;
                 } else /* Urs */ {
                     /* callback(illegal) */
-                    cnv->invalidCharBuffer[0]=b;
-                    cnv->invalidCharLength=1;
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
                 }
                 break;
             case quotePairOne:
                 byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=quotePairTwo;
                 break;
             case quotePairTwo:
@@ -528,80 +552,25 @@ fastUnicode:
     }
 endloop:
 
-    if(pArgs->flush && source>=sourceLimit) {
-        /* reset the state for the next conversion */
-        if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
-            /* a character byte sequence remains incomplete */
-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
-        }
-        _SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
-    } else {
-        /* set the converter state back into UConverter */
-        scsu->toUIsSingleByteMode=isSingleByteMode;
-        scsu->toUState=state;
-        scsu->toUQuoteWindow=quoteWindow;
-        scsu->toUDynamicWindow=dynamicWindow;
-        scsu->toUByteOne=byteOne;
+    /* set the converter state back into UConverter */
+    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
+        /* reset to deal with the next character */
+        state=readCommand;
+    } else if(state==readCommand) {
+        /* not in a multi-byte sequence, reset toULength */
+        cnv->toULength=0;
     }
+    scsu->toUIsSingleByteMode=isSingleByteMode;
+    scsu->toUState=state;
+    scsu->toUQuoteWindow=quoteWindow;
+    scsu->toUDynamicWindow=dynamicWindow;
+    scsu->toUByteOne=byteOne;
 
-finish:
     /* write back the updated pointers */
     pArgs->source=(const char *)source;
     pArgs->target=target;
     pArgs->offsets=offsets;
     return;
-
-callback:
-    /* call the callback function with all the preparations and post-processing */
-    /* update the arguments structure */
-    pArgs->source=(const char *)source;
-    pArgs->target=target;
-    pArgs->offsets=offsets;
-    /* the current bytes were copied to invalidCharBuffer before the goto callback jump */
-
-    /* set the converter state in UConverter to deal with the next character */
-    scsu->toUIsSingleByteMode=isSingleByteMode;
-    scsu->toUState=readCommand;
-    scsu->toUQuoteWindow=quoteWindow;
-    scsu->toUDynamicWindow=dynamicWindow;
-    scsu->toUByteOne=0;
-
-    /* call the callback function */
-    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-    cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
-
-    /* get the converter state from UConverter */
-    isSingleByteMode=scsu->toUIsSingleByteMode;
-    state=scsu->toUState;
-    quoteWindow=scsu->toUQuoteWindow;
-    dynamicWindow=scsu->toUDynamicWindow;
-    byteOne=scsu->toUByteOne;
-
-    /* update target and deal with offsets if necessary */
-    offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(pArgs->target-target), sourceIndex);
-    target=pArgs->target;
-
-    /* update the source pointer and index */
-    sourceIndex=(int32_t)(nextSourceIndex+((const uint8_t *)pArgs->source-source));
-    source=(const uint8_t *)pArgs->source;
-
-    /*
-     * If the callback overflowed the target, then we need to
-     * stop here with an overflow indication.
-     */
-    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-        goto endloop;
-    } else if(cnv->UCharErrorBufferLength>0) {
-        /* target is full */
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-        goto endloop;
-    } else if(U_FAILURE(*pErrorCode)) {
-        /* break on error */
-        _SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
-        goto finish;
-    } else {
-        goto loop;
-    }
 }
 
 /*
@@ -619,7 +588,6 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
     const uint8_t *source, *sourceLimit;
     UChar *target;
     const UChar *targetLimit;
-
     UBool isSingleByteMode;
     uint8_t state, byteOne;
     int8_t quoteWindow, dynamicWindow;
@@ -658,11 +626,9 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
      * The end of the input or output buffer is also handled by the slower loop.
      * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
      *
-     * The callback handling is done by jumping (goto) to the callback section at the end
-     * of the function. From there, it either jumps to here to continue or to
-     * the endloop section to clean up and return.
+     * The callback handling is done by returning with an error code.
+     * The conversion framework actually calls the callback function.
      */
-loop:
     if(isSingleByteMode) {
         /* fast path for single-byte mode */
         if(state==readCommand) {
@@ -731,13 +697,20 @@ singleByteMode:
                     goto fastUnicode;
                 } else /* Srs */ {
                     /* callback(illegal) */
-                    cnv->invalidCharBuffer[0]=b;
-                    cnv->invalidCharLength=1;
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
                 }
+
+                /* store the first byte of a multibyte sequence in toUBytes[] */
+                cnv->toUBytes[0]=b;
+                cnv->toULength=1;
                 break;
             case quotePairOne:
                 byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=quotePairTwo;
                 break;
             case quotePairTwo:
@@ -772,6 +745,8 @@ singleByteMode:
             case definePairOne:
                 dynamicWindow=(int8_t)((b>>5)&7);
                 byteOne=(uint8_t)(b&0x1f);
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=definePairTwo;
                 break;
             case definePairTwo:
@@ -781,10 +756,9 @@ singleByteMode:
             case defineOne:
                 if(b==0) {
                     /* callback(illegal): Reserved window offset value 0 */
-                    cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
-                    cnv->invalidCharBuffer[1]=b;
-                    cnv->invalidCharLength=2;
-                    goto callback;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
                 } else if(b<gapThreshold) {
                     scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
                 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
@@ -793,10 +767,9 @@ singleByteMode:
                     scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
                 } else {
                     /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
-                    cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
-                    cnv->invalidCharBuffer[1]=b;
-                    cnv->invalidCharLength=2;
-                    goto callback;
+                    cnv->toUBytes[1]=b;
+                    cnv->toULength=2;
+                    goto endloop;
                 }
                 state=readCommand;
                 goto fastSingle;
@@ -825,6 +798,8 @@ fastUnicode:
             case readCommand:
                 if((uint8_t)(b-UC0)>(Urs-UC0)) {
                     byteOne=b;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=quotePairTwo;
                 } else if(/* UC0<=b && */ b<=UC7) {
                     dynamicWindow=(int8_t)(b-UC0);
@@ -833,23 +808,32 @@ fastUnicode:
                 } else if(/* UD0<=b && */ b<=UD7) {
                     dynamicWindow=(int8_t)(b-UD0);
                     isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=defineOne;
                     goto singleByteMode;
                 } else if(b==UDX) {
                     isSingleByteMode=TRUE;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=definePairOne;
                     goto singleByteMode;
                 } else if(b==UQU) {
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
                     state=quotePairOne;
                 } else /* Urs */ {
                     /* callback(illegal) */
-                    cnv->invalidCharBuffer[0]=b;
-                    cnv->invalidCharLength=1;
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    cnv->toUBytes[0]=b;
+                    cnv->toULength=1;
+                    goto endloop;
                 }
                 break;
             case quotePairOne:
                 byteOne=b;
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
                 state=quotePairTwo;
                 break;
             case quotePairTwo:
@@ -861,80 +845,24 @@ fastUnicode:
     }
 endloop:
 
-    if(pArgs->flush && source>=sourceLimit) {
-        /* reset the state for the next conversion */
-        if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
-            /* a character byte sequence remains incomplete */
-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
-        }
-        _SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
-    } else {
-        /* set the converter state back into UConverter */
-        scsu->toUIsSingleByteMode=isSingleByteMode;
-        scsu->toUState=state;
-        scsu->toUQuoteWindow=quoteWindow;
-        scsu->toUDynamicWindow=dynamicWindow;
-        scsu->toUByteOne=byteOne;
+    /* set the converter state back into UConverter */
+    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
+        /* reset to deal with the next character */
+        state=readCommand;
+    } else if(state==readCommand) {
+        /* not in a multi-byte sequence, reset toULength */
+        cnv->toULength=0;
     }
+    scsu->toUIsSingleByteMode=isSingleByteMode;
+    scsu->toUState=state;
+    scsu->toUQuoteWindow=quoteWindow;
+    scsu->toUDynamicWindow=dynamicWindow;
+    scsu->toUByteOne=byteOne;
 
-finish:
     /* write back the updated pointers */
     pArgs->source=(const char *)source;
     pArgs->target=target;
     return;
-
-callback:
-    /* call the callback function with all the preparations and post-processing */
-    /* update the arguments structure */
-    pArgs->source=(const char *)source;
-    pArgs->target=target;
-    /* the current bytes were copied to invalidCharBuffer before the goto callback jump */
-
-    /* set the converter state in UConverter to deal with the next character */
-    scsu->toUIsSingleByteMode=isSingleByteMode;
-    scsu->toUState=readCommand;
-    scsu->toUQuoteWindow=quoteWindow;
-    scsu->toUDynamicWindow=dynamicWindow;
-    scsu->toUByteOne=0;
-
-    /* call the callback function */
-    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-    cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
-
-    /* get the converter state from UConverter */
-    isSingleByteMode=scsu->toUIsSingleByteMode;
-    state=scsu->toUState;
-    quoteWindow=scsu->toUQuoteWindow;
-    dynamicWindow=scsu->toUDynamicWindow;
-    byteOne=scsu->toUByteOne;
-
-    target=pArgs->target;
-
-    source=(const uint8_t *)pArgs->source;
-
-    /*
-     * If the callback overflowed the target, then we need to
-     * stop here with an overflow indication.
-     */
-    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-        goto endloop;
-    } else if(cnv->UCharErrorBufferLength>0) {
-        /* target is full */
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-        goto endloop;
-    } else if(U_FAILURE(*pErrorCode)) {
-        /* break on error */
-        _SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
-        goto finish;
-    } else {
-        goto loop;
-    }
-}
-
-static UChar32
-_SCSUGetNextUChar(UConverterToUnicodeArgs *pArgs,
-                  UErrorCode *pErrorCode) {
-    return ucnv_getNextUCharFromToUImpl(pArgs, _SCSUToUnicode, TRUE, pErrorCode);
 }
 
 /* SCSU-from-Unicode conversion functions ----------------------------------- */
@@ -1095,7 +1023,6 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 
     int32_t sourceIndex, nextSourceIndex;
 
-    uint32_t i;
     int32_t length;
 
     /* variables for compression heuristics */
@@ -1120,7 +1047,7 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     dynamicWindow=scsu->fromUDynamicWindow;
     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
 
     /* sourceIndex=-1 if the current character began in the previous buffer */
     sourceIndex= c==0 ? 0 : -1;
@@ -1172,23 +1099,24 @@ loop:
                     *offsets++=sourceIndex;
                 }
                 --targetCapacity;
-            } else if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            } else if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
 getTrailSingle:
                     lead=(UChar)c;
                     if(source<sourceLimit) {
                         /* test the following code unit */
                         trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                             ++source;
                             ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                             /* convert this surrogate code point */
                             /* exit this condition tree */
                         } else {
                             /* this is an unmatched lead code unit (1st surrogate) */
                             /* callback(illegal) */
-                            goto callback;
+                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                            goto endloop;
                         }
                     } else {
                         /* no more input */
@@ -1197,7 +1125,8 @@ getTrailSingle:
                 } else {
                     /* this is an unmatched trail code unit (2nd surrogate) */
                     /* callback(illegal) */
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
                 }
 
                 /* compress supplementary character U+10000..U+10ffff */
@@ -1368,22 +1297,23 @@ getTrailSingle:
                 goto outputBytes;
             } else if(c<0xe000) {
                 /* c is a surrogate */
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
 getTrailUnicode:
                     lead=(UChar)c;
                     if(source<sourceLimit) {
                         /* test the following code unit */
                         trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                             ++source;
                             ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                             /* convert this surrogate code point */
                             /* exit this condition tree */
                         } else {
                             /* this is an unmatched lead code unit (1st surrogate) */
                             /* callback(illegal) */
-                            goto callback;
+                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                            goto endloop;
                         }
                     } else {
                         /* no more input */
@@ -1392,7 +1322,8 @@ getTrailUnicode:
                 } else {
                     /* this is an unmatched trail code unit (2nd surrogate) */
                     /* callback(illegal) */
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
                 }
 
                 /* compress supplementary character */
@@ -1443,22 +1374,12 @@ getTrailUnicode:
     }
 endloop:
 
-    if(pArgs->flush && source>=sourceLimit) {
-        /* reset the state for the next conversion */
-        if(c!=0 && U_SUCCESS(*pErrorCode)) {
-            /* a character byte sequence remains incomplete */
-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
-        }
-        _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
-    } else {
-        /* set the converter state back into UConverter */
-        scsu->fromUIsSingleByteMode=isSingleByteMode;
-        scsu->fromUDynamicWindow=dynamicWindow;
+    /* set the converter state back into UConverter */
+    scsu->fromUIsSingleByteMode=isSingleByteMode;
+    scsu->fromUDynamicWindow=dynamicWindow;
 
-        cnv->fromUSurrogateLead=(UChar)c;
-    }
+    cnv->fromUChar32=c;
 
-finish:
     /* write back the updated pointers */
     pArgs->source=source;
     pArgs->target=(char *)target;
@@ -1474,12 +1395,16 @@ outputBytes:
                 /* each branch falls through to the next one */
             case 4:
                 *target++=(uint8_t)(c>>24);
+                U_FALLTHROUGH;
             case 3:
                 *target++=(uint8_t)(c>>16);
+                U_FALLTHROUGH;
             case 2:
                 *target++=(uint8_t)(c>>8);
+                U_FALLTHROUGH;
             case 1:
                 *target++=(uint8_t)c;
+                U_FALLTHROUGH;
             default:
                 /* will never occur */
                 break;
@@ -1490,15 +1415,19 @@ outputBytes:
             case 4:
                 *target++=(uint8_t)(c>>24);
                 *offsets++=sourceIndex;
+                U_FALLTHROUGH;
             case 3:
                 *target++=(uint8_t)(c>>16);
                 *offsets++=sourceIndex;
+                U_FALLTHROUGH;
             case 2:
                 *target++=(uint8_t)(c>>8);
                 *offsets++=sourceIndex;
+                U_FALLTHROUGH;
             case 1:
                 *target++=(uint8_t)c;
                 *offsets++=sourceIndex;
+                U_FALLTHROUGH;
             default:
                 /* will never occur */
                 break;
@@ -1519,17 +1448,24 @@ outputBytes:
          * first to the overflow buffer what does not fit into the
          * regular target.
          */
-        /* we know that 1<=targetCapacity<length<=4 */
+        /* we know that 0<=targetCapacity<length<=4 */
+        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
         length-=targetCapacity;
         p=(uint8_t *)cnv->charErrorBuffer;
         switch(length) {
             /* each branch falls through to the next one */
+        case 4:
+            *p++=(uint8_t)(c>>24);
+            U_FALLTHROUGH;
         case 3:
             *p++=(uint8_t)(c>>16);
+            U_FALLTHROUGH;
         case 2:
             *p++=(uint8_t)(c>>8);
+            U_FALLTHROUGH;
         case 1:
             *p=(uint8_t)c;
+            U_FALLTHROUGH;
         default:
             /* will never occur */
             break;
@@ -1545,18 +1481,20 @@ outputBytes:
             if(offsets!=NULL) {
                 *offsets++=sourceIndex;
             }
+            U_FALLTHROUGH;
         case 2:
             *target++=(uint8_t)(c>>8);
             if(offsets!=NULL) {
                 *offsets++=sourceIndex;
             }
+            U_FALLTHROUGH;
         case 1:
             *target++=(uint8_t)c;
             if(offsets!=NULL) {
                 *offsets++=sourceIndex;
             }
+            U_FALLTHROUGH;
         default:
-            /* will never occur */
             break;
         }
 
@@ -1566,59 +1504,6 @@ outputBytes:
         c=0;
         goto endloop;
     }
-
-callback:
-    /* call the callback function with all the preparations and post-processing */
-    /* update the arguments structure */
-    pArgs->source=source;
-    pArgs->target=(char *)target;
-    pArgs->offsets=offsets;
-    /* set the converter state in UConverter to deal with the next character */
-    scsu->fromUIsSingleByteMode=isSingleByteMode;
-    scsu->fromUDynamicWindow=dynamicWindow;
-    cnv->fromUSurrogateLead=0;
-
-    /* write the code point as code units */
-    i=0;
-    UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
-    cnv->invalidUCharLength=(int8_t)i;
-
-    /* call the callback function */
-    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-    cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
-
-    /* get the converter state from UConverter */
-    isSingleByteMode=scsu->fromUIsSingleByteMode;
-    dynamicWindow=scsu->fromUDynamicWindow;
-    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
-    c=cnv->fromUSurrogateLead;
-
-    /* update target and deal with offsets if necessary */
-    offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(((uint8_t *)pArgs->target)-target), sourceIndex);
-    target=(uint8_t *)pArgs->target;
-
-    /* update the source pointer and index */
-    sourceIndex=(int32_t)(nextSourceIndex+(pArgs->source-source));
-    source=pArgs->source;
-    targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
-
-    /*
-     * If the callback overflowed the target, then we need to
-     * stop here with an overflow indication.
-     */
-    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-        goto endloop;
-    } else if(cnv->charErrorBufferLength>0) {
-        /* target is full */
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-        goto endloop;
-    } else if(U_FAILURE(*pErrorCode)) {
-        /* break on error */
-        _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
-        goto finish;
-    } else {
-        goto loop;
-    }
 }
 
 /*
@@ -1643,7 +1528,6 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
 
     uint32_t c, delta;
 
-    uint32_t i;
     int32_t length;
 
     /* variables for compression heuristics */
@@ -1667,7 +1551,7 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
     dynamicWindow=scsu->fromUDynamicWindow;
     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
 
     /* similar conversion "loop" as in toUnicode */
 loop:
@@ -1705,22 +1589,23 @@ loop:
                 /* use the current dynamic window */
                 *target++=(uint8_t)(delta|0x80);
                 --targetCapacity;
-            } else if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            } else if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
 getTrailSingle:
                     lead=(UChar)c;
                     if(source<sourceLimit) {
                         /* test the following code unit */
                         trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                             ++source;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                             /* convert this surrogate code point */
                             /* exit this condition tree */
                         } else {
                             /* this is an unmatched lead code unit (1st surrogate) */
                             /* callback(illegal) */
-                            goto callback;
+                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                            goto endloop;
                         }
                     } else {
                         /* no more input */
@@ -1729,7 +1614,8 @@ getTrailSingle:
                 } else {
                     /* this is an unmatched trail code unit (2nd surrogate) */
                     /* callback(illegal) */
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
                 }
 
                 /* compress supplementary character U+10000..U+10ffff */
@@ -1888,21 +1774,22 @@ getTrailSingle:
                 goto outputBytes;
             } else if(c<0xe000) {
                 /* c is a surrogate */
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
 getTrailUnicode:
                     lead=(UChar)c;
                     if(source<sourceLimit) {
                         /* test the following code unit */
                         trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                             ++source;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                             /* convert this surrogate code point */
                             /* exit this condition tree */
                         } else {
                             /* this is an unmatched lead code unit (1st surrogate) */
                             /* callback(illegal) */
-                            goto callback;
+                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                            goto endloop;
                         }
                     } else {
                         /* no more input */
@@ -1911,7 +1798,8 @@ getTrailUnicode:
                 } else {
                     /* this is an unmatched trail code unit (2nd surrogate) */
                     /* callback(illegal) */
-                    goto callback;
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
                 }
 
                 /* compress supplementary character */
@@ -1961,22 +1849,12 @@ getTrailUnicode:
     }
 endloop:
 
-    if(pArgs->flush && source>=sourceLimit) {
-        /* reset the state for the next conversion */
-        if(c!=0 && U_SUCCESS(*pErrorCode)) {
-            /* a character byte sequence remains incomplete */
-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
-        }
-        _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
-    } else {
-        /* set the converter state back into UConverter */
-        scsu->fromUIsSingleByteMode=isSingleByteMode;
-        scsu->fromUDynamicWindow=dynamicWindow;
+    /* set the converter state back into UConverter */
+    scsu->fromUIsSingleByteMode=isSingleByteMode;
+    scsu->fromUDynamicWindow=dynamicWindow;
 
-        cnv->fromUSurrogateLead=(UChar)c;
-    }
+    cnv->fromUChar32=c;
 
-finish:
     /* write back the updated pointers */
     pArgs->source=source;
     pArgs->target=(char *)target;
@@ -1990,12 +1868,16 @@ outputBytes:
             /* each branch falls through to the next one */
         case 4:
             *target++=(uint8_t)(c>>24);
+            U_FALLTHROUGH;
         case 3:
             *target++=(uint8_t)(c>>16);
+            U_FALLTHROUGH;
         case 2:
             *target++=(uint8_t)(c>>8);
+            U_FALLTHROUGH;
         case 1:
             *target++=(uint8_t)c;
+            U_FALLTHROUGH;
         default:
             /* will never occur */
             break;
@@ -2014,17 +1896,24 @@ outputBytes:
          * first to the overflow buffer what does not fit into the
          * regular target.
          */
-        /* we know that 1<=targetCapacity<length<=4 */
+        /* we know that 0<=targetCapacity<length<=4 */
+        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
         length-=targetCapacity;
         p=(uint8_t *)cnv->charErrorBuffer;
         switch(length) {
             /* each branch falls through to the next one */
+        case 4:
+            *p++=(uint8_t)(c>>24);
+            U_FALLTHROUGH;
         case 3:
             *p++=(uint8_t)(c>>16);
+            U_FALLTHROUGH;
         case 2:
             *p++=(uint8_t)(c>>8);
+            U_FALLTHROUGH;
         case 1:
             *p=(uint8_t)c;
+            U_FALLTHROUGH;
         default:
             /* will never occur */
             break;
@@ -2037,12 +1926,14 @@ outputBytes:
             /* each branch falls through to the next one */
         case 3:
             *target++=(uint8_t)(c>>16);
+            U_FALLTHROUGH;
         case 2:
             *target++=(uint8_t)(c>>8);
+            U_FALLTHROUGH;
         case 1:
             *target++=(uint8_t)c;
+            U_FALLTHROUGH;
         default:
-            /* will never occur */
             break;
         }
 
@@ -2052,54 +1943,6 @@ outputBytes:
         c=0;
         goto endloop;
     }
-
-callback:
-    /* call the callback function with all the preparations and post-processing */
-    /* update the arguments structure */
-    pArgs->source=source;
-    pArgs->target=(char *)target;
-    /* set the converter state in UConverter to deal with the next character */
-    scsu->fromUIsSingleByteMode=isSingleByteMode;
-    scsu->fromUDynamicWindow=dynamicWindow;
-    cnv->fromUSurrogateLead=0;
-
-    /* write the code point as code units */
-    i=0;
-    UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
-    cnv->invalidUCharLength=(int8_t)i;
-
-    /* call the callback function */
-    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-    cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
-
-    /* get the converter state from UConverter */
-    isSingleByteMode=scsu->fromUIsSingleByteMode;
-    dynamicWindow=scsu->fromUDynamicWindow;
-    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
-    c=cnv->fromUSurrogateLead;
-
-    target=(uint8_t *)pArgs->target;
-
-    source=pArgs->source;
-    targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
-
-    /*
-     * If the callback overflowed the target, then we need to
-     * stop here with an overflow indication.
-     */
-    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-        goto endloop;
-    } else if(cnv->charErrorBufferLength>0) {
-        /* target is full */
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-        goto endloop;
-    } else if(U_FAILURE(*pErrorCode)) {
-        /* break on error */
-        _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
-        goto finish;
-    } else {
-        goto loop;
-    }
 }
 
 /* miscellaneous ------------------------------------------------------------ */
@@ -2116,32 +1959,8 @@ _SCSUGetName(const UConverter *cnv) {
     }
 }
 
-static void
-_SCSUWriteSub(UConverterFromUnicodeArgs *pArgs,
-               int32_t offsetIndex,
-               UErrorCode *pErrorCode) {
-    static const char squ_fffd[]={ (char)SQU, (char)0xffu, (char)0xfdu };
-
-    /*
-     * The substitution character is U+fffd={ ff, fd }.
-     * If the SCSU converter is in Unicode mode, then these two bytes just need to
-     * be written. Otherwise, this character is quoted.
-     */
-    if(((SCSUData *)pArgs->converter->extraInfo)->fromUIsSingleByteMode) {
-        /* single-byte mode: quote Unicode */
-        ucnv_cbFromUWriteBytes(pArgs,
-                               squ_fffd, 3,
-                               offsetIndex, pErrorCode);
-    } else {
-        /* Unicode mode: just write U+fffd */
-        ucnv_cbFromUWriteBytes(pArgs,
-                               squ_fffd+1, 2,
-                               offsetIndex, pErrorCode);
-    }
-}
-
 /* structure for SafeClone calculations */
-struct cloneStruct
+struct cloneSCSUStruct
 {
     UConverter cnv;
     SCSUData mydata;
@@ -2153,8 +1972,8 @@ _SCSUSafeClone(const UConverter *cnv,
                int32_t *pBufferSize, 
                UErrorCode *status)
 {
-    struct cloneStruct * localClone;
-    int32_t bufferSizeNeeded = sizeof(struct cloneStruct);
+    struct cloneSCSUStruct * localClone;
+    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
 
     if (U_FAILURE(*status)){
         return 0;
@@ -2165,9 +1984,8 @@ _SCSUSafeClone(const UConverter *cnv,
         return 0;
     }
 
-    localClone = (struct cloneStruct *)stackBuffer;
-    uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter));
-    localClone->cnv.isCopyLocal = TRUE;
+    localClone = (struct cloneSCSUStruct *)stackBuffer;
+    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
 
     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
     localClone->cnv.extraInfo = &localClone->mydata;
@@ -2177,9 +1995,6 @@ _SCSUSafeClone(const UConverter *cnv,
 }
 
 
-
-
-
 static const UConverterImpl _SCSUImpl={
     UCNV_SCSU,
 
@@ -2194,11 +2009,11 @@ static const UConverterImpl _SCSUImpl={
     _SCSUToUnicodeWithOffsets,
     _SCSUFromUnicode,
     _SCSUFromUnicodeWithOffsets,
-    _SCSUGetNextUChar,
+    NULL,
 
     NULL,
     _SCSUGetName,
-    _SCSUWriteSub,
+    NULL,
     _SCSUSafeClone,
     ucnv_getCompleteUnicodeSet
 };
@@ -2206,20 +2021,21 @@ static const UConverterImpl _SCSUImpl={
 static const UConverterStaticData _SCSUStaticData={
     sizeof(UConverterStaticData),
     "SCSU",
-    0, /* CCSID for SCSU */
+    1212, /* CCSID for SCSU */
     UCNV_IBM, UCNV_SCSU,
     1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
-    { 0x0e, 0xff, 0xfd, 0 }, 3, /* ### the subchar really must be written by an SCSU function! */
+    /*
+     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
+     * substitution string.
+     */
+    { 0x0e, 0xff, 0xfd, 0 }, 3,
     FALSE, FALSE,
     0,
     0,
     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
 };
 
-const UConverterSharedData _SCSUData={
-    sizeof(UConverterSharedData), ~((uint32_t)0),
-    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
-    0
-};
+const UConverterSharedData _SCSUData=
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
 
-/* ### clarify: if an error occurs, does a converter reset itself? or is it in a defined or undefined state? */
+#endif