]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnvmbcs.c
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / common / ucnvmbcs.c
index 9b55c17d05da023be765e670f1a9ded5fffcde97..143daf69af7a1ae9557408edc556eae83fee3462 100644 (file)
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2008, International Business Machines
+*   Copyright (C) 2000-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 #include "unicode/ucnv_cb.h"
 #include "unicode/udata.h"
 #include "unicode/uset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
 #include "ucnv_bld.h"
 #include "ucnvmbcs.h"
 #include "ucnv_ext.h"
 #include "ucnv_cnv.h"
-#include "umutex.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "cmutex.h"
 
 /* control optimizations according to the platform */
 #define MBCS_UNROLL_SINGLE_TO_BMP 1
@@ -379,10 +381,11 @@ static const UConverterImpl _DBCSUTF8Impl;
  * as of the re-released mapping tables from 2000-nov-30.
  */
 static const uint32_t
-gb18030Ranges[13][4]={
+gb18030Ranges[14][4]={
     {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
     {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
-    {0x0452, 0x200F, LINEAR(0x8130D330), LINEAR(0x8136A531)},
+    {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
+    {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
     {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
     {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
     {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
@@ -398,6 +401,76 @@ gb18030Ranges[13][4]={
 /* bit flag for UConverter.options indicating GB 18030 special handling */
 #define _MBCS_OPTION_GB18030 0x8000
 
+/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
+#define _MBCS_OPTION_KEIS 0x01000
+#define _MBCS_OPTION_JEF  0x02000
+#define _MBCS_OPTION_JIPS 0x04000
+
+#define KEIS_SO_CHAR_1 0x0A
+#define KEIS_SO_CHAR_2 0x42
+#define KEIS_SI_CHAR_1 0x0A
+#define KEIS_SI_CHAR_2 0x41
+
+#define JEF_SO_CHAR 0x28
+#define JEF_SI_CHAR 0x29
+
+#define JIPS_SO_CHAR_1 0x1A
+#define JIPS_SO_CHAR_2 0x70
+#define JIPS_SI_CHAR_1 0x1A
+#define JIPS_SI_CHAR_2 0x71
+
+enum SISO_Option {
+    SI,
+    SO
+};
+typedef enum SISO_Option SISO_Option;
+
+static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
+    int32_t SISOLength = 0;
+
+    switch (option) {
+        case SI:
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+                value[0] = KEIS_SI_CHAR_1;
+                value[1] = KEIS_SI_CHAR_2;
+                SISOLength = 2;
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SI_CHAR;
+                SISOLength = 1;
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SI_CHAR_1;
+                value[1] = JIPS_SI_CHAR_2;
+                SISOLength = 2;
+            } else {
+                value[0] = UCNV_SI;
+                SISOLength = 1;
+            }
+            break;
+        case SO:
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+                value[0] = KEIS_SO_CHAR_1;
+                value[1] = KEIS_SO_CHAR_2;
+                SISOLength = 2;
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SO_CHAR;
+                SISOLength = 1;
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SO_CHAR_1;
+                value[1] = JIPS_SO_CHAR_2;
+                SISOLength = 2;
+            } else {
+                value[0] = UCNV_SO;
+                SISOLength = 1;
+            }
+            break;
+        default:
+            /* Should never happen. */
+            break;
+    }
+
+    return SISOLength;
+}
+
 /* Miscellaneous ------------------------------------------------------------ */
 
 /**
@@ -753,9 +826,9 @@ ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                     switch(st3Multiplier) {
                                     case 4:
                                         b|=*stage3++;
-                                    case 3:
+                                    case 3: /*fall through*/
                                         b|=*stage3++;
-                                    case 2:
+                                    case 2: /*fall through*/
                                         b|=stage3[0]|stage3[1];
                                         stage3+=2;
                                     default:
@@ -1270,7 +1343,6 @@ reconstituteData(UConverterMBCSTable *mbcsTable,
                  UErrorCode *pErrorCode) {
     uint16_t *stage1;
     uint32_t *stage2;
-    uint8_t *bytes;
     uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
     mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
     if(mbcsTable->reconstitutedData==NULL) {
@@ -1289,7 +1361,7 @@ reconstituteData(UConverterMBCSTable *mbcsTable,
                 stage2Length*4);
 
     mbcsTable->fromUnicodeTable=stage1;
-    mbcsTable->fromUnicodeBytes=bytes=(uint8_t *)(stage2+fullStage2Length);
+    mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
 
     /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
     stage2=(uint32_t *)stage1;
@@ -1400,6 +1472,7 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
         /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
         args.size=sizeof(UConverterLoadArgs);
         args.nestedLoads=2;
+        args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
         args.reserved=pArgs->reserved;
         args.options=pArgs->options;
         args.pkg=pArgs->pkg;
@@ -1415,6 +1488,16 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
             *pErrorCode=U_INVALID_TABLE_FORMAT;
             return;
         }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            ucnv_unload(baseSharedData);
+            return;
+        }
 
         /* copy the base table data */
         uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
@@ -1529,6 +1612,15 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
             *pErrorCode=U_INVALID_TABLE_FORMAT;
             return;
         }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            return;
+        }
 
         mbcsTable->countStates=(uint8_t)header->countStates;
         mbcsTable->countToUFallbacks=header->countToUFallbacks;
@@ -1660,24 +1752,26 @@ ucnv_MBCSUnload(UConverterSharedData *sharedData) {
 
 static void
 ucnv_MBCSOpen(UConverter *cnv,
-          const char *name,
-          const char *locale,
-          uint32_t options,
-          UErrorCode *pErrorCode) {
+              UConverterLoadArgs *pArgs,
+              UErrorCode *pErrorCode) {
     UConverterMBCSTable *mbcsTable;
     const int32_t *extIndexes;
     uint8_t outputType;
     int8_t maxBytesPerUChar;
 
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
     mbcsTable=&cnv->sharedData->mbcs;
     outputType=mbcsTable->outputType;
 
     if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
         /* the swaplfnl option does not apply, remove it */
-        cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+        cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
     }
 
-    if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
+    if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
         /* do this because double-checked locking is broken */
         UBool isCached;
 
@@ -1692,16 +1786,25 @@ ucnv_MBCSOpen(UConverter *cnv,
                 }
 
                 /* the option does not apply, remove it */
-                cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+                cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
             }
         }
     }
 
-    if(uprv_strstr(name, "18030")!=NULL) {
-        if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
+    if(uprv_strstr(pArgs->name, "18030")!=NULL) {
+        if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
             /* set a flag for GB 18030 mode, which changes the callback behavior */
             cnv->options|=_MBCS_OPTION_GB18030;
         }
+    } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
+        /* set a flag for KEIS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_KEIS;
+    } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
+        /* set a flag for JEF converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JEF;
+    } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
+        /* set a flag for JIPS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JIPS;
     }
 
     /* fix maxBytesPerUChar depending on outputType and options etc. */
@@ -2052,7 +2155,7 @@ unrolled:
 #endif
 
     /* conversion loop */
-    while(targetCapacity>0) {
+    while(targetCapacity > 0 && source < sourceLimit) {
         entry=stateTable[0][*source++];
         /* MBCS_ENTRY_IS_FINAL(entry) */
 
@@ -3250,16 +3353,16 @@ ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                  * If it does, then surrogates are not paired but mapped separately.
                  * Note that in this case unmatched surrogates are not detected.
                  */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
 getTrail:
                         if(source<sourceLimit) {
                             /* test the following code unit */
                             UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                 ++source;
                                 ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                 if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                     /* callback(unassigned) */
@@ -3455,16 +3558,16 @@ ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
              */
             c=*source++;
             ++nextSourceIndex;
-            if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
 getTrail:
                     if(source<sourceLimit) {
                         /* test the following code unit */
                         UChar trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                             ++source;
                             ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                             if(!hasSupplementary) {
                                 /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                 /* callback(unassigned) */
@@ -3703,16 +3806,16 @@ unrolled:
             /* normal end of conversion: prepare for a new character */
             c=0;
             continue;
-        } else if(!UTF_IS_SURROGATE(c)) {
+        } else if(!U16_IS_SURROGATE(c)) {
             /* normal, unassigned BMP character */
-        } else if(UTF_IS_SURROGATE_FIRST(c)) {
+        } else if(U16_IS_SURROGATE_LEAD(c)) {
 getTrail:
             if(source<sourceLimit) {
                 /* test the following code unit */
                 UChar trail=*source;
-                if(UTF_IS_SECOND_SURROGATE(trail)) {
+                if(U16_IS_TRAIL(trail)) {
                     ++source;
-                    c=UTF16_GET_PAIR_VALUE(c, trail);
+                    c=U16_GET_SUPPLEMENTARY(c, trail);
                     /* this codepage does not map supplementary code points */
                     /* callback(unassigned) */
                 } else {
@@ -3837,7 +3940,11 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     uint32_t stage2Entry;
     uint32_t asciiRoundtrips;
     uint32_t value;
-    int32_t length, prevLength;
+    /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
+    uint8_t siBytes[2] = {0, 0};
+    uint8_t soBytes[2] = {0, 0};
+    uint8_t siLength, soLength;
+    int32_t length = 0, prevLength;
     uint8_t unicodeMask;
 
     cnv=pArgs->converter;
@@ -3908,6 +4015,10 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     sourceIndex= c==0 ? 0 : -1;
     nextSourceIndex=0;
 
+    /* Get the SI/SO character for the converter */
+    siLength = getSISOBytes(SI, cnv->options, siBytes);
+    soLength = getSISOBytes(SO, cnv->options, soBytes);
+
     /* conversion loop */
     /*
      * This is another piece of ugly code:
@@ -3997,8 +4108,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                             length=1;
                         } else {
                             /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (siLength == 1) {
+                                value|=(uint32_t)siBytes[0]<<8;
+                                length = 2;
+                            } else if (siLength == 2) {
+                                value|=(uint32_t)siBytes[1]<<8;
+                                value|=(uint32_t)siBytes[0]<<16;
+                                length = 3;
+                            }
                             prevLength=1;
                         }
                     } else {
@@ -4006,8 +4123,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                             length=2;
                         } else {
                             /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (soLength == 1) {
+                                value|=(uint32_t)soBytes[0]<<16;
+                                length = 3;
+                            } else if (soLength == 2) {
+                                value|=(uint32_t)soBytes[1]<<16;
+                                value|=(uint32_t)soBytes[0]<<24;
+                                length = 4;
+                            }
                             prevLength=2;
                         }
                     }
@@ -4114,16 +4237,16 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                  * If it does, then surrogates are not paired but mapped separately.
                  * Note that in this case unmatched surrogates are not detected.
                  */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
 getTrail:
                         if(source<sourceLimit) {
                             /* test the following code unit */
                             UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                 ++source;
                                 ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                 if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                     cnv->fromUnicodeStatus=prevLength; /* save the old state */
@@ -4217,8 +4340,14 @@ getTrail:
                             length=1;
                         } else {
                             /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (siLength == 1) {
+                                value|=(uint32_t)siBytes[0]<<8;
+                                length = 2;
+                            } else if (siLength == 2) {
+                                value|=(uint32_t)siBytes[1]<<8;
+                                value|=(uint32_t)siBytes[0]<<16;
+                                length = 3;
+                            }
                             prevLength=1;
                         }
                     } else {
@@ -4226,8 +4355,14 @@ getTrail:
                             length=2;
                         } else {
                             /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (soLength == 1) {
+                                value|=(uint32_t)soBytes[0]<<16;
+                                length = 3;
+                            } else if (soLength == 2) {
+                                value|=(uint32_t)soBytes[1]<<16;
+                                value|=(uint32_t)soBytes[0]<<24;
+                                length = 4;
+                            }
                             prevLength=2;
                         }
                     }
@@ -4362,11 +4497,11 @@ unassigned:
                         /* each branch falls through to the next one */
                     case 4:
                         *target++=(uint8_t)(value>>24);
-                    case 3:
+                    case 3: /*fall through*/
                         *target++=(uint8_t)(value>>16);
-                    case 2:
+                    case 2: /*fall through*/
                         *target++=(uint8_t)(value>>8);
-                    case 1:
+                    case 1: /*fall through*/
                         *target++=(uint8_t)value;
                     default:
                         /* will never occur */
@@ -4378,13 +4513,13 @@ unassigned:
                     case 4:
                         *target++=(uint8_t)(value>>24);
                         *offsets++=sourceIndex;
-                    case 3:
+                    case 3: /*fall through*/
                         *target++=(uint8_t)(value>>16);
                         *offsets++=sourceIndex;
-                    case 2:
+                    case 2: /*fall through*/
                         *target++=(uint8_t)(value>>8);
                         *offsets++=sourceIndex;
-                    case 1:
+                    case 1: /*fall through*/
                         *target++=(uint8_t)value;
                         *offsets++=sourceIndex;
                     default:
@@ -4409,9 +4544,9 @@ unassigned:
                     /* each branch falls through to the next one */
                 case 3:
                     *charErrorBuffer++=(uint8_t)(value>>16);
-                case 2:
+                case 2: /*fall through*/
                     *charErrorBuffer++=(uint8_t)(value>>8);
-                case 1:
+                case 1: /*fall through*/
                     *charErrorBuffer=(uint8_t)value;
                 default:
                     /* will never occur */
@@ -4428,12 +4563,12 @@ unassigned:
                     if(offsets!=NULL) {
                         *offsets++=sourceIndex;
                     }
-                case 2:
+                case 2: /*fall through*/
                     *target++=(uint8_t)(value>>8);
                     if(offsets!=NULL) {
                         *offsets++=sourceIndex;
                     }
-                case 1:
+                case 1: /*fall through*/
                     *target++=(uint8_t)value;
                     if(offsets!=NULL) {
                         *offsets++=sourceIndex;
@@ -4480,15 +4615,27 @@ unassigned:
     ) {
         /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
         if(targetCapacity>0) {
-            *target++=(uint8_t)UCNV_SI;
+            *target++=(uint8_t)siBytes[0];
+            if (siLength == 2) {
+                if (targetCapacity<2) {
+                    cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
+                    cnv->charErrorBufferLength=1;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                } else {
+                    *target++=(uint8_t)siBytes[1];
+                }
+            }
             if(offsets!=NULL) {
                 /* set the last source character's index (sourceIndex points at sourceLimit now) */
                 *offsets++=prevSourceIndex;
             }
         } else {
             /* target is full */
-            cnv->charErrorBuffer[0]=(char)UCNV_SI;
-            cnv->charErrorBufferLength=1;
+            cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
+            if (siLength == 2) {
+                cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
+            }
+            cnv->charErrorBufferLength=siLength;
             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         }
         prevLength=1; /* we switched into SBCS */
@@ -4784,7 +4931,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
             if(U8_IS_TRAIL(b)) {
                 ++i;
             } else {
-                if(i<utf8_countTrailBytes[b]) {
+                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                     /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
                     sourceLimit-=i+1;
                 }
@@ -4877,11 +5024,18 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                     /* handle "complicated" and error cases, and continuing partial characters */
                     oldToULength=0;
                     toULength=1;
-                    toULimit=utf8_countTrailBytes[b]+1;
+                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
                     c=b;
 moreBytes:
                     while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                             b=*source;
                             if(U8_IS_TRAIL(b)) {
                                 ++source;
@@ -4969,6 +5123,7 @@ moreBytes:
                      * but then exit the loop because the extension match would
                      * have consumed the source.
                      */
+                    *pErrorCode=U_USING_DEFAULT_WARNING;
                     break;
                 } else {
                     /* a mapping was written to the target, continue */
@@ -4989,10 +5144,12 @@ moreBytes:
      * to stop before a truncated sequence.
      * If so, then collect the truncated sequence now.
      */
-    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+    if(U_SUCCESS(*pErrorCode) &&
+            cnv->preFromUFirstCP<0 &&
+            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
         c=utf8->toUBytes[0]=b=*source++;
         toULength=1;
-        toULimit=utf8_countTrailBytes[b]+1;
+        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
         while(source<sourceLimit) {
             utf8->toUBytes[toULength++]=b=*source++;
             c=(c<<6)+b;
@@ -5026,7 +5183,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 
     uint32_t stage2Entry;
     uint32_t asciiRoundtrips;
-    uint16_t value, minValue;
+    uint16_t value;
     UBool hasSupplementary;
 
     /* set up the local pointers */
@@ -5046,13 +5203,6 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     }
     asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
 
-    if(cnv->useFallback) {
-        /* use all roundtrip and fallback results */
-        minValue=0x800;
-    } else {
-        /* use only roundtrips and fallbacks from private-use characters */
-        minValue=0xc00;
-    }
     hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
 
     /* get the converter state from the UTF-8 UConverter */
@@ -5081,7 +5231,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
             if(U8_IS_TRAIL(b)) {
                 ++i;
             } else {
-                if(i<utf8_countTrailBytes[b]) {
+                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                     /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
                     sourceLimit-=i+1;
                 }
@@ -5154,11 +5304,18 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                     /* handle "complicated" and error cases, and continuing partial characters */
                     oldToULength=0;
                     toULength=1;
-                    toULimit=utf8_countTrailBytes[b]+1;
+                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
                     c=b;
 moreBytes:
                     while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                             b=*source;
                             if(U8_IS_TRAIL(b)) {
                                 ++source;
@@ -5275,6 +5432,7 @@ unassigned:
                      * but then exit the loop because the extension match would
                      * have consumed the source.
                      */
+                    *pErrorCode=U_USING_DEFAULT_WARNING;
                     break;
                 } else {
                     /* a mapping was written to the target, continue */
@@ -5296,10 +5454,12 @@ unassigned:
      * to stop before a truncated sequence.
      * If so, then collect the truncated sequence now.
      */
-    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+    if(U_SUCCESS(*pErrorCode) &&
+            cnv->preFromUFirstCP<0 &&
+            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
         c=utf8->toUBytes[0]=b=*source++;
         toULength=1;
-        toULimit=utf8_countTrailBytes[b]+1;
+        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
         while(source<sourceLimit) {
             utf8->toUBytes[toULength++]=b=*source++;
             c=(c<<6)+b;