ICU-531.48.tar.gz

[apple/icu.git] / icuSources / common / ucnvmbcs.c
diff --git a/icuSources/common/ucnvmbcs.c b/icuSources/common/ucnvmbcs.c

index 9b55c17d05da023be765e670f1a9ded5fffcde97..143daf69af7a1ae9557408edc556eae83fee3462 100644 (file)
--- a/icuSources/common/ucnvmbcs.c
+++ b/icuSources/common/ucnvmbcs.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2000-2008, International Business Machines
+*   Copyright (C) 2000-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -48,13 +48,15 @@
  #include "unicode/ucnv_cb.h"
  #include "unicode/udata.h"
  #include "unicode/uset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnvmbcs.h"
  #include "ucnv_ext.h"
  #include "ucnv_cnv.h"
-#include "umutex.h"
  #include "cmemory.h"
  #include "cstring.h"
+#include "cmutex.h"
  
  /* control optimizations according to the platform */
  #define MBCS_UNROLL_SINGLE_TO_BMP 1
@@ -379,10 +381,11 @@ static const UConverterImpl _DBCSUTF8Impl;
   * as of the re-released mapping tables from 2000-nov-30.
   */
  static const uint32_t
-gb18030Ranges[13][4]={
+gb18030Ranges[14][4]={
      {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
      {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
-    {0x0452, 0x200F, LINEAR(0x8130D330), LINEAR(0x8136A531)},
+    {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
+    {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
      {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
      {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
      {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
@@ -398,6 +401,76 @@ gb18030Ranges[13][4]={
  /* bit flag for UConverter.options indicating GB 18030 special handling */
  #define _MBCS_OPTION_GB18030 0x8000
  
+/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
+#define _MBCS_OPTION_KEIS 0x01000
+#define _MBCS_OPTION_JEF  0x02000
+#define _MBCS_OPTION_JIPS 0x04000
+
+#define KEIS_SO_CHAR_1 0x0A
+#define KEIS_SO_CHAR_2 0x42
+#define KEIS_SI_CHAR_1 0x0A
+#define KEIS_SI_CHAR_2 0x41
+
+#define JEF_SO_CHAR 0x28
+#define JEF_SI_CHAR 0x29
+
+#define JIPS_SO_CHAR_1 0x1A
+#define JIPS_SO_CHAR_2 0x70
+#define JIPS_SI_CHAR_1 0x1A
+#define JIPS_SI_CHAR_2 0x71
+
+enum SISO_Option { /* selects which shift sequence getSISOBytes() should produce */
+    SI, /* sequence emitted when switching from double-byte to single-byte mode */
+    SO /* sequence emitted when switching from single-byte to double-byte mode */
+};
+typedef enum SISO_Option SISO_Option;
+
+static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) { /* Stores the SI or SO byte sequence for the converter options in value (must hold at least 2 bytes) and returns its length. */
+    int32_t SISOLength = 0; /* number of bytes stored in value; stays 0 if option is neither SI nor SO */

+    switch (option) {
+        case SI: /* caller asked for the SI sequence */
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { /* KEIS is tested first, so it wins if several flags are set */
+                value[0] = KEIS_SI_CHAR_1;
+                value[1] = KEIS_SI_CHAR_2;
+                SISOLength = 2; /* KEIS uses a two-byte SI sequence */
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SI_CHAR;
+                SISOLength = 1; /* JEF uses a single SI byte */
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SI_CHAR_1;
+                value[1] = JIPS_SI_CHAR_2;
+                SISOLength = 2; /* JIPS uses a two-byte SI sequence */
+            } else { /* none of the three flags is set: use the default UCNV_SI byte */
+                value[0] = UCNV_SI;
+                SISOLength = 1;
+            }
+            break;
+        case SO: /* caller asked for the SO sequence; same flag precedence as for SI */
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+                value[0] = KEIS_SO_CHAR_1;
+                value[1] = KEIS_SO_CHAR_2;
+                SISOLength = 2; /* KEIS uses a two-byte SO sequence */
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SO_CHAR;
+                SISOLength = 1; /* JEF uses a single SO byte */
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SO_CHAR_1;
+                value[1] = JIPS_SO_CHAR_2;
+                SISOLength = 2; /* JIPS uses a two-byte SO sequence */
+            } else { /* none of the three flags is set: use the default UCNV_SO byte */
+                value[0] = UCNV_SO;
+                SISOLength = 1;
+            }
+            break;
+        default:
+            /* Should never happen. */
+            break; /* value is left untouched and 0 is returned */
+    }

+    return SISOLength; /* 1 or 2 for SI/SO, 0 for an unrecognized option */
+}
+
  /* Miscellaneous ------------------------------------------------------------ */
  
  /**
@@ -753,9 +826,9 @@ ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                      switch(st3Multiplier) {
                                      case 4:
                                          b|=*stage3++;
-                                    case 3:
+                                    case 3: /*fall through*/
                                          b|=*stage3++;
-                                    case 2:
+                                    case 2: /*fall through*/
                                          b|=stage3[0]|stage3[1];
                                          stage3+=2;
                                      default:
@@ -1270,7 +1343,6 @@ reconstituteData(UConverterMBCSTable *mbcsTable,
                   UErrorCode *pErrorCode) {
      uint16_t *stage1;
      uint32_t *stage2;
-    uint8_t *bytes;
      uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
      mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
      if(mbcsTable->reconstitutedData==NULL) {
@@ -1289,7 +1361,7 @@ reconstituteData(UConverterMBCSTable *mbcsTable,
                  stage2Length*4);
  
      mbcsTable->fromUnicodeTable=stage1;
-    mbcsTable->fromUnicodeBytes=bytes=(uint8_t *)(stage2+fullStage2Length);
+    mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
  
      /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
      stage2=(uint32_t *)stage1;
@@ -1400,6 +1472,7 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
          /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
          args.size=sizeof(UConverterLoadArgs);
          args.nestedLoads=2;
+        args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
          args.reserved=pArgs->reserved;
          args.options=pArgs->options;
          args.pkg=pArgs->pkg;
@@ -1415,6 +1488,16 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
              *pErrorCode=U_INVALID_TABLE_FORMAT;
              return;
          }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            ucnv_unload(baseSharedData);
+            return;
+        }
  
          /* copy the base table data */
          uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
@@ -1529,6 +1612,15 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
              *pErrorCode=U_INVALID_TABLE_FORMAT;
              return;
          }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            return;
+        }
  
          mbcsTable->countStates=(uint8_t)header->countStates;
          mbcsTable->countToUFallbacks=header->countToUFallbacks;
@@ -1660,24 +1752,26 @@ ucnv_MBCSUnload(UConverterSharedData *sharedData) {
  
  static void
  ucnv_MBCSOpen(UConverter *cnv,
-          const char *name,
-          const char *locale,
-          uint32_t options,
-          UErrorCode *pErrorCode) {
+              UConverterLoadArgs *pArgs,
+              UErrorCode *pErrorCode) {
      UConverterMBCSTable *mbcsTable;
      const int32_t *extIndexes;
      uint8_t outputType;
      int8_t maxBytesPerUChar;
  
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
      mbcsTable=&cnv->sharedData->mbcs;
      outputType=mbcsTable->outputType;
  
      if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
          /* the swaplfnl option does not apply, remove it */
-        cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+        cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
      }
  
-    if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
+    if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
          /* do this because double-checked locking is broken */
          UBool isCached;
  
@@ -1692,16 +1786,25 @@ ucnv_MBCSOpen(UConverter *cnv,
                  }
  
                  /* the option does not apply, remove it */
-                cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+                cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
              }
          }
      }
  
-    if(uprv_strstr(name, "18030")!=NULL) {
-        if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
+    if(uprv_strstr(pArgs->name, "18030")!=NULL) {
+        if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
              /* set a flag for GB 18030 mode, which changes the callback behavior */
              cnv->options|=_MBCS_OPTION_GB18030;
          }
+    } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
+        /* set a flag for KEIS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_KEIS;
+    } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
+        /* set a flag for JEF converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JEF;
+    } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
+        /* set a flag for JIPS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JIPS;
      }
  
      /* fix maxBytesPerUChar depending on outputType and options etc. */
@@ -2052,7 +2155,7 @@ unrolled:
  #endif
  
      /* conversion loop */
-    while(targetCapacity>0) {
+    while(targetCapacity > 0 && source < sourceLimit) {
          entry=stateTable[0][*source++];
          /* MBCS_ENTRY_IS_FINAL(entry) */
  
@@ -3250,16 +3353,16 @@ ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                   * If it does, then surrogates are not paired but mapped separately.
                   * Note that in this case unmatched surrogates are not detected.
                   */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                          if(source<sourceLimit) {
                              /* test the following code unit */
                              UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  ++source;
                                  ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                  if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                      /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                      /* callback(unassigned) */
@@ -3455,16 +3558,16 @@ ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
               */
              c=*source++;
              ++nextSourceIndex;
-            if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          UChar trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
                              ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              if(!hasSupplementary) {
                                  /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                  /* callback(unassigned) */
@@ -3703,16 +3806,16 @@ unrolled:
              /* normal end of conversion: prepare for a new character */
              c=0;
              continue;
-        } else if(!UTF_IS_SURROGATE(c)) {
+        } else if(!U16_IS_SURROGATE(c)) {
              /* normal, unassigned BMP character */
-        } else if(UTF_IS_SURROGATE_FIRST(c)) {
+        } else if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
              if(source<sourceLimit) {
                  /* test the following code unit */
                  UChar trail=*source;
-                if(UTF_IS_SECOND_SURROGATE(trail)) {
+                if(U16_IS_TRAIL(trail)) {
                      ++source;
-                    c=UTF16_GET_PAIR_VALUE(c, trail);
+                    c=U16_GET_SUPPLEMENTARY(c, trail);
                      /* this codepage does not map supplementary code points */
                      /* callback(unassigned) */
                  } else {
@@ -3837,7 +3940,11 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
      uint32_t stage2Entry;
      uint32_t asciiRoundtrips;
      uint32_t value;
-    int32_t length, prevLength;
+    /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
+    uint8_t siBytes[2] = {0, 0};
+    uint8_t soBytes[2] = {0, 0};
+    uint8_t siLength, soLength;
+    int32_t length = 0, prevLength;
      uint8_t unicodeMask;
  
      cnv=pArgs->converter;
@@ -3908,6 +4015,10 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
      sourceIndex= c==0 ? 0 : -1;
      nextSourceIndex=0;
  
+    /* Get the SI/SO byte sequences (one or two bytes each) for the converter */
+    siLength = getSISOBytes(SI, cnv->options, siBytes);
+    soLength = getSISOBytes(SO, cnv->options, soBytes);
+
      /* conversion loop */
      /*
       * This is another piece of ugly code:
@@ -3997,8 +4108,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              length=1;
                          } else {
                              /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (siLength == 1) {
+                                value|=(uint32_t)siBytes[0]<<8;
+                                length = 2;
+                            } else if (siLength == 2) {
+                                value|=(uint32_t)siBytes[1]<<8;
+                                value|=(uint32_t)siBytes[0]<<16;
+                                length = 3;
+                            }
                              prevLength=1;
                          }
                      } else {
@@ -4006,8 +4123,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              length=2;
                          } else {
                              /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (soLength == 1) {
+                                value|=(uint32_t)soBytes[0]<<16;
+                                length = 3;
+                            } else if (soLength == 2) {
+                                value|=(uint32_t)soBytes[1]<<16;
+                                value|=(uint32_t)soBytes[0]<<24;
+                                length = 4;
+                            }
                              prevLength=2;
                          }
                      }
@@ -4114,16 +4237,16 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                   * If it does, then surrogates are not paired but mapped separately.
                   * Note that in this case unmatched surrogates are not detected.
                   */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                          if(source<sourceLimit) {
                              /* test the following code unit */
                              UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  ++source;
                                  ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                  if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                      /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                      cnv->fromUnicodeStatus=prevLength; /* save the old state */
@@ -4217,8 +4340,14 @@ getTrail:
                              length=1;
                          } else {
                              /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (siLength == 1) {
+                                value|=(uint32_t)siBytes[0]<<8;
+                                length = 2;
+                            } else if (siLength == 2) {
+                                value|=(uint32_t)siBytes[1]<<8;
+                                value|=(uint32_t)siBytes[0]<<16;
+                                length = 3;
+                            }
                              prevLength=1;
                          }
                      } else {
@@ -4226,8 +4355,14 @@ getTrail:
                              length=2;
                          } else {
                              /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (soLength == 1) {
+                                value|=(uint32_t)soBytes[0]<<16;
+                                length = 3;
+                            } else if (soLength == 2) {
+                                value|=(uint32_t)soBytes[1]<<16;
+                                value|=(uint32_t)soBytes[0]<<24;
+                                length = 4;
+                            }
                              prevLength=2;
                          }
                      }
@@ -4362,11 +4497,11 @@ unassigned:
                          /* each branch falls through to the next one */
                      case 4:
                          *target++=(uint8_t)(value>>24);
-                    case 3:
+                    case 3: /*fall through*/
                          *target++=(uint8_t)(value>>16);
-                    case 2:
+                    case 2: /*fall through*/
                          *target++=(uint8_t)(value>>8);
-                    case 1:
+                    case 1: /*fall through*/
                          *target++=(uint8_t)value;
                      default:
                          /* will never occur */
@@ -4378,13 +4513,13 @@ unassigned:
                      case 4:
                          *target++=(uint8_t)(value>>24);
                          *offsets++=sourceIndex;
-                    case 3:
+                    case 3: /*fall through*/
                          *target++=(uint8_t)(value>>16);
                          *offsets++=sourceIndex;
-                    case 2:
+                    case 2: /*fall through*/
                          *target++=(uint8_t)(value>>8);
                          *offsets++=sourceIndex;
-                    case 1:
+                    case 1: /*fall through*/
                          *target++=(uint8_t)value;
                          *offsets++=sourceIndex;
                      default:
@@ -4409,9 +4544,9 @@ unassigned:
                      /* each branch falls through to the next one */
                  case 3:
                      *charErrorBuffer++=(uint8_t)(value>>16);
-                case 2:
+                case 2: /*fall through*/
                      *charErrorBuffer++=(uint8_t)(value>>8);
-                case 1:
+                case 1: /*fall through*/
                      *charErrorBuffer=(uint8_t)value;
                  default:
                      /* will never occur */
@@ -4428,12 +4563,12 @@ unassigned:
                      if(offsets!=NULL) {
                          *offsets++=sourceIndex;
                      }
-                case 2:
+                case 2: /*fall through*/
                      *target++=(uint8_t)(value>>8);
                      if(offsets!=NULL) {
                          *offsets++=sourceIndex;
                      }
-                case 1:
+                case 1: /*fall through*/
                      *target++=(uint8_t)value;
                      if(offsets!=NULL) {
                          *offsets++=sourceIndex;
@@ -4480,15 +4615,27 @@ unassigned:
      ) {
          /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
          if(targetCapacity>0) {
-            *target++=(uint8_t)UCNV_SI;
+            *target++=(uint8_t)siBytes[0];
+            if (siLength == 2) {
+                if (targetCapacity<2) {
+                    cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
+                    cnv->charErrorBufferLength=1;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                } else {
+                    *target++=(uint8_t)siBytes[1];
+                }
+            }
              if(offsets!=NULL) {
                  /* set the last source character's index (sourceIndex points at sourceLimit now) */
                  *offsets++=prevSourceIndex;
              }
          } else {
              /* target is full */
-            cnv->charErrorBuffer[0]=(char)UCNV_SI;
-            cnv->charErrorBufferLength=1;
+            cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
+            if (siLength == 2) {
+                cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
+            }
+            cnv->charErrorBufferLength=siLength;
              *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          }
          prevLength=1; /* we switched into SBCS */
@@ -4784,7 +4931,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
              if(U8_IS_TRAIL(b)) {
                  ++i;
              } else {
-                if(i<utf8_countTrailBytes[b]) {
+                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                      /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
                      sourceLimit-=i+1;
                  }
@@ -4877,11 +5024,18 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                      /* handle "complicated" and error cases, and continuing partial characters */
                      oldToULength=0;
                      toULength=1;
-                    toULimit=utf8_countTrailBytes[b]+1;
+                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
                      c=b;
  moreBytes:
                      while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                              b=*source;
                              if(U8_IS_TRAIL(b)) {
                                  ++source;
@@ -4969,6 +5123,7 @@ moreBytes:
                       * but then exit the loop because the extension match would
                       * have consumed the source.
                       */
+                    *pErrorCode=U_USING_DEFAULT_WARNING;
                      break;
                  } else {
                      /* a mapping was written to the target, continue */
@@ -4989,10 +5144,12 @@ moreBytes:
       * to stop before a truncated sequence.
       * If so, then collect the truncated sequence now.
       */
-    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+    if(U_SUCCESS(*pErrorCode) &&
+            cnv->preFromUFirstCP<0 &&
+            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
          c=utf8->toUBytes[0]=b=*source++;
          toULength=1;
-        toULimit=utf8_countTrailBytes[b]+1;
+        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
          while(source<sourceLimit) {
              utf8->toUBytes[toULength++]=b=*source++;
              c=(c<<6)+b;
@@ -5026,7 +5183,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
  
      uint32_t stage2Entry;
      uint32_t asciiRoundtrips;
-    uint16_t value, minValue;
+    uint16_t value;
      UBool hasSupplementary;
  
      /* set up the local pointers */
@@ -5046,13 +5203,6 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
      }
      asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  
-    if(cnv->useFallback) {
-        /* use all roundtrip and fallback results */
-        minValue=0x800;
-    } else {
-        /* use only roundtrips and fallbacks from private-use characters */
-        minValue=0xc00;
-    }
      hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
  
      /* get the converter state from the UTF-8 UConverter */
@@ -5081,7 +5231,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
              if(U8_IS_TRAIL(b)) {
                  ++i;
              } else {
-                if(i<utf8_countTrailBytes[b]) {
+                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                      /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
                      sourceLimit-=i+1;
                  }
@@ -5154,11 +5304,18 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                      /* handle "complicated" and error cases, and continuing partial characters */
                      oldToULength=0;
                      toULength=1;
-                    toULimit=utf8_countTrailBytes[b]+1;
+                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
                      c=b;
  moreBytes:
                      while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                              b=*source;
                              if(U8_IS_TRAIL(b)) {
                                  ++source;
@@ -5275,6 +5432,7 @@ unassigned:
                       * but then exit the loop because the extension match would
                       * have consumed the source.
                       */
+                    *pErrorCode=U_USING_DEFAULT_WARNING;
                      break;
                  } else {
                      /* a mapping was written to the target, continue */
@@ -5296,10 +5454,12 @@ unassigned:
       * to stop before a truncated sequence.
       * If so, then collect the truncated sequence now.
       */
-    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
+    if(U_SUCCESS(*pErrorCode) &&
+            cnv->preFromUFirstCP<0 &&
+            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
          c=utf8->toUBytes[0]=b=*source++;
          toULength=1;
-        toULimit=utf8_countTrailBytes[b]+1;
+        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
          while(source<sourceLimit) {
              utf8->toUBytes[toULength++]=b=*source++;
              c=(c<<6)+b;