ICU-8.11.1.tar.gz

author Apple <opensource@apple.com>

Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)

committer Apple <opensource@apple.com>

Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)
author Apple <opensource@apple.com>
Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)
committer Apple <opensource@apple.com>
Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)
diff --git a/icuSources/common/ucnv.c b/icuSources/common/ucnv.c

index ff6eebafd1a0c41d4afc4f4803a3d4153c716deb..f764f361a1fc27fe53128504cc6b32a76a2eae7e 100644 (file)
--- a/icuSources/common/ucnv.c
+++ b/icuSources/common/ucnv.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1998-2006, International Business Machines
+*   Copyright (C) 1998-2006,2008 International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -1429,7 +1429,8 @@ _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
                       e!=U_ILLEGAL_CHAR_FOUND &&
                       e!=U_TRUNCATED_CHAR_FOUND &&
                       e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
-                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
+                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE &&
+                     e!=U_PARSE_ERROR) /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE below */
                  ) {
                      /*
                       * the callback did not or cannot resolve the error:
@@ -1473,11 +1474,18 @@ _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
              cnv->toULength=0;
  
              /* call the callback function */
-            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
-                cnv->invalidCharBuffer, errorInputLength,
-                (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
-                    UCNV_UNASSIGNED : UCNV_ILLEGAL,
-                err);
+            {
+                UConverterCallbackReason reason;
+                if (*err == U_PARSE_ERROR) {   /* Here U_PARSE_ERROR indicates empty segment */
+                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+                    reason = UCNV_IRREGULAR;
+                } else {
+                       reason = (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
+                                 UCNV_UNASSIGNED : UCNV_ILLEGAL;
+                }
+                cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+                    cnv->invalidCharBuffer, errorInputLength, reason, err);
+            }
  
              /*
               * loop back to the offset handling
diff --git a/icuSources/common/ucnv2022.c b/icuSources/common/ucnv2022.c

index 9dc1c2838dd15085b4c6c5c4e2a45214fe68750a..cd83c69c88648255561d5ca5d6dcc2aff5cb7aa2 100644 (file)
--- a/icuSources/common/ucnv2022.c
+++ b/icuSources/common/ucnv2022.c
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2000-2006, International Business Machines
+*   Copyright (C) 2000-2006,2008 International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnv2022.c
@@ -181,6 +181,7 @@ typedef struct{
  #ifdef U_ENABLE_GENERIC_ISO_2022
      UBool isFirstBuffer;
  #endif
+    UBool isEmptySegment;
      char name[30];
      char locale[3];
  }UConverterDataISO2022;
@@ -590,6 +591,7 @@ _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
      if(choice<=UCNV_RESET_TO_UNICODE) {
          uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
          myConverterData->key = 0;
+        myConverterData->isEmptySegment = FALSE;
      }
      if(choice!=UCNV_RESET_TO_UNICODE) {
          uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
@@ -1705,6 +1707,7 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                      continue;
                  } else {
                      /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+                    myData->isEmptySegment = FALSE;    /* reset this, we have a different error */
                      break;
                  }
  
@@ -1716,21 +1719,38 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                      continue;
                  } else {
                      /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+                    myData->isEmptySegment = FALSE;    /* reset this, we have a different error */
                      break;
                  }
  
              case ESC_2022:
                  mySource--;
  escape:
-                changeState_2022(args->converter,&(mySource), 
-                    mySourceLimit, ISO_2022_JP,err);
+                {
+                    const char * mySourceBefore = mySource;
+                    int8_t toULengthBefore = args->converter->toULength;
+
+                    changeState_2022(args->converter,&(mySource), 
+                        mySourceLimit, ISO_2022_JP,err);
+
+                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
+                    if ( myData->version == 0 && myData->key == 0 && U_SUCCESS(*err) && myData->isEmptySegment ) {
+                        *err = U_PARSE_ERROR;  /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
+                    }
  
+                }
                  /* invalid or illegal escape sequence */
                  if(U_FAILURE(*err)){
                      args->target = myTarget;
                      args->source = mySource;
+                    myData->isEmptySegment = FALSE;    /* Reset to avoid future spurious errors */
                      return;
                  }
+                /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
+                if (myData->key == 0) {
+                    myData->isEmptySegment = TRUE;
+                }
                  continue;
  
              /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
@@ -1747,6 +1767,7 @@ escape:
                  /* falls through */
              default:
                  /* convert one or two bytes */
+                myData->isEmptySegment = FALSE;
                  cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                  if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
                      !IS_JP_DBCS(cs)
@@ -2240,15 +2261,26 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
  
              if(mySourceChar==UCNV_SI){
                  myData->toU2022State.g = 0;
+                if (myData->isEmptySegment) {
+                    myData->isEmptySegment = FALSE;    /* we are handling it, reset to avoid future spurious errors */
+                    *err = U_PARSE_ERROR;      /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                    args->converter->toUBytes[0] = mySourceChar;
+                    args->converter->toULength = 1;
+                    args->target = myTarget;
+                    args->source = mySource;
+                    return;
+                }
                  /*consume the source */
                  continue;
              }else if(mySourceChar==UCNV_SO){
                  myData->toU2022State.g = 1;
+                myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
                  /*consume the source */
                  continue;
              }else if(mySourceChar==ESC_2022){
                  mySource--;
  escape:
+                myData->isEmptySegment = FALSE;        /* Any invalid ESC sequences will be detected separately, so just reset this */
                  changeState_2022(args->converter,&(mySource), 
                                  mySourceLimit, ISO_2022_KR, err);
                  if(U_FAILURE(*err)){
@@ -2259,6 +2291,7 @@ escape:
                  continue;
              }   
  
+            myData->isEmptySegment = FALSE;    /* Any invalid char errors will be detected separately, so just reset this */
              if(myData->toU2022State.g == 1) {
                  if(mySource < mySourceLimit) {
                      char trailByte;
@@ -2759,27 +2792,50 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
              switch(mySourceChar){
              case UCNV_SI:
                  pToU2022State->g=0;
+                if (myData->isEmptySegment) {
+                    myData->isEmptySegment = FALSE;    /* we are handling it, reset to avoid future spurious errors */
+                    *err = U_PARSE_ERROR;      /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                    args->converter->toUBytes[0] = mySourceChar;
+                    args->converter->toULength = 1;
+                    args->target = myTarget;
+                    args->source = mySource;
+                    return;
+                }
                  continue;
  
              case UCNV_SO:
                  if(pToU2022State->cs[1] != 0) {
                      pToU2022State->g=1;
+                    myData->isEmptySegment = TRUE;     /* Begin a new segment, empty so far */
                      continue;
                  } else {
                      /* illegal to have SO before a matching designator */
+                    myData->isEmptySegment = FALSE;    /* Handling a different error, reset this to avoid future spurious errs */
                      break;
                  }
  
              case ESC_2022:
                  mySource--;
  escape:
-                changeState_2022(args->converter,&(mySource), 
-                    mySourceLimit, ISO_2022_CN,err);
+                {
+                    const char * mySourceBefore = mySource;
+                    int8_t toULengthBefore = args->converter->toULength;
+
+                    changeState_2022(args->converter,&(mySource), 
+                        mySourceLimit, ISO_2022_CN,err);
+
+                    /* After SO there must be at least one character before a designator (designator error handled separately) */
+                    if ( myData->key == 0 && U_SUCCESS(*err) && myData->isEmptySegment ) {
+                        *err = U_PARSE_ERROR;  /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
+                    }
+                }
  
                  /* invalid or illegal escape sequence */
                  if(U_FAILURE(*err)){
                      args->target = myTarget;
                      args->source = mySource;
+                    myData->isEmptySegment = FALSE;    /* Reset to avoid future spurious errors */
                      return;
                  }
                  continue;
@@ -2793,6 +2849,7 @@ escape:
                  /* falls through */
              default:
                  /* convert one or two bytes */
+                myData->isEmptySegment = FALSE;
                  if(pToU2022State->g != 0) {
                      if(mySource < mySourceLimit) {
                          UConverterSharedData *cnv;
diff --git a/icuSources/common/ucnvhz.c b/icuSources/common/ucnvhz.c

index b94811b81222c618dc8fc44ad5d923b035817426..c3f63fca1081a4f64d632f400b5d81cc9b016819 100644 (file)
--- a/icuSources/common/ucnvhz.c
+++ b/icuSources/common/ucnvhz.c
@@ -1,6 +1,6 @@
  /*  
  **********************************************************************
-*   Copyright (C) 2000-2006, International Business Machines
+*   Copyright (C) 2000-2006, 2008 International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnvhz.c
@@ -59,6 +59,7 @@ typedef struct{
      UBool isEscapeAppended;
      UBool isStateDBCS;
      UBool isTargetUCharDBCS;
+    UBool isEmptySegment;
  }UConverterDataHZ;
  
  
@@ -98,6 +99,7 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){
          cnv->mode=0;
          if(cnv->extraInfo != NULL){
              ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
+            ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
          }
      }
      if(choice!=UCNV_RESET_TO_UNICODE) {
@@ -163,12 +165,14 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                          
                      }
                      *(myTarget++)=(UChar)mySourceChar;
+                    myData->isEmptySegment = FALSE;
                      continue;
              
                  case UCNV_TILDE:
                      if(args->converter->mode ==UCNV_TILDE){
                          *(myTarget++)=(UChar)mySourceChar;
                          args->converter->mode=0;
+                        myData->isEmptySegment = FALSE;
                          continue;
                          
                      }
@@ -186,6 +190,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                      if(args->converter->mode == UCNV_TILDE){
                          args->converter->mode=0;
                          myData->isStateDBCS = TRUE;
+                        myData->isEmptySegment = TRUE;
                          continue;
                      }
                      else{
@@ -197,6 +202,15 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                      if(args->converter->mode == UCNV_TILDE){
                          args->converter->mode=0;
                           myData->isStateDBCS = FALSE;
+                         if (myData->isEmptySegment) {
+                            myData->isEmptySegment = FALSE;    /* we are handling it, reset to avoid future spurious errors */
+                            *err = U_PARSE_ERROR;      /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                            args->converter->toUBytes[0] = UCNV_TILDE;
+                            args->converter->toUBytes[1] = mySourceChar;
+                            args->converter->toULength = 2;
+                               goto EXIT;
+                         }
+                         myData->isEmptySegment = TRUE;
                          continue;
                      }
                      else{
@@ -210,6 +224,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                      if(args->converter->mode == UCNV_TILDE){
                          args->converter->mode=0;
                          mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
+                        myData->isEmptySegment = FALSE;        /* different error here, reset this to avoid spurious future error */
                          goto SAVE_STATE;
                      }
                      
@@ -217,6 +232,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
  
              }
               
+            myData->isEmptySegment = FALSE;    /* the segment has something, either valid or will produce a different error, so reset this */
              if(myData->isStateDBCS){
                  if(args->converter->toUnicodeStatus == 0x00){
                      args->converter->toUnicodeStatus = (UChar) mySourceChar;
@@ -281,7 +297,7 @@ SAVE_STATE:
              break;
          }
      }
-
+EXIT:
      args->target = myTarget;
      args->source = mySource;
  }
diff --git a/icuSources/test/cintltst/nucnvtst.c b/icuSources/test/cintltst/nucnvtst.c

index baecb4586d9e3894d19722859c5821aabf4acac2..6791a1ebbec03761cc4bb2bbefdc0ebbc4c2dd22 100644 (file)
--- a/icuSources/test/cintltst/nucnvtst.c
+++ b/icuSources/test/cintltst/nucnvtst.c
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2006,2008 International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /*******************************************************************************
@@ -79,6 +79,7 @@ static void TestISCII(void);
  static void TestCoverageMBCS(void);
  static void TestJitterbug2346(void);
  static void TestJitterbug2411(void);
+static void TestJitterbug6175(void);
  #endif
  
  static void TestRoundTrippingAllUTF(void);
@@ -294,8 +295,8 @@ void addTestNewConvert(TestNode** root)
  #if !UCONFIG_NO_LEGACY_CONVERSION
     addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
     addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
+   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
  #endif
-
  }
  
  
@@ -4454,6 +4455,68 @@ TestISO_2022_CN() {
      free(offsets);
  }
  
+/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
+typedef struct {
+    const char *    converterName;
+    const char *    inputText;
+    int             inputTextLength;
+} EmptySegmentTest;
+
+/* Callback for TestJitterbug6175, should only get called for empty segment errors */
+static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
+                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
+    if (reason > UCNV_IRREGULAR)
+        return;
+    if (reason != UCNV_IRREGULAR)
+        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
+    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
+    *err = U_ZERO_ERROR;
+    ucnv_cbToUWriteSub(toArgs,0,err);
+}
+
+enum { kEmptySegmentToUCharsMax = 64 };
+static void TestJitterbug6175(void) {
+    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
+    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
+    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
+    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
+    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
+    static const EmptySegmentTest emptySegmentTests[] = {
+        /* converterName inputText    inputTextLength */
+        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
+        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
+        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
+        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
+        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
+        /* terminator: */
+        { NULL,          NULL,        0,                  }
+    };
+    const EmptySegmentTest * testPtr;
+    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
+        UErrorCode   err = U_ZERO_ERROR;
+        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
+        if (U_FAILURE(err)) {
+            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
+            return;
+        }
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
+        if (U_FAILURE(err)) {
+            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
+            ucnv_close(cnv);
+            return;
+        }
+        {
+            UChar         toUChars[kEmptySegmentToUCharsMax];
+            UChar *       toUCharsPtr = toUChars;
+            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
+            const char *  inCharsPtr = testPtr->inputText;
+            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
+            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
+        }
+        ucnv_close(cnv);
+    }
+}
+
  static void
  TestEBCDIC_STATEFUL() {
      /* test input */
diff --git a/icuSources/test/testdata/conversion.txt b/icuSources/test/testdata/conversion.txt

index 4e94f4713f91000846dbde47662e3882b0b09c6c..a8cd8a90ac3f2379232718a2ded2f28aba178df9 100644 (file)
--- a/icuSources/test/testdata/conversion.txt
+++ b/icuSources/test/testdata/conversion.txt
@@ -182,6 +182,21 @@ conversion:table(nofallback) {
            :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
            :int{1}, :int{1}, "", "&", :bin{""}
          }
+        // empty segment (using substitution and stop)
+        {
+          "ISO-2022-KR",
+          :bin{ 1b242943610e0f620d0a },
+          "a\uFFFDb\u000D\u000A",
+          :intvector{ 4, 6, 7, 8, 9 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        {
+          "ISO-2022-KR",
+          :bin{ 1b242943610e0f620d0a },
+          "a",
+          :intvector{ 4 },
+          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
+        }
  
          // ISO-2022-JP
  
@@ -232,6 +247,21 @@ conversion:table(nofallback) {
            :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
            :int{1}, :int{1}, "", ".", :bin{""}
          }
+        // empty segment (using substitution and stop)
+        {
+          "ISO-2022-JP",
+          :bin{ 61621b24421b284263640d0a },
+          "ab\uFFFDcd\u000D\u000A",
+          :intvector{ 0, 1, 5, 8, 9, 10, 11 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        {
+          "ISO-2022-JP",
+          :bin{ 61621b24421b284263640d0a },
+          "ab",
+          :intvector{ 0, 1 },
+          :int{1}, :int{1}, "illesc", ".", :bin{"1b2842"}
+        }
  
          // ISO-2022-CN
  
@@ -302,6 +332,36 @@ conversion:table(nofallback) {
            :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
            :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
          }
+        // empty segment 1 (using substitution and stop)
+        {
+          "ISO-2022-CN",
+          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
+          "ab\uFFFD\u994Cc\u000D\u000A",
+          :intvector{ 0, 5, 7, 14, 16, 17, 18 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        {
+          "ISO-2022-CN",
+          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
+          "ab",
+          :intvector{ 0, 5 },
+          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
+        }
+        // empty segment 2 (using substitution and stop)
+        {
+          "ISO-2022-CN",
+          :bin{ 611b242941620e1b24294768640f630d0a },
+          "ab\uFFFD\u5F70c\u000D\u000A",
+          :intvector{ 0, 5, 7, 11, 14, 15, 16 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        {
+          "ISO-2022-CN",
+          :bin{ 611b242941620e1b24294768640f630d0a },
+          "ab",
+          :intvector{ 0, 5 },
+          :int{1}, :int{1}, "illesc", ".", :bin{"1b242947"}
+        }
  
          // ISO-2022 SBCS
          // [U_ENABLE_GENERIC_ISO_2022]
@@ -316,6 +376,24 @@ conversion:table(nofallback) {
          //  :int{1}, :int{1}, "", ".", :bin{""}
          //}
  
+        // HZ-GB-2312
+
+        // empty segment (using substitution and stop)
+        {
+          "HZ-GB-2312",
+          :bin{ 61627e7b7e7d6364 },
+          "ab\uFFFDcd",
+          :intvector{ 0, 1, 4, 6, 7 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        {
+          "HZ-GB-2312",
+          :bin{ 61627e7b7e7d63640d0a },
+          "ab",
+          :intvector{ 0, 1 },
+          :int{1}, :int{1}, "illesc", ".", :bin{"7e7d"}
+        }
+
          // DBCS-only extensions
          {
            "ibm-970",
author	Apple <opensource@apple.com>
	Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)
committer	Apple <opensource@apple.com>
	Fri, 7 Mar 2008 01:20:00 +0000 (01:20 +0000)
icuSources/common/ucnv.c		patch | blob | blame | history
icuSources/common/ucnv2022.c		patch | blob | blame | history
icuSources/common/ucnvhz.c		patch | blob | blame | history
icuSources/test/cintltst/nucnvtst.c		patch | blob | blame | history
icuSources/test/testdata/conversion.txt		patch | blob | blame | history