ICU-57132.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / convtest.cpp
diff --git a/icuSources/test/intltest/convtest.cpp b/icuSources/test/intltest/convtest.cpp

index 791ed3c76c3ad5d078f78d8b369d02e261e3630c..19bc7d520b7450ead294c4cd9d511677fb8fa6e3 100644 (file)
--- a/icuSources/test/intltest/convtest.cpp
+++ b/icuSources/test/intltest/convtest.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2003-2004, International Business Machines
+*   Copyright (C) 2003-2014, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -36,12 +36,11 @@
  #include "unicode/ustring.h"
  #include "unicode/ures.h"
  #include "convtest.h"
+#include "cmemory.h"
  #include "unicode/tstdtmod.h"
  #include <string.h>
  #include <stdlib.h>
  
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
  enum {
      // characters used in test data for callbacks
      SUB_CB='?',
@@ -50,15 +49,35 @@ enum {
      ESC_CB='&'
  };
  
-ConversionTest::~ConversionTest() {}
+// Opens the UTF-8 converter kept in utf8Cnv and installs the STOP toUnicode callback on it.
+ConversionTest::ConversionTest() {
+    UErrorCode status=U_ZERO_ERROR;
+    UConverter *opened=ucnv_open("UTF-8", &status);
+    ucnv_setToUCallBack(opened, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
+    utf8Cnv=opened;
+    if(U_FAILURE(status)) { errln("unable to open UTF-8 converter"); }  // opening or callback setup failed
+}
+
+ConversionTest::~ConversionTest() {
+    ucnv_close(utf8Cnv);  // release the UTF-8 converter opened in the constructor
+}
  
  void
  ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
      if (exec) logln("TestSuite ConversionTest: ");
      switch (index) {
+#if !UCONFIG_NO_FILE_IO  // tests 0..3 are only compiled in when file I/O is configured
          case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
          case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
          case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
+        case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
+#else  // indexes 0..3 keep a non-empty name: the empty name set by default below ends the loop
+        case 0:
+        case 1:
+        case 2:
+        case 3: name="skip"; break;
+#endif
+        case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
          default: name=""; break; //needed to end loop
      }
  }
@@ -173,7 +192,7 @@ ConversionTest::TestToUnicode() {
          delete dataModule;
      }
      else {
-        errln("Failed: could not load test conversion data");
+        dataerrln("Could not load test conversion data");
      }
  }
  
@@ -183,7 +202,7 @@ ConversionTest::TestFromUnicode() {
      char charset[100], cbopt[4];
      const char *option;
      UnicodeString s, unicode, invalidUChars;
-    int32_t offsetsLength;
+    int32_t offsetsLength, index;
      UConverterFromUCallback callback;
  
      TestDataModule *dataModule;
@@ -242,15 +261,17 @@ ConversionTest::TestFromUnicode() {
                  }
  
                  s=testCase->getString("callback", errorCode);
+                cc.setSub=0; // default: no subchar
  
-                // read NUL-separated subchar first, if any
-                length=u_strlen(p=s.getTerminatedBuffer());
-                if(++length<s.length()) {
+                if((index=s.indexOf((UChar)0))>0) {
+                    // read NUL-separated subchar first, if any
                      // copy the subchar from Latin-1 characters
                      // start after the NUL
+                    p=s.getTerminatedBuffer();
+                    length=index+1;
                      p+=length;
                      length=s.length()-length;
-                    if(length>=(int32_t)sizeof(cc.subchar)) {
+                    if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
                          errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                      } else {
                          int32_t j;
@@ -260,13 +281,26 @@ ConversionTest::TestFromUnicode() {
                          }
                          // NUL-terminate the subchar
                          cc.subchar[j]=0;
+                        cc.setSub=1;
                      }
  
                      // remove the NUL and subchar from s
-                    s.truncate(u_strlen(s.getBuffer()));
-                } else {
-                    // no subchar
-                    cc.subchar[0]=0;
+                    s.truncate(index);
+                } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
+                    // read a substitution string, separated by an equal sign
+                    p=s.getBuffer()+index+1;
+                    length=s.length()-(index+1);
+                    if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
+                        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                    } else {
+                        u_memcpy(cc.subString, p, length);
+                        // NUL-terminate the subString
+                        cc.subString[length]=0;
+                        cc.setSub=-1;
+                    }
+
+                    // remove the equal sign and subString from s
+                    s.truncate(index);
                  }
  
                  s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
@@ -311,7 +345,7 @@ ConversionTest::TestFromUnicode() {
          delete dataModule;
      }
      else {
-        errln("Failed: could not load test conversion data");
+        dataerrln("Could not load test conversion data");
      }
  }
  
@@ -325,7 +359,8 @@ ConversionTest::TestGetUnicodeSet() {
  
      ParsePosition pos;
      UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
-    UConverter *cnv;
+    UnicodeSet *cnvSetPtr = &cnvSet;
+    LocalUConverterPointer cnv;
  
      TestDataModule *dataModule;
      TestData *testData;
@@ -387,16 +422,15 @@ ConversionTest::TestGetUnicodeSet() {
  
                  logln("TestGetUnicodeSet[%d] %s", i, charset);
  
-                cnv=cnv_open(charset, errorCode);
+                cnv.adoptInstead(cnv_open(charset, errorCode));
                  if(U_FAILURE(errorCode)) {
-                    errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
+                    errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
                              charset, i, u_errorName(errorCode));
                      errorCode=U_ZERO_ERROR;
                      continue;
                  }
  
-                ucnv_getUnicodeSet(cnv, (USet *)&cnvSet, (UConverterUnicodeSet)which, &errorCode);
-                ucnv_close(cnv);
+                ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
  
                  if(U_FAILURE(errorCode)) {
                      errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
@@ -410,7 +444,7 @@ ConversionTest::TestGetUnicodeSet() {
                  if(!diffSet.isEmpty()) {
                      diffSet.toPattern(s, TRUE);
                      if(s.length()>100) {
-                        s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                      }
                      errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
                              charset, i);
@@ -422,7 +456,7 @@ ConversionTest::TestGetUnicodeSet() {
                  if(!diffSet.isEmpty()) {
                      diffSet.toPattern(s, TRUE);
                      if(s.length()>100) {
-                        s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
                      }
                      errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
                              charset, i);
@@ -434,14 +468,266 @@ ConversionTest::TestGetUnicodeSet() {
          delete dataModule;
      }
      else {
-        errln("Failed: could not load test conversion data");
+        dataerrln("Could not load test conversion data");
      }
  }
  
+U_CDECL_BEGIN
+// fromUnicode callback for TestGetUnicodeSet2(): removes each reported code point from the UnicodeSet passed as context.
+static void U_CALLCONV
+getUnicodeSetCallback(const void *context, UConverterFromUnicodeArgs * /*fromUArgs*/,
+                      const UChar* /*codeUnits*/, int32_t /*length*/,
+                      UChar32 codePoint, UConverterCallbackReason reason,
+                      UErrorCode *pErrorCode) {
+    if(reason>UCNV_IRREGULAR) {
+        return;  // ignore the reset, close and clone calls
+    }
+    UnicodeSet *remaining=static_cast<UnicodeSet *>(const_cast<void *>(context));
+    remaining->remove(codePoint);  // the converter cannot convert this code point
+    *pErrorCode=U_ZERO_ERROR;      // skip
+}
+U_CDECL_END
+
+// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
+// A conversion or ucnv_getUnicodeSet() failure is reported instead of being compared as a bogus set.
+void
+ConversionTest::TestGetUnicodeSet2() {
+    // Build a string with all code points.
+    UChar32 cpLimit;
+    int32_t s0Length;
+    if(quick) {
+        cpLimit=s0Length=0x10000;  // BMP only
+    } else {
+        cpLimit=0x110000;
+        s0Length=0x10000+0x200000;  // BMP + surrogate pairs
+    }
+    UChar *s0=new UChar[s0Length];
+    if(s0==NULL) {
+        return;
+    }
+    UChar *s=s0;
+    UChar32 c;
+    // low BMP
+    for(c=0; c<=0xd7ff; ++c) {
+        *s++=(UChar)c;
+    }
+    // trail surrogates
+    for(c=0xdc00; c<=0xdfff; ++c) {
+        *s++=(UChar)c;
+    }
+    // lead surrogates
+    // (after trails so that there is not even one surrogate pair in between)
+    for(c=0xd800; c<=0xdbff; ++c) {
+        *s++=(UChar)c;
+    }
+    // high BMP
+    for(c=0xe000; c<=0xffff; ++c) {
+        *s++=(UChar)c;
+    }
+    // supplementary code points = surrogate pairs
+    if(cpLimit==0x110000) {
+        for(c=0xd800; c<=0xdbff; ++c) {
+            for(UChar c2=0xdc00; c2<=0xdfff; ++c2) {
+                *s++=(UChar)c;
+                *s++=c2;
+            }
+        }
+    }
+
+    static const char *const cnvNames[]={
+        "UTF-8",
+        "UTF-7",
+        "UTF-16",
+        "US-ASCII",
+        "ISO-8859-1",
+        "windows-1252",
+        "Shift-JIS",
+        "ibm-1390",  // EBCDIC_STATEFUL table
+        "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
+        "HZ",
+        "ISO-2022-JP",
+        "JIS7",
+        "ISO-2022-CN",
+        "ISO-2022-CN-EXT",
+        "LMBCS"
+    };
+    LocalUConverterPointer cnv;
+    char buffer[1024];
+    for(int32_t i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
+        UErrorCode errorCode=U_ZERO_ERROR;
+        cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
+        if(U_FAILURE(errorCode)) {
+            errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
+            continue;
+        }
+        UnicodeSet expected;
+        ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
+            continue;
+        }
+        for(UConverterUnicodeSet which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
+            if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
+                ucnv_setFallback(cnv.getAlias(), TRUE);
+            }
+            expected.add(0, cpLimit-1);
+            s=s0;
+            UBool flush;
+            do {
+                char *t=buffer;
+                flush=(UBool)(s==s0+s0Length);
+                ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
+                if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                    // the output is discarded; clear flush so that a truncated final call is repeated until drained
+                    errorCode=U_ZERO_ERROR;
+                    flush=FALSE;
+                }
+            } while(U_SUCCESS(errorCode) && !flush);
+            UnicodeSet set;
+            ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
+            if(U_FAILURE(errorCode)) {
+                errln("error: conversion or ucnv_getUnicodeSet(\"%s\") failed - which set: %d - %s", cnvNames[i], which, u_errorName(errorCode));
+                errorCode=U_ZERO_ERROR;  // do not let this failure turn the following calls into no-ops
+                ucnv_resetFromUnicode(cnv.getAlias());
+                continue;
+            }
+            if(cpLimit<0x110000) {
+                set.remove(cpLimit, 0x10ffff);
+            }
+            if(which==UCNV_ROUNDTRIP_SET) {
+                // ignore PUA code points because they will be converted even if they
+                // are fallbacks and when other fallbacks are turned off,
+                // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
+                expected.remove(0xe000, 0xf8ff);
+                expected.remove(0xf0000, 0xffffd);
+                expected.remove(0x100000, 0x10fffd);
+                set.remove(0xe000, 0xf8ff);
+                set.remove(0xf0000, 0xffffd);
+                set.remove(0x100000, 0x10fffd);
+            }
+            if(set!=expected) {
+                // First try to see if we have different sets because ucnv_getUnicodeSet()
+                // added strings: The above conversion method does not tell us what strings might be convertible.
+                // Remove strings from the set and compare again.
+                // Unfortunately, there are no good, direct set methods for finding out whether there are strings
+                // in the set, nor for enumerating or removing just them.
+                // Intersect all code points with the set. The intersection will not contain strings.
+                UnicodeSet temp(0, 0x10ffff);
+                temp.retainAll(set);
+                set=temp;
+            }
+            if(set!=expected) {
+                UnicodeSet diffSet;
+                UnicodeString out;
+
+                // are there items that must be in the set but are not?
+                (diffSet=expected).removeAll(set);
+                if(!diffSet.isEmpty()) {
+                    diffSet.toPattern(out, TRUE);
+                    if(out.length()>100) {
+                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
+                    }
+                    errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
+                            cnvNames[i], which);
+                    errln(out);
+                }
+
+                // are there items that must not be in the set but are?
+                (diffSet=set).removeAll(expected);
+                if(!diffSet.isEmpty()) {
+                    diffSet.toPattern(out, TRUE);
+                    if(out.length()>100) {
+                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
+                    }
+                    errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
+                            cnvNames[i], which);
+                    errln(out);
+                }
+            }
+        }
+    }
+    delete [] s0;
+}
+
+// Test that all code points with the Default_Ignorable_Code_Point property are ignored if they have no mapping.
+// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *cnv_name = "euc-jp-2007";
+    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
+    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
+
+    // The sets and the converter are automatic objects so that none of the early returns below can leak them.
+    UnicodeSet set_ignorable(pattern_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
+        return;
+    }
+
+    UnicodeSet set_not_ignorable(pattern_not_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalUConverterPointer cnv(cnv_open(cnv_name, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
+        return;
+    }
+
+    // set callback for the converter
+    ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Unable to set the substitute callback on converter: %s - %s", cnv_name, u_errorName(status));
+        return;
+    }
+
+    UChar32 input[1];
+    char output[10];
+    int32_t outputLength;
+
+    // test default ignorables are ignored
+    int32_t size = set_ignorable.size();
+    for (int32_t i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+        outputLength = 0;
+        input[0] = set_ignorable.charAt(i);
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength != 0) {
+            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
+        }
+    }
+
+    // test non-ignorables are not ignored
+    size = set_not_ignorable.size();
+    for (int32_t i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+        outputLength = 0;
+        input[0] = set_not_ignorable.charAt(i);
+
+        // U+0000 would look like an empty string to the NUL-terminated (length -1) call below
+        if (input[0] == 0) {
+            continue;
+        }
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength <= 0) {
+            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
+        }
+    }
+}
+
  // open testdata or ICU data converter ------------------------------------- ***
  
  UConverter *
  ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
+    if(name!=NULL && *name=='+') {
+        // Converter names that start with '+' are ignored in ICU4J tests.
+        ++name;
+    }
      if(name!=NULL && *name=='*') {
          /* loadTestData(): set the data directory */
          return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
@@ -732,31 +1018,28 @@ stepToUnicode(ConversionCase &cc, UConverter *cnv,
  
  UBool
  ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
-    UConverter *cnv;
-    UErrorCode errorCode;
-
      // open the converter
-    errorCode=U_ZERO_ERROR;
-    cnv=cnv_open(cc.charset, errorCode);
-    if(U_FAILURE(errorCode)) {
-        errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
-                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
+    IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
+    LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
+    if(errorCode.isFailure()) {
+        errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
+                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
+        errorCode.reset();
          return FALSE;
      }
  
      // set the callback
      if(callback!=NULL) {
-        ucnv_setToUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
+        ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
          if(U_FAILURE(errorCode)) {
              errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
-            ucnv_close(cnv);
              return FALSE;
          }
      }
  
-    int32_t resultOffsets[200];
-    UChar result[200];
+    int32_t resultOffsets[256];
+    UChar result[256];
      int32_t resultLength;
      UBool ok;
  
@@ -779,7 +1062,7 @@ ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback
      int32_t i, step;
  
      ok=TRUE;
-    for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
          step=steps[i].step;
          if(step<0 && !cc.finalFlush) {
              // skip ucnv_getNextUChar() if !finalFlush because
@@ -790,20 +1073,32 @@ ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback
              // bulk test is first, then offsets are not checked any more
              cc.offsets=NULL;
          }
-        errorCode=U_ZERO_ERROR;
-        resultLength=stepToUnicode(cc, cnv,
-                                result, LENGTHOF(result),
+        else {
+            memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
+        }
+        memset(result, -1, UPRV_LENGTHOF(result));
+        errorCode.reset();
+        resultLength=stepToUnicode(cc, cnv.getAlias(),
+                                result, UPRV_LENGTHOF(result),
                                  step==0 ? resultOffsets : NULL,
-                                step, &errorCode);
+                                step, errorCode);
          ok=checkToUnicode(
-                cc, cnv, steps[i].name,
+                cc, cnv.getAlias(), steps[i].name,
                  result, resultLength,
                  cc.offsets!=NULL ? resultOffsets : NULL,
                  errorCode);
-        if(U_FAILURE(errorCode) || !cc.finalFlush) {
+        if(errorCode.isFailure() || !cc.finalFlush) {
              // reset if an error occurred or we did not flush
              // otherwise do nothing to make sure that flushing resets
-            ucnv_resetToUnicode(cnv);
+            ucnv_resetToUnicode(cnv.getAlias());
+        }
+        if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
+            errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
+                cc.caseNr, cc.charset, resultLength);
+        }
+        if (result[resultLength] != (UChar)-1) {
+            errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
+                cc.caseNr, cc.charset, resultLength);
          }
      }
  
@@ -812,13 +1107,13 @@ ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback
          // test ucnv_toUChars()
          memset(result, 0, sizeof(result));
  
-        errorCode=U_ZERO_ERROR;
-        resultLength=ucnv_toUChars(cnv,
-                        result, LENGTHOF(result),
+        errorCode.reset();
+        resultLength=ucnv_toUChars(cnv.getAlias(),
+                        result, UPRV_LENGTHOF(result),
                          (const char *)cc.bytes, cc.bytesLength,
-                        &errorCode);
+                        errorCode);
          ok=checkToUnicode(
-                cc, cnv, "toUChars",
+                cc, cnv.getAlias(), "toUChars",
                  result, resultLength,
                  NULL,
                  errorCode);
@@ -828,23 +1123,23 @@ ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback
  
          // test preflighting
          // keep the correct result for simple checking
-        errorCode=U_ZERO_ERROR;
-        resultLength=ucnv_toUChars(cnv,
+        errorCode.reset();
+        resultLength=ucnv_toUChars(cnv.getAlias(),
                          NULL, 0,
                          (const char *)cc.bytes, cc.bytesLength,
-                        &errorCode);
-        if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
-            errorCode=U_ZERO_ERROR;
+                        errorCode);
+        if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
+            errorCode.reset();
          }
          ok=checkToUnicode(
-                cc, cnv, "preflight toUChars",
+                cc, cnv.getAlias(), "preflight toUChars",
                  result, resultLength,
                  NULL,
                  errorCode);
          break;
      }
  
-    ucnv_close(cnv);
+    errorCode.reset();  // all errors have already been reported
      return ok;
  }
  
@@ -932,6 +1227,112 @@ ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *
  
  // fromUnicode test worker functions --------------------------------------- ***
  
+// Converts cc.utf8 (cc.utf8Length bytes) to the charset of cnv with ucnv_convertEx(),
+// using utf8Cnv as the source converter, and writes the output bytes to result.
+//
+// step==0 performs one bulk conversion with the entire buffers.
+// step>0 calls ucnv_convertEx() with in/out buffers no larger than (step) at a time and
+// moves only one buffer limit (in vs. out) at a time to be extra mean.
+//
+// The loop ends when all input is consumed or when an error other than a partial-buffer overflow occurs.
+// Returns the number of bytes written to result; *pErrorCode is set to
+// U_INTERNAL_PROGRAM_ERROR when the converter leaves the pointers in an inconsistent state.
+static int32_t
+stepFromUTF8(ConversionCase &cc,
+             UConverter *utf8Cnv, UConverter *cnv,
+             char *result, int32_t resultCapacity,
+             int32_t step,
+             UErrorCode *pErrorCode) {
+    UChar pivotBuffer[32];
+    UChar *pivotSource=pivotBuffer, *pivotTarget=pivotBuffer;
+
+    const char *source=cc.utf8;
+    const char *const utf8Limit=source+cc.utf8Length;
+    char *target=result;
+    char *const resultLimit=result+resultCapacity;
+
+    // initialize the partial limits for the loop:
+    // bulk mode uses the entire buffers, stepping starts with empty partial buffers
+    const UBool isBulk=(UBool)(step==0);
+    const char *sourceLimit=isBulk ? utf8Limit : source;
+    char *targetLimit=isBulk ? resultLimit : target;
+    UBool flush=(UBool)(isBulk ? cc.finalFlush : FALSE);
+    // empty pivot is not allowed, make it of length step when stepping
+    UChar *pivotLimit=pivotBuffer+(isBulk ? UPRV_LENGTHOF(pivotBuffer) : step);
+
+    for(;;) {
+        // resetting the opposite conversion direction must not affect this one
+        ucnv_resetFromUnicode(utf8Cnv);
+        ucnv_resetToUnicode(cnv);
+
+        // convert
+        ucnv_convertEx(cnv, utf8Cnv,
+            &target, targetLimit,
+            &source, sourceLimit,
+            pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
+            FALSE, flush, pErrorCode);
+
+        // check pointers first: the converter must not move past the limits it was given
+        if(source>sourceLimit || target>targetLimit) {
+            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+            break;
+        }
+
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+            if(target!=targetLimit) {
+                // buffer overflow must only be set when the target is filled
+                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            if(targetLimit==resultLimit) {
+                // not just a partial overflow
+                break;
+            }
+            // the partial target is filled, set a new limit, reset the error and continue
+            if((resultLimit-target)>=step) {
+                targetLimit=target+step;
+            } else {
+                targetLimit=resultLimit;
+            }
+            *pErrorCode=U_ZERO_ERROR;
+            continue;
+        }
+
+        if(U_FAILURE(*pErrorCode)) {
+            // Either a toUnicode error (pivotSource==pivotBuffer; should not occur,
+            // toUnicode errors are tested in cintltst TestConvertExFromUTF8())
+            // or a fromUnicode/other error: done in both cases.
+            break;
+        }
+
+        if(source!=sourceLimit) {
+            // when no error occurs, then the input must be consumed
+            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+            break;
+        }
+        if(sourceLimit==utf8Limit) {
+            // we are done
+            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
+                // ucnv_convertEx() warns about not terminating the output
+                // but ucnv_fromUnicode() does not and so
+                // checkFromUnicode() does not expect it
+                *pErrorCode=U_ZERO_ERROR;
+            }
+            break;
+        }
+
+        // the partial conversion succeeded, set a new limit and continue
+        if((utf8Limit-source)>=step) {
+            sourceLimit=source+step;
+        } else {
+            sourceLimit=utf8Limit;
+        }
+        flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
+    }
+
+    return (int32_t)(target-result);
+}
+
  static int32_t
  stepFromUnicode(ConversionCase &cc, UConverter *cnv,
                  char *result, int32_t resultCapacity,
@@ -1028,10 +1429,11 @@ ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
      errorCode=U_ZERO_ERROR;
      cnv=cnv_open(cc.charset, errorCode);
      if(U_FAILURE(errorCode)) {
-        errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
+        errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
                  cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
          return FALSE;
      }
+    ucnv_resetToUnicode(utf8Cnv);
  
      // set the callback
      if(callback!=NULL) {
@@ -1051,42 +1453,62 @@ ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
      // set the subchar
      int32_t length;
  
-    if((length=(int32_t)strlen(cc.subchar))!=0) {
+    if(cc.setSub>0) {
+        length=(int32_t)strlen(cc.subchar);
          ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
          if(U_FAILURE(errorCode)) {
-            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubChars() failed - %s",
+            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
+                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
+            ucnv_close(cnv);
+            return FALSE;
+        }
+    } else if(cc.setSub<0) {
+        ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
                      cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
              ucnv_close(cnv);
              return FALSE;
          }
      }
  
-    int32_t resultOffsets[200];
-    char result[200];
+    // convert unicode to utf8
+    char utf8[256];
+    cc.utf8=utf8;
+    u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
+                cc.unicode, cc.unicodeLength,
+                &errorCode);
+    if(U_FAILURE(errorCode)) {
+        // skip UTF-8 testing of a string with an unpaired surrogate,
+        // or of one that's too long
+        // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
+        cc.utf8Length=-1;
+    }
+
+    int32_t resultOffsets[256];
+    char result[256];
      int32_t resultLength;
      UBool ok;
  
      static const struct {
          int32_t step;
-        const char *name;
+        const char *name, *utf8Name;
      } steps[]={
-        { 0, "bulk" }, // must be first for offsets to be checked
-        { 1, "step=1" },
-        { 3, "step=3" },
-        { 7, "step=7" }
+        { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
+        { 1, "step=1", "utf8 step=1" },
+        { 3, "step=3", "utf8 step=3" },
+        { 7, "step=7", "utf8 step=7" }
      };
      int32_t i, step;
  
      ok=TRUE;
-    for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
          step=steps[i].step;
-        if(step!=0) {
-            // bulk test is first, then offsets are not checked any more
-            cc.offsets=NULL;
-        }
+        memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
+        memset(result, -1, UPRV_LENGTHOF(result));
          errorCode=U_ZERO_ERROR;
          resultLength=stepFromUnicode(cc, cnv,
-                                result, LENGTHOF(result),
+                                result, UPRV_LENGTHOF(result),
                                  step==0 ? resultOffsets : NULL,
                                  step, &errorCode);
          ok=checkFromUnicode(
@@ -1099,6 +1521,36 @@ ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
              // otherwise do nothing to make sure that flushing resets
              ucnv_resetFromUnicode(cnv);
          }
+        if (resultOffsets[resultLength] != -1) {
+            errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
+                cc.caseNr, cc.charset, resultLength);
+        }
+        if (result[resultLength] != (char)-1) {
+            errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
+                cc.caseNr, cc.charset, resultLength);
+        }
+
+        // bulk test is first, then offsets are not checked any more
+        cc.offsets=NULL;
+
+        // test direct conversion from UTF-8
+        if(cc.utf8Length>=0) {
+            errorCode=U_ZERO_ERROR;
+            resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
+                                    result, UPRV_LENGTHOF(result),
+                                    step, &errorCode);
+            ok=checkFromUnicode(
+                    cc, cnv, steps[i].utf8Name,
+                    (uint8_t *)result, resultLength,
+                    NULL,
+                    errorCode);
+            if(U_FAILURE(errorCode) || !cc.finalFlush) {
+                // reset if an error occurred or we did not flush
+                // otherwise do nothing to make sure that flushing resets
+                ucnv_resetToUnicode(utf8Cnv);
+                ucnv_resetFromUnicode(cnv);
+            }
+        }
      }
  
      // not a real loop, just a convenience for breaking out of the block
@@ -1108,7 +1560,7 @@ ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback call
  
          errorCode=U_ZERO_ERROR;
          resultLength=ucnv_fromUChars(cnv,
-                        result, LENGTHOF(result),
+                        result, UPRV_LENGTHOF(result),
                          cc.unicode, cc.unicodeLength,
                          &errorCode);
          ok=checkFromUnicode(
@@ -1157,7 +1609,7 @@ ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char
      msg=NULL;
  
      errorCode=U_ZERO_ERROR;
-    resultInvalidLength=LENGTHOF(resultInvalidUChars);
+    resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
      ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
      if(U_FAILURE(errorCode)) {
          errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",