ICU-57132.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / usettest.cpp
diff --git a/icuSources/test/intltest/usettest.cpp b/icuSources/test/intltest/usettest.cpp

index 6a0a2f08f202ce4ea262af4981aeeaccab0bda7b..6dbf9255e56c80ff607db9e53f93c6fcbaade276 100644 (file)
--- a/icuSources/test/intltest/usettest.cpp
+++ b/icuSources/test/intltest/usettest.cpp
@@ -1,16 +1,20 @@
  /*
-**************************************************************************************
-*   Copyright (C) 1999-2006 International Business Machines Corporation and
+********************************************************************************
+*   Copyright (C) 1999-2016 International Business Machines Corporation and
  *   others. All Rights Reserved.
-**************************************************************************************
+********************************************************************************
  *   Date        Name        Description
  *   10/20/99    alan        Creation.
  *   03/22/2000  Madhu       Added additional tests
-**************************************************************************************
+********************************************************************************
  */
  
+#include <stdio.h>
+
+#include <string.h>
  #include "unicode/utypes.h"
  #include "usettest.h"
+#include "unicode/ucnv.h"
  #include "unicode/uniset.h"
  #include "unicode/uchar.h"
  #include "unicode/usetiter.h"
@@ -18,15 +22,15 @@
  #include "unicode/parsepos.h"
  #include "unicode/symtable.h"
  #include "unicode/uversion.h"
+#include "cmemory.h"
  #include "hash.h"
  
-
  #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
-    errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
+    dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
      u_errorName(status));}}
  
  #define TEST_ASSERT(expr) {if (!(expr)) { \
-    errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
+    dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
  
  UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
      UnicodeString pat;
@@ -38,11 +42,26 @@ UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
                            name = #test;                 \
                            if (exec) {                   \
                                logln(#test "---");       \
-                              logln((UnicodeString)""); \
+                              logln();                  \
                                test();                   \
                            }                             \
                            break
  
+UnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {  // no converter yet; openUTF8Converter() opens it on first request
+}
+
+UConverter *UnicodeSetTest::openUTF8Converter() {  // returns the cached UTF-8 converter, opening it when the cache is still NULL
+    if(utf8Cnv==NULL) {
+        UErrorCode errorCode=U_ZERO_ERROR;  // never inspected afterwards; NOTE(review): presumably ucnv_open returns NULL on failure, so callers must null-check the result - confirm
+        utf8Cnv=ucnv_open("UTF-8", &errorCode);
+    }
+    return utf8Cnv;
+}
+
+UnicodeSetTest::~UnicodeSetTest() {  // releases the converter cached by openUTF8Converter()
+    ucnv_close(utf8Cnv);  // utf8Cnv is still NULL if the converter was never opened; NOTE(review): ucnv_close is expected to accept NULL - confirm
+}
+
  void
  UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
                                 const char* &name, char* /*par*/) {
@@ -69,6 +88,10 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
          CASE(18,TestSurrogate);
          CASE(19,TestPosixClasses);
          CASE(20,TestIteration);
+        CASE(21,TestFreezable);
+        CASE(22,TestSpan);
+        CASE(23,TestStringSpan);
+        CASE(24,TestUCAUnsafeBackwards);
          default: name = ""; break;
      }
  }
@@ -107,7 +130,7 @@ void UnicodeSetTest::TestToPattern() {
              ec = U_ZERO_ERROR;
              UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
              if (U_FAILURE(ec)) {
-                errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]);
+                dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec)));
                  continue;
              }
              checkPat(OTHER_TOPATTERN_TESTS[j], s);
@@ -147,19 +170,19 @@ void UnicodeSetTest::TestToPattern() {
              const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
              expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
  
-            s->applyPattern("[a-z {\\{l} {r\\}}]", ec);
+            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
              if (U_FAILURE(ec)) break;
              const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
-            expectToPattern(*s, "[a-z{r\\}}{\\{l}]", exp3);
+            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
  
              s->add("[]");
              const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
-            expectToPattern(*s, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4);
+            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
  
-            s->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec);
+            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
              if (U_FAILURE(ec)) break;
              const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
-            expectToPattern(*s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]", exp5);
+            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
  
              // j2189
              s->clear();
@@ -258,7 +281,8 @@ UnicodeSetTest::TestCategories(void) {
      const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
      UnicodeSet set(pat, status);
      if (U_FAILURE(status)) {
-        errln((UnicodeString)"Fail: Can't construct set with " + pat);
+        dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status)));
+        return;
      } else {
          expectContainment(set, pat, "ABC", "abc");
      }
@@ -295,14 +319,14 @@ UnicodeSetTest::TestCloneEqualHash(void) {
      // set1 and set2 used to be built with the obsolete constructor taking
      // UCharCategory values; replaced with pattern constructors
      // markus 20030502
-    UnicodeSet *set1=new UnicodeSet("\\p{Lowercase Letter}", status); //  :Ll: Letter, lowercase
-    UnicodeSet *set1a=new UnicodeSet("[:Ll:]", status); //  Letter, lowercase
+    UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); //  :Ll: Letter, lowercase
+    UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); //  Letter, lowercase
      if (U_FAILURE(status)){
-        errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
+        dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
          return;
      }
-    UnicodeSet *set2=new UnicodeSet("\\p{Decimal Number}", status);   //Number, Decimal digit
-    UnicodeSet *set2a=new UnicodeSet("[:Nd:]", status);   //Number, Decimal digit
+    UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status);   //Number, Decimal digit
+    UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status);   //Number, Decimal digit
      if (U_FAILURE(status)){
          errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
          return;
@@ -667,12 +691,55 @@ void UnicodeSetTest::TestAPI() {
      if (set != exp) { errln("FAIL: retain('s')"); return; }
  
      uint16_t buf[32];
-    int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
+    int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status);
      if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
      if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
          errln("FAIL: serialize");
          return;
      }
+
+    // Conversions to and from USet
+    UnicodeSet *uniset = &set;
+    USet *uset = uniset->toUSet();
+    TEST_ASSERT((void *)uset == (void *)uniset);
+    UnicodeSet *setx = UnicodeSet::fromUSet(uset);
+    TEST_ASSERT((void *)setx == (void *)uset);
+    const UnicodeSet *constSet = uniset;
+    const USet *constUSet = constSet->toUSet();
+    TEST_ASSERT((void *)constUSet == (void *)constSet);
+    const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
+    TEST_ASSERT((void *)constSetx == (void *)constUSet);
+
+    // span(UnicodeString) and spanBack(UnicodeString) convenience methods
+    UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
+    UnicodeSet ac(0x61, 0x63);
+    ac.remove(0x62).freeze();
+    if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
+    ) {
+        errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
+    }
+    if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
+    ) {
+        errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
+    }
  }
  
  void UnicodeSetTest::TestIteration() {
@@ -682,7 +749,7 @@ void UnicodeSetTest::TestIteration() {
      
      // 6 code points, 3 ranges, 2 strings, 8 total elements
      //   Iteration will access them in sorted order -  a, b, c, y, z, U0001abcd, "str1", "str2"
-    UnicodeSet set("[zabyc\\U0001abcd{str1}{str2}]", ec);
+    UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
      TEST_ASSERT_SUCCESS(ec);
      UnicodeSetIterator it(set);
  
@@ -799,12 +866,12 @@ void UnicodeSetTest::TestStrings() {
   * Test the [:Latin:] syntax.
   */
  void UnicodeSetTest::TestScriptSet() {
-    expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
+    expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));  // args are (pattern, chars expected in, chars expected out): "aA" in, U+0391/U+03B1 out
  
-    expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
+    expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");  // mirror case: U+0391/U+03B1 in, "aA" out
      
      /* Jitterbug 1423 */
-    expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
+    expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");  // U+3099, U+1D169 and U+0000 expected in the union; "aA" expected out
  
  }
  
@@ -812,7 +879,7 @@ void UnicodeSetTest::TestScriptSet() {
   * Test the [:Latin:] syntax.
   */
  void UnicodeSetTest::TestPropertySet() {
-    static const char* DATA[] = {
+    static const char* const DATA[] = {
          // Pattern, Chars IN, Chars NOT in
  
          "[:Latin:]",
@@ -827,6 +894,7 @@ void UnicodeSetTest::TestPropertySet() {
          "abc",
          "ABC",
  
+#if !UCONFIG_NO_NORMALIZATION
          // Combining class: @since ICU 2.2
          // Check both symbolic and numeric
          "\\p{ccc=Nukta}",
@@ -840,6 +908,7 @@ void UnicodeSetTest::TestPropertySet() {
          "[:c c c = iota subscript :]",
          "\\u0345",
          "xyz",
+#endif
  
          // Bidi class: @since ICU 2.2
          "\\p{bidiclass=lefttoright}",
@@ -951,6 +1020,7 @@ void UnicodeSetTest::TestPropertySet() {
          "abcd\\uDC00",
          "ef\\uD800\\U00010000",
  
+#if !UCONFIG_NO_NORMALIZATION
          "[:^lccc=0:]", // Lead canonical class
          "\\u0300\\u0301",
          "abcd\\u00c0\\u00c5",
@@ -970,16 +1040,28 @@ void UnicodeSetTest::TestPropertySet() {
          "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
          "\\u0F73\\u0F75\\u0F81",
          "abcd\\u0300\\u0301\\u00c0\\u00c5",
+#endif /* !UCONFIG_NO_NORMALIZATION */
  
          "[:Assigned:]",
          "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
-        "\\u0888\\uFDD3\\uFFFE\\U00050005"
+        "\\u0888\\uFDD3\\uFFFE\\U00050005",
+
+        // Script_Extensions, new in Unicode 6.0
+        "[:scx=Arab:]",
+        "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
+        "\\u061D\\uFDEF\\uFDFE",
+
+        // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
+        // so scx-sc is missing U+FDF2.
+        "[[:Script_Extensions=Arabic:]-[:Arab:]]",
+        "\\u0640\\u064B\\u0650\\u0655",
+        "\\uFDF2"
      };
  
-    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
+    static const int32_t DATA_LEN = UPRV_LENGTHOF(DATA);
  
      for (int32_t i=0; i<DATA_LEN; i+=3) {  
-        expectContainment(DATA[i], CharsToUnicodeString(DATA[i+1]),
+        expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
                            CharsToUnicodeString(DATA[i+2]));
      }
  }
@@ -992,56 +1074,56 @@ void UnicodeSetTest::TestPosixClasses() {
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:alpha:]", status);
-        UnicodeSet s2("\\p{Alphabetic}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:lower:]", status);
-        UnicodeSet s2("\\p{lowercase}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:upper:]", status);
-        UnicodeSet s2("\\p{Uppercase}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:punct:]", status);
-        UnicodeSet s2("\\p{gc=Punctuation}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:digit:]", status);
-        UnicodeSet s2("\\p{gc=DecimalNumber}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:xdigit:]", status);
-        UnicodeSet s2("[\\p{DecimalNumber}\\p{HexDigit}]", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:alnum:]", status);
-        UnicodeSet s2("[\\p{Alphabetic}\\p{DecimalNumber}]", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:space:]", status);
-        UnicodeSet s2("\\p{Whitespace}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
@@ -1049,7 +1131,7 @@ void UnicodeSetTest::TestPosixClasses() {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:blank:]", status);
          TEST_ASSERT_SUCCESS(status);
-        UnicodeSet s2("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]",
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
              status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
@@ -1058,7 +1140,7 @@ void UnicodeSetTest::TestPosixClasses() {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:cntrl:]", status);
          TEST_ASSERT_SUCCESS(status);
-        UnicodeSet s2("\\p{Control}", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
@@ -1066,7 +1148,7 @@ void UnicodeSetTest::TestPosixClasses() {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:graph:]", status);
          TEST_ASSERT_SUCCESS(status);
-        UnicodeSet s2("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]", status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
@@ -1074,7 +1156,7 @@ void UnicodeSetTest::TestPosixClasses() {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeSet s1("[:print:]", status);
          TEST_ASSERT_SUCCESS(status);
-        UnicodeSet s2("[[:graph:][:blank:]-[\\p{Control}]]" ,status);
+        UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(s1==s2);
      }
@@ -1128,7 +1210,7 @@ void UnicodeSetTest::TestCloseOver() {
          // selector, input, output
          CASE,
          "[aq\\u00DF{Bc}{bC}{Fi}]",
-        "[aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]",
+        "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]",  // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
  
          CASE,
          "[\\u01F1]", // 'DZ'
@@ -1179,9 +1261,11 @@ void UnicodeSetTest::TestCloseOver() {
  
          CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
  
+#if !UCONFIG_NO_FILE_IO
          CASE_MAPPINGS,
          "[aq\\u00DF{Bc}{bC}{Fi}]",
          "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
+#endif
  
          CASE_MAPPINGS,
          "[\\u01F1]", // 'DZ'
@@ -1199,8 +1283,8 @@ void UnicodeSetTest::TestCloseOver() {
      UnicodeString buf;
      for (int32_t i=0; DATA[i]!=NULL; i+=3) {
          int32_t selector = DATA[i][0];
-        UnicodeString pat(DATA[i+1]);
-        UnicodeString exp(DATA[i+2]);
+        UnicodeString pat(DATA[i+1], -1, US_INV);
+        UnicodeString exp(DATA[i+2], -1, US_INV);
          s.applyPattern(pat, ec);
          s.closeOver(selector);
          t.applyPattern(exp, ec);
@@ -1211,7 +1295,7 @@ void UnicodeSetTest::TestCloseOver() {
          if (s == t) {
              logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
          } else {
-            errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
+            dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
                    s.toPattern(buf, TRUE) + ", expected " + exp);
          }
      }
@@ -1304,11 +1388,11 @@ void UnicodeSetTest::TestEscapePattern() {
      const char exp[] =
          "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
      // We test this with two passes; in the second pass we
-    // pre-unescape the pattern.  Since U+200E is rule whitespace,
+    // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
      // this fails -- which is what we expect.
      for (int32_t pass=1; pass<=2; ++pass) {
          UErrorCode ec = U_ZERO_ERROR;
-        UnicodeString pat(pattern);
+        UnicodeString pat(pattern, -1, US_INV);
          if (pass==2) {
              pat = pat.unescape();
          }
@@ -1331,7 +1415,7 @@ void UnicodeSetTest::TestEscapePattern() {
  
          UnicodeString newpat;
          set.toPattern(newpat, TRUE);
-        if (newpat == exp) {
+        if (newpat == UnicodeString(exp, -1, US_INV)) {
              logln(escape(pat) + " => " + newpat);
          } else {
              errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
@@ -1378,7 +1462,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
          (UChar32)-1, 8,           0, 8,
          8, 0x110000,              8, 0x10FFFF
      };
-    const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
+    const int32_t DATA_LENGTH = UPRV_LENGTHOF(DATA);
  
      UnicodeString pat;
      int32_t i;
@@ -1404,6 +1488,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
          b = set.contains(start, end);
          b = set.containsNone(start, end);
          b = set.containsSome(start, end);
+        (void)b;   // Suppress set but not used warning.
  
          /*int32_t index = set.indexOf(start);*/
          
@@ -1440,7 +1525,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
          (UChar32)-1,
          0x110000
      };
-    const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
+    const int32_t DATA2_LENGTH = UPRV_LENGTHOF(DATA2);
  
      for (i=0; i<DATA2_LENGTH; ++i) {
          UChar32 c = DATA2[i], end = 0x10FFFF;
@@ -1489,7 +1574,7 @@ public:
      Hashtable contents;
  
      TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
-        contents.setValueDeleter(uhash_deleteUnicodeString);
+        contents.setValueDeleter(uprv_deleteUObject);
      }
  
      ~TokenSymbolTable() {}
@@ -1564,7 +1649,7 @@ void UnicodeSetTest::TestSymbolTable() {
  
          // Set up variables
          while (DATA[i+2] != NULL) {
-            sym.add(DATA[i], DATA[i+1], ec);
+            sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec);
              if (U_FAILURE(ec)) {
                  errln("FAIL: couldn't add to TokenSymbolTable");
                  continue;
@@ -1573,7 +1658,7 @@ void UnicodeSetTest::TestSymbolTable() {
          }
  
          // Input pattern and expected output pattern
-        UnicodeString inpat = DATA[i], exppat = DATA[i+1];
+        UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV);
          i += 2;
  
          ParsePosition pos(0);
@@ -1617,8 +1702,9 @@ void UnicodeSetTest::TestSurrogate() {
      };
      for (int i=0; DATA[i] != 0; ++i) {
          UErrorCode ec = U_ZERO_ERROR;
-        logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]);
-        UnicodeSet set(DATA[i], ec);
+        logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
+        UnicodeString str = UnicodeString(DATA[i], -1, US_INV);
+        UnicodeSet set(str, ec);
          if (U_FAILURE(ec)) {
              errln("FAIL: UnicodeSet constructor");
              continue;
@@ -1627,9 +1713,15 @@ void UnicodeSetTest::TestSurrogate() {
                            CharsToUnicodeString("abc\\U00010000"),
                            CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
          if (set.size() != 4) {
-            errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " + 
+            errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " + 
                    set.size() + ", expected 4");
          }
+
+        {
+          UErrorCode subErr = U_ZERO_ERROR;
+          checkRoundTrip(set);
+          checkSerializeRoundTrip(set, subErr);
+        }
      }
  }
  
@@ -1646,8 +1738,12 @@ void UnicodeSetTest::TestExhaustive() {
          logln((UnicodeString)"Testing " + i + ", " + x);
          _testComplement(i, x, y);
  
+        UnicodeSet &toTest = bitsToSet(i, aa);
+
          // AS LONG AS WE ARE HERE, check roundtrip
-        checkRoundTrip(bitsToSet(i, aa));
+        checkRoundTrip(toTest);
+        UErrorCode ec = U_ZERO_ERROR;
+        checkSerializeRoundTrip(toTest, ec);
  
          for (int32_t j = 0; j < limit; ++j) {
              _testAdd(i,j,  x,y,z);
@@ -1806,39 +1902,80 @@ UnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
   * get the same thing back
   */
  void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
-    UErrorCode ec = U_ZERO_ERROR;
+    {  // copy constructor: the copy is compared with s via checkEqual
+        UnicodeSet t(s);
+        checkEqual(s, t, "copy ct");
+    }
  
-    UnicodeSet t(s);
-    checkEqual(s, t, "copy ct");
+    {  // assignment onto a set that already holds other contents
+        UnicodeSet t(0xabcd, 0xdef0);  // dummy contents should be overwritten
+        t = s;
+        checkEqual(s, t, "operator=");
+    }
  
-    t = s;
-    checkEqual(s, t, "operator=");
+    {  // rebuild from s through copyWithIterator with withRange=FALSE
+        UnicodeSet t;
+        copyWithIterator(t, s, FALSE);
+        checkEqual(s, t, "iterator roundtrip");
+    }
  
-    copyWithIterator(t, s, FALSE);
-    checkEqual(s, t, "iterator roundtrip");
+    {  // same rebuild with withRange=TRUE
+        UnicodeSet t;
+        copyWithIterator(t, s, TRUE); // try range
+        checkEqual(s, t, "iterator roundtrip");
+    }
  
-    copyWithIterator(t, s, TRUE); // try range
-    checkEqual(s, t, "iterator roundtrip");
-        
-    UnicodeString pat; s.toPattern(pat, FALSE);
-    t.applyPattern(pat, ec);
-    if (U_FAILURE(ec)) {
-        errln("FAIL: applyPattern");
-        return;
-    } else {
-        checkEqual(s, t, "toPattern(false)");
+    {  // pattern round trip using toPattern(pat, FALSE)
+        UnicodeSet t;
+        UnicodeString pat;
+        UErrorCode ec = U_ZERO_ERROR;  // error code local to this step
+        s.toPattern(pat, FALSE);
+        t.applyPattern(pat, ec);
+        if (U_FAILURE(ec)) {
+            errln("FAIL: toPattern(escapeUnprintable=FALSE), applyPattern - %s", u_errorName(ec));
+            return;  // leaves the function, so the TRUE variant below is not run
+        } else {
+            checkEqual(s, t, "toPattern(false)");
+        }
      }
-        
-    s.toPattern(pat, TRUE);
-    t.applyPattern(pat, ec);
-    if (U_FAILURE(ec)) {
-        errln("FAIL: applyPattern");
-        return;
-    } else {
-        checkEqual(s, t, "toPattern(true)");
+
+    {  // pattern round trip using toPattern(pat, TRUE)
+        UnicodeSet t;
+        UnicodeString pat;
+        UErrorCode ec = U_ZERO_ERROR;
+        s.toPattern(pat, TRUE);
+        t.applyPattern(pat, ec);
+        if (U_FAILURE(ec)) {
+            errln("FAIL: toPattern(escapeUnprintable=TRUE), applyPattern - %s", u_errorName(ec));
+            return;
+        } else {
+            checkEqual(s, t, "toPattern(true)");
+        }
      }
  }
-    
+
+void UnicodeSetTest::checkSerializeRoundTrip(const UnicodeSet& t, UErrorCode &status) {  // serializes t into serializeBuffer, rebuilds a set from that buffer, and compares it with t
+  if(U_FAILURE(status)) return;  // does nothing when the caller passes in an existing failure
+  int32_t len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);  // first attempt uses the buffer at its current capacity
+  if(status == U_BUFFER_OVERFLOW_ERROR) {  // buffer too small: clear the error, resize to len, retry once
+    status = U_ZERO_ERROR;
+    serializeBuffer.resize(len);  // NOTE(review): the resize() result is not checked; presumably a failed resize shows up as an error from the retry below - confirm
+    len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
+    // let 2nd error stand
+  }
+  if(U_FAILURE(status)) {
+    errln("checkSerializeRoundTrip: error %s serializing buffer\n", u_errorName(status));
+    return;  // status stays set to the failure for the caller
+  }
+  UnicodeSet deserialized(serializeBuffer.getAlias(), len, UnicodeSet::kSerialized, status);  // rebuild a set from the len serialized units
+  if(U_FAILURE(status)) {
+    errln("checkSerializeRoundTrip: error %s deserializing buffer: buf %p len %d, original %d\n", u_errorName(status), serializeBuffer.getAlias(), len, t.getRangeCount());
+    return;
+  }
+
+  checkEqual(t, deserialized, "Set was unequal when deserialized");  // checkEqual's UBool result is ignored here
+}
+
  void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
      t.clear();
      UnicodeSetIterator it(s);
@@ -1862,6 +1999,8 @@ void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool
  }
      
  UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
+  assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
+  assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
      UnicodeString source; s.toPattern(source, TRUE);
      UnicodeString result; t.toPattern(result, TRUE);
      if (s != t) {
@@ -1886,7 +2025,7 @@ UnicodeSetTest::expectContainment(const UnicodeString& pat,
      UErrorCode ec = U_ZERO_ERROR;
      UnicodeSet set(pat, ec);
      if (U_FAILURE(ec)) {
-        errln((UnicodeString)"FAIL: pattern \"" +
+        dataerrln((UnicodeString)"FAIL: pattern \"" +
                pat + "\" => " + u_errorName(ec));
          return;
      }
@@ -2062,3 +2201,1725 @@ UnicodeSetTest::escape(const UnicodeString& s) {
      }
      return buf;
  }
+
+// Test the freeze()/isFrozen()/clone()/cloneAsThawed() behavior of UnicodeSet.
+// Builds a thawed reference set (idSet) and a frozen copy of it, then calls
+// every kind of modifier on the frozen copy and reports a failure whenever
+// the frozen copy no longer equals the reference set.
+void UnicodeSetTest::TestFreezable() {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
+    UnicodeSet idSet(idPattern, errorCode);
+    if(U_FAILURE(errorCode)) {
+        dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
+        return;
+    }
+
+    UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
+    UnicodeSet wsSet(wsPattern, errorCode);
+    if(U_FAILURE(errorCode)) {
+        dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
+        return;
+    }
+
+    // Add the pattern text itself as a string element of the set.
+    idSet.add(idPattern);
+    UnicodeSet frozen(idSet);
+    frozen.freeze();
+
+    if(idSet.isFrozen() || !frozen.isFrozen()) {
+        errln("FAIL: isFrozen() is wrong");
+    }
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: a copy-constructed frozen set differs from its original");
+    }
+
+    // Assignment to a frozen set is expected to leave it unchanged.
+    frozen=wsSet;
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: a frozen set was modified by operator=");
+    }
+
+    UnicodeSet frozen2(frozen);
+    if(frozen2!=frozen || frozen2!=idSet) {
+        errln("FAIL: a copied frozen set differs from its frozen original");
+    }
+    if(!frozen2.isFrozen()) {
+        errln("FAIL: copy-constructing a frozen set results in a thawed one");
+    }
+    UnicodeSet frozen3(5, 55);  // Set to some values to really test assignment below, not copy construction.
+    if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) {
+        errln("FAIL: UnicodeSet(5, 55) failed");
+    }
+    frozen3=frozen;
+    if(!frozen3.isFrozen()) {
+        errln("FAIL: copying a frozen set results in a thawed one");
+    }
+
+    // clone() of a frozen set is expected to be frozen as well,
+    // so the add() below must not change the clone.
+    UnicodeSet *cloned=(UnicodeSet *)frozen.clone();
+    if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) {
+        errln("FAIL: clone() failed");
+    }
+    cloned->add(0xd802, 0xd805);
+    if(cloned->containsSome(0xd802, 0xd805)) {
+        errln("FAIL: add() modified a frozen clone");
+    }
+    delete cloned;
+
+    // cloneAsThawed() is expected to yield an equal but modifiable set.
+    UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed();
+    if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) {
+        errln("FAIL: cloneAsThawed() failed");
+    }
+    thawed->add(0xd802, 0xd805);
+    if(!thawed->contains(0xd802, 0xd805)) {
+        errln("FAIL: unable to modify thawed clone");
+    }
+    delete thawed;
+
+    // From here on, each group of modifiers is applied to the frozen set,
+    // followed by a check that it still equals the reference set.
+    frozen.set(5, 55);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::set() modified a frozen set");
+    }
+
+    frozen.clear();
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::clear() modified a frozen set");
+    }
+
+    frozen.closeOver(USET_CASE_INSENSITIVE);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::closeOver() modified a frozen set");
+    }
+
+    frozen.compact();
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::compact() modified a frozen set");
+    }
+
+    ParsePosition pos;
+    frozen.
+        applyPattern(wsPattern, errorCode).
+        applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode).
+        applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode).
+        applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode).
+        applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set");
+    }
+
+    frozen.
+        add(0xd800).
+        add(0xd802, 0xd805).
+        add(wsPattern).
+        addAll(idPattern).
+        addAll(wsSet);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::addXYZ() modified a frozen set");
+    }
+
+    frozen.
+        retain(0x62).
+        retain(0x64, 0x69).
+        retainAll(wsPattern).
+        retainAll(wsSet);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set");
+    }
+
+    frozen.
+        remove(0x62).
+        remove(0x64, 0x69).
+        remove(idPattern).
+        removeAll(idPattern).
+        removeAll(idSet);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set");
+    }
+
+    frozen.
+        complement().
+        complement(0x62).
+        complement(0x64, 0x69).
+        complement(idPattern).
+        complementAll(idPattern).
+        complementAll(idSet);
+    if(frozen!=idSet || !(frozen==idSet)) {
+        errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set");
+    }
+}
+
+// Test span() etc. -------------------------------------------------------- ***
+
+// Convert the UTF-16 string s[0..length) to UTF-8, writing at t (which has room
+// for capacity bytes), and return the number of UTF-8 bytes produced.
+// Returns 0 when u_strToUTF8() reports a failure; callers then ignore the string.
+static int32_t
+appendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) {
+    int32_t utf8Length=0;
+    UErrorCode status=U_ZERO_ERROR;
+    u_strToUTF8(t, capacity, &utf8Length, s, length, &status);
+    if(U_FAILURE(status)) {
+        // The string contains an unpaired surrogate.
+        // Ignore this string.
+        return 0;
+    }
+    return utf8Length;
+}
+
+class UnicodeSetWithStringsIterator;
+
+// Wrapper that keeps a reference to a UnicodeSet and caches its string elements,
+// both as UnicodeString pointers and as UTF-8 bytes, for easy access in the span tests.
+// At most UPRV_LENGTHOF(strings) strings are cached.
+class UnicodeSetWithStrings {
+public:
+    UnicodeSetWithStrings(const UnicodeSet &normalSet) :
+            set(normalSet), stringsLength(0), hasSurrogates(FALSE) {
+        const int32_t elementCount=set.size();
+        if(elementCount<=0 || set.charAt(elementCount-1)>=0) {
+            // Empty set, or the last element is a code point: nothing to cache.
+            return;
+        }
+        // The set's last element is not a code point, so it must contain strings.
+        // Walk the set, skip the code point ranges, and remember each string
+        // together with its UTF-8 form (needed for spanUTF8()).
+        UnicodeSetIterator iter(set);
+        char *dest=utf8;
+        int32_t usedBytes=0;
+        while(iter.nextRange() && stringsLength<UPRV_LENGTHOF(strings)) {
+            if(!iter.isString()) {
+                continue;
+            }
+            // Keep the pointer to the set's own string element,
+            // which we happen to know is a stable pointer.
+            const UnicodeString *str=&iter.getString();
+            strings[stringsLength]=str;
+            const int32_t byteCount=
+                appendUTF8(str->getBuffer(), str->length(),
+                           dest, (int32_t)(sizeof(utf8)-usedBytes));
+            utf8Lengths[stringsLength]=byteCount;
+            usedBytes+=byteCount;
+            if(byteCount==0) {
+                hasSurrogates=TRUE;  // Contains unpaired surrogates.
+            }
+            dest+=byteCount;
+            ++stringsLength;
+        }
+    }
+
+    // The wrapped set.
+    const UnicodeSet &getSet() const {
+        return set;
+    }
+
+    // TRUE if at least one string was cached.
+    UBool hasStrings() const {
+        return (UBool)(stringsLength>0);
+    }
+
+    // TRUE if the UTF-8 conversion yielded no bytes for at least one cached string.
+    UBool hasStringsWithSurrogates() const {
+        return hasSurrogates;
+    }
+
+private:
+    friend class UnicodeSetWithStringsIterator;
+
+    const UnicodeSet &set;
+
+    const UnicodeString *strings[20];
+    int32_t stringsLength;
+    UBool hasSurrogates;
+
+    char utf8[1024];
+    int32_t utf8Lengths[20];
+};
+
+// Iterates over the strings cached in a UnicodeSetWithStrings, either as
+// UnicodeString pointers (nextString) or as UTF-8 byte runs (nextUTF8).
+class UnicodeSetWithStringsIterator {
+public:
+    UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) :
+            fSet(set), nextStringIndex(0), nextUTF8Start(0) {
+    }
+
+    // Restart the iteration at the first cached string.
+    void reset() {
+        nextStringIndex=0;
+        nextUTF8Start=0;
+    }
+
+    // Return the next cached string, or NULL once all have been returned.
+    const UnicodeString *nextString() {
+        if(nextStringIndex>=fSet.stringsLength) {
+            return NULL;
+        }
+        const UnicodeString *str=fSet.strings[nextStringIndex];
+        ++nextStringIndex;
+        return str;
+    }
+
+    // Return the UTF-8 bytes of the next cached string and set length to their count;
+    // returns NULL and sets length to 0 once all have been returned.
+    // Do not mix with calls to nextString().
+    const char *nextUTF8(int32_t &length) {
+        if(nextStringIndex>=fSet.stringsLength) {
+            length=0;
+            return NULL;
+        }
+        const char *bytes=fSet.utf8+nextUTF8Start;
+        length=fSet.utf8Lengths[nextStringIndex];
+        ++nextStringIndex;
+        nextUTF8Start+=length;
+        return bytes;
+    }
+
+private:
+    const UnicodeSetWithStrings &fSet;
+    int32_t nextStringIndex;
+    int32_t nextUTF8Start;
+};
+
+// Check whether t occurs in the 16-bit string s at index start (limit is the end
+// index of s), comparing at code point boundaries. s may be malformed UTF-16.
+// A match is rejected if either of its edges falls between the lead and trail
+// units of a surrogate pair in s.
+static inline UBool
+matches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
+    const UChar *p=s+start;
+    const int32_t remaining=limit-start;
+    const int32_t tLength=t.length();
+    if(t.compare(p, tLength)!=0) {
+        return FALSE;  // The text at start differs from t.
+    }
+    if(0<start && U16_IS_LEAD(p[-1]) && U16_IS_TRAIL(p[0])) {
+        return FALSE;  // The match would begin in the middle of a surrogate pair.
+    }
+    if(tLength<remaining && U16_IS_LEAD(p[tLength-1]) && U16_IS_TRAIL(p[tLength])) {
+        return FALSE;  // The match would end in the middle of a surrogate pair.
+    }
+    return TRUE;
+}
+
+// Implement span() with contains() for comparison.
+// Returns the length of the initial part of s[0..length) that satisfies
+// spanCondition for the wrapped set. Three cases are handled:
+// - The wrapper cached no strings: walk the text one code point at a time.
+// - USET_SPAN_NOT_CONTAINED: stop at the first contained code point or at the
+//   first position where one of the cached strings matches.
+// - USET_SPAN_CONTAINED or USET_SPAN_SIMPLE: advance over contained code points
+//   and matching strings. SIMPLE takes the longest match at each position;
+//   CONTAINED continues with the shortest match and recurses for the others,
+//   returning the largest limit reached.
+static int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
+                                 USetSpanCondition spanCondition) {
+    const UnicodeSet &realSet(set.getSet());
+    if(!set.hasStrings()) {
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+        }
+
+        UChar32 c;
+        int32_t start=0, prev;
+        // prev is the start of the code point about to be read;
+        // it is returned when that code point ends the span.
+        while((prev=start)<length) {
+            U16_NEXT(s, start, length, c);
+            if(realSet.contains(c)!=spanCondition) {
+                break;
+            }
+        }
+        return prev;
+    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t start, next;
+        for(start=next=0; start<length;) {
+            U16_NEXT(s, next, length, c);
+            if(realSet.contains(c)) {
+                break;
+            }
+            const UnicodeString *str;
+            iter.reset();
+            while((str=iter.nextString())!=NULL) {
+                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
+                    // spanNeedsStrings=TRUE;
+                    return start;
+                }
+            }
+            start=next;
+        }
+        return start;
+    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t start, next, maxSpanLimit=0;
+        for(start=next=0; start<length;) {
+            U16_NEXT(s, next, length, c);
+            if(!realSet.contains(c)) {
+                next=start;  // Do not span this single, not-contained code point.
+            }
+            const UnicodeString *str;
+            iter.reset();
+            while((str=iter.nextString())!=NULL) {
+                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
+                    // spanNeedsStrings=TRUE;
+                    int32_t matchLimit=start+str->length();
+                    if(matchLimit==length) {
+                        return length;
+                    }
+                    if(spanCondition==USET_SPAN_CONTAINED) {
+                        // Iterate for the shortest match at each position.
+                        // Recurse for each but the shortest match.
+                        if(next==start) {
+                            next=matchLimit;  // First match from start.
+                        } else {
+                            if(matchLimit<next) {
+                                // Remember shortest match from start for iteration.
+                                int32_t temp=next;
+                                next=matchLimit;
+                                matchLimit=temp;
+                            }
+                            // Recurse for non-shortest match from start.
+                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
+                                                                 USET_SPAN_CONTAINED);
+                            if((matchLimit+spanLength)>maxSpanLimit) {
+                                maxSpanLimit=matchLimit+spanLength;
+                                if(maxSpanLimit==length) {
+                                    return length;
+                                }
+                            }
+                        }
+                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
+                        if(matchLimit>next) {
+                            // Remember longest match from start.
+                            next=matchLimit;
+                        }
+                    }
+                }
+            }
+            if(next==start) {
+                break;  // No match from start.
+            }
+            start=next;
+        }
+        // Return the larger of the iterated limit and the best limit found by recursion.
+        if(start>maxSpanLimit) {
+            return start;
+        } else {
+            return maxSpanLimit;
+        }
+    }
+}
+
+// Implement spanBack() with contains() for comparison.
+// Works backward from the end of s[0..length) and returns the start index of
+// the trailing part that satisfies spanCondition for the wrapped set;
+// returns 0 for an empty string. The three cases mirror the forward version:
+// no cached strings, USET_SPAN_NOT_CONTAINED, and
+// USET_SPAN_CONTAINED/USET_SPAN_SIMPLE (CONTAINED recurses for each match
+// other than the shortest one and returns the smallest start reached).
+static int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
+                                     USetSpanCondition spanCondition) {
+    if(length==0) {
+        return 0;
+    }
+    const UnicodeSet &realSet(set.getSet());
+    if(!set.hasStrings()) {
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+        }
+
+        UChar32 c;
+        int32_t prev=length;
+        // prev is the limit of the code point about to be read backward;
+        // it is returned when that code point ends the span.
+        do {
+            U16_PREV(s, 0, length, c);
+            if(realSet.contains(c)!=spanCondition) {
+                break;
+            }
+        } while((prev=length)>0);
+        return prev;
+    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        // length0 keeps the original string length for the boundary check in matches16CPB().
+        int32_t prev=length, length0=length;
+        do {
+            U16_PREV(s, 0, length, c);
+            if(realSet.contains(c)) {
+                break;
+            }
+            const UnicodeString *str;
+            iter.reset();
+            while((str=iter.nextString())!=NULL) {
+                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
+                    // spanNeedsStrings=TRUE;
+                    return prev;
+                }
+            }
+        } while((prev=length)>0);
+        return prev;
+    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t prev=length, minSpanStart=length, length0=length;
+        do {
+            U16_PREV(s, 0, length, c);
+            if(!realSet.contains(c)) {
+                length=prev;  // Do not span this single, not-contained code point.
+            }
+            const UnicodeString *str;
+            iter.reset();
+            while((str=iter.nextString())!=NULL) {
+                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
+                    // spanNeedsStrings=TRUE;
+                    int32_t matchStart=prev-str->length();
+                    if(matchStart==0) {
+                        return 0;
+                    }
+                    if(spanCondition==USET_SPAN_CONTAINED) {
+                        // Iterate for the shortest match at each position.
+                        // Recurse for each but the shortest match.
+                        if(length==prev) {
+                            length=matchStart;  // First match from prev.
+                        } else {
+                            if(matchStart>length) {
+                                // Remember shortest match from prev for iteration.
+                                int32_t temp=length;
+                                length=matchStart;
+                                matchStart=temp;
+                            }
+                            // Recurse for non-shortest match from prev.
+                            int32_t spanStart=containsSpanBackUTF16(set, s, matchStart,
+                                                                    USET_SPAN_CONTAINED);
+                            if(spanStart<minSpanStart) {
+                                minSpanStart=spanStart;
+                                if(minSpanStart==0) {
+                                    return 0;
+                                }
+                            }
+                        }
+                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
+                        if(matchStart<length) {
+                            // Remember longest match from prev.
+                            length=matchStart;
+                        }
+                    }
+                }
+            }
+            if(length==prev) {
+                break;  // No match from prev.
+            }
+        } while((prev=length)>0);
+        // Return the smaller of the iterated start and the best start found by recursion.
+        if(prev<minSpanStart) {
+            return prev;
+        } else {
+            return minSpanStart;
+        }
+    }
+}
+
+// Implement spanUTF8() with contains() for comparison.
+// Returns the byte length of the initial part of the UTF-8 text s[0..length)
+// that satisfies spanCondition for the wrapped set. Code points are read with
+// U8_NEXT_OR_FFFD, and set strings are compared bytewise (memcmp) against the
+// UTF-8 forms cached in the wrapper; cached strings with a UTF-8 length of 0
+// are skipped. The three cases are: no cached strings,
+// USET_SPAN_NOT_CONTAINED, and USET_SPAN_CONTAINED/USET_SPAN_SIMPLE
+// (CONTAINED recurses for each match other than the shortest one).
+static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
+                                USetSpanCondition spanCondition) {
+    const UnicodeSet &realSet(set.getSet());
+    if(!set.hasStrings()) {
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+        }
+
+        UChar32 c;
+        int32_t start=0, prev;
+        // prev is the start of the code point about to be read;
+        // it is returned when that code point ends the span.
+        while((prev=start)<length) {
+            U8_NEXT_OR_FFFD(s, start, length, c);
+            if(realSet.contains(c)!=spanCondition) {
+                break;
+            }
+        }
+        return prev;
+    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t start, next;
+        for(start=next=0; start<length;) {
+            U8_NEXT_OR_FFFD(s, next, length, c);
+            if(realSet.contains(c)) {
+                break;
+            }
+            const char *s8;
+            int32_t length8;
+            iter.reset();
+            while((s8=iter.nextUTF8(length8))!=NULL) {
+                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
+                    // spanNeedsStrings=TRUE;
+                    return start;
+                }
+            }
+            start=next;
+        }
+        return start;
+    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t start, next, maxSpanLimit=0;
+        for(start=next=0; start<length;) {
+            U8_NEXT_OR_FFFD(s, next, length, c);
+            if(!realSet.contains(c)) {
+                next=start;  // Do not span this single, not-contained code point.
+            }
+            const char *s8;
+            int32_t length8;
+            iter.reset();
+            while((s8=iter.nextUTF8(length8))!=NULL) {
+                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
+                    // spanNeedsStrings=TRUE;
+                    int32_t matchLimit=start+length8;
+                    if(matchLimit==length) {
+                        return length;
+                    }
+                    if(spanCondition==USET_SPAN_CONTAINED) {
+                        // Iterate for the shortest match at each position.
+                        // Recurse for each but the shortest match.
+                        if(next==start) {
+                            next=matchLimit;  // First match from start.
+                        } else {
+                            if(matchLimit<next) {
+                                // Remember shortest match from start for iteration.
+                                int32_t temp=next;
+                                next=matchLimit;
+                                matchLimit=temp;
+                            }
+                            // Recurse for non-shortest match from start.
+                            int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit,
+                                                                USET_SPAN_CONTAINED);
+                            if((matchLimit+spanLength)>maxSpanLimit) {
+                                maxSpanLimit=matchLimit+spanLength;
+                                if(maxSpanLimit==length) {
+                                    return length;
+                                }
+                            }
+                        }
+                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
+                        if(matchLimit>next) {
+                            // Remember longest match from start.
+                            next=matchLimit;
+                        }
+                    }
+                }
+            }
+            if(next==start) {
+                break;  // No match from start.
+            }
+            start=next;
+        }
+        // Return the larger of the iterated limit and the best limit found by recursion.
+        if(start>maxSpanLimit) {
+            return start;
+        } else {
+            return maxSpanLimit;
+        }
+    }
+}
+
+// Implement spanBackUTF8() with contains() for comparison.
+// Works backward from the end of the UTF-8 text s[0..length) and returns the
+// byte index where the trailing part that satisfies spanCondition starts;
+// returns 0 for an empty string. Code points are read with U8_PREV_OR_FFFD,
+// and set strings are compared bytewise (memcmp) against the UTF-8 forms
+// cached in the wrapper; cached strings with a UTF-8 length of 0 are skipped.
+static int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
+                                    USetSpanCondition spanCondition) {
+    if(length==0) {
+        return 0;
+    }
+    const UnicodeSet &realSet(set.getSet());
+    if(!set.hasStrings()) {
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+        }
+
+        UChar32 c;
+        int32_t prev=length;
+        // prev is the limit of the code point about to be read backward;
+        // it is returned when that code point ends the span.
+        do {
+            U8_PREV_OR_FFFD(s, 0, length, c);
+            if(realSet.contains(c)!=spanCondition) {
+                break;
+            }
+        } while((prev=length)>0);
+        return prev;
+    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t prev=length;
+        do {
+            U8_PREV_OR_FFFD(s, 0, length, c);
+            if(realSet.contains(c)) {
+                break;
+            }
+            const char *s8;
+            int32_t length8;
+            iter.reset();
+            while((s8=iter.nextUTF8(length8))!=NULL) {
+                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
+                    // spanNeedsStrings=TRUE;
+                    return prev;
+                }
+            }
+        } while((prev=length)>0);
+        return prev;
+    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
+        UnicodeSetWithStringsIterator iter(set);
+        UChar32 c;
+        int32_t prev=length, minSpanStart=length;
+        do {
+            U8_PREV_OR_FFFD(s, 0, length, c);
+            if(!realSet.contains(c)) {
+                length=prev;  // Do not span this single, not-contained code point.
+            }
+            const char *s8;
+            int32_t length8;
+            iter.reset();
+            while((s8=iter.nextUTF8(length8))!=NULL) {
+                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
+                    // spanNeedsStrings=TRUE;
+                    int32_t matchStart=prev-length8;
+                    if(matchStart==0) {
+                        return 0;
+                    }
+                    if(spanCondition==USET_SPAN_CONTAINED) {
+                        // Iterate for the shortest match at each position.
+                        // Recurse for each but the shortest match.
+                        if(length==prev) {
+                            length=matchStart;  // First match from prev.
+                        } else {
+                            if(matchStart>length) {
+                                // Remember shortest match from prev for iteration.
+                                int32_t temp=length;
+                                length=matchStart;
+                                matchStart=temp;
+                            }
+                            // Recurse for non-shortest match from prev.
+                            int32_t spanStart=containsSpanBackUTF8(set, s, matchStart,
+                                                                   USET_SPAN_CONTAINED);
+                            if(spanStart<minSpanStart) {
+                                minSpanStart=spanStart;
+                                if(minSpanStart==0) {
+                                    return 0;
+                                }
+                            }
+                        }
+                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
+                        if(matchStart<length) {
+                            // Remember longest match from prev.
+                            length=matchStart;
+                        }
+                    }
+                }
+            }
+            if(length==prev) {
+                break;  // No match from prev.
+            }
+        } while((prev=length)>0);
+        // Return the smaller of the iterated start and the best start found by recursion.
+        if(prev<minSpanStart) {
+            return prev;
+        } else {
+            return minSpanStart;
+        }
+    }
+}
+
+// spans to be performed and compared
+// These are bit flags that are OR-ed together into a "whichSpans" mask.
+// The third value of each group is the mask covering the two flags above it.
+enum {
+    SPAN_UTF16          =1,
+    SPAN_UTF8           =2,
+    SPAN_UTFS           =3,      // SPAN_UTF16|SPAN_UTF8
+
+    SPAN_SET            =4,
+    SPAN_COMPLEMENT     =8,
+    SPAN_POLARITY       =0xc,    // SPAN_SET|SPAN_COMPLEMENT
+
+    SPAN_FWD            =0x10,
+    SPAN_BACK           =0x20,
+    SPAN_DIRS           =0x30,   // SPAN_FWD|SPAN_BACK
+
+    SPAN_CONTAINED      =0x100,
+    SPAN_SIMPLE         =0x200,
+    SPAN_CONDITION      =0x300,  // SPAN_CONTAINED|SPAN_SIMPLE
+
+    SPAN_ALL            =0x33f   // all of the flags above
+};
+
+// Flip a span condition: USET_SPAN_NOT_CONTAINED turns into the given
+// "contained" condition, and every other value turns into USET_SPAN_NOT_CONTAINED.
+static inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) {
+    if(spanCondition != USET_SPAN_NOT_CONTAINED) {
+        return USET_SPAN_NOT_CONTAINED;
+    }
+    return contained;
+}
+
+// Length of the NUL-terminated string s: counted in UChar units if isUTF16,
+// otherwise in char units.
+static inline int32_t slen(const void *s, UBool isUTF16) {
+    if(isUTF16) {
+        return u_strlen((const UChar *)s);
+    }
+    return (int32_t)strlen((const char *)s);
+}
+
+/*
+ * Count spans on a string with the method according to type and set the span limits.
+ * The set may be the complement of the original.
+ * When using spanBack() and comparing with span(), use a span condition for the first spanBack()
+ * according to the expected number of spans.
+ *
+ * type selects the method: 0..3 go forward, 4..7 go backward;
+ * 0,1,4,5 use the contains()-based reference functions, 2,3,6,7 use the UnicodeSet
+ * span functions; even types use USET_SPAN_CONTAINED, odd types use USET_SPAN_SIMPLE.
+ *
+ * limits[] receives the limit of each span in ascending order. At most limitsCapacity
+ * limits are stored, but the returned count includes the ones that were not stored.
+ *
+ * Sets typeName to an empty string and returns 0 if there is no such type.
+ * Returns -1 if the span option is filtered out by whichSpans.
+ * Otherwise returns the number of spans.
+ */
+static int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement,
+                        const void *s, int32_t length, UBool isUTF16,
+                        uint32_t whichSpans,
+                        int type, const char *&typeName,
+                        int32_t limits[], int32_t limitsCapacity,
+                        int32_t expectCount) {
+    const UnicodeSet &realSet(set.getSet());
+    int32_t start, count;
+    USetSpanCondition spanCondition, firstSpanCondition, contained;
+    UBool isForward;
+
+    if(type<0 || 7<type) {
+        typeName="";
+        return 0;
+    }
+
+    static const char *const typeNames16[]={
+        "contains", "contains(LM)",
+        "span", "span(LM)",
+        "containsBack", "containsBack(LM)",
+        "spanBack", "spanBack(LM)"
+    };
+
+    static const char *const typeNames8[]={
+        "containsUTF8", "containsUTF8(LM)",
+        "spanUTF8", "spanUTF8(LM)",
+        "containsBackUTF8", "containsBackUTF8(LM)", // not implemented
+        "spanBackUTF8", "spanBackUTF8(LM)"
+    };
+
+    typeName= isUTF16 ? typeNames16[type] : typeNames8[type];
+
+    // filter span options
+    if(type<=3) {
+        // span forward
+        if((whichSpans&SPAN_FWD)==0) {
+            return -1;
+        }
+        isForward=TRUE;
+    } else {
+        // span backward
+        if((whichSpans&SPAN_BACK)==0) {
+            return -1;
+        }
+        isForward=FALSE;
+    }
+    if((type&1)==0) {
+        // use USET_SPAN_CONTAINED
+        if((whichSpans&SPAN_CONTAINED)==0) {
+            return -1;
+        }
+        contained=USET_SPAN_CONTAINED;
+    } else {
+        // use USET_SPAN_SIMPLE
+        if((whichSpans&SPAN_SIMPLE)==0) {
+            return -1;
+        }
+        contained=USET_SPAN_SIMPLE;
+    }
+
+    // Default first span condition for going forward with an uncomplemented set.
+    spanCondition=USET_SPAN_NOT_CONTAINED;
+    if(isComplement) {
+        spanCondition=invertSpanCondition(spanCondition, contained);
+    }
+
+    // First span condition for span(), used to terminate the spanBack() iteration.
+    firstSpanCondition=spanCondition;
+
+    // spanBack(): Its initial span condition is span()'s last span condition,
+    // which is the opposite of span()'s first span condition
+    // if we expect an even number of spans.
+    // (The loop inverts spanCondition (expectCount-1) times
+    // before the expectCount'th span() call.)
+    // If we do not compare forward and backward directions, then we do not have an
+    // expectCount and just start with firstSpanCondition.
+    if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) {
+        spanCondition=invertSpanCondition(spanCondition, contained);
+    }
+
+    count=0;
+    switch(type) {
+    case 0:
+    case 1:
+        // Forward, using the contains()-based reference functions.
+        start=0;
+        if(length<0) {
+            length=slen(s, isUTF16);
+        }
+        for(;;) {
+            start+= isUTF16 ? containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) :
+                              containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition);
+            if(count<limitsCapacity) {
+                limits[count]=start;
+            }
+            ++count;
+            if(start>=length) {
+                break;
+            }
+            spanCondition=invertSpanCondition(spanCondition, contained);
+        }
+        break;
+    case 2:
+    case 3:
+        // Forward, using UnicodeSet::span()/spanUTF8(); a negative length is passed through.
+        start=0;
+        for(;;) {
+            start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) :
+                              realSet.spanUTF8((const char *)s+start, length>=0 ? length-start : length, spanCondition);
+            if(count<limitsCapacity) {
+                limits[count]=start;
+            }
+            ++count;
+            if(length>=0 ? start>=length :
+                           isUTF16 ? ((const UChar *)s)[start]==0 :
+                                     ((const char *)s)[start]==0
+            ) {
+                break;
+            }
+            spanCondition=invertSpanCondition(spanCondition, contained);
+        }
+        break;
+    case 4:
+    case 5:
+        // Backward, using the contains()-based reference functions.
+        // Limits are written from the end of limits[] toward the front.
+        if(length<0) {
+            length=slen(s, isUTF16);
+        }
+        for(;;) {
+            ++count;
+            if(count<=limitsCapacity) {
+                limits[limitsCapacity-count]=length;
+            }
+            length= isUTF16 ? containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) :
+                              containsSpanBackUTF8(set, (const char *)s, length, spanCondition);
+            if(length==0 && spanCondition==firstSpanCondition) {
+                break;
+            }
+            spanCondition=invertSpanCondition(spanCondition, contained);
+        }
+        if(count<limitsCapacity) {
+            // Move the stored limits from the end of the array to the front.
+            memmove(limits, limits+(limitsCapacity-count), count*sizeof(limits[0]));
+        }
+        break;
+    case 6:
+    case 7:
+        // Backward, using UnicodeSet::spanBack()/spanBackUTF8().
+        // Limits are written from the end of limits[] toward the front.
+        for(;;) {
+            ++count;
+            if(count<=limitsCapacity) {
+                limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16);
+            }
+            // Note: Length<0 is tested only for the first spanBack().
+            // If we wanted to keep length<0 for all spanBack()s, we would have to
+            // temporarily modify the string by placing a NUL where the previous spanBack() stopped.
+            length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) :
+                              realSet.spanBackUTF8((const char *)s, length, spanCondition);
+            if(length==0 && spanCondition==firstSpanCondition) {
+                break;
+            }
+            spanCondition=invertSpanCondition(spanCondition, contained);
+        }
+        if(count<limitsCapacity) {
+            // Move the stored limits from the end of the array to the front.
+            memmove(limits, limits+(limitsCapacity-count), count*sizeof(limits[0]));
+        }
+        break;
+    default:
+        typeName="";
+        return -1;
+    }
+
+    return count;
+}
+
+// sets to be tested; odd index=isComplement
+// TestSpan() creates SLOW from the pattern and SLOW_NOT as its complement;
+// FAST and FAST_NOT are clones of frozen copies of SLOW and SLOW_NOT.
+enum {
+    SLOW,       // set built from the pattern
+    SLOW_NOT,   // complement of SLOW
+    FAST,       // clone of a frozen copy of SLOW
+    FAST_NOT,   // clone of a frozen copy of SLOW_NOT
+    SET_COUNT   // number of sets, used as the array length
+};
+
+// Labels for the sets, indexed by the enum above; used in failure messages.
+static const char *const setNames[SET_COUNT]={
+    "slow",
+    "slow.not",
+    "fast",
+    "fast.not"
+};
+
+/*
+ * Verify that we get the same results whether we look at text with contains(),
+ * span() or spanBack(), using unfrozen or frozen versions of the set,
+ * and using the set or its complement (switching the spanConditions accordingly).
+ * The latter verifies that
+ *   set.span(spanCondition) == set.complement().span(!spanCondition).
+ *
+ * The expectLimits[] are either provided by the caller (with expectCount>=0)
+ * or returned to the caller (with an input expectCount<0).
+ */
+void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
+                              const void *s, int32_t length, UBool isUTF16,
+                              uint32_t whichSpans,
+                              int32_t expectLimits[], int32_t &expectCount,
+                              const char *testName, int32_t index) {
+    int32_t limits[500];
+    int32_t limitsCount;
+    int i, j;
+
+    const char *typeName;
+    int type;
+
+    for(i=0; i<SET_COUNT; ++i) {
+        if((i&1)==0) {
+            // Even-numbered sets are original, uncomplemented sets.
+            if((whichSpans&SPAN_SET)==0) {
+                continue;
+            }
+        } else {
+            // Odd-numbered sets are complemented.
+            if((whichSpans&SPAN_COMPLEMENT)==0) {
+                continue;
+            }
+        }
+        for(type=0;; ++type) {
+            limitsCount=getSpans(*sets[i], (UBool)(i&1),
+                                 s, length, isUTF16,
+                                 whichSpans,
+                                 type, typeName,
+                                 limits, UPRV_LENGTHOF(limits), expectCount);
+            if(typeName[0]==0) {
+                break; // All types tried.
+            }
+            if(limitsCount<0) {
+                continue; // Span option filtered out.
+            }
+            if(expectCount<0) {
+                expectCount=limitsCount;
+                if(limitsCount>UPRV_LENGTHOF(limits)) {
+                    errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
+                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)UPRV_LENGTHOF(limits));
+                    return;
+                }
+                memcpy(expectLimits, limits, limitsCount*4);
+            } else if(limitsCount!=expectCount) {
+                errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld",
+                      testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount);
+            } else {
+                for(j=0; j<limitsCount; ++j) {
+                    if(limits[j]!=expectLimits[j]) {
+                        errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld",
+                              testName, (long)index, setNames[i], typeName, (long)limitsCount,
+                              j, (long)limits[j], (long)expectLimits[j]);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    // Compare span() with containsAll()/containsNone(),
+    // but only if we have expectLimits[] from the uncomplemented set.
+    if(isUTF16 && (whichSpans&SPAN_SET)!=0) {
+        const UChar *s16=(const UChar *)s;
+        UnicodeString string;
+        int32_t prev=0, limit, length;
+        for(i=0; i<expectCount; ++i) {
+            limit=expectLimits[i];
+            length=limit-prev;
+            if(length>0) {
+                string.setTo(FALSE, s16+prev, length);  // read-only alias
+                if(i&1) {
+                    if(!sets[SLOW]->getSet().containsAll(string)) {
+                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
+                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
+                        return;
+                    }
+                    if(!sets[FAST]->getSet().containsAll(string)) {
+                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
+                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
+                        return;
+                    }
+                } else {
+                    if(!sets[SLOW]->getSet().containsNone(string)) {
+                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
+                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
+                        return;
+                    }
+                    if(!sets[FAST]->getSet().containsNone(string)) {
+                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
+                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
+                        return;
+                    }
+                }
+            }
+            prev=limit;
+        }
+    }
+}
+
+// Specifically test either UTF-16 or UTF-8.
+void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
+                              const void *s, int32_t length, UBool isUTF16,
+                              uint32_t whichSpans,
+                              const char *testName, int32_t index) {
+    int32_t expectLimits[500];
+    int32_t expectCount=-1;
+    testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index);
+}
+
+UBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) {
+    UChar c, c2;
+
+    if(length>=0) {
+        while(length>0) {
+            c=*s++;
+            --length;
+            if(0xd800<=c && c<0xe000) {
+                if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) {
+                    return TRUE;
+                }
+                --length;
+            }
+        }
+    } else {
+        while((c=*s++)!=0) {
+            if(0xd800<=c && c<0xe000) {
+                if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) {
+                    return TRUE;
+                }
+            }
+        }
+    }
+    return FALSE;
+}
+
+// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text,
+// unless either UTF is turned off in whichSpans.
+// Testing UTF-16 and UTF-8 together requires that surrogate code points
+// have the same contains(c) value as U+FFFD.
+void UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4],
+                                      const UChar *s16, int32_t length16,
+                                      uint32_t whichSpans,
+                                      const char *testName, int32_t index) {
+    int32_t expectLimits[500];
+    int32_t expectCount;
+
+    expectCount=-1;  // Get expectLimits[] from testSpan().
+
+    if((whichSpans&SPAN_UTF16)!=0) {
+        testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index);
+    }
+    if((whichSpans&SPAN_UTF8)==0) {
+        return;
+    }
+
+    // Convert s16[] and expectLimits[] to UTF-8.
+    uint8_t s8[3000];
+    int32_t offsets[3000];
+
+    const UChar *s16Limit=s16+length16;
+    char *t=(char *)s8;
+    char *tLimit=t+sizeof(s8);
+    int32_t *o=offsets;
+    UErrorCode errorCode=U_ZERO_ERROR;
+
+    // Convert with substitution: Turn unpaired surrogates into U+FFFD.
+    ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s",
+              testName, (long)index, u_errorName(errorCode));
+        ucnv_resetFromUnicode(utf8Cnv);
+        return;
+    }
+    int32_t length8=(int32_t)(t-(char *)s8);
+
+    // Convert expectLimits[].
+    int32_t i, j, expect;
+    for(i=j=0; i<expectCount; ++i) {
+        expect=expectLimits[i];
+        if(expect==length16) {
+            expectLimits[i]=length8;
+        } else {
+            while(offsets[j]<expect) {
+                ++j;
+            }
+            expectLimits[i]=j;
+        }
+    }
+
+    testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index);
+}
+
+static UChar32 nextCodePoint(UChar32 c) {
+    // Skip some large and boring ranges.
+    switch(c) {
+    case 0x3441:
+        return 0x4d7f;
+    case 0x5100:
+        return 0x9f00;
+    case 0xb040:
+        return 0xd780;
+    case 0xe041:
+        return 0xf8fe;
+    case 0x10100:
+        return 0x20000;
+    case 0x20041:
+        return 0xe0000;
+    case 0xe0101:
+        return 0x10fffd;
+    default:
+        return c+1;
+    }
+}
+
+// Verify that all implementations represent the same set.
+void UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
+    // contains(U+FFFD) is inconsistent with contains(some surrogates),
+    // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8:
+    // Skip the UTF-8 part of the test - if the string contains surrogates -
+    // because it is likely to produce a different result.
+    UBool inconsistentSurrogates=
+            (!(sets[0]->getSet().contains(0xfffd) ?
+               sets[0]->getSet().contains(0xd800, 0xdfff) :
+               sets[0]->getSet().containsNone(0xd800, 0xdfff)) ||
+             sets[0]->hasStringsWithSurrogates());
+
+    UChar s[1000];
+    int32_t length=0;
+    uint32_t localWhichSpans;
+
+    UChar32 c, first;
+    for(first=c=0;; c=nextCodePoint(c)) {
+        if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) {
+            localWhichSpans=whichSpans;
+            if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
+                localWhichSpans&=~SPAN_UTF8;
+            }
+            testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first);
+            if(c>0x10ffff) {
+                break;
+            }
+            length=0;
+            first=c;
+        }
+        U16_APPEND_UNSAFE(s, length, c);
+    }
+}
+
+// Test with a particular, interesting string.
+// Specify length and try NUL-termination.
+void UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
+    static const UChar s[]={
+        0x61, 0x62, 0x20,                       // Latin, space
+        0x3b1, 0x3b2, 0x3b3,                    // Greek
+        0xd900,                                 // lead surrogate
+        0x3000, 0x30ab, 0x30ad,                 // wide space, Katakana
+        0xdc05,                                 // trail surrogate
+        0xa0, 0xac00, 0xd7a3,                   // nbsp, Hangul
+        0xd900, 0xdc05,                         // unassigned supplementary
+        0xd840, 0xdfff, 0xd860, 0xdffe,         // Han supplementary
+        0xd7a4, 0xdc05, 0xd900, 0x2028,         // unassigned, surrogates in wrong order, LS
+        0                                       // NUL
+    };
+
+    if((whichSpans&SPAN_UTF16)==0) {
+        return;
+    }
+    testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
+    testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
+}
+
+void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
+    static const char s[]={
+        "abc"                                   // Latin
+
+        /* trail byte in lead position */
+        "\x80"
+
+        " "                                     // space
+
+        /* truncated multi-byte sequences */
+        "\xd0"
+        "\xe0"
+        "\xe1"
+        "\xed"
+        "\xee"
+        "\xf0"
+        "\xf1"
+        "\xf4"
+        "\xf8"
+        "\xfc"
+
+        "\xCE\xB1\xCE\xB2\xCE\xB3"              // Greek
+
+        /* trail byte in lead position */
+        "\x80"
+
+        "\xe0\x80"
+        "\xe0\xa0"
+        "\xe1\x80"
+        "\xed\x80"
+        "\xed\xa0"
+        "\xee\x80"
+        "\xf0\x80"
+        "\xf0\x90"
+        "\xf1\x80"
+        "\xf4\x80"
+        "\xf4\x90"
+        "\xf8\x80"
+        "\xfc\x80"
+
+        "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD"  // wide space, Katakana
+
+        /* trail byte in lead position */
+        "\x80"
+
+        "\xf0\x80\x80"
+        "\xf0\x90\x80"
+        "\xf1\x80\x80"
+        "\xf4\x80\x80"
+        "\xf4\x90\x80"
+        "\xf8\x80\x80"
+        "\xfc\x80\x80"
+
+        "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3"      // nbsp, Hangul
+
+        /* trail byte in lead position */
+        "\x80"
+
+        "\xf8\x80\x80\x80"
+        "\xfc\x80\x80\x80"
+
+        "\xF1\x90\x80\x85"                      // unassigned supplementary
+
+        /* trail byte in lead position */
+        "\x80"
+
+        "\xfc\x80\x80\x80\x80"
+
+        "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE"      // Han supplementary
+
+        /* trail byte in lead position */
+        "\x80"
+
+        /* complete sequences but non-shortest forms or out of range etc. */
+        "\xc0\x80"
+        "\xe0\x80\x80"
+        "\xed\xa0\x80"
+        "\xf0\x80\x80\x80"
+        "\xf4\x90\x80\x80"
+        "\xf8\x80\x80\x80\x80"
+        "\xfc\x80\x80\x80\x80\x80"
+        "\xfe"
+        "\xff"
+
+        /* trail byte in lead position */
+        "\x80"
+
+        "\xED\x9E\xA4\xE2\x80\xA8"              // unassigned, LS, NUL-terminated
+    };
+
+    if((whichSpans&SPAN_UTF8)==0) {
+        return;
+    }
+    testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
+    testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
+}
+
+// Take a set of span options and multiply them so that
+// each portion only has one of the options a, b and c.
+// If b==0, then the set of options is just modified with mask and a.
+// If b!=0 and c==0, then the set of options is just modified with mask, a and b.
+static int32_t
+addAlternative(uint32_t whichSpans[], int32_t whichSpansCount,
+               uint32_t mask, uint32_t a, uint32_t b, uint32_t c) {
+    uint32_t s;
+    int32_t i;
+
+    for(i=0; i<whichSpansCount; ++i) {
+        s=whichSpans[i]&mask;
+        whichSpans[i]=s|a;
+        if(b!=0) {
+            whichSpans[whichSpansCount+i]=s|b;
+            if(c!=0) {
+                whichSpans[2*whichSpansCount+i]=s|c;
+            }
+        }
+    }
+    return b==0 ? whichSpansCount : c==0 ? 2*whichSpansCount : 3*whichSpansCount;
+}
+
+// String literals with runs of 63 or 64 identical letters.
+// TestSpan() concatenates them to build set strings and test strings
+// with very long single-character runs.
+#define _63_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+#define _64_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+#define _63_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+#define _64_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+
+void UnicodeSetTest::TestSpan() {
+    // "[...]" is a UnicodeSet pattern.
+    // "*" performs tests on all Unicode code points and on a selection of
+    //   malformed UTF-8/16 strings.
+    // "-options" limits the scope of testing for the current set.
+    //   By default, the test verifies that equivalent boundaries are found
+    //   for UTF-16 and UTF-8, going forward and backward,
+    //   alternating USET_SPAN_NOT_CONTAINED with
+    //   either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE.
+    //   Single-character options:
+    //     8 -- UTF-16 and UTF-8 boundaries may differ.
+    //          Cause: contains(U+FFFD) is inconsistent with contains(some surrogates),
+    //          or the set contains strings with unpaired surrogates
+    //          which do not translate to valid UTF-8.
+    //     c -- set.span() and set.complement().span() boundaries may differ.
+    //          Cause: Set strings are not complemented.
+    //     b -- span() and spanBack() boundaries may differ.
+    //          Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED)
+    //          and spanBack(USET_SPAN_SIMPLE) are defined to
+    //          match with non-overlapping substrings.
+    //          For example, with a set containing "ab" and "ba",
+    //          span() of "aba" yields boundaries { 0, 2, 3 }
+    //          because the initial "ab" matches from 0 to 2,
+    //          while spanBack() yields boundaries { 0, 1, 3 }
+    //          because the final "ba" matches from 1 to 3.
+    //     l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ.
+    //          Cause: Strings in the set overlap, and a longer match may
+    //          require a sequence including non-longest substrings.
+    //          For example, with a set containing "ab", "abc" and "cd",
+    //          span(contained) of "abcd" spans the entire string
+    //          but span(longest match) only spans the first 3 characters.
+    //   Each "-options" first resets all options and then applies the specified options.
+    //   A "-" without options resets the options.
+    //   The options are also reset for each new set.
+    // Other strings will be spanned.
+    static const char *const testdata[]={
+        "[:ID_Continue:]",
+        "*",
+        "[:White_Space:]",
+        "*",
+        "[]",
+        "*",
+        "[\\u0000-\\U0010FFFF]",
+        "*",
+        "[\\u0000\\u0080\\u0800\\U00010000]",
+        "*",
+        "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]",
+        "*",
+        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]",
+        "-c",
+        "*",
+        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]",
+        "-c",
+        "*",
+
+        // Overlapping strings cause overlapping attempts to match.
+        "[x{xy}{xya}{axy}{ax}]",
+        "-cl",
+
+        // More repetitions of "xya" would take too long with the recursive
+        // reference implementation.
+        // containsAll()=FALSE
+        // test_string 0x14
+        "xx"
+        "xyaxyaxyaxya"  // set.complement().span(longest match) will stop here.
+        "xx"            // set.complement().span(contained) will stop between the two 'x'es.
+        "xyaxyaxyaxya"
+        "xx"
+        "xyaxyaxyaxya"  // span() ends here.
+        "aaa",
+
+        // containsAll()=TRUE
+        // test_string 0x15
+        "xx"
+        "xyaxyaxyaxya"
+        "xx"
+        "xyaxyaxyaxya"
+        "xx"
+        "xyaxyaxyaxy",
+
+        "-bc",
+        // test_string 0x17
+        "byayaxya",  // span() -> { 4, 7, 8 }  spanBack() -> { 5, 8 }
+        "-c",
+        "byayaxy",   // span() -> { 4, 7 }     complement.span() -> { 7 }
+        "byayax",    // span() -> { 4, 6 }     complement.span() -> { 6 }
+        "-",
+        "byaya",     // span() -> { 5 }
+        "byay",      // span() -> { 4 }
+        "bya",       // span() -> { 3 }
+
+        // span(longest match) will not span the whole string.
+        "[a{ab}{bc}]",
+        "-cl",
+        // test_string 0x21
+        "abc",
+
+        "[a{ab}{abc}{cd}]",
+        "-cl",
+        "acdabcdabccd",
+
+        // spanBack(longest match) will not span the whole string.
+        "[c{ab}{bc}]",
+        "-cl",
+        "abc",
+
+        "[d{cd}{bcd}{ab}]",
+        "-cl",
+        "abbcdabcdabd",
+
+        // Test with non-ASCII set strings - test proper handling of surrogate pairs
+        // and UTF-8 trail bytes.
+        // Copies of above test sets and strings, but transliterated to have
+        // different code points with similar trail units.
+        // Previous: a      b         c            d
+        // Unicode:  042B   30AB      200AB        204AB
+        // UTF-16:   042B   30AB      D840 DCAB    D841 DCAB
+        // UTF-8:    D0 AB  E3 82 AB  F0 A0 82 AB  F0 A0 92 AB
+        "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]",
+        "-cl",
+        "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB",
+
+        "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]",
+        "-cl",
+        "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB",
+
+        // Stress bookkeeping and recursion.
+        // The following strings are barely doable with the recursive
+        // reference implementation.
+        // The not-contained character at the end prevents an early exit from the span().
+        "[b{bb}]",
+        "-c",
+        // test_string 0x33
+        "bbbbbbbbbbbbbbbbbbbbbbbb-",
+        // On complement sets, span() and spanBack() get different results
+        // because b is not in the complement set and there is an odd number of b's
+        // in the test string.
+        "-bc",
+        "bbbbbbbbbbbbbbbbbbbbbbbbb-",
+
+        // Test with set strings with an initial or final code point span
+        // longer than 254.
+        "[a{" _64_a _64_a _64_a _64_a "b}"
+          "{a" _64_b _64_b _64_b _64_b "}]",
+        "-c",
+        _64_a _64_a _64_a _63_a "b",
+        _64_a _64_a _64_a _64_a "b",
+        _64_a _64_a _64_a _64_a "aaaabbbb",
+        "a" _64_b _64_b _64_b _63_b,
+        "a" _64_b _64_b _64_b _64_b,
+        "aaaabbbb" _64_b _64_b _64_b _64_b,
+
+        // Test with strings containing unpaired surrogates.
+        // They are not representable in UTF-8, and a leading trail surrogate
+        // and a trailing lead surrogate must not match in the middle of a proper surrogate pair.
+        // U+20001 == \\uD840\\uDC01
+        // U+20400 == \\uD841\\uDC00
+        "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]",
+        "-8cl",
+        "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb"
+    };
+    uint32_t whichSpans[96]={ SPAN_ALL };
+    int32_t whichSpansCount=1;
+
+    UnicodeSet *sets[SET_COUNT]={ NULL };
+    const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL };
+
+    char testName[1024];
+    char *testNameLimit=testName;
+
+    int32_t i, j;
+    for(i=0; i<UPRV_LENGTHOF(testdata); ++i) {
+        const char *s=testdata[i];
+        if(s[0]=='[') {
+            // Create new test sets from this pattern.
+            for(j=0; j<SET_COUNT; ++j) {
+                delete sets_with_str[j];
+                delete sets[j];
+            }
+            UErrorCode errorCode=U_ZERO_ERROR;
+            sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
+            if(U_FAILURE(errorCode)) {
+                dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
+                break;
+            }
+            sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
+            sets[SLOW_NOT]->complement();
+            // Intermediate set: Test cloning of a frozen set.
+            UnicodeSet *fast=new UnicodeSet(*sets[SLOW]);
+            fast->freeze();
+            sets[FAST]=(UnicodeSet *)fast->clone();
+            delete fast;
+            UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]);
+            fastNot->freeze();
+            sets[FAST_NOT]=(UnicodeSet *)fastNot->clone();
+            delete fastNot;
+
+            for(j=0; j<SET_COUNT; ++j) {
+                sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]);
+            }
+
+            strcpy(testName, s);
+            testNameLimit=strchr(testName, 0);
+            *testNameLimit++=':';
+            *testNameLimit=0;
+
+            whichSpans[0]=SPAN_ALL;
+            whichSpansCount=1;
+        } else if(s[0]=='-') {
+            whichSpans[0]=SPAN_ALL;
+            whichSpansCount=1;
+
+            while(*++s!=0) {
+                switch(*s) {
+                case 'c':
+                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
+                                                   ~SPAN_POLARITY,
+                                                   SPAN_SET,
+                                                   SPAN_COMPLEMENT,
+                                                   0);
+                    break;
+                case 'b':
+                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
+                                                   ~SPAN_DIRS,
+                                                   SPAN_FWD,
+                                                   SPAN_BACK,
+                                                   0);
+                    break;
+                case 'l':
+                    // test USET_SPAN_CONTAINED FWD & BACK, and separately
+                    // USET_SPAN_SIMPLE only FWD, and separately
+                    // USET_SPAN_SIMPLE only BACK
+                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
+                                                   ~(SPAN_DIRS|SPAN_CONDITION),
+                                                   SPAN_DIRS|SPAN_CONTAINED,
+                                                   SPAN_FWD|SPAN_SIMPLE,
+                                                   SPAN_BACK|SPAN_SIMPLE);
+                    break;
+                case '8':
+                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
+                                                   ~SPAN_UTFS,
+                                                   SPAN_UTF16,
+                                                   SPAN_UTF8,
+                                                   0);
+                    break;
+                default:
+                    errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]);
+                    break;
+                }
+            }
+        } else if(0==strcmp(s, "*")) {
+            strcpy(testNameLimit, "bad_string");
+            for(j=0; j<whichSpansCount; ++j) {
+                if(whichSpansCount>1) {
+                    sprintf(testNameLimit+10 /* strlen("bad_string") */,
+                            "%%0x%3x",
+                            whichSpans[j]);
+                }
+                testSpanUTF16String(sets_with_str, whichSpans[j], testName);
+                testSpanUTF8String(sets_with_str, whichSpans[j], testName);
+            }
+
+            strcpy(testNameLimit, "contents");
+            for(j=0; j<whichSpansCount; ++j) {
+                if(whichSpansCount>1) {
+                    sprintf(testNameLimit+8 /* strlen("contents") */,
+                            "%%0x%3x",
+                            whichSpans[j]);
+                }
+                testSpanContents(sets_with_str, whichSpans[j], testName);
+            }
+        } else {
+            UnicodeString string=UnicodeString(s, -1, US_INV).unescape();
+            strcpy(testNameLimit, "test_string");
+            for(j=0; j<whichSpansCount; ++j) {
+                if(whichSpansCount>1) {
+                    sprintf(testNameLimit+11 /* strlen("test_string") */,
+                            "%%0x%3x",
+                            whichSpans[j]);
+                }
+                testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i);
+            }
+        }
+    }
+    for(j=0; j<SET_COUNT; ++j) {
+        delete sets_with_str[j];
+        delete sets[j];
+    }
+}
+
+// Test select patterns and strings, and test USET_SPAN_SIMPLE.
+void UnicodeSetTest::TestStringSpan() {
+    static const char *pattern="[x{xy}{xya}{axy}{ax}]";
+    static const char *const string=
+        "xx"
+        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
+        "xx"
+        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
+        "xx"
+        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy"
+        "aaaa";
+
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UnicodeString pattern16=UnicodeString(pattern, -1, US_INV);
+    UnicodeSet set(pattern16, errorCode);
+    if(U_FAILURE(errorCode)) {
+        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
+        return;
+    }
+
+    UnicodeString string16=UnicodeString(string, -1, US_INV).unescape();
+
+    if(set.containsAll(string16)) {
+        errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string);
+    }
+
+    // Remove trailing "aaaa".
+    string16.truncate(string16.length()-4);
+    if(!set.containsAll(string16)) {
+        errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string);
+    }
+
+    string16=UNICODE_STRING_SIMPLE("byayaxya");
+    const UChar *s16=string16.getBuffer();
+    int32_t length16=string16.length();
+    (void)length16;   // Suppress set but not used warning.
+    if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
+        set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
+        set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
+        set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 ||
+        set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 ||
+        set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3
+    ) {
+        errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern);
+    }
+
+    pattern="[a{ab}{abc}{cd}]";
+    pattern16=UnicodeString(pattern, -1, US_INV);
+    set.applyPattern(pattern16, errorCode);
+    if(U_FAILURE(errorCode)) {
+        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
+        return;
+    }
+    string16=UNICODE_STRING_SIMPLE("acdabcdabccd");
+    s16=string16.getBuffer();
+    length16=string16.length();
+    if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 ||
+        set.span(s16, 12, USET_SPAN_SIMPLE)!=6 ||
+        set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5
+    ) {
+        errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern);
+    }
+
+    pattern="[d{cd}{bcd}{ab}]";
+    pattern16=UnicodeString(pattern, -1, US_INV);
+    set.applyPattern(pattern16, errorCode).freeze();
+    if(U_FAILURE(errorCode)) {
+        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
+        return;
+    }
+    string16=UNICODE_STRING_SIMPLE("abbcdabcdabd");
+    s16=string16.getBuffer();
+    length16=string16.length();
+    if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 ||
+        set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 ||
+        set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0
+    ) {
+        errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
+    }
+}
+
+/**
+ * Including collationroot.h fails here with
+ *   1>c:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\include\driverspecs.h(142): error C2008: '$' : unexpected in macro definition
+ * ... so we skip this test on Windows.
+ *
+ * The cause is that intltest builds with /Za, which disables language extensions,
+ * which means that Windows header files cannot be used.
+ */
+#if !UCONFIG_NO_COLLATION && !U_PLATFORM_HAS_WIN32_API
+#include "collationroot.h"
+#include "collationtailoring.h"
+#endif
+
+void UnicodeSetTest::TestUCAUnsafeBackwards() {
+#if U_PLATFORM_HAS_WIN32_API
+    infoln("Skipping TestUCAUnsafeBackwards() - can't include collationroot.h on Windows without language extensions!");
+#elif !UCONFIG_NO_COLLATION
+    UErrorCode errorCode = U_ZERO_ERROR;
+
+    // Get the unsafeBackwardsSet
+    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
+    if(U_FAILURE(errorCode)) {
+      dataerrln("FAIL: %s getting root cache entry", u_errorName(errorCode));
+      return;
+    }
+    //const UVersionInfo &version = rootEntry->tailoring->version;
+    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
+
+    checkSerializeRoundTrip(*unsafeBackwardSet, errorCode);
+
+    if(!logKnownIssue("11891","UnicodeSet fails to round trip on CollationRoot...unsafeBackwards set")) {
+        // simple test case
+        // TODO(ticket #11891): Simplify this test function to this simple case. Rename it appropriately.
+        // TODO(ticket #11891): Port test to Java. Is this a bug there, too?
+        UnicodeSet surrogates;
+        surrogates.add(0xd83a);  // a lead surrogate
+        surrogates.add(0xdc00, 0xdfff);  // a range of trail surrogates
+        UnicodeString pat;
+        surrogates.toPattern(pat, FALSE);  // bad: [ 0xd83a, 0xdc00, 0x2d, 0xdfff ]
+        // TODO: Probably fix either UnicodeSet::_generatePattern() or _appendToPat()
+        // so that at least one type of surrogate code points are escaped,
+        // or (minimally) so that adjacent lead+trail surrogate code points are escaped.
+        errorCode = U_ZERO_ERROR;
+        UnicodeSet s2;
+        s2.applyPattern(pat, errorCode);  // looks like invalid range [ 0x1e800, 0x2d, 0xdfff ]
+        if(U_FAILURE(errorCode)) {
+            errln("FAIL: surrogates to/from pattern - %s", u_errorName(errorCode));
+        } else {
+            checkEqual(surrogates, s2, "surrogates to/from pattern");
+        }
+        // This occurs in the UCA unsafe-backwards set.
+        checkRoundTrip(*unsafeBackwardSet);
+    }
+#endif
+}