X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..ef6cf650f4a75c3f97de06b51fa104f2069b9ea2:/icuSources/test/intltest/usettest.cpp

diff --git a/icuSources/test/intltest/usettest.cpp b/icuSources/test/intltest/usettest.cpp
index 30da4b3f..6dbf9255 100644
--- a/icuSources/test/intltest/usettest.cpp
+++ b/icuSources/test/intltest/usettest.cpp
@@ -1,6 +1,6 @@
 /*
 ********************************************************************************
-*   Copyright (C) 1999-2008 International Business Machines Corporation and
+*   Copyright (C) 1999-2016 International Business Machines Corporation and
 *   others. All Rights Reserved.
 ********************************************************************************
 *   Date        Name        Description
@@ -22,16 +22,15 @@
 #include "unicode/parsepos.h"
 #include "unicode/symtable.h"
 #include "unicode/uversion.h"
+#include "cmemory.h"
 #include "hash.h"
 
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
-    errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
+    dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
     u_errorName(status));}}
 
 #define TEST_ASSERT(expr) {if (!(expr)) { \
-    errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
+    dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
 
 UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
     UnicodeString pat;
@@ -92,6 +91,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
         CASE(21,TestFreezable);
         CASE(22,TestSpan);
         CASE(23,TestStringSpan);
+        CASE(24,TestUCAUnsafeBackwards);
         default: name = ""; break;
     }
 }
@@ -130,7 +130,7 @@ void UnicodeSetTest::TestToPattern() {
             ec = U_ZERO_ERROR;
             UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
             if (U_FAILURE(ec)) {
-                errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]);
+                dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec)));
                 continue;
             }
             checkPat(OTHER_TOPATTERN_TESTS[j], s);
@@ -281,7 +281,8 @@ UnicodeSetTest::TestCategories(void) {
     const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
     UnicodeSet set(pat, status);
     if (U_FAILURE(status)) {
-        errln((UnicodeString)"Fail: Can't construct set with " + pat);
+        dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status)));
+        return;
     } else {
         expectContainment(set, pat, "ABC", "abc");
     }
@@ -321,7 +322,7 @@ UnicodeSetTest::TestCloneEqualHash(void) {
     UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); //  :Ll: Letter, lowercase
     UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); //  Letter, lowercase
     if (U_FAILURE(status)){
-        errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
+        dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
         return;
     }
     UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status);   //Number, Decimal digit
@@ -690,12 +691,55 @@ void UnicodeSetTest::TestAPI() {
     if (set != exp) { errln("FAIL: retain('s')"); return; }
 
     uint16_t buf[32];
-    int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
+    int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status);
     if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
     if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
         errln("FAIL: serialize");
         return;
     }
+
+    // Conversions to and from USet
+    UnicodeSet *uniset = &set;
+    USet *uset = uniset->toUSet();
+    TEST_ASSERT((void *)uset == (void *)uniset);
+    UnicodeSet *setx = UnicodeSet::fromUSet(uset);
+    TEST_ASSERT((void *)setx == (void *)uset);
+    const UnicodeSet *constSet = uniset;
+    const USet *constUSet = constSet->toUSet();
+    TEST_ASSERT((void *)constUSet == (void *)constSet);
+    const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
+    TEST_ASSERT((void *)constSetx == (void *)constUSet);
+
+    // span(UnicodeString) and spanBack(UnicodeString) convenience methods
+    UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
+    UnicodeSet ac(0x61, 0x63);
+    ac.remove(0x62).freeze();
+    if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
+        ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
+        ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
+    ) {
+        errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
+    }
+    if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
+        ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+        ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
+        ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
+    ) {
+        errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
+    }
 }
 
 void UnicodeSetTest::TestIteration() {
@@ -850,6 +894,7 @@ void UnicodeSetTest::TestPropertySet() {
         "abc",
         "ABC",
 
+#if !UCONFIG_NO_NORMALIZATION
         // Combining class: @since ICU 2.2
         // Check both symbolic and numeric
         "\\p{ccc=Nukta}",
@@ -863,6 +908,7 @@ void UnicodeSetTest::TestPropertySet() {
         "[:c c c = iota subscript :]",
         "\\u0345",
         "xyz",
+#endif
 
         // Bidi class: @since ICU 2.2
         "\\p{bidiclass=lefttoright}",
@@ -974,6 +1020,7 @@ void UnicodeSetTest::TestPropertySet() {
         "abcd\\uDC00",
         "ef\\uD800\\U00010000",
 
+#if !UCONFIG_NO_NORMALIZATION
         "[:^lccc=0:]", // Lead canonical class
         "\\u0300\\u0301",
         "abcd\\u00c0\\u00c5",
@@ -993,13 +1040,25 @@ void UnicodeSetTest::TestPropertySet() {
         "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
         "\\u0F73\\u0F75\\u0F81",
         "abcd\\u0300\\u0301\\u00c0\\u00c5",
+#endif /* !UCONFIG_NO_NORMALIZATION */
 
         "[:Assigned:]",
         "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
-        "\\u0888\\uFDD3\\uFFFE\\U00050005"
+        "\\u0888\\uFDD3\\uFFFE\\U00050005",
+
+        // Script_Extensions, new in Unicode 6.0
+        "[:scx=Arab:]",
+        "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
+        "\\u061D\\uFDEF\\uFDFE",
+
+        // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
+        // so scx-sc is missing U+FDF2.
+        "[[:Script_Extensions=Arabic:]-[:Arab:]]",
+        "\\u0640\\u064B\\u0650\\u0655",
+        "\\uFDF2"
     };
 
-    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
+    static const int32_t DATA_LEN = UPRV_LENGTHOF(DATA);
 
     for (int32_t i=0; i<DATA_LEN; i+=3) {  
         expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
@@ -1202,9 +1261,11 @@ void UnicodeSetTest::TestCloseOver() {
 
         CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
 
+#if !UCONFIG_NO_FILE_IO
         CASE_MAPPINGS,
         "[aq\\u00DF{Bc}{bC}{Fi}]",
         "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
+#endif
 
         CASE_MAPPINGS,
         "[\\u01F1]", // 'DZ'
@@ -1234,7 +1295,7 @@ void UnicodeSetTest::TestCloseOver() {
         if (s == t) {
             logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
         } else {
-            errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
+            dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
                   s.toPattern(buf, TRUE) + ", expected " + exp);
         }
     }
@@ -1327,7 +1388,7 @@ void UnicodeSetTest::TestEscapePattern() {
     const char exp[] =
         "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
     // We test this with two passes; in the second pass we
-    // pre-unescape the pattern.  Since U+200E is rule whitespace,
+    // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
     // this fails -- which is what we expect.
     for (int32_t pass=1; pass<=2; ++pass) {
         UErrorCode ec = U_ZERO_ERROR;
@@ -1401,7 +1462,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
         (UChar32)-1, 8,           0, 8,
         8, 0x110000,              8, 0x10FFFF
     };
-    const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
+    const int32_t DATA_LENGTH = UPRV_LENGTHOF(DATA);
 
     UnicodeString pat;
     int32_t i;
@@ -1427,6 +1488,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
         b = set.contains(start, end);
         b = set.containsNone(start, end);
         b = set.containsSome(start, end);
+        (void)b;   // Suppress set but not used warning.
 
         /*int32_t index = set.indexOf(start);*/
         
@@ -1463,7 +1525,7 @@ void UnicodeSetTest::TestInvalidCodePoint() {
         (UChar32)-1,
         0x110000
     };
-    const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
+    const int32_t DATA2_LENGTH = UPRV_LENGTHOF(DATA2);
 
     for (i=0; i<DATA2_LENGTH; ++i) {
         UChar32 c = DATA2[i], end = 0x10FFFF;
@@ -1512,7 +1574,7 @@ public:
     Hashtable contents;
 
     TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
-        contents.setValueDeleter(uhash_deleteUnicodeString);
+        contents.setValueDeleter(uprv_deleteUObject);
     }
 
     ~TokenSymbolTable() {}
@@ -1641,7 +1703,8 @@ void UnicodeSetTest::TestSurrogate() {
     for (int i=0; DATA[i] != 0; ++i) {
         UErrorCode ec = U_ZERO_ERROR;
         logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
-        UnicodeSet set(UnicodeString(DATA[i], -1, US_INV), ec);
+        UnicodeString str = UnicodeString(DATA[i], -1, US_INV);
+        UnicodeSet set(str, ec);
         if (U_FAILURE(ec)) {
             errln("FAIL: UnicodeSet constructor");
             continue;
@@ -1653,6 +1716,12 @@ void UnicodeSetTest::TestSurrogate() {
             errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " + 
                   set.size() + ", expected 4");
         }
+
+        {
+          UErrorCode subErr = U_ZERO_ERROR;
+          checkRoundTrip(set);
+          checkSerializeRoundTrip(set, subErr);
+        }
     }
 }
 
@@ -1669,8 +1738,12 @@ void UnicodeSetTest::TestExhaustive() {
         logln((UnicodeString)"Testing " + i + ", " + x);
         _testComplement(i, x, y);
 
+        UnicodeSet &toTest = bitsToSet(i, aa);
+
         // AS LONG AS WE ARE HERE, check roundtrip
-        checkRoundTrip(bitsToSet(i, aa));
+        checkRoundTrip(toTest);
+        UErrorCode ec = U_ZERO_ERROR;
+        checkSerializeRoundTrip(toTest, ec);
 
         for (int32_t j = 0; j < limit; ++j) {
             _testAdd(i,j,  x,y,z);
@@ -1829,39 +1902,80 @@ UnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
  * get the same thing back
  */
 void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
-    UErrorCode ec = U_ZERO_ERROR;
+    {
+        UnicodeSet t(s);
+        checkEqual(s, t, "copy ct");
+    }
 
-    UnicodeSet t(s);
-    checkEqual(s, t, "copy ct");
+    {
+        UnicodeSet t(0xabcd, 0xdef0);  // dummy contents should be overwritten
+        t = s;
+        checkEqual(s, t, "operator=");
+    }
 
-    t = s;
-    checkEqual(s, t, "operator=");
+    {
+        UnicodeSet t;
+        copyWithIterator(t, s, FALSE);
+        checkEqual(s, t, "iterator roundtrip");
+    }
 
-    copyWithIterator(t, s, FALSE);
-    checkEqual(s, t, "iterator roundtrip");
+    {
+        UnicodeSet t;
+        copyWithIterator(t, s, TRUE); // try range
+        checkEqual(s, t, "iterator roundtrip");
+    }
 
-    copyWithIterator(t, s, TRUE); // try range
-    checkEqual(s, t, "iterator roundtrip");
-        
-    UnicodeString pat; s.toPattern(pat, FALSE);
-    t.applyPattern(pat, ec);
-    if (U_FAILURE(ec)) {
-        errln("FAIL: applyPattern");
-        return;
-    } else {
-        checkEqual(s, t, "toPattern(false)");
+    {
+        UnicodeSet t;
+        UnicodeString pat;
+        UErrorCode ec = U_ZERO_ERROR;
+        s.toPattern(pat, FALSE);
+        t.applyPattern(pat, ec);
+        if (U_FAILURE(ec)) {
+            errln("FAIL: toPattern(escapeUnprintable=FALSE), applyPattern - %s", u_errorName(ec));
+            return;
+        } else {
+            checkEqual(s, t, "toPattern(false)");
+        }
     }
-        
-    s.toPattern(pat, TRUE);
-    t.applyPattern(pat, ec);
-    if (U_FAILURE(ec)) {
-        errln("FAIL: applyPattern");
-        return;
-    } else {
-        checkEqual(s, t, "toPattern(true)");
+
+    {
+        UnicodeSet t;
+        UnicodeString pat;
+        UErrorCode ec = U_ZERO_ERROR;
+        s.toPattern(pat, TRUE);
+        t.applyPattern(pat, ec);
+        if (U_FAILURE(ec)) {
+            errln("FAIL: toPattern(escapeUnprintable=TRUE), applyPattern - %s", u_errorName(ec));
+            return;
+        } else {
+            checkEqual(s, t, "toPattern(true)");
+        }
     }
 }
-    
+
+void UnicodeSetTest::checkSerializeRoundTrip(const UnicodeSet& t, UErrorCode &status) {
+  if(U_FAILURE(status)) return;
+  int32_t len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
+  if(status == U_BUFFER_OVERFLOW_ERROR) {
+    status = U_ZERO_ERROR;
+    serializeBuffer.resize(len);
+    len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
+    // let 2nd error stand
+  }
+  if(U_FAILURE(status)) {
+    errln("checkSerializeRoundTrip: error %s serializing buffer\n", u_errorName(status));
+    return;
+  }
+  UnicodeSet deserialized(serializeBuffer.getAlias(), len, UnicodeSet::kSerialized, status);
+  if(U_FAILURE(status)) {
+    errln("checkSerializeRoundTrip: error %s deserializing buffer: buf %p len %d, original %d\n", u_errorName(status), serializeBuffer.getAlias(), len, t.getRangeCount());
+    return;
+  }
+
+  checkEqual(t, deserialized, "Set was unequal when deserialized");
+}
+
 void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
     t.clear();
     UnicodeSetIterator it(s);
@@ -1885,6 +1999,8 @@ void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool
 }
     
 UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
+  assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
+  assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
     UnicodeString source; s.toPattern(source, TRUE);
     UnicodeString result; t.toPattern(result, TRUE);
     if (s != t) {
@@ -1909,7 +2025,7 @@ UnicodeSetTest::expectContainment(const UnicodeString& pat,
     UErrorCode ec = U_ZERO_ERROR;
     UnicodeSet set(pat, ec);
     if (U_FAILURE(ec)) {
-        errln((UnicodeString)"FAIL: pattern \"" +
+        dataerrln((UnicodeString)"FAIL: pattern \"" +
               pat + "\" => " + u_errorName(ec));
         return;
     }
@@ -2091,14 +2207,14 @@ void UnicodeSetTest::TestFreezable() {
     UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
     UnicodeSet idSet(idPattern, errorCode);
     if(U_FAILURE(errorCode)) {
-        errln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
+        dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
         return;
     }
 
     UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
     UnicodeSet wsSet(wsPattern, errorCode);
     if(U_FAILURE(errorCode)) {
-        errln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
+        dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
         return;
     }
 
@@ -2259,7 +2375,7 @@ public:
             const UnicodeString *s;
             char *s8=utf8;
             int32_t length8, utf8Count=0;
-            while(iter.nextRange() && stringsLength<LENGTHOF(strings)) {
+            while(iter.nextRange() && stringsLength<UPRV_LENGTHOF(strings)) {
                 if(iter.isString()) {
                     // Store the pointer to the set's string element
                     // which we happen to know is a stable pointer.
@@ -2301,9 +2417,6 @@ private:
 
     char utf8[1024];
     int32_t utf8Lengths[20];
-
-    int32_t nextStringIndex;
-    int32_t nextUTF8Start;
 };
 
 class UnicodeSetWithStringsIterator {
@@ -2564,10 +2677,7 @@ static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s,
         UChar32 c;
         int32_t start=0, prev;
         while((prev=start)<length) {
-            U8_NEXT(s, start, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_NEXT_OR_FFFD(s, start, length, c);
             if(realSet.contains(c)!=spanCondition) {
                 break;
             }
@@ -2578,10 +2688,7 @@ static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s,
         UChar32 c;
         int32_t start, next;
         for(start=next=0; start<length;) {
-            U8_NEXT(s, next, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_NEXT_OR_FFFD(s, next, length, c);
             if(realSet.contains(c)) {
                 break;
             }
@@ -2602,10 +2709,7 @@ static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s,
         UChar32 c;
         int32_t start, next, maxSpanLimit=0;
         for(start=next=0; start<length;) {
-            U8_NEXT(s, next, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_NEXT_OR_FFFD(s, next, length, c);
             if(!realSet.contains(c)) {
                 next=start;  // Do not span this single, not-contained code point.
             }
@@ -2676,10 +2780,7 @@ static int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char
         UChar32 c;
         int32_t prev=length;
         do {
-            U8_PREV(s, 0, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_PREV_OR_FFFD(s, 0, length, c);
             if(realSet.contains(c)!=spanCondition) {
                 break;
             }
@@ -2690,10 +2791,7 @@ static int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char
         UChar32 c;
         int32_t prev=length;
         do {
-            U8_PREV(s, 0, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_PREV_OR_FFFD(s, 0, length, c);
             if(realSet.contains(c)) {
                 break;
             }
@@ -2713,10 +2811,7 @@ static int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char
         UChar32 c;
         int32_t prev=length, minSpanStart=length;
         do {
-            U8_PREV(s, 0, length, c);
-            if(c<0) {
-                c=0xfffd;
-            }
+            U8_PREV_OR_FFFD(s, 0, length, c);
             if(!realSet.contains(c)) {
                 length=prev;  // Do not span this single, not-contained code point.
             }
@@ -3034,7 +3129,7 @@ void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
                                  s, length, isUTF16,
                                  whichSpans,
                                  type, typeName,
-                                 limits, LENGTHOF(limits), expectCount);
+                                 limits, UPRV_LENGTHOF(limits), expectCount);
             if(typeName[0]==0) {
                 break; // All types tried.
             }
@@ -3043,9 +3138,9 @@ void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
             }
             if(expectCount<0) {
                 expectCount=limitsCount;
-                if(limitsCount>LENGTHOF(limits)) {
+                if(limitsCount>UPRV_LENGTHOF(limits)) {
                     errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
-                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits));
+                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)UPRV_LENGTHOF(limits));
                     return;
                 }
                 memcpy(expectLimits, limits, limitsCount*4);
@@ -3238,7 +3333,7 @@ void UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint
 
     UChar32 c, first;
     for(first=c=0;; c=nextCodePoint(c)) {
-        if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) {
+        if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) {
             localWhichSpans=whichSpans;
             if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
                 localWhichSpans&=~SPAN_UTF8;
@@ -3274,7 +3369,7 @@ void UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], u
         return;
     }
     testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
-    testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
+    testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
 }
 
 void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
@@ -3371,7 +3466,7 @@ void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui
         return;
     }
     testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
-    testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
+    testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
 }
 
 // Take a set of span options and multiply them so that
@@ -3573,7 +3668,7 @@ void UnicodeSetTest::TestSpan() {
     char *testNameLimit=testName;
 
     int32_t i, j;
-    for(i=0; i<LENGTHOF(testdata); ++i) {
+    for(i=0; i<UPRV_LENGTHOF(testdata); ++i) {
         const char *s=testdata[i];
         if(s[0]=='[') {
             // Create new test sets from this pattern.
@@ -3584,7 +3679,7 @@ void UnicodeSetTest::TestSpan() {
             UErrorCode errorCode=U_ZERO_ERROR;
             sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
             if(U_FAILURE(errorCode)) {
-                errln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
+                dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
                 break;
             }
             sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
@@ -3727,6 +3822,7 @@ void UnicodeSetTest::TestStringSpan() {
     string16=UNICODE_STRING_SIMPLE("byayaxya");
     const UChar *s16=string16.getBuffer();
     int32_t length16=string16.length();
+    (void)length16;   // Suppress set but not used warning.
     if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
         set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
         set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
@@ -3771,3 +3867,59 @@ void UnicodeSetTest::TestStringSpan() {
         errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
     }
 }
+
+/**
+ * Including collationroot.h fails here with
+1>c:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\include\driverspecs.h(142): error C2008: '$' : unexpected in macro definition
+ *  .. so, we skip this test on Windows.
+ * 
+ * the cause is that  intltest builds with /Za which disables language extensions - which means
+ *  windows header files can't be used.
+ */
+#if !UCONFIG_NO_COLLATION && !U_PLATFORM_HAS_WIN32_API
+#include "collationroot.h"
+#include "collationtailoring.h"
+#endif
+
+void UnicodeSetTest::TestUCAUnsafeBackwards() {
+#if U_PLATFORM_HAS_WIN32_API
+    infoln("Skipping TestUCAUnsafeBackwards() - can't include collationroot.h on Windows without language extensions!");
+#elif !UCONFIG_NO_COLLATION
+    UErrorCode errorCode = U_ZERO_ERROR;
+
+    // Get the unsafeBackwardsSet
+    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
+    if(U_FAILURE(errorCode)) {
+      dataerrln("FAIL: %s getting root cache entry", u_errorName(errorCode));
+      return;
+    }
+    //const UVersionInfo &version = rootEntry->tailoring->version;
+    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
+
+    checkSerializeRoundTrip(*unsafeBackwardSet, errorCode);
+
+    if(!logKnownIssue("11891","UnicodeSet fails to round trip on CollationRoot...unsafeBackwards set")) {
+        // simple test case
+        // TODO(ticket #11891): Simplify this test function to this simple case. Rename it appropriately.
+        // TODO(ticket #11891): Port test to Java. Is this a bug there, too?
+        UnicodeSet surrogates;
+        surrogates.add(0xd83a);  // a lead surrogate
+        surrogates.add(0xdc00, 0xdfff);  // a range of trail surrogates
+        UnicodeString pat;
+        surrogates.toPattern(pat, FALSE);  // bad: [ 0xd83a, 0xdc00, 0x2d, 0xdfff ]
+        // TODO: Probably fix either UnicodeSet::_generatePattern() or _appendToPat()
+        // so that at least one type of surrogate code points are escaped,
+        // or (minimally) so that adjacent lead+trail surrogate code points are escaped.
+        errorCode = U_ZERO_ERROR;
+        UnicodeSet s2;
+        s2.applyPattern(pat, errorCode);  // looks like invalid range [ 0x1e800, 0x2d, 0xdfff ]
+        if(U_FAILURE(errorCode)) {
+            errln("FAIL: surrogates to/from pattern - %s", u_errorName(errorCode));
+        } else {
+            checkEqual(surrogates, s2, "surrogates to/from pattern");
+        }
+        // This occurs in the UCA unsafe-backwards set.
+        checkRoundTrip(*unsafeBackwardSet);
+    }
+#endif
+}