/*
********************************************************************************
-* Copyright (C) 1999-2008 International Business Machines Corporation and
+* Copyright (C) 1999-2016 International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************************
* Date Name Description
#include "unicode/parsepos.h"
#include "unicode/symtable.h"
#include "unicode/uversion.h"
+#include "cmemory.h"
#include "hash.h"
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
- errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
+ dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
u_errorName(status));}}
#define TEST_ASSERT(expr) {if (!(expr)) { \
- errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
+ dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
UnicodeString pat;
CASE(21,TestFreezable);
CASE(22,TestSpan);
CASE(23,TestStringSpan);
+ CASE(24,TestUCAUnsafeBackwards);
default: name = ""; break;
}
}
ec = U_ZERO_ERROR;
UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
if (U_FAILURE(ec)) {
- errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]);
+ dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec)));
continue;
}
checkPat(OTHER_TOPATTERN_TESTS[j], s);
const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
UnicodeSet set(pat, status);
if (U_FAILURE(status)) {
- errln((UnicodeString)"Fail: Can't construct set with " + pat);
+ dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status)));
+ return;
} else {
expectContainment(set, pat, "ABC", "abc");
}
UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase
UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase
if (U_FAILURE(status)){
- errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
+ dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
return;
}
UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit
if (set != exp) { errln("FAIL: retain('s')"); return; }
uint16_t buf[32];
- int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
+ int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status);
if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
errln("FAIL: serialize");
return;
}
+
+ // Conversions to and from USet
+ UnicodeSet *uniset = &set;
+ USet *uset = uniset->toUSet();
+ TEST_ASSERT((void *)uset == (void *)uniset);
+ UnicodeSet *setx = UnicodeSet::fromUSet(uset);
+ TEST_ASSERT((void *)setx == (void *)uset);
+ const UnicodeSet *constSet = uniset;
+ const USet *constUSet = constSet->toUSet();
+ TEST_ASSERT((void *)constUSet == (void *)constSet);
+ const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
+ TEST_ASSERT((void *)constSetx == (void *)constUSet);
+
+ // span(UnicodeString) and spanBack(UnicodeString) convenience methods
+ UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
+ UnicodeSet ac(0x61, 0x63);
+ ac.remove(0x62).freeze();
+ if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
+ ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
+ ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
+ ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
+ ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+ ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
+ ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
+ ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
+ ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
+ ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
+ ) {
+ errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
+ }
+ if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
+ ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
+ ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
+ ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
+ ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
+ ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
+ ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
+ ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
+ ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
+ ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
+ ) {
+ errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
+ }
}
void UnicodeSetTest::TestIteration() {
"abc",
"ABC",
+#if !UCONFIG_NO_NORMALIZATION
// Combining class: @since ICU 2.2
// Check both symbolic and numeric
"\\p{ccc=Nukta}",
"[:c c c = iota subscript :]",
"\\u0345",
"xyz",
+#endif
// Bidi class: @since ICU 2.2
"\\p{bidiclass=lefttoright}",
"abcd\\uDC00",
"ef\\uD800\\U00010000",
+#if !UCONFIG_NO_NORMALIZATION
"[:^lccc=0:]", // Lead canonical class
"\\u0300\\u0301",
"abcd\\u00c0\\u00c5",
"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
"\\u0F73\\u0F75\\u0F81",
"abcd\\u0300\\u0301\\u00c0\\u00c5",
+#endif /* !UCONFIG_NO_NORMALIZATION */
"[:Assigned:]",
"A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
- "\\u0888\\uFDD3\\uFFFE\\U00050005"
+ "\\u0888\\uFDD3\\uFFFE\\U00050005",
+
+ // Script_Extensions, new in Unicode 6.0
+ "[:scx=Arab:]",
+ "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
+ "\\u061D\\uFDEF\\uFDFE",
+
+ // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
+ // so scx-sc is missing U+FDF2.
+ "[[:Script_Extensions=Arabic:]-[:Arab:]]",
+ "\\u0640\\u064B\\u0650\\u0655",
+ "\\uFDF2"
};
- static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
+ static const int32_t DATA_LEN = UPRV_LENGTHOF(DATA);
for (int32_t i=0; i<DATA_LEN; i+=3) {
expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
+#if !UCONFIG_NO_FILE_IO
CASE_MAPPINGS,
"[aq\\u00DF{Bc}{bC}{Fi}]",
"[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
+#endif
CASE_MAPPINGS,
"[\\u01F1]", // 'DZ'
if (s == t) {
logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
} else {
- errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
+ dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
s.toPattern(buf, TRUE) + ", expected " + exp);
}
}
const char exp[] =
"[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
// We test this with two passes; in the second pass we
- // pre-unescape the pattern. Since U+200E is rule whitespace,
+ // pre-unescape the pattern. Since U+200E is Pattern_White_Space,
// this fails -- which is what we expect.
for (int32_t pass=1; pass<=2; ++pass) {
UErrorCode ec = U_ZERO_ERROR;
(UChar32)-1, 8, 0, 8,
8, 0x110000, 8, 0x10FFFF
};
- const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
+ const int32_t DATA_LENGTH = UPRV_LENGTHOF(DATA);
UnicodeString pat;
int32_t i;
b = set.contains(start, end);
b = set.containsNone(start, end);
b = set.containsSome(start, end);
+ (void)b; // Suppress set but not used warning.
/*int32_t index = set.indexOf(start);*/
(UChar32)-1,
0x110000
};
- const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
+ const int32_t DATA2_LENGTH = UPRV_LENGTHOF(DATA2);
for (i=0; i<DATA2_LENGTH; ++i) {
UChar32 c = DATA2[i], end = 0x10FFFF;
Hashtable contents;
TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
- contents.setValueDeleter(uhash_deleteUnicodeString);
+ contents.setValueDeleter(uprv_deleteUObject);
}
~TokenSymbolTable() {}
for (int i=0; DATA[i] != 0; ++i) {
UErrorCode ec = U_ZERO_ERROR;
logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
- UnicodeSet set(UnicodeString(DATA[i], -1, US_INV), ec);
+ UnicodeString str = UnicodeString(DATA[i], -1, US_INV);
+ UnicodeSet set(str, ec);
if (U_FAILURE(ec)) {
errln("FAIL: UnicodeSet constructor");
continue;
errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " +
set.size() + ", expected 4");
}
+
+ {
+ UErrorCode subErr = U_ZERO_ERROR;
+ checkRoundTrip(set);
+ checkSerializeRoundTrip(set, subErr);
+ }
}
}
logln((UnicodeString)"Testing " + i + ", " + x);
_testComplement(i, x, y);
+ UnicodeSet &toTest = bitsToSet(i, aa);
+
// AS LONG AS WE ARE HERE, check roundtrip
- checkRoundTrip(bitsToSet(i, aa));
+ checkRoundTrip(toTest);
+ UErrorCode ec = U_ZERO_ERROR;
+ checkSerializeRoundTrip(toTest, ec);
for (int32_t j = 0; j < limit; ++j) {
_testAdd(i,j, x,y,z);
* get the same thing back
*/
void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
- UErrorCode ec = U_ZERO_ERROR;
+ { // Sub-check 1: a copy-constructed set must compare equal to s.
+ UnicodeSet t(s);
+ checkEqual(s, t, "copy ct");
+ }
- UnicodeSet t(s);
- checkEqual(s, t, "copy ct");
+ { // Sub-check 2: assignment must replace whatever the target held before.
+ UnicodeSet t(0xabcd, 0xdef0); // dummy contents should be overwritten
+ t = s;
+ checkEqual(s, t, "operator=");
+ }
- t = s;
- checkEqual(s, t, "operator=");
+ { // Sub-check 3: rebuild the set through copyWithIterator with withRange=FALSE.
+ UnicodeSet t;
+ copyWithIterator(t, s, FALSE);
+ checkEqual(s, t, "iterator roundtrip");
+ }
- copyWithIterator(t, s, FALSE);
- checkEqual(s, t, "iterator roundtrip");
+ { // Sub-check 4: same rebuild, this time with withRange=TRUE.
+ UnicodeSet t;
+ copyWithIterator(t, s, TRUE); // try range
+ checkEqual(s, t, "iterator roundtrip");
+ }
- copyWithIterator(t, s, TRUE); // try range
- checkEqual(s, t, "iterator roundtrip");
-
- UnicodeString pat; s.toPattern(pat, FALSE);
- t.applyPattern(pat, ec);
- if (U_FAILURE(ec)) {
- errln("FAIL: applyPattern");
- return;
- } else {
- checkEqual(s, t, "toPattern(false)");
+ { // Sub-check 5: toPattern(pat, FALSE) followed by applyPattern must reproduce s.
+ UnicodeSet t;
+ UnicodeString pat;
+ UErrorCode ec = U_ZERO_ERROR; // status local to this sub-check, so each check starts clean
+ s.toPattern(pat, FALSE);
+ t.applyPattern(pat, ec);
+ if (U_FAILURE(ec)) {
+ errln("FAIL: toPattern(escapeUnprintable=FALSE), applyPattern - %s", u_errorName(ec));
+ return; // leaves the whole function: the TRUE variant below is not attempted
+ } else {
+ checkEqual(s, t, "toPattern(false)");
+ }
}
-
- s.toPattern(pat, TRUE);
- t.applyPattern(pat, ec);
- if (U_FAILURE(ec)) {
- errln("FAIL: applyPattern");
- return;
- } else {
- checkEqual(s, t, "toPattern(true)");
+
+ { // Sub-check 6: toPattern(pat, TRUE) followed by applyPattern must reproduce s.
+ UnicodeSet t;
+ UnicodeString pat;
+ UErrorCode ec = U_ZERO_ERROR; // status local to this sub-check
+ s.toPattern(pat, TRUE);
+ t.applyPattern(pat, ec);
+ if (U_FAILURE(ec)) {
+ errln("FAIL: toPattern(escapeUnprintable=TRUE), applyPattern - %s", u_errorName(ec));
+ return;
+ } else {
+ checkEqual(s, t, "toPattern(true)");
+ }
}
}
-
+
+void UnicodeSetTest::checkSerializeRoundTrip(const UnicodeSet& t, UErrorCode &status) { // Serializes t into serializeBuffer, constructs a set from that buffer, and checks it equals t.
+ if(U_FAILURE(status)) return; // No-op when the caller passes in an existing failure.
+ int32_t len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status); // First attempt, using the buffer's current capacity.
+ if(status == U_BUFFER_OVERFLOW_ERROR) { // Buffer too small: grow it and try exactly once more.
+ status = U_ZERO_ERROR; // Clear the overflow so the retry reports its own outcome.
+ serializeBuffer.resize(len); // Uses the length returned by the overflowing call; presumably the required size -- confirm against the serialize() docs. Return value of resize() is not checked here.
+ len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
+ // let 2nd error stand: any failure of the retry is caught by the U_FAILURE check just below
+ }
+ if(U_FAILURE(status)) {
+ errln("checkSerializeRoundTrip: error %s serializing buffer\n", u_errorName(status));
+ return; // status is left holding the failure for the caller to inspect
+ }
+ UnicodeSet deserialized(serializeBuffer.getAlias(), len, UnicodeSet::kSerialized, status); // Rebuild a set from the first len units of the buffer.
+ if(U_FAILURE(status)) {
+ errln("checkSerializeRoundTrip: error %s deserializing buffer: buf %p len %d, original %d\n", u_errorName(status), serializeBuffer.getAlias(), len, t.getRangeCount()); // "original" is t's range count
+ return;
+ }
+
+ checkEqual(t, deserialized, "Set was unequal when deserialized"); // The string is the label passed to checkEqual's message parameter.
+}
+
void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
t.clear();
UnicodeSetIterator it(s);
}
UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
+ assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
+ assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
UnicodeString source; s.toPattern(source, TRUE);
UnicodeString result; t.toPattern(result, TRUE);
if (s != t) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet set(pat, ec);
if (U_FAILURE(ec)) {
- errln((UnicodeString)"FAIL: pattern \"" +
+ dataerrln((UnicodeString)"FAIL: pattern \"" +
pat + "\" => " + u_errorName(ec));
return;
}
UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
UnicodeSet idSet(idPattern, errorCode);
if(U_FAILURE(errorCode)) {
- errln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
+ dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
return;
}
UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
UnicodeSet wsSet(wsPattern, errorCode);
if(U_FAILURE(errorCode)) {
- errln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
+ dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
return;
}
const UnicodeString *s;
char *s8=utf8;
int32_t length8, utf8Count=0;
- while(iter.nextRange() && stringsLength<LENGTHOF(strings)) {
+ while(iter.nextRange() && stringsLength<UPRV_LENGTHOF(strings)) {
if(iter.isString()) {
// Store the pointer to the set's string element
// which we happen to know is a stable pointer.
char utf8[1024];
int32_t utf8Lengths[20];
-
- int32_t nextStringIndex;
- int32_t nextUTF8Start;
};
class UnicodeSetWithStringsIterator {
UChar32 c;
int32_t start=0, prev;
while((prev=start)<length) {
- U8_NEXT(s, start, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_NEXT_OR_FFFD(s, start, length, c);
if(realSet.contains(c)!=spanCondition) {
break;
}
UChar32 c;
int32_t start, next;
for(start=next=0; start<length;) {
- U8_NEXT(s, next, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_NEXT_OR_FFFD(s, next, length, c);
if(realSet.contains(c)) {
break;
}
UChar32 c;
int32_t start, next, maxSpanLimit=0;
for(start=next=0; start<length;) {
- U8_NEXT(s, next, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_NEXT_OR_FFFD(s, next, length, c);
if(!realSet.contains(c)) {
next=start; // Do not span this single, not-contained code point.
}
UChar32 c;
int32_t prev=length;
do {
- U8_PREV(s, 0, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_PREV_OR_FFFD(s, 0, length, c);
if(realSet.contains(c)!=spanCondition) {
break;
}
UChar32 c;
int32_t prev=length;
do {
- U8_PREV(s, 0, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_PREV_OR_FFFD(s, 0, length, c);
if(realSet.contains(c)) {
break;
}
UChar32 c;
int32_t prev=length, minSpanStart=length;
do {
- U8_PREV(s, 0, length, c);
- if(c<0) {
- c=0xfffd;
- }
+ U8_PREV_OR_FFFD(s, 0, length, c);
if(!realSet.contains(c)) {
length=prev; // Do not span this single, not-contained code point.
}
s, length, isUTF16,
whichSpans,
type, typeName,
- limits, LENGTHOF(limits), expectCount);
+ limits, UPRV_LENGTHOF(limits), expectCount);
if(typeName[0]==0) {
break; // All types tried.
}
}
if(expectCount<0) {
expectCount=limitsCount;
- if(limitsCount>LENGTHOF(limits)) {
+ if(limitsCount>UPRV_LENGTHOF(limits)) {
errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
- testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits));
+ testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)UPRV_LENGTHOF(limits));
return;
}
memcpy(expectLimits, limits, limitsCount*4);
UChar32 c, first;
for(first=c=0;; c=nextCodePoint(c)) {
- if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) {
+ if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) {
localWhichSpans=whichSpans;
if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
localWhichSpans&=~SPAN_UTF8;
return;
}
testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
- testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
+ testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
}
void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
return;
}
testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
- testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
+ testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
}
// Take a set of span options and multiply them so that
char *testNameLimit=testName;
int32_t i, j;
- for(i=0; i<LENGTHOF(testdata); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(testdata); ++i) {
const char *s=testdata[i];
if(s[0]=='[') {
// Create new test sets from this pattern.
UErrorCode errorCode=U_ZERO_ERROR;
sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
if(U_FAILURE(errorCode)) {
- errln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
+ dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
break;
}
sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
string16=UNICODE_STRING_SIMPLE("byayaxya");
const UChar *s16=string16.getBuffer();
int32_t length16=string16.length();
+ (void)length16; // Suppress set but not used warning.
if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
}
}
+
+/**
+ * Including collationroot.h fails here with
+1>c:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\include\driverspecs.h(142): error C2008: '$' : unexpected in macro definition
+ * .. so, we skip this test on Windows.
+ *
+ * The cause is that intltest is built with /Za, which disables language extensions,
+ * so Windows header files cannot be used.
+ */
+#if !UCONFIG_NO_COLLATION && !U_PLATFORM_HAS_WIN32_API
+#include "collationroot.h"
+#include "collationtailoring.h"
+#endif
+
+void UnicodeSetTest::TestUCAUnsafeBackwards() { // Round-trip checks on the collation root's unsafeBackwardSet; only logs a skip message on Win32-API builds, empty body when collation is configured out.
+#if U_PLATFORM_HAS_WIN32_API
+ infoln("Skipping TestUCAUnsafeBackwards() - can't include collationroot.h on Windows without language extensions!");
+#elif !UCONFIG_NO_COLLATION
+ UErrorCode errorCode = U_ZERO_ERROR;
+
+ // Get the unsafeBackwardsSet
+ const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
+ if(U_FAILURE(errorCode)) {
+ dataerrln("FAIL: %s getting root cache entry", u_errorName(errorCode)); // reported via dataerrln rather than errln; presumably because the root entry depends on ICU data being available -- confirm
+ return;
+ }
+ //const UVersionInfo &version = rootEntry->tailoring->version;
+ const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; // dereferenced below without a null check
+
+ checkSerializeRoundTrip(*unsafeBackwardSet, errorCode); // serialize/deserialize round trip; may leave a failure in errorCode, which is reset further down
+
+ if(!logKnownIssue("11891","UnicodeSet fails to round trip on CollationRoot...unsafeBackwards set")) { // the pattern round-trip checks below run only when logKnownIssue() returns false
+ // simple test case
+ // TODO(ticket #11891): Simplify this test function to this simple case. Rename it appropriately.
+ // TODO(ticket #11891): Port test to Java. Is this a bug there, too?
+ UnicodeSet surrogates;
+ surrogates.add(0xd83a); // a lead surrogate
+ surrogates.add(0xdc00, 0xdfff); // a range of trail surrogates
+ UnicodeString pat;
+ surrogates.toPattern(pat, FALSE); // bad: [ 0xd83a, 0xdc00, 0x2d, 0xdfff ]
+ // TODO: Probably fix either UnicodeSet::_generatePattern() or _appendToPat()
+ // so that at least one type of surrogate code points are escaped,
+ // or (minimally) so that adjacent lead+trail surrogate code points are escaped.
+ errorCode = U_ZERO_ERROR; // start applyPattern from a clean status regardless of earlier calls
+ UnicodeSet s2;
+ s2.applyPattern(pat, errorCode); // looks like invalid range [ 0x1e800, 0x2d, 0xdfff ]
+ if(U_FAILURE(errorCode)) {
+ errln("FAIL: surrogates to/from pattern - %s", u_errorName(errorCode));
+ } else {
+ checkEqual(surrogates, s2, "surrogates to/from pattern");
+ }
+ // This occurs in the UCA unsafe-backwards set.
+ checkRoundTrip(*unsafeBackwardSet); // full round trip (copy, assignment, iterator, pattern) on the real set
+ }
+#endif
+}