X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..ef6cf650f4a75c3f97de06b51fa104f2069b9ea2:/icuSources/test/intltest/usettest.cpp diff --git a/icuSources/test/intltest/usettest.cpp b/icuSources/test/intltest/usettest.cpp index 30da4b3f..6dbf9255 100644 --- a/icuSources/test/intltest/usettest.cpp +++ b/icuSources/test/intltest/usettest.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1999-2008 International Business Machines Corporation and +* Copyright (C) 1999-2016 International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************** * Date Name Description @@ -22,16 +22,15 @@ #include "unicode/parsepos.h" #include "unicode/symtable.h" #include "unicode/uversion.h" +#include "cmemory.h" #include "hash.h" -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ - errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ + dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ u_errorName(status));}} #define TEST_ASSERT(expr) {if (!(expr)) { \ - errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} + dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { UnicodeString pat; @@ -92,6 +91,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec, CASE(21,TestFreezable); CASE(22,TestSpan); CASE(23,TestStringSpan); + CASE(24,TestUCAUnsafeBackwards); default: name = ""; break; } } @@ -130,7 +130,7 @@ void UnicodeSetTest::TestToPattern() { ec = U_ZERO_ERROR; UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec); if (U_FAILURE(ec)) { - errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]); + dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec))); continue; } checkPat(OTHER_TOPATTERN_TESTS[j], s); @@ -281,7 +281,8 @@ UnicodeSetTest::TestCategories(void) { const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:] UnicodeSet set(pat, status); if (U_FAILURE(status)) { - errln((UnicodeString)"Fail: Can't construct set with " + pat); + dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status))); + return; } else { expectContainment(set, pat, "ABC", "abc"); } @@ -321,7 +322,7 @@ UnicodeSetTest::TestCloneEqualHash(void) { UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase if (U_FAILURE(status)){ - errln((UnicodeString)"FAIL: Can't construst set with category->Ll"); + dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status))); return; } UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit @@ -690,12 +691,55 @@ void UnicodeSetTest::TestAPI() { if (set != exp) { errln("FAIL: retain('s')"); return; } uint16_t buf[32]; - int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status); + int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status); if (U_FAILURE(status)) { errln("FAIL: serialize"); return; } if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) { errln("FAIL: serialize"); return; } + + // Conversions to and from USet + UnicodeSet *uniset = &set; + USet *uset = uniset->toUSet(); + TEST_ASSERT((void *)uset == (void *)uniset); + UnicodeSet *setx = UnicodeSet::fromUSet(uset); + TEST_ASSERT((void *)setx == (void *)uset); + const UnicodeSet *constSet = uniset; + const USet *constUSet = constSet->toUSet(); + TEST_ASSERT((void *)constUSet == (void *)constSet); + const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet); + TEST_ASSERT((void *)constSetx == (void *)constUSet); + + // span(UnicodeString) and spanBack(UnicodeString) convenience methods + UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc"); + UnicodeSet ac(0x61, 0x63); + ac.remove(0x62).freeze(); + if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 || + ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 || + ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 || + ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30 + ) { + errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes"); + } + if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 || + ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 || + ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 || + ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20 + ) { + errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes"); + } } void UnicodeSetTest::TestIteration() { @@ -850,6 +894,7 @@ void UnicodeSetTest::TestPropertySet() { "abc", "ABC", +#if !UCONFIG_NO_NORMALIZATION // Combining class: @since ICU 2.2 // Check both symbolic and numeric "\\p{ccc=Nukta}", @@ -863,6 +908,7 @@ void UnicodeSetTest::TestPropertySet() { "[:c c c = iota subscript :]", "\\u0345", "xyz", +#endif // Bidi class: @since ICU 2.2 "\\p{bidiclass=lefttoright}", @@ -974,6 +1020,7 @@ void UnicodeSetTest::TestPropertySet() { "abcd\\uDC00", "ef\\uD800\\U00010000", +#if !UCONFIG_NO_NORMALIZATION "[:^lccc=0:]", // Lead canonical class "\\u0300\\u0301", "abcd\\u00c0\\u00c5", @@ -993,13 +1040,25 @@ void UnicodeSetTest::TestPropertySet() { "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not "\\u0F73\\u0F75\\u0F81", "abcd\\u0300\\u0301\\u00c0\\u00c5", +#endif /* !UCONFIG_NO_NORMALIZATION */ "[:Assigned:]", "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD", - "\\u0888\\uFDD3\\uFFFE\\U00050005" + "\\u0888\\uFDD3\\uFFFE\\U00050005", + + // Script_Extensions, new in Unicode 6.0 + "[:scx=Arab:]", + "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3", + "\\u061D\\uFDEF\\uFDFE", + + // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, + // so scx-sc is missing U+FDF2. + "[[:Script_Extensions=Arabic:]-[:Arab:]]", + "\\u0640\\u064B\\u0650\\u0655", + "\\uFDF2" }; - static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); + static const int32_t DATA_LEN = UPRV_LENGTHOF(DATA); for (int32_t i=0; i " + exp); } else { - errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " + + dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " + s.toPattern(buf, TRUE) + ", expected " + exp); } } @@ -1327,7 +1388,7 @@ void UnicodeSetTest::TestEscapePattern() { const char exp[] = "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]"; // We test this with two passes; in the second pass we - // pre-unescape the pattern. Since U+200E is rule whitespace, + // pre-unescape the pattern. Since U+200E is Pattern_White_Space, // this fails -- which is what we expect. for (int32_t pass=1; pass<=2; ++pass) { UErrorCode ec = U_ZERO_ERROR; @@ -1401,7 +1462,7 @@ void UnicodeSetTest::TestInvalidCodePoint() { (UChar32)-1, 8, 0, 8, 8, 0x110000, 8, 0x10FFFF }; - const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]); + const int32_t DATA_LENGTH = UPRV_LENGTHOF(DATA); UnicodeString pat; int32_t i; @@ -1427,6 +1488,7 @@ void UnicodeSetTest::TestInvalidCodePoint() { b = set.contains(start, end); b = set.containsNone(start, end); b = set.containsSome(start, end); + (void)b; // Suppress set but not used warning. /*int32_t index = set.indexOf(start);*/ @@ -1463,7 +1525,7 @@ void UnicodeSetTest::TestInvalidCodePoint() { (UChar32)-1, 0x110000 }; - const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]); + const int32_t DATA2_LENGTH = UPRV_LENGTHOF(DATA2); for (i=0; i " + u_errorName(ec)); return; } @@ -2091,14 +2207,14 @@ void UnicodeSetTest::TestFreezable() { UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15); UnicodeSet idSet(idPattern, errorCode); if(U_FAILURE(errorCode)) { - errln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode)); + dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode)); return; } UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15); UnicodeSet wsSet(wsPattern, errorCode); if(U_FAILURE(errorCode)) { - errln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode)); + dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode)); return; } @@ -2259,7 +2375,7 @@ public: const UnicodeString *s; char *s8=utf8; int32_t length8, utf8Count=0; - while(iter.nextRange() && stringsLengthLENGTHOF(limits)) { + if(limitsCount>UPRV_LENGTHOF(limits)) { errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans", - testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits)); + testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)UPRV_LENGTHOF(limits)); return; } memcpy(expectLimits, limits, limitsCount*4); @@ -3238,7 +3333,7 @@ void UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint UChar32 c, first; for(first=c=0;; c=nextCodePoint(c)) { - if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) { + if(c>0x10ffff || length>(UPRV_LENGTHOF(s)-U16_MAX_LENGTH)) { localWhichSpans=whichSpans; if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) { localWhichSpans&=~SPAN_UTF8; @@ -3274,7 +3369,7 @@ void UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], u return; } testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); - testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1); + testSpan(sets, s, UPRV_LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1); } void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { @@ -3371,7 +3466,7 @@ void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], ui return; } testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); - testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1); + testSpan(sets, s, UPRV_LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1); } // Take a set of span options and multiply them so that @@ -3573,7 +3668,7 @@ void UnicodeSetTest::TestSpan() { char *testNameLimit=testName; int32_t i, j; - for(i=0; ic:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\include\driverspecs.h(142): error C2008: '$' : unexpected in macro definition + * .. so, we skip this test on Windows. + * + * the cause is that intltest builds with /Za which disables language extensions - which means + * windows header files can't be used. + */ +#if !UCONFIG_NO_COLLATION && !U_PLATFORM_HAS_WIN32_API +#include "collationroot.h" +#include "collationtailoring.h" +#endif + +void UnicodeSetTest::TestUCAUnsafeBackwards() { +#if U_PLATFORM_HAS_WIN32_API + infoln("Skipping TestUCAUnsafeBackwards() - can't include collationroot.h on Windows without language extensions!"); +#elif !UCONFIG_NO_COLLATION + UErrorCode errorCode = U_ZERO_ERROR; + + // Get the unsafeBackwardsSet + const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); + if(U_FAILURE(errorCode)) { + dataerrln("FAIL: %s getting root cache entry", u_errorName(errorCode)); + return; + } + //const UVersionInfo &version = rootEntry->tailoring->version; + const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; + + checkSerializeRoundTrip(*unsafeBackwardSet, errorCode); + + if(!logKnownIssue("11891","UnicodeSet fails to round trip on CollationRoot...unsafeBackwards set")) { + // simple test case + // TODO(ticket #11891): Simplify this test function to this simple case. Rename it appropriately. + // TODO(ticket #11891): Port test to Java. Is this a bug there, too? + UnicodeSet surrogates; + surrogates.add(0xd83a); // a lead surrogate + surrogates.add(0xdc00, 0xdfff); // a range of trail surrogates + UnicodeString pat; + surrogates.toPattern(pat, FALSE); // bad: [ 0xd83a, 0xdc00, 0x2d, 0xdfff ] + // TODO: Probably fix either UnicodeSet::_generatePattern() or _appendToPat() + // so that at least one type of surrogate code points are escaped, + // or (minimally) so that adjacent lead+trail surrogate code points are escaped. + errorCode = U_ZERO_ERROR; + UnicodeSet s2; + s2.applyPattern(pat, errorCode); // looks like invalid range [ 0x1e800, 0x2d, 0xdfff ] + if(U_FAILURE(errorCode)) { + errln("FAIL: surrogates to/from pattern - %s", u_errorName(errorCode)); + } else { + checkEqual(surrogates, s2, "surrogates to/from pattern"); + } + // This occurs in the UCA unsafe-backwards set. + checkRoundTrip(*unsafeBackwardSet); + } +#endif +}