X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..e4f10fab0c078f399c9deef476d9c9b73b47dff8:/icuSources/test/intltest/tstnorm.cpp diff --git a/icuSources/test/intltest/tstnorm.cpp b/icuSources/test/intltest/tstnorm.cpp index 8b6391c1..cde6dbe8 100644 --- a/icuSources/test/intltest/tstnorm.cpp +++ b/icuSources/test/intltest/tstnorm.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2010, International Business Machines Corporation and + * Copyright (c) 1997-2011, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -14,6 +14,7 @@ #include "unicode/uniset.h" #include "unicode/usetiter.h" #include "unicode/schriter.h" +#include "unicode/utf16.h" #include "cstring.h" #include "normalizer2impl.h" #include "tstnorm.h" @@ -688,8 +689,8 @@ void BasicNormalizerTest::TestPreviousNext() { // src and expect strings static const UChar src[]={ - UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999), - UTF16_LEAD(0x1d15f), UTF16_TRAIL(0x1d15f), + U16_LEAD(0x2f999), U16_TRAIL(0x2f999), + U16_LEAD(0x1d15f), U16_TRAIL(0x1d15f), 0xc4, 0x1ed0 }; @@ -711,7 +712,7 @@ BasicNormalizerTest::TestPreviousNext() { // src and expect strings for regression test for j2911 static const UChar src_j2911[]={ - UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999), + U16_LEAD(0x2f999), U16_TRAIL(0x2f999), 0xdd00, 0xd900, // unpaired surrogates - regression test for j2911 0xc4, 0x4f, 0x302, 0x301 @@ -1166,6 +1167,23 @@ BasicNormalizerTest::TestCompare() { errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions"); } + // test getRawDecomposition() for some characters that do not decompose + if( nfcNorm2->getRawDecomposition(0x20, s2) || + nfcNorm2->getRawDecomposition(0x4e00, s2) || + nfcNorm2->getRawDecomposition(0x20002, s2) + ) { + errln("NFC.getRawDecomposition() returns TRUE for characters which do not have decompositions"); + } + + // test composePair() for some pairs of characters that do not compose + if( nfcNorm2->composePair(0x20, 0x301)>=0 || + nfcNorm2->composePair(0x61, 0x305)>=0 || + nfcNorm2->composePair(0x1100, 0x1160)>=0 || + nfcNorm2->composePair(0xac00, 0x11a7)>=0 + ) { + errln("NFC.composePair() incorrectly composes some pairs of characters"); + } + // test FilteredNormalizer2::getDecomposition() UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode); FilteredNormalizer2 fn2(*nfcNorm2, filter); @@ -1174,6 +1192,20 @@ BasicNormalizerTest::TestCompare() { ) { errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed"); } + + // test FilteredNormalizer2::getRawDecomposition() + if( fn2.getRawDecomposition(0xe4, s1) || !fn2.getRawDecomposition(0x100, s2) || + s2.length()!=2 || s2[0]!=0x41 || s2[1]!=0x304 + ) { + errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed"); + } + + // test FilteredNormalizer2::composePair() + if( 0x100!=fn2.composePair(0x41, 0x304) || + fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08 + ) { + errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed"); + } } // verify that case-folding does not un-FCD strings @@ -1314,7 +1346,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorC // For each character about which we are unsure, see if it changes when we add // one of the back-combining characters. - const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode); + const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode); UnicodeString s; iter.reset(*unsure); while(iter.next()) { @@ -1458,9 +1490,7 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() { dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status)); return; } - UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode); - UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]"); - UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]"); + UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode); FilteredNormalizer2 fn2(*nfcNorm2, filter); UChar32 char32 = 0x0054; @@ -1473,6 +1503,20 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() { errln("FilteredNormalizer2.hasBoundaryAfter() failed."); } + UChar32 c; + for(c=0; c<=0x3ff; ++c) { + uint8_t expectedCC= filter.contains(c) ? nfcNorm2->getCombiningClass(c) : 0; + uint8_t cc=fn2.getCombiningClass(c); + if(cc!=expectedCC) { + errln( + UnicodeString("FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+")+ + hex(c)+ + ")==filtered NFC.getCC()"); + } + } + + UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]"); + UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]"); fn2.append(newString1, newString2, errorCode); if (U_FAILURE(errorCode)) { errln("FilteredNormalizer2.append() failed.");