git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/intltest/tstnorm.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / test / intltest / tstnorm.cpp
index 8b6391c19c7957b3d00ac72fb26bbef0421eb0f0..cde6dbe88c77ab4a58fbae68a014d24ce2e69fb9 100644 (file)
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT: 
- * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/
 
@@ -14,6 +14,7 @@
 #include "unicode/uniset.h"
 #include "unicode/usetiter.h"
 #include "unicode/schriter.h"
+#include "unicode/utf16.h"
 #include "cstring.h"
 #include "normalizer2impl.h"
 #include "tstnorm.h"
@@ -688,8 +689,8 @@ void
 BasicNormalizerTest::TestPreviousNext() {
     // src and expect strings
     static const UChar src[]={
-        UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
-        UTF16_LEAD(0x1d15f), UTF16_TRAIL(0x1d15f),
+        U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
+        U16_LEAD(0x1d15f), U16_TRAIL(0x1d15f),
         0xc4,
         0x1ed0
     };
@@ -711,7 +712,7 @@ BasicNormalizerTest::TestPreviousNext() {
 
     // src and expect strings for regression test for j2911
     static const UChar src_j2911[]={
-        UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
+        U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
         0xdd00, 0xd900, // unpaired surrogates - regression test for j2911
         0xc4,
         0x4f, 0x302, 0x301
@@ -1166,6 +1167,23 @@ BasicNormalizerTest::TestCompare() {
         errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
     }
 
+    // test getRawDecomposition() for some characters that do not decompose
+    if( nfcNorm2->getRawDecomposition(0x20, s2) ||
+        nfcNorm2->getRawDecomposition(0x4e00, s2) ||
+        nfcNorm2->getRawDecomposition(0x20002, s2)
+    ) {
+        errln("NFC.getRawDecomposition() returns TRUE for characters which do not have decompositions");
+    }
+
+    // test composePair() for some pairs of characters that do not compose
+    if( nfcNorm2->composePair(0x20, 0x301)>=0 ||
+        nfcNorm2->composePair(0x61, 0x305)>=0 ||
+        nfcNorm2->composePair(0x1100, 0x1160)>=0 ||
+        nfcNorm2->composePair(0xac00, 0x11a7)>=0
+    ) {
+        errln("NFC.composePair() incorrectly composes some pairs of characters");
+    }
+
     // test FilteredNormalizer2::getDecomposition()
     UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode);
     FilteredNormalizer2 fn2(*nfcNorm2, filter);
@@ -1174,6 +1192,20 @@ BasicNormalizerTest::TestCompare() {
     ) {
         errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
     }
+
+    // test FilteredNormalizer2::getRawDecomposition()
+    if( fn2.getRawDecomposition(0xe4, s1) || !fn2.getRawDecomposition(0x100, s2) ||
+        s2.length()!=2 || s2[0]!=0x41 || s2[1]!=0x304
+    ) {
+        errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
+    }
+
+    // test FilteredNormalizer2::composePair()
+    if( 0x100!=fn2.composePair(0x41, 0x304) ||
+        fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08
+    ) {
+        errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
+    }
 }
 
 // verify that case-folding does not un-FCD strings
@@ -1314,7 +1346,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorC
 
     // For each character about which we are unsure, see if it changes when we add
     // one of the back-combining characters.
-    const Normalizer2 *norm2=Normalizer2::getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode);
+    const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode);
     UnicodeString s;
     iter.reset(*unsure);
     while(iter.next()) {
@@ -1458,9 +1490,7 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
         dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status));
         return;
     }
-    UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode);
-    UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
-    UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
+    UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);
     FilteredNormalizer2 fn2(*nfcNorm2, filter);
 
     UChar32 char32 = 0x0054;
@@ -1473,6 +1503,20 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
         errln("FilteredNormalizer2.hasBoundaryAfter() failed.");
     }
 
+    UChar32 c;
+    for(c=0; c<=0x3ff; ++c) {
+        uint8_t expectedCC= filter.contains(c) ? nfcNorm2->getCombiningClass(c) : 0;
+        uint8_t cc=fn2.getCombiningClass(c);
+        if(cc!=expectedCC) {
+            errln(
+                UnicodeString("FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+")+
+                hex(c)+
+                ")==filtered NFC.getCC()");
+        }
+    }
+
+    UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
+    UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
     fn2.append(newString1, newString2, errorCode);
     if (U_FAILURE(errorCode)) {
         errln("FilteredNormalizer2.append() failed.");