git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/intltest/normconf.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / normconf.cpp
index 10e7ae89ceef2af62f6492de3c3a3fdc9e4f59d7..7c574ddb3edcbdd052447103877f4bf9e6ec79fc 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 ************************************************************************
-* Copyright (c) 1997-2008, International Business Machines
+* Copyright (c) 1997-2016, International Business Machines
 * Corporation and others.  All Rights Reserved.
 ************************************************************************
 */
 
 #if !UCONFIG_NO_NORMALIZATION
 
+#include <string>
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
 #include "unicode/uchar.h"
+#include "unicode/normalizer2.h"
 #include "unicode/normlzr.h"
 #include "unicode/uniset.h"
 #include "unicode/putil.h"
-#include "unormimp.h"
+#include "cmemory.h"
 #include "cstring.h"
 #include "filestrm.h"
 #include "normconf.h"
+#include "uassert.h"
 #include <stdio.h>
 
-#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0]))
-
-#define CASE(id,test) case id:                          \
-                          name = #test;                 \
-                          if (exec) {                   \
-                              logln(#test "---");       \
-                              logln((UnicodeString)""); \
-                              test();                   \
-                          }                             \
-                          break
-
 void NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
-    switch (index) {
-        CASE(0, TestConformance);
-        CASE(1, TestConformance32);
-        // CASE(2, TestCase6);
-        default: name = ""; break;
-    }
+    TESTCASE_AUTO_BEGIN;  // NOTE(review): TESTCASE_AUTO* are defined outside this file; presumably they assign indexes in listing order -- confirm
+    TESTCASE_AUTO(TestConformance);
+    TESTCASE_AUTO(TestConformance32);
+    TESTCASE_AUTO(TestCase6);
+    TESTCASE_AUTO_END;
 }
 
 #define FIELD_COUNT 5
 
 NormalizerConformanceTest::NormalizerConformanceTest() :
-    normalizer(UnicodeString(), UNORM_NFC) {}
+        normalizer(UnicodeString(), UNORM_NFC) {
+    UErrorCode errorCode = U_ZERO_ERROR;  // one status shared by the four getInstance calls below
+    nfc = Normalizer2::getNFCInstance(errorCode);
+    nfd = Normalizer2::getNFDInstance(errorCode);
+    nfkc = Normalizer2::getNFKCInstance(errorCode);
+    nfkd = Normalizer2::getNFKDInstance(errorCode);
+    assertSuccess("", errorCode, true, __FILE__, __LINE__);  // checked once for all four; NOTE(review): nfc..nfkd are later dereferenced/passed on without a null check -- confirm they cannot be null here
+}
 
 NormalizerConformanceTest::~NormalizerConformanceTest() {}
 
@@ -141,7 +143,7 @@ NormalizerConformanceTest::openNormalizationTestFile(const char *filename) {
     }
 #endif
 
-    dataerrln("[DATA] Failed to open %s", filename);
+    dataerrln("Failed to open %s", filename);
     return NULL;
 }
 
@@ -173,7 +175,7 @@ void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t optio
     // UnicodeSet for all code points that are not mentioned in NormalizationTest.txt
     UnicodeSet other(0, 0x10ffff);
 
-    int32_t count, countMoreCases = sizeof(moreCases)/sizeof(moreCases[0]);
+    int32_t count, countMoreCases = UPRV_LENGTHOF(moreCases);
     for (count = 1;;++count) {
         if (!T_FileStream_eof(input)) {
             T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf));
@@ -225,7 +227,7 @@ void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t optio
         } else {
             ++failCount;
             if(status == U_FILE_ACCESS_ERROR) {
-              errln("Something is wrong with the normalizer, skipping the rest of the test.");
+              dataerrln("Something is wrong with the normalizer, skipping the rest of the test.");
               break;
             }
         }
@@ -261,7 +263,7 @@ void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t optio
         } else {
             ++failCount;
             if(status == U_FILE_ACCESS_ERROR) {
-              errln("Something is wrong with the normalizer, skipping the rest of the test.");
+              dataerrln("Something is wrong with the normalizer, skipping the rest of the test.: %s", u_errorName(status));
               break;
             }
         }
@@ -271,13 +273,22 @@ void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t optio
     }
 
     if (failCount != 0) {
-        errln((UnicodeString)"Total: " + failCount + " lines/code points failed, " +
+        dataerrln((UnicodeString)"Total: " + failCount + " lines/code points failed, " +
               passCount + " lines/code points passed");
     } else {
         logln((UnicodeString)"Total: " + passCount + " lines/code points passed");
     }
 }
 
+namespace {
+
+// Converts s to UTF-8 and returns norm2.isNormalizedUTF8() for that UTF-8 text.
+UBool isNormalizedUTF8(const Normalizer2 &norm2, const UnicodeString &s, UErrorCode &errorCode) {
+    std::string s8;
+    return norm2.isNormalizedUTF8(s.toUTF8String(s8), errorCode);
+}
+}  // namespace
+
 /**
  * Verify the conformance of the given line of the Unicode
  * normalization (UTR 15) test suite file.  For each line,
@@ -298,40 +309,17 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
                                                   int32_t options,
                                                   UErrorCode &status) {
     UBool pass = TRUE, result;
-    //UErrorCode status = U_ZERO_ERROR;
     UnicodeString out, fcd;
     int32_t fieldNum;
 
     for (int32_t i=0; i<FIELD_COUNT; ++i) {
         fieldNum = i+1;
         if (i<3) {
-            Normalizer::normalize(field[i], UNORM_NFC, options, out, status);
-            pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFC, options, out, +1);
-            pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFC, options, out, -1);
-            pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c", fieldNum);
-
-            Normalizer::normalize(field[i], UNORM_NFD, options, out, status);
-            pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFD, options, out, +1);
-            pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c", fieldNum);
-            iterativeNorm(field[i], UNORM_NFD, options, out, -1);
-            pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c", fieldNum);
+            pass &= checkNorm(UNORM_NFC, options, nfc, field[i], field[1], fieldNum);
+            pass &= checkNorm(UNORM_NFD, options, nfd, field[i], field[2], fieldNum);
         }
-        Normalizer::normalize(field[i], UNORM_NFKC, options, out, status);
-        pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKC, options, out, +1);
-        pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKC, options, out, -1);
-        pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", fieldNum);
-
-        Normalizer::normalize(field[i], UNORM_NFKD, options, out, status);
-        pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKD, options, out, +1);
-        pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
-        iterativeNorm(field[i], UNORM_NFKD, options, out, -1);
-        pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
+        pass &= checkNorm(UNORM_NFKC, options, nfkc, field[i], field[3], fieldNum);
+        pass &= checkNorm(UNORM_NFKD, options, nfkd, field[i], field[4], fieldNum);
     }
     compare(field[1],field[2]);
     compare(field[0],field[1]);
@@ -360,20 +348,41 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
         result = Normalizer::isNormalized(field[1], UNORM_NFC, options, status);
     }
     if(!result) {
-        errln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE");
+        dataerrln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE");
         pass = FALSE;
     }
-    if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
-        errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
+    if(options==0 && !isNormalizedUTF8(*nfc, field[1], status)) {
+        dataerrln("Normalizer error: nfc.isNormalizedUTF8(NFC(s)) is FALSE");
         pass = FALSE;
     }
-    if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) {
-        errln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE");
-        pass = FALSE;
+    if(field[0]!=field[1]) {
+        if(Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
+            errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
+            pass = FALSE;
+        }
+        if(isNormalizedUTF8(*nfc, field[0], status)) {
+            errln("Normalizer error: nfc.isNormalizedUTF8(s) is TRUE");
+            pass = FALSE;
+        }
     }
-    if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
-        errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
+    if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) {
+        dataerrln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE");
         pass = FALSE;
+    } else {
+        if(options==0 && !isNormalizedUTF8(*nfkc, field[3], status)) {
+            dataerrln("Normalizer error: nfkc.isNormalizedUTF8(NFKC(s)) is FALSE");
+            pass = FALSE;
+        }
+        if(field[0]!=field[3]) {
+            if(Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
+                errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
+                pass = FALSE;
+            }
+            if(options==0 && isNormalizedUTF8(*nfkc, field[0], status)) {
+                errln("Normalizer error: nfkc.isNormalizedUTF8(s) is TRUE");
+                pass = FALSE;
+            }
+        }
     }
 
     // test FCD quick check and "makeFCD"
@@ -393,12 +402,12 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
 
     Normalizer::normalize(fcd, UNORM_NFD, options, out, status);
     if(out != field[2]) {
-        errln("Normalizer error: NFD(FCD(s))!=NFD(s)");
+        dataerrln("Normalizer error: NFD(FCD(s))!=NFD(s)");
         pass = FALSE;
     }
 
     if (U_FAILURE(status)) {
-        errln("Normalizer::normalize returned error status");
+        dataerrln("Normalizer::normalize returned error status: %s", u_errorName(status));
         pass = FALSE;
     }
 
@@ -412,7 +421,7 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
         status=U_ZERO_ERROR;
         rc=Normalizer::compare(field[0], field[2], (options<<UNORM_COMPARE_NORM_OPTIONS_SHIFT)|U_COMPARE_IGNORE_CASE, status);
         if(U_FAILURE(status)) {
-            errln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status));
+            dataerrln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status));
             pass=FALSE;
         } else if(rc!=0) {
             errln("Normalizer::compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc);
@@ -421,8 +430,83 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field,
     }
 
     if (!pass) {
-        errln("FAIL: %s", line);
+        dataerrln("FAIL: %s", line);
+    }
+    return pass;
+}
+
+static const char *const kModeStrings[UNORM_MODE_COUNT] = {  // short mode names, indexed by UNormalizationMode; used in checkNorm()'s messages
+    "?", "none", "D", "KD", "C", "KC", "FCD"
+};
+
+static const char *const kMessages[UNORM_MODE_COUNT] = {  // per-mode failure labels, indexed by UNormalizationMode like kModeStrings; %d receives the source field number
+    "?!=?", "?!=?", "c3!=D(c%d)", "c5!=KD(c%d)", "c2!=C(c%d)", "c4!=KC(c%d)", "FCD"
+};  // index 3 is the NFKD entry ("KD" in kModeStrings), so its label must say KD, not KC
+
+// Checks that normalizing s in the given mode yields exp: first with Normalizer::normalize(),
+// then with iterativeNorm() using +1 and -1, and -- only if norm2 is non-null and options==0 --
+// with norm2->normalizeUTF8(). field only fills the %d in the message. FALSE if any check fails.
+UBool NormalizerConformanceTest::checkNorm(UNormalizationMode mode, int32_t options,
+                                           const Normalizer2 *norm2,
+                                           const UnicodeString &s, const UnicodeString &exp,
+                                           int32_t field) {
+    const char *modeString = kModeStrings[mode];
+    char msg[20];
+    snprintf(msg, sizeof(msg), kMessages[mode], field);  // write is bounded by sizeof(msg)
+    UnicodeString out;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    Normalizer::normalize(s, mode, options, out, errorCode);
+    if (U_FAILURE(errorCode)) {
+        dataerrln("Error running normalize UNORM_NF%s: %s", modeString, u_errorName(errorCode));
+        return FALSE;
+    }
+    if (!assertEqual(modeString, "", s, out, exp, msg)) {
+        return FALSE;
+    }
+    iterativeNorm(s, mode, options, out, +1);
+    if (!assertEqual(modeString, "(+1)", s, out, exp, msg)) {
+        return FALSE;
+    }
+    iterativeNorm(s, mode, options, out, -1);
+    if (!assertEqual(modeString, "(-1)", s, out, exp, msg)) {
+        return FALSE;
+    }
+    // The Normalizer2 UTF-8 checks below run only when a Normalizer2 was supplied and no options are set.
+    if (norm2 == nullptr || options != 0) {
+        return TRUE;
+    }
+    std::string s8;
+    s.toUTF8String(s8);
+    std::string exp8;
+    exp.toUTF8String(exp8);
+    std::string out8;
+    Edits edits;
+    Edits *editsPtr = (mode == UNORM_NFC || mode == UNORM_NFKC) ? &edits : nullptr;
+    StringByteSink<std::string> sink(&out8, static_cast<int32_t>(exp8.length()));  // NOTE(review): second argument looks like a capacity hint sized to the expected output -- confirm against bytestream.h
+    norm2->normalizeUTF8(0, s8, sink, editsPtr, errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("Normalizer2.%s.normalizeUTF8(%s) failed: %s",
+              modeString, s8.c_str(), u_errorName(errorCode));
+        return FALSE;
+    }
+    if (out8 != exp8) {
+        errln("Normalizer2.%s.normalizeUTF8(%s)=%s != %s",
+              modeString, s8.c_str(), out8.c_str(), exp8.c_str());
+        return FALSE;
+    }
+    if (editsPtr == nullptr) {
+        return TRUE;
     }
+    // Reached only for UNORM_NFC / UNORM_NFKC, the modes for which editsPtr is non-null.
+    // Do the Edits cover the entire input & output?
+    UBool pass = TRUE;
+    pass &= assertEquals("edits.hasChanges()", (UBool)(s8 != out8), edits.hasChanges());
+    pass &= assertEquals("edits.lengthDelta()",
+                         (int32_t)(out8.length() - s8.length()), edits.lengthDelta());
+    Edits::Iterator iter = edits.getCoarseIterator();
+    while (iter.next(errorCode)) {}  // advance to the end; the checks below expect the final indexes to equal the full lengths. NOTE(review): errorCode is not checked after this loop
+    pass &= assertEquals("edits source length", static_cast<int32_t>(s8.length()), iter.sourceIndex());
+    pass &= assertEquals("edits destination length", static_cast<int32_t>(out8.length()), iter.destinationIndex());
     return pass;
 }
 
@@ -457,21 +541,11 @@ void NormalizerConformanceTest::iterativeNorm(const UnicodeString& str,
     }
 }
 
-/**
- * @param op name of normalization form, e.g., "KC"
- * @param s string being normalized
- * @param got value received
- * @param exp expected value
- * @param msg description of this test
- * @param return true if got == exp
- */
-UBool NormalizerConformanceTest::assertEqual(const char *op,
+UBool NormalizerConformanceTest::assertEqual(const char *op, const char *op2,
                                              const UnicodeString& s,
                                              const UnicodeString& got,
                                              const UnicodeString& exp,
-                                             const char *msg,
-                                             int32_t field)
-{
+                                             const char *msg) {
     if (exp == got)
         return TRUE;
 
@@ -491,7 +565,7 @@ UBool NormalizerConformanceTest::assertEqual(const char *op,
     expPretty.extract(0, expPretty.length(), expChars, expPretty.length() + 1);
     expChars[expPretty.length()] = 0;
 
-    errln("    %s%d)%s(%s)=%s, exp. %s", msg, field, op, sChars, gotChars, expChars);
+    errln("    %s: %s%s(%s)=%s, exp. %s", msg, op, op2, sChars, gotChars, expChars);
 
     delete []sChars;
     delete []gotChars;