+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
************************************************************************
-* Copyright (c) 1997-2010, International Business Machines
+* Copyright (c) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
************************************************************************
*/
#if !UCONFIG_NO_NORMALIZATION
+#include <string>
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
#include "unicode/uchar.h"
+#include "unicode/normalizer2.h"
#include "unicode/normlzr.h"
#include "unicode/uniset.h"
#include "unicode/putil.h"
+#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "normconf.h"
+#include "uassert.h"
#include <stdio.h>
-#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0]))
-
-#define CASE(id,test,exec) case id: \
- name = #test; \
- if (exec) { \
- logln(#test "---"); \
- logln((UnicodeString)""); \
- test(); \
- } \
- break
-
void NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
- switch (index) {
- CASE(0, TestConformance, exec);
-#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
- CASE(1, TestConformance32, exec);
-#endif
- // CASE(2, TestCase6);
- default: name = ""; break;
- }
+ TESTCASE_AUTO_BEGIN; // test dispatch now goes through the TESTCASE_AUTO_* macros (not defined in the visible part of this file) instead of the removed index switch
+ TESTCASE_AUTO(TestConformance);
+ TESTCASE_AUTO(TestConformance32); // NOTE(review): the removed switch listed this test only when !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION; it is now listed unconditionally - confirm
+ TESTCASE_AUTO(TestCase6); // NOTE(review): this entry was commented out in the removed switch; confirm TestCase6 is meant to be listed now
+ TESTCASE_AUTO_END;
}
#define FIELD_COUNT 5
NormalizerConformanceTest::NormalizerConformanceTest() :
- normalizer(UnicodeString(), UNORM_NFC) {}
+ normalizer(UnicodeString(), UNORM_NFC) { // the normalizer member is constructed from a default UnicodeString and UNORM_NFC, as before
+ UErrorCode errorCode = U_ZERO_ERROR; // a single error code is shared by the four lookups below
+ nfc = Normalizer2::getNFCInstance(errorCode); // nfc and nfkc are later dereferenced by the isNormalizedUTF8() checks; all four are passed to checkNorm()
+ nfd = Normalizer2::getNFDInstance(errorCode);
+ nfkc = Normalizer2::getNFKCInstance(errorCode);
+ nfkd = Normalizer2::getNFKDInstance(errorCode);
+ assertSuccess("", errorCode, true, __FILE__, __LINE__); // errorCode is inspected once, after all four calls; presumably a failure is logged against this file/line - confirm against assertSuccess()
+}
NormalizerConformanceTest::~NormalizerConformanceTest() {}
// UnicodeSet for all code points that are not mentioned in NormalizationTest.txt
UnicodeSet other(0, 0x10ffff);
- int32_t count, countMoreCases = sizeof(moreCases)/sizeof(moreCases[0]);
+ int32_t count, countMoreCases = UPRV_LENGTHOF(moreCases);
for (count = 1;;++count) {
if (!T_FileStream_eof(input)) {
T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf));
}
}
+namespace {
+
+/**
+ * Converts s to UTF-8 and asks norm2 whether that UTF-8 text is normalized.
+ * @param norm2 the normalizer whose isNormalizedUTF8() is called
+ * @param s the UTF-16 string to convert and check
+ * @param errorCode passed through to norm2.isNormalizedUTF8()
+ * @return the result of norm2.isNormalizedUTF8() for the converted text
+ */
+UBool isNormalizedUTF8(const Normalizer2 &norm2, const UnicodeString &s, UErrorCode &errorCode) {
+    std::string utf8;
+    s.toUTF8String(utf8);
+    return norm2.isNormalizedUTF8(utf8, errorCode);
+}
+
+}  // namespace
+
/**
* Verify the conformance of the given line of the Unicode
* normalization (UTR 15) test suite file. For each line,
int32_t options,
UErrorCode &status) {
UBool pass = TRUE, result;
- //UErrorCode status = U_ZERO_ERROR;
UnicodeString out, fcd;
int32_t fieldNum;
for (int32_t i=0; i<FIELD_COUNT; ++i) {
fieldNum = i+1;
if (i<3) {
- Normalizer::normalize(field[i], UNORM_NFC, options, out, status);
- if (U_FAILURE(status)) {
- dataerrln("Error running normalize UNORM_NFC: %s", u_errorName(status));
- } else {
- pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFC, options, out, +1);
- pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFC, options, out, -1);
- pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c", fieldNum);
- }
-
- Normalizer::normalize(field[i], UNORM_NFD, options, out, status);
- if (U_FAILURE(status)) {
- dataerrln("Error running normalize UNORM_NFD: %s", u_errorName(status));
- } else {
- pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFD, options, out, +1);
- pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFD, options, out, -1);
- pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c", fieldNum);
- }
- }
- Normalizer::normalize(field[i], UNORM_NFKC, options, out, status);
- if (U_FAILURE(status)) {
- dataerrln("Error running normalize UNORM_NFKC: %s", u_errorName(status));
- } else {
- pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFKC, options, out, +1);
- pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFKC, options, out, -1);
- pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", fieldNum);
- }
-
- Normalizer::normalize(field[i], UNORM_NFKD, options, out, status);
- if (U_FAILURE(status)) {
- dataerrln("Error running normalize UNORM_NFKD: %s", u_errorName(status));
- } else {
- pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFKD, options, out, +1);
- pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
- iterativeNorm(field[i], UNORM_NFKD, options, out, -1);
- pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", fieldNum);
+ pass &= checkNorm(UNORM_NFC, options, nfc, field[i], field[1], fieldNum);
+ pass &= checkNorm(UNORM_NFD, options, nfd, field[i], field[2], fieldNum);
}
+ pass &= checkNorm(UNORM_NFKC, options, nfkc, field[i], field[3], fieldNum);
+ pass &= checkNorm(UNORM_NFKD, options, nfkd, field[i], field[4], fieldNum);
}
compare(field[1],field[2]);
compare(field[0],field[1]);
dataerrln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE");
pass = FALSE;
}
- if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
- errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
+ if(options==0 && !isNormalizedUTF8(*nfc, field[1], status)) {
+ dataerrln("Normalizer error: nfc.isNormalizedUTF8(NFC(s)) is FALSE");
pass = FALSE;
}
+ if(field[0]!=field[1]) {
+ if(Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) {
+ errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE");
+ pass = FALSE;
+ }
+ if(isNormalizedUTF8(*nfc, field[0], status)) {
+ errln("Normalizer error: nfc.isNormalizedUTF8(s) is TRUE");
+ pass = FALSE;
+ }
+ }
if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) {
dataerrln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE");
pass = FALSE;
- }
- if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
- errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
- pass = FALSE;
+ } else {
+ if(options==0 && !isNormalizedUTF8(*nfkc, field[3], status)) {
+ dataerrln("Normalizer error: nfkc.isNormalizedUTF8(NFKC(s)) is FALSE");
+ pass = FALSE;
+ }
+ if(field[0]!=field[3]) {
+ if(Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) {
+ errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE");
+ pass = FALSE;
+ }
+ if(options==0 && isNormalizedUTF8(*nfkc, field[0], status)) {
+ errln("Normalizer error: nfkc.isNormalizedUTF8(s) is TRUE");
+ pass = FALSE;
+ }
+ }
}
// test FCD quick check and "makeFCD"
return pass;
}
+// Short names of the normalization forms, indexed by UNormalizationMode value.
+// checkNorm() puts them into its log and error messages.
+static const char *const kModeStrings[UNORM_MODE_COUNT] = {
+    "?",
+    "none",
+    "D",
+    "KD",
+    "C",
+    "KC",
+    "FCD"
+};
+
+// printf-style failure descriptions, indexed by UNormalizationMode value in
+// the same order as kModeStrings. The single %d receives the number of the
+// input column being normalized. Each entry names the expected-result column
+// and the form that must produce it: c2==NFC, c3==NFD, c4==NFKC, c5==NFKD.
+// Entry 3 is the NFKD one, so it must read "KD" (it previously said "KC").
+static const char *const kMessages[UNORM_MODE_COUNT] = {
+    "?!=?", "?!=?", "c3!=D(c%d)", "c5!=KD(c%d)", "c2!=C(c%d)", "c4!=KC(c%d)", "FCD"
+};
+
+/**
+ * Normalizes s in the given mode through several APIs and checks that each
+ * result equals exp. Stops at, and returns FALSE for, the first mismatch.
+ *
+ * The checks, in order:
+ *  1. Normalizer::normalize()
+ *  2. iterativeNorm() forward (+1) and backward (-1)
+ *  3. norm2->normalizeUTF8(), only when norm2 is non-null and options==0
+ *  4. for UNORM_NFC and UNORM_NFKC only: the Edits recorded by step 3 must
+ *     agree with the UTF-8 input and output (change flag, length delta,
+ *     total source and destination lengths)
+ *
+ * @param mode    normalization mode; also indexes kModeStrings and kMessages
+ * @param options options passed to Normalizer::normalize() and iterativeNorm()
+ * @param norm2   Normalizer2 used for the UTF-8 check; may be nullptr, in
+ *                which case steps 3 and 4 are skipped
+ * @param s       input string
+ * @param exp     expected normalization of s
+ * @param field   number of the input column, inserted into the failure message
+ * @return TRUE if every executed check passed
+ */
+UBool NormalizerConformanceTest::checkNorm(UNormalizationMode mode, int32_t options,
+                                           const Normalizer2 *norm2,
+                                           const UnicodeString &s, const UnicodeString &exp,
+                                           int32_t field) {
+    const char *modeString = kModeStrings[mode];
+    char msg[20];
+    snprintf(msg, sizeof(msg), kMessages[mode], field);
+    UnicodeString out;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    Normalizer::normalize(s, mode, options, out, errorCode);
+    if (U_FAILURE(errorCode)) {
+        dataerrln("Error running normalize UNORM_NF%s: %s", modeString, u_errorName(errorCode));
+        return FALSE;
+    }
+    if (!assertEqual(modeString, "", s, out, exp, msg)) {
+        return FALSE;
+    }
+
+    iterativeNorm(s, mode, options, out, +1);
+    if (!assertEqual(modeString, "(+1)", s, out, exp, msg)) {
+        return FALSE;
+    }
+
+    iterativeNorm(s, mode, options, out, -1);
+    if (!assertEqual(modeString, "(-1)", s, out, exp, msg)) {
+        return FALSE;
+    }
+
+    // The UTF-8 checks below are run only for options==0 and a non-null norm2.
+    if (norm2 == nullptr || options != 0) {
+        return TRUE;
+    }
+
+    std::string s8;
+    s.toUTF8String(s8);
+    std::string exp8;
+    exp.toUTF8String(exp8);
+    std::string out8;
+    Edits edits;
+    // Edits are requested only for the NFC and NFKC modes.
+    Edits *editsPtr = (mode == UNORM_NFC || mode == UNORM_NFKC) ? &edits : nullptr;
+    // Lengths are size_t; convert explicitly for the int32_t-based ICU APIs.
+    const int32_t s8Length = static_cast<int32_t>(s8.length());
+    StringByteSink<std::string> sink(&out8, static_cast<int32_t>(exp8.length()));
+    norm2->normalizeUTF8(0, s8, sink, editsPtr, errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("Normalizer2.%s.normalizeUTF8(%s) failed: %s",
+              modeString, s8.c_str(), u_errorName(errorCode));
+        return FALSE;
+    }
+    if (out8 != exp8) {
+        errln("Normalizer2.%s.normalizeUTF8(%s)=%s != %s",
+              modeString, s8.c_str(), out8.c_str(), exp8.c_str());
+        return FALSE;
+    }
+    if (editsPtr == nullptr) {
+        return TRUE;
+    }
+
+    // Do the Edits cover the entire input & output?
+    const int32_t out8Length = static_cast<int32_t>(out8.length());
+    UBool pass = TRUE;
+    pass &= assertEquals("edits.hasChanges()", static_cast<UBool>(s8 != out8), edits.hasChanges());
+    // Subtract as signed values rather than relying on size_t wraparound.
+    pass &= assertEquals("edits.lengthDelta()", out8Length - s8Length, edits.lengthDelta());
+    Edits::Iterator iter = edits.getCoarseIterator();
+    while (iter.next(errorCode)) {}
+    // Report an iteration error directly instead of only through the
+    // index comparisons that follow.
+    if (U_FAILURE(errorCode)) {
+        errln("Normalizer2.%s.normalizeUTF8(%s) Edits iteration failed: %s",
+              modeString, s8.c_str(), u_errorName(errorCode));
+        return FALSE;
+    }
+    pass &= assertEquals("edits source length", s8Length, iter.sourceIndex());
+    pass &= assertEquals("edits destination length", out8Length, iter.destinationIndex());
+    return pass;
+}
+
/**
* Do a normalization using the iterative API in the given direction.
* @param dir either +1 or -1
}
}
-/**
- * @param op name of normalization form, e.g., "KC"
- * @param s string being normalized
- * @param got value received
- * @param exp expected value
- * @param msg description of this test
- * @param return true if got == exp
- */
-UBool NormalizerConformanceTest::assertEqual(const char *op,
+UBool NormalizerConformanceTest::assertEqual(const char *op, const char *op2,
const UnicodeString& s,
const UnicodeString& got,
const UnicodeString& exp,
- const char *msg,
- int32_t field)
-{
+ const char *msg) {
if (exp == got)
return TRUE;
expPretty.extract(0, expPretty.length(), expChars, expPretty.length() + 1);
expChars[expPretty.length()] = 0;
- errln(" %s%d)%s(%s)=%s, exp. %s", msg, field, op, sChars, gotChars, expChars);
+ errln(" %s: %s%s(%s)=%s, exp. %s", msg, op, op2, sChars, gotChars, expChars);
delete []sChars;
delete []gotChars;