+namespace {
+
+// Number of semicolon-separated columns handed to the per-line callback;
+// idnaTestOneLine() reads fields[0] through fields[6].
+const int32_t kNumFields = 7;
+
+/**
+ * Per-line callback for u_parseDelimitedFile().
+ * Forwards the parsed fields of one data line to UTS46Test::idnaTestOneLine().
+ *
+ * @param context    the UTS46Test instance that started the parse
+ * @param fields     fields[i][0] / fields[i][1] delimit column i of the line
+ * @param pErrorCode ICU error code, handed on by reference
+ */
+void U_CALLCONV
+idnaTestLineFn(void *context,
+               char *fields[][2], int32_t /* fieldCount */,
+               UErrorCode *pErrorCode) {
+    // Converting void* back to the object type only needs static_cast.
+    static_cast<UTS46Test *>(context)->idnaTestOneLine(fields, *pErrorCode);
+}
+
+/**
+ * Converts one field to a UnicodeString.
+ * The bytes in [field[0], field[1]) are read as UTF-8, then the string is
+ * trimmed and its backslash escapes are resolved via unescape().
+ */
+UnicodeString s16FromField(char *(&field)[2]) {
+    const int32_t length = static_cast<int32_t>(field[1] - field[0]);
+    return UnicodeString::fromUTF8(StringPiece(field[0], length)).trim().unescape();
+}
+
+/**
+ * Returns the text of a status field with leading whitespace skipped and
+ * trailing spaces/tabs removed.
+ * The result is empty when nothing remains after skipping whitespace.
+ */
+std::string statusFromField(char *(&field)[2]) {
+    const char *start = u_skipWhitespace(field[0]);
+    std::string status;
+    // '<' rather than '!=': never hand a negative length to assign().
+    if (start < field[1]) {
+        int32_t length = static_cast<int32_t>(field[1] - start);
+        // The field holds char data, so compare against narrow literals.
+        while (length > 0 && (start[length - 1] == ' ' || start[length - 1] == '\t')) {
+            --length;
+        }
+        status.assign(start, length);
+    }
+    return status;
+}
+
+}  // namespace
+
+/**
+ * Compares one IDNA operation's actual outcome with the expectation from a
+ * test data line and reports mismatches via errln().
+ *
+ * @param line     text of the data line, used only in error messages
+ * @param type     label of the operation being checked, used in error messages
+ * @param expected expected output string
+ * @param result   actual output string
+ * @param status   expected status field; "" or "[]" means no errors expected
+ * @param info     IDNAInfo filled in by the operation
+ */
+void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
+                                    const UnicodeString &expected, const UnicodeString &result,
+                                    const char *status, const IDNAInfo &info) {
+    // An error in toUnicode or toASCII is indicated by a value in square brackets,
+    // such as "[B5 B6]".
+    UBool expectedHasErrors = FALSE;
+    if (*status != 0) {
+        // status is char data: use narrow literals, not char16_t ones.
+        if (*status != '[') {
+            errln("%s status field does not start with '[': %s\n %s", type, status, line);
+        }
+        // A plain literal keeps strcmp() valid under C++20, where u8"" is char8_t[].
+        if (strcmp(status, "[]") != 0) {
+            expectedHasErrors = TRUE;
+        }
+    }
+    if (expectedHasErrors != info.hasErrors()) {
+        errln("%s expected errors %s %d != %d = actual has errors: %04lx\n %s",
+              type, status, expectedHasErrors, info.hasErrors(),
+              static_cast<long>(info.getErrors()), line);
+    }
+    // The output strings are only compared when no error is expected.
+    if (!expectedHasErrors && expected != result) {
+        errln("%s expected != actual\n %s", type, line);
+        errln(UnicodeString(u" ") + expected);
+        errln(UnicodeString(u" ") + result);
+    }
+}
+
+/**
+ * Runs the three IDNA checks for one parsed line of IdnaTestV2.txt
+ * (format used since Unicode 11).
+ *
+ * Columns, in order:
+ *   1 source          the string to be tested
+ *   2 toUnicode       toUnicode result, Transitional_Processing=false; blank = source
+ *   3 toUnicodeStatus status codes; blank = []
+ *   4 toAsciiN        toASCII result, Transitional_Processing=false; blank = toUnicode
+ *   5 toAsciiNStatus  status codes; blank = toUnicodeStatus
+ *   6 toAsciiT        toASCII result, Transitional_Processing=true; blank = toAsciiN
+ *   7 toAsciiTStatus  status codes; blank = toAsciiNStatus
+ */
+void UTS46Test::idnaTestOneLine(char *fields[][2], UErrorCode &errorCode) {
+    // A blank string column inherits the value of an earlier column.
+    auto textOr = [](char *(&field)[2], const UnicodeString &inherited) -> UnicodeString {
+        UnicodeString text = s16FromField(field);
+        if (text.isEmpty()) {
+            text = inherited;
+        }
+        return text;
+    };
+    // A blank status column inherits the value of an earlier status column.
+    auto statusOr = [](char *(&field)[2], const std::string &inherited) -> std::string {
+        std::string codes = statusFromField(field);
+        if (codes.empty()) {
+            codes = inherited;
+        }
+        return codes;
+    };
+
+    const UnicodeString source = s16FromField(fields[0]);
+    const UnicodeString toUnicode = textOr(fields[1], source);
+    // Left blank when the column is blank; the checker treats "" like "[]".
+    const std::string toUnicodeStatus = statusFromField(fields[2]);
+    const UnicodeString toAsciiN = textOr(fields[3], toUnicode);
+    const std::string toAsciiNStatus = statusOr(fields[4], toUnicodeStatus);
+    const UnicodeString toAsciiT = textOr(fields[5], toAsciiN);
+    const std::string toAsciiTStatus = statusOr(fields[6], toAsciiNStatus);
+
+    // The start of the first field serves as the line text in error messages.
+    const char *lineText = fields[0][0];
+
+    // ToUnicode, nontransitional
+    UnicodeString unicodeN;
+    IDNAInfo unicodeNInfo;
+    nontrans->nameToUnicode(source, unicodeN, unicodeNInfo, errorCode);
+    checkIdnaTestResult(lineText, "toUnicodeNontrans", toUnicode, unicodeN,
+                        toUnicodeStatus.c_str(), unicodeNInfo);
+
+    // ToASCII, nontransitional
+    UnicodeString asciiN;
+    IDNAInfo asciiNInfo;
+    nontrans->nameToASCII(source, asciiN, asciiNInfo, errorCode);
+    checkIdnaTestResult(lineText, "toASCIINontrans", toAsciiN, asciiN,
+                        toAsciiNStatus.c_str(), asciiNInfo);
+
+    // ToASCII, transitional
+    UnicodeString asciiT;
+    IDNAInfo asciiTInfo;
+    trans->nameToASCII(source, asciiT, asciiTInfo, errorCode);
+    checkIdnaTestResult(lineText, "toASCIITrans", toAsciiT, asciiT,
+                        toAsciiTStatus.c_str(), asciiTInfo);
+}
+
+// TODO: de-duplicate. Defines LocalStdioFilePointer for FILE with fclose as its close function (presumably a scoped owner; see U_DEFINE_LOCAL_OPEN_POINTER).
+U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
+
+// http://www.unicode.org/Public/idna/latest/IdnaTestV2.txt
+/**
+ * Data-driven test: parses IdnaTestV2.txt from the source test data folder
+ * and runs idnaTestOneLine() on each data line via idnaTestLineFn().
+ */
+void UTS46Test::IdnaTest() {
+    IcuTestErrorCode errorCode(*this, "IdnaTest");
+    const char *sourceTestDataPath = getSourceTestData(errorCode);
+    if (errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
+                                       "folder (getSourceTestData())")) {
+        return;
+    }
+    CharString path(sourceTestDataPath, errorCode);
+    path.appendPathPart("IdnaTestV2.txt", errorCode);
+    // Opened here only so that a missing file gets its own clear message;
+    // the parser below is given the path, not this handle.
+    LocalStdioFilePointer idnaTestFile(fopen(path.data(), "r"));
+    if (idnaTestFile.isNull()) {
+        errln("unable to open %s", path.data());
+        return;
+    }
+
+    // Columns (c1, c2,...) are separated by semicolons.
+    // Leading and trailing spaces and tabs in each column are ignored.
+    // Comments are indicated with hash marks.
+    char *fields[kNumFields][2];
+    u_parseDelimitedFile(path.data(), ';', fields, kNumFields, idnaTestLineFn, this, errorCode);
+    // Report (and reset) any parse failure; name the file that was actually read.
+    errorCode.errIfFailureAndReset("error parsing IdnaTestV2.txt");
+}
+