X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/f59164e3d128c7675a4d3934206346a3384e53a5..f3c0d7a59d99c2a94c6b8822291f0e42be3773c9:/icuSources/test/intltest/itspoof.cpp diff --git a/icuSources/test/intltest/itspoof.cpp b/icuSources/test/intltest/itspoof.cpp index 42869a73..ac7a14cb 100644 --- a/icuSources/test/intltest/itspoof.cpp +++ b/icuSources/test/intltest/itspoof.cpp @@ -1,3 +1,5 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 2011-2015, International Business Machines Corporation @@ -21,7 +23,6 @@ #include "unicode/uspoof.h" #include "cstring.h" -#include "identifier_info.h" #include "scriptset.h" #include "uhash.h" @@ -56,11 +57,15 @@ USpoofChecker *sc; \ sc = uspoof_open(&status); \ TEST_ASSERT_SUCCESS(status); \ + USpoofCheckResult *checkResult; \ + checkResult = uspoof_openCheckResult(&status); \ + TEST_ASSERT_SUCCESS(status); \ if (U_SUCCESS(status)){ #define TEST_TEARDOWN \ } \ TEST_ASSERT_SUCCESS(status); \ + uspoof_closeCheckResult(checkResult); \ uspoof_close(sc); \ } @@ -69,72 +74,23 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) { - if (exec) logln("TestSuite spoof: "); - switch (index) { - case 0: - name = "TestSpoofAPI"; - if (exec) { - testSpoofAPI(); - } - break; - case 1: - name = "TestSkeleton"; - if (exec) { - testSkeleton(); - } - break; - case 2: - name = "TestAreConfusable"; - if (exec) { - testAreConfusable(); - } - break; - case 3: - name = "TestInvisible"; - if (exec) { - testInvisible(); - } - break; - case 4: - name = "testConfData"; - if (exec) { - testConfData(); - } - break; - case 5: - name = "testBug8654"; - if (exec) { - testBug8654(); - } - break; - case 6: - name = "testIdentifierInfo"; - if (exec) { - testIdentifierInfo(); - } - break; - case 7: - name = "testScriptSet"; - if (exec) { - testScriptSet(); - } - break; - case 8: - name = "testRestrictionLevel"; - if (exec) { - testRestrictionLevel(); - } - break; - case 9: - name = "testMixedNumbers"; - if (exec) { - testMixedNumbers(); - } - break; - - - default: name=""; break; + if (exec) { + logln("TestSuite spoof: "); } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testSpoofAPI); + TESTCASE_AUTO(testSkeleton); + TESTCASE_AUTO(testAreConfusable); + TESTCASE_AUTO(testInvisible); + TESTCASE_AUTO(testConfData); + TESTCASE_AUTO(testBug8654); + TESTCASE_AUTO(testScriptSet); + TESTCASE_AUTO(testRestrictionLevel); + TESTCASE_AUTO(testMixedNumbers); + TESTCASE_AUTO(testBug12153); + TESTCASE_AUTO(testBug12825); + TESTCASE_AUTO(testBug12815); + TESTCASE_AUTO_END; } void IntlTestSpoof::testSpoofAPI() { @@ -153,6 +109,7 @@ void IntlTestSpoof::testSpoofAPI() { UnicodeString s1("cxs"); UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs" int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status); + TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); TEST_TEARDOWN; @@ -271,8 +228,9 @@ void IntlTestSpoof::testAreConfusable() { "A long string that will overflow stack buffers. A long string that will overflow stack buffers. "); UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. " "A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "); - TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status)); + int32_t result = uspoof_areConfusableUnicodeString(sc, s1, s2, &status); TEST_ASSERT_SUCCESS(status); + TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, result); TEST_TEARDOWN; } @@ -446,142 +404,6 @@ void IntlTestSpoof::testConfData() { } } -// testIdentifierInfo. Note that IdentifierInfo is not public ICU API at this time -void IntlTestSpoof::testIdentifierInfo() { - UErrorCode status = U_ZERO_ERROR; - ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status); - ScriptSet bitset2; bitset2.set(USCRIPT_HANGUL, status); - TEST_ASSERT(bitset12.contains(bitset2)); - TEST_ASSERT(bitset12.contains(bitset12)); - TEST_ASSERT(!bitset2.contains(bitset12)); - - ScriptSet arabSet; arabSet.set(USCRIPT_ARABIC, status); - ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status); - UElement arabEl; arabEl.pointer = &arabSet; - UElement latinEl; latinEl.pointer = &latinSet; - TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0); - TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0); - - UnicodeString scriptString; - bitset12.displayScripts(scriptString); - TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString); - - status = U_ZERO_ERROR; - UHashtable *alternates = uhash_open(uhash_hashScriptSet ,uhash_compareScriptSet, NULL, &status); - uhash_puti(alternates, &bitset12, 1, &status); - uhash_puti(alternates, &bitset2, 1, &status); - UnicodeString alternatesString; - IdentifierInfo::displayAlternates(alternatesString, alternates, status); - TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang; Hang Latn") == alternatesString); - TEST_ASSERT_SUCCESS(status); - - status = U_ZERO_ERROR; - ScriptSet tScriptSet; - tScriptSet.parseScripts(scriptString, status); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT(bitset12 == tScriptSet); - UnicodeString ss; - ss.remove(); - uhash_close(alternates); - - struct Test { - const char *fTestString; - URestrictionLevel fRestrictionLevel; - const char *fNumerics; - const char *fScripts; - const char *fAlternates; - const char *fCommonAlternates; - } tests[] = { - {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE, "[]", "Latn", "", ""}, - {"\\u0061\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"}, - {"\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hira Kana", "Hira Kana"}, - {"\\u0061\\u30FC\\u3006\\u30A2", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""}, - {"\\u30A2\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""}, - {"\\u0061\\u0031\\u0661", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660]", "Latn", "Arab Thaa", "Arab Thaa"}, - {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660\\u06F0]", "Latn Arab", "", ""}, - {"\\u0661\\u30FC\\u3006\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_UNRESTRICTIVE, - "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"}, - {"\\u0061\\u30A2\\u30FC\\u3006\\u0031\\u0967\\u0661\\u06F1", USPOOF_UNRESTRICTIVE, - "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"} - }; - - int testNum; - for (testNum = 0; testNum < UPRV_LENGTHOF(tests); testNum++) { - char testNumStr[40]; - sprintf(testNumStr, "testNum = %d", testNum); - Test &test = tests[testNum]; - status = U_ZERO_ERROR; - UnicodeString testString(test.fTestString); // Note: may do charset conversion. - testString = testString.unescape(); - IdentifierInfo idInfo(status); - TEST_ASSERT_SUCCESS(status); - idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status)); - idInfo.setIdentifier(testString, status); - TEST_ASSERT_MSG(*idInfo.getIdentifier() == testString, testNumStr); - - URestrictionLevel restrictionLevel = test.fRestrictionLevel; - TEST_ASSERT_MSG(restrictionLevel == idInfo.getRestrictionLevel(status), testNumStr); - - status = U_ZERO_ERROR; - UnicodeSet numerics(UnicodeString(test.fNumerics).unescape(), status); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT_MSG(numerics == *idInfo.getNumerics(), testNumStr); - - ScriptSet scripts; - scripts.parseScripts(UnicodeString(test.fScripts), status); - TEST_ASSERT_MSG(scripts == *idInfo.getScripts(), testNumStr); - - UnicodeString alternatesStr; - IdentifierInfo::displayAlternates(alternatesStr, idInfo.getAlternates(), status); - TEST_ASSERT_MSG(UnicodeString(test.fAlternates) == alternatesStr, testNumStr); - - ScriptSet commonAlternates; - commonAlternates.parseScripts(UnicodeString(test.fCommonAlternates), status); - TEST_ASSERT_MSG(commonAlternates == *idInfo.getCommonAmongAlternates(), testNumStr); - } - - // Test of getScriptCount() - // Script and or Script Extension for chars used in the tests - // \\u3013 ; Bopo Hang Hani Hira Kana # So GETA MARK - // \\uA838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK - // \\u0951 ; Deva Latn # Mn DEVANAGARI STRESS SIGN UDATTA - // - // \\u0370 ; Greek # L GREEK CAPITAL LETTER HETA - // \\u0481 ; Cyrillic # L& CYRILLIC SMALL LETTER KOPPA - // \\u0904 ; Devanagari # Lo DEVANAGARI LETTER SHORT A - // \\u3041 ; Hiragana # Lo HIRAGANA LETTER SMALL A - // 1234 ; Common # ascii digits - // \\u0300 ; Inherited # Mn COMBINING GRAVE ACCENT - - struct ScriptTest { - const char *fTestString; - int32_t fScriptCount; - } scriptTests[] = { - {"Hello", 1}, - {"Hello\\u0370", 2}, - {"1234", 0}, - {"Hello1234\\u0300", 1}, // Common and Inherited are ignored. - {"\\u0030", 0}, - {"abc\\u0951", 1}, - {"abc\\u3013", 2}, - {"\\uA838\\u0951", 1}, // Triggers commonAmongAlternates path. - {"\\u3013\\uA838", 2} - }; - - status = U_ZERO_ERROR; - IdentifierInfo identifierInfo(status); - for (testNum=0; testNum 0); + + UnicodeString scriptString; + bitset12.displayScripts(scriptString); + TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString); } @@ -673,35 +536,40 @@ void IntlTestSpoof::testRestrictionLevel() { {"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE}, {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE}, {"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE}, - {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE} + {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE}, + {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE}, + {"\\u0061\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, + {"\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, + {"\\u0061\\u30FC\\u303C\\u30A2", USPOOF_HIGHLY_RESTRICTIVE}, + {"\\u30A2\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, + {"\\u0061\\u0031\\u0661", USPOOF_MODERATELY_RESTRICTIVE}, + {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_MODERATELY_RESTRICTIVE}, + {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE}, + {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE} }; char msgBuffer[100]; - URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE, - USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, - USPOOF_UNRESTRICTIVE}; - + USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, + USPOOF_UNRESTRICTIVE}; + UErrorCode status = U_ZERO_ERROR; - IdentifierInfo idInfo(status); - TEST_ASSERT_SUCCESS(status); - idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status)); - TEST_ASSERT_SUCCESS(status); + UnicodeSet allowedChars; + // Allowed Identifier Characters. In addition to the Recommended Set, + // allow u303c, which has an interesting script extension of Hani Hira Kana. + allowedChars.addAll(*uspoof_getRecommendedUnicodeSet(&status)).add(0x303C); + for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) { status = U_ZERO_ERROR; const Test &test = tests[testNum]; UnicodeString testString = UnicodeString(test.fId).unescape(); URestrictionLevel expectedLevel = test.fExpectedRestrictionLevel; - idInfo.setIdentifier(testString, status); - sprintf(msgBuffer, "testNum = %d ", testNum); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT_MSG(expectedLevel == idInfo.getRestrictionLevel(status), msgBuffer); for (int levelIndex=0; levelIndex levelSetInSpoofChecker) { expectedValue |= USPOOF_RESTRICTION_LEVEL; } - if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) { - expectedValue |= USPOOF_CHAR_LIMIT; - } sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x", testNum, levelIndex, expectedValue, actualValue); TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer); @@ -719,9 +584,9 @@ void IntlTestSpoof::testRestrictionLevel() { // Run the same check again, with the Spoof Checker configured to return // the actual restriction level. - uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status); - uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status); + uspoof_setAllowedChars(sc, allowedChars.toUSet(), &status); uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker); + uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status); int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status); TEST_ASSERT_SUCCESS(status); if (U_SUCCESS(status)) { @@ -731,8 +596,8 @@ void IntlTestSpoof::testRestrictionLevel() { uspoof_close(sc); } } -} +} void IntlTestSpoof::testMixedNumbers() { struct Test { @@ -742,10 +607,18 @@ void IntlTestSpoof::testMixedNumbers() { {"1", "[0]"}, {"\\u0967", "[\\u0966]"}, {"1\\u0967", "[0\\u0966]"}, - {"\\u0661\\u06F1", "[\\u0660\\u06F0]"} + {"\\u0661\\u06F1", "[\\u0660\\u06F0]"}, + {"\\u0061\\u2665", "[]"}, + {"\\u0061\\u303C", "[]"}, + {"\\u0061\\u30FC\\u303C", "[]"}, + {"\\u0061\\u30FC\\u303C\\u30A2", "[]"}, + {"\\u30A2\\u0061\\u30FC\\u303C", "[]"}, + {"\\u0061\\u0031\\u0661", "[\\u0030\\u0660]"}, + {"\\u0061\\u0031\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0]"}, + {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"}, + {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"} }; UErrorCode status = U_ZERO_ERROR; - IdentifierInfo idInfo(status); for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) { char msgBuf[100]; sprintf(msgBuf, "testNum = %d ", testNum); @@ -754,18 +627,59 @@ void IntlTestSpoof::testMixedNumbers() { status = U_ZERO_ERROR; UnicodeString testString = UnicodeString(test.fTestString).unescape(); UnicodeSet expectedSet(UnicodeString(test.fExpectedSet).unescape(), status); - idInfo.setIdentifier(testString, status); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT_MSG(expectedSet == *idInfo.getNumerics(), msgBuf); status = U_ZERO_ERROR; - USpoofChecker *sc = uspoof_open(&status); - uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this - int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status); - UBool mixedNumberFailure = ((result & USPOOF_MIXED_NUMBERS) != 0); - TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf); - uspoof_close(sc); + TEST_SETUP + uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this + uspoof_check2UnicodeString(sc, testString, checkResult, &status); + UBool mixedNumberFailure = ((uspoof_getCheckResultChecks(checkResult, &status) & USPOOF_MIXED_NUMBERS) != 0); + TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf); + const UnicodeSet* actualSet = UnicodeSet::fromUSet(uspoof_getCheckResultNumerics(checkResult, &status)); + TEST_ASSERT_MSG(expectedSet == *actualSet, msgBuf); + TEST_TEARDOWN } } +// Bug #12153 - uspoof_setRestrictionLevel() should enable restriction level testing. +// +void IntlTestSpoof::testBug12153() { + UErrorCode status = U_ZERO_ERROR; + LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + TEST_ASSERT_SUCCESS(status); + int32_t checks = uspoof_getChecks(sc.getAlias(), &status); + TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) != 0); + checks &= ~USPOOF_RESTRICTION_LEVEL; + uspoof_setChecks(sc.getAlias(), checks, &status); + checks = uspoof_getChecks(sc.getAlias(), &status); + TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) == 0); + + uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE); + checks = uspoof_getChecks(sc.getAlias(), &status); + TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) != 0); + TEST_ASSERT_SUCCESS(status); +} + +// uspoof_checkUnicodeString should NOT have an infinite loop. +void IntlTestSpoof::testBug12825() { + UErrorCode status = U_ZERO_ERROR; + LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + TEST_ASSERT_SUCCESS(status); + uspoof_setChecks(sc.getAlias(), USPOOF_ALL_CHECKS | USPOOF_AUX_INFO, &status); + TEST_ASSERT_SUCCESS(status); + uspoof_checkUnicodeString(sc.getAlias(), UnicodeString("\\u30FB").unescape(), NULL, &status); + TEST_ASSERT_SUCCESS(status); +} + +// uspoof_getSkeleton should NOT set an ILLEGAL_ARGUMENT_EXCEPTION. +void IntlTestSpoof::testBug12815() { + UErrorCode status = U_ZERO_ERROR; + LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + TEST_ASSERT_SUCCESS(status); + uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL, &status); + TEST_ASSERT_SUCCESS(status); + UnicodeString result; + uspoof_getSkeletonUnicodeString(sc.getAlias(), 0, UnicodeString("hello world"), result, &status); + TEST_ASSERT_SUCCESS(status); +} + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */