-// testIdentifierInfo: exercises ScriptSet and IdentifierInfo.
-// Note that IdentifierInfo is not public ICU API at this time.
-//
-// Covers, in order:
-//   1. ScriptSet containment, ordering via uhash_compareScriptSet, display and parsing.
-//   2. IdentifierInfo::displayAlternates() on a hand-built hash table of ScriptSets.
-//   3. A table of identifiers, each checked for restriction level, numerics, scripts,
-//      alternates, and the scripts common among the alternates.
-//   4. IdentifierInfo::getScriptCount() on a second table of identifiers.
-//
-// Every UErrorCode is asserted before it is reset, so a failing ICU call is reported
-// at the point it happened rather than being silently discarded.
-void IntlTestSpoof::testIdentifierInfo() {
-    UErrorCode status = U_ZERO_ERROR;
-    ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status);
-    ScriptSet bitset2; bitset2.set(USCRIPT_HANGUL, status);
-    TEST_ASSERT(bitset12.contains(bitset2));
-    TEST_ASSERT(bitset12.contains(bitset12));
-    TEST_ASSERT(!bitset2.contains(bitset12));
-
-    ScriptSet arabSet; arabSet.set(USCRIPT_ARABIC, status);
-    ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status);
-    UElement arabEl; arabEl.pointer = &arabSet;
-    UElement latinEl; latinEl.pointer = &latinSet;
-    TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0);
-    TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0);
-
-    UnicodeString scriptString;
-    bitset12.displayScripts(scriptString);
-    TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString);
-
-    // Report any failure from the ScriptSet::set() calls above before status is reset.
-    TEST_ASSERT_SUCCESS(status);
-
-    status = U_ZERO_ERROR;
-    UHashtable *alternates = uhash_open(uhash_hashScriptSet ,uhash_compareScriptSet, NULL, &status);
-    uhash_puti(alternates, &bitset12, 1, &status);
-    uhash_puti(alternates, &bitset2, 1, &status);
-    UnicodeString alternatesString;
-    IdentifierInfo::displayAlternates(alternatesString, alternates, status);
-    TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang; Hang Latn") == alternatesString);
-    TEST_ASSERT_SUCCESS(status);
-
-    // Round trip: parsing the displayed script names must reproduce the original set.
-    status = U_ZERO_ERROR;
-    ScriptSet tScriptSet;
-    tScriptSet.parseScripts(scriptString, status);
-    TEST_ASSERT_SUCCESS(status);
-    TEST_ASSERT(bitset12 == tScriptSet);
-    uhash_close(alternates);
-
-    // Table-driven checks of IdentifierInfo. All string fields are run through
-    // UnicodeString::unescape() or parsed as script names / a UnicodeSet pattern.
-    struct Test {
-        const char         *fTestString;
-        URestrictionLevel   fRestrictionLevel;
-        const char         *fNumerics;
-        const char         *fScripts;
-        const char         *fAlternates;
-        const char         *fCommonAlternates;
-    } tests[] = {
-        {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE, "[]", "Latn", "", ""},
-        {"\\u0061\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
-        {"\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hira Kana", "Hira Kana"},
-        {"\\u0061\\u30FC\\u3006\\u30A2", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
-        {"\\u30A2\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
-        {"\\u0061\\u0031\\u0661", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660]", "Latn", "Arab Thaa", "Arab Thaa"},
-        {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660\\u06F0]", "Latn Arab", "", ""},
-        {"\\u0661\\u30FC\\u3006\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_UNRESTRICTIVE,
-         "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
-        {"\\u0061\\u30A2\\u30FC\\u3006\\u0031\\u0967\\u0661\\u06F1", USPOOF_UNRESTRICTIVE,
-         "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"}
-    };
-
-    int testNum;
-    for (testNum = 0; testNum < UPRV_LENGTHOF(tests); testNum++) {
-        char testNumStr[40];
-        // Bounded formatting: never write past the end of testNumStr.
-        snprintf(testNumStr, sizeof(testNumStr), "testNum = %d", testNum);
-        Test &test = tests[testNum];
-        status = U_ZERO_ERROR;
-        UnicodeString testString(test.fTestString);  // Note: may do charset conversion.
-        testString = testString.unescape();
-        IdentifierInfo idInfo(status);
-        TEST_ASSERT_SUCCESS(status);
-
-        // Fetch the recommended set separately so a failure is reported instead of
-        // dereferencing a possibly-NULL pointer.
-        const UnicodeSet *recommendedSet = uspoof_getRecommendedUnicodeSet(&status);
-        TEST_ASSERT_SUCCESS(status);
-        TEST_ASSERT_MSG(recommendedSet != NULL, testNumStr);
-        if (U_FAILURE(status) || recommendedSet == NULL) {
-            // Nothing below is meaningful without the identifier profile.
-            continue;
-        }
-        idInfo.setIdentifierProfile(*recommendedSet);
-        idInfo.setIdentifier(testString, status);
-        TEST_ASSERT_MSG(*idInfo.getIdentifier() == testString, testNumStr);
-
-        URestrictionLevel restrictionLevel = test.fRestrictionLevel;
-        TEST_ASSERT_MSG(restrictionLevel == idInfo.getRestrictionLevel(status), testNumStr);
-        // Report failures from setIdentifier()/getRestrictionLevel() before the reset below.
-        TEST_ASSERT_SUCCESS(status);
-
-        status = U_ZERO_ERROR;
-        UnicodeSet numerics(UnicodeString(test.fNumerics).unescape(), status);
-        TEST_ASSERT_SUCCESS(status);
-        TEST_ASSERT_MSG(numerics == *idInfo.getNumerics(), testNumStr);
-
-        ScriptSet scripts;
-        scripts.parseScripts(UnicodeString(test.fScripts), status);
-        TEST_ASSERT_MSG(scripts == *idInfo.getScripts(), testNumStr);
-
-        UnicodeString alternatesStr;
-        IdentifierInfo::displayAlternates(alternatesStr, idInfo.getAlternates(), status);
-        TEST_ASSERT_MSG(UnicodeString(test.fAlternates) == alternatesStr, testNumStr);
-
-        ScriptSet commonAlternates;
-        commonAlternates.parseScripts(UnicodeString(test.fCommonAlternates), status);
-        TEST_ASSERT_MSG(commonAlternates == *idInfo.getCommonAmongAlternates(), testNumStr);
-
-        // Covers the parseScripts() and displayAlternates() calls above.
-        TEST_ASSERT_SUCCESS(status);
-    }
-
-    // Test of getScriptCount()
-    // Script and or Script Extension for chars used in the tests
-    //     \\u3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
-    //     \\uA838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK
-    //     \\u0951 ; Deva Latn # Mn DEVANAGARI STRESS SIGN UDATTA
-    //
-    //     \\u0370 ; Greek # L GREEK CAPITAL LETTER HETA
-    //     \\u0481 ; Cyrillic # L& CYRILLIC SMALL LETTER KOPPA
-    //     \\u0904 ; Devanagari # Lo DEVANAGARI LETTER SHORT A
-    //     \\u3041 ; Hiragana # Lo HIRAGANA LETTER SMALL A
-    //     1234 ; Common # ascii digits
-    //     \\u0300 ; Inherited # Mn COMBINING GRAVE ACCENT
-
-    struct ScriptTest {
-        const char *fTestString;
-        int32_t     fScriptCount;
-    } scriptTests[] = {
-        {"Hello", 1},
-        {"Hello\\u0370", 2},
-        {"1234", 0},
-        {"Hello1234\\u0300", 1},  // Common and Inherited are ignored.
-        {"\\u0030", 0},
-        {"abc\\u0951", 1},
-        {"abc\\u3013", 2},
-        {"\\uA838\\u0951", 1},    // Triggers commonAmongAlternates path.
-        {"\\u3013\\uA838", 2}
-    };
-
-    // A single IdentifierInfo is reused for every entry; setIdentifier() is called
-    // again for each test string.
-    status = U_ZERO_ERROR;
-    IdentifierInfo identifierInfo(status);
-    TEST_ASSERT_SUCCESS(status);
-    for (testNum=0; testNum<UPRV_LENGTHOF(scriptTests); testNum++) {
-        ScriptTest &test = scriptTests[testNum];
-        char msgBuf[100];
-        snprintf(msgBuf, sizeof(msgBuf), "testNum = %d ", testNum);
-        UnicodeString testString = UnicodeString(test.fTestString).unescape();
-
-        status = U_ZERO_ERROR;
-        identifierInfo.setIdentifier(testString, status);
-        TEST_ASSERT_SUCCESS(status);
-        int32_t scriptCount = identifierInfo.getScriptCount();
-        TEST_ASSERT_MSG(test.fScriptCount == scriptCount, msgBuf);
-    }
-}