git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/itspoof.cpp

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 2011-2015, International Business Machines Corporation
	4	* and others. All Rights Reserved.
	5	**********************************************************************
	6	*/
	7	/**
	8	* IntlTestSpoof tests for USpoofDetector
	9	*/
	10
	11	#include "unicode/utypes.h"
	12
	13	#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
	14
	15	#include "itspoof.h"
	16
	17	#include "unicode/normlzr.h"
	18	#include "unicode/regex.h"
	19	#include "unicode/unistr.h"
	20	#include "unicode/uscript.h"
	21	#include "unicode/uspoof.h"
	22
	23	#include "cstring.h"
	24	#include "identifier_info.h"
	25	#include "scriptset.h"
	26	#include "uhash.h"
	27
	28	#include <stdlib.h>
	29	#include <stdio.h>
	30
	31	#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
	32	errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
	33
	34	#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
	35	errln("Test Failure at file %s, line %d: \"%s\" is false.", __FILE__, __LINE__, #expr);};}
	36
	37	#define TEST_ASSERT_MSG(expr, msg) {if ((expr)==FALSE) { \
	38	dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", __FILE__, __LINE__, msg, #expr);};}
	39
	40	#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
	41	errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
	42	__FILE__, __LINE__, #a, (a), #b, (b)); }}
	43
	44	#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
	45	errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
	46	__FILE__, __LINE__, #a, (a), #b, (b)); }}
	47
	48	/*
	49	* TEST_SETUP and TEST_TEARDOWN
	50	* macros to handle the boilerplate around setting up test case.
	51	* Put arbitrary test code between SETUP and TEARDOWN.
	52	* "sc" is the ready-to-go SpoofChecker for use in the tests.
	53	*/
	54	#define TEST_SETUP { \
	55	UErrorCode status = U_ZERO_ERROR; \
	56	USpoofChecker *sc; \
	57	sc = uspoof_open(&status); \
	58	TEST_ASSERT_SUCCESS(status); \
	59	if (U_SUCCESS(status)){
	60
	61	#define TEST_TEARDOWN \
	62	} \
	63	TEST_ASSERT_SUCCESS(status); \
	64	uspoof_close(sc); \
	65	}
	66
	67
	68
	69
	70	void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /par/ )
	71	{
	72	if (exec) logln("TestSuite spoof: ");
	73	switch (index) {
	74	case 0:
	75	name = "TestSpoofAPI";
	76	if (exec) {
	77	testSpoofAPI();
	78	}
	79	break;
	80	case 1:
	81	name = "TestSkeleton";
	82	if (exec) {
	83	testSkeleton();
	84	}
	85	break;
	86	case 2:
	87	name = "TestAreConfusable";
	88	if (exec) {
	89	testAreConfusable();
	90	}
	91	break;
	92	case 3:
	93	name = "TestInvisible";
	94	if (exec) {
	95	testInvisible();
	96	}
	97	break;
	98	case 4:
	99	name = "testConfData";
	100	if (exec) {
	101	testConfData();
	102	}
	103	break;
	104	case 5:
	105	name = "testBug8654";
	106	if (exec) {
	107	testBug8654();
	108	}
	109	break;
	110	case 6:
	111	name = "testIdentifierInfo";
	112	if (exec) {
	113	testIdentifierInfo();
	114	}
	115	break;
	116	case 7:
	117	name = "testScriptSet";
	118	if (exec) {
	119	testScriptSet();
	120	}
	121	break;
	122	case 8:
	123	name = "testRestrictionLevel";
	124	if (exec) {
	125	testRestrictionLevel();
	126	}
	127	break;
	128	case 9:
	129	name = "testMixedNumbers";
	130	if (exec) {
	131	testMixedNumbers();
	132	}
	133	break;
	134
	135
	136	default: name=""; break;
	137	}
	138	}
	139
	140	void IntlTestSpoof::testSpoofAPI() {
	141
	142	TEST_SETUP
	143	UnicodeString s("xyz"); // Many latin ranges are whole-script confusable with other scripts.
	144	// If this test starts failing, consult confusablesWholeScript.txt
	145	int32_t position = 666;
	146	int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
	147	TEST_ASSERT_SUCCESS(status);
	148	TEST_ASSERT_EQ(0, checkResults);
	149	TEST_ASSERT_EQ(0, position);
	150	TEST_TEARDOWN;
	151
	152	TEST_SETUP
	153	UnicodeString s1("cxs");
	154	UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs"
	155	int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
	156	TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE \| USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
	157
	158	TEST_TEARDOWN;
	159
	160	TEST_SETUP
	161	UnicodeString s("I1l0O");
	162	UnicodeString dest;
	163	UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
	164	TEST_ASSERT_SUCCESS(status);
	165	TEST_ASSERT(UnicodeString("lllOO") == dest);
	166	TEST_ASSERT(&dest == &retStr);
	167	TEST_TEARDOWN;
	168	}
	169
	170
	171	#define CHECK_SKELETON(type, input, expected) { \
	172	checkSkeleton(sc, type, input, expected, __LINE__); \
	173	}
	174
	175
	176	// testSkeleton. Spot check a number of confusable skeleton substitutions from the
	177	// Unicode data file confusables.txt
	178	// Test cases chosen for substitutions of various lengths, and
	179	// membership in different mapping tables.
	180	// Note: for ICU 55, all tables collapsed to the MA table data.
	181	// TODO: for ICU 56 with Unicode 8, revisit this test.
	182	//
	183	void IntlTestSpoof::testSkeleton() {
	184	const uint32_t ML = 0;
	185	const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
	186	const uint32_t MA = USPOOF_ANY_CASE;
	187	const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_ANY_CASE;
	188
	189	TEST_SETUP
	190	CHECK_SKELETON(SL, "nochange", "nochange");
	191	CHECK_SKELETON(SA, "nochange", "nochange");
	192	CHECK_SKELETON(ML, "nochange", "nochange");
	193	CHECK_SKELETON(MA, "nochange", "nochange");
	194	CHECK_SKELETON(MA, "love", "love");
	195	CHECK_SKELETON(MA, "1ove", "love"); // Digit 1 to letter l
	196	CHECK_SKELETON(ML, "OOPS", "OOPS");
	197	CHECK_SKELETON(ML, "00PS", "OOPS");
	198	CHECK_SKELETON(MA, "OOPS", "OOPS");
	199	CHECK_SKELETON(MA, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only
	200	CHECK_SKELETON(SL, "\\u059c", "\\u0301");
	201	CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
	202	CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)"
	203	CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f");
	204
	205	// This mapping exists in the ML and MA tables, does not exist in SL, SA
	206	// 0C83 ; 0983 ; ML
	207	// 0C83 ; 0983 ; MA
	208	//
	209
	210	CHECK_SKELETON(SL, "\\u0C83", "\\u0983");
	211	CHECK_SKELETON(SA, "\\u0C83", "\\u0983");
	212	CHECK_SKELETON(ML, "\\u0C83", "\\u0983");
	213	CHECK_SKELETON(MA, "\\u0C83", "\\u0983");
	214
	215	// 0391 mappings exist only in MA and SA tables.
	216	CHECK_SKELETON(MA, "\\u0391", "A");
	217	CHECK_SKELETON(SA, "\\u0391", "A");
	218	CHECK_SKELETON(ML, "\\u0391", "A");
	219	CHECK_SKELETON(SL, "\\u0391", "A");
	220
	221	// 13CF Mappings in all four tables, different in MA.
	222	CHECK_SKELETON(ML, "\\u13CF", "b");
	223	CHECK_SKELETON(MA, "\\u13CF", "b");
	224	CHECK_SKELETON(SL, "\\u13CF", "b");
	225	CHECK_SKELETON(SA, "\\u13CF", "b");
	226
	227	// 0022 ; 0027 0027 ;
	228	// all tables.
	229	CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");
	230	CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");
	231	CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");
	232	CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");
	233
	234	// 017F mappings exist only in MA and SA tables.
	235	CHECK_SKELETON(MA, "\\u017F", "f");
	236	CHECK_SKELETON(SA, "\\u017F", "f");
	237	CHECK_SKELETON(ML, "\\u017F", "f");
	238	CHECK_SKELETON(SL, "\\u017F", "f");
	239
	240	TEST_TEARDOWN;
	241	}
	242
	243
	244	//
	245	// Run a single confusable skeleton transformation test case.
	246	//
	247	void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
	248	const char input, const char expected, int32_t lineNum) {
	249	UnicodeString uInput = UnicodeString(input).unescape();
	250	UnicodeString uExpected = UnicodeString(expected).unescape();
	251
	252	UErrorCode status = U_ZERO_ERROR;
	253	UnicodeString actual;
	254	uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);
	255	if (U_FAILURE(status)) {
	256	errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
	257	u_errorName(status));
	258	return;
	259	}
	260	if (uExpected != actual) {
	261	errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
	262	__FILE__, __LINE__, lineNum);
	263	errln(UnicodeString(" Actual Skeleton: \"") + actual + UnicodeString("\"\n") +
	264	UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
	265	}
	266	}
	267
	268	void IntlTestSpoof::testAreConfusable() {
	269	TEST_SETUP
	270	UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
	271	"A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
	272	UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
	273	"A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
	274	TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
	275	TEST_ASSERT_SUCCESS(status);
	276
	277	TEST_TEARDOWN;
	278	}
	279
	280	void IntlTestSpoof::testInvisible() {
	281	TEST_SETUP
	282	UnicodeString s = UnicodeString("abcd\\u0301ef").unescape();
	283	int32_t position = -42;
	284	TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
	285	TEST_ASSERT_SUCCESS(status);
	286	TEST_ASSERT(0 == position);
	287
	288	UnicodeString s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
	289	TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
	290	TEST_ASSERT_SUCCESS(status);
	291	TEST_ASSERT_EQ(0, position);
	292
	293	// Two acute accents, one from the composed a with acute accent, \u00e1,
	294	// and one separate.
	295	position = -42;
	296	UnicodeString s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
	297	TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
	298	TEST_ASSERT_SUCCESS(status);
	299	TEST_ASSERT_EQ(0, position);
	300	TEST_TEARDOWN;
	301	}
	302
	303	void IntlTestSpoof::testBug8654() {
	304	TEST_SETUP
	305	UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();
	306	int32_t position = -42;
	307	TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE );
	308	TEST_ASSERT_SUCCESS(status);
	309	TEST_ASSERT_EQ(0, position);
	310	TEST_TEARDOWN;
	311	}
	312
	313	static UnicodeString parseHex(const UnicodeString &in) {
	314	// Convert a series of hex numbers in a Unicode String to a string with the
	315	// corresponding characters.
	316	// The conversion is _really_ annoying. There must be some function to just do it.
	317	UnicodeString result;
	318	UChar32 cc = 0;
	319	for (int32_t i=0; i<in.length(); i++) {
	320	UChar c = in.charAt(i);
	321	if (c == 0x20) { // Space
	322	if (cc > 0) {
	323	result.append(cc);
	324	cc = 0;
	325	}
	326	} else if (c>=0x30 && c<=0x39) {
	327	cc = (cc<<4) + (c - 0x30);
	328	} else if ((c>=0x41 && c<=0x46) \|\| (c>=0x61 && c<=0x66)) {
	329	cc = (cc<<4) + (c & 0x0f)+9;
	330	}
	331	// else do something with bad input.
	332	}
	333	if (cc > 0) {
	334	result.append(cc);
	335	}
	336	return result;
	337	}
	338
	339
	340	//
	341	// Append the hex form of a UChar32 to a UnicodeString.
	342	// Used in formatting error messages.
	343	// Match the formatting of numbers in confusables.txt
	344	// Minimum of 4 digits, no leading zeroes for positions 5 and up.
	345	//
	346	static void appendHexUChar(UnicodeString &dest, UChar32 c) {
	347	UBool doZeroes = FALSE;
	348	for (int bitNum=28; bitNum>=0; bitNum-=4) {
	349	if (bitNum <= 12) {
	350	doZeroes = TRUE;
	351	}
	352	int hexDigit = (c>>bitNum) & 0x0f;
	353	if (hexDigit != 0 \|\| doZeroes) {
	354	doZeroes = TRUE;
	355	dest.append((UChar)(hexDigit<=9? hexDigit + 0x30: hexDigit -10 + 0x41));
	356	}
	357	}
	358	dest.append((UChar)0x20);
	359	}
	360
	361	U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
	362
	363	// testConfData - Check each data item from the Unicode confusables.txt file,
	364	// verify that it transforms correctly in a skeleton.
	365	//
	366	void IntlTestSpoof::testConfData() {
	367	char buffer[2000];
	368	if (getUnidataPath(buffer) == NULL) {
	369	errln("Skipping test spoof/testConfData. Unable to find path to source/data/unidata/.");
	370	return;
	371	}
	372	uprv_strcat(buffer, "confusables.txt");
	373
	374	LocalStdioFilePointer f(fopen(buffer, "rb"));
	375	if (f.isNull()) {
	376	errln("Skipping test spoof/testConfData. File confusables.txt not accessible.");
	377	return;
	378	}
	379	fseek(f.getAlias(), 0, SEEK_END);
	380	int32_t fileSize = ftell(f.getAlias());
	381	LocalArray<char> fileBuf(new char[fileSize]);
	382	fseek(f.getAlias(), 0, SEEK_SET);
	383	int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());
	384	TEST_ASSERT_EQ(amt_read, fileSize);
	385	TEST_ASSERT(fileSize>0);
	386	if (amt_read != fileSize \|\| fileSize <=0) {
	387	return;
	388	}
	389	UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize));
	390
	391	UErrorCode status = U_ZERO_ERROR;
	392	LocalUSpoofCheckerPointer sc(uspoof_open(&status));
	393	TEST_ASSERT_SUCCESS(status);
	394
	395	// Parse lines from the confusables.txt file. Example Line:
	396	// FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....
	397	// Three fields. The hex fields can contain more than one character,
	398	// and each character may be more than 4 digits (for supplemntals)
	399	// This regular expression matches lines and splits the fields into capture groups.
	400	RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]?);([^#;]?);([^#]*)", confusablesTxt, 0, status);
	401	TEST_ASSERT_SUCCESS(status);
	402	while (parseLine.find()) {
	403	UnicodeString from = parseHex(parseLine.group(1, status));
	404	if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {
	405	// The source character was not NFD.
	406	// Skip this case; the first step in obtaining a skeleton is to NFD the input,
	407	// so the mapping in this line of confusables.txt will never be applied.
	408	continue;
	409	}
	410
	411	UnicodeString rawExpected = parseHex(parseLine.group(2, status));
	412	UnicodeString expected;
	413	Normalizer::decompose(rawExpected, FALSE /NFD/, 0, expected, status);
	414	TEST_ASSERT_SUCCESS(status);
	415
	416	int32_t skeletonType = 0;
	417	UnicodeString tableType = parseLine.group(3, status);
	418	TEST_ASSERT_SUCCESS(status);
	419	if (tableType.indexOf("SL") >= 0) {
	420	skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
	421	} else if (tableType.indexOf("SA") >= 0) {
	422	skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_ANY_CASE;
	423	} else if (tableType.indexOf("ML") >= 0) {
	424	skeletonType = 0;
	425	} else if (tableType.indexOf("MA") >= 0) {
	426	skeletonType = USPOOF_ANY_CASE;
	427	}
	428
	429	UnicodeString actual;
	430	uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status);
	431	TEST_ASSERT_SUCCESS(status);
	432	TEST_ASSERT(actual == expected);
	433	if (actual != expected) {
	434	errln(parseLine.group(0, status));
	435	UnicodeString line = "Actual: ";
	436	int i = 0;
	437	while (i < actual.length()) {
	438	appendHexUChar(line, actual.char32At(i));
	439	i = actual.moveIndex32(i, 1);
	440	}
	441	errln(line);
	442	}
	443	if (U_FAILURE(status)) {
	444	break;
	445	}
	446	}
	447	}
	448
	449	// testIdentifierInfo. Note that IdentifierInfo is not public ICU API at this time
	450	void IntlTestSpoof::testIdentifierInfo() {
	451	UErrorCode status = U_ZERO_ERROR;
	452	ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status);
	453	ScriptSet bitset2; bitset2.set(USCRIPT_HANGUL, status);
	454	TEST_ASSERT(bitset12.contains(bitset2));
	455	TEST_ASSERT(bitset12.contains(bitset12));
	456	TEST_ASSERT(!bitset2.contains(bitset12));
	457
	458	ScriptSet arabSet; arabSet.set(USCRIPT_ARABIC, status);
	459	ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status);
	460	UElement arabEl; arabEl.pointer = &arabSet;
	461	UElement latinEl; latinEl.pointer = &latinSet;
	462	TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0);
	463	TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0);
	464
	465	UnicodeString scriptString;
	466	bitset12.displayScripts(scriptString);
	467	TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString);
	468
	469	status = U_ZERO_ERROR;
	470	UHashtable *alternates = uhash_open(uhash_hashScriptSet ,uhash_compareScriptSet, NULL, &status);
	471	uhash_puti(alternates, &bitset12, 1, &status);
	472	uhash_puti(alternates, &bitset2, 1, &status);
	473	UnicodeString alternatesString;
	474	IdentifierInfo::displayAlternates(alternatesString, alternates, status);
	475	TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang; Hang Latn") == alternatesString);
	476	TEST_ASSERT_SUCCESS(status);
	477
	478	status = U_ZERO_ERROR;
	479	ScriptSet tScriptSet;
	480	tScriptSet.parseScripts(scriptString, status);
	481	TEST_ASSERT_SUCCESS(status);
	482	TEST_ASSERT(bitset12 == tScriptSet);
	483	UnicodeString ss;
	484	ss.remove();
	485	uhash_close(alternates);
	486
	487	struct Test {
	488	const char *fTestString;
	489	URestrictionLevel fRestrictionLevel;
	490	const char *fNumerics;
	491	const char *fScripts;
	492	const char *fAlternates;
	493	const char *fCommonAlternates;
	494	} tests[] = {
	495	{"\\u0061\\u2665", USPOOF_UNRESTRICTIVE, "[]", "Latn", "", ""},
	496	{"\\u0061\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
	497	{"\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn", "Hira Kana", "Hira Kana"},
	498	{"\\u0061\\u30FC\\u3006\\u30A2", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
	499	{"\\u30A2\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", "Latn Kana", "", ""},
	500	{"\\u0061\\u0031\\u0661", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660]", "Latn", "Arab Thaa", "Arab Thaa"},
	501	{"\\u0061\\u0031\\u0661\\u06F1", USPOOF_UNRESTRICTIVE, "[\\u0030\\u0660\\u06F0]", "Latn Arab", "", ""},
	502	{"\\u0661\\u30FC\\u3006\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_UNRESTRICTIVE,
	503	"[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
	504	{"\\u0061\\u30A2\\u30FC\\u3006\\u0031\\u0967\\u0661\\u06F1", USPOOF_UNRESTRICTIVE,
	505	"[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"}
	506	};
	507
	508	int testNum;
	509	for (testNum = 0; testNum < UPRV_LENGTHOF(tests); testNum++) {
	510	char testNumStr[40];
	511	sprintf(testNumStr, "testNum = %d", testNum);
	512	Test &test = tests[testNum];
	513	status = U_ZERO_ERROR;
	514	UnicodeString testString(test.fTestString); // Note: may do charset conversion.
	515	testString = testString.unescape();
	516	IdentifierInfo idInfo(status);
	517	TEST_ASSERT_SUCCESS(status);
	518	idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));
	519	idInfo.setIdentifier(testString, status);
	520	TEST_ASSERT_MSG(*idInfo.getIdentifier() == testString, testNumStr);
	521
	522	URestrictionLevel restrictionLevel = test.fRestrictionLevel;
	523	TEST_ASSERT_MSG(restrictionLevel == idInfo.getRestrictionLevel(status), testNumStr);
	524
	525	status = U_ZERO_ERROR;
	526	UnicodeSet numerics(UnicodeString(test.fNumerics).unescape(), status);
	527	TEST_ASSERT_SUCCESS(status);
	528	TEST_ASSERT_MSG(numerics == *idInfo.getNumerics(), testNumStr);
	529
	530	ScriptSet scripts;
	531	scripts.parseScripts(UnicodeString(test.fScripts), status);
	532	TEST_ASSERT_MSG(scripts == *idInfo.getScripts(), testNumStr);
	533
	534	UnicodeString alternatesStr;
	535	IdentifierInfo::displayAlternates(alternatesStr, idInfo.getAlternates(), status);
	536	TEST_ASSERT_MSG(UnicodeString(test.fAlternates) == alternatesStr, testNumStr);
	537
	538	ScriptSet commonAlternates;
	539	commonAlternates.parseScripts(UnicodeString(test.fCommonAlternates), status);
	540	TEST_ASSERT_MSG(commonAlternates == *idInfo.getCommonAmongAlternates(), testNumStr);
	541	}
	542
	543	// Test of getScriptCount()
	544	// Script and or Script Extension for chars used in the tests
	545	// \\u3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
	546	// \\uA838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK
	547	// \\u0951 ; Deva Latn # Mn DEVANAGARI STRESS SIGN UDATTA
	548	//
	549	// \\u0370 ; Greek # L GREEK CAPITAL LETTER HETA
	550	// \\u0481 ; Cyrillic # L& CYRILLIC SMALL LETTER KOPPA
	551	// \\u0904 ; Devanagari # Lo DEVANAGARI LETTER SHORT A
	552	// \\u3041 ; Hiragana # Lo HIRAGANA LETTER SMALL A
	553	// 1234 ; Common # ascii digits
	554	// \\u0300 ; Inherited # Mn COMBINING GRAVE ACCENT
	555
	556	struct ScriptTest {
	557	const char *fTestString;
	558	int32_t fScriptCount;
	559	} scriptTests[] = {
	560	{"Hello", 1},
	561	{"Hello\\u0370", 2},
	562	{"1234", 0},
	563	{"Hello1234\\u0300", 1}, // Common and Inherited are ignored.
	564	{"\\u0030", 0},
	565	{"abc\\u0951", 1},
	566	{"abc\\u3013", 2},
	567	{"\\uA838\\u0951", 1}, // Triggers commonAmongAlternates path.
	568	{"\\u3013\\uA838", 2}
	569	};
	570
	571	status = U_ZERO_ERROR;
	572	IdentifierInfo identifierInfo(status);
	573	for (testNum=0; testNum<UPRV_LENGTHOF(scriptTests); testNum++) {
	574	ScriptTest &test = scriptTests[testNum];
	575	char msgBuf[100];
	576	sprintf(msgBuf, "testNum = %d ", testNum);
	577	UnicodeString testString = UnicodeString(test.fTestString).unescape();
	578
	579	status = U_ZERO_ERROR;
	580	identifierInfo.setIdentifier(testString, status);
	581	int32_t scriptCount = identifierInfo.getScriptCount();
	582	TEST_ASSERT_MSG(test.fScriptCount == scriptCount, msgBuf);
	583	}
	584	}
	585
	586	void IntlTestSpoof::testScriptSet() {
	587	ScriptSet s1;
	588	ScriptSet s2;
	589	UErrorCode status = U_ZERO_ERROR;
	590
	591	TEST_ASSERT(s1 == s2);
	592	s1.set(USCRIPT_ARABIC,status);
	593	TEST_ASSERT_SUCCESS(status);
	594	TEST_ASSERT(!(s1 == s2));
	595	TEST_ASSERT(s1.test(USCRIPT_ARABIC, status));
	596	TEST_ASSERT(s1.test(USCRIPT_GREEK, status) == FALSE);
	597
	598	status = U_ZERO_ERROR;
	599	s1.reset(USCRIPT_ARABIC, status);
	600	TEST_ASSERT(s1 == s2);
	601
	602	status = U_ZERO_ERROR;
	603	s1.setAll();
	604	TEST_ASSERT(s1.test(USCRIPT_COMMON, status));
	605	TEST_ASSERT(s1.test(USCRIPT_ETHIOPIC, status));
	606	TEST_ASSERT(s1.test(USCRIPT_CODE_LIMIT, status));
	607	s1.resetAll();
	608	TEST_ASSERT(!s1.test(USCRIPT_COMMON, status));
	609	TEST_ASSERT(!s1.test(USCRIPT_ETHIOPIC, status));
	610	TEST_ASSERT(!s1.test(USCRIPT_CODE_LIMIT, status));
	611
	612	status = U_ZERO_ERROR;
	613	s1.set(USCRIPT_TAKRI, status);
	614	s1.set(USCRIPT_BLISSYMBOLS, status);
	615	s2.setAll();
	616	TEST_ASSERT(s2.contains(s1));
	617	TEST_ASSERT(!s1.contains(s2));
	618	TEST_ASSERT(s2.intersects(s1));
	619	TEST_ASSERT(s1.intersects(s2));
	620	s2.reset(USCRIPT_TAKRI, status);
	621	TEST_ASSERT(!s2.contains(s1));
	622	TEST_ASSERT(!s1.contains(s2));
	623	TEST_ASSERT(s1.intersects(s2));
	624	TEST_ASSERT(s2.intersects(s1));
	625	TEST_ASSERT_SUCCESS(status);
	626
	627	status = U_ZERO_ERROR;
	628	s1.resetAll();
	629	s1.set(USCRIPT_NKO, status);
	630	s1.set(USCRIPT_COMMON, status);
	631	s2 = s1;
	632	TEST_ASSERT(s2 == s1);
	633	TEST_ASSERT_EQ(2, s2.countMembers());
	634	s2.intersect(s1);
	635	TEST_ASSERT(s2 == s1);
	636	s2.setAll();
	637	TEST_ASSERT(!(s2 == s1));
	638	TEST_ASSERT(s2.countMembers() >= USCRIPT_CODE_LIMIT);
	639	s2.intersect(s1);
	640	TEST_ASSERT(s2 == s1);
	641
	642	s2.setAll();
	643	s2.reset(USCRIPT_COMMON, status);
	644	s2.intersect(s1);
	645	TEST_ASSERT(s2.countMembers() == 1);
	646
	647	s1.resetAll();
	648	s1.set(USCRIPT_AFAKA, status);
	649	s1.set(USCRIPT_VAI, status);
	650	s1.set(USCRIPT_INHERITED, status);
	651	int32_t n = -1;
	652	for (int32_t i=0; i<4; i++) {
	653	n = s1.nextSetBit(n+1);
	654	switch (i) {
	655	case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED, n); break;
	656	case 1: TEST_ASSERT_EQ(USCRIPT_VAI, n); break;
	657	case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA, n); break;
	658	case 3: TEST_ASSERT_EQ(-1, (int32_t)n); break;
	659	default: TEST_ASSERT(FALSE);
	660	}
	661	}
	662	TEST_ASSERT_SUCCESS(status);
	663	}
	664
	665
	666	void IntlTestSpoof::testRestrictionLevel() {
	667	struct Test {
	668	const char *fId;
	669	URestrictionLevel fExpectedRestrictionLevel;
	670	} tests[] = {
	671	{"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE},
	672	{"a", USPOOF_ASCII},
	673	{"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE},
	674	{"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE},
	675	{"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE},
	676	{"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE}
	677	};
	678	char msgBuffer[100];
	679
	680	URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE,
	681	USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE,
	682	USPOOF_UNRESTRICTIVE};
	683
	684	UErrorCode status = U_ZERO_ERROR;
	685	IdentifierInfo idInfo(status);
	686	TEST_ASSERT_SUCCESS(status);
	687	idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));
	688	TEST_ASSERT_SUCCESS(status);
	689	for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {
	690	status = U_ZERO_ERROR;
	691	const Test &test = tests[testNum];
	692	UnicodeString testString = UnicodeString(test.fId).unescape();
	693	URestrictionLevel expectedLevel = test.fExpectedRestrictionLevel;
	694	idInfo.setIdentifier(testString, status);
	695	sprintf(msgBuffer, "testNum = %d ", testNum);
	696	TEST_ASSERT_SUCCESS(status);
	697	TEST_ASSERT_MSG(expectedLevel == idInfo.getRestrictionLevel(status), msgBuffer);
	698	for (int levelIndex=0; levelIndex<UPRV_LENGTHOF(restrictionLevels); levelIndex++) {
	699	status = U_ZERO_ERROR;
	700	URestrictionLevel levelSetInSpoofChecker = restrictionLevels[levelIndex];
	701	USpoofChecker *sc = uspoof_open(&status);
	702	uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status);
	703	uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
	704	uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
	705	int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status);
	706
	707	// we want to fail if the text is (say) MODERATE and the testLevel is ASCII
	708	int32_t expectedValue = 0;
	709	if (expectedLevel > levelSetInSpoofChecker) {
	710	expectedValue \|= USPOOF_RESTRICTION_LEVEL;
	711	}
	712	if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) {
	713	expectedValue \|= USPOOF_CHAR_LIMIT;
	714	}
	715	sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
	716	testNum, levelIndex, expectedValue, actualValue);
	717	TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer);
	718	TEST_ASSERT_SUCCESS(status);
	719
	720	// Run the same check again, with the Spoof Checker configured to return
	721	// the actual restriction level.
	722	uspoof_setChecks(sc, USPOOF_AUX_INFO \| USPOOF_RESTRICTION_LEVEL, &status);
	723	uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
	724	uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
	725	int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
	726	TEST_ASSERT_SUCCESS(status);
	727	if (U_SUCCESS(status)) {
	728	TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK);
	729	TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS);
	730	}
	731	uspoof_close(sc);
	732	}
	733	}
	734	}
	735
	736
	737	void IntlTestSpoof::testMixedNumbers() {
	738	struct Test {
	739	const char *fTestString;
	740	const char *fExpectedSet;
	741	} tests[] = {
	742	{"1", "[0]"},
	743	{"\\u0967", "[\\u0966]"},
	744	{"1\\u0967", "[0\\u0966]"},
	745	{"\\u0661\\u06F1", "[\\u0660\\u06F0]"}
	746	};
	747	UErrorCode status = U_ZERO_ERROR;
	748	IdentifierInfo idInfo(status);
	749	for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {
	750	char msgBuf[100];
	751	sprintf(msgBuf, "testNum = %d ", testNum);
	752	Test &test = tests[testNum];
	753
	754	status = U_ZERO_ERROR;
	755	UnicodeString testString = UnicodeString(test.fTestString).unescape();
	756	UnicodeSet expectedSet(UnicodeString(test.fExpectedSet).unescape(), status);
	757	idInfo.setIdentifier(testString, status);
	758	TEST_ASSERT_SUCCESS(status);
	759	TEST_ASSERT_MSG(expectedSet == *idInfo.getNumerics(), msgBuf);
	760
	761	status = U_ZERO_ERROR;
	762	USpoofChecker *sc = uspoof_open(&status);
	763	uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this
	764	int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
	765	UBool mixedNumberFailure = ((result & USPOOF_MIXED_NUMBERS) != 0);
	766	TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf);
	767	uspoof_close(sc);
	768	}
	769	}
	770
	771	#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */