1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2011-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
10 * IntlTestSpoof tests for USpoofDetector
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
19 #include "unicode/normlzr.h"
20 #include "unicode/regex.h"
21 #include "unicode/unistr.h"
22 #include "unicode/uscript.h"
23 #include "unicode/uspoof.h"
26 #include "scriptset.h"
// TEST_ASSERT_SUCCESS(status)
//   Reports through errcheckln() when the given UErrorCode satisfies U_FAILURE().
//   The message carries the source file, the line of the macro invocation and the
//   symbolic error name from u_errorName(). Nothing is returned and control
//   continues after the macro, so callers decide whether to bail out.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// TEST_ASSERT(expr)
//   Reports a test failure through errln() when expr compares equal to FALSE.
//   The message includes the file, the invocation line and the stringized
//   expression. Control continues after the macro either way.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.", __FILE__, __LINE__, #expr);}}
// TEST_ASSERT_MSG(expr, msg)
//   Like TEST_ASSERT, but reports through dataerrln() and inserts the caller-supplied
//   C string msg (printed with %s) ahead of the stringized expression.
#define TEST_ASSERT_MSG(expr, msg) {if ((expr)==FALSE) { \
    dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", __FILE__, __LINE__, msg, #expr);}}
// TEST_ASSERT_EQ(a, b)
//   Reports a test failure through errln() when a != b. Both operands are printed
//   with %d next to their stringized source text, so they should be int-compatible.
//   Note that a and b are each evaluated again when the failure message is built.
#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
    errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
          __FILE__, __LINE__, #a, (a), #b, (b)); }}
// TEST_ASSERT_NE(a, b)
//   Reports a test failure through errln() when a == b. Both operands are printed
//   with %d next to their stringized source text, so they should be int-compatible.
//   Note that a and b are each evaluated again when the failure message is built.
#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
    errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
          __FILE__, __LINE__, #a, (a), #b, (b)); }}
50 * TEST_SETUP and TEST_TEARDOWN
51 * macros to handle the boilerplate around setting up test case.
52 * Put arbitrary test code between SETUP and TEARDOWN.
53 * "sc" is the ready-to-go SpoofChecker for use in the tests.
55 #define TEST_SETUP { \
56 UErrorCode status = U_ZERO_ERROR; \
58 sc = uspoof_open(&status); \
59 TEST_ASSERT_SUCCESS(status); \
60 USpoofCheckResult *checkResult; \
61 checkResult = uspoof_openCheckResult(&status); \
62 TEST_ASSERT_SUCCESS(status); \
63 if (U_SUCCESS(status)){
65 #define TEST_TEARDOWN \
67 TEST_ASSERT_SUCCESS(status); \
68 uspoof_closeCheckResult(checkResult); \
75 void IntlTestSpoof::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
78 logln("TestSuite spoof: ");
81 TESTCASE_AUTO(testSpoofAPI
);
82 TESTCASE_AUTO(testSkeleton
);
83 TESTCASE_AUTO(testAreConfusable
);
84 TESTCASE_AUTO(testInvisible
);
85 TESTCASE_AUTO(testConfData
);
86 TESTCASE_AUTO(testBug8654
);
87 TESTCASE_AUTO(testScriptSet
);
88 TESTCASE_AUTO(testRestrictionLevel
);
89 TESTCASE_AUTO(testMixedNumbers
);
90 TESTCASE_AUTO(testBug12153
);
91 TESTCASE_AUTO(testBug12825
);
92 TESTCASE_AUTO(testBug12815
);
93 TESTCASE_AUTO(testBug13314_MixedNumbers
);
94 TESTCASE_AUTO(testBug13328_MixedCombiningMarks
);
95 TESTCASE_AUTO(testCombiningDot
);
99 void IntlTestSpoof::testSpoofAPI() {
102 UnicodeString
s("xyz"); // Many latin ranges are whole-script confusable with other scripts.
103 // If this test starts failing, consult confusablesWholeScript.txt
104 int32_t position
= 666;
105 int32_t checkResults
= uspoof_checkUnicodeString(sc
, s
, &position
, &status
);
106 TEST_ASSERT_SUCCESS(status
);
107 TEST_ASSERT_EQ(0, checkResults
);
108 TEST_ASSERT_EQ(0, position
);
112 UnicodeString
s1("cxs");
113 UnicodeString s2
= UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs"
114 int32_t checkResults
= uspoof_areConfusableUnicodeString(sc
, s1
, s2
, &status
);
115 TEST_ASSERT_SUCCESS(status
);
116 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_WHOLE_SCRIPT_CONFUSABLE
, checkResults
);
121 UnicodeString
s("I1l0O");
123 UnicodeString
&retStr
= uspoof_getSkeletonUnicodeString(sc
, USPOOF_ANY_CASE
, s
, dest
, &status
);
124 TEST_ASSERT_SUCCESS(status
);
125 TEST_ASSERT(UnicodeString("lllOO") == dest
);
126 TEST_ASSERT(&dest
== &retStr
);
// CHECK_SKELETON(type, input, expected)
//   Forwards one skeleton test case to checkSkeleton(), using the variable named
//   "sc" that is in scope at the call site as the spoof checker and passing
//   __LINE__ so a failure can be traced back to the invoking line.
#define CHECK_SKELETON(type, input, expected) { \
    checkSkeleton(sc, type, input, expected, __LINE__); \
    }
136 // testSkeleton. Spot check a number of confusable skeleton substitutions from the
137 // Unicode data file confusables.txt
138 // Test cases chosen for substitutions of various lengths, and
139 // membership in different mapping tables.
140 // Note: for ICU 55, all tables collapsed to the MA table data.
141 // TODO: for ICU 56 with Unicode 8, revisit this test.
143 void IntlTestSpoof::testSkeleton() {
144 const uint32_t ML
= 0;
145 const uint32_t SL
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
;
146 const uint32_t MA
= USPOOF_ANY_CASE
;
147 const uint32_t SA
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
;
150 CHECK_SKELETON(SL
, "nochange", "nochange");
151 CHECK_SKELETON(SA
, "nochange", "nochange");
152 CHECK_SKELETON(ML
, "nochange", "nochange");
153 CHECK_SKELETON(MA
, "nochange", "nochange");
154 CHECK_SKELETON(MA
, "love", "love");
155 CHECK_SKELETON(MA
, "1ove", "love"); // Digit 1 to letter l
156 CHECK_SKELETON(ML
, "OOPS", "OOPS");
157 CHECK_SKELETON(ML
, "00PS", "OOPS");
158 CHECK_SKELETON(MA
, "OOPS", "OOPS");
159 CHECK_SKELETON(MA
, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only
160 CHECK_SKELETON(SL
, "\\u059c", "\\u0301");
161 CHECK_SKELETON(SL
, "\\u2A74", "\\u003A\\u003A\\u003D");
162 CHECK_SKELETON(SL
, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)"
163 CHECK_SKELETON(SL
, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f");
165 // This mapping exists in the ML and MA tables, does not exist in SL, SA
170 CHECK_SKELETON(SL
, "\\u0C83", "\\u0983");
171 CHECK_SKELETON(SA
, "\\u0C83", "\\u0983");
172 CHECK_SKELETON(ML
, "\\u0C83", "\\u0983");
173 CHECK_SKELETON(MA
, "\\u0C83", "\\u0983");
175 // 0391 mappings exist only in MA and SA tables.
176 CHECK_SKELETON(MA
, "\\u0391", "A");
177 CHECK_SKELETON(SA
, "\\u0391", "A");
178 CHECK_SKELETON(ML
, "\\u0391", "A");
179 CHECK_SKELETON(SL
, "\\u0391", "A");
181 // 13CF Mappings in all four tables, different in MA.
182 CHECK_SKELETON(ML
, "\\u13CF", "b");
183 CHECK_SKELETON(MA
, "\\u13CF", "b");
184 CHECK_SKELETON(SL
, "\\u13CF", "b");
185 CHECK_SKELETON(SA
, "\\u13CF", "b");
187 // 0022 ; 0027 0027 ;
189 CHECK_SKELETON(SL
, "\\u0022", "\\u0027\\u0027");
190 CHECK_SKELETON(SA
, "\\u0022", "\\u0027\\u0027");
191 CHECK_SKELETON(ML
, "\\u0022", "\\u0027\\u0027");
192 CHECK_SKELETON(MA
, "\\u0022", "\\u0027\\u0027");
194 // 017F mappings exist only in MA and SA tables.
195 CHECK_SKELETON(MA
, "\\u017F", "f");
196 CHECK_SKELETON(SA
, "\\u017F", "f");
197 CHECK_SKELETON(ML
, "\\u017F", "f");
198 CHECK_SKELETON(SL
, "\\u017F", "f");
205 // Run a single confusable skeleton transformation test case.
207 void IntlTestSpoof::checkSkeleton(const USpoofChecker
*sc
, uint32_t type
,
208 const char *input
, const char *expected
, int32_t lineNum
) {
209 UnicodeString uInput
= UnicodeString(input
).unescape();
210 UnicodeString uExpected
= UnicodeString(expected
).unescape();
212 UErrorCode status
= U_ZERO_ERROR
;
213 UnicodeString actual
;
214 uspoof_getSkeletonUnicodeString(sc
, type
, uInput
, actual
, &status
);
215 if (U_FAILURE(status
)) {
216 errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__
, __LINE__
, lineNum
,
217 u_errorName(status
));
220 if (uExpected
!= actual
) {
221 errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
222 __FILE__
, __LINE__
, lineNum
);
223 errln(UnicodeString(" Actual Skeleton: \"") + actual
+ UnicodeString("\"\n") +
224 UnicodeString(" Expected Skeleton: \"") + uExpected
+ UnicodeString("\""));
228 void IntlTestSpoof::testAreConfusable() {
230 UnicodeString
s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
231 "A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
232 UnicodeString
s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
233 "A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
234 int32_t result
= uspoof_areConfusableUnicodeString(sc
, s1
, s2
, &status
);
235 TEST_ASSERT_SUCCESS(status
);
236 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, result
);
241 void IntlTestSpoof::testInvisible() {
243 UnicodeString s
= UnicodeString("abcd\\u0301ef").unescape();
244 int32_t position
= -42;
245 TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc
, s
, &position
, &status
));
246 TEST_ASSERT_SUCCESS(status
);
247 TEST_ASSERT(0 == position
);
249 UnicodeString s2
= UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
250 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s2
, &position
, &status
));
251 TEST_ASSERT_SUCCESS(status
);
252 TEST_ASSERT_EQ(0, position
);
254 // Two acute accents, one from the composed a with acute accent, \u00e1,
257 UnicodeString s3
= UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
258 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s3
, &position
, &status
));
259 TEST_ASSERT_SUCCESS(status
);
260 TEST_ASSERT_EQ(0, position
);
264 void IntlTestSpoof::testBug8654() {
266 UnicodeString s
= UnicodeString("B\\u00c1\\u0301").unescape();
267 int32_t position
= -42;
268 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s
, &position
, &status
) & USPOOF_INVISIBLE
);
269 TEST_ASSERT_SUCCESS(status
);
270 TEST_ASSERT_EQ(0, position
);
274 static UnicodeString
parseHex(const UnicodeString
&in
) {
275 // Convert a series of hex numbers in a Unicode String to a string with the
276 // corresponding characters.
277 // The conversion is _really_ annoying. There must be some function to just do it.
278 UnicodeString result
;
280 for (int32_t i
=0; i
<in
.length(); i
++) {
281 UChar c
= in
.charAt(i
);
282 if (c
== 0x20) { // Space
287 } else if (c
>=0x30 && c
<=0x39) {
288 cc
= (cc
<<4) + (c
- 0x30);
289 } else if ((c
>=0x41 && c
<=0x46) || (c
>=0x61 && c
<=0x66)) {
290 cc
= (cc
<<4) + (c
& 0x0f)+9;
292 // else do something with bad input.
302 // Append the hex form of a UChar32 to a UnicodeString.
303 // Used in formatting error messages.
304 // Match the formatting of numbers in confusables.txt
305 // Minimum of 4 digits, no leading zeroes for positions 5 and up.
307 static void appendHexUChar(UnicodeString
&dest
, UChar32 c
) {
308 UBool doZeroes
= FALSE
;
309 for (int bitNum
=28; bitNum
>=0; bitNum
-=4) {
313 int hexDigit
= (c
>>bitNum
) & 0x0f;
314 if (hexDigit
!= 0 || doZeroes
) {
316 dest
.append((UChar
)(hexDigit
<=9? hexDigit
+ 0x30: hexDigit
-10 + 0x41));
319 dest
.append((UChar
)0x20);
322 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer
, FILE, fclose
);
324 // testConfData - Check each data item from the Unicode confusables.txt file,
325 // verify that it transforms correctly in a skeleton.
327 void IntlTestSpoof::testConfData() {
329 if (getUnidataPath(buffer
) == NULL
) {
330 errln("Skipping test spoof/testConfData. Unable to find path to source/data/unidata/.");
333 uprv_strcat(buffer
, "confusables.txt");
335 LocalStdioFilePointer
f(fopen(buffer
, "rb"));
337 errln("Skipping test spoof/testConfData. File confusables.txt not accessible.");
340 fseek(f
.getAlias(), 0, SEEK_END
);
341 int32_t fileSize
= ftell(f
.getAlias());
342 LocalArray
<char> fileBuf(new char[fileSize
]);
343 fseek(f
.getAlias(), 0, SEEK_SET
);
344 int32_t amt_read
= static_cast<int32_t>(fread(fileBuf
.getAlias(), 1, fileSize
, f
.getAlias()));
345 TEST_ASSERT_EQ(amt_read
, fileSize
);
346 TEST_ASSERT(fileSize
>0);
347 if (amt_read
!= fileSize
|| fileSize
<=0) {
350 UnicodeString confusablesTxt
= UnicodeString::fromUTF8(StringPiece(fileBuf
.getAlias(), fileSize
));
352 UErrorCode status
= U_ZERO_ERROR
;
353 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
354 TEST_ASSERT_SUCCESS(status
);
356 // Parse lines from the confusables.txt file. Example Line:
357 // FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....
358 // Three fields. The hex fields can contain more than one character,
359 // and each character may be more than 4 digits (for supplementals)
360 // This regular expression matches lines and splits the fields into capture groups.
361 RegexMatcher
parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt
, 0, status
);
362 TEST_ASSERT_SUCCESS(status
);
363 while (parseLine
.find()) {
364 UnicodeString from
= parseHex(parseLine
.group(1, status
));
365 if (!Normalizer::isNormalized(from
, UNORM_NFD
, status
)) {
366 // The source character was not NFD.
367 // Skip this case; the first step in obtaining a skeleton is to NFD the input,
368 // so the mapping in this line of confusables.txt will never be applied.
372 UnicodeString rawExpected
= parseHex(parseLine
.group(2, status
));
373 UnicodeString expected
;
374 Normalizer::decompose(rawExpected
, FALSE
/*NFD*/, 0, expected
, status
);
375 TEST_ASSERT_SUCCESS(status
);
377 int32_t skeletonType
= 0;
378 UnicodeString tableType
= parseLine
.group(3, status
);
379 TEST_ASSERT_SUCCESS(status
);
380 if (tableType
.indexOf("SL") >= 0) {
381 skeletonType
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
;
382 } else if (tableType
.indexOf("SA") >= 0) {
383 skeletonType
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
;
384 } else if (tableType
.indexOf("ML") >= 0) {
386 } else if (tableType
.indexOf("MA") >= 0) {
387 skeletonType
= USPOOF_ANY_CASE
;
390 UnicodeString actual
;
391 uspoof_getSkeletonUnicodeString(sc
.getAlias(), skeletonType
, from
, actual
, &status
);
392 TEST_ASSERT_SUCCESS(status
);
393 TEST_ASSERT(actual
== expected
);
394 if (actual
!= expected
) {
395 errln(parseLine
.group(0, status
));
396 UnicodeString line
= "Actual: ";
398 while (i
< actual
.length()) {
399 appendHexUChar(line
, actual
.char32At(i
));
400 i
= actual
.moveIndex32(i
, 1);
404 if (U_FAILURE(status
)) {
411 void IntlTestSpoof::testScriptSet() {
414 UErrorCode status
= U_ZERO_ERROR
;
416 TEST_ASSERT(s1
== s2
);
417 s1
.set(USCRIPT_ARABIC
,status
);
418 TEST_ASSERT_SUCCESS(status
);
419 TEST_ASSERT(!(s1
== s2
));
420 TEST_ASSERT(s1
.test(USCRIPT_ARABIC
, status
));
421 TEST_ASSERT(s1
.test(USCRIPT_GREEK
, status
) == FALSE
);
423 status
= U_ZERO_ERROR
;
424 s1
.reset(USCRIPT_ARABIC
, status
);
425 TEST_ASSERT(s1
== s2
);
427 status
= U_ZERO_ERROR
;
429 TEST_ASSERT(s1
.test(USCRIPT_COMMON
, status
));
430 TEST_ASSERT(s1
.test(USCRIPT_ETHIOPIC
, status
));
431 TEST_ASSERT(s1
.test(USCRIPT_CODE_LIMIT
, status
));
433 TEST_ASSERT(!s1
.test(USCRIPT_COMMON
, status
));
434 TEST_ASSERT(!s1
.test(USCRIPT_ETHIOPIC
, status
));
435 TEST_ASSERT(!s1
.test(USCRIPT_CODE_LIMIT
, status
));
437 status
= U_ZERO_ERROR
;
438 s1
.set(USCRIPT_TAKRI
, status
);
439 s1
.set(USCRIPT_BLISSYMBOLS
, status
);
441 TEST_ASSERT(s2
.contains(s1
));
442 TEST_ASSERT(!s1
.contains(s2
));
443 TEST_ASSERT(s2
.intersects(s1
));
444 TEST_ASSERT(s1
.intersects(s2
));
445 s2
.reset(USCRIPT_TAKRI
, status
);
446 TEST_ASSERT(!s2
.contains(s1
));
447 TEST_ASSERT(!s1
.contains(s2
));
448 TEST_ASSERT(s1
.intersects(s2
));
449 TEST_ASSERT(s2
.intersects(s1
));
450 TEST_ASSERT_SUCCESS(status
);
452 status
= U_ZERO_ERROR
;
454 s1
.set(USCRIPT_NKO
, status
);
455 s1
.set(USCRIPT_COMMON
, status
);
457 TEST_ASSERT(s2
== s1
);
458 TEST_ASSERT_EQ(2, s2
.countMembers());
460 TEST_ASSERT(s2
== s1
);
462 TEST_ASSERT(!(s2
== s1
));
463 TEST_ASSERT(s2
.countMembers() >= USCRIPT_CODE_LIMIT
);
465 TEST_ASSERT(s2
== s1
);
468 s2
.reset(USCRIPT_COMMON
, status
);
470 TEST_ASSERT(s2
.countMembers() == 1);
473 TEST_ASSERT(s1
.isEmpty());
474 s1
.set(USCRIPT_LATIN
, status
);
475 TEST_ASSERT(!s1
.isEmpty());
477 TEST_ASSERT(!s1
.isEmpty());
478 TEST_ASSERT_SUCCESS(status
);
481 s1
.set(USCRIPT_AFAKA
, status
);
482 s1
.set(USCRIPT_VAI
, status
);
483 s1
.set(USCRIPT_INHERITED
, status
);
485 for (int32_t i
=0; i
<4; i
++) {
486 n
= s1
.nextSetBit(n
+1);
488 case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED
, n
); break;
489 case 1: TEST_ASSERT_EQ(USCRIPT_VAI
, n
); break;
490 case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA
, n
); break;
491 case 3: TEST_ASSERT_EQ(-1, (int32_t)n
); break;
492 default: TEST_ASSERT(FALSE
);
495 TEST_ASSERT_SUCCESS(status
);
497 // Script extensions. Depends on data.
499 s1
.setScriptExtensions(0x67, status
);
500 TEST_ASSERT(s1
.countMembers() == 1);
501 TEST_ASSERT(s1
.test(USCRIPT_LATIN
, status
));
502 TEST_ASSERT_SUCCESS(status
);
505 s1
.setScriptExtensions(0x303C, status
);
506 TEST_ASSERT(s1
.countMembers() == 3);
507 TEST_ASSERT(s1
.test(USCRIPT_HAN
, status
));
508 TEST_ASSERT(s1
.test(USCRIPT_HIRAGANA
, status
));
509 TEST_ASSERT(s1
.test(USCRIPT_KATAKANA
, status
));
510 TEST_ASSERT_SUCCESS(status
);
513 ScriptSet bitset12
; bitset12
.set(USCRIPT_LATIN
, status
).set(USCRIPT_HANGUL
, status
);
514 ScriptSet bitset2
; bitset2
.set(USCRIPT_HANGUL
, status
);
515 TEST_ASSERT(bitset12
.contains(bitset2
));
516 TEST_ASSERT(bitset12
.contains(bitset12
));
517 TEST_ASSERT(!bitset2
.contains(bitset12
));
519 ScriptSet arabSet
; arabSet
.set(USCRIPT_ARABIC
, status
);
520 ScriptSet latinSet
; latinSet
.set(USCRIPT_LATIN
, status
);
521 UElement arabEl
; arabEl
.pointer
= &arabSet
;
522 UElement latinEl
; latinEl
.pointer
= &latinSet
;
523 TEST_ASSERT(uhash_compareScriptSet(arabEl
, latinEl
) < 0);
524 TEST_ASSERT(uhash_compareScriptSet(latinEl
, arabEl
) > 0);
526 UnicodeString scriptString
;
527 bitset12
.displayScripts(scriptString
);
528 TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString
);
532 void IntlTestSpoof::testRestrictionLevel() {
535 URestrictionLevel fExpectedRestrictionLevel
;
537 {"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE
},
539 {"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE
},
540 {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE
},
541 {"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE
},
542 {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE
},
543 {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE
},
544 {"\\u0061\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
545 {"\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
546 {"\\u0061\\u30FC\\u303C\\u30A2", USPOOF_HIGHLY_RESTRICTIVE
},
547 {"\\u30A2\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
548 {"\\u0061\\u0031\\u0661", USPOOF_MODERATELY_RESTRICTIVE
},
549 {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_MODERATELY_RESTRICTIVE
},
550 {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE
},
551 {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE
}
554 URestrictionLevel restrictionLevels
[] = { USPOOF_ASCII
, USPOOF_SINGLE_SCRIPT_RESTRICTIVE
,
555 USPOOF_HIGHLY_RESTRICTIVE
, USPOOF_MODERATELY_RESTRICTIVE
, USPOOF_MINIMALLY_RESTRICTIVE
,
556 USPOOF_UNRESTRICTIVE
};
558 UErrorCode status
= U_ZERO_ERROR
;
559 UnicodeSet allowedChars
;
560 // Allowed Identifier Characters. In addition to the Recommended Set,
561 // allow u303c, which has an interesting script extension of Hani Hira Kana.
562 allowedChars
.addAll(*uspoof_getRecommendedUnicodeSet(&status
)).add(0x303C);
564 for (int32_t testNum
=0; testNum
< UPRV_LENGTHOF(tests
); testNum
++) {
565 status
= U_ZERO_ERROR
;
566 const Test
&test
= tests
[testNum
];
567 UnicodeString testString
= UnicodeString(test
.fId
).unescape();
568 URestrictionLevel expectedLevel
= test
.fExpectedRestrictionLevel
;
569 for (int levelIndex
=0; levelIndex
<UPRV_LENGTHOF(restrictionLevels
); levelIndex
++) {
570 status
= U_ZERO_ERROR
;
571 URestrictionLevel levelSetInSpoofChecker
= restrictionLevels
[levelIndex
];
572 USpoofChecker
*sc
= uspoof_open(&status
);
573 uspoof_setAllowedChars(sc
, allowedChars
.toUSet(), &status
);
574 uspoof_setRestrictionLevel(sc
, levelSetInSpoofChecker
);
575 uspoof_setChecks(sc
, USPOOF_RESTRICTION_LEVEL
, &status
);
576 int32_t actualValue
= uspoof_checkUnicodeString(sc
, testString
, NULL
, &status
);
578 // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
579 int32_t expectedValue
= 0;
580 if (expectedLevel
> levelSetInSpoofChecker
) {
581 expectedValue
|= USPOOF_RESTRICTION_LEVEL
;
583 sprintf(msgBuffer
, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
584 testNum
, levelIndex
, expectedValue
, actualValue
);
585 TEST_ASSERT_MSG(expectedValue
== actualValue
, msgBuffer
);
586 TEST_ASSERT_SUCCESS(status
);
588 // Run the same check again, with the Spoof Checker configured to return
589 // the actual restriction level.
590 uspoof_setAllowedChars(sc
, allowedChars
.toUSet(), &status
);
591 uspoof_setRestrictionLevel(sc
, levelSetInSpoofChecker
);
592 uspoof_setChecks(sc
, USPOOF_AUX_INFO
| USPOOF_RESTRICTION_LEVEL
, &status
);
593 int32_t result
= uspoof_checkUnicodeString(sc
, testString
, NULL
, &status
);
594 TEST_ASSERT_SUCCESS(status
);
595 if (U_SUCCESS(status
)) {
596 TEST_ASSERT_EQ(expectedLevel
, result
& USPOOF_RESTRICTION_LEVEL_MASK
);
597 TEST_ASSERT_EQ(expectedValue
, result
& USPOOF_ALL_CHECKS
);
605 void IntlTestSpoof::testMixedNumbers() {
607 const char *fTestString
;
608 const char *fExpectedSet
;
611 {"\\u0967", "[\\u0966]"},
612 {"1\\u0967", "[0\\u0966]"},
613 {"\\u0661\\u06F1", "[\\u0660\\u06F0]"},
614 {"\\u0061\\u2665", "[]"},
615 {"\\u0061\\u303C", "[]"},
616 {"\\u0061\\u30FC\\u303C", "[]"},
617 {"\\u0061\\u30FC\\u303C\\u30A2", "[]"},
618 {"\\u30A2\\u0061\\u30FC\\u303C", "[]"},
619 {"\\u0061\\u0031\\u0661", "[\\u0030\\u0660]"},
620 {"\\u0061\\u0031\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0]"},
621 {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"},
622 {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"}
624 UErrorCode status
= U_ZERO_ERROR
;
625 for (int32_t testNum
=0; testNum
< UPRV_LENGTHOF(tests
); testNum
++) {
627 sprintf(msgBuf
, "testNum = %d ", testNum
);
628 Test
&test
= tests
[testNum
];
630 status
= U_ZERO_ERROR
;
631 UnicodeString testString
= UnicodeString(test
.fTestString
).unescape();
632 UnicodeSet
expectedSet(UnicodeString(test
.fExpectedSet
).unescape(), status
);
634 status
= U_ZERO_ERROR
;
636 uspoof_setChecks(sc
, USPOOF_MIXED_NUMBERS
, &status
); // only check this
637 uspoof_check2UnicodeString(sc
, testString
, checkResult
, &status
);
638 UBool mixedNumberFailure
= ((uspoof_getCheckResultChecks(checkResult
, &status
) & USPOOF_MIXED_NUMBERS
) != 0);
639 TEST_ASSERT_MSG((expectedSet
.size() > 1) == mixedNumberFailure
, msgBuf
);
640 const UnicodeSet
* actualSet
= UnicodeSet::fromUSet(uspoof_getCheckResultNumerics(checkResult
, &status
));
641 TEST_ASSERT_MSG(expectedSet
== *actualSet
, msgBuf
);
646 // Bug #12153 - uspoof_setRestrictionLevel() should enable restriction level testing.
648 void IntlTestSpoof::testBug12153() {
649 UErrorCode status
= U_ZERO_ERROR
;
650 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
651 if (!assertSuccess("", status
, true, __FILE__
, __LINE__
)) { return; }
652 int32_t checks
= uspoof_getChecks(sc
.getAlias(), &status
);
653 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) != 0);
654 checks
&= ~USPOOF_RESTRICTION_LEVEL
;
655 uspoof_setChecks(sc
.getAlias(), checks
, &status
);
656 checks
= uspoof_getChecks(sc
.getAlias(), &status
);
657 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) == 0);
659 uspoof_setRestrictionLevel(sc
.getAlias(), USPOOF_MODERATELY_RESTRICTIVE
);
660 checks
= uspoof_getChecks(sc
.getAlias(), &status
);
661 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) != 0);
662 TEST_ASSERT_SUCCESS(status
);
665 // uspoof_checkUnicodeString should NOT have an infinite loop.
666 void IntlTestSpoof::testBug12825() {
667 UErrorCode status
= U_ZERO_ERROR
;
668 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
669 TEST_ASSERT_SUCCESS(status
);
670 uspoof_setChecks(sc
.getAlias(), USPOOF_ALL_CHECKS
| USPOOF_AUX_INFO
, &status
);
671 TEST_ASSERT_SUCCESS(status
);
672 uspoof_checkUnicodeString(sc
.getAlias(), UnicodeString("\\u30FB").unescape(), NULL
, &status
);
673 TEST_ASSERT_SUCCESS(status
);
676 // uspoof_getSkeleton should NOT set an ILLEGAL_ARGUMENT_EXCEPTION.
677 void IntlTestSpoof::testBug12815() {
678 UErrorCode status
= U_ZERO_ERROR
;
679 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
680 TEST_ASSERT_SUCCESS(status
);
681 uspoof_setChecks(sc
.getAlias(), USPOOF_RESTRICTION_LEVEL
, &status
);
682 TEST_ASSERT_SUCCESS(status
);
683 UnicodeString result
;
684 uspoof_getSkeletonUnicodeString(sc
.getAlias(), 0, UnicodeString("hello world"), result
, &status
);
685 TEST_ASSERT_SUCCESS(status
);
688 void IntlTestSpoof::testBug13314_MixedNumbers() {
689 UErrorCode status
= U_ZERO_ERROR
;
690 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
691 if (!assertSuccess("", status
, true, __FILE__
, __LINE__
)) { return; }
692 uspoof_setChecks(sc
.getAlias(), USPOOF_ALL_CHECKS
, &status
);
693 TEST_ASSERT_SUCCESS(status
);
694 int32_t failedChecks
= uspoof_areConfusableUnicodeString(sc
.getAlias(), u
"列", u
"列", &status
);
695 TEST_ASSERT_SUCCESS(status
);
696 assertEquals("The CJK strings should be confusable", USPOOF_SINGLE_SCRIPT_CONFUSABLE
, failedChecks
);
697 failedChecks
= uspoof_check2UnicodeString(sc
.getAlias(), u
"3Ȝ", nullptr, &status
);
698 TEST_ASSERT_SUCCESS(status
);
699 assertEquals("The '33' string does not fail spoof", 0, failedChecks
);
702 void IntlTestSpoof::testBug13328_MixedCombiningMarks() {
703 UErrorCode status
= U_ZERO_ERROR
;
704 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
705 if (!assertSuccess("", status
, true, __FILE__
, __LINE__
)) { return; }
706 int32_t failedChecks
= uspoof_check2UnicodeString(sc
.getAlias(), u
"\u0061\u0F84", nullptr, &status
);
707 TEST_ASSERT_SUCCESS(status
);
709 "The mismatched combining marks string fails spoof",
710 USPOOF_RESTRICTION_LEVEL
,
714 void IntlTestSpoof::testCombiningDot() {
715 UErrorCode status
= U_ZERO_ERROR
;
716 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
717 if (!assertSuccess("", status
, true, __FILE__
, __LINE__
)) { return; }
718 uspoof_setChecks(sc
.getAlias(), USPOOF_HIDDEN_OVERLAY
, &status
);
719 TEST_ASSERT_SUCCESS(status
);
721 static const struct TestCase
{
723 const char16_t* input
;
738 {true, u
"i\u0307\u0307"},
739 {true, u
"abci\u0307def"},
740 {false, u
"i\u0301\u0307"}, // U+0301 has combining class ABOVE (230)
741 {true, u
"i\u0320\u0307"}, // U+0320 has combining class BELOW
742 {true, u
"i\u0320\u0321\u0307"}, // U+0321 also has combining class BELOW
743 {false, u
"i\u0320\u0301\u0307"},
744 {false, u
"iz\u0307"},
747 for (auto& cas
: cases
) {
748 int32_t failedChecks
= uspoof_check2(sc
.getAlias(), cas
.input
, -1, nullptr, &status
);
749 TEST_ASSERT_SUCCESS(status
);
750 int32_t expected
= cas
.shouldFail
? USPOOF_HIDDEN_OVERLAY
: 0;
751 assertEquals(cas
.input
, expected
, failedChecks
);
755 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */