1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2011-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
10 * IntlTestSpoof tests for USpoofDetector
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
19 #include "unicode/normlzr.h"
20 #include "unicode/regex.h"
21 #include "unicode/unistr.h"
22 #include "unicode/uscript.h"
23 #include "unicode/uspoof.h"
26 #include "scriptset.h"
// Assertion helper macros used by the tests in this file. Each one expands to a braced
// block, reports through the test's logging functions, and includes __FILE__ / __LINE__
// of the expansion site in the message. None of them returns from the enclosing function.

// TEST_ASSERT_SUCCESS(status): if U_FAILURE(status), reports via errcheckln() together
// with u_errorName(status).
32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
33 errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// TEST_ASSERT(expr): reports via errln() when expr compares equal to FALSE; the message
// contains the stringized expression.
35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
36 errln("Test Failure at file %s, line %d: \"%s\" is false.", __FILE__, __LINE__, #expr);};}
// TEST_ASSERT_MSG(expr, msg): like TEST_ASSERT, but reports via dataerrln() and inserts
// the caller-supplied msg (formatted with %s) ahead of the stringized expression.
38 #define TEST_ASSERT_MSG(expr, msg) {if ((expr)==FALSE) { \
39 dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", __FILE__, __LINE__, msg, #expr);};}
// TEST_ASSERT_EQ(a, b): reports via errln() when (a) != (b). Both values are formatted
// with %d. Note that a and b appear in the comparison and again in the errln() arguments,
// so each is evaluated a second time when the check fails.
41 #define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
42 errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
43 __FILE__, __LINE__, #a, (a), #b, (b)); }}
// TEST_ASSERT_NE(a, b): the inverse of TEST_ASSERT_EQ; reports via errln() when (a) == (b),
// with the same %d formatting and the same repeated evaluation on failure.
45 #define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
46 errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
47 __FILE__, __LINE__, #a, (a), #b, (b)); }}
50 * TEST_SETUP and TEST_TEARDOWN
51 * macros to handle the boilerplate around setting up a test case.
52 * Put arbitrary test code between SETUP and TEARDOWN.
53 * "sc" is the ready-to-go SpoofChecker for use in the tests.
55 #define TEST_SETUP { \
56 UErrorCode status = U_ZERO_ERROR; \
58 sc = uspoof_open(&status); \
59 TEST_ASSERT_SUCCESS(status); \
60 USpoofCheckResult *checkResult; \
61 checkResult = uspoof_openCheckResult(&status); \
62 TEST_ASSERT_SUCCESS(status); \
63 if (U_SUCCESS(status)){
65 #define TEST_TEARDOWN \
67 TEST_ASSERT_SUCCESS(status); \
68 uspoof_closeCheckResult(checkResult); \
75 void IntlTestSpoof::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
78 logln("TestSuite spoof: ");
81 TESTCASE_AUTO(testSpoofAPI
);
82 TESTCASE_AUTO(testSkeleton
);
83 TESTCASE_AUTO(testAreConfusable
);
84 TESTCASE_AUTO(testInvisible
);
85 TESTCASE_AUTO(testConfData
);
86 TESTCASE_AUTO(testBug8654
);
87 TESTCASE_AUTO(testScriptSet
);
88 TESTCASE_AUTO(testRestrictionLevel
);
89 TESTCASE_AUTO(testMixedNumbers
);
90 TESTCASE_AUTO(testBug12153
);
91 TESTCASE_AUTO(testBug12825
);
92 TESTCASE_AUTO(testBug12815
);
96 void IntlTestSpoof::testSpoofAPI() {
99 UnicodeString
s("xyz"); // Many latin ranges are whole-script confusable with other scripts.
100 // If this test starts failing, consult confusablesWholeScript.txt
101 int32_t position
= 666;
102 int32_t checkResults
= uspoof_checkUnicodeString(sc
, s
, &position
, &status
);
103 TEST_ASSERT_SUCCESS(status
);
104 TEST_ASSERT_EQ(0, checkResults
);
105 TEST_ASSERT_EQ(0, position
);
109 UnicodeString
s1("cxs");
110 UnicodeString s2
= UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs"
111 int32_t checkResults
= uspoof_areConfusableUnicodeString(sc
, s1
, s2
, &status
);
112 TEST_ASSERT_SUCCESS(status
);
113 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_WHOLE_SCRIPT_CONFUSABLE
, checkResults
);
118 UnicodeString
s("I1l0O");
120 UnicodeString
&retStr
= uspoof_getSkeletonUnicodeString(sc
, USPOOF_ANY_CASE
, s
, dest
, &status
);
121 TEST_ASSERT_SUCCESS(status
);
122 TEST_ASSERT(UnicodeString("lllOO") == dest
);
123 TEST_ASSERT(&dest
== &retStr
);
128 #define CHECK_SKELETON(type, input, expected) { \
129 checkSkeleton(sc, type, input, expected, __LINE__); \
133 // testSkeleton. Spot check a number of confusable skeleton substitutions from the
134 // Unicode data file confusables.txt
135 // Test cases chosen for substitutions of various lengths, and
136 // membership in different mapping tables.
137 // Note: for ICU 55, all tables collapsed to the MA table data.
138 // TODO: for ICU 56 with Unicode 8, revisit this test.
140 void IntlTestSpoof::testSkeleton() {
141 const uint32_t ML
= 0;
142 const uint32_t SL
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
;
143 const uint32_t MA
= USPOOF_ANY_CASE
;
144 const uint32_t SA
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
;
147 CHECK_SKELETON(SL
, "nochange", "nochange");
148 CHECK_SKELETON(SA
, "nochange", "nochange");
149 CHECK_SKELETON(ML
, "nochange", "nochange");
150 CHECK_SKELETON(MA
, "nochange", "nochange");
151 CHECK_SKELETON(MA
, "love", "love");
152 CHECK_SKELETON(MA
, "1ove", "love"); // Digit 1 to letter l
153 CHECK_SKELETON(ML
, "OOPS", "OOPS");
154 CHECK_SKELETON(ML
, "00PS", "OOPS");
155 CHECK_SKELETON(MA
, "OOPS", "OOPS");
156 CHECK_SKELETON(MA
, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only
157 CHECK_SKELETON(SL
, "\\u059c", "\\u0301");
158 CHECK_SKELETON(SL
, "\\u2A74", "\\u003A\\u003A\\u003D");
159 CHECK_SKELETON(SL
, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)"
160 CHECK_SKELETON(SL
, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f");
162 // This mapping exists in the ML and MA tables, does not exist in SL, SA
167 CHECK_SKELETON(SL
, "\\u0C83", "\\u0983");
168 CHECK_SKELETON(SA
, "\\u0C83", "\\u0983");
169 CHECK_SKELETON(ML
, "\\u0C83", "\\u0983");
170 CHECK_SKELETON(MA
, "\\u0C83", "\\u0983");
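172 // 0391 mappings exist only in MA and SA tables. With the tables collapsed to the MA
// data (see the ICU 55 note above testSkeleton), all four types are expected to give "A".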
173 CHECK_SKELETON(MA
, "\\u0391", "A");
174 CHECK_SKELETON(SA
, "\\u0391", "A");
175 CHECK_SKELETON(ML
, "\\u0391", "A");
176 CHECK_SKELETON(SL
, "\\u0391", "A");
178 // 13CF Mappings in all four tables, different in MA.
179 CHECK_SKELETON(ML
, "\\u13CF", "b");
180 CHECK_SKELETON(MA
, "\\u13CF", "b");
181 CHECK_SKELETON(SL
, "\\u13CF", "b");
182 CHECK_SKELETON(SA
, "\\u13CF", "b");
184 // 0022 ; 0027 0027 ;
186 CHECK_SKELETON(SL
, "\\u0022", "\\u0027\\u0027");
187 CHECK_SKELETON(SA
, "\\u0022", "\\u0027\\u0027");
188 CHECK_SKELETON(ML
, "\\u0022", "\\u0027\\u0027");
189 CHECK_SKELETON(MA
, "\\u0022", "\\u0027\\u0027");
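191 // 017F mappings exist only in MA and SA tables. With the tables collapsed to the MA
// data (see the ICU 55 note above testSkeleton), all four types are expected to give "f".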
192 CHECK_SKELETON(MA
, "\\u017F", "f");
193 CHECK_SKELETON(SA
, "\\u017F", "f");
194 CHECK_SKELETON(ML
, "\\u017F", "f");
195 CHECK_SKELETON(SL
, "\\u017F", "f");
202 // Run a single confusable skeleton transformation test case.
204 void IntlTestSpoof::checkSkeleton(const USpoofChecker
*sc
, uint32_t type
,
205 const char *input
, const char *expected
, int32_t lineNum
) {
206 UnicodeString uInput
= UnicodeString(input
).unescape();
207 UnicodeString uExpected
= UnicodeString(expected
).unescape();
209 UErrorCode status
= U_ZERO_ERROR
;
210 UnicodeString actual
;
211 uspoof_getSkeletonUnicodeString(sc
, type
, uInput
, actual
, &status
);
212 if (U_FAILURE(status
)) {
213 errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__
, __LINE__
, lineNum
,
214 u_errorName(status
));
217 if (uExpected
!= actual
) {
218 errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
219 __FILE__
, __LINE__
, lineNum
);
220 errln(UnicodeString(" Actual Skeleton: \"") + actual
+ UnicodeString("\"\n") +
221 UnicodeString(" Expected Skeleton: \"") + uExpected
+ UnicodeString("\""));
225 void IntlTestSpoof::testAreConfusable() {
227 UnicodeString
s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
228 "A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
229 UnicodeString
s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
230 "A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
231 int32_t result
= uspoof_areConfusableUnicodeString(sc
, s1
, s2
, &status
);
232 TEST_ASSERT_SUCCESS(status
);
233 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, result
);
238 void IntlTestSpoof::testInvisible() {
240 UnicodeString s
= UnicodeString("abcd\\u0301ef").unescape();
241 int32_t position
= -42;
242 TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc
, s
, &position
, &status
));
243 TEST_ASSERT_SUCCESS(status
);
244 TEST_ASSERT(0 == position
);
246 UnicodeString s2
= UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
247 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s2
, &position
, &status
));
248 TEST_ASSERT_SUCCESS(status
);
249 TEST_ASSERT_EQ(0, position
);
251 // Two acute accents, one from the composed a with acute accent, \u00e1,
254 UnicodeString s3
= UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
255 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s3
, &position
, &status
));
256 TEST_ASSERT_SUCCESS(status
);
257 TEST_ASSERT_EQ(0, position
);
261 void IntlTestSpoof::testBug8654() {
263 UnicodeString s
= UnicodeString("B\\u00c1\\u0301").unescape();
264 int32_t position
= -42;
265 TEST_ASSERT_EQ(USPOOF_INVISIBLE
, uspoof_checkUnicodeString(sc
, s
, &position
, &status
) & USPOOF_INVISIBLE
);
266 TEST_ASSERT_SUCCESS(status
);
267 TEST_ASSERT_EQ(0, position
);
271 static UnicodeString
parseHex(const UnicodeString
&in
) {
272 // Convert a series of hex numbers in a Unicode String to a string with the
273 // corresponding characters.
274 // The conversion is _really_ annoying. There must be some function to just do it.
275 UnicodeString result
;
277 for (int32_t i
=0; i
<in
.length(); i
++) {
278 UChar c
= in
.charAt(i
);
279 if (c
== 0x20) { // Space
284 } else if (c
>=0x30 && c
<=0x39) {
285 cc
= (cc
<<4) + (c
- 0x30);
286 } else if ((c
>=0x41 && c
<=0x46) || (c
>=0x61 && c
<=0x66)) {
287 cc
= (cc
<<4) + (c
& 0x0f)+9;
289 // else do something with bad input.
299 // Append the hex form of a UChar32 to a UnicodeString.
300 // Used in formatting error messages.
301 // Match the formatting of numbers in confusables.txt
302 // Minimum of 4 digits, no leading zeroes for positions 5 and up.
304 static void appendHexUChar(UnicodeString
&dest
, UChar32 c
) {
305 UBool doZeroes
= FALSE
;
306 for (int bitNum
=28; bitNum
>=0; bitNum
-=4) {
310 int hexDigit
= (c
>>bitNum
) & 0x0f;
311 if (hexDigit
!= 0 || doZeroes
) {
313 dest
.append((UChar
)(hexDigit
<=9? hexDigit
+ 0x30: hexDigit
-10 + 0x41));
316 dest
.append((UChar
)0x20);
319 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer
, FILE, fclose
);
321 // testConfData - Check each data item from the Unicode confusables.txt file,
322 // verify that it transforms correctly in a skeleton.
324 void IntlTestSpoof::testConfData() {
326 if (getUnidataPath(buffer
) == NULL
) {
327 errln("Skipping test spoof/testConfData. Unable to find path to source/data/unidata/.");
330 uprv_strcat(buffer
, "confusables.txt");
332 LocalStdioFilePointer
f(fopen(buffer
, "rb"));
334 errln("Skipping test spoof/testConfData. File confusables.txt not accessible.");
337 fseek(f
.getAlias(), 0, SEEK_END
);
338 int32_t fileSize
= ftell(f
.getAlias());
339 LocalArray
<char> fileBuf(new char[fileSize
]);
340 fseek(f
.getAlias(), 0, SEEK_SET
);
341 int32_t amt_read
= fread(fileBuf
.getAlias(), 1, fileSize
, f
.getAlias());
342 TEST_ASSERT_EQ(amt_read
, fileSize
);
343 TEST_ASSERT(fileSize
>0);
344 if (amt_read
!= fileSize
|| fileSize
<=0) {
347 UnicodeString confusablesTxt
= UnicodeString::fromUTF8(StringPiece(fileBuf
.getAlias(), fileSize
));
349 UErrorCode status
= U_ZERO_ERROR
;
350 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
351 TEST_ASSERT_SUCCESS(status
);
353 // Parse lines from the confusables.txt file. Example Line:
354 // FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....
355 // Three fields. The hex fields can contain more than one character,
356 // and each character may be more than 4 digits (for supplementary characters)
357 // This regular expression matches lines and splits the fields into capture groups.
358 RegexMatcher
parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt
, 0, status
);
359 TEST_ASSERT_SUCCESS(status
);
360 while (parseLine
.find()) {
361 UnicodeString from
= parseHex(parseLine
.group(1, status
));
362 if (!Normalizer::isNormalized(from
, UNORM_NFD
, status
)) {
363 // The source character was not NFD.
364 // Skip this case; the first step in obtaining a skeleton is to NFD the input,
365 // so the mapping in this line of confusables.txt will never be applied.
369 UnicodeString rawExpected
= parseHex(parseLine
.group(2, status
));
370 UnicodeString expected
;
371 Normalizer::decompose(rawExpected
, FALSE
/*NFD*/, 0, expected
, status
);
372 TEST_ASSERT_SUCCESS(status
);
374 int32_t skeletonType
= 0;
375 UnicodeString tableType
= parseLine
.group(3, status
);
376 TEST_ASSERT_SUCCESS(status
);
377 if (tableType
.indexOf("SL") >= 0) {
378 skeletonType
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
;
379 } else if (tableType
.indexOf("SA") >= 0) {
380 skeletonType
= USPOOF_SINGLE_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
;
381 } else if (tableType
.indexOf("ML") >= 0) {
383 } else if (tableType
.indexOf("MA") >= 0) {
384 skeletonType
= USPOOF_ANY_CASE
;
387 UnicodeString actual
;
388 uspoof_getSkeletonUnicodeString(sc
.getAlias(), skeletonType
, from
, actual
, &status
);
389 TEST_ASSERT_SUCCESS(status
);
390 TEST_ASSERT(actual
== expected
);
391 if (actual
!= expected
) {
392 errln(parseLine
.group(0, status
));
393 UnicodeString line
= "Actual: ";
395 while (i
< actual
.length()) {
396 appendHexUChar(line
, actual
.char32At(i
));
397 i
= actual
.moveIndex32(i
, 1);
401 if (U_FAILURE(status
)) {
408 void IntlTestSpoof::testScriptSet() {
411 UErrorCode status
= U_ZERO_ERROR
;
413 TEST_ASSERT(s1
== s2
);
414 s1
.set(USCRIPT_ARABIC
,status
);
415 TEST_ASSERT_SUCCESS(status
);
416 TEST_ASSERT(!(s1
== s2
));
417 TEST_ASSERT(s1
.test(USCRIPT_ARABIC
, status
));
418 TEST_ASSERT(s1
.test(USCRIPT_GREEK
, status
) == FALSE
);
420 status
= U_ZERO_ERROR
;
421 s1
.reset(USCRIPT_ARABIC
, status
);
422 TEST_ASSERT(s1
== s2
);
424 status
= U_ZERO_ERROR
;
426 TEST_ASSERT(s1
.test(USCRIPT_COMMON
, status
));
427 TEST_ASSERT(s1
.test(USCRIPT_ETHIOPIC
, status
));
428 TEST_ASSERT(s1
.test(USCRIPT_CODE_LIMIT
, status
));
430 TEST_ASSERT(!s1
.test(USCRIPT_COMMON
, status
));
431 TEST_ASSERT(!s1
.test(USCRIPT_ETHIOPIC
, status
));
432 TEST_ASSERT(!s1
.test(USCRIPT_CODE_LIMIT
, status
));
434 status
= U_ZERO_ERROR
;
435 s1
.set(USCRIPT_TAKRI
, status
);
436 s1
.set(USCRIPT_BLISSYMBOLS
, status
);
438 TEST_ASSERT(s2
.contains(s1
));
439 TEST_ASSERT(!s1
.contains(s2
));
440 TEST_ASSERT(s2
.intersects(s1
));
441 TEST_ASSERT(s1
.intersects(s2
));
442 s2
.reset(USCRIPT_TAKRI
, status
);
443 TEST_ASSERT(!s2
.contains(s1
));
444 TEST_ASSERT(!s1
.contains(s2
));
445 TEST_ASSERT(s1
.intersects(s2
));
446 TEST_ASSERT(s2
.intersects(s1
));
447 TEST_ASSERT_SUCCESS(status
);
449 status
= U_ZERO_ERROR
;
451 s1
.set(USCRIPT_NKO
, status
);
452 s1
.set(USCRIPT_COMMON
, status
);
454 TEST_ASSERT(s2
== s1
);
455 TEST_ASSERT_EQ(2, s2
.countMembers());
457 TEST_ASSERT(s2
== s1
);
459 TEST_ASSERT(!(s2
== s1
));
460 TEST_ASSERT(s2
.countMembers() >= USCRIPT_CODE_LIMIT
);
462 TEST_ASSERT(s2
== s1
);
465 s2
.reset(USCRIPT_COMMON
, status
);
467 TEST_ASSERT(s2
.countMembers() == 1);
470 TEST_ASSERT(s1
.isEmpty());
471 s1
.set(USCRIPT_LATIN
, status
);
472 TEST_ASSERT(!s1
.isEmpty());
474 TEST_ASSERT(!s1
.isEmpty());
475 TEST_ASSERT_SUCCESS(status
);
478 s1
.set(USCRIPT_AFAKA
, status
);
479 s1
.set(USCRIPT_VAI
, status
);
480 s1
.set(USCRIPT_INHERITED
, status
);
482 for (int32_t i
=0; i
<4; i
++) {
483 n
= s1
.nextSetBit(n
+1);
485 case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED
, n
); break;
486 case 1: TEST_ASSERT_EQ(USCRIPT_VAI
, n
); break;
487 case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA
, n
); break;
488 case 3: TEST_ASSERT_EQ(-1, (int32_t)n
); break;
489 default: TEST_ASSERT(FALSE
);
492 TEST_ASSERT_SUCCESS(status
);
494 // Script extensions. Depends on data.
496 s1
.setScriptExtensions(0x67, status
);
497 TEST_ASSERT(s1
.countMembers() == 1);
498 TEST_ASSERT(s1
.test(USCRIPT_LATIN
, status
));
499 TEST_ASSERT_SUCCESS(status
);
502 s1
.setScriptExtensions(0x303C, status
);
503 TEST_ASSERT(s1
.countMembers() == 3);
504 TEST_ASSERT(s1
.test(USCRIPT_HAN
, status
));
505 TEST_ASSERT(s1
.test(USCRIPT_HIRAGANA
, status
));
506 TEST_ASSERT(s1
.test(USCRIPT_KATAKANA
, status
));
507 TEST_ASSERT_SUCCESS(status
);
510 ScriptSet bitset12
; bitset12
.set(USCRIPT_LATIN
, status
).set(USCRIPT_HANGUL
, status
);
511 ScriptSet bitset2
; bitset2
.set(USCRIPT_HANGUL
, status
);
512 TEST_ASSERT(bitset12
.contains(bitset2
));
513 TEST_ASSERT(bitset12
.contains(bitset12
));
514 TEST_ASSERT(!bitset2
.contains(bitset12
));
516 ScriptSet arabSet
; arabSet
.set(USCRIPT_ARABIC
, status
);
517 ScriptSet latinSet
; latinSet
.set(USCRIPT_LATIN
, status
);
518 UElement arabEl
; arabEl
.pointer
= &arabSet
;
519 UElement latinEl
; latinEl
.pointer
= &latinSet
;
520 TEST_ASSERT(uhash_compareScriptSet(arabEl
, latinEl
) < 0);
521 TEST_ASSERT(uhash_compareScriptSet(latinEl
, arabEl
) > 0);
523 UnicodeString scriptString
;
524 bitset12
.displayScripts(scriptString
);
525 TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString
);
529 void IntlTestSpoof::testRestrictionLevel() {
532 URestrictionLevel fExpectedRestrictionLevel
;
534 {"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE
},
536 {"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE
},
537 {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE
},
538 {"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE
},
539 {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE
},
540 {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE
},
541 {"\\u0061\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
542 {"\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
543 {"\\u0061\\u30FC\\u303C\\u30A2", USPOOF_HIGHLY_RESTRICTIVE
},
544 {"\\u30A2\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE
},
545 {"\\u0061\\u0031\\u0661", USPOOF_MODERATELY_RESTRICTIVE
},
546 {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_MODERATELY_RESTRICTIVE
},
547 {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE
},
548 {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE
}
551 URestrictionLevel restrictionLevels
[] = { USPOOF_ASCII
, USPOOF_SINGLE_SCRIPT_RESTRICTIVE
,
552 USPOOF_HIGHLY_RESTRICTIVE
, USPOOF_MODERATELY_RESTRICTIVE
, USPOOF_MINIMALLY_RESTRICTIVE
,
553 USPOOF_UNRESTRICTIVE
};
555 UErrorCode status
= U_ZERO_ERROR
;
556 UnicodeSet allowedChars
;
557 // Allowed Identifier Characters. In addition to the Recommended Set,
558 // allow u303c, which has an interesting script extension of Hani Hira Kana.
559 allowedChars
.addAll(*uspoof_getRecommendedUnicodeSet(&status
)).add(0x303C);
561 for (int32_t testNum
=0; testNum
< UPRV_LENGTHOF(tests
); testNum
++) {
562 status
= U_ZERO_ERROR
;
563 const Test
&test
= tests
[testNum
];
564 UnicodeString testString
= UnicodeString(test
.fId
).unescape();
565 URestrictionLevel expectedLevel
= test
.fExpectedRestrictionLevel
;
566 for (int levelIndex
=0; levelIndex
<UPRV_LENGTHOF(restrictionLevels
); levelIndex
++) {
567 status
= U_ZERO_ERROR
;
568 URestrictionLevel levelSetInSpoofChecker
= restrictionLevels
[levelIndex
];
569 USpoofChecker
*sc
= uspoof_open(&status
);
570 uspoof_setAllowedChars(sc
, allowedChars
.toUSet(), &status
);
571 uspoof_setRestrictionLevel(sc
, levelSetInSpoofChecker
);
572 uspoof_setChecks(sc
, USPOOF_RESTRICTION_LEVEL
, &status
);
573 int32_t actualValue
= uspoof_checkUnicodeString(sc
, testString
, NULL
, &status
);
575 // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
576 int32_t expectedValue
= 0;
577 if (expectedLevel
> levelSetInSpoofChecker
) {
578 expectedValue
|= USPOOF_RESTRICTION_LEVEL
;
580 sprintf(msgBuffer
, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
581 testNum
, levelIndex
, expectedValue
, actualValue
);
582 TEST_ASSERT_MSG(expectedValue
== actualValue
, msgBuffer
);
583 TEST_ASSERT_SUCCESS(status
);
585 // Run the same check again, with the Spoof Checker configured to return
586 // the actual restriction level.
587 uspoof_setAllowedChars(sc
, allowedChars
.toUSet(), &status
);
588 uspoof_setRestrictionLevel(sc
, levelSetInSpoofChecker
);
589 uspoof_setChecks(sc
, USPOOF_AUX_INFO
| USPOOF_RESTRICTION_LEVEL
, &status
);
590 int32_t result
= uspoof_checkUnicodeString(sc
, testString
, NULL
, &status
);
591 TEST_ASSERT_SUCCESS(status
);
592 if (U_SUCCESS(status
)) {
593 TEST_ASSERT_EQ(expectedLevel
, result
& USPOOF_RESTRICTION_LEVEL_MASK
);
594 TEST_ASSERT_EQ(expectedValue
, result
& USPOOF_ALL_CHECKS
);
602 void IntlTestSpoof::testMixedNumbers() {
604 const char *fTestString
;
605 const char *fExpectedSet
;
608 {"\\u0967", "[\\u0966]"},
609 {"1\\u0967", "[0\\u0966]"},
610 {"\\u0661\\u06F1", "[\\u0660\\u06F0]"},
611 {"\\u0061\\u2665", "[]"},
612 {"\\u0061\\u303C", "[]"},
613 {"\\u0061\\u30FC\\u303C", "[]"},
614 {"\\u0061\\u30FC\\u303C\\u30A2", "[]"},
615 {"\\u30A2\\u0061\\u30FC\\u303C", "[]"},
616 {"\\u0061\\u0031\\u0661", "[\\u0030\\u0660]"},
617 {"\\u0061\\u0031\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0]"},
618 {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"},
619 {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"}
621 UErrorCode status
= U_ZERO_ERROR
;
622 for (int32_t testNum
=0; testNum
< UPRV_LENGTHOF(tests
); testNum
++) {
624 sprintf(msgBuf
, "testNum = %d ", testNum
);
625 Test
&test
= tests
[testNum
];
627 status
= U_ZERO_ERROR
;
628 UnicodeString testString
= UnicodeString(test
.fTestString
).unescape();
629 UnicodeSet
expectedSet(UnicodeString(test
.fExpectedSet
).unescape(), status
);
631 status
= U_ZERO_ERROR
;
633 uspoof_setChecks(sc
, USPOOF_MIXED_NUMBERS
, &status
); // only check this
634 uspoof_check2UnicodeString(sc
, testString
, checkResult
, &status
);
635 UBool mixedNumberFailure
= ((uspoof_getCheckResultChecks(checkResult
, &status
) & USPOOF_MIXED_NUMBERS
) != 0);
636 TEST_ASSERT_MSG((expectedSet
.size() > 1) == mixedNumberFailure
, msgBuf
);
637 const UnicodeSet
* actualSet
= UnicodeSet::fromUSet(uspoof_getCheckResultNumerics(checkResult
, &status
));
638 TEST_ASSERT_MSG(expectedSet
== *actualSet
, msgBuf
);
643 // Bug #12153 - uspoof_setRestrictionLevel() should enable restriction level testing.
// The test walks the USPOOF_RESTRICTION_LEVEL bit of the checker's check mask through
// three states: set on a newly opened checker, cleared via uspoof_setChecks(), and set
// again after uspoof_setRestrictionLevel() is called.
645 void IntlTestSpoof::testBug12153() {
646 UErrorCode status
= U_ZERO_ERROR
;
647 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
648 TEST_ASSERT_SUCCESS(status
);
// Step 1: the bit is expected to be present in the mask of a freshly opened checker.
649 int32_t checks
= uspoof_getChecks(sc
.getAlias(), &status
);
650 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) != 0);
// Step 2: write the mask back with the bit removed, then re-read it to confirm it is gone.
651 checks
&= ~USPOOF_RESTRICTION_LEVEL
;
652 uspoof_setChecks(sc
.getAlias(), checks
, &status
);
653 checks
= uspoof_getChecks(sc
.getAlias(), &status
);
654 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) == 0);
// Step 3: setting a restriction level must turn the bit back on (the behavior under test).
656 uspoof_setRestrictionLevel(sc
.getAlias(), USPOOF_MODERATELY_RESTRICTIVE
);
657 checks
= uspoof_getChecks(sc
.getAlias(), &status
);
658 TEST_ASSERT((checks
& USPOOF_RESTRICTION_LEVEL
) != 0);
// status is only examined here, after all of the get/set calls above have run.
659 TEST_ASSERT_SUCCESS(status
);
662 // uspoof_checkUnicodeString should NOT have an infinite loop.
// Runs a check of the one-character string U+30FB with USPOOF_ALL_CHECKS | USPOOF_AUX_INFO
// enabled. The check result itself is discarded; the test only requires that the call
// returns and leaves status in a success state.
663 void IntlTestSpoof::testBug12825() {
664 UErrorCode status
= U_ZERO_ERROR
;
665 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
666 TEST_ASSERT_SUCCESS(status
);
// Enable every check together with the auxiliary-info flag.
667 uspoof_setChecks(sc
.getAlias(), USPOOF_ALL_CHECKS
| USPOOF_AUX_INFO
, &status
);
668 TEST_ASSERT_SUCCESS(status
);
// NULL is passed for the position out-parameter and the return value is not inspected.
669 uspoof_checkUnicodeString(sc
.getAlias(), UnicodeString("\\u30FB").unescape(), NULL
, &status
);
670 TEST_ASSERT_SUCCESS(status
);
673 // uspoof_getSkeleton should NOT set an ILLEGAL_ARGUMENT_EXCEPTION.
// Configures the checker with USPOOF_RESTRICTION_LEVEL as its only check, then requests
// a skeleton with a type argument of 0. Only the resulting status is verified; the
// skeleton text written to `result` is not examined.
674 void IntlTestSpoof::testBug12815() {
675 UErrorCode status
= U_ZERO_ERROR
;
676 LocalUSpoofCheckerPointer
sc(uspoof_open(&status
));
677 TEST_ASSERT_SUCCESS(status
);
// Replace the checker's check mask with just the restriction-level check.
678 uspoof_setChecks(sc
.getAlias(), USPOOF_RESTRICTION_LEVEL
, &status
);
679 TEST_ASSERT_SUCCESS(status
);
680 UnicodeString result
;
// The skeleton request must succeed despite the check mask set above.
681 uspoof_getSkeletonUnicodeString(sc
.getAlias(), 0, UnicodeString("hello world"), result
, &status
);
682 TEST_ASSERT_SUCCESS(status
);
685 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */