1 /********************************************************************
3 * Copyright (c) 2009-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
/*C API TEST for the uspoof Unicode Identifier Spoofing and Security API */
13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of spoof detection functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
26 #include "unicode/uspoof.h"
27 #include "unicode/ustring.h"
28 #include "unicode/uset.h"
/*
 * TEST_ASSERT_SUCCESS(errorCode)
 *   Logs a failure (source file, line, and the ICU error name) when
 *   errorCode holds a failing UErrorCode; does nothing otherwise.
 *   Expands to a plain brace block. The argument is evaluated more than
 *   once, so pass a simple variable rather than an expression with
 *   side effects.
 */
#define TEST_ASSERT_SUCCESS(errorCode) { \
    if (U_FAILURE(errorCode)) { \
        log_err_status(errorCode, "Failure at file %s, line %d, error = %s\n", \
                       __FILE__, __LINE__, u_errorName(errorCode)); \
    } \
}
/*
 * TEST_ASSERT(condition)
 *   Logs a test failure, quoting the source text of the condition, when
 *   the condition compares equal to FALSE.
 *   Expands to a plain brace block, so an invocation is a complete
 *   statement even without a trailing semicolon.
 */
#define TEST_ASSERT(condition) { \
    if ((condition) == FALSE) { \
        log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", \
                __FILE__, __LINE__, #condition); \
    } \
}
/*
 * TEST_ASSERT_EQ(lhs, rhs)
 *   Logs a test failure when lhs != rhs. The message shows the source
 *   text of both operands together with their values.
 *   Both values are formatted with %d and each operand is evaluated
 *   twice on failure.
 *   NOTE(review): %d assumes int-compatible operands - confirm that all
 *   call sites satisfy this.
 */
#define TEST_ASSERT_EQ(lhs, rhs) { \
    if ((lhs) != (rhs)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                __FILE__, __LINE__, #lhs, (lhs), #rhs, (rhs)); \
    } \
}
/*
 * TEST_ASSERT_NE(lhs, rhs)
 *   Logs a test failure when lhs == rhs. The message shows the source
 *   text of both operands together with their values.
 *   Both values are formatted with %d and each operand is evaluated
 *   twice on failure.
 *   NOTE(review): %d assumes int-compatible operands, but this macro is
 *   also invoked with pointer arguments in this file - confirm that is
 *   acceptable on all target platforms.
 */
#define TEST_ASSERT_NE(lhs, rhs) { \
    if ((lhs) == (rhs)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                __FILE__, __LINE__, #lhs, (lhs), #rhs, (rhs)); \
    } \
}
47 * TEST_SETUP and TEST_TEARDOWN
 * macros to handle the boilerplate around setting up a test case.
49 * Put arbitrary test code between SETUP and TEARDOWN.
50 * "sc" is the ready-to-go SpoofChecker for use in the tests.
52 #define TEST_SETUP { \
53 UErrorCode status = U_ZERO_ERROR; \
55 sc = uspoof_open(&status); \
56 TEST_ASSERT_SUCCESS(status); \
57 if (U_SUCCESS(status)){
59 #define TEST_TEARDOWN \
61 TEST_ASSERT_SUCCESS(status); \
66 static void TestUSpoofCAPI(void);
68 void addUSpoofTest(TestNode
** root
);
70 void addUSpoofTest(TestNode
** root
)
72 #if !UCONFIG_NO_FILE_IO
73 addTest(root
, &TestUSpoofCAPI
, "uspoof/TestUSpoofCAPI");
78 * Identifiers for verifying that spoof checking is minimally alive and working.
80 const UChar goodLatin
[] = {(UChar
)0x75, (UChar
)0x7a, 0}; /* "uz", all ASCII */
81 /* (not confusable) */
82 const UChar scMixed
[] = {(UChar
)0x73, (UChar
)0x0441, 0}; /* "sc", with Cyrillic 'c' */
83 /* (mixed script, confusable */
85 const UChar scLatin
[] = {(UChar
)0x73, (UChar
)0x63, 0}; /* "sc", plain ascii. */
86 const UChar goodCyrl
[] = {(UChar
)0x438, (UChar
)0x43B, 0}; /* Plain lower case Cyrillic letters,
87 no latin confusables */
89 const UChar goodGreek
[] = {(UChar
)0x3c0, (UChar
)0x3c6, 0}; /* Plain lower case Greek letters */
91 const UChar lll_Latin_a
[] = {(UChar
)0x6c, (UChar
)0x49, (UChar
)0x31, 0}; /* lI1, all ASCII */
93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94 const UChar lll_Latin_b
[] = {(UChar
)0xff29, (UChar
)0x217c, (UChar
)0x196, 0};
96 const UChar lll_Cyrl
[] = {(UChar
)0x0406, (UChar
)0x04C0, (UChar
)0x31, 0};
98 /* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */
99 const UChar lll_Skel
[] = {(UChar
)0x6c, (UChar
)0x6c, (UChar
)0x6c, 0};
101 const UChar han_Hiragana
[] = {(UChar
)0x3086, (UChar
)0x308A, (UChar
)0x0020, (UChar
)0x77F3, (UChar
)0x7530, 0};
103 /* Provide better code coverage */
104 const char goodLatinUTF8
[] = {0x75, 0x77, 0};
 * Spoof Detection C API Tests
108 static void TestUSpoofCAPI(void) {
111 * basic uspoof_open().
115 UErrorCode status
= U_ZERO_ERROR
;
116 sc
= uspoof_open(&status
);
117 TEST_ASSERT_SUCCESS(status
);
118 if (U_FAILURE(status
)) {
119 /* If things are so broken that we can't even open a default spoof checker, */
120 /* don't even try the rest of the tests. They would all fail. */
129 * Test Open from source rules.
132 const char *dataSrcDir
;
135 int confusablesLength
= 0;
136 char *confusablesWholeScript
;
137 int confusablesWholeScriptLength
= 0;
143 dataSrcDir
= ctest_dataSrcDir();
144 fileName
= malloc(strlen(dataSrcDir
) + 100);
145 strcpy(fileName
, dataSrcDir
);
146 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusables.txt");
147 f
= fopen(fileName
, "rb");
148 TEST_ASSERT_NE(f
, NULL
);
149 confusables
= malloc(3000000);
151 confusablesLength
= fread(confusables
, 1, 3000000, f
);
155 strcpy(fileName
, dataSrcDir
);
156 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusablesWholeScript.txt");
157 f
= fopen(fileName
, "rb");
158 TEST_ASSERT_NE(f
, NULL
);
159 confusablesWholeScript
= malloc(1000000);
161 confusablesWholeScriptLength
= fread(confusablesWholeScript
, 1, 1000000, f
);
165 rsc
= uspoof_openFromSource(confusables
, confusablesLength
,
166 confusablesWholeScript
, confusablesWholeScriptLength
,
167 &errType
, &pe
, &status
);
168 TEST_ASSERT_SUCCESS(status
);
170 free(confusablesWholeScript
);
174 /* printf("ParseError Line is %d\n", pe.line); */
179 * openFromSerialized and serialize
182 int32_t serializedSize
= 0;
183 int32_t actualLength
= 0;
186 int32_t checkResults
;
189 serializedSize
= uspoof_serialize(sc
, NULL
, 0, &status
);
190 TEST_ASSERT_EQ(status
, U_BUFFER_OVERFLOW_ERROR
);
191 TEST_ASSERT(serializedSize
> 0);
193 /* Serialize the default spoof checker */
194 status
= U_ZERO_ERROR
;
195 buf
= (char *)malloc(serializedSize
+ 10);
196 TEST_ASSERT(buf
!= NULL
);
197 buf
[serializedSize
] = 42;
198 uspoof_serialize(sc
, buf
, serializedSize
, &status
);
199 TEST_ASSERT_SUCCESS(status
);
200 TEST_ASSERT_EQ(42, buf
[serializedSize
]);
202 /* Create a new spoof checker from the freshly serialized data */
203 sc2
= uspoof_openFromSerialized(buf
, serializedSize
+10, &actualLength
, &status
);
204 TEST_ASSERT_SUCCESS(status
);
205 TEST_ASSERT_NE(NULL
, sc2
);
206 TEST_ASSERT_EQ(serializedSize
, actualLength
);
208 /* Verify that the new spoof checker at least wiggles */
209 checkResults
= uspoof_check(sc2
, goodLatin
, -1, NULL
, &status
);
210 TEST_ASSERT_SUCCESS(status
);
211 TEST_ASSERT_EQ(0, checkResults
);
213 checkResults
= uspoof_check(sc2
, scMixed
, -1, NULL
, &status
);
214 TEST_ASSERT_SUCCESS(status
);
215 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
224 * Set & Get Check Flags
228 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
, &status
);
229 TEST_ASSERT_SUCCESS(status
);
230 t
= uspoof_getChecks(sc
, &status
);
231 TEST_ASSERT_EQ(t
, USPOOF_ALL_CHECKS
);
233 uspoof_setChecks(sc
, 0, &status
);
234 TEST_ASSERT_SUCCESS(status
);
235 t
= uspoof_getChecks(sc
, &status
);
236 TEST_ASSERT_EQ(0, t
);
239 USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
,
241 TEST_ASSERT_SUCCESS(status
);
242 t
= uspoof_getChecks(sc
, &status
);
243 TEST_ASSERT_SUCCESS(status
);
244 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
, t
);
248 * get & setAllowedChars
254 uset
= uspoof_getAllowedChars(sc
, &status
);
255 TEST_ASSERT_SUCCESS(status
);
256 TEST_ASSERT(uset_isFrozen(uset
));
257 us
= uset_open((UChar32
)0x41, (UChar32
)0x5A); /* [A-Z] */
258 uspoof_setAllowedChars(sc
, us
, &status
);
259 TEST_ASSERT_SUCCESS(status
);
260 TEST_ASSERT_NE(us
, uspoof_getAllowedChars(sc
, &status
));
261 TEST_ASSERT(uset_equals(us
, uspoof_getAllowedChars(sc
, &status
)));
262 TEST_ASSERT_SUCCESS(status
);
271 USpoofChecker
*clone1
= NULL
;
272 USpoofChecker
*clone2
= NULL
;
273 int32_t checkResults
= 0;
275 clone1
= uspoof_clone(sc
, &status
);
276 TEST_ASSERT_SUCCESS(status
);
277 TEST_ASSERT_NE(clone1
, sc
);
279 clone2
= uspoof_clone(clone1
, &status
);
280 TEST_ASSERT_SUCCESS(status
);
281 TEST_ASSERT_NE(clone2
, clone1
);
283 uspoof_close(clone1
);
285 /* Verify that the cloned spoof checker is alive */
286 checkResults
= uspoof_check(clone2
, goodLatin
, -1, NULL
, &status
);
287 TEST_ASSERT_SUCCESS(status
);
288 TEST_ASSERT_EQ(0, checkResults
);
290 checkResults
= uspoof_check(clone2
, scMixed
, -1, NULL
, &status
);
291 TEST_ASSERT_SUCCESS(status
);
292 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
293 uspoof_close(clone2
);
297 * basic uspoof_check()
301 result
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
302 TEST_ASSERT_SUCCESS(status
);
303 TEST_ASSERT_EQ(0, result
);
305 result
= uspoof_check(sc
, han_Hiragana
, -1, NULL
, &status
);
306 TEST_ASSERT_SUCCESS(status
);
307 TEST_ASSERT_EQ(0, result
);
309 result
= uspoof_check(sc
, scMixed
, -1, NULL
, &status
);
310 TEST_ASSERT_SUCCESS(status
);
311 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, result
);
321 int32_t checkResults
;
323 checks
= uspoof_getChecks(sc
, &status
);
324 TEST_ASSERT_SUCCESS(status
);
325 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS
, checks
);
327 checks
&= ~(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
);
328 uspoof_setChecks(sc
, checks
, &status
);
329 TEST_ASSERT_SUCCESS(status
);
330 checks2
= uspoof_getChecks(sc
, &status
);
331 TEST_ASSERT_EQ(checks
, checks2
);
333 /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
334 So with those tests gone checking that Identifier should now succeed */
335 checkResults
= uspoof_check(sc
, scMixed
, -1, NULL
, &status
);
336 TEST_ASSERT_SUCCESS(status
);
337 TEST_ASSERT_EQ(0, checkResults
);
345 const char *allowedLocales
;
346 int32_t checkResults
;
348 /* Default allowed locales list should be empty */
349 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
350 TEST_ASSERT_SUCCESS(status
);
351 TEST_ASSERT(strcmp("", allowedLocales
) == 0)
353 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
354 uspoof_setAllowedLocales(sc
, "en, ru_RU", &status
);
355 TEST_ASSERT_SUCCESS(status
);
356 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
357 TEST_ASSERT_SUCCESS(status
);
358 TEST_ASSERT(strstr(allowedLocales
, "en") != NULL
);
359 TEST_ASSERT(strstr(allowedLocales
, "ru") != NULL
);
361 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,
362 * which we don't want to see in this test. */
363 uspoof_setChecks(sc
, USPOOF_CHAR_LIMIT
, &status
);
364 TEST_ASSERT_SUCCESS(status
);
366 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
367 TEST_ASSERT_SUCCESS(status
);
368 TEST_ASSERT_EQ(0, checkResults
);
370 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
371 TEST_ASSERT_SUCCESS(status
);
372 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
, checkResults
);
374 checkResults
= uspoof_check(sc
, goodCyrl
, -1, NULL
, &status
);
375 TEST_ASSERT_SUCCESS(status
);
376 TEST_ASSERT_EQ(0, checkResults
);
378 /* Reset with an empty locale list, which should allow all characters to pass */
379 uspoof_setAllowedLocales(sc
, " ", &status
);
380 TEST_ASSERT_SUCCESS(status
);
382 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
383 TEST_ASSERT_SUCCESS(status
);
384 TEST_ASSERT_EQ(0, checkResults
);
388 * AllowedChars set/get the USet of allowed characters.
393 int32_t checkResults
;
395 /* By default, we should see no restriction; the USet should allow all characters. */
396 set
= uspoof_getAllowedChars(sc
, &status
);
397 TEST_ASSERT_SUCCESS(status
);
398 tmpSet
= uset_open(0, 0x10ffff);
399 TEST_ASSERT(uset_equals(tmpSet
, set
));
401 /* Setting the allowed chars should enable the check. */
402 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
& ~USPOOF_CHAR_LIMIT
, &status
);
403 TEST_ASSERT_SUCCESS(status
);
405 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
406 uset_remove(tmpSet
, goodLatin
[1]);
407 uspoof_setAllowedChars(sc
, tmpSet
, &status
);
408 TEST_ASSERT_SUCCESS(status
);
411 /* Latin Identifier should now fail; other non-latin test cases should still be OK
412 * Note: fail of CHAR_LIMIT also causes the restriction level to be USPOOF_UNRESTRICTIVE
413 * which will give us a USPOOF_RESTRICTION_LEVEL failure.
415 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
416 TEST_ASSERT_SUCCESS(status
);
417 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
| USPOOF_RESTRICTION_LEVEL
, checkResults
);
419 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
420 TEST_ASSERT_SUCCESS(status
);
421 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
, checkResults
);
429 int32_t checkResults
;
432 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodLatin
, -1, &status
);
433 TEST_ASSERT_SUCCESS(status
);
435 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
436 TEST_ASSERT_SUCCESS(status
);
437 TEST_ASSERT_EQ(0, checkResults
);
438 TEST_ASSERT_EQ(0, position
);
440 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodCyrl
, -1, &status
);
441 TEST_ASSERT_SUCCESS(status
);
442 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
443 TEST_ASSERT_SUCCESS(status
);
444 TEST_ASSERT_EQ(0, checkResults
);
446 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, scMixed
, -1, &status
);
447 TEST_ASSERT_SUCCESS(status
);
449 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
450 TEST_ASSERT_SUCCESS(status
);
451 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_SINGLE_SCRIPT
, checkResults
);
452 TEST_ASSERT_EQ(0, position
);
457 * uspoof_areConfusable()
460 int32_t checkResults
;
462 checkResults
= uspoof_areConfusable(sc
, scLatin
, -1, scMixed
, -1, &status
);
463 TEST_ASSERT_SUCCESS(status
);
464 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
466 checkResults
= uspoof_areConfusable(sc
, goodGreek
, -1, scLatin
, -1, &status
);
467 TEST_ASSERT_SUCCESS(status
);
468 TEST_ASSERT_EQ(0, checkResults
);
470 checkResults
= uspoof_areConfusable(sc
, lll_Latin_a
, -1, lll_Latin_b
, -1, &status
);
471 TEST_ASSERT_SUCCESS(status
);
472 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
480 int32_t checkResults
;
485 u_strToUTF8(s1
, sizeof(s1
), NULL
, scLatin
, -1, &status
);
486 u_strToUTF8(s2
, sizeof(s2
), NULL
, scMixed
, -1, &status
);
487 TEST_ASSERT_SUCCESS(status
);
488 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
489 TEST_ASSERT_SUCCESS(status
);
490 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
492 u_strToUTF8(s1
, sizeof(s1
), NULL
, goodGreek
, -1, &status
);
493 u_strToUTF8(s2
, sizeof(s2
), NULL
, scLatin
, -1, &status
);
494 TEST_ASSERT_SUCCESS(status
);
495 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
496 TEST_ASSERT_SUCCESS(status
);
497 TEST_ASSERT_EQ(0, checkResults
);
499 u_strToUTF8(s1
, sizeof(s1
), NULL
, lll_Latin_a
, -1, &status
);
500 u_strToUTF8(s2
, sizeof(s2
), NULL
, lll_Latin_b
, -1, &status
);
501 TEST_ASSERT_SUCCESS(status
);
502 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
503 TEST_ASSERT_SUCCESS(status
);
504 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
517 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, dest
, sizeof(dest
)/sizeof(UChar
), &status
);
518 TEST_ASSERT_SUCCESS(status
);
519 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel
, dest
));
520 TEST_ASSERT_EQ(u_strlen(lll_Skel
), skelLength
);
522 skelLength
= uspoof_getSkeletonUTF8(sc
, USPOOF_ANY_CASE
, goodLatinUTF8
, -1, (char*)dest
,
523 sizeof(dest
)/sizeof(UChar
), &status
);
524 TEST_ASSERT_SUCCESS(status
);
526 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, NULL
, 0, &status
);
527 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR
, status
);
528 TEST_ASSERT_EQ(3, skelLength
);
529 status
= U_ZERO_ERROR
;
534 * get Inclusion and Recommended sets
537 const USet
*inclusions
= NULL
;
538 const USet
*recommended
= NULL
;
540 inclusions
= uspoof_getInclusionSet(&status
);
541 TEST_ASSERT_SUCCESS(status
);
542 TEST_ASSERT_EQ(TRUE
, uset_isFrozen(inclusions
));
544 status
= U_ZERO_ERROR
;
545 recommended
= uspoof_getRecommendedSet(&status
);
546 TEST_ASSERT_SUCCESS(status
);
547 TEST_ASSERT_EQ(TRUE
, uset_isFrozen(recommended
));
552 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */