1 /********************************************************************
3 * Copyright (c) 2009-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
11 /*C API TEST for the uspoof Unicode Identifier Spoofing and Security API */
13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of spoof detection functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
26 #include "unicode/uspoof.h"
27 #include "unicode/ustring.h"
28 #include "unicode/uset.h"
/* TEST_ASSERT_SUCCESS(status): if `status` is an ICU failure code (U_FAILURE),
 * report it through log_err_status() together with the current file, line and
 * the error's name from u_errorName(). The macro only logs; it does not return
 * from or otherwise leave the enclosing test. It expands to a brace-enclosed
 * block. */
31 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
32 log_err_status(status, "Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}}
/* TEST_ASSERT(expr): log a test failure through log_err() (file, line and the
 * stringized expression) when `expr` compares equal to FALSE. Execution
 * continues after a failure. The expansion is a brace-enclosed block, so a call
 * site compiles with or without a trailing semicolon. */
34 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
35 log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
/* TEST_ASSERT_EQ(a, b): log a test failure through log_err() when (a) != (b).
 * The message shows both stringized operands and their values formatted with
 * %d. Each operand is evaluated once for the comparison and a second time for
 * the message when the check fails, so operands should be free of side
 * effects. Execution continues after a failure. */
37 #define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
38 log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
39 __FILE__, __LINE__, #a, (a), #b, (b)); }}
/* TEST_ASSERT_NE(a, b): log a test failure through log_err() when (a) == (b).
 * The message shows both stringized operands and their values formatted with
 * %d. Operands are evaluated again for the message when the check fails.
 * Execution continues after a failure.
 * NOTE(review): this file invokes the macro with pointer operands (for example
 * a FILE pointer compared against NULL); passing pointers for %d does not match
 * the format specifier. The mismatch is only reached on the failure path. */
41 #define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
42 log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
43 __FILE__, __LINE__, #a, (a), #b, (b)); }}
47 * TEST_SETUP and TEST_TEARDOWN
48 * macros to handle the boilerplate around setting up test case.
49 * Put arbitrary test code between SETUP and TEARDOWN.
50 * "sc" is the ready-to-go SpoofChecker for use in the tests.
52 #define TEST_SETUP { \
53 UErrorCode status = U_ZERO_ERROR; \
55 sc = uspoof_open(&status); \
56 TEST_ASSERT_SUCCESS(status); \
57 if (U_SUCCESS(status)){
59 #define TEST_TEARDOWN \
61 TEST_ASSERT_SUCCESS(status); \
66 static void TestUSpoofCAPI(void);
68 void addUSpoofTest(TestNode
** root
);
70 void addUSpoofTest(TestNode
** root
)
72 #if !UCONFIG_NO_FILE_IO
73 addTest(root
, &TestUSpoofCAPI
, "uspoof/TestUSpoofCAPI");
78 * Identifiers for verifying that spoof checking is minimally alive and working.
80 const UChar goodLatin
[] = {(UChar
)0x75, (UChar
)0x7a, 0}; /* "uz", all ASCII */
81 /* (not confusable) */
82 const UChar scMixed
[] = {(UChar
)0x73, (UChar
)0x0441, 0}; /* "sc", with Cyrillic 'c' */
83 /* (mixed script, confusable) */
85 const UChar scLatin
[] = {(UChar
)0x73, (UChar
)0x63, 0}; /* "sc", plain ascii. */
86 const UChar goodCyrl
[] = {(UChar
)0x438, (UChar
)0x43B, 0}; /* Plain lower case Cyrillic letters,
87 no latin confusables */
89 const UChar goodGreek
[] = {(UChar
)0x3c0, (UChar
)0x3c6, 0}; /* Plain lower case Greek letters */
91 const UChar lll_Latin_a
[] = {(UChar
)0x6c, (UChar
)0x49, (UChar
)0x31, 0}; /* lI1, all ASCII */
93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94 const UChar lll_Latin_b
[] = {(UChar
)0xff29, (UChar
)0x217c, (UChar
)0x196, 0};
96 const UChar lll_Cyrl
[] = {(UChar
)0x0406, (UChar
)0x04C0, (UChar
)0x31, 0};
99 /* The skeleton transform for all of these 'lll' lookalikes is all lower case l. */
99 const UChar lll_Skel
[] = {(UChar
)0x6c, (UChar
)0x6c, (UChar
)0x6c, 0};
101 /* Provide better code coverage */
102 const char goodLatinUTF8
[] = {0x75, 0x77, 0}; /* NUL-terminated bytes 'u', 'w'; note goodLatin above is "uz" (0x75, 0x7a) */
104 * Spoof Detection C API Tests
106 static void TestUSpoofCAPI(void) {
109 * basic uspoof_open().
113 UErrorCode status
= U_ZERO_ERROR
;
114 sc
= uspoof_open(&status
);
115 TEST_ASSERT_SUCCESS(status
);
116 if (U_FAILURE(status
)) {
117 /* If things are so broken that we can't even open a default spoof checker, */
118 /* don't even try the rest of the tests. They would all fail. */
127 * Test Open from source rules.
130 const char *dataSrcDir
;
133 int confusablesLength
;
134 char *confusablesWholeScript
;
135 int confusablesWholeScriptLength
;
141 dataSrcDir
= ctest_dataSrcDir();
142 fileName
= malloc(strlen(dataSrcDir
) + 100);
143 strcpy(fileName
, dataSrcDir
);
144 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusables.txt");
145 f
= fopen(fileName
, "r");
146 TEST_ASSERT_NE(f
, NULL
);
147 confusables
= malloc(3000000);
148 confusablesLength
= fread(confusables
, 1, 3000000, f
);
152 strcpy(fileName
, dataSrcDir
);
153 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusablesWholeScript.txt");
154 f
= fopen(fileName
, "r");
155 TEST_ASSERT_NE(f
, NULL
);
156 confusablesWholeScript
= malloc(1000000);
157 confusablesWholeScriptLength
= fread(confusablesWholeScript
, 1, 1000000, f
);
160 rsc
= uspoof_openFromSource(confusables
, confusablesLength
,
161 confusablesWholeScript
, confusablesWholeScriptLength
,
162 &errType
, &pe
, &status
);
163 TEST_ASSERT_SUCCESS(status
);
165 free(confusablesWholeScript
);
169 /* printf("ParseError Line is %d\n", pe.line); */
174 * openFromSerialized and serialize
177 int32_t serializedSize
= 0;
178 int32_t actualLength
= 0;
181 int32_t checkResults
;
184 serializedSize
= uspoof_serialize(sc
, NULL
, 0, &status
);
185 TEST_ASSERT_EQ(status
, U_BUFFER_OVERFLOW_ERROR
);
186 TEST_ASSERT(serializedSize
> 0);
188 /* Serialize the default spoof checker */
189 status
= U_ZERO_ERROR
;
190 buf
= (char *)malloc(serializedSize
+ 10);
191 TEST_ASSERT(buf
!= NULL
);
192 buf
[serializedSize
] = 42;
193 uspoof_serialize(sc
, buf
, serializedSize
, &status
);
194 TEST_ASSERT_SUCCESS(status
);
195 TEST_ASSERT_EQ(42, buf
[serializedSize
]);
197 /* Create a new spoof checker from the freshly serialized data */
198 sc2
= uspoof_openFromSerialized(buf
, serializedSize
+10, &actualLength
, &status
);
199 TEST_ASSERT_SUCCESS(status
);
200 TEST_ASSERT_NE(NULL
, sc2
);
201 TEST_ASSERT_EQ(serializedSize
, actualLength
);
203 /* Verify that the new spoof checker at least wiggles */
204 checkResults
= uspoof_check(sc2
, goodLatin
, -1, NULL
, &status
);
205 TEST_ASSERT_SUCCESS(status
);
206 TEST_ASSERT_EQ(0, checkResults
);
208 checkResults
= uspoof_check(sc2
, scMixed
, -1, NULL
, &status
);
209 TEST_ASSERT_SUCCESS(status
);
210 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
219 * Set & Get Check Flags
223 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
, &status
);
224 TEST_ASSERT_SUCCESS(status
);
225 t
= uspoof_getChecks(sc
, &status
);
226 TEST_ASSERT_EQ(t
, USPOOF_ALL_CHECKS
);
228 uspoof_setChecks(sc
, 0, &status
);
229 TEST_ASSERT_SUCCESS(status
);
230 t
= uspoof_getChecks(sc
, &status
);
231 TEST_ASSERT_EQ(0, t
);
234 USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
,
236 TEST_ASSERT_SUCCESS(status
);
237 t
= uspoof_getChecks(sc
, &status
);
238 TEST_ASSERT_SUCCESS(status
);
239 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
, t
);
243 * get & setAllowedChars
249 uset
= uspoof_getAllowedChars(sc
, &status
);
250 TEST_ASSERT_SUCCESS(status
);
251 TEST_ASSERT(uset_isFrozen(uset
));
252 us
= uset_open((UChar32
)0x41, (UChar32
)0x5A); /* [A-Z] */
253 uspoof_setAllowedChars(sc
, us
, &status
);
254 TEST_ASSERT_SUCCESS(status
);
255 TEST_ASSERT_NE(us
, uspoof_getAllowedChars(sc
, &status
));
256 TEST_ASSERT(uset_equals(us
, uspoof_getAllowedChars(sc
, &status
)));
257 TEST_ASSERT_SUCCESS(status
);
266 USpoofChecker
*clone1
= NULL
;
267 USpoofChecker
*clone2
= NULL
;
268 int32_t checkResults
= 0;
270 clone1
= uspoof_clone(sc
, &status
);
271 TEST_ASSERT_SUCCESS(status
);
272 TEST_ASSERT_NE(clone1
, sc
);
274 clone2
= uspoof_clone(clone1
, &status
);
275 TEST_ASSERT_SUCCESS(status
);
276 TEST_ASSERT_NE(clone2
, clone1
);
278 uspoof_close(clone1
);
280 /* Verify that the cloned spoof checker is alive */
281 checkResults
= uspoof_check(clone2
, goodLatin
, -1, NULL
, &status
);
282 TEST_ASSERT_SUCCESS(status
);
283 TEST_ASSERT_EQ(0, checkResults
);
285 checkResults
= uspoof_check(clone2
, scMixed
, -1, NULL
, &status
);
286 TEST_ASSERT_SUCCESS(status
);
287 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
288 uspoof_close(clone2
);
297 int32_t checkResults
;
299 checks
= uspoof_getChecks(sc
, &status
);
300 TEST_ASSERT_SUCCESS(status
);
301 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS
, checks
);
303 checks
&= ~(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
);
304 uspoof_setChecks(sc
, checks
, &status
);
305 TEST_ASSERT_SUCCESS(status
);
306 checks2
= uspoof_getChecks(sc
, &status
);
307 TEST_ASSERT_EQ(checks
, checks2
);
309 /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
310 So with those tests gone checking that Identifier should now succeed */
311 checkResults
= uspoof_check(sc
, scMixed
, -1, NULL
, &status
);
312 TEST_ASSERT_SUCCESS(status
);
313 TEST_ASSERT_EQ(0, checkResults
);
321 const char *allowedLocales
;
322 int32_t checkResults
;
324 /* Default allowed locales list should be empty */
325 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
326 TEST_ASSERT_SUCCESS(status
);
327 TEST_ASSERT(strcmp("", allowedLocales
) == 0)
329 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
330 uspoof_setAllowedLocales(sc
, "en, ru_RU", &status
);
331 TEST_ASSERT_SUCCESS(status
);
332 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
333 TEST_ASSERT_SUCCESS(status
);
334 TEST_ASSERT(strstr(allowedLocales
, "en") != NULL
);
335 TEST_ASSERT(strstr(allowedLocales
, "ru") != NULL
);
337 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,
338 * which we don't want to see in this test. */
339 uspoof_setChecks(sc
, USPOOF_CHAR_LIMIT
, &status
);
340 TEST_ASSERT_SUCCESS(status
);
342 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
343 TEST_ASSERT_SUCCESS(status
);
344 TEST_ASSERT_EQ(0, checkResults
);
346 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
347 TEST_ASSERT_SUCCESS(status
);
348 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
, checkResults
);
350 checkResults
= uspoof_check(sc
, goodCyrl
, -1, NULL
, &status
);
351 TEST_ASSERT_SUCCESS(status
);
352 TEST_ASSERT_EQ(0, checkResults
);
354 /* Reset with an empty locale list, which should allow all characters to pass */
355 uspoof_setAllowedLocales(sc
, " ", &status
);
356 TEST_ASSERT_SUCCESS(status
);
358 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
359 TEST_ASSERT_SUCCESS(status
);
360 TEST_ASSERT_EQ(0, checkResults
);
364 * AllowedChars set/get the USet of allowed characters.
369 int32_t checkResults
;
371 /* By default, we should see no restriction; the USet should allow all characters. */
372 set
= uspoof_getAllowedChars(sc
, &status
);
373 TEST_ASSERT_SUCCESS(status
);
374 tmpSet
= uset_open(0, 0x10ffff);
375 TEST_ASSERT(uset_equals(tmpSet
, set
));
377 /* Setting the allowed chars should enable the check. */
378 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
& ~USPOOF_CHAR_LIMIT
, &status
);
379 TEST_ASSERT_SUCCESS(status
);
381 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
382 uset_remove(tmpSet
, goodLatin
[1]);
383 uspoof_setAllowedChars(sc
, tmpSet
, &status
);
384 TEST_ASSERT_SUCCESS(status
);
387 /* Latin Identifier should now fail; other non-latin test cases should still be OK */
388 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
389 TEST_ASSERT_SUCCESS(status
);
390 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
, checkResults
);
392 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
393 TEST_ASSERT_SUCCESS(status
);
394 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
, checkResults
);
402 int32_t checkResults
;
405 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodLatin
, -1, &status
);
406 TEST_ASSERT_SUCCESS(status
);
408 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
409 TEST_ASSERT_SUCCESS(status
);
410 TEST_ASSERT_EQ(0, checkResults
);
411 TEST_ASSERT_EQ(666, position
);
413 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodCyrl
, -1, &status
);
414 TEST_ASSERT_SUCCESS(status
);
415 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
416 TEST_ASSERT_SUCCESS(status
);
417 TEST_ASSERT_EQ(0, checkResults
);
419 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, scMixed
, -1, &status
);
420 TEST_ASSERT_SUCCESS(status
);
422 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
423 TEST_ASSERT_SUCCESS(status
);
424 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_SINGLE_SCRIPT
, checkResults
);
425 TEST_ASSERT_EQ(2, position
);
430 * uspoof_areConfusable()
433 int32_t checkResults
;
435 checkResults
= uspoof_areConfusable(sc
, scLatin
, -1, scMixed
, -1, &status
);
436 TEST_ASSERT_SUCCESS(status
);
437 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
439 checkResults
= uspoof_areConfusable(sc
, goodGreek
, -1, scLatin
, -1, &status
);
440 TEST_ASSERT_SUCCESS(status
);
441 TEST_ASSERT_EQ(0, checkResults
);
443 checkResults
= uspoof_areConfusable(sc
, lll_Latin_a
, -1, lll_Latin_b
, -1, &status
);
444 TEST_ASSERT_SUCCESS(status
);
445 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
453 int32_t checkResults
;
458 u_strToUTF8(s1
, sizeof(s1
), NULL
, scLatin
, -1, &status
);
459 u_strToUTF8(s2
, sizeof(s2
), NULL
, scMixed
, -1, &status
);
460 TEST_ASSERT_SUCCESS(status
);
461 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
462 TEST_ASSERT_SUCCESS(status
);
463 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
465 u_strToUTF8(s1
, sizeof(s1
), NULL
, goodGreek
, -1, &status
);
466 u_strToUTF8(s2
, sizeof(s2
), NULL
, scLatin
, -1, &status
);
467 TEST_ASSERT_SUCCESS(status
);
468 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
469 TEST_ASSERT_SUCCESS(status
);
470 TEST_ASSERT_EQ(0, checkResults
);
472 u_strToUTF8(s1
, sizeof(s1
), NULL
, lll_Latin_a
, -1, &status
);
473 u_strToUTF8(s2
, sizeof(s2
), NULL
, lll_Latin_b
, -1, &status
);
474 TEST_ASSERT_SUCCESS(status
);
475 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
476 TEST_ASSERT_SUCCESS(status
);
477 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
490 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, dest
, sizeof(dest
)/sizeof(UChar
), &status
);
491 TEST_ASSERT_SUCCESS(status
);
492 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel
, dest
));
493 TEST_ASSERT_EQ(u_strlen(lll_Skel
), skelLength
);
495 skelLength
= uspoof_getSkeletonUTF8(sc
, USPOOF_ANY_CASE
, goodLatinUTF8
, -1, (char*)dest
,
496 sizeof(dest
)/sizeof(UChar
), &status
);
497 TEST_ASSERT_SUCCESS(status
);
499 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, NULL
, 0, &status
);
500 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR
, status
);
501 TEST_ASSERT_EQ(3, skelLength
);
502 status
= U_ZERO_ERROR
;
507 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */