1 /********************************************************************
3 * Copyright (c) 2009-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
11 /*C API TEST for the uspoof Unicode Identifier Spoofing and Security API */
13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of spoof detection functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
26 #include "unicode/uspoof.h"
27 #include "unicode/ustring.h"
28 #include "unicode/uset.h"
/*
 * TEST_ASSERT_SUCCESS(status)
 *   When U_FAILURE(status) is true, reports the failing file, line and the
 *   error's name through log_err_status(); otherwise does nothing.
 *   Expands to a brace-enclosed block, and "status" appears several times
 *   in the expansion, so pass a plain variable.
 */
#define TEST_ASSERT_SUCCESS(status) \
    { \
        if (U_FAILURE(status)) { \
            log_err_status(status, \
                           "Failure at file %s, line %d, error = %s\n", \
                           __FILE__, __LINE__, u_errorName(status)); \
        } \
    }
/*
 * TEST_ASSERT(expr)
 *   Reports a test failure through log_err(), quoting the source text of
 *   "expr", when the expression compares equal to FALSE.
 *   Expands to a brace-enclosed block.
 */
#define TEST_ASSERT(expr) \
    { \
        if ((expr) == FALSE) { \
            log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", \
                    __FILE__, __LINE__, #expr); \
        } \
    }
/*
 * TEST_ASSERT_EQ(a, b)
 *   Reports a test failure through log_err() when (a) != (b).
 *   The message shows the source text of both operands and their values,
 *   each formatted with %d.  On failure each operand is evaluated a second
 *   time for the message.
 *   Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_EQ(a, b) \
    { \
        if ((a) != (b)) { \
            log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                    __FILE__, __LINE__, \
                    #a, (a), \
                    #b, (b)); \
        } \
    }
/*
 * TEST_ASSERT_NE(a, b)
 *   Reports a test failure through log_err() when (a) == (b).
 *   The message shows the source text of both operands and their values,
 *   each formatted with %d.  On failure each operand is evaluated a second
 *   time for the message.
 *   Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_NE(a, b) \
    { \
        if ((a) == (b)) { \
            log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                    __FILE__, __LINE__, \
                    #a, (a), \
                    #b, (b)); \
        } \
    }
47 * TEST_SETUP and TEST_TEARDOWN
48 * macros to handle the boilerplate around setting up test case.
49 * Put arbitrary test code between SETUP and TEARDOWN.
50 * "sc" is the ready-to-go SpoofChecker for use in the tests.
52 #define TEST_SETUP { \
53 UErrorCode status = U_ZERO_ERROR; \
55 sc = uspoof_open(&status); \
56 TEST_ASSERT_SUCCESS(status); \
57 if (U_SUCCESS(status)){
59 #define TEST_TEARDOWN \
61 TEST_ASSERT_SUCCESS(status); \
66 static void TestUSpoofCAPI(void);
68 void addUSpoofTest(TestNode
** root
);
70 void addUSpoofTest(TestNode
** root
)
72 #if !UCONFIG_NO_FILE_IO
73 addTest(root
, &TestUSpoofCAPI
, "uspoof/TestUSpoofCAPI");
78 * Identifiers for verifying that spoof checking is minimally alive and working.
80 const UChar goodLatin
[] = {(UChar
)0x75, (UChar
)0x7a, 0}; /* "uz", all ASCII */
81 /* (not confusable) */
82 const UChar scMixed
[] = {(UChar
)0x73, (UChar
)0x0441, 0}; /* "sc", with Cyrillic 'c' */
83 /* (mixed script, confusable */
85 const UChar scLatin
[] = {(UChar
)0x73, (UChar
)0x63, 0}; /* "sc", plain ascii. */
86 const UChar goodCyrl
[] = {(UChar
)0x438, (UChar
)0x43B, 0}; /* Plain lower case Cyrillic letters,
87 no latin confusables */
89 const UChar goodGreek
[] = {(UChar
)0x3c0, (UChar
)0x3c6, 0}; /* Plain lower case Greek letters */
91 const UChar lll_Latin_a
[] = {(UChar
)0x6c, (UChar
)0x49, (UChar
)0x31, 0}; /* lI1, all ASCII */
93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94 const UChar lll_Latin_b
[] = {(UChar
)0xff29, (UChar
)0x217c, (UChar
)0x196, 0};
96 const UChar lll_Cyrl
[] = {(UChar
)0x0406, (UChar
)0x04C0, (UChar
)0x31, 0};
98 /* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */
99 const UChar lll_Skel
[] = {(UChar
)0x6c, (UChar
)0x6c, (UChar
)0x6c, 0};
101 const UChar han_Hiragana
[] = {(UChar
)0x3086, (UChar
)0x308A, (UChar
)0x0020, (UChar
)0x77F3, (UChar
)0x7530, 0};
/*
 * Byte-string input for the UTF-8 flavor of the skeleton API; present to
 * provide better code coverage.  Bytes are given numerically rather than as
 * a character literal.
 */
const char goodLatinUTF8[] = {
    0x75,
    0x77,
    0x00
};
106 * Spoof Detection C API Tests
108 static void TestUSpoofCAPI(void) {
111 * basic uspoof_open().
115 UErrorCode status
= U_ZERO_ERROR
;
116 sc
= uspoof_open(&status
);
117 TEST_ASSERT_SUCCESS(status
);
118 if (U_FAILURE(status
)) {
119 /* If things are so broken that we can't even open a default spoof checker, */
120 /* don't even try the rest of the tests. They would all fail. */
129 * Test Open from source rules.
132 const char *dataSrcDir
;
135 int confusablesLength
;
136 char *confusablesWholeScript
;
137 int confusablesWholeScriptLength
;
143 dataSrcDir
= ctest_dataSrcDir();
144 fileName
= malloc(strlen(dataSrcDir
) + 100);
145 strcpy(fileName
, dataSrcDir
);
146 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusables.txt");
147 f
= fopen(fileName
, "rb");
148 TEST_ASSERT_NE(f
, NULL
);
149 confusables
= malloc(3000000);
150 confusablesLength
= fread(confusables
, 1, 3000000, f
);
154 strcpy(fileName
, dataSrcDir
);
155 strcat(fileName
, U_FILE_SEP_STRING
"unidata" U_FILE_SEP_STRING
"confusablesWholeScript.txt");
156 f
= fopen(fileName
, "rb");
157 TEST_ASSERT_NE(f
, NULL
);
158 confusablesWholeScript
= malloc(1000000);
159 confusablesWholeScriptLength
= fread(confusablesWholeScript
, 1, 1000000, f
);
162 rsc
= uspoof_openFromSource(confusables
, confusablesLength
,
163 confusablesWholeScript
, confusablesWholeScriptLength
,
164 &errType
, &pe
, &status
);
165 TEST_ASSERT_SUCCESS(status
);
167 free(confusablesWholeScript
);
171 /* printf("ParseError Line is %d\n", pe.line); */
176 * openFromSerialized and serialize
179 int32_t serializedSize
= 0;
180 int32_t actualLength
= 0;
183 int32_t checkResults
;
186 serializedSize
= uspoof_serialize(sc
, NULL
, 0, &status
);
187 TEST_ASSERT_EQ(status
, U_BUFFER_OVERFLOW_ERROR
);
188 TEST_ASSERT(serializedSize
> 0);
190 /* Serialize the default spoof checker */
191 status
= U_ZERO_ERROR
;
192 buf
= (char *)malloc(serializedSize
+ 10);
193 TEST_ASSERT(buf
!= NULL
);
194 buf
[serializedSize
] = 42;
195 uspoof_serialize(sc
, buf
, serializedSize
, &status
);
196 TEST_ASSERT_SUCCESS(status
);
197 TEST_ASSERT_EQ(42, buf
[serializedSize
]);
199 /* Create a new spoof checker from the freshly serialized data */
200 sc2
= uspoof_openFromSerialized(buf
, serializedSize
+10, &actualLength
, &status
);
201 TEST_ASSERT_SUCCESS(status
);
202 TEST_ASSERT_NE(NULL
, sc2
);
203 TEST_ASSERT_EQ(serializedSize
, actualLength
);
205 /* Verify that the new spoof checker at least wiggles */
206 checkResults
= uspoof_check(sc2
, goodLatin
, -1, NULL
, &status
);
207 TEST_ASSERT_SUCCESS(status
);
208 TEST_ASSERT_EQ(0, checkResults
);
210 checkResults
= uspoof_check(sc2
, scMixed
, -1, NULL
, &status
);
211 TEST_ASSERT_SUCCESS(status
);
212 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
221 * Set & Get Check Flags
225 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
, &status
);
226 TEST_ASSERT_SUCCESS(status
);
227 t
= uspoof_getChecks(sc
, &status
);
228 TEST_ASSERT_EQ(t
, USPOOF_ALL_CHECKS
);
230 uspoof_setChecks(sc
, 0, &status
);
231 TEST_ASSERT_SUCCESS(status
);
232 t
= uspoof_getChecks(sc
, &status
);
233 TEST_ASSERT_EQ(0, t
);
236 USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
,
238 TEST_ASSERT_SUCCESS(status
);
239 t
= uspoof_getChecks(sc
, &status
);
240 TEST_ASSERT_SUCCESS(status
);
241 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
| USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_ANY_CASE
, t
);
245 * get & setAllowedChars
251 uset
= uspoof_getAllowedChars(sc
, &status
);
252 TEST_ASSERT_SUCCESS(status
);
253 TEST_ASSERT(uset_isFrozen(uset
));
254 us
= uset_open((UChar32
)0x41, (UChar32
)0x5A); /* [A-Z] */
255 uspoof_setAllowedChars(sc
, us
, &status
);
256 TEST_ASSERT_SUCCESS(status
);
257 TEST_ASSERT_NE(us
, uspoof_getAllowedChars(sc
, &status
));
258 TEST_ASSERT(uset_equals(us
, uspoof_getAllowedChars(sc
, &status
)));
259 TEST_ASSERT_SUCCESS(status
);
268 USpoofChecker
*clone1
= NULL
;
269 USpoofChecker
*clone2
= NULL
;
270 int32_t checkResults
= 0;
272 clone1
= uspoof_clone(sc
, &status
);
273 TEST_ASSERT_SUCCESS(status
);
274 TEST_ASSERT_NE(clone1
, sc
);
276 clone2
= uspoof_clone(clone1
, &status
);
277 TEST_ASSERT_SUCCESS(status
);
278 TEST_ASSERT_NE(clone2
, clone1
);
280 uspoof_close(clone1
);
282 /* Verify that the cloned spoof checker is alive */
283 checkResults
= uspoof_check(clone2
, goodLatin
, -1, NULL
, &status
);
284 TEST_ASSERT_SUCCESS(status
);
285 TEST_ASSERT_EQ(0, checkResults
);
287 checkResults
= uspoof_check(clone2
, scMixed
, -1, NULL
, &status
);
288 TEST_ASSERT_SUCCESS(status
);
289 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
290 uspoof_close(clone2
);
294 * basic uspoof_check()
298 result
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
299 TEST_ASSERT_SUCCESS(status
);
300 TEST_ASSERT_EQ(0, result
);
302 result
= uspoof_check(sc
, han_Hiragana
, -1, NULL
, &status
);
303 TEST_ASSERT_SUCCESS(status
);
304 TEST_ASSERT_EQ(0, result
);
306 result
= uspoof_check(sc
, scMixed
, -1, NULL
, &status
);
307 TEST_ASSERT_SUCCESS(status
);
308 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
, result
);
318 int32_t checkResults
;
320 checks
= uspoof_getChecks(sc
, &status
);
321 TEST_ASSERT_SUCCESS(status
);
322 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS
, checks
);
324 checks
&= ~(USPOOF_SINGLE_SCRIPT
| USPOOF_MIXED_SCRIPT_CONFUSABLE
);
325 uspoof_setChecks(sc
, checks
, &status
);
326 TEST_ASSERT_SUCCESS(status
);
327 checks2
= uspoof_getChecks(sc
, &status
);
328 TEST_ASSERT_EQ(checks
, checks2
);
330 /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
331 So with those tests gone checking that Identifier should now succeed */
332 checkResults
= uspoof_check(sc
, scMixed
, -1, NULL
, &status
);
333 TEST_ASSERT_SUCCESS(status
);
334 TEST_ASSERT_EQ(0, checkResults
);
342 const char *allowedLocales
;
343 int32_t checkResults
;
345 /* Default allowed locales list should be empty */
346 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
347 TEST_ASSERT_SUCCESS(status
);
348 TEST_ASSERT(strcmp("", allowedLocales
) == 0)
350 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
351 uspoof_setAllowedLocales(sc
, "en, ru_RU", &status
);
352 TEST_ASSERT_SUCCESS(status
);
353 allowedLocales
= uspoof_getAllowedLocales(sc
, &status
);
354 TEST_ASSERT_SUCCESS(status
);
355 TEST_ASSERT(strstr(allowedLocales
, "en") != NULL
);
356 TEST_ASSERT(strstr(allowedLocales
, "ru") != NULL
);
358 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,
359 * which we don't want to see in this test. */
360 uspoof_setChecks(sc
, USPOOF_CHAR_LIMIT
, &status
);
361 TEST_ASSERT_SUCCESS(status
);
363 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
364 TEST_ASSERT_SUCCESS(status
);
365 TEST_ASSERT_EQ(0, checkResults
);
367 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
368 TEST_ASSERT_SUCCESS(status
);
369 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
, checkResults
);
371 checkResults
= uspoof_check(sc
, goodCyrl
, -1, NULL
, &status
);
372 TEST_ASSERT_SUCCESS(status
);
373 TEST_ASSERT_EQ(0, checkResults
);
375 /* Reset with an empty locale list, which should allow all characters to pass */
376 uspoof_setAllowedLocales(sc
, " ", &status
);
377 TEST_ASSERT_SUCCESS(status
);
379 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
380 TEST_ASSERT_SUCCESS(status
);
381 TEST_ASSERT_EQ(0, checkResults
);
385 * AllowedChars set/get the USet of allowed characters.
390 int32_t checkResults
;
392 /* By default, we should see no restriction; the USet should allow all characters. */
393 set
= uspoof_getAllowedChars(sc
, &status
);
394 TEST_ASSERT_SUCCESS(status
);
395 tmpSet
= uset_open(0, 0x10ffff);
396 TEST_ASSERT(uset_equals(tmpSet
, set
));
398 /* Setting the allowed chars should enable the check. */
399 uspoof_setChecks(sc
, USPOOF_ALL_CHECKS
& ~USPOOF_CHAR_LIMIT
, &status
);
400 TEST_ASSERT_SUCCESS(status
);
402 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
403 uset_remove(tmpSet
, goodLatin
[1]);
404 uspoof_setAllowedChars(sc
, tmpSet
, &status
);
405 TEST_ASSERT_SUCCESS(status
);
408 /* Latin Identifier should now fail; other non-latin test cases should still be OK */
409 checkResults
= uspoof_check(sc
, goodLatin
, -1, NULL
, &status
);
410 TEST_ASSERT_SUCCESS(status
);
411 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT
, checkResults
);
413 checkResults
= uspoof_check(sc
, goodGreek
, -1, NULL
, &status
);
414 TEST_ASSERT_SUCCESS(status
);
415 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE
, checkResults
);
423 int32_t checkResults
;
426 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodLatin
, -1, &status
);
427 TEST_ASSERT_SUCCESS(status
);
429 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
430 TEST_ASSERT_SUCCESS(status
);
431 TEST_ASSERT_EQ(0, checkResults
);
432 TEST_ASSERT_EQ(666, position
);
434 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, goodCyrl
, -1, &status
);
435 TEST_ASSERT_SUCCESS(status
);
436 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
437 TEST_ASSERT_SUCCESS(status
);
438 TEST_ASSERT_EQ(0, checkResults
);
440 u_strToUTF8(utf8buf
, sizeof(utf8buf
), NULL
, scMixed
, -1, &status
);
441 TEST_ASSERT_SUCCESS(status
);
443 checkResults
= uspoof_checkUTF8(sc
, utf8buf
, -1, &position
, &status
);
444 TEST_ASSERT_SUCCESS(status
);
445 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
| USPOOF_SINGLE_SCRIPT
, checkResults
);
446 TEST_ASSERT_EQ(2, position
);
451 * uspoof_areConfusable()
454 int32_t checkResults
;
456 checkResults
= uspoof_areConfusable(sc
, scLatin
, -1, scMixed
, -1, &status
);
457 TEST_ASSERT_SUCCESS(status
);
458 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
460 checkResults
= uspoof_areConfusable(sc
, goodGreek
, -1, scLatin
, -1, &status
);
461 TEST_ASSERT_SUCCESS(status
);
462 TEST_ASSERT_EQ(0, checkResults
);
464 checkResults
= uspoof_areConfusable(sc
, lll_Latin_a
, -1, lll_Latin_b
, -1, &status
);
465 TEST_ASSERT_SUCCESS(status
);
466 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
474 int32_t checkResults
;
479 u_strToUTF8(s1
, sizeof(s1
), NULL
, scLatin
, -1, &status
);
480 u_strToUTF8(s2
, sizeof(s2
), NULL
, scMixed
, -1, &status
);
481 TEST_ASSERT_SUCCESS(status
);
482 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
483 TEST_ASSERT_SUCCESS(status
);
484 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE
, checkResults
);
486 u_strToUTF8(s1
, sizeof(s1
), NULL
, goodGreek
, -1, &status
);
487 u_strToUTF8(s2
, sizeof(s2
), NULL
, scLatin
, -1, &status
);
488 TEST_ASSERT_SUCCESS(status
);
489 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
490 TEST_ASSERT_SUCCESS(status
);
491 TEST_ASSERT_EQ(0, checkResults
);
493 u_strToUTF8(s1
, sizeof(s1
), NULL
, lll_Latin_a
, -1, &status
);
494 u_strToUTF8(s2
, sizeof(s2
), NULL
, lll_Latin_b
, -1, &status
);
495 TEST_ASSERT_SUCCESS(status
);
496 checkResults
= uspoof_areConfusableUTF8(sc
, s1
, -1, s2
, -1, &status
);
497 TEST_ASSERT_SUCCESS(status
);
498 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE
, checkResults
);
511 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, dest
, sizeof(dest
)/sizeof(UChar
), &status
);
512 TEST_ASSERT_SUCCESS(status
);
513 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel
, dest
));
514 TEST_ASSERT_EQ(u_strlen(lll_Skel
), skelLength
);
516 skelLength
= uspoof_getSkeletonUTF8(sc
, USPOOF_ANY_CASE
, goodLatinUTF8
, -1, (char*)dest
,
517 sizeof(dest
)/sizeof(UChar
), &status
);
518 TEST_ASSERT_SUCCESS(status
);
520 skelLength
= uspoof_getSkeleton(sc
, USPOOF_ANY_CASE
, lll_Latin_a
, -1, NULL
, 0, &status
);
521 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR
, status
);
522 TEST_ASSERT_EQ(3, skelLength
);
523 status
= U_ZERO_ERROR
;
528 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION */