]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/spooftest.c
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / test / cintltst / spooftest.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2009-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File spooftest.c
9 *
10 *********************************************************************************/
11 /* C API test for the uspoof Unicode Identifier Spoofing and Security API */
12 /**
13 * This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
16 *
17 * More complete testing of spoof detection functionality is done with the C++ tests.
18 **/
19
20 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
22
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include "unicode/uspoof.h"
27 #include "unicode/ustring.h"
28 #include "unicode/uset.h"
29 #include "cintltst.h"
30
/*
 * Assertion helpers.  Each one expands to a brace-enclosed block that reports
 * through the test framework's logging functions and then lets execution
 * continue; none of them aborts the test.
 *
 *   TEST_ASSERT_SUCCESS(status)  logs when U_FAILURE(status) is true.
 *   TEST_ASSERT(expr)            logs when expr evaluates to zero.
 *   TEST_ASSERT_EQ(a, b)         logs when a != b.
 *   TEST_ASSERT_NE(a, b)         logs when a == b.
 *
 * TEST_ASSERT_EQ and TEST_ASSERT_NE print both operands with "%d" and evaluate
 * them a second time when they report, so they should be given side-effect
 * free expressions of int-compatible type.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_err_status(status, "Failure at file %s, line %d, error = %s\n", \
                       __FILE__, __LINE__, u_errorName(status)); \
    } \
}

#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}

#define TEST_ASSERT_EQ(a, b) { \
    if ((a) != (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}

#define TEST_ASSERT_NE(a, b) { \
    if ((a) == (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}
44
45
/*
 * TEST_SETUP and TEST_TEARDOWN
 *     Paired macros that wrap the boilerplate around a single test case.
 *
 *     TEST_SETUP opens a new scope, declares "status" and "sc", opens a
 *     spoof checker into "sc", and then opens an inner block that is entered
 *     only when the open succeeded.  Put arbitrary test code, including local
 *     declarations, between TEST_SETUP and TEST_TEARDOWN.
 *
 *     TEST_TEARDOWN closes the inner block, reports any failure left in
 *     "status", closes "sc", and closes the scope opened by TEST_SETUP.
 */
#define TEST_SETUP { \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
64
65
66 static void TestUSpoofCAPI(void);
67
68 void addUSpoofTest(TestNode** root);
69
70 void addUSpoofTest(TestNode** root)
71 {
72 #if !UCONFIG_NO_FILE_IO
73 addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI");
74 #endif
75 }
76
77 /*
78 * Identifiers for verifying that spoof checking is minimally alive and working.
79 */
80 const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */
81 /* (not confusable) */
82 const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */
83 /* (mixed script, confusable */
84
85 const UChar scLatin[] = {(UChar)0x73, (UChar)0x63, 0}; /* "sc", plain ascii. */
86 const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0}; /* Plain lower case Cyrillic letters,
87 no latin confusables */
88
89 const UChar goodGreek[] = {(UChar)0x3c0, (UChar)0x3c6, 0}; /* Plain lower case Greek letters */
90
91 const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0}; /* lI1, all ASCII */
92
93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94 const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};
95
96 const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};
97
98 /* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */
99 const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};
100
101 const UChar han_Hiragana[] = {(UChar)0x3086, (UChar)0x308A, (UChar)0x0020, (UChar)0x77F3, (UChar)0x7530, 0};
102
103 /* Provide better code coverage */
104 const char goodLatinUTF8[] = {0x75, 0x77, 0};
105 /*
106 * Spoof Detction C API Tests
107 */
108 static void TestUSpoofCAPI(void) {
109
110 /*
111 * basic uspoof_open().
112 */
113 {
114 USpoofChecker *sc;
115 UErrorCode status = U_ZERO_ERROR;
116 sc = uspoof_open(&status);
117 TEST_ASSERT_SUCCESS(status);
118 if (U_FAILURE(status)) {
119 /* If things are so broken that we can't even open a default spoof checker, */
120 /* don't even try the rest of the tests. They would all fail. */
121 return;
122 }
123 uspoof_close(sc);
124 }
125
126
127
128 /*
129 * Test Open from source rules.
130 */
131 TEST_SETUP
132 const char *dataSrcDir;
133 char *fileName;
134 char *confusables;
135 int confusablesLength;
136 char *confusablesWholeScript;
137 int confusablesWholeScriptLength;
138 FILE *f;
139 UParseError pe;
140 int32_t errType;
141 USpoofChecker *rsc;
142
143 dataSrcDir = ctest_dataSrcDir();
144 fileName = malloc(strlen(dataSrcDir) + 100);
145 strcpy(fileName, dataSrcDir);
146 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
147 f = fopen(fileName, "rb");
148 TEST_ASSERT_NE(f, NULL);
149 confusables = malloc(3000000);
150 confusablesLength = fread(confusables, 1, 3000000, f);
151 fclose(f);
152
153
154 strcpy(fileName, dataSrcDir);
155 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
156 f = fopen(fileName, "rb");
157 TEST_ASSERT_NE(f, NULL);
158 confusablesWholeScript = malloc(1000000);
159 confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);
160 fclose(f);
161
162 rsc = uspoof_openFromSource(confusables, confusablesLength,
163 confusablesWholeScript, confusablesWholeScriptLength,
164 &errType, &pe, &status);
165 TEST_ASSERT_SUCCESS(status);
166
167 free(confusablesWholeScript);
168 free(confusables);
169 free(fileName);
170 uspoof_close(rsc);
171 /* printf("ParseError Line is %d\n", pe.line); */
172 TEST_TEARDOWN;
173
174
175 /*
176 * openFromSerialized and serialize
177 */
178 TEST_SETUP
179 int32_t serializedSize = 0;
180 int32_t actualLength = 0;
181 char *buf;
182 USpoofChecker *sc2;
183 int32_t checkResults;
184
185
186 serializedSize = uspoof_serialize(sc, NULL, 0, &status);
187 TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);
188 TEST_ASSERT(serializedSize > 0);
189
190 /* Serialize the default spoof checker */
191 status = U_ZERO_ERROR;
192 buf = (char *)malloc(serializedSize + 10);
193 TEST_ASSERT(buf != NULL);
194 buf[serializedSize] = 42;
195 uspoof_serialize(sc, buf, serializedSize, &status);
196 TEST_ASSERT_SUCCESS(status);
197 TEST_ASSERT_EQ(42, buf[serializedSize]);
198
199 /* Create a new spoof checker from the freshly serialized data */
200 sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);
201 TEST_ASSERT_SUCCESS(status);
202 TEST_ASSERT_NE(NULL, sc2);
203 TEST_ASSERT_EQ(serializedSize, actualLength);
204
205 /* Verify that the new spoof checker at least wiggles */
206 checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);
207 TEST_ASSERT_SUCCESS(status);
208 TEST_ASSERT_EQ(0, checkResults);
209
210 checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);
211 TEST_ASSERT_SUCCESS(status);
212 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
213
214 uspoof_close(sc2);
215 free(buf);
216 TEST_TEARDOWN;
217
218
219
220 /*
221 * Set & Get Check Flags
222 */
223 TEST_SETUP
224 int32_t t;
225 uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);
226 TEST_ASSERT_SUCCESS(status);
227 t = uspoof_getChecks(sc, &status);
228 TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);
229
230 uspoof_setChecks(sc, 0, &status);
231 TEST_ASSERT_SUCCESS(status);
232 t = uspoof_getChecks(sc, &status);
233 TEST_ASSERT_EQ(0, t);
234
235 uspoof_setChecks(sc,
236 USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
237 &status);
238 TEST_ASSERT_SUCCESS(status);
239 t = uspoof_getChecks(sc, &status);
240 TEST_ASSERT_SUCCESS(status);
241 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);
242 TEST_TEARDOWN;
243
244 /*
245 * get & setAllowedChars
246 */
247 TEST_SETUP
248 USet *us;
249 const USet *uset;
250
251 uset = uspoof_getAllowedChars(sc, &status);
252 TEST_ASSERT_SUCCESS(status);
253 TEST_ASSERT(uset_isFrozen(uset));
254 us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */
255 uspoof_setAllowedChars(sc, us, &status);
256 TEST_ASSERT_SUCCESS(status);
257 TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));
258 TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));
259 TEST_ASSERT_SUCCESS(status);
260 uset_close(us);
261 TEST_TEARDOWN;
262
263 /*
264 * clone()
265 */
266
267 TEST_SETUP
268 USpoofChecker *clone1 = NULL;
269 USpoofChecker *clone2 = NULL;
270 int32_t checkResults = 0;
271
272 clone1 = uspoof_clone(sc, &status);
273 TEST_ASSERT_SUCCESS(status);
274 TEST_ASSERT_NE(clone1, sc);
275
276 clone2 = uspoof_clone(clone1, &status);
277 TEST_ASSERT_SUCCESS(status);
278 TEST_ASSERT_NE(clone2, clone1);
279
280 uspoof_close(clone1);
281
282 /* Verify that the cloned spoof checker is alive */
283 checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);
284 TEST_ASSERT_SUCCESS(status);
285 TEST_ASSERT_EQ(0, checkResults);
286
287 checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);
288 TEST_ASSERT_SUCCESS(status);
289 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
290 uspoof_close(clone2);
291 TEST_TEARDOWN;
292
293 /*
294 * basic uspoof_check()
295 */
296 TEST_SETUP
297 int32_t result;
298 result = uspoof_check(sc, goodLatin, -1, NULL, &status);
299 TEST_ASSERT_SUCCESS(status);
300 TEST_ASSERT_EQ(0, result);
301
302 result = uspoof_check(sc, han_Hiragana, -1, NULL, &status);
303 TEST_ASSERT_SUCCESS(status);
304 TEST_ASSERT_EQ(0, result);
305
306 result = uspoof_check(sc, scMixed, -1, NULL, &status);
307 TEST_ASSERT_SUCCESS(status);
308 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, result);
309 TEST_TEARDOWN
310
311
312 /*
313 * get & set Checks
314 */
315 TEST_SETUP
316 int32_t checks;
317 int32_t checks2;
318 int32_t checkResults;
319
320 checks = uspoof_getChecks(sc, &status);
321 TEST_ASSERT_SUCCESS(status);
322 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);
323
324 checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);
325 uspoof_setChecks(sc, checks, &status);
326 TEST_ASSERT_SUCCESS(status);
327 checks2 = uspoof_getChecks(sc, &status);
328 TEST_ASSERT_EQ(checks, checks2);
329
330 /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
331 So with those tests gone checking that Identifier should now succeed */
332 checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);
333 TEST_ASSERT_SUCCESS(status);
334 TEST_ASSERT_EQ(0, checkResults);
335 TEST_TEARDOWN;
336
337 /*
338 * AllowedLoacles
339 */
340
341 TEST_SETUP
342 const char *allowedLocales;
343 int32_t checkResults;
344
345 /* Default allowed locales list should be empty */
346 allowedLocales = uspoof_getAllowedLocales(sc, &status);
347 TEST_ASSERT_SUCCESS(status);
348 TEST_ASSERT(strcmp("", allowedLocales) == 0)
349
350 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
351 uspoof_setAllowedLocales(sc, "en, ru_RU", &status);
352 TEST_ASSERT_SUCCESS(status);
353 allowedLocales = uspoof_getAllowedLocales(sc, &status);
354 TEST_ASSERT_SUCCESS(status);
355 TEST_ASSERT(strstr(allowedLocales, "en") != NULL);
356 TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);
357
358 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,
359 * which we don't want to see in this test. */
360 uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);
361 TEST_ASSERT_SUCCESS(status);
362
363 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
364 TEST_ASSERT_SUCCESS(status);
365 TEST_ASSERT_EQ(0, checkResults);
366
367 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
368 TEST_ASSERT_SUCCESS(status);
369 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);
370
371 checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);
372 TEST_ASSERT_SUCCESS(status);
373 TEST_ASSERT_EQ(0, checkResults);
374
375 /* Reset with an empty locale list, which should allow all characters to pass */
376 uspoof_setAllowedLocales(sc, " ", &status);
377 TEST_ASSERT_SUCCESS(status);
378
379 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
380 TEST_ASSERT_SUCCESS(status);
381 TEST_ASSERT_EQ(0, checkResults);
382 TEST_TEARDOWN;
383
384 /*
385 * AllowedChars set/get the USet of allowed characters.
386 */
387 TEST_SETUP
388 const USet *set;
389 USet *tmpSet;
390 int32_t checkResults;
391
392 /* By default, we should see no restriction; the USet should allow all characters. */
393 set = uspoof_getAllowedChars(sc, &status);
394 TEST_ASSERT_SUCCESS(status);
395 tmpSet = uset_open(0, 0x10ffff);
396 TEST_ASSERT(uset_equals(tmpSet, set));
397
398 /* Setting the allowed chars should enable the check. */
399 uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);
400 TEST_ASSERT_SUCCESS(status);
401
402 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
403 uset_remove(tmpSet, goodLatin[1]);
404 uspoof_setAllowedChars(sc, tmpSet, &status);
405 TEST_ASSERT_SUCCESS(status);
406 uset_close(tmpSet);
407
408 /* Latin Identifier should now fail; other non-latin test cases should still be OK */
409 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
410 TEST_ASSERT_SUCCESS(status);
411 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);
412
413 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
414 TEST_ASSERT_SUCCESS(status);
415 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
416 TEST_TEARDOWN;
417
418 /*
419 * check UTF-8
420 */
421 TEST_SETUP
422 char utf8buf[200];
423 int32_t checkResults;
424 int32_t position;
425
426 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);
427 TEST_ASSERT_SUCCESS(status);
428 position = 666;
429 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
430 TEST_ASSERT_SUCCESS(status);
431 TEST_ASSERT_EQ(0, checkResults);
432 TEST_ASSERT_EQ(666, position);
433
434 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);
435 TEST_ASSERT_SUCCESS(status);
436 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
437 TEST_ASSERT_SUCCESS(status);
438 TEST_ASSERT_EQ(0, checkResults);
439
440 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);
441 TEST_ASSERT_SUCCESS(status);
442 position = 666;
443 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
444 TEST_ASSERT_SUCCESS(status);
445 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);
446 TEST_ASSERT_EQ(2, position);
447
448 TEST_TEARDOWN;
449
450 /*
451 * uspoof_areConfusable()
452 */
453 TEST_SETUP
454 int32_t checkResults;
455
456 checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);
457 TEST_ASSERT_SUCCESS(status);
458 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
459
460 checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);
461 TEST_ASSERT_SUCCESS(status);
462 TEST_ASSERT_EQ(0, checkResults);
463
464 checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);
465 TEST_ASSERT_SUCCESS(status);
466 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
467
468 TEST_TEARDOWN;
469
470 /*
471 * areConfusableUTF8
472 */
473 TEST_SETUP
474 int32_t checkResults;
475 char s1[200];
476 char s2[200];
477
478
479 u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
480 u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
481 TEST_ASSERT_SUCCESS(status);
482 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
483 TEST_ASSERT_SUCCESS(status);
484 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
485
486 u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
487 u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
488 TEST_ASSERT_SUCCESS(status);
489 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
490 TEST_ASSERT_SUCCESS(status);
491 TEST_ASSERT_EQ(0, checkResults);
492
493 u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
494 u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
495 TEST_ASSERT_SUCCESS(status);
496 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
497 TEST_ASSERT_SUCCESS(status);
498 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
499
500 TEST_TEARDOWN;
501
502
503 /*
504 * getSkeleton
505 */
506
507 TEST_SETUP
508 UChar dest[100];
509 int32_t skelLength;
510
511 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status);
512 TEST_ASSERT_SUCCESS(status);
513 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
514 TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);
515
516 skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest,
517 sizeof(dest)/sizeof(UChar), &status);
518 TEST_ASSERT_SUCCESS(status);
519
520 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);
521 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
522 TEST_ASSERT_EQ(3, skelLength);
523 status = U_ZERO_ERROR;
524
525 TEST_TEARDOWN;
526 }
527
528 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION */