]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/spooftest.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / spooftest.c
CommitLineData
729e4ab9
A
1/********************************************************************
2 * COPYRIGHT:
51004dcb 3 * Copyright (c) 2009-2013, International Business Machines Corporation and
729e4ab9
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File spooftest.c
9*
10*********************************************************************************/
11/* C API TEST for the uspoof Unicode Identifier Spoofing and Security API */
12/**
13* This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't
14* try to test the full functionality. It just calls each function and verifies that it
15* works on a basic level.
16*
17* More complete testing of spoof detection functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
22
23#include <stdlib.h>
24#include <stdio.h>
25#include <string.h>
26#include "unicode/uspoof.h"
27#include "unicode/ustring.h"
28#include "unicode/uset.h"
29#include "cintltst.h"
30
/*
 * Assertion helpers.  Each one logs a failure (with file and line) through the
 * test framework's log_err / log_err_status and then lets the test continue.
 *
 * The macros expand to a brace-enclosed block rather than do { } while (0):
 * at least one call site in this file invokes them without a trailing
 * semicolon, and that only compiles with the brace-block form.
 *
 * NOTE(review): TEST_ASSERT_EQ and TEST_ASSERT_NE format both operands with
 * "%d", so they are only suitable for int-sized integer values.
 */

/* Log a failure if the UErrorCode "status" indicates an error. */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_err_status(status, "Failure at file %s, line %d, error = %s\n", \
                       __FILE__, __LINE__, u_errorName(status)); \
    } \
}

/* Log a failure if "expr" evaluates to FALSE. */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", \
                __FILE__, __LINE__, #expr); \
    }; \
}

/* Log a failure if the integer values "a" and "b" differ. */
#define TEST_ASSERT_EQ(a, b) { \
    if ((a) != (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}

/* Log a failure if the integer values "a" and "b" are equal. */
#define TEST_ASSERT_NE(a, b) { \
    if ((a) == (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}
44
45
/*
 * TEST_SETUP and TEST_TEARDOWN
 *
 * Boilerplate that brackets one test case.  TEST_SETUP opens a new scope,
 * declares "status" (initialised to U_ZERO_ERROR) and "sc" (a freshly opened
 * default USpoofChecker), and starts an if-block that is entered only when
 * the open succeeded.  Arbitrary test code, including declarations, goes
 * between the two macros.
 *
 * TEST_TEARDOWN closes the if-block, reports any error left in "status",
 * closes "sc" and ends the scope.  The two macros must always be used as a
 * matched pair because the braces they emit balance each other.
 */
#define TEST_SETUP { \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
64
65
66static void TestUSpoofCAPI(void);
67
68void addUSpoofTest(TestNode** root);
69
70void addUSpoofTest(TestNode** root)
71{
72#if !UCONFIG_NO_FILE_IO
73 addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI");
74#endif
75}
76
77/*
78 * Identifiers for verifying that spoof checking is minimally alive and working.
79 */
80const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */
81 /* (not confusable) */
82const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */
83 /* (mixed script, confusable */
84
85const UChar scLatin[] = {(UChar)0x73, (UChar)0x63, 0}; /* "sc", plain ascii. */
86const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0}; /* Plain lower case Cyrillic letters,
87 no latin confusables */
88
89const UChar goodGreek[] = {(UChar)0x3c0, (UChar)0x3c6, 0}; /* Plain lower case Greek letters */
90
91const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0}; /* lI1, all ASCII */
92
93 /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};
95
96const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};
97
98/* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */
99const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};
100
4388f060
A
101const UChar han_Hiragana[] = {(UChar)0x3086, (UChar)0x308A, (UChar)0x0020, (UChar)0x77F3, (UChar)0x7530, 0};
102
729e4ab9
A
103/* Provide better code coverage */
104const char goodLatinUTF8[] = {0x75, 0x77, 0};
105/*
106 * Spoof Detction C API Tests
107 */
108static void TestUSpoofCAPI(void) {
109
110 /*
111 * basic uspoof_open().
112 */
113 {
114 USpoofChecker *sc;
115 UErrorCode status = U_ZERO_ERROR;
116 sc = uspoof_open(&status);
117 TEST_ASSERT_SUCCESS(status);
118 if (U_FAILURE(status)) {
119 /* If things are so broken that we can't even open a default spoof checker, */
120 /* don't even try the rest of the tests. They would all fail. */
121 return;
122 }
123 uspoof_close(sc);
124 }
125
126
127
128 /*
129 * Test Open from source rules.
130 */
131 TEST_SETUP
132 const char *dataSrcDir;
133 char *fileName;
134 char *confusables;
135 int confusablesLength;
136 char *confusablesWholeScript;
137 int confusablesWholeScriptLength;
138 FILE *f;
139 UParseError pe;
140 int32_t errType;
141 USpoofChecker *rsc;
142
143 dataSrcDir = ctest_dataSrcDir();
144 fileName = malloc(strlen(dataSrcDir) + 100);
145 strcpy(fileName, dataSrcDir);
146 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
4388f060 147 f = fopen(fileName, "rb");
729e4ab9
A
148 TEST_ASSERT_NE(f, NULL);
149 confusables = malloc(3000000);
51004dcb 150 if (f != NULL) {
729e4ab9
A
151 confusablesLength = fread(confusables, 1, 3000000, f);
152 fclose(f);
51004dcb 153 }
729e4ab9 154
729e4ab9
A
155 strcpy(fileName, dataSrcDir);
156 strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
4388f060 157 f = fopen(fileName, "rb");
729e4ab9
A
158 TEST_ASSERT_NE(f, NULL);
159 confusablesWholeScript = malloc(1000000);
51004dcb 160 if (f != NULL) {
729e4ab9
A
161 confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);
162 fclose(f);
51004dcb 163 }
729e4ab9
A
164
165 rsc = uspoof_openFromSource(confusables, confusablesLength,
166 confusablesWholeScript, confusablesWholeScriptLength,
167 &errType, &pe, &status);
168 TEST_ASSERT_SUCCESS(status);
169
170 free(confusablesWholeScript);
171 free(confusables);
172 free(fileName);
173 uspoof_close(rsc);
174 /* printf("ParseError Line is %d\n", pe.line); */
175 TEST_TEARDOWN;
176
177
178 /*
179 * openFromSerialized and serialize
180 */
181 TEST_SETUP
182 int32_t serializedSize = 0;
183 int32_t actualLength = 0;
184 char *buf;
185 USpoofChecker *sc2;
186 int32_t checkResults;
187
188
189 serializedSize = uspoof_serialize(sc, NULL, 0, &status);
190 TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);
191 TEST_ASSERT(serializedSize > 0);
192
193 /* Serialize the default spoof checker */
194 status = U_ZERO_ERROR;
195 buf = (char *)malloc(serializedSize + 10);
196 TEST_ASSERT(buf != NULL);
197 buf[serializedSize] = 42;
198 uspoof_serialize(sc, buf, serializedSize, &status);
199 TEST_ASSERT_SUCCESS(status);
200 TEST_ASSERT_EQ(42, buf[serializedSize]);
201
202 /* Create a new spoof checker from the freshly serialized data */
203 sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);
204 TEST_ASSERT_SUCCESS(status);
205 TEST_ASSERT_NE(NULL, sc2);
206 TEST_ASSERT_EQ(serializedSize, actualLength);
207
208 /* Verify that the new spoof checker at least wiggles */
209 checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);
210 TEST_ASSERT_SUCCESS(status);
211 TEST_ASSERT_EQ(0, checkResults);
212
213 checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);
214 TEST_ASSERT_SUCCESS(status);
215 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
216
217 uspoof_close(sc2);
218 free(buf);
219 TEST_TEARDOWN;
220
221
222
223 /*
224 * Set & Get Check Flags
225 */
226 TEST_SETUP
227 int32_t t;
228 uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);
229 TEST_ASSERT_SUCCESS(status);
230 t = uspoof_getChecks(sc, &status);
231 TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);
232
233 uspoof_setChecks(sc, 0, &status);
234 TEST_ASSERT_SUCCESS(status);
235 t = uspoof_getChecks(sc, &status);
236 TEST_ASSERT_EQ(0, t);
237
238 uspoof_setChecks(sc,
239 USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
240 &status);
241 TEST_ASSERT_SUCCESS(status);
242 t = uspoof_getChecks(sc, &status);
243 TEST_ASSERT_SUCCESS(status);
244 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);
245 TEST_TEARDOWN;
246
247 /*
248 * get & setAllowedChars
249 */
250 TEST_SETUP
251 USet *us;
252 const USet *uset;
253
254 uset = uspoof_getAllowedChars(sc, &status);
255 TEST_ASSERT_SUCCESS(status);
256 TEST_ASSERT(uset_isFrozen(uset));
257 us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */
258 uspoof_setAllowedChars(sc, us, &status);
259 TEST_ASSERT_SUCCESS(status);
260 TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));
261 TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));
262 TEST_ASSERT_SUCCESS(status);
263 uset_close(us);
264 TEST_TEARDOWN;
265
266 /*
267 * clone()
268 */
269
270 TEST_SETUP
271 USpoofChecker *clone1 = NULL;
272 USpoofChecker *clone2 = NULL;
273 int32_t checkResults = 0;
274
275 clone1 = uspoof_clone(sc, &status);
276 TEST_ASSERT_SUCCESS(status);
277 TEST_ASSERT_NE(clone1, sc);
278
279 clone2 = uspoof_clone(clone1, &status);
280 TEST_ASSERT_SUCCESS(status);
281 TEST_ASSERT_NE(clone2, clone1);
282
283 uspoof_close(clone1);
284
285 /* Verify that the cloned spoof checker is alive */
286 checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);
287 TEST_ASSERT_SUCCESS(status);
288 TEST_ASSERT_EQ(0, checkResults);
289
290 checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);
291 TEST_ASSERT_SUCCESS(status);
292 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
293 uspoof_close(clone2);
294 TEST_TEARDOWN;
295
4388f060
A
296 /*
297 * basic uspoof_check()
298 */
299 TEST_SETUP
300 int32_t result;
301 result = uspoof_check(sc, goodLatin, -1, NULL, &status);
302 TEST_ASSERT_SUCCESS(status);
303 TEST_ASSERT_EQ(0, result);
304
305 result = uspoof_check(sc, han_Hiragana, -1, NULL, &status);
306 TEST_ASSERT_SUCCESS(status);
307 TEST_ASSERT_EQ(0, result);
308
309 result = uspoof_check(sc, scMixed, -1, NULL, &status);
310 TEST_ASSERT_SUCCESS(status);
311 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, result);
312 TEST_TEARDOWN
313
314
729e4ab9
A
315 /*
316 * get & set Checks
317 */
318 TEST_SETUP
319 int32_t checks;
320 int32_t checks2;
321 int32_t checkResults;
322
323 checks = uspoof_getChecks(sc, &status);
324 TEST_ASSERT_SUCCESS(status);
325 TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);
326
327 checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);
328 uspoof_setChecks(sc, checks, &status);
329 TEST_ASSERT_SUCCESS(status);
330 checks2 = uspoof_getChecks(sc, &status);
331 TEST_ASSERT_EQ(checks, checks2);
332
333 /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
334 So with those tests gone checking that Identifier should now succeed */
335 checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);
336 TEST_ASSERT_SUCCESS(status);
337 TEST_ASSERT_EQ(0, checkResults);
338 TEST_TEARDOWN;
339
340 /*
341 * AllowedLoacles
342 */
343
344 TEST_SETUP
345 const char *allowedLocales;
346 int32_t checkResults;
347
348 /* Default allowed locales list should be empty */
349 allowedLocales = uspoof_getAllowedLocales(sc, &status);
350 TEST_ASSERT_SUCCESS(status);
351 TEST_ASSERT(strcmp("", allowedLocales) == 0)
352
353 /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
354 uspoof_setAllowedLocales(sc, "en, ru_RU", &status);
355 TEST_ASSERT_SUCCESS(status);
356 allowedLocales = uspoof_getAllowedLocales(sc, &status);
357 TEST_ASSERT_SUCCESS(status);
358 TEST_ASSERT(strstr(allowedLocales, "en") != NULL);
359 TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);
360
361 /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,
362 * which we don't want to see in this test. */
363 uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);
364 TEST_ASSERT_SUCCESS(status);
365
366 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
367 TEST_ASSERT_SUCCESS(status);
368 TEST_ASSERT_EQ(0, checkResults);
369
370 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
371 TEST_ASSERT_SUCCESS(status);
372 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);
373
374 checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);
375 TEST_ASSERT_SUCCESS(status);
376 TEST_ASSERT_EQ(0, checkResults);
377
378 /* Reset with an empty locale list, which should allow all characters to pass */
379 uspoof_setAllowedLocales(sc, " ", &status);
380 TEST_ASSERT_SUCCESS(status);
381
382 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
383 TEST_ASSERT_SUCCESS(status);
384 TEST_ASSERT_EQ(0, checkResults);
385 TEST_TEARDOWN;
386
387 /*
388 * AllowedChars set/get the USet of allowed characters.
389 */
390 TEST_SETUP
391 const USet *set;
392 USet *tmpSet;
393 int32_t checkResults;
394
395 /* By default, we should see no restriction; the USet should allow all characters. */
396 set = uspoof_getAllowedChars(sc, &status);
397 TEST_ASSERT_SUCCESS(status);
398 tmpSet = uset_open(0, 0x10ffff);
399 TEST_ASSERT(uset_equals(tmpSet, set));
400
401 /* Setting the allowed chars should enable the check. */
402 uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);
403 TEST_ASSERT_SUCCESS(status);
404
405 /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
406 uset_remove(tmpSet, goodLatin[1]);
407 uspoof_setAllowedChars(sc, tmpSet, &status);
408 TEST_ASSERT_SUCCESS(status);
409 uset_close(tmpSet);
410
51004dcb
A
411 /* Latin Identifier should now fail; other non-latin test cases should still be OK
412 * Note: fail of CHAR_LIMIT also causes the restriction level to be USPOOF_UNRESTRICTIVE
413 * which will give us a USPOOF_RESTRICTION_LEVEL failure.
414 */
729e4ab9
A
415 checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
416 TEST_ASSERT_SUCCESS(status);
51004dcb 417 TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT | USPOOF_RESTRICTION_LEVEL, checkResults);
729e4ab9
A
418
419 checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
420 TEST_ASSERT_SUCCESS(status);
421 TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
422 TEST_TEARDOWN;
423
424 /*
425 * check UTF-8
426 */
427 TEST_SETUP
428 char utf8buf[200];
429 int32_t checkResults;
430 int32_t position;
431
432 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);
433 TEST_ASSERT_SUCCESS(status);
434 position = 666;
435 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
436 TEST_ASSERT_SUCCESS(status);
437 TEST_ASSERT_EQ(0, checkResults);
51004dcb 438 TEST_ASSERT_EQ(0, position);
729e4ab9
A
439
440 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);
441 TEST_ASSERT_SUCCESS(status);
442 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
443 TEST_ASSERT_SUCCESS(status);
444 TEST_ASSERT_EQ(0, checkResults);
445
446 u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);
447 TEST_ASSERT_SUCCESS(status);
448 position = 666;
449 checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
450 TEST_ASSERT_SUCCESS(status);
451 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);
51004dcb 452 TEST_ASSERT_EQ(0, position);
729e4ab9
A
453
454 TEST_TEARDOWN;
455
456 /*
457 * uspoof_areConfusable()
458 */
459 TEST_SETUP
460 int32_t checkResults;
461
462 checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);
463 TEST_ASSERT_SUCCESS(status);
464 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
465
466 checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);
467 TEST_ASSERT_SUCCESS(status);
468 TEST_ASSERT_EQ(0, checkResults);
469
470 checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);
471 TEST_ASSERT_SUCCESS(status);
472 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
473
474 TEST_TEARDOWN;
475
476 /*
477 * areConfusableUTF8
478 */
479 TEST_SETUP
480 int32_t checkResults;
481 char s1[200];
482 char s2[200];
483
484
485 u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
486 u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
487 TEST_ASSERT_SUCCESS(status);
488 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
489 TEST_ASSERT_SUCCESS(status);
490 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
491
492 u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
493 u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
494 TEST_ASSERT_SUCCESS(status);
495 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
496 TEST_ASSERT_SUCCESS(status);
497 TEST_ASSERT_EQ(0, checkResults);
498
499 u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
500 u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
501 TEST_ASSERT_SUCCESS(status);
502 checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
503 TEST_ASSERT_SUCCESS(status);
504 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
505
506 TEST_TEARDOWN;
507
508
509 /*
510 * getSkeleton
511 */
512
513 TEST_SETUP
514 UChar dest[100];
515 int32_t skelLength;
516
517 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status);
518 TEST_ASSERT_SUCCESS(status);
519 TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
520 TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);
521
522 skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest,
523 sizeof(dest)/sizeof(UChar), &status);
524 TEST_ASSERT_SUCCESS(status);
525
526 skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);
527 TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
528 TEST_ASSERT_EQ(3, skelLength);
529 status = U_ZERO_ERROR;
530
531 TEST_TEARDOWN;
532}
533
534#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */