]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ********************************************************************** | |
b331163b | 5 | * Copyright (C) 2011-2015, International Business Machines Corporation |
729e4ab9 A |
6 | * and others. All Rights Reserved. |
7 | ********************************************************************** | |
8 | */ | |
9 | /** | |
10 | * IntlTestSpoof tests for USpoofDetector | |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO | |
16 | ||
17 | #include "itspoof.h" | |
51004dcb | 18 | |
729e4ab9 | 19 | #include "unicode/normlzr.h" |
51004dcb A |
20 | #include "unicode/regex.h" |
21 | #include "unicode/unistr.h" | |
22 | #include "unicode/uscript.h" | |
23 | #include "unicode/uspoof.h" | |
24 | ||
729e4ab9 | 25 | #include "cstring.h" |
51004dcb A |
26 | #include "scriptset.h" |
27 | #include "uhash.h" | |
28 | ||
729e4ab9 A |
29 | #include <stdlib.h> |
30 | #include <stdio.h> | |
31 | ||
// Assertion helpers.  Each one reports through the test framework's logging
// calls (errcheckln / errln / dataerrln) and then lets the test continue.

// Report a failure if the ICU status code indicates an error.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Report a failure if expr is false.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.", __FILE__, __LINE__, #expr);};}

// Report a failure, including the caller-supplied msg, if expr is false.
#define TEST_ASSERT_MSG(expr, msg) {if ((expr)==FALSE) { \
    dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", __FILE__, __LINE__, msg, #expr);};}

// Report a failure if the two integer-valued expressions differ.
// Each argument is evaluated exactly once, so an expression with side effects
// (for example a call into the spoof checker) is not re-run merely to format
// the failure message.
#define TEST_ASSERT_EQ(a, b) { \
    const int32_t testAssertLhs_ = (a); \
    const int32_t testAssertRhs_ = (b); \
    if (testAssertLhs_ != testAssertRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
              __FILE__, __LINE__, #a, testAssertLhs_, #b, testAssertRhs_); }}

// Report a failure if the two integer-valued expressions are equal.
// Each argument is evaluated exactly once (see TEST_ASSERT_EQ).
#define TEST_ASSERT_NE(a, b) { \
    const int32_t testAssertLhs_ = (a); \
    const int32_t testAssertRhs_ = (b); \
    if (testAssertLhs_ == testAssertRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
              __FILE__, __LINE__, #a, testAssertLhs_, #b, testAssertRhs_); }}
48 | ||
/*
 *  TEST_SETUP / TEST_TEARDOWN
 *     Paired macros holding the boilerplate common to most test cases.
 *     The test code proper goes between the two.
 *
 *     TEST_SETUP opens a scope that declares
 *         status       a UErrorCode, starting as U_ZERO_ERROR
 *         sc           a newly opened USpoofChecker
 *         checkResult  a newly opened USpoofCheckResult
 *     and then only runs the enclosed test code if status is still a success.
 *
 *     TEST_TEARDOWN reports any failure left in status, closes checkResult
 *     and sc, and ends the scope started by TEST_SETUP.
 */
#define TEST_SETUP {  \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status);  \
    TEST_ASSERT_SUCCESS(status);   \
    USpoofCheckResult *checkResult = uspoof_openCheckResult(&status); \
    TEST_ASSERT_SUCCESS(status);   \
    if (U_SUCCESS(status)){

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uspoof_closeCheckResult(checkResult);  \
    uspoof_close(sc);  \
}
71 | ||
72 | ||
73 | ||
74 | ||
75 | void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
76 | { | |
f3c0d7a5 A |
77 | if (exec) { |
78 | logln("TestSuite spoof: "); | |
729e4ab9 | 79 | } |
f3c0d7a5 A |
80 | TESTCASE_AUTO_BEGIN; |
81 | TESTCASE_AUTO(testSpoofAPI); | |
82 | TESTCASE_AUTO(testSkeleton); | |
83 | TESTCASE_AUTO(testAreConfusable); | |
84 | TESTCASE_AUTO(testInvisible); | |
85 | TESTCASE_AUTO(testConfData); | |
86 | TESTCASE_AUTO(testBug8654); | |
87 | TESTCASE_AUTO(testScriptSet); | |
88 | TESTCASE_AUTO(testRestrictionLevel); | |
89 | TESTCASE_AUTO(testMixedNumbers); | |
90 | TESTCASE_AUTO(testBug12153); | |
91 | TESTCASE_AUTO(testBug12825); | |
92 | TESTCASE_AUTO(testBug12815); | |
93 | TESTCASE_AUTO_END; | |
729e4ab9 A |
94 | } |
95 | ||
96 | void IntlTestSpoof::testSpoofAPI() { | |
97 | ||
98 | TEST_SETUP | |
99 | UnicodeString s("xyz"); // Many latin ranges are whole-script confusable with other scripts. | |
100 | // If this test starts failing, consult confusablesWholeScript.txt | |
101 | int32_t position = 666; | |
102 | int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status); | |
103 | TEST_ASSERT_SUCCESS(status); | |
104 | TEST_ASSERT_EQ(0, checkResults); | |
51004dcb | 105 | TEST_ASSERT_EQ(0, position); |
729e4ab9 A |
106 | TEST_TEARDOWN; |
107 | ||
108 | TEST_SETUP | |
109 | UnicodeString s1("cxs"); | |
110 | UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape(); // Cyrillic "cxs" | |
111 | int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status); | |
f3c0d7a5 | 112 | TEST_ASSERT_SUCCESS(status); |
729e4ab9 A |
113 | TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); |
114 | ||
115 | TEST_TEARDOWN; | |
116 | ||
117 | TEST_SETUP | |
118 | UnicodeString s("I1l0O"); | |
119 | UnicodeString dest; | |
120 | UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status); | |
121 | TEST_ASSERT_SUCCESS(status); | |
122 | TEST_ASSERT(UnicodeString("lllOO") == dest); | |
123 | TEST_ASSERT(&dest == &retStr); | |
124 | TEST_TEARDOWN; | |
125 | } | |
126 | ||
127 | ||
128 | #define CHECK_SKELETON(type, input, expected) { \ | |
129 | checkSkeleton(sc, type, input, expected, __LINE__); \ | |
130 | } | |
131 | ||
132 | ||
133 | // testSkeleton. Spot check a number of confusable skeleton substitutions from the | |
134 | // Unicode data file confusables.txt | |
135 | // Test cases chosen for substitutions of various lengths, and | |
136 | // membership in different mapping tables. | |
b331163b A |
137 | // Note: for ICU 55, all tables collapsed to the MA table data. |
138 | // TODO: for ICU 56 with Unicode 8, revisit this test. | |
139 | // | |
729e4ab9 A |
140 | void IntlTestSpoof::testSkeleton() { |
141 | const uint32_t ML = 0; | |
142 | const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE; | |
143 | const uint32_t MA = USPOOF_ANY_CASE; | |
144 | const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE; | |
145 | ||
146 | TEST_SETUP | |
729e4ab9 | 147 | CHECK_SKELETON(SL, "nochange", "nochange"); |
b331163b A |
148 | CHECK_SKELETON(SA, "nochange", "nochange"); |
149 | CHECK_SKELETON(ML, "nochange", "nochange"); | |
150 | CHECK_SKELETON(MA, "nochange", "nochange"); | |
729e4ab9 A |
151 | CHECK_SKELETON(MA, "love", "love"); |
152 | CHECK_SKELETON(MA, "1ove", "love"); // Digit 1 to letter l | |
153 | CHECK_SKELETON(ML, "OOPS", "OOPS"); | |
b331163b | 154 | CHECK_SKELETON(ML, "00PS", "OOPS"); |
729e4ab9 A |
155 | CHECK_SKELETON(MA, "OOPS", "OOPS"); |
156 | CHECK_SKELETON(MA, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only | |
157 | CHECK_SKELETON(SL, "\\u059c", "\\u0301"); | |
158 | CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D"); | |
159 | CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)" | |
b331163b | 160 | CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f"); |
729e4ab9 A |
161 | |
162 | // This mapping exists in the ML and MA tables, does not exist in SL, SA | |
b331163b A |
163 | // 0C83 ; 0983 ; ML |
164 | // 0C83 ; 0983 ; MA | |
165 | // | |
166 | ||
167 | CHECK_SKELETON(SL, "\\u0C83", "\\u0983"); | |
168 | CHECK_SKELETON(SA, "\\u0C83", "\\u0983"); | |
729e4ab9 A |
169 | CHECK_SKELETON(ML, "\\u0C83", "\\u0983"); |
170 | CHECK_SKELETON(MA, "\\u0C83", "\\u0983"); | |
171 | ||
b331163b | 172 | // 0391 mappings exist only in MA and SA tables. |
729e4ab9 | 173 | CHECK_SKELETON(MA, "\\u0391", "A"); |
b331163b A |
174 | CHECK_SKELETON(SA, "\\u0391", "A"); |
175 | CHECK_SKELETON(ML, "\\u0391", "A"); | |
176 | CHECK_SKELETON(SL, "\\u0391", "A"); | |
729e4ab9 | 177 | |
b331163b | 178 | // 13CF Mappings in all four tables, different in MA. |
729e4ab9 A |
179 | CHECK_SKELETON(ML, "\\u13CF", "b"); |
180 | CHECK_SKELETON(MA, "\\u13CF", "b"); | |
b331163b A |
181 | CHECK_SKELETON(SL, "\\u13CF", "b"); |
182 | CHECK_SKELETON(SA, "\\u13CF", "b"); | |
729e4ab9 A |
183 | |
184 | // 0022 ; 0027 0027 ; | |
185 | // all tables. | |
186 | CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027"); | |
187 | CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027"); | |
188 | CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027"); | |
189 | CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027"); | |
190 | ||
b331163b | 191 | // 017F mappings exist only in MA and SA tables. |
729e4ab9 A |
192 | CHECK_SKELETON(MA, "\\u017F", "f"); |
193 | CHECK_SKELETON(SA, "\\u017F", "f"); | |
b331163b A |
194 | CHECK_SKELETON(ML, "\\u017F", "f"); |
195 | CHECK_SKELETON(SL, "\\u017F", "f"); | |
729e4ab9 A |
196 | |
197 | TEST_TEARDOWN; | |
198 | } | |
199 | ||
200 | ||
201 | // | |
202 | // Run a single confusable skeleton transformation test case. | |
203 | // | |
204 | void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type, | |
205 | const char *input, const char *expected, int32_t lineNum) { | |
206 | UnicodeString uInput = UnicodeString(input).unescape(); | |
207 | UnicodeString uExpected = UnicodeString(expected).unescape(); | |
208 | ||
209 | UErrorCode status = U_ZERO_ERROR; | |
210 | UnicodeString actual; | |
211 | uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status); | |
212 | if (U_FAILURE(status)) { | |
213 | errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum, | |
214 | u_errorName(status)); | |
215 | return; | |
216 | } | |
217 | if (uExpected != actual) { | |
218 | errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.", | |
219 | __FILE__, __LINE__, lineNum); | |
220 | errln(UnicodeString(" Actual Skeleton: \"") + actual + UnicodeString("\"\n") + | |
221 | UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\"")); | |
222 | } | |
223 | } | |
224 | ||
225 | void IntlTestSpoof::testAreConfusable() { | |
226 | TEST_SETUP | |
227 | UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. " | |
228 | "A long string that will overflow stack buffers. A long string that will overflow stack buffers. "); | |
229 | UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. " | |
230 | "A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "); | |
f3c0d7a5 | 231 | int32_t result = uspoof_areConfusableUnicodeString(sc, s1, s2, &status); |
729e4ab9 | 232 | TEST_ASSERT_SUCCESS(status); |
f3c0d7a5 | 233 | TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, result); |
729e4ab9 A |
234 | |
235 | TEST_TEARDOWN; | |
236 | } | |
237 | ||
238 | void IntlTestSpoof::testInvisible() { | |
239 | TEST_SETUP | |
240 | UnicodeString s = UnicodeString("abcd\\u0301ef").unescape(); | |
241 | int32_t position = -42; | |
242 | TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status)); | |
243 | TEST_ASSERT_SUCCESS(status); | |
51004dcb | 244 | TEST_ASSERT(0 == position); |
729e4ab9 A |
245 | |
246 | UnicodeString s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape(); | |
247 | TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status)); | |
248 | TEST_ASSERT_SUCCESS(status); | |
51004dcb | 249 | TEST_ASSERT_EQ(0, position); |
729e4ab9 | 250 | |
4388f060 | 251 | // Two acute accents, one from the composed a with acute accent, \u00e1, |
729e4ab9 A |
252 | // and one separate. |
253 | position = -42; | |
254 | UnicodeString s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape(); | |
255 | TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status)); | |
256 | TEST_ASSERT_SUCCESS(status); | |
51004dcb | 257 | TEST_ASSERT_EQ(0, position); |
729e4ab9 A |
258 | TEST_TEARDOWN; |
259 | } | |
260 | ||
4388f060 A |
261 | void IntlTestSpoof::testBug8654() { |
262 | TEST_SETUP | |
263 | UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape(); | |
264 | int32_t position = -42; | |
265 | TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE ); | |
266 | TEST_ASSERT_SUCCESS(status); | |
51004dcb | 267 | TEST_ASSERT_EQ(0, position); |
4388f060 A |
268 | TEST_TEARDOWN; |
269 | } | |
729e4ab9 A |
270 | |
271 | static UnicodeString parseHex(const UnicodeString &in) { | |
272 | // Convert a series of hex numbers in a Unicode String to a string with the | |
273 | // corresponding characters. | |
274 | // The conversion is _really_ annoying. There must be some function to just do it. | |
275 | UnicodeString result; | |
276 | UChar32 cc = 0; | |
277 | for (int32_t i=0; i<in.length(); i++) { | |
278 | UChar c = in.charAt(i); | |
279 | if (c == 0x20) { // Space | |
280 | if (cc > 0) { | |
281 | result.append(cc); | |
282 | cc = 0; | |
283 | } | |
284 | } else if (c>=0x30 && c<=0x39) { | |
285 | cc = (cc<<4) + (c - 0x30); | |
286 | } else if ((c>=0x41 && c<=0x46) || (c>=0x61 && c<=0x66)) { | |
287 | cc = (cc<<4) + (c & 0x0f)+9; | |
288 | } | |
289 | // else do something with bad input. | |
290 | } | |
291 | if (cc > 0) { | |
292 | result.append(cc); | |
293 | } | |
294 | return result; | |
295 | } | |
296 | ||
297 | ||
298 | // | |
299 | // Append the hex form of a UChar32 to a UnicodeString. | |
300 | // Used in formatting error messages. | |
301 | // Match the formatting of numbers in confusables.txt | |
302 | // Minimum of 4 digits, no leading zeroes for positions 5 and up. | |
303 | // | |
304 | static void appendHexUChar(UnicodeString &dest, UChar32 c) { | |
305 | UBool doZeroes = FALSE; | |
306 | for (int bitNum=28; bitNum>=0; bitNum-=4) { | |
307 | if (bitNum <= 12) { | |
308 | doZeroes = TRUE; | |
309 | } | |
310 | int hexDigit = (c>>bitNum) & 0x0f; | |
311 | if (hexDigit != 0 || doZeroes) { | |
312 | doZeroes = TRUE; | |
313 | dest.append((UChar)(hexDigit<=9? hexDigit + 0x30: hexDigit -10 + 0x41)); | |
314 | } | |
315 | } | |
316 | dest.append((UChar)0x20); | |
317 | } | |
318 | ||
319 | U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); | |
320 | ||
321 | // testConfData - Check each data item from the Unicode confusables.txt file, | |
322 | // verify that it transforms correctly in a skeleton. | |
323 | // | |
324 | void IntlTestSpoof::testConfData() { | |
729e4ab9 | 325 | char buffer[2000]; |
b331163b A |
326 | if (getUnidataPath(buffer) == NULL) { |
327 | errln("Skipping test spoof/testConfData. Unable to find path to source/data/unidata/."); | |
328 | return; | |
329 | } | |
729e4ab9 A |
330 | uprv_strcat(buffer, "confusables.txt"); |
331 | ||
332 | LocalStdioFilePointer f(fopen(buffer, "rb")); | |
333 | if (f.isNull()) { | |
334 | errln("Skipping test spoof/testConfData. File confusables.txt not accessible."); | |
335 | return; | |
336 | } | |
337 | fseek(f.getAlias(), 0, SEEK_END); | |
338 | int32_t fileSize = ftell(f.getAlias()); | |
339 | LocalArray<char> fileBuf(new char[fileSize]); | |
340 | fseek(f.getAlias(), 0, SEEK_SET); | |
341 | int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias()); | |
342 | TEST_ASSERT_EQ(amt_read, fileSize); | |
343 | TEST_ASSERT(fileSize>0); | |
344 | if (amt_read != fileSize || fileSize <=0) { | |
345 | return; | |
346 | } | |
347 | UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize)); | |
348 | ||
b331163b | 349 | UErrorCode status = U_ZERO_ERROR; |
729e4ab9 A |
350 | LocalUSpoofCheckerPointer sc(uspoof_open(&status)); |
351 | TEST_ASSERT_SUCCESS(status); | |
352 | ||
353 | // Parse lines from the confusables.txt file. Example Line: | |
354 | // FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH .... | |
355 | // Three fields. The hex fields can contain more than one character, | |
356 | // and each character may be more than 4 digits (for supplemntals) | |
357 | // This regular expression matches lines and splits the fields into capture groups. | |
358 | RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt, 0, status); | |
359 | TEST_ASSERT_SUCCESS(status); | |
360 | while (parseLine.find()) { | |
361 | UnicodeString from = parseHex(parseLine.group(1, status)); | |
362 | if (!Normalizer::isNormalized(from, UNORM_NFD, status)) { | |
363 | // The source character was not NFD. | |
364 | // Skip this case; the first step in obtaining a skeleton is to NFD the input, | |
365 | // so the mapping in this line of confusables.txt will never be applied. | |
366 | continue; | |
367 | } | |
368 | ||
369 | UnicodeString rawExpected = parseHex(parseLine.group(2, status)); | |
370 | UnicodeString expected; | |
371 | Normalizer::decompose(rawExpected, FALSE /*NFD*/, 0, expected, status); | |
372 | TEST_ASSERT_SUCCESS(status); | |
373 | ||
374 | int32_t skeletonType = 0; | |
375 | UnicodeString tableType = parseLine.group(3, status); | |
376 | TEST_ASSERT_SUCCESS(status); | |
377 | if (tableType.indexOf("SL") >= 0) { | |
378 | skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE; | |
379 | } else if (tableType.indexOf("SA") >= 0) { | |
380 | skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE; | |
381 | } else if (tableType.indexOf("ML") >= 0) { | |
382 | skeletonType = 0; | |
383 | } else if (tableType.indexOf("MA") >= 0) { | |
384 | skeletonType = USPOOF_ANY_CASE; | |
385 | } | |
386 | ||
387 | UnicodeString actual; | |
388 | uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status); | |
389 | TEST_ASSERT_SUCCESS(status); | |
390 | TEST_ASSERT(actual == expected); | |
391 | if (actual != expected) { | |
392 | errln(parseLine.group(0, status)); | |
393 | UnicodeString line = "Actual: "; | |
394 | int i = 0; | |
395 | while (i < actual.length()) { | |
396 | appendHexUChar(line, actual.char32At(i)); | |
397 | i = actual.moveIndex32(i, 1); | |
398 | } | |
399 | errln(line); | |
400 | } | |
401 | if (U_FAILURE(status)) { | |
402 | break; | |
403 | } | |
404 | } | |
405 | } | |
729e4ab9 | 406 | |
51004dcb A |
407 | |
408 | void IntlTestSpoof::testScriptSet() { | |
409 | ScriptSet s1; | |
410 | ScriptSet s2; | |
411 | UErrorCode status = U_ZERO_ERROR; | |
412 | ||
413 | TEST_ASSERT(s1 == s2); | |
414 | s1.set(USCRIPT_ARABIC,status); | |
415 | TEST_ASSERT_SUCCESS(status); | |
416 | TEST_ASSERT(!(s1 == s2)); | |
417 | TEST_ASSERT(s1.test(USCRIPT_ARABIC, status)); | |
418 | TEST_ASSERT(s1.test(USCRIPT_GREEK, status) == FALSE); | |
419 | ||
420 | status = U_ZERO_ERROR; | |
421 | s1.reset(USCRIPT_ARABIC, status); | |
422 | TEST_ASSERT(s1 == s2); | |
423 | ||
424 | status = U_ZERO_ERROR; | |
425 | s1.setAll(); | |
426 | TEST_ASSERT(s1.test(USCRIPT_COMMON, status)); | |
427 | TEST_ASSERT(s1.test(USCRIPT_ETHIOPIC, status)); | |
428 | TEST_ASSERT(s1.test(USCRIPT_CODE_LIMIT, status)); | |
429 | s1.resetAll(); | |
430 | TEST_ASSERT(!s1.test(USCRIPT_COMMON, status)); | |
431 | TEST_ASSERT(!s1.test(USCRIPT_ETHIOPIC, status)); | |
432 | TEST_ASSERT(!s1.test(USCRIPT_CODE_LIMIT, status)); | |
433 | ||
434 | status = U_ZERO_ERROR; | |
435 | s1.set(USCRIPT_TAKRI, status); | |
436 | s1.set(USCRIPT_BLISSYMBOLS, status); | |
437 | s2.setAll(); | |
438 | TEST_ASSERT(s2.contains(s1)); | |
439 | TEST_ASSERT(!s1.contains(s2)); | |
440 | TEST_ASSERT(s2.intersects(s1)); | |
441 | TEST_ASSERT(s1.intersects(s2)); | |
442 | s2.reset(USCRIPT_TAKRI, status); | |
443 | TEST_ASSERT(!s2.contains(s1)); | |
444 | TEST_ASSERT(!s1.contains(s2)); | |
445 | TEST_ASSERT(s1.intersects(s2)); | |
446 | TEST_ASSERT(s2.intersects(s1)); | |
447 | TEST_ASSERT_SUCCESS(status); | |
448 | ||
449 | status = U_ZERO_ERROR; | |
450 | s1.resetAll(); | |
451 | s1.set(USCRIPT_NKO, status); | |
452 | s1.set(USCRIPT_COMMON, status); | |
453 | s2 = s1; | |
454 | TEST_ASSERT(s2 == s1); | |
455 | TEST_ASSERT_EQ(2, s2.countMembers()); | |
456 | s2.intersect(s1); | |
457 | TEST_ASSERT(s2 == s1); | |
458 | s2.setAll(); | |
459 | TEST_ASSERT(!(s2 == s1)); | |
460 | TEST_ASSERT(s2.countMembers() >= USCRIPT_CODE_LIMIT); | |
461 | s2.intersect(s1); | |
462 | TEST_ASSERT(s2 == s1); | |
463 | ||
464 | s2.setAll(); | |
465 | s2.reset(USCRIPT_COMMON, status); | |
466 | s2.intersect(s1); | |
467 | TEST_ASSERT(s2.countMembers() == 1); | |
468 | ||
f3c0d7a5 A |
469 | s1.resetAll(); |
470 | TEST_ASSERT(s1.isEmpty()); | |
471 | s1.set(USCRIPT_LATIN, status); | |
472 | TEST_ASSERT(!s1.isEmpty()); | |
473 | s1.setAll(); | |
474 | TEST_ASSERT(!s1.isEmpty()); | |
475 | TEST_ASSERT_SUCCESS(status); | |
476 | ||
51004dcb A |
477 | s1.resetAll(); |
478 | s1.set(USCRIPT_AFAKA, status); | |
479 | s1.set(USCRIPT_VAI, status); | |
480 | s1.set(USCRIPT_INHERITED, status); | |
481 | int32_t n = -1; | |
482 | for (int32_t i=0; i<4; i++) { | |
483 | n = s1.nextSetBit(n+1); | |
484 | switch (i) { | |
485 | case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED, n); break; | |
486 | case 1: TEST_ASSERT_EQ(USCRIPT_VAI, n); break; | |
487 | case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA, n); break; | |
488 | case 3: TEST_ASSERT_EQ(-1, (int32_t)n); break; | |
489 | default: TEST_ASSERT(FALSE); | |
490 | } | |
491 | } | |
492 | TEST_ASSERT_SUCCESS(status); | |
f3c0d7a5 A |
493 | |
494 | // Script extensions. Depends on data. | |
495 | s1.resetAll(); | |
496 | s1.setScriptExtensions(0x67, status); | |
497 | TEST_ASSERT(s1.countMembers() == 1); | |
498 | TEST_ASSERT(s1.test(USCRIPT_LATIN, status)); | |
499 | TEST_ASSERT_SUCCESS(status); | |
500 | ||
501 | s1.resetAll(); | |
502 | s1.setScriptExtensions(0x303C, status); | |
503 | TEST_ASSERT(s1.countMembers() == 3); | |
504 | TEST_ASSERT(s1.test(USCRIPT_HAN, status)); | |
505 | TEST_ASSERT(s1.test(USCRIPT_HIRAGANA, status)); | |
506 | TEST_ASSERT(s1.test(USCRIPT_KATAKANA, status)); | |
507 | TEST_ASSERT_SUCCESS(status); | |
508 | ||
509 | // Additional tests | |
510 | ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status); | |
511 | ScriptSet bitset2; bitset2.set(USCRIPT_HANGUL, status); | |
512 | TEST_ASSERT(bitset12.contains(bitset2)); | |
513 | TEST_ASSERT(bitset12.contains(bitset12)); | |
514 | TEST_ASSERT(!bitset2.contains(bitset12)); | |
515 | ||
516 | ScriptSet arabSet; arabSet.set(USCRIPT_ARABIC, status); | |
517 | ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status); | |
518 | UElement arabEl; arabEl.pointer = &arabSet; | |
519 | UElement latinEl; latinEl.pointer = &latinSet; | |
520 | TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0); | |
521 | TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0); | |
522 | ||
523 | UnicodeString scriptString; | |
524 | bitset12.displayScripts(scriptString); | |
525 | TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString); | |
51004dcb A |
526 | } |
527 | ||
528 | ||
529 | void IntlTestSpoof::testRestrictionLevel() { | |
530 | struct Test { | |
531 | const char *fId; | |
532 | URestrictionLevel fExpectedRestrictionLevel; | |
533 | } tests[] = { | |
534 | {"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE}, | |
535 | {"a", USPOOF_ASCII}, | |
57a6839d | 536 | {"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE}, |
51004dcb A |
537 | {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE}, |
538 | {"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE}, | |
f3c0d7a5 A |
539 | {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE}, |
540 | {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE}, | |
541 | {"\\u0061\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, | |
542 | {"\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, | |
543 | {"\\u0061\\u30FC\\u303C\\u30A2", USPOOF_HIGHLY_RESTRICTIVE}, | |
544 | {"\\u30A2\\u0061\\u30FC\\u303C", USPOOF_HIGHLY_RESTRICTIVE}, | |
545 | {"\\u0061\\u0031\\u0661", USPOOF_MODERATELY_RESTRICTIVE}, | |
546 | {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_MODERATELY_RESTRICTIVE}, | |
547 | {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE}, | |
548 | {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", USPOOF_MINIMALLY_RESTRICTIVE} | |
51004dcb A |
549 | }; |
550 | char msgBuffer[100]; | |
57a6839d | 551 | URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE, |
f3c0d7a5 A |
552 | USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, |
553 | USPOOF_UNRESTRICTIVE}; | |
554 | ||
51004dcb | 555 | UErrorCode status = U_ZERO_ERROR; |
f3c0d7a5 A |
556 | UnicodeSet allowedChars; |
557 | // Allowed Identifier Characters. In addition to the Recommended Set, | |
558 | // allow u303c, which has an interesting script extension of Hani Hira Kana. | |
559 | allowedChars.addAll(*uspoof_getRecommendedUnicodeSet(&status)).add(0x303C); | |
560 | ||
b331163b | 561 | for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) { |
51004dcb A |
562 | status = U_ZERO_ERROR; |
563 | const Test &test = tests[testNum]; | |
564 | UnicodeString testString = UnicodeString(test.fId).unescape(); | |
565 | URestrictionLevel expectedLevel = test.fExpectedRestrictionLevel; | |
b331163b | 566 | for (int levelIndex=0; levelIndex<UPRV_LENGTHOF(restrictionLevels); levelIndex++) { |
51004dcb A |
567 | status = U_ZERO_ERROR; |
568 | URestrictionLevel levelSetInSpoofChecker = restrictionLevels[levelIndex]; | |
569 | USpoofChecker *sc = uspoof_open(&status); | |
f3c0d7a5 | 570 | uspoof_setAllowedChars(sc, allowedChars.toUSet(), &status); |
51004dcb | 571 | uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker); |
f3c0d7a5 | 572 | uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status); |
57a6839d A |
573 | int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status); |
574 | ||
51004dcb | 575 | // we want to fail if the text is (say) MODERATE and the testLevel is ASCII |
57a6839d A |
576 | int32_t expectedValue = 0; |
577 | if (expectedLevel > levelSetInSpoofChecker) { | |
578 | expectedValue |= USPOOF_RESTRICTION_LEVEL; | |
579 | } | |
57a6839d A |
580 | sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x", |
581 | testNum, levelIndex, expectedValue, actualValue); | |
582 | TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer); | |
583 | TEST_ASSERT_SUCCESS(status); | |
584 | ||
585 | // Run the same check again, with the Spoof Checker configured to return | |
586 | // the actual restriction level. | |
f3c0d7a5 | 587 | uspoof_setAllowedChars(sc, allowedChars.toUSet(), &status); |
57a6839d | 588 | uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker); |
f3c0d7a5 | 589 | uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status); |
57a6839d | 590 | int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status); |
51004dcb | 591 | TEST_ASSERT_SUCCESS(status); |
57a6839d A |
592 | if (U_SUCCESS(status)) { |
593 | TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK); | |
594 | TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS); | |
595 | } | |
51004dcb A |
596 | uspoof_close(sc); |
597 | } | |
598 | } | |
51004dcb | 599 | |
f3c0d7a5 | 600 | } |
51004dcb A |
601 | |
602 | void IntlTestSpoof::testMixedNumbers() { | |
603 | struct Test { | |
604 | const char *fTestString; | |
605 | const char *fExpectedSet; | |
606 | } tests[] = { | |
607 | {"1", "[0]"}, | |
608 | {"\\u0967", "[\\u0966]"}, | |
609 | {"1\\u0967", "[0\\u0966]"}, | |
f3c0d7a5 A |
610 | {"\\u0661\\u06F1", "[\\u0660\\u06F0]"}, |
611 | {"\\u0061\\u2665", "[]"}, | |
612 | {"\\u0061\\u303C", "[]"}, | |
613 | {"\\u0061\\u30FC\\u303C", "[]"}, | |
614 | {"\\u0061\\u30FC\\u303C\\u30A2", "[]"}, | |
615 | {"\\u30A2\\u0061\\u30FC\\u303C", "[]"}, | |
616 | {"\\u0061\\u0031\\u0661", "[\\u0030\\u0660]"}, | |
617 | {"\\u0061\\u0031\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0]"}, | |
618 | {"\\u0661\\u30FC\\u303C\\u0061\\u30A2\\u0031\\u0967\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"}, | |
619 | {"\\u0061\\u30A2\\u30FC\\u303C\\u0031\\u0967\\u0661\\u06F1", "[\\u0030\\u0660\\u06F0\\u0966]"} | |
51004dcb A |
620 | }; |
621 | UErrorCode status = U_ZERO_ERROR; | |
b331163b | 622 | for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) { |
51004dcb A |
623 | char msgBuf[100]; |
624 | sprintf(msgBuf, "testNum = %d ", testNum); | |
625 | Test &test = tests[testNum]; | |
626 | ||
627 | status = U_ZERO_ERROR; | |
628 | UnicodeString testString = UnicodeString(test.fTestString).unescape(); | |
629 | UnicodeSet expectedSet(UnicodeString(test.fExpectedSet).unescape(), status); | |
51004dcb A |
630 | |
631 | status = U_ZERO_ERROR; | |
f3c0d7a5 A |
632 | TEST_SETUP |
633 | uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this | |
634 | uspoof_check2UnicodeString(sc, testString, checkResult, &status); | |
635 | UBool mixedNumberFailure = ((uspoof_getCheckResultChecks(checkResult, &status) & USPOOF_MIXED_NUMBERS) != 0); | |
636 | TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf); | |
637 | const UnicodeSet* actualSet = UnicodeSet::fromUSet(uspoof_getCheckResultNumerics(checkResult, &status)); | |
638 | TEST_ASSERT_MSG(expectedSet == *actualSet, msgBuf); | |
639 | TEST_TEARDOWN | |
51004dcb A |
640 | } |
641 | } | |
642 | ||
f3c0d7a5 A |
643 | // Bug #12153 - uspoof_setRestrictionLevel() should enable restriction level testing. |
644 | // | |
645 | void IntlTestSpoof::testBug12153() { | |
646 | UErrorCode status = U_ZERO_ERROR; | |
647 | LocalUSpoofCheckerPointer sc(uspoof_open(&status)); | |
648 | TEST_ASSERT_SUCCESS(status); | |
649 | int32_t checks = uspoof_getChecks(sc.getAlias(), &status); | |
650 | TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) != 0); | |
651 | checks &= ~USPOOF_RESTRICTION_LEVEL; | |
652 | uspoof_setChecks(sc.getAlias(), checks, &status); | |
653 | checks = uspoof_getChecks(sc.getAlias(), &status); | |
654 | TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) == 0); | |
655 | ||
656 | uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE); | |
657 | checks = uspoof_getChecks(sc.getAlias(), &status); | |
658 | TEST_ASSERT((checks & USPOOF_RESTRICTION_LEVEL) != 0); | |
659 | TEST_ASSERT_SUCCESS(status); | |
660 | } | |
661 | ||
662 | // uspoof_checkUnicodeString should NOT have an infinite loop. | |
663 | void IntlTestSpoof::testBug12825() { | |
664 | UErrorCode status = U_ZERO_ERROR; | |
665 | LocalUSpoofCheckerPointer sc(uspoof_open(&status)); | |
666 | TEST_ASSERT_SUCCESS(status); | |
667 | uspoof_setChecks(sc.getAlias(), USPOOF_ALL_CHECKS | USPOOF_AUX_INFO, &status); | |
668 | TEST_ASSERT_SUCCESS(status); | |
669 | uspoof_checkUnicodeString(sc.getAlias(), UnicodeString("\\u30FB").unescape(), NULL, &status); | |
670 | TEST_ASSERT_SUCCESS(status); | |
671 | } | |
672 | ||
673 | // uspoof_getSkeleton should NOT set an ILLEGAL_ARGUMENT_EXCEPTION. | |
674 | void IntlTestSpoof::testBug12815() { | |
675 | UErrorCode status = U_ZERO_ERROR; | |
676 | LocalUSpoofCheckerPointer sc(uspoof_open(&status)); | |
677 | TEST_ASSERT_SUCCESS(status); | |
678 | uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL, &status); | |
679 | TEST_ASSERT_SUCCESS(status); | |
680 | UnicodeString result; | |
681 | uspoof_getSkeletonUnicodeString(sc.getAlias(), 0, UnicodeString("hello world"), result, &status); | |
682 | TEST_ASSERT_SUCCESS(status); | |
683 | } | |
684 | ||
51004dcb | 685 | #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */ |