]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /******************************************************************** |
4 | * COPYRIGHT: | |
2ca993e8 | 5 | * Copyright (c) 2002-2016, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ******************************************************************** | |
8 | * | |
9 | * @author Mark E. Davis | |
10 | * @author Vladimir Weinstein | |
11 | */ | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_NORMALIZATION | |
16 | ||
17 | #include "intltest.h" | |
2ca993e8 | 18 | #include "cmemory.h" |
b75a7d8f A |
19 | #include "cstring.h" |
20 | #include "canittst.h" | |
21 | #include "unicode/caniter.h" | |
374ca955 | 22 | #include "unicode/normlzr.h" |
b75a7d8f | 23 | #include "unicode/uchar.h" |
374ca955 | 24 | #include "hash.h" |
b75a7d8f | 25 | |
b75a7d8f A |
// Expands to one `case` label of the switch in runIndexedTest(): it publishes
// the test method's name through `name` and, when `exec` is set, logs a banner
// followed by an empty line and then invokes the test method.
#define CASE(id,test) case id:          \
        name = #test;                   \
        if (exec) {                     \
            logln(#test "---");         \
            logln(UnicodeString(""));   \
            test();                     \
        }                               \
        break
34 | ||
35 | void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec, | |
36 | const char* &name, char* /*par*/) { | |
37 | switch (index) { | |
38 | CASE(0, TestBasic); | |
39 | CASE(1, TestExhaustive); | |
40 | CASE(2, TestAPI); | |
41 | default: name = ""; break; | |
42 | } | |
43 | } | |
44 | ||
/*
 * Disabled (commented-out) helper: converts Java-style strings with \u Unicode
 * escapes into UnicodeString objects.
 *
static UnicodeString str(const char *input)
{
    UnicodeString str(input, ""); // Invariant conversion
    return str.unescape();
}
 */
53 | ||
54 | ||
55 | CanonicalIteratorTest::CanonicalIteratorTest() : | |
56 | nameTrans(NULL), hexTrans(NULL) | |
57 | { | |
58 | } | |
59 | ||
60 | CanonicalIteratorTest::~CanonicalIteratorTest() | |
61 | { | |
62 | #if !UCONFIG_NO_TRANSLITERATION | |
63 | if(nameTrans != NULL) { | |
64 | delete(nameTrans); | |
65 | } | |
66 | if(hexTrans != NULL) { | |
67 | delete(hexTrans); | |
68 | } | |
69 | #endif | |
70 | } | |
71 | ||
72 | void CanonicalIteratorTest::TestExhaustive() { | |
73 | UErrorCode status = U_ZERO_ERROR; | |
74 | CanonicalIterator it("", status); | |
729e4ab9 A |
75 | if (U_FAILURE(status)) { |
76 | dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
77 | return; | |
78 | } | |
b75a7d8f | 79 | UChar32 i = 0; |
73c04bcf | 80 | UnicodeString s; |
374ca955 | 81 | // Test static and dynamic class IDs |
b75a7d8f | 82 | if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ |
374ca955 A |
83 | errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID"); |
84 | } | |
b75a7d8f A |
85 | for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { |
86 | //for (i = 0xae00; i < 0xaf00; ++i) { | |
87 | ||
88 | if ((i % 0x100) == 0) { | |
89 | logln("Testing U+%06X", i); | |
90 | } | |
91 | ||
92 | // skip characters we know don't have decomps | |
93 | int8_t type = u_charType(i); | |
94 | if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR | |
95 | || type == U_SURROGATE) continue; | |
96 | ||
97 | s = i; | |
73c04bcf A |
98 | characterTest(s, i, it); |
99 | ||
b75a7d8f | 100 | s += (UChar32)0x0345; //"\\u0345"; |
73c04bcf | 101 | characterTest(s, i, it); |
b75a7d8f A |
102 | } |
103 | } | |
104 | ||
105 | void CanonicalIteratorTest::TestBasic() { | |
106 | ||
107 | UErrorCode status = U_ZERO_ERROR; | |
108 | ||
109 | static const char * const testArray[][2] = { | |
110 | {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, " | |
111 | "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, " | |
112 | "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, " | |
113 | "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"}, | |
114 | {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"}, | |
115 | {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, | |
116 | }; | |
117 | ||
118 | #if 0 | |
119 | // This is not interesting for C/C++ as the data is already built beforehand | |
120 | // check build | |
121 | UnicodeSet ss = CanonicalIterator.getSafeStart(); | |
122 | logln("Safe Start: " + ss.toPattern(true)); | |
123 | ss = CanonicalIterator.getStarts('a'); | |
124 | expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'), | |
125 | new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB" | |
126 | + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]") | |
127 | ); | |
128 | #endif | |
129 | ||
130 | // check permute | |
131 | // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! | |
132 | ||
133 | Hashtable *permutations = new Hashtable(FALSE, status); | |
4388f060 | 134 | permutations->setValueDeleter(uprv_deleteUObject); |
b75a7d8f A |
135 | UnicodeString toPermute("ABC"); |
136 | ||
137 | CanonicalIterator::permute(toPermute, FALSE, permutations, status); | |
138 | ||
139 | logln("testing permutation"); | |
140 | ||
141 | expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA"); | |
142 | ||
143 | delete permutations; | |
144 | ||
145 | // try samples | |
146 | logln("testing samples"); | |
147 | Hashtable *set = new Hashtable(FALSE, status); | |
4388f060 | 148 | set->setValueDeleter(uprv_deleteUObject); |
b75a7d8f A |
149 | int32_t i = 0; |
150 | CanonicalIterator it("", status); | |
151 | if(U_SUCCESS(status)) { | |
2ca993e8 | 152 | for (i = 0; i < UPRV_LENGTHOF(testArray); ++i) { |
b75a7d8f A |
153 | //logln("Results for: " + name.transliterate(testArray[i])); |
154 | UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); | |
155 | it.setSource(testStr, status); | |
156 | set->removeAll(); | |
46f4442e | 157 | for (;;) { |
b75a7d8f A |
158 | //UnicodeString *result = new UnicodeString(it.next()); |
159 | UnicodeString result(it.next()); | |
160 | if (result.isBogus()) { | |
161 | break; | |
162 | } | |
163 | set->put(result, new UnicodeString(result), status); // Add result to the table | |
164 | //logln(++counter + ": " + hex.transliterate(result)); | |
165 | //logln(" = " + name.transliterate(result)); | |
166 | } | |
51004dcb | 167 | expectEqual(i + UnicodeString(": "), testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1])); |
b75a7d8f A |
168 | |
169 | } | |
170 | } else { | |
729e4ab9 | 171 | dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status)); |
b75a7d8f A |
172 | } |
173 | delete set; | |
174 | } | |
175 | ||
73c04bcf A |
176 | void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it) |
177 | { | |
178 | UErrorCode status = U_ZERO_ERROR; | |
179 | UnicodeString decomp, comp; | |
180 | UBool gotDecomp = FALSE; | |
181 | UBool gotComp = FALSE; | |
182 | UBool gotSource = FALSE; | |
183 | ||
184 | Normalizer::decompose(s, FALSE, 0, decomp, status); | |
185 | Normalizer::compose(s, FALSE, 0, comp, status); | |
186 | ||
187 | // skip characters that don't have either decomp. | |
188 | // need quick test for this! | |
189 | if (s == decomp && s == comp) { | |
190 | return; | |
191 | } | |
192 | ||
193 | it.setSource(s, status); | |
194 | ||
46f4442e | 195 | for (;;) { |
73c04bcf A |
196 | UnicodeString item = it.next(); |
197 | if (item.isBogus()) break; | |
198 | if (item == s) gotSource = TRUE; | |
199 | if (item == decomp) gotDecomp = TRUE; | |
200 | if (item == comp) gotComp = TRUE; | |
201 | } | |
202 | ||
203 | if (!gotSource || !gotDecomp || !gotComp) { | |
204 | errln("FAIL CanonicalIterator: " + s + (int)ch); | |
205 | } | |
206 | } | |
207 | ||
b75a7d8f A |
208 | void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) { |
209 | if (!(a==b)) { | |
210 | errln("FAIL: " + message + getReadable(item)); | |
211 | errln("\t" + getReadable(a)); | |
212 | errln("\t" + getReadable(b)); | |
213 | } else { | |
214 | logln("Checked: " + message + getReadable(item)); | |
215 | logln("\t" + getReadable(a)); | |
216 | logln("\t" + getReadable(b)); | |
217 | } | |
218 | } | |
219 | ||
220 | UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { | |
221 | UErrorCode status = U_ZERO_ERROR; | |
222 | UnicodeString result = "["; | |
223 | if (s.length() == 0) return ""; | |
224 | // set up for readable display | |
225 | #if !UCONFIG_NO_TRANSLITERATION | |
226 | if(verbose) { | |
227 | if (nameTrans == NULL) | |
228 | nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status); | |
229 | UnicodeString sName = s; | |
230 | nameTrans->transliterate(sName); | |
231 | result += sName; | |
232 | result += ";"; | |
233 | } | |
234 | if (hexTrans == NULL) | |
235 | hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status); | |
236 | #endif | |
237 | UnicodeString sHex = s; | |
238 | #if !UCONFIG_NO_TRANSLITERATION | |
239 | if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated | |
240 | hexTrans->transliterate(sHex); | |
241 | } | |
242 | #endif | |
243 | result += sHex; | |
244 | result += "]"; | |
245 | return result; | |
246 | //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]"; | |
247 | } | |
248 | ||
374ca955 A |
249 | U_CFUNC int U_CALLCONV |
250 | compareUnicodeStrings(const void *s1, const void *s2) { | |
b75a7d8f A |
251 | UnicodeString **st1 = (UnicodeString **)s1; |
252 | UnicodeString **st2 = (UnicodeString **)s2; | |
253 | ||
254 | return (*st1)->compare(**st2); | |
255 | } | |
256 | ||
257 | ||
258 | UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { | |
259 | UnicodeString result; | |
260 | ||
261 | // Iterate over the Hashtable, then qsort. | |
262 | ||
263 | UnicodeString **resArray = new UnicodeString*[col->count()]; | |
264 | int32_t i = 0; | |
265 | ||
266 | const UHashElement *ne = NULL; | |
b331163b | 267 | int32_t el = UHASH_FIRST; |
b75a7d8f A |
268 | //Iterator it = basic.iterator(); |
269 | ne = col->nextElement(el); | |
270 | //while (it.hasNext()) | |
271 | while (ne != NULL) { | |
272 | //String item = (String) it.next(); | |
273 | UnicodeString *item = (UnicodeString *)(ne->value.pointer); | |
274 | resArray[i++] = item; | |
275 | ne = col->nextElement(el); | |
276 | } | |
277 | ||
278 | for(i = 0; i<col->count(); ++i) { | |
279 | logln(*resArray[i]); | |
280 | } | |
281 | ||
282 | qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings); | |
283 | ||
284 | result = *resArray[0]; | |
285 | ||
286 | for(i = 1; i<col->count(); ++i) { | |
287 | result += ", "; | |
288 | result += *resArray[i]; | |
289 | } | |
290 | ||
291 | /* | |
292 | Iterator it = col.iterator(); | |
293 | while (it.hasNext()) { | |
294 | if (result.length() != 0) result.append(", "); | |
295 | result.append(it.next().toString()); | |
296 | } | |
297 | */ | |
298 | ||
299 | delete [] resArray; | |
300 | ||
301 | return result; | |
302 | } | |
303 | ||
304 | void CanonicalIteratorTest::TestAPI() { | |
305 | UErrorCode status = U_ZERO_ERROR; | |
306 | // Test reset and getSource | |
307 | UnicodeString start("ljubav"); | |
308 | logln("Testing CanonicalIterator::getSource"); | |
309 | logln("Instantiating canonical iterator with string "+start); | |
310 | CanonicalIterator can(start, status); | |
729e4ab9 A |
311 | if (U_FAILURE(status)) { |
312 | dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
313 | return; | |
314 | } | |
b75a7d8f A |
315 | UnicodeString source = can.getSource(); |
316 | logln("CanonicalIterator::getSource returned "+source); | |
317 | if(start != source) { | |
318 | errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source); | |
319 | } | |
320 | logln("Testing CanonicalIterator::reset"); | |
321 | UnicodeString next = can.next(); | |
322 | logln("CanonicalIterator::next returned "+next); | |
323 | ||
324 | can.reset(); | |
325 | ||
326 | UnicodeString afterReset = can.next(); | |
327 | logln("After reset, CanonicalIterator::next returned "+afterReset); | |
328 | ||
329 | if(next != afterReset) { | |
330 | errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+")."); | |
331 | } | |
332 | ||
333 | logln("Testing getStaticClassID and getDynamicClassID"); | |
334 | if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
335 | errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID"); | |
336 | } | |
337 | } | |
338 | ||
339 | #endif /* #if !UCONFIG_NO_NORMALIZATION */ |