]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
374ca955 | 3 | * Copyright (c) 2002-2004, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ******************************************************************** | |
6 | * | |
7 | * @author Mark E. Davis | |
8 | * @author Vladimir Weinstein | |
9 | */ | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | ||
13 | #if !UCONFIG_NO_NORMALIZATION | |
14 | ||
15 | #include "intltest.h" | |
16 | #include "cstring.h" | |
17 | #include "canittst.h" | |
18 | #include "unicode/caniter.h" | |
374ca955 | 19 | #include "unicode/normlzr.h" |
b75a7d8f | 20 | #include "unicode/uchar.h" |
374ca955 | 21 | #include "hash.h" |
b75a7d8f A |
22 | |
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The argument is fully parenthesized so that any array-valued expression
 * can be passed safely.
 */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))

/**
 * Emits one `case` of the runIndexedTest() switch: publishes the test's name
 * through `name` and, when `exec` is set, logs a header and runs the test.
 * Expands to a statement ending in `break` without a trailing semicolon, so
 * it must be used as `CASE(n, TestName);`.
 */
#define CASE(id,test) case id:                               \
                          name = #test;                      \
                          if (exec) {                        \
                              logln(#test "---");            \
                              logln((UnicodeString)"");      \
                              test();                        \
                          }                                  \
                          break
33 | ||
34 | void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec, | |
35 | const char* &name, char* /*par*/) { | |
36 | switch (index) { | |
37 | CASE(0, TestBasic); | |
38 | CASE(1, TestExhaustive); | |
39 | CASE(2, TestAPI); | |
40 | default: name = ""; break; | |
41 | } | |
42 | } | |
43 | ||
44 | /** | |
45 | * Convert Java-style strings with \u Unicode escapes into UnicodeString objects | |
46 | static UnicodeString str(const char *input) | |
47 | { | |
48 | UnicodeString str(input, ""); // Invariant conversion | |
49 | return str.unescape(); | |
50 | } | |
51 | */ | |
52 | ||
53 | ||
54 | CanonicalIteratorTest::CanonicalIteratorTest() : | |
55 | nameTrans(NULL), hexTrans(NULL) | |
56 | { | |
57 | } | |
58 | ||
59 | CanonicalIteratorTest::~CanonicalIteratorTest() | |
60 | { | |
61 | #if !UCONFIG_NO_TRANSLITERATION | |
62 | if(nameTrans != NULL) { | |
63 | delete(nameTrans); | |
64 | } | |
65 | if(hexTrans != NULL) { | |
66 | delete(hexTrans); | |
67 | } | |
68 | #endif | |
69 | } | |
70 | ||
71 | void CanonicalIteratorTest::TestExhaustive() { | |
72 | UErrorCode status = U_ZERO_ERROR; | |
73 | CanonicalIterator it("", status); | |
74 | UChar32 i = 0; | |
75 | UnicodeString s, decomp, comp; | |
374ca955 | 76 | // Test static and dynamic class IDs |
b75a7d8f | 77 | if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ |
374ca955 A |
78 | errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID"); |
79 | } | |
b75a7d8f A |
80 | for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { |
81 | //for (i = 0xae00; i < 0xaf00; ++i) { | |
82 | ||
83 | if ((i % 0x100) == 0) { | |
84 | logln("Testing U+%06X", i); | |
85 | } | |
86 | ||
87 | // skip characters we know don't have decomps | |
88 | int8_t type = u_charType(i); | |
89 | if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR | |
90 | || type == U_SURROGATE) continue; | |
91 | ||
92 | s = i; | |
93 | s += (UChar32)0x0345; //"\\u0345"; | |
94 | ||
95 | Normalizer::decompose(s, FALSE, 0, decomp, status); | |
96 | Normalizer::compose(s, FALSE, 0, comp, status); | |
97 | ||
98 | // skip characters that don't have either decomp. | |
99 | // need quick test for this! | |
100 | if (s == decomp && s == comp) { | |
101 | continue; | |
102 | } | |
103 | ||
104 | it.setSource(s, status); | |
105 | UBool gotDecomp = FALSE; | |
106 | UBool gotComp = FALSE; | |
107 | UBool gotSource = FALSE; | |
108 | ||
109 | while (TRUE) { | |
110 | UnicodeString item = it.next(); | |
111 | if (item.isBogus()) break; | |
112 | if (item == s) gotSource = TRUE; | |
113 | if (item == decomp) gotDecomp = TRUE; | |
114 | if (item == comp) gotComp = TRUE; | |
115 | } | |
116 | ||
117 | if (!gotSource || !gotDecomp || !gotComp) { | |
118 | errln("FAIL CanonicalIterator: " + s + (int)i); | |
119 | } | |
120 | } | |
121 | } | |
122 | ||
123 | void CanonicalIteratorTest::TestBasic() { | |
124 | ||
125 | UErrorCode status = U_ZERO_ERROR; | |
126 | ||
127 | static const char * const testArray[][2] = { | |
128 | {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, " | |
129 | "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, " | |
130 | "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, " | |
131 | "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"}, | |
132 | {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"}, | |
133 | {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, | |
134 | }; | |
135 | ||
136 | #if 0 | |
137 | // This is not interesting for C/C++ as the data is already built beforehand | |
138 | // check build | |
139 | UnicodeSet ss = CanonicalIterator.getSafeStart(); | |
140 | logln("Safe Start: " + ss.toPattern(true)); | |
141 | ss = CanonicalIterator.getStarts('a'); | |
142 | expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'), | |
143 | new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB" | |
144 | + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]") | |
145 | ); | |
146 | #endif | |
147 | ||
148 | // check permute | |
149 | // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! | |
150 | ||
151 | Hashtable *permutations = new Hashtable(FALSE, status); | |
152 | permutations->setValueDeleter(uhash_deleteUnicodeString); | |
153 | UnicodeString toPermute("ABC"); | |
154 | ||
155 | CanonicalIterator::permute(toPermute, FALSE, permutations, status); | |
156 | ||
157 | logln("testing permutation"); | |
158 | ||
159 | expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA"); | |
160 | ||
161 | delete permutations; | |
162 | ||
163 | // try samples | |
164 | logln("testing samples"); | |
165 | Hashtable *set = new Hashtable(FALSE, status); | |
166 | set->setValueDeleter(uhash_deleteUnicodeString); | |
167 | int32_t i = 0; | |
168 | CanonicalIterator it("", status); | |
169 | if(U_SUCCESS(status)) { | |
170 | for (i = 0; i < ARRAY_LENGTH(testArray); ++i) { | |
171 | //logln("Results for: " + name.transliterate(testArray[i])); | |
172 | UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); | |
173 | it.setSource(testStr, status); | |
174 | set->removeAll(); | |
175 | while (TRUE) { | |
176 | //UnicodeString *result = new UnicodeString(it.next()); | |
177 | UnicodeString result(it.next()); | |
178 | if (result.isBogus()) { | |
179 | break; | |
180 | } | |
181 | set->put(result, new UnicodeString(result), status); // Add result to the table | |
182 | //logln(++counter + ": " + hex.transliterate(result)); | |
183 | //logln(" = " + name.transliterate(result)); | |
184 | } | |
185 | expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1])); | |
186 | ||
187 | } | |
188 | } else { | |
189 | errln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status)); | |
190 | } | |
191 | delete set; | |
192 | } | |
193 | ||
194 | void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) { | |
195 | if (!(a==b)) { | |
196 | errln("FAIL: " + message + getReadable(item)); | |
197 | errln("\t" + getReadable(a)); | |
198 | errln("\t" + getReadable(b)); | |
199 | } else { | |
200 | logln("Checked: " + message + getReadable(item)); | |
201 | logln("\t" + getReadable(a)); | |
202 | logln("\t" + getReadable(b)); | |
203 | } | |
204 | } | |
205 | ||
206 | UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { | |
207 | UErrorCode status = U_ZERO_ERROR; | |
208 | UnicodeString result = "["; | |
209 | if (s.length() == 0) return ""; | |
210 | // set up for readable display | |
211 | #if !UCONFIG_NO_TRANSLITERATION | |
212 | if(verbose) { | |
213 | if (nameTrans == NULL) | |
214 | nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status); | |
215 | UnicodeString sName = s; | |
216 | nameTrans->transliterate(sName); | |
217 | result += sName; | |
218 | result += ";"; | |
219 | } | |
220 | if (hexTrans == NULL) | |
221 | hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status); | |
222 | #endif | |
223 | UnicodeString sHex = s; | |
224 | #if !UCONFIG_NO_TRANSLITERATION | |
225 | if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated | |
226 | hexTrans->transliterate(sHex); | |
227 | } | |
228 | #endif | |
229 | result += sHex; | |
230 | result += "]"; | |
231 | return result; | |
232 | //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]"; | |
233 | } | |
234 | ||
374ca955 A |
235 | U_CFUNC int U_CALLCONV |
236 | compareUnicodeStrings(const void *s1, const void *s2) { | |
b75a7d8f A |
237 | UnicodeString **st1 = (UnicodeString **)s1; |
238 | UnicodeString **st2 = (UnicodeString **)s2; | |
239 | ||
240 | return (*st1)->compare(**st2); | |
241 | } | |
242 | ||
243 | ||
244 | UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { | |
245 | UnicodeString result; | |
246 | ||
247 | // Iterate over the Hashtable, then qsort. | |
248 | ||
249 | UnicodeString **resArray = new UnicodeString*[col->count()]; | |
250 | int32_t i = 0; | |
251 | ||
252 | const UHashElement *ne = NULL; | |
253 | int32_t el = -1; | |
254 | //Iterator it = basic.iterator(); | |
255 | ne = col->nextElement(el); | |
256 | //while (it.hasNext()) | |
257 | while (ne != NULL) { | |
258 | //String item = (String) it.next(); | |
259 | UnicodeString *item = (UnicodeString *)(ne->value.pointer); | |
260 | resArray[i++] = item; | |
261 | ne = col->nextElement(el); | |
262 | } | |
263 | ||
264 | for(i = 0; i<col->count(); ++i) { | |
265 | logln(*resArray[i]); | |
266 | } | |
267 | ||
268 | qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings); | |
269 | ||
270 | result = *resArray[0]; | |
271 | ||
272 | for(i = 1; i<col->count(); ++i) { | |
273 | result += ", "; | |
274 | result += *resArray[i]; | |
275 | } | |
276 | ||
277 | /* | |
278 | Iterator it = col.iterator(); | |
279 | while (it.hasNext()) { | |
280 | if (result.length() != 0) result.append(", "); | |
281 | result.append(it.next().toString()); | |
282 | } | |
283 | */ | |
284 | ||
285 | delete [] resArray; | |
286 | ||
287 | return result; | |
288 | } | |
289 | ||
290 | void CanonicalIteratorTest::TestAPI() { | |
291 | UErrorCode status = U_ZERO_ERROR; | |
292 | // Test reset and getSource | |
293 | UnicodeString start("ljubav"); | |
294 | logln("Testing CanonicalIterator::getSource"); | |
295 | logln("Instantiating canonical iterator with string "+start); | |
296 | CanonicalIterator can(start, status); | |
297 | UnicodeString source = can.getSource(); | |
298 | logln("CanonicalIterator::getSource returned "+source); | |
299 | if(start != source) { | |
300 | errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source); | |
301 | } | |
302 | logln("Testing CanonicalIterator::reset"); | |
303 | UnicodeString next = can.next(); | |
304 | logln("CanonicalIterator::next returned "+next); | |
305 | ||
306 | can.reset(); | |
307 | ||
308 | UnicodeString afterReset = can.next(); | |
309 | logln("After reset, CanonicalIterator::next returned "+afterReset); | |
310 | ||
311 | if(next != afterReset) { | |
312 | errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+")."); | |
313 | } | |
314 | ||
315 | logln("Testing getStaticClassID and getDynamicClassID"); | |
316 | if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
317 | errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID"); | |
318 | } | |
319 | } | |
320 | ||
321 | #endif /* #if !UCONFIG_NO_NORMALIZATION */ |