]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/canittst.cpp
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / test / intltest / canittst.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2002-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 *
7 * @author Mark E. Davis
8 * @author Vladimir Weinstein
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_NORMALIZATION
14
15 #include "intltest.h"
16 #include "cstring.h"
17 #include "canittst.h"
18 #include "unicode/caniter.h"
19 #include "unicode/normlzr.h"
20 #include "unicode/uchar.h"
21 #include "hash.h"
22
/** Element count of a statically sized array, expressed as an int32_t. */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
24
/**
 * Expands to one `case` of the switch in runIndexedTest(): it publishes the
 * test's name through `name` and, when `exec` is set, logs a banner line and
 * invokes the test method. The caller supplies the trailing semicolon.
 */
#define CASE(id, test)                  \
    case id:                            \
        name = #test;                   \
        if (exec) {                     \
            logln(#test "---");         \
            logln((UnicodeString)"");   \
            test();                     \
        }                               \
        break
33
34 void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
35 const char* &name, char* /*par*/) {
36 switch (index) {
37 CASE(0, TestBasic);
38 CASE(1, TestExhaustive);
39 CASE(2, TestAPI);
40 default: name = ""; break;
41 }
42 }
43
44 /**
45 * Convert Java-style strings with \u Unicode escapes into UnicodeString objects
46 static UnicodeString str(const char *input)
47 {
48 UnicodeString str(input, ""); // Invariant conversion
49 return str.unescape();
50 }
51 */
52
53
54 CanonicalIteratorTest::CanonicalIteratorTest() :
55 nameTrans(NULL), hexTrans(NULL)
56 {
57 }
58
59 CanonicalIteratorTest::~CanonicalIteratorTest()
60 {
61 #if !UCONFIG_NO_TRANSLITERATION
62 if(nameTrans != NULL) {
63 delete(nameTrans);
64 }
65 if(hexTrans != NULL) {
66 delete(hexTrans);
67 }
68 #endif
69 }
70
71 void CanonicalIteratorTest::TestExhaustive() {
72 UErrorCode status = U_ZERO_ERROR;
73 CanonicalIterator it("", status);
74 UChar32 i = 0;
75 UnicodeString s;
76 // Test static and dynamic class IDs
77 if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
78 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
79 }
80 for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
81 //for (i = 0xae00; i < 0xaf00; ++i) {
82
83 if ((i % 0x100) == 0) {
84 logln("Testing U+%06X", i);
85 }
86
87 // skip characters we know don't have decomps
88 int8_t type = u_charType(i);
89 if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
90 || type == U_SURROGATE) continue;
91
92 s = i;
93 characterTest(s, i, it);
94
95 s += (UChar32)0x0345; //"\\u0345";
96 characterTest(s, i, it);
97 }
98 }
99
100 void CanonicalIteratorTest::TestBasic() {
101
102 UErrorCode status = U_ZERO_ERROR;
103
104 static const char * const testArray[][2] = {
105 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
106 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
107 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
108 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
109 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
110 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
111 };
112
113 #if 0
114 // This is not interesting for C/C++ as the data is already built beforehand
115 // check build
116 UnicodeSet ss = CanonicalIterator.getSafeStart();
117 logln("Safe Start: " + ss.toPattern(true));
118 ss = CanonicalIterator.getStarts('a');
119 expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
120 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
121 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
122 );
123 #endif
124
125 // check permute
126 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
127
128 Hashtable *permutations = new Hashtable(FALSE, status);
129 permutations->setValueDeleter(uhash_deleteUnicodeString);
130 UnicodeString toPermute("ABC");
131
132 CanonicalIterator::permute(toPermute, FALSE, permutations, status);
133
134 logln("testing permutation");
135
136 expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
137
138 delete permutations;
139
140 // try samples
141 logln("testing samples");
142 Hashtable *set = new Hashtable(FALSE, status);
143 set->setValueDeleter(uhash_deleteUnicodeString);
144 int32_t i = 0;
145 CanonicalIterator it("", status);
146 if(U_SUCCESS(status)) {
147 for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
148 //logln("Results for: " + name.transliterate(testArray[i]));
149 UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
150 it.setSource(testStr, status);
151 set->removeAll();
152 for (;;) {
153 //UnicodeString *result = new UnicodeString(it.next());
154 UnicodeString result(it.next());
155 if (result.isBogus()) {
156 break;
157 }
158 set->put(result, new UnicodeString(result), status); // Add result to the table
159 //logln(++counter + ": " + hex.transliterate(result));
160 //logln(" = " + name.transliterate(result));
161 }
162 expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
163
164 }
165 } else {
166 errln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
167 }
168 delete set;
169 }
170
171 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
172 {
173 UErrorCode status = U_ZERO_ERROR;
174 UnicodeString decomp, comp;
175 UBool gotDecomp = FALSE;
176 UBool gotComp = FALSE;
177 UBool gotSource = FALSE;
178
179 Normalizer::decompose(s, FALSE, 0, decomp, status);
180 Normalizer::compose(s, FALSE, 0, comp, status);
181
182 // skip characters that don't have either decomp.
183 // need quick test for this!
184 if (s == decomp && s == comp) {
185 return;
186 }
187
188 it.setSource(s, status);
189
190 for (;;) {
191 UnicodeString item = it.next();
192 if (item.isBogus()) break;
193 if (item == s) gotSource = TRUE;
194 if (item == decomp) gotDecomp = TRUE;
195 if (item == comp) gotComp = TRUE;
196 }
197
198 if (!gotSource || !gotDecomp || !gotComp) {
199 errln("FAIL CanonicalIterator: " + s + (int)ch);
200 }
201 }
202
203 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
204 if (!(a==b)) {
205 errln("FAIL: " + message + getReadable(item));
206 errln("\t" + getReadable(a));
207 errln("\t" + getReadable(b));
208 } else {
209 logln("Checked: " + message + getReadable(item));
210 logln("\t" + getReadable(a));
211 logln("\t" + getReadable(b));
212 }
213 }
214
215 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
216 UErrorCode status = U_ZERO_ERROR;
217 UnicodeString result = "[";
218 if (s.length() == 0) return "";
219 // set up for readable display
220 #if !UCONFIG_NO_TRANSLITERATION
221 if(verbose) {
222 if (nameTrans == NULL)
223 nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
224 UnicodeString sName = s;
225 nameTrans->transliterate(sName);
226 result += sName;
227 result += ";";
228 }
229 if (hexTrans == NULL)
230 hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
231 #endif
232 UnicodeString sHex = s;
233 #if !UCONFIG_NO_TRANSLITERATION
234 if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
235 hexTrans->transliterate(sHex);
236 }
237 #endif
238 result += sHex;
239 result += "]";
240 return result;
241 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
242 }
243
244 U_CFUNC int U_CALLCONV
245 compareUnicodeStrings(const void *s1, const void *s2) {
246 UnicodeString **st1 = (UnicodeString **)s1;
247 UnicodeString **st2 = (UnicodeString **)s2;
248
249 return (*st1)->compare(**st2);
250 }
251
252
253 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
254 UnicodeString result;
255
256 // Iterate over the Hashtable, then qsort.
257
258 UnicodeString **resArray = new UnicodeString*[col->count()];
259 int32_t i = 0;
260
261 const UHashElement *ne = NULL;
262 int32_t el = -1;
263 //Iterator it = basic.iterator();
264 ne = col->nextElement(el);
265 //while (it.hasNext())
266 while (ne != NULL) {
267 //String item = (String) it.next();
268 UnicodeString *item = (UnicodeString *)(ne->value.pointer);
269 resArray[i++] = item;
270 ne = col->nextElement(el);
271 }
272
273 for(i = 0; i<col->count(); ++i) {
274 logln(*resArray[i]);
275 }
276
277 qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
278
279 result = *resArray[0];
280
281 for(i = 1; i<col->count(); ++i) {
282 result += ", ";
283 result += *resArray[i];
284 }
285
286 /*
287 Iterator it = col.iterator();
288 while (it.hasNext()) {
289 if (result.length() != 0) result.append(", ");
290 result.append(it.next().toString());
291 }
292 */
293
294 delete [] resArray;
295
296 return result;
297 }
298
299 void CanonicalIteratorTest::TestAPI() {
300 UErrorCode status = U_ZERO_ERROR;
301 // Test reset and getSource
302 UnicodeString start("ljubav");
303 logln("Testing CanonicalIterator::getSource");
304 logln("Instantiating canonical iterator with string "+start);
305 CanonicalIterator can(start, status);
306 UnicodeString source = can.getSource();
307 logln("CanonicalIterator::getSource returned "+source);
308 if(start != source) {
309 errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
310 }
311 logln("Testing CanonicalIterator::reset");
312 UnicodeString next = can.next();
313 logln("CanonicalIterator::next returned "+next);
314
315 can.reset();
316
317 UnicodeString afterReset = can.next();
318 logln("After reset, CanonicalIterator::next returned "+afterReset);
319
320 if(next != afterReset) {
321 errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
322 }
323
324 logln("Testing getStaticClassID and getDynamicClassID");
325 if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
326 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
327 }
328 }
329
330 #endif /* #if !UCONFIG_NO_NORMALIZATION */