]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/canittst.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / canittst.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2002-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 *
7 * @author Mark E. Davis
8 * @author Vladimir Weinstein
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_NORMALIZATION
14
15 #include "intltest.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "canittst.h"
19 #include "unicode/caniter.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/uchar.h"
22 #include "hash.h"
23
// Expands to one `case` label for the switch in runIndexedTest(): it stores the
// stringized test name in `name` and, when `exec` is set, logs a banner and
// invokes the test method. The caller supplies the trailing semicolon.
#define CASE(idx, testFn) case idx:       \
        name = #testFn;                   \
        if (exec) {                       \
            logln(#testFn "---");         \
            logln((UnicodeString)"");     \
            testFn();                     \
        }                                 \
        break
32
33 void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
34 const char* &name, char* /*par*/) {
35 switch (index) {
36 CASE(0, TestBasic);
37 CASE(1, TestExhaustive);
38 CASE(2, TestAPI);
39 default: name = ""; break;
40 }
41 }
42
/*
 * Disabled helper, kept for reference only.
 * Converts Java-style strings with \u Unicode escapes into UnicodeString objects.
 *
 * static UnicodeString str(const char *input)
 * {
 *     UnicodeString str(input, ""); // Invariant conversion
 *     return str.unescape();
 * }
 */
51
52
53 CanonicalIteratorTest::CanonicalIteratorTest() :
54 nameTrans(NULL), hexTrans(NULL)
55 {
56 }
57
58 CanonicalIteratorTest::~CanonicalIteratorTest()
59 {
60 #if !UCONFIG_NO_TRANSLITERATION
61 if(nameTrans != NULL) {
62 delete(nameTrans);
63 }
64 if(hexTrans != NULL) {
65 delete(hexTrans);
66 }
67 #endif
68 }
69
70 void CanonicalIteratorTest::TestExhaustive() {
71 UErrorCode status = U_ZERO_ERROR;
72 CanonicalIterator it("", status);
73 if (U_FAILURE(status)) {
74 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
75 return;
76 }
77 UChar32 i = 0;
78 UnicodeString s;
79 // Test static and dynamic class IDs
80 if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
81 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
82 }
83 for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
84 //for (i = 0xae00; i < 0xaf00; ++i) {
85
86 if ((i % 0x100) == 0) {
87 logln("Testing U+%06X", i);
88 }
89
90 // skip characters we know don't have decomps
91 int8_t type = u_charType(i);
92 if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
93 || type == U_SURROGATE) continue;
94
95 s = i;
96 characterTest(s, i, it);
97
98 s += (UChar32)0x0345; //"\\u0345";
99 characterTest(s, i, it);
100 }
101 }
102
103 void CanonicalIteratorTest::TestBasic() {
104
105 UErrorCode status = U_ZERO_ERROR;
106
107 static const char * const testArray[][2] = {
108 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
109 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
110 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
111 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
112 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
113 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
114 };
115
116 #if 0
117 // This is not interesting for C/C++ as the data is already built beforehand
118 // check build
119 UnicodeSet ss = CanonicalIterator.getSafeStart();
120 logln("Safe Start: " + ss.toPattern(true));
121 ss = CanonicalIterator.getStarts('a');
122 expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
123 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
124 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
125 );
126 #endif
127
128 // check permute
129 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
130
131 Hashtable *permutations = new Hashtable(FALSE, status);
132 permutations->setValueDeleter(uprv_deleteUObject);
133 UnicodeString toPermute("ABC");
134
135 CanonicalIterator::permute(toPermute, FALSE, permutations, status);
136
137 logln("testing permutation");
138
139 expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
140
141 delete permutations;
142
143 // try samples
144 logln("testing samples");
145 Hashtable *set = new Hashtable(FALSE, status);
146 set->setValueDeleter(uprv_deleteUObject);
147 int32_t i = 0;
148 CanonicalIterator it("", status);
149 if(U_SUCCESS(status)) {
150 for (i = 0; i < UPRV_LENGTHOF(testArray); ++i) {
151 //logln("Results for: " + name.transliterate(testArray[i]));
152 UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
153 it.setSource(testStr, status);
154 set->removeAll();
155 for (;;) {
156 //UnicodeString *result = new UnicodeString(it.next());
157 UnicodeString result(it.next());
158 if (result.isBogus()) {
159 break;
160 }
161 set->put(result, new UnicodeString(result), status); // Add result to the table
162 //logln(++counter + ": " + hex.transliterate(result));
163 //logln(" = " + name.transliterate(result));
164 }
165 expectEqual(i + UnicodeString(": "), testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
166
167 }
168 } else {
169 dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
170 }
171 delete set;
172 }
173
174 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
175 {
176 UErrorCode status = U_ZERO_ERROR;
177 UnicodeString decomp, comp;
178 UBool gotDecomp = FALSE;
179 UBool gotComp = FALSE;
180 UBool gotSource = FALSE;
181
182 Normalizer::decompose(s, FALSE, 0, decomp, status);
183 Normalizer::compose(s, FALSE, 0, comp, status);
184
185 // skip characters that don't have either decomp.
186 // need quick test for this!
187 if (s == decomp && s == comp) {
188 return;
189 }
190
191 it.setSource(s, status);
192
193 for (;;) {
194 UnicodeString item = it.next();
195 if (item.isBogus()) break;
196 if (item == s) gotSource = TRUE;
197 if (item == decomp) gotDecomp = TRUE;
198 if (item == comp) gotComp = TRUE;
199 }
200
201 if (!gotSource || !gotDecomp || !gotComp) {
202 errln("FAIL CanonicalIterator: " + s + (int)ch);
203 }
204 }
205
206 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
207 if (!(a==b)) {
208 errln("FAIL: " + message + getReadable(item));
209 errln("\t" + getReadable(a));
210 errln("\t" + getReadable(b));
211 } else {
212 logln("Checked: " + message + getReadable(item));
213 logln("\t" + getReadable(a));
214 logln("\t" + getReadable(b));
215 }
216 }
217
218 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
219 UErrorCode status = U_ZERO_ERROR;
220 UnicodeString result = "[";
221 if (s.length() == 0) return "";
222 // set up for readable display
223 #if !UCONFIG_NO_TRANSLITERATION
224 if(verbose) {
225 if (nameTrans == NULL)
226 nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
227 UnicodeString sName = s;
228 nameTrans->transliterate(sName);
229 result += sName;
230 result += ";";
231 }
232 if (hexTrans == NULL)
233 hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
234 #endif
235 UnicodeString sHex = s;
236 #if !UCONFIG_NO_TRANSLITERATION
237 if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
238 hexTrans->transliterate(sHex);
239 }
240 #endif
241 result += sHex;
242 result += "]";
243 return result;
244 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
245 }
246
247 U_CFUNC int U_CALLCONV
248 compareUnicodeStrings(const void *s1, const void *s2) {
249 UnicodeString **st1 = (UnicodeString **)s1;
250 UnicodeString **st2 = (UnicodeString **)s2;
251
252 return (*st1)->compare(**st2);
253 }
254
255
256 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
257 UnicodeString result;
258
259 // Iterate over the Hashtable, then qsort.
260
261 UnicodeString **resArray = new UnicodeString*[col->count()];
262 int32_t i = 0;
263
264 const UHashElement *ne = NULL;
265 int32_t el = UHASH_FIRST;
266 //Iterator it = basic.iterator();
267 ne = col->nextElement(el);
268 //while (it.hasNext())
269 while (ne != NULL) {
270 //String item = (String) it.next();
271 UnicodeString *item = (UnicodeString *)(ne->value.pointer);
272 resArray[i++] = item;
273 ne = col->nextElement(el);
274 }
275
276 for(i = 0; i<col->count(); ++i) {
277 logln(*resArray[i]);
278 }
279
280 qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
281
282 result = *resArray[0];
283
284 for(i = 1; i<col->count(); ++i) {
285 result += ", ";
286 result += *resArray[i];
287 }
288
289 /*
290 Iterator it = col.iterator();
291 while (it.hasNext()) {
292 if (result.length() != 0) result.append(", ");
293 result.append(it.next().toString());
294 }
295 */
296
297 delete [] resArray;
298
299 return result;
300 }
301
302 void CanonicalIteratorTest::TestAPI() {
303 UErrorCode status = U_ZERO_ERROR;
304 // Test reset and getSource
305 UnicodeString start("ljubav");
306 logln("Testing CanonicalIterator::getSource");
307 logln("Instantiating canonical iterator with string "+start);
308 CanonicalIterator can(start, status);
309 if (U_FAILURE(status)) {
310 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
311 return;
312 }
313 UnicodeString source = can.getSource();
314 logln("CanonicalIterator::getSource returned "+source);
315 if(start != source) {
316 errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
317 }
318 logln("Testing CanonicalIterator::reset");
319 UnicodeString next = can.next();
320 logln("CanonicalIterator::next returned "+next);
321
322 can.reset();
323
324 UnicodeString afterReset = can.next();
325 logln("After reset, CanonicalIterator::next returned "+afterReset);
326
327 if(next != afterReset) {
328 errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
329 }
330
331 logln("Testing getStaticClassID and getDynamicClassID");
332 if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
333 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
334 }
335 }
336
337 #endif /* #if !UCONFIG_NO_NORMALIZATION */