]>
git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/canittst.cpp
1 /********************************************************************
3 * Copyright (c) 2002-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
7 * @author Mark E. Davis
8 * @author Vladimir Weinstein
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_NORMALIZATION
19 #include "unicode/caniter.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/uchar.h"
// CASE(id, test): begins with a `case id:` label and is used by runIndexedTest
// to register one test per index. The visible part of the body logs an empty
// UnicodeString line.
// NOTE(review): only two lines of this multi-line macro are present in this
// extract; the remaining continuation lines are not shown here.
24 #define CASE(id,test) case id: \
28 logln((UnicodeString)""); \
// Dispatch hook that selects a test by numeric index.
//
// index - which test is being asked for; matched against CASE(...) entries.
// exec  - not referenced directly in the visible lines; presumably consumed by
//         the CASE macro expansion (TODO confirm against the full macro).
// name  - output reference; set to "" when no CASE entry matches `index`.
// (4th) - unnamed and unused here (the parameter name is commented out).
//
// NOTE(review): only the CASE(1, TestExhaustive) entry and the default branch
// are present in this extract; the enclosing switch and any other entries are
// not shown.
33 void CanonicalIteratorTest::runIndexedTest(int32_t index
, UBool exec
,
34 const char* &name
, char* /*par*/) {
37 CASE(1, TestExhaustive
);
39 default: name
= ""; break;
// Helper: builds a UnicodeString from `input` with "" as the codepage argument
// (the inline comment calls this an invariant conversion) and returns the
// result of unescape() on it.
// NOTE(review): the leading `*` on the description line suggests this helper
// lives inside a block comment in the full file (i.e. is disabled); the comment
// delimiters are not present in this extract, and no visible code calls str().
44 * Convert Java-style strings with \u Unicode escapes into UnicodeString objects
45 static UnicodeString str(const char *input)
47 UnicodeString str(input, ""); // Invariant conversion
48 return str.unescape();
// Constructor: starts with both transliterator pointers (nameTrans, hexTrans)
// set to NULL. getReadable() creates them on first use.
53 CanonicalIteratorTest::CanonicalIteratorTest() :
54 nameTrans(NULL
), hexTrans(NULL
)
// Destructor: when transliteration is compiled in, checks each of nameTrans
// and hexTrans for non-NULL.
// NOTE(review): the statements inside the two `if` bodies are not present in
// this extract; presumably they release the transliterators - confirm against
// the full file.
58 CanonicalIteratorTest::~CanonicalIteratorTest()
60 #if !UCONFIG_NO_TRANSLITERATION
61 if(nameTrans
!= NULL
) {
64 if(hexTrans
!= NULL
) {
// Sweeps the code point range and runs characterTest() on each candidate,
// first on the string `s` alone and then on `s` with U+0345 appended.
//
// Reports a data error if the CanonicalIterator cannot be constructed, and a
// test error if the iterator's dynamic class ID differs from the static one.
//
// NOTE(review): the declarations of `i` and `s`, the assignment that loads the
// current code point into `s`, and whatever follows the dataerrln() call are
// not present in this extract.
70 void CanonicalIteratorTest::TestExhaustive() {
71 UErrorCode status
= U_ZERO_ERROR
;
72 CanonicalIterator
it("", status
);
73 if (U_FAILURE(status
)) {
74 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status
));
79 // Test static and dynamic class IDs
80 if(it
.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
81 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
// When `quick` is set the loop advances 0x10 code points per step, otherwise
// one at a time. The upper bound is exclusive, so U+10FFFF itself is not
// visited.
83 for (i
= 0; i
< 0x10FFFF; quick
?i
+=0x10:++i
) {
84 //for (i = 0xae00; i < 0xaf00; ++i) {
// Progress log once per 0x100 code points.
86 if ((i
% 0x100) == 0) {
87 logln("Testing U+%06X", i
);
90 // skip characters we know don't have decomps
91 int8_t type
= u_charType(i
);
92 if (type
== U_UNASSIGNED
|| type
== U_PRIVATE_USE_CHAR
93 || type
== U_SURROGATE
) continue;
96 characterTest(s
, i
, it
);
// Second pass: same check with U+0345 appended to the string.
98 s
+= (UChar32
)0x0345; //"\\u0345";
99 characterTest(s
, i
, it
);
// Basic checks for CanonicalIterator:
//   1. CanonicalIterator::permute() on "ABC" must stringify (via
//      collectionToString) to "ABC, ACB, BAC, BCA, CAB, CBA".
//   2. For each row of testArray, column 0 is set as the iterator source, every
//      non-bogus result of next() is stored in a Hashtable, and the stringified
//      table is compared with column 1.
// Both columns are written with escaped \u sequences and converted with
// CharsToUnicodeString().
//
// NOTE(review): several lines are not present in this extract, including the
// terminator of testArray, the declaration of `i`, the inner loop header, the
// `else` that pairs with the U_SUCCESS check, and any cleanup of the two
// heap-allocated Hashtables - confirm against the full file.
103 void CanonicalIteratorTest::TestBasic() {
105 UErrorCode status
= U_ZERO_ERROR
;
107 static const char * const testArray
[][2] = {
108 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
109 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
110 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
111 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
112 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
113 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
// NOTE(review): the fragment below uses Java syntax (`CanonicalIterator.getSafeStart()`,
// `new UnicodeSet(... + ...)`), so it is presumably disabled in the full file;
// the guard that disables it is not present in this extract.
117 // This is not interesting for C/C++ as the data is already built beforehand
119 UnicodeSet ss
= CanonicalIterator
.getSafeStart();
120 logln("Safe Start: " + ss
.toPattern(true));
121 ss
= CanonicalIterator
.getStarts('a');
122 expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator
.getStarts('a'),
123 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
124 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
129 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
// Permutation check: the table's values are registered for deletion through
// uprv_deleteUObject.
131 Hashtable
*permutations
= new Hashtable(FALSE
, status
);
132 permutations
->setValueDeleter(uprv_deleteUObject
);
133 UnicodeString
toPermute("ABC");
135 CanonicalIterator::permute(toPermute
, FALSE
, permutations
, status
);
137 logln("testing permutation");
139 expectEqual("Simple permutation ", "", collectionToString(permutations
), "ABC, ACB, BAC, BCA, CAB, CBA");
// Sample check: one iterator is reused for every testArray row via setSource().
144 logln("testing samples");
145 Hashtable
*set
= new Hashtable(FALSE
, status
);
146 set
->setValueDeleter(uprv_deleteUObject
);
148 CanonicalIterator
it("", status
);
149 if(U_SUCCESS(status
)) {
150 for (i
= 0; i
< UPRV_LENGTHOF(testArray
); ++i
) {
151 //logln("Results for: " + name.transliterate(testArray[i]));
152 UnicodeString testStr
= CharsToUnicodeString(testArray
[i
][0]);
153 it
.setSource(testStr
, status
);
156 //UnicodeString *result = new UnicodeString(it.next());
157 UnicodeString
result(it
.next());
// A bogus string from next() marks the end of the iteration.
158 if (result
.isBogus()) {
// The result is the key; a heap copy of it is the value.
161 set
->put(result
, new UnicodeString(result
), status
); // Add result to the table
162 //logln(++counter + ": " + hex.transliterate(result));
163 //logln(" = " + name.transliterate(result));
165 expectEqual(i
+ UnicodeString(": "), testStr
, collectionToString(set
), CharsToUnicodeString(testArray
[i
][1]));
169 dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status
));
// Checks that iterating `it` over `s` produces `s` itself, the decomposition
// of `s`, and the composition of `s` (the latter two computed with
// Normalizer::decompose / Normalizer::compose). If any of the three was not
// seen, reports a failure through errln().
//
// s  - string under test; also set as the iterator's source.
// ch - only used in the failure message, where it is appended as an int.
// it - iterator to reuse; its source is replaced by `s`.
//
// NOTE(review): the loop header around the next() calls and the body of the
// "s == decomp && s == comp" branch are not present in this extract; per the
// existing comment that branch skips strings with no distinct decomposition or
// composition.
174 void CanonicalIteratorTest::characterTest(UnicodeString
&s
, UChar32 ch
, CanonicalIterator
&it
)
176 UErrorCode status
= U_ZERO_ERROR
;
177 UnicodeString decomp
, comp
;
178 UBool gotDecomp
= FALSE
;
179 UBool gotComp
= FALSE
;
180 UBool gotSource
= FALSE
;
// Reference forms that the iterator output is checked against.
182 Normalizer::decompose(s
, FALSE
, 0, decomp
, status
);
183 Normalizer::compose(s
, FALSE
, 0, comp
, status
);
185 // skip characters that don't have either decomp.
186 // need quick test for this!
187 if (s
== decomp
&& s
== comp
) {
191 it
.setSource(s
, status
);
// Consume results until next() returns a bogus string, recording which of the
// three expected forms were produced.
194 UnicodeString item
= it
.next();
195 if (item
.isBogus()) break;
196 if (item
== s
) gotSource
= TRUE
;
197 if (item
== decomp
) gotDecomp
= TRUE
;
198 if (item
== comp
) gotComp
= TRUE
;
201 if (!gotSource
|| !gotDecomp
|| !gotComp
) {
202 errln("FAIL CanonicalIterator: " + s
+ (int)ch
);
// Reports the outcome of comparing `a` with `b`.
// One branch emits "FAIL: ..." via errln(); the other emits "Checked: ..." via
// logln(). Either way the header line is `message` followed by the readable
// form of `item`, then the readable forms of `a` and `b` on tab-indented lines.
//
// message - label printed before the item.
// item    - the input the comparison is about; shown through getReadable().
// a, b    - the two values being compared; shown through getReadable().
//
// NOTE(review): the condition that chooses between the two branches is not
// present in this extract; presumably the FAIL branch runs when a and b
// differ - confirm against the full file.
206 void CanonicalIteratorTest::expectEqual(const UnicodeString
&message
, const UnicodeString
&item
, const UnicodeString
&a
, const UnicodeString
&b
) {
208 errln("FAIL: " + message
+ getReadable(item
));
209 errln("\t" + getReadable(a
));
210 errln("\t" + getReadable(b
));
212 logln("Checked: " + message
+ getReadable(item
));
213 logln("\t" + getReadable(a
));
214 logln("\t" + getReadable(b
));
// Produces a display form of `s` for log and error messages.
// Returns "" for an empty input. Otherwise `result` starts as "[" and, when
// transliteration is compiled in, copies of `s` are run through a "name"
// transliterator and a "hex" transliterator; both use the filter
// [^\ -\u007F]. The two transliterators are created on first use and cached in
// the nameTrans / hexTrans members.
//
// NOTE(review): the lines that append the transliterated text to `result` and
// return it are not present in this extract, nor is any condition guarding the
// name-transliteration section.
218 UnicodeString
CanonicalIteratorTest::getReadable(const UnicodeString
&s
) {
219 UErrorCode status
= U_ZERO_ERROR
;
220 UnicodeString result
= "[";
221 if (s
.length() == 0) return "";
222 // set up for readable display
223 #if !UCONFIG_NO_TRANSLITERATION
// NOTE(review): in the visible lines nameTrans is dereferenced without a NULL
// check, while hexTrans below is checked because creation can fail when data
// is missing - confirm whether the same guard is needed here.
225 if (nameTrans
== NULL
)
226 nameTrans
= Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD
, status
);
227 UnicodeString sName
= s
;
228 nameTrans
->transliterate(sName
);
232 if (hexTrans
== NULL
)
233 hexTrans
= Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD
, status
);
// sHex is declared outside the transliteration guard, so it holds a plain copy
// of `s` whenever the transliterate() call below does not run.
235 UnicodeString sHex
= s
;
236 #if !UCONFIG_NO_TRANSLITERATION
237 if(hexTrans
) { // maybe there is no data and transliterator cannot be instantiated
238 hexTrans
->transliterate(sHex
);
244 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
// qsort() comparator for an array of UnicodeString pointers (passed to qsort
// in collectionToString). s1 and s2 each point at an array element, i.e. at a
// UnicodeString*; the result is that of UnicodeString::compare() on the first
// string against the second.
247 U_CFUNC
int U_CALLCONV
248 compareUnicodeStrings(const void *s1
, const void *s2
) {
249 UnicodeString
**st1
= (UnicodeString
**)s1
;
250 UnicodeString
**st2
= (UnicodeString
**)s2
;
252 return (*st1
)->compare(**st2
);
// Builds one string from the values stored in `col`, in sorted order.
// The value pointers (treated as UnicodeString*) are copied into a temporary
// array, sorted with qsort() + compareUnicodeStrings, and concatenated into
// `result` starting with element 0.
//
// col - Hashtable whose values are UnicodeString objects; not modified in the
//       visible lines.
//
// NOTE(review): the loop header around the nextElement() walk, the declaration
// of `i`, the body of the first for-loop, any separator appended between
// elements, the release of resArray, and the return statement are not present
// in this extract - confirm against the full file.
256 UnicodeString
CanonicalIteratorTest::collectionToString(Hashtable
*col
) {
257 UnicodeString result
;
259 // Iterate over the Hashtable, then qsort.
// Temporary array with one slot per element in the table.
261 UnicodeString
**resArray
= new UnicodeString
*[col
->count()];
264 const UHashElement
*ne
= NULL
;
265 int32_t el
= UHASH_FIRST
;
266 //Iterator it = basic.iterator();
267 ne
= col
->nextElement(el
);
268 //while (it.hasNext())
270 //String item = (String) it.next();
271 UnicodeString
*item
= (UnicodeString
*)(ne
->value
.pointer
);
272 resArray
[i
++] = item
;
273 ne
= col
->nextElement(el
);
276 for(i
= 0; i
<col
->count(); ++i
) {
280 qsort(resArray
, col
->count(), sizeof(UnicodeString
*), compareUnicodeStrings
);
// NOTE(review): element 0 is read unconditionally in the visible lines, which
// assumes the collection is non-empty - confirm callers never pass an empty
// table.
282 result
= *resArray
[0];
284 for(i
= 1; i
<col
->count(); ++i
) {
286 result
+= *resArray
[i
];
// NOTE(review): the four lines below are Java-style leftovers, presumably
// inside a block comment whose delimiters are not present in this extract.
290 Iterator it = col.iterator();
291 while (it.hasNext()) {
292 if (result.length() != 0) result.append(", ");
293 result.append(it.next().toString());
// Exercises the CanonicalIterator API on the string "ljubav":
//   - getSource() must return the string the iterator was constructed with;
//   - the first next() result is compared with a later next() result, and the
//     two are expected to match ("reset" check);
//   - getDynamicClassID() must equal CanonicalIterator::getStaticClassID().
// Reports a data error if the iterator cannot be constructed.
//
// NOTE(review): the lines between the first next() call and the second one
// (presumably the reset call itself) and whatever follows the dataerrln() call
// are not present in this extract.
302 void CanonicalIteratorTest::TestAPI() {
303 UErrorCode status
= U_ZERO_ERROR
;
304 // Test reset and getSource
305 UnicodeString
start("ljubav");
306 logln("Testing CanonicalIterator::getSource");
307 logln("Instantiating canonical iterator with string "+start
);
308 CanonicalIterator
can(start
, status
);
309 if (U_FAILURE(status
)) {
310 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status
));
313 UnicodeString source
= can
.getSource();
314 logln("CanonicalIterator::getSource returned "+source
);
315 if(start
!= source
) {
316 errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start
+", got "+source
);
318 logln("Testing CanonicalIterator::reset");
// Remember the first result so it can be compared with the result obtained
// after the reset step.
319 UnicodeString next
= can
.next();
320 logln("CanonicalIterator::next returned "+next
);
324 UnicodeString afterReset
= can
.next();
325 logln("After reset, CanonicalIterator::next returned "+afterReset
);
327 if(next
!= afterReset
) {
328 errln("Next after instantiation ("+next
+") is different from next after reset ("+afterReset
+").");
331 logln("Testing getStaticClassID and getDynamicClassID");
332 if(can
.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
333 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
337 #endif /* #if !UCONFIG_NO_NORMALIZATION */