]>
Commit | Line | Data |
---|---|---|
4388f060 A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 2012, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | // | |
7 | // file: alphaindex.cpp | |
8 | // Alphabetic Index Tests. | |
9 | // | |
10 | #include "intltest.h" | |
11 | #include "alphaindextst.h" | |
12 | ||
13 | #include "unicode/alphaindex.h" | |
14 | #include "unicode/coll.h" | |
15 | #include "unicode/tblcoll.h" | |
16 | #include "unicode/uniset.h" | |
17 | ||
18 | #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION | |
19 | ||
20 | // #include <string> | |
21 | // #include <iostream> | |
22 | ||
23 | AlphabeticIndexTest::AlphabeticIndexTest() { | |
24 | } | |
25 | ||
26 | AlphabeticIndexTest::~AlphabeticIndexTest() { | |
27 | } | |
28 | ||
29 | void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
30 | { | |
31 | if (exec) logln("TestSuite AlphabeticIndex: "); | |
32 | switch (index) { | |
33 | ||
34 | case 0: name = "APITest"; | |
35 | if (exec) APITest(); | |
36 | break; | |
37 | ||
38 | case 1: name = "ManyLocales"; | |
39 | if (exec) ManyLocalesTest(); | |
40 | break; | |
41 | ||
42 | case 2: name = "HackPinyinTest"; | |
43 | if (exec) HackPinyinTest(); | |
44 | break; | |
45 | ||
46 | case 3: name = "TestBug9009"; | |
47 | if (exec) TestBug9009(); | |
48 | break; | |
49 | ||
50 | default: name = ""; | |
51 | break; //needed to end loop | |
52 | } | |
53 | } | |
54 | ||
// TEST_CHECK_STATUS
//   Requires a UErrorCode variable named `status` in scope. When it holds a
//   failure code, reports a data error with file and line and returns from
//   the enclosing function (which must therefore return void).
//
// Both macros are wrapped in do { ... } while (0) so that "MACRO;" is exactly
// one statement and stays correct inside an unbraced if/else.
#define TEST_CHECK_STATUS do { \
    if (U_FAILURE(status)) { \
        dataerrln("%s:%d: Test failure. status=%s", __FILE__, __LINE__, u_errorName(status)); \
        return; \
    } \
} while (0)

// TEST_ASSERT(expr)
//   Reports a test error with file and line when `expr` evaluates to FALSE.
//   Unlike TEST_CHECK_STATUS it does not return; the test keeps running.
#define TEST_ASSERT(expr) do { \
    if ((expr)==FALSE) { \
        errln("%s:%d: Test failure \n", __FILE__, __LINE__); \
    } \
} while (0)
59 | ||
60 | // | |
61 | // APITest. Invoke every function at least once, and check that it does something. | |
62 | // Does not attempt to check complete functionality. | |
63 | // | |
64 | void AlphabeticIndexTest::APITest() { | |
65 | // | |
66 | // Simple constructor and destructor, getBucketCount() | |
67 | // | |
68 | UErrorCode status = U_ZERO_ERROR; | |
69 | int32_t lc = 0; | |
70 | int32_t i = 0; | |
71 | AlphabeticIndex *index = new AlphabeticIndex(Locale::getEnglish(), status); | |
72 | TEST_CHECK_STATUS; | |
73 | lc = index->getBucketCount(status); | |
74 | TEST_CHECK_STATUS; | |
75 | TEST_ASSERT(28 == lc); // 26 letters plus two under/overflow labels. | |
76 | //printf("getBucketCount() == %d\n", lc); | |
77 | delete index; | |
78 | ||
79 | // addLabels() | |
80 | ||
81 | status = U_ZERO_ERROR; | |
82 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
83 | TEST_CHECK_STATUS; | |
84 | UnicodeSet additions; | |
85 | additions.add((UChar32)0x410).add((UChar32)0x415); // A couple of Cyrillic letters | |
86 | index->addLabels(additions, status); | |
87 | TEST_CHECK_STATUS; | |
88 | lc = index->getBucketCount(status); | |
89 | TEST_CHECK_STATUS; | |
90 | // TODO: should get 31. Java also gives 30. Needs fixing | |
91 | TEST_ASSERT(30 == lc); // 26 Latin letters plus | |
92 | // TEST_ASSERT(31 == lc); // 26 Latin letters plus | |
93 | // 2 Cyrillic letters plus | |
94 | // 1 inflow label plus | |
95 | // two under/overflow labels. | |
96 | // std::cout << lc << std::endl; | |
97 | delete index; | |
98 | ||
99 | ||
100 | // addLabels(Locale) | |
101 | ||
102 | status = U_ZERO_ERROR; | |
103 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
104 | TEST_CHECK_STATUS; | |
105 | AlphabeticIndex &aip = index->addLabels(Locale::getJapanese(), status); | |
106 | TEST_ASSERT(&aip == index); | |
107 | TEST_CHECK_STATUS; | |
108 | lc = index->getBucketCount(status); | |
109 | TEST_CHECK_STATUS; | |
110 | TEST_ASSERT(35 < lc); // Japanese should add a bunch. Don't rely on the exact value. | |
111 | delete index; | |
112 | ||
113 | // GetCollator(), Get under/in/over flow labels | |
114 | ||
115 | status = U_ZERO_ERROR; | |
116 | index = new AlphabeticIndex(Locale::getGerman(), status); | |
117 | TEST_CHECK_STATUS; | |
118 | Collator *germanCol = Collator::createInstance(Locale::getGerman(), status); | |
119 | TEST_CHECK_STATUS; | |
120 | const RuleBasedCollator &indexCol = index->getCollator(); | |
121 | TEST_ASSERT(*germanCol == indexCol); | |
122 | delete germanCol; | |
123 | ||
124 | UnicodeString ELLIPSIS; ELLIPSIS.append((UChar32)0x2026); | |
125 | UnicodeString s = index->getUnderflowLabel(); | |
126 | TEST_ASSERT(ELLIPSIS == s); | |
127 | s = index->getOverflowLabel(); | |
128 | TEST_ASSERT(ELLIPSIS == s); | |
129 | s = index->getInflowLabel(); | |
130 | TEST_ASSERT(ELLIPSIS == s); | |
131 | index->setOverflowLabel(UNICODE_STRING_SIMPLE("O"), status); | |
132 | index->setUnderflowLabel(UNICODE_STRING_SIMPLE("U"), status).setInflowLabel(UNICODE_STRING_SIMPLE("I"), status); | |
133 | s = index->getUnderflowLabel(); | |
134 | TEST_ASSERT(UNICODE_STRING_SIMPLE("U") == s); | |
135 | s = index->getOverflowLabel(); | |
136 | TEST_ASSERT(UNICODE_STRING_SIMPLE("O") == s); | |
137 | s = index->getInflowLabel(); | |
138 | TEST_ASSERT(UNICODE_STRING_SIMPLE("I") == s); | |
139 | ||
140 | ||
141 | ||
142 | ||
143 | delete index; | |
144 | ||
145 | ||
146 | ||
147 | const UnicodeString adam = UNICODE_STRING_SIMPLE("Adam"); | |
148 | const UnicodeString baker = UNICODE_STRING_SIMPLE("Baker"); | |
149 | const UnicodeString charlie = UNICODE_STRING_SIMPLE("Charlie"); | |
150 | const UnicodeString chad = UNICODE_STRING_SIMPLE("Chad"); | |
151 | const UnicodeString zed = UNICODE_STRING_SIMPLE("Zed"); | |
152 | const UnicodeString Cyrillic = UNICODE_STRING_SIMPLE("\\u0410\\u0443\\u0435").unescape(); | |
153 | ||
154 | // addRecord(), verify that it comes back out. | |
155 | // | |
156 | status = U_ZERO_ERROR; | |
157 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
158 | TEST_CHECK_STATUS; | |
159 | index->addRecord(UnicodeString("Adam"), this, status); | |
160 | UBool b; | |
161 | TEST_CHECK_STATUS; | |
162 | index->resetBucketIterator(status); | |
163 | TEST_CHECK_STATUS; | |
164 | index->nextBucket(status); // Move to underflow label | |
165 | index->nextBucket(status); // Move to "A" | |
166 | TEST_CHECK_STATUS; | |
167 | const UnicodeString &label2 = index->getBucketLabel(); | |
168 | UnicodeString A_STR = UNICODE_STRING_SIMPLE("A"); | |
169 | TEST_ASSERT(A_STR == label2); | |
170 | ||
171 | b = index->nextRecord(status); | |
172 | TEST_CHECK_STATUS; | |
173 | TEST_ASSERT(b); | |
174 | const UnicodeString &itemName = index->getRecordName(); | |
175 | TEST_ASSERT(adam == itemName); | |
176 | ||
177 | const void *itemContext = index->getRecordData(); | |
178 | TEST_ASSERT(itemContext == this); | |
179 | ||
180 | delete index; | |
181 | ||
182 | // clearRecords, addRecord(), Iteration | |
183 | ||
184 | status = U_ZERO_ERROR; | |
185 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
186 | TEST_CHECK_STATUS; | |
187 | while (index->nextBucket(status)) { | |
188 | TEST_CHECK_STATUS; | |
189 | while (index->nextRecord(status)) { | |
190 | TEST_CHECK_STATUS; | |
191 | TEST_ASSERT(FALSE); // No items have been added. | |
192 | } | |
193 | TEST_CHECK_STATUS; | |
194 | } | |
195 | ||
196 | index->addRecord(adam, NULL, status); | |
197 | index->addRecord(baker, NULL, status); | |
198 | index->addRecord(charlie, NULL, status); | |
199 | index->addRecord(chad, NULL, status); | |
200 | TEST_CHECK_STATUS; | |
201 | int itemCount = 0; | |
202 | index->resetBucketIterator(status); | |
203 | while (index->nextBucket(status)) { | |
204 | TEST_CHECK_STATUS; | |
205 | while (index->nextRecord(status)) { | |
206 | TEST_CHECK_STATUS; | |
207 | ++itemCount; | |
208 | } | |
209 | } | |
210 | TEST_CHECK_STATUS; | |
211 | TEST_ASSERT(itemCount == 4); | |
212 | ||
213 | TEST_ASSERT(index->nextBucket(status) == FALSE); | |
214 | index->resetBucketIterator(status); | |
215 | TEST_CHECK_STATUS; | |
216 | TEST_ASSERT(index->nextBucket(status) == TRUE); | |
217 | ||
218 | index->clearRecords(status); | |
219 | TEST_CHECK_STATUS; | |
220 | index->resetBucketIterator(status); | |
221 | while (index->nextBucket(status)) { | |
222 | TEST_CHECK_STATUS; | |
223 | while (index->nextRecord(status)) { | |
224 | TEST_ASSERT(FALSE); // No items have been added. | |
225 | } | |
226 | } | |
227 | TEST_CHECK_STATUS; | |
228 | delete index; | |
229 | ||
230 | // getBucketLabel(), getBucketType() | |
231 | ||
232 | status = U_ZERO_ERROR; | |
233 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
234 | TEST_CHECK_STATUS; | |
235 | index->setUnderflowLabel(adam, status).setOverflowLabel(charlie, status); | |
236 | TEST_CHECK_STATUS; | |
237 | for (i=0; index->nextBucket(status); i++) { | |
238 | TEST_CHECK_STATUS; | |
239 | UnicodeString label = index->getBucketLabel(); | |
240 | UAlphabeticIndexLabelType type = index->getBucketLabelType(); | |
241 | if (i == 0) { | |
242 | TEST_ASSERT(type == U_ALPHAINDEX_UNDERFLOW); | |
243 | TEST_ASSERT(label == adam); | |
244 | } else if (i <= 26) { | |
245 | // Labels A - Z for English locale | |
246 | TEST_ASSERT(type == U_ALPHAINDEX_NORMAL); | |
247 | UnicodeString expectedLabel((UChar)(0x40 + i)); | |
248 | TEST_ASSERT(expectedLabel == label); | |
249 | } else if (i == 27) { | |
250 | TEST_ASSERT(type == U_ALPHAINDEX_OVERFLOW); | |
251 | TEST_ASSERT(label == charlie); | |
252 | } else { | |
253 | TEST_ASSERT(FALSE); | |
254 | } | |
255 | } | |
256 | TEST_ASSERT(i==28); | |
257 | delete index; | |
258 | ||
259 | // getBucketIndex() | |
260 | ||
261 | status = U_ZERO_ERROR; | |
262 | index = new AlphabeticIndex(Locale::getEnglish(), status); | |
263 | TEST_CHECK_STATUS; | |
264 | int32_t n = index->getBucketIndex(adam, status); | |
265 | TEST_CHECK_STATUS; | |
266 | TEST_ASSERT(n == 1); /* Label #0 is underflow, 1 is A, etc. */ | |
267 | n = index->getBucketIndex(baker, status); | |
268 | TEST_ASSERT(n == 2); | |
269 | n = index->getBucketIndex(Cyrillic, status); | |
270 | TEST_ASSERT(n == 27); // Overflow label | |
271 | n = index->getBucketIndex(zed, status); | |
272 | TEST_ASSERT(n == 26); | |
273 | ||
274 | for (i=0; index->nextBucket(status); i++) { | |
275 | n = index->getBucketIndex(); | |
276 | TEST_ASSERT(n == i); | |
277 | UnicodeString label = index->getBucketLabel(); | |
278 | TEST_ASSERT(n == i); | |
279 | } | |
280 | TEST_ASSERT(i == 28); | |
281 | ||
282 | delete index; | |
283 | index = new AlphabeticIndex(Locale::createFromName("ru"), status); | |
284 | //Locale loc = Locale::createFromName(localeName); | |
285 | TEST_CHECK_STATUS; | |
286 | n = index->getBucketIndex(adam, status); | |
287 | TEST_CHECK_STATUS; | |
288 | TEST_ASSERT(n == 0); // Label #0 is underflow | |
289 | n = index->getBucketIndex(baker, status); | |
290 | TEST_ASSERT(n == 0); | |
291 | n = index->getBucketIndex(Cyrillic, status); | |
292 | TEST_ASSERT(n == 1); // Overflow label | |
293 | n = index->getBucketIndex(zed, status); | |
294 | TEST_ASSERT(n == 0); | |
295 | ||
296 | delete index; | |
297 | ||
298 | } | |
299 | ||
300 | ||
// Locale IDs exercised by ManyLocalesTest().
// The list is terminated by an empty string, not by NULL.
static const char * KEY_LOCALES[] = {
    "en", "es", "de", "fr", "ja",
    "it", "tr", "pt", "zh", "nl",
    "pl", "ar", "ru", "zh_Hant", "ko",
    "th", "sv", "fi", "da", "he",
    "nb", "el", "hr", "bg", "sk",
    "lt", "vi", "lv", "sr", "pt_PT",
    "ro", "hu", "cs", "id", "sl",
    "fil", "fa", "uk", "ca", "hi",
    "et", "eu", "is", "sw", "ms",
    "bn", "am", "ta", "te", "mr",
    "ur", "ml", "kn", "gu", "or",
    ""
};
308 | ||
309 | ||
310 | void AlphabeticIndexTest::ManyLocalesTest() { | |
311 | UErrorCode status = U_ZERO_ERROR; | |
312 | int32_t lc = 0; | |
313 | AlphabeticIndex *index = NULL; | |
314 | ||
315 | for (int i=0; ; ++i) { | |
316 | status = U_ZERO_ERROR; | |
317 | const char *localeName = KEY_LOCALES[i]; | |
318 | if (localeName[0] == 0) { | |
319 | break; | |
320 | } | |
321 | // std::cout << localeName << " "; | |
322 | Locale loc = Locale::createFromName(localeName); | |
323 | index = new AlphabeticIndex(loc, status); | |
324 | TEST_CHECK_STATUS; | |
325 | lc = index->getBucketCount(status); | |
326 | TEST_CHECK_STATUS; | |
327 | // std::cout << "getBucketCount() == " << lc << std::endl; | |
328 | ||
329 | while (index->nextBucket(status)) { | |
330 | TEST_CHECK_STATUS; | |
331 | const UnicodeString &label = index->getBucketLabel(); | |
332 | TEST_ASSERT(label.length()>0); | |
333 | // std::string ss; | |
334 | // std::cout << ":" << label.toUTF8String(ss); | |
335 | } | |
336 | // std::cout << ":" << std::endl; | |
337 | ||
338 | ||
339 | delete index; | |
340 | } | |
341 | } | |
342 | ||
343 | ||
// Test data for Pinyin based indexes.
//   The Chinese characters should be distributed under latin labels in
//   an index.
//
//   Each row begins with a Latin-script entry (a plain letter, or a vowel with
//   a macron written as an escape) followed by Han characters. All entries
//   are backslash-escaped text; the list is terminated by NULL.

static const char *pinyinTestData[] = {
    "\\u0101", "\\u5416", "\\u58ba",
    "b", "\\u516b", "\\u62d4", "\\u8500",
    "c", "\\u5693", "\\u7938", "\\u9e7e",
    "d", "\\u5491", "\\u8fcf", "\\u964a",
    "\\u0113", "\\u59b8", "\\u92e8", "\\u834b",
    "f", "\\u53d1", "\\u9197", "\\u99a5",
    "g", "\\u7324", "\\u91d3", "\\u8142",
    "h", "\\u598e", "\\u927f", "\\u593b",
    "j", "\\u4e0c", "\\u6785", "\\u9d58",
    "k", "\\u5494", "\\u958b", "\\u7a52",
    "l", "\\u5783", "\\u62c9", "\\u9ba5",
    "m", "\\u5638", "\\u9ebb", "\\u65c0",
    "n", "\\u62ff", "\\u80ad", "\\u685b",
    "\\u014D", "\\u5662", "\\u6bee", "\\u8bb4",
    "p", "\\u5991", "\\u8019", "\\u8c31",
    "q", "\\u4e03", "\\u6053", "\\u7f56",
    "r", "\\u5465", "\\u72aa", "\\u6e03",
    "s", "\\u4ee8", "\\u9491", "\\u93c1",
    "t", "\\u4ed6", "\\u9248", "\\u67dd",
    "w", "\\u5c72", "\\u5558", "\\u5a7a",
    "x", "\\u5915", "\\u5438", "\\u6bbe",
    "y", "\\u4e2b", "\\u82bd", "\\u8574",
    "z", "\\u5e00", "\\u707d", "\\u5c0a",
    NULL
};
374 | ||
375 | void AlphabeticIndexTest::HackPinyinTest() { | |
376 | UErrorCode status = U_ZERO_ERROR; | |
377 | AlphabeticIndex aindex(Locale::createFromName("zh"), status); | |
378 | TEST_CHECK_STATUS; | |
379 | ||
380 | UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])]; | |
381 | int32_t nameCount; | |
382 | for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) { | |
383 | names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape(); | |
384 | aindex.addRecord(names[nameCount], &names[nameCount], status); | |
385 | TEST_CHECK_STATUS; | |
386 | if (U_FAILURE(status)) { | |
387 | return; | |
388 | } | |
389 | } | |
390 | TEST_ASSERT(nameCount == aindex.getRecordCount(status)); | |
391 | ||
392 | // Weak checking: make sure that none of the Chinese names landed in the overflow bucket | |
393 | // of the index, and that the names are distributed among several buckets. | |
394 | // (Exact expected data would be subject to change with evolution of the collation rules.) | |
395 | ||
396 | int32_t bucketCount = 0; | |
397 | int32_t filledBucketCount = 0; | |
398 | while (aindex.nextBucket(status)) { | |
399 | bucketCount++; | |
400 | UnicodeString label = aindex.getBucketLabel(); | |
401 | // std::string s; | |
402 | // std::cout << label.toUTF8String(s) << ": "; | |
403 | ||
404 | UBool bucketHasContents = FALSE; | |
405 | while (aindex.nextRecord(status)) { | |
406 | bucketHasContents = TRUE; | |
407 | UnicodeString name = aindex.getRecordName(); | |
408 | if (aindex.getBucketLabelType() != U_ALPHAINDEX_NORMAL) { | |
409 | errln("File %s, Line %d, Name \"\\u%x\" is in an under or overflow bucket.", | |
410 | __FILE__, __LINE__, name.char32At(0)); | |
411 | } | |
412 | // s.clear(); | |
413 | // std::cout << aindex.getRecordName().toUTF8String(s) << " "; | |
414 | } | |
415 | if (bucketHasContents) { | |
416 | filledBucketCount++; | |
417 | } | |
418 | // std::cout << std::endl; | |
419 | } | |
420 | TEST_ASSERT(bucketCount > 25); | |
421 | TEST_ASSERT(filledBucketCount > 15); | |
422 | } | |
423 | ||
424 | ||
425 | void AlphabeticIndexTest::TestBug9009() { | |
426 | UErrorCode status = U_ZERO_ERROR; | |
427 | Locale loc("root"); | |
428 | AlphabeticIndex aindex(loc, status); | |
429 | TEST_CHECK_STATUS; | |
430 | aindex.nextBucket(status); // Crash here before bug was fixed. | |
431 | TEST_CHECK_STATUS; | |
432 | } | |
433 | ||
434 | ||
435 | #endif |