]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/citertst.c
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / citertst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CITERTST.C
9 *
10 * Modification History:
11 * Date Name Description
12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator
14 *********************************************************************************/
15 /*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ustring.h"
29 #include "unicode/putil.h"
30 #include "callcoll.h"
31 #include "cmemory.h"
32 #include "cintltst.h"
33 #include "citertst.h"
34 #include "ccolltst.h"
35 #include "filestrm.h"
36 #include "cstring.h"
37 #include "ucol_imp.h"
38 #include "uparse.h"
39 #include <stdio.h>
40
41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
42
43 void addCollIterTest(TestNode** root)
44 {
45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
50 addTest(root, &TestNormalizedUnicodeChar,
51 "tscoll/citertst/TestNormalizedUnicodeChar");
52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
56 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
57 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
58 }
59
/* The locales we support; read-only table of locale IDs used by the tests. */

static const char * const LOCALES[] = {"en_AU", "en_BE", "en_CA"};
63
64 static void TestBug672() {
65 UErrorCode status = U_ZERO_ERROR;
66 UChar pattern[20];
67 UChar text[50];
68 int i;
69 int result[3][3];
70
71 u_uastrcpy(pattern, "resume");
72 u_uastrcpy(text, "Time to resume updating my resume.");
73
74 for (i = 0; i < 3; ++ i) {
75 UCollator *coll = ucol_open(LOCALES[i], &status);
76 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
77 &status);
78 UCollationElements *titer = ucol_openElements(coll, text, -1,
79 &status);
80 if (U_FAILURE(status)) {
81 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
82 myErrorName(status));
83 return;
84 }
85
86 log_verbose("locale tested %s\n", LOCALES[i]);
87
88 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
89 U_SUCCESS(status)) {
90 }
91 if (U_FAILURE(status)) {
92 log_err("ERROR: reversing collation iterator :%s\n",
93 myErrorName(status));
94 return;
95 }
96 ucol_reset(pitr);
97
98 ucol_setOffset(titer, u_strlen(pattern), &status);
99 if (U_FAILURE(status)) {
100 log_err("ERROR: setting offset in collator :%s\n",
101 myErrorName(status));
102 return;
103 }
104 result[i][0] = ucol_getOffset(titer);
105 log_verbose("Text iterator set to offset %d\n", result[i][0]);
106
107 /* Use previous() */
108 ucol_previous(titer, &status);
109 result[i][1] = ucol_getOffset(titer);
110 log_verbose("Current offset %d after previous\n", result[i][1]);
111
112 /* Add one to index */
113 log_verbose("Adding one to current offset...\n");
114 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
115 if (U_FAILURE(status)) {
116 log_err("ERROR: setting offset in collator :%s\n",
117 myErrorName(status));
118 return;
119 }
120 result[i][2] = ucol_getOffset(titer);
121 log_verbose("Current offset in text = %d\n", result[i][2]);
122 ucol_closeElements(pitr);
123 ucol_closeElements(titer);
124 ucol_close(coll);
125 }
126
127 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
128 uprv_memcmp(result[1], result[2], 3) != 0) {
129 log_err("ERROR: Different locales have different offsets at the same character\n");
130 }
131 }
132
133
134
135 /* Running this test with normalization enabled showed up a bug in the incremental
136 normalization code. */
137 static void TestBug672Normalize() {
138 UErrorCode status = U_ZERO_ERROR;
139 UChar pattern[20];
140 UChar text[50];
141 int i;
142 int result[3][3];
143
144 u_uastrcpy(pattern, "resume");
145 u_uastrcpy(text, "Time to resume updating my resume.");
146
147 for (i = 0; i < 3; ++ i) {
148 UCollator *coll = ucol_open(LOCALES[i], &status);
149 UCollationElements *pitr = NULL;
150 UCollationElements *titer = NULL;
151
152 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
153
154 pitr = ucol_openElements(coll, pattern, -1, &status);
155 titer = ucol_openElements(coll, text, -1, &status);
156 if (U_FAILURE(status)) {
157 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
158 myErrorName(status));
159 return;
160 }
161
162 log_verbose("locale tested %s\n", LOCALES[i]);
163
164 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
165 U_SUCCESS(status)) {
166 }
167 if (U_FAILURE(status)) {
168 log_err("ERROR: reversing collation iterator :%s\n",
169 myErrorName(status));
170 return;
171 }
172 ucol_reset(pitr);
173
174 ucol_setOffset(titer, u_strlen(pattern), &status);
175 if (U_FAILURE(status)) {
176 log_err("ERROR: setting offset in collator :%s\n",
177 myErrorName(status));
178 return;
179 }
180 result[i][0] = ucol_getOffset(titer);
181 log_verbose("Text iterator set to offset %d\n", result[i][0]);
182
183 /* Use previous() */
184 ucol_previous(titer, &status);
185 result[i][1] = ucol_getOffset(titer);
186 log_verbose("Current offset %d after previous\n", result[i][1]);
187
188 /* Add one to index */
189 log_verbose("Adding one to current offset...\n");
190 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
191 if (U_FAILURE(status)) {
192 log_err("ERROR: setting offset in collator :%s\n",
193 myErrorName(status));
194 return;
195 }
196 result[i][2] = ucol_getOffset(titer);
197 log_verbose("Current offset in text = %d\n", result[i][2]);
198 ucol_closeElements(pitr);
199 ucol_closeElements(titer);
200 ucol_close(coll);
201 }
202
203 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
204 uprv_memcmp(result[1], result[2], 3) != 0) {
205 log_err("ERROR: Different locales have different offsets at the same character\n");
206 }
207 }
208
209
210
211
212 /**
213 * Test for CollationElementIterator previous and next for the whole set of
214 * unicode characters.
215 */
216 static void TestUnicodeChar()
217 {
218 UChar source[0x100];
219 UCollator *en_us;
220 UCollationElements *iter;
221 UErrorCode status = U_ZERO_ERROR;
222 UChar codepoint;
223
224 UChar *test;
225 en_us = ucol_open("en_US", &status);
226 if (U_FAILURE(status)){
227 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
228 myErrorName(status));
229 return;
230 }
231
232 for (codepoint = 1; codepoint < 0xFFFE;)
233 {
234 test = source;
235
236 while (codepoint % 0xFF != 0)
237 {
238 if (u_isdefined(codepoint))
239 *(test ++) = codepoint;
240 codepoint ++;
241 }
242
243 if (u_isdefined(codepoint))
244 *(test ++) = codepoint;
245
246 if (codepoint != 0xFFFF)
247 codepoint ++;
248
249 *test = 0;
250 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
251 if(U_FAILURE(status)){
252 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
253 myErrorName(status));
254 ucol_close(en_us);
255 return;
256 }
257 /* A basic test to see if it's working at all */
258 log_verbose("codepoint testing %x\n", codepoint);
259 backAndForth(iter);
260 ucol_closeElements(iter);
261
262 /* null termination test */
263 iter=ucol_openElements(en_us, source, -1, &status);
264 if(U_FAILURE(status)){
265 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
266 myErrorName(status));
267 ucol_close(en_us);
268 return;
269 }
270 /* A basic test to see if it's working at all */
271 backAndForth(iter);
272 ucol_closeElements(iter);
273 }
274
275 ucol_close(en_us);
276 }
277
278 /**
279 * Test for CollationElementIterator previous and next for the whole set of
280 * unicode characters with normalization on.
281 */
282 static void TestNormalizedUnicodeChar()
283 {
284 UChar source[0x100];
285 UCollator *th_th;
286 UCollationElements *iter;
287 UErrorCode status = U_ZERO_ERROR;
288 UChar codepoint;
289
290 UChar *test;
291 /* thai should have normalization on */
292 th_th = ucol_open("th_TH", &status);
293 if (U_FAILURE(status)){
294 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
295 myErrorName(status));
296 return;
297 }
298
299 for (codepoint = 1; codepoint < 0xFFFE;)
300 {
301 test = source;
302
303 while (codepoint % 0xFF != 0)
304 {
305 if (u_isdefined(codepoint))
306 *(test ++) = codepoint;
307 codepoint ++;
308 }
309
310 if (u_isdefined(codepoint))
311 *(test ++) = codepoint;
312
313 if (codepoint != 0xFFFF)
314 codepoint ++;
315
316 *test = 0;
317 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
318 if(U_FAILURE(status)){
319 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
320 myErrorName(status));
321 ucol_close(th_th);
322 return;
323 }
324
325 backAndForth(iter);
326 ucol_closeElements(iter);
327
328 iter=ucol_openElements(th_th, source, -1, &status);
329 if(U_FAILURE(status)){
330 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
331 myErrorName(status));
332 ucol_close(th_th);
333 return;
334 }
335
336 backAndForth(iter);
337 ucol_closeElements(iter);
338 }
339
340 ucol_close(th_th);
341 }
342
343 /**
344 * Test the incremental normalization
345 */
346 static void TestNormalization()
347 {
348 UErrorCode status = U_ZERO_ERROR;
349 const char *str =
350 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
351 UCollator *coll;
352 UChar rule[50];
353 int rulelen = u_unescape(str, rule, 50);
354 int count = 0;
355 const char *testdata[] =
356 {"\\u1ED9", "o\\u0323\\u0302",
357 "\\u0300\\u0315", "\\u0315\\u0300",
358 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
359 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
360 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
361 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
362 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
363 int32_t srclen;
364 UChar source[10];
365 UCollationElements *iter;
366
367 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
368 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
369 if (U_FAILURE(status)){
370 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
371 myErrorName(status));
372 return;
373 }
374
375 srclen = u_unescape(testdata[0], source, 10);
376 iter = ucol_openElements(coll, source, srclen, &status);
377 backAndForth(iter);
378 ucol_closeElements(iter);
379
380 srclen = u_unescape(testdata[1], source, 10);
381 iter = ucol_openElements(coll, source, srclen, &status);
382 backAndForth(iter);
383 ucol_closeElements(iter);
384
385 while (count < 12) {
386 srclen = u_unescape(testdata[count], source, 10);
387 iter = ucol_openElements(coll, source, srclen, &status);
388
389 if (U_FAILURE(status)){
390 log_err("ERROR: in creation of collator element iterator\n %s\n",
391 myErrorName(status));
392 return;
393 }
394 backAndForth(iter);
395 ucol_closeElements(iter);
396
397 iter = ucol_openElements(coll, source, -1, &status);
398
399 if (U_FAILURE(status)){
400 log_err("ERROR: in creation of collator element iterator\n %s\n",
401 myErrorName(status));
402 return;
403 }
404 backAndForth(iter);
405 ucol_closeElements(iter);
406 count ++;
407 }
408 ucol_close(coll);
409 }
410
411 /**
412 * Test for CollationElementIterator.previous()
413 *
414 * @bug 4108758 - Make sure it works with contracting characters
415 *
416 */
417 static void TestPrevious()
418 {
419 UCollator *coll=NULL;
420 UChar rule[50];
421 UChar *source;
422 UCollator *c1, *c2, *c3;
423 UCollationElements *iter;
424 UErrorCode status = U_ZERO_ERROR;
425 UChar test1[50];
426 UChar test2[50];
427
428 u_uastrcpy(test1, "What subset of all possible test cases?");
429 u_uastrcpy(test2, "has the highest probability of detecting");
430 coll = ucol_open("en_US", &status);
431
432 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
433 log_verbose("English locale testing back and forth\n");
434 if(U_FAILURE(status)){
435 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
436 myErrorName(status));
437 ucol_close(coll);
438 return;
439 }
440 /* A basic test to see if it's working at all */
441 backAndForth(iter);
442 ucol_closeElements(iter);
443 ucol_close(coll);
444
445 /* Test with a contracting character sequence */
446 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
447 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
448
449 log_verbose("Contraction rule testing back and forth with no normalization\n");
450
451 if (c1 == NULL || U_FAILURE(status))
452 {
453 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
454 myErrorName(status));
455 return;
456 }
457 source=(UChar*)malloc(sizeof(UChar) * 20);
458 u_uastrcpy(source, "abchdcba");
459 iter=ucol_openElements(c1, source, u_strlen(source), &status);
460 if(U_FAILURE(status)){
461 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
462 myErrorName(status));
463 return;
464 }
465 backAndForth(iter);
466 ucol_closeElements(iter);
467 ucol_close(c1);
468
469 /* Test with an expanding character sequence */
470 u_uastrcpy(rule, "&a < b < c/abd < d");
471 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
472 log_verbose("Expansion rule testing back and forth with no normalization\n");
473 if (c2 == NULL || U_FAILURE(status))
474 {
475 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
476 myErrorName(status));
477 return;
478 }
479 u_uastrcpy(source, "abcd");
480 iter=ucol_openElements(c2, source, u_strlen(source), &status);
481 if(U_FAILURE(status)){
482 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
483 myErrorName(status));
484 return;
485 }
486 backAndForth(iter);
487 ucol_closeElements(iter);
488 ucol_close(c2);
489 /* Now try both */
490 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
491 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
492 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
493
494 if (c3 == NULL || U_FAILURE(status))
495 {
496 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
497 myErrorName(status));
498 return;
499 }
500 u_uastrcpy(source, "abcdbchdc");
501 iter=ucol_openElements(c3, source, u_strlen(source), &status);
502 if(U_FAILURE(status)){
503 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
504 myErrorName(status));
505 return;
506 }
507 backAndForth(iter);
508 ucol_closeElements(iter);
509 ucol_close(c3);
510 source[0] = 0x0e41;
511 source[1] = 0x0e02;
512 source[2] = 0x0e41;
513 source[3] = 0x0e02;
514 source[4] = 0x0e27;
515 source[5] = 0x61;
516 source[6] = 0x62;
517 source[7] = 0x63;
518 source[8] = 0;
519
520 coll = ucol_open("th_TH", &status);
521 log_verbose("Thai locale testing back and forth with normalization\n");
522 iter=ucol_openElements(coll, source, u_strlen(source), &status);
523 if(U_FAILURE(status)){
524 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
525 myErrorName(status));
526 return;
527 }
528 backAndForth(iter);
529 ucol_closeElements(iter);
530 ucol_close(coll);
531
532 /* prev test */
533 source[0] = 0x0061;
534 source[1] = 0x30CF;
535 source[2] = 0x3099;
536 source[3] = 0x30FC;
537 source[4] = 0;
538
539 coll = ucol_open("ja_JP", &status);
540 log_verbose("Japanese locale testing back and forth with normalization\n");
541 iter=ucol_openElements(coll, source, u_strlen(source), &status);
542 if(U_FAILURE(status)){
543 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
544 myErrorName(status));
545 return;
546 }
547 backAndForth(iter);
548 ucol_closeElements(iter);
549 ucol_close(coll);
550
551 free(source);
552 }
553
554 /**
555 * Test for getOffset() and setOffset()
556 */
557 static void TestOffset()
558 {
559 UErrorCode status= U_ZERO_ERROR;
560 UCollator *en_us=NULL;
561 UCollationElements *iter, *pristine;
562 int32_t offset;
563 OrderAndOffset *orders;
564 int32_t orderLength=0;
565 int count = 0;
566 UChar test1[50];
567 UChar test2[50];
568
569 u_uastrcpy(test1, "What subset of all possible test cases?");
570 u_uastrcpy(test2, "has the highest probability of detecting");
571 en_us = ucol_open("en_US", &status);
572 log_verbose("Testing getOffset and setOffset for collations\n");
573 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
574 if(U_FAILURE(status)){
575 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
576 myErrorName(status));
577 ucol_close(en_us);
578 return;
579 }
580
581 /* testing boundaries */
582 ucol_setOffset(iter, 0, &status);
583 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
584 log_err("Error: After setting offset to 0, we should be at the end "
585 "of the backwards iteration");
586 }
587 ucol_setOffset(iter, u_strlen(test1), &status);
588 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
589 log_err("Error: After setting offset to end of the string, we should "
590 "be at the end of the backwards iteration");
591 }
592
593 /* Run all the way through the iterator, then get the offset */
594
595 orders = getOrders(iter, &orderLength);
596
597 offset = ucol_getOffset(iter);
598
599 if (offset != u_strlen(test1))
600 {
601 log_err("offset at end != length %d vs %d\n", offset,
602 u_strlen(test1) );
603 }
604
605 /* Now set the offset back to the beginning and see if it works */
606 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
607 if(U_FAILURE(status)){
608 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
609 myErrorName(status));
610 ucol_close(en_us);
611 return;
612 }
613 status = U_ZERO_ERROR;
614
615 ucol_setOffset(iter, 0, &status);
616 if (U_FAILURE(status))
617 {
618 log_err("setOffset failed. %s\n", myErrorName(status));
619 }
620 else
621 {
622 assertEqual(iter, pristine);
623 }
624
625 ucol_closeElements(pristine);
626 ucol_closeElements(iter);
627 free(orders);
628
629 /* testing offsets in normalization buffer */
630 test1[0] = 0x61;
631 test1[1] = 0x300;
632 test1[2] = 0x316;
633 test1[3] = 0x62;
634 test1[4] = 0;
635 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
636 iter = ucol_openElements(en_us, test1, 4, &status);
637 if(U_FAILURE(status)){
638 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
639 myErrorName(status));
640 ucol_close(en_us);
641 return;
642 }
643
644 count = 0;
645 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
646 U_SUCCESS(status)) {
647 switch (count) {
648 case 0:
649 if (ucol_getOffset(iter) != 1) {
650 log_err("ERROR: Offset of iteration should be 1\n");
651 }
652 break;
653 case 3:
654 if (ucol_getOffset(iter) != 4) {
655 log_err("ERROR: Offset of iteration should be 4\n");
656 }
657 break;
658 default:
659 if (ucol_getOffset(iter) != 3) {
660 log_err("ERROR: Offset of iteration should be 3\n");
661 }
662 }
663 count ++;
664 }
665
666 ucol_reset(iter);
667 count = 0;
668 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
669 U_SUCCESS(status)) {
670 switch (count) {
671 case 0:
672 case 1:
673 if (ucol_getOffset(iter) != 3) {
674 log_err("ERROR: Offset of iteration should be 3\n");
675 }
676 break;
677 case 2:
678 if (ucol_getOffset(iter) != 1) {
679 log_err("ERROR: Offset of iteration should be 1\n");
680 }
681 break;
682 default:
683 if (ucol_getOffset(iter) != 0) {
684 log_err("ERROR: Offset of iteration should be 0\n");
685 }
686 }
687 count ++;
688 }
689
690 if(U_FAILURE(status)){
691 log_err("ERROR: in iterating collation elements %s\n",
692 myErrorName(status));
693 }
694
695 ucol_closeElements(iter);
696 ucol_close(en_us);
697 }
698
699 /**
700 * Test for setText()
701 */
702 static void TestSetText()
703 {
704 int32_t c,i;
705 UErrorCode status = U_ZERO_ERROR;
706 UCollator *en_us=NULL;
707 UCollationElements *iter1, *iter2;
708 UChar test1[50];
709 UChar test2[50];
710
711 u_uastrcpy(test1, "What subset of all possible test cases?");
712 u_uastrcpy(test2, "has the highest probability of detecting");
713 en_us = ucol_open("en_US", &status);
714 log_verbose("testing setText for Collation elements\n");
715 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
716 if(U_FAILURE(status)){
717 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
718 myErrorName(status));
719 ucol_close(en_us);
720 return;
721 }
722 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
723 if(U_FAILURE(status)){
724 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
725 myErrorName(status));
726 ucol_close(en_us);
727 return;
728 }
729
730 /* Run through the second iterator just to exercise it */
731 c = ucol_next(iter2, &status);
732 i = 0;
733
734 while ( ++i < 10 && (c != UCOL_NULLORDER))
735 {
736 if (U_FAILURE(status))
737 {
738 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
739 ucol_closeElements(iter2);
740 ucol_closeElements(iter1);
741 ucol_close(en_us);
742 return;
743 }
744
745 c = ucol_next(iter2, &status);
746 }
747
748 /* Now set it to point to the same string as the first iterator */
749 ucol_setText(iter2, test1, u_strlen(test1), &status);
750 if (U_FAILURE(status))
751 {
752 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
753 }
754 else
755 {
756 assertEqual(iter1, iter2);
757 }
758
759 /* Now set it to point to a null string with fake length*/
760 ucol_setText(iter2, NULL, 2, &status);
761 if (status != U_ILLEGAL_ARGUMENT_ERROR)
762 {
763 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
764 myErrorName(status));
765 }
766
767 ucol_closeElements(iter2);
768 ucol_closeElements(iter1);
769 ucol_close(en_us);
770 }
771
772 /** @bug 4108762
773 * Test for getMaxExpansion()
774 */
775 static void TestMaxExpansion()
776 {
777 UErrorCode status = U_ZERO_ERROR;
778 UCollator *coll ;/*= ucol_open("en_US", &status);*/
779 UChar ch = 0;
780 UChar32 unassigned = 0xEFFFD;
781 UChar supplementary[2];
782 uint32_t stringOffset = 0;
783 UBool isError = FALSE;
784 uint32_t sorder = 0;
785 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
786 uint32_t temporder = 0;
787
788 UChar rule[256];
789 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
790 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
791 UCOL_DEFAULT_STRENGTH,NULL, &status);
792 if(U_SUCCESS(status) && coll) {
793 iter = ucol_openElements(coll, &ch, 1, &status);
794
795 while (ch < 0xFFFF && U_SUCCESS(status)) {
796 int count = 1;
797 uint32_t order;
798 int32_t size = 0;
799
800 ch ++;
801
802 ucol_setText(iter, &ch, 1, &status);
803 order = ucol_previous(iter, &status);
804
805 /* thai management */
806 if (order == 0)
807 order = ucol_previous(iter, &status);
808
809 while (U_SUCCESS(status) &&
810 ucol_previous(iter, &status) != UCOL_NULLORDER) {
811 count ++;
812 }
813
814 size = ucol_getMaxExpansion(iter, order);
815 if (U_FAILURE(status) || size < count) {
816 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
817 ch, count);
818 }
819 }
820
821 /* testing for exact max expansion */
822 ch = 0;
823 while (ch < 0x61) {
824 uint32_t order;
825 int32_t size;
826 ucol_setText(iter, &ch, 1, &status);
827 order = ucol_previous(iter, &status);
828 size = ucol_getMaxExpansion(iter, order);
829 if (U_FAILURE(status) || size != 1) {
830 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
831 ch, 1);
832 }
833 ch ++;
834 }
835
836 ch = 0x63;
837 ucol_setText(iter, &ch, 1, &status);
838 temporder = ucol_previous(iter, &status);
839
840 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
841 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
842 ch, 3);
843 }
844
845 ch = 0x64;
846 ucol_setText(iter, &ch, 1, &status);
847 temporder = ucol_previous(iter, &status);
848
849 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
850 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
851 ch, 3);
852 }
853
854 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
855 (void)isError; /* Suppress set but not used warning. */
856 ucol_setText(iter, supplementary, 2, &status);
857 sorder = ucol_previous(iter, &status);
858
859 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
860 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
861 ch, 2);
862 }
863
864 /* testing jamo */
865 ch = 0x1165;
866
867 ucol_setText(iter, &ch, 1, &status);
868 temporder = ucol_previous(iter, &status);
869 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
870 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
871 ch, 3);
872 }
873
874 ucol_closeElements(iter);
875 ucol_close(coll);
876
877 /* testing special jamo &a<\u1160 */
878 rule[0] = 0x26;
879 rule[1] = 0x71;
880 rule[2] = 0x3c;
881 rule[3] = 0x1165;
882 rule[4] = 0x2f;
883 rule[5] = 0x71;
884 rule[6] = 0x71;
885 rule[7] = 0x71;
886 rule[8] = 0x71;
887 rule[9] = 0;
888
889 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
890 UCOL_DEFAULT_STRENGTH,NULL, &status);
891 iter = ucol_openElements(coll, &ch, 1, &status);
892
893 temporder = ucol_previous(iter, &status);
894 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
895 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
896 ch, 5);
897 }
898
899 ucol_closeElements(iter);
900 ucol_close(coll);
901 } else {
902 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
903 }
904
905 }
906
907
908 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
909 {
910 int32_t c1, c2;
911 int32_t count = 0;
912 UErrorCode status = U_ZERO_ERROR;
913
914 do
915 {
916 c1 = ucol_next(i1, &status);
917 c2 = ucol_next(i2, &status);
918
919 if (c1 != c2)
920 {
921 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
922 break;
923 }
924
925 count += 1;
926 }
927 while (c1 != UCOL_NULLORDER);
928 }
929
930 /**
931 * Testing iterators with extremely small buffers
932 */
933 static void TestSmallBuffer()
934 {
935 UErrorCode status = U_ZERO_ERROR;
936 UCollator *coll;
937 UCollationElements *testiter,
938 *iter;
939 int32_t count = 0;
940 OrderAndOffset *testorders,
941 *orders;
942
943 UChar teststr[500];
944 UChar str[] = {0x300, 0x31A, 0};
945 /*
946 creating a long string of decomposable characters,
947 since by default the writable buffer is of size 256
948 */
949 while (count < 500) {
950 if ((count & 1) == 0) {
951 teststr[count ++] = 0x300;
952 }
953 else {
954 teststr[count ++] = 0x31A;
955 }
956 }
957
958 coll = ucol_open("th_TH", &status);
959 if(U_SUCCESS(status) && coll) {
960 testiter = ucol_openElements(coll, teststr, 500, &status);
961 iter = ucol_openElements(coll, str, 2, &status);
962
963 orders = getOrders(iter, &count);
964 if (count != 2) {
965 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
966 }
967
968 /*
969 this will rearrange the string data to 250 characters of 0x300 first then
970 250 characters of 0x031A
971 */
972 testorders = getOrders(testiter, &count);
973
974 if (count != 500) {
975 log_err("Error decomposition does not give the right sized collation elements\n");
976 }
977
978 while (count != 0) {
979 /* UCA collation element for 0x0F76 */
980 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
981 (count <= 250 && testorders[-- count].order != orders[0].order)) {
982 log_err("Error decomposition does not give the right collation element at %d count\n", count);
983 break;
984 }
985 }
986
987 free(testorders);
988 free(orders);
989
990 ucol_reset(testiter);
991
992 /* ensures closing of elements done properly to clear writable buffer */
993 ucol_next(testiter, &status);
994 ucol_next(testiter, &status);
995 ucol_closeElements(testiter);
996 ucol_closeElements(iter);
997 ucol_close(coll);
998 } else {
999 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1000 }
1001 }
1002
1003 /**
1004 * Testing the discontigous contractions
1005 */
1006 static void TestDiscontiguos() {
1007 const char *rulestr =
1008 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1009 UChar rule[50];
1010 int rulelen = u_unescape(rulestr, rule, 50);
1011 const char *src[] = {
1012 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1013 /* base character blocked */
1014 "XD\\u0300", "XD\\u0300\\u0315",
1015 /* non blocking combining character */
1016 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1017 /* blocking combining character */
1018 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1019 /* contraction prefix */
1020 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1021 "X\\u0300\\u031A\\u0315",
1022 /* ends not with a contraction character */
1023 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1024 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1025 };
1026 const char *tgt[] = {
1027 /* non blocking combining character */
1028 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1029 /* base character blocked */
1030 "X D \\u0300", "X D \\u0300\\u0315",
1031 /* non blocking combining character */
1032 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1033 /* blocking combining character */
1034 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1035 /* contraction prefix */
1036 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1037 "X\\u0300 \\u031A \\u0315",
1038 /* ends not with a contraction character */
1039 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1040 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1041 };
1042 int size = 20;
1043 UCollator *coll;
1044 UErrorCode status = U_ZERO_ERROR;
1045 int count = 0;
1046 UCollationElements *iter;
1047 UCollationElements *resultiter;
1048
1049 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1050 iter = ucol_openElements(coll, rule, 1, &status);
1051 resultiter = ucol_openElements(coll, rule, 1, &status);
1052
1053 if (U_FAILURE(status)) {
1054 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1055 return;
1056 }
1057
1058 while (count < size) {
1059 UChar str[20];
1060 UChar tstr[20];
1061 int strLen = u_unescape(src[count], str, 20);
1062 UChar *s;
1063
1064 ucol_setText(iter, str, strLen, &status);
1065 if (U_FAILURE(status)) {
1066 log_err("Error opening collation iterator\n");
1067 return;
1068 }
1069
1070 u_unescape(tgt[count], tstr, 20);
1071 s = tstr;
1072
1073 log_verbose("count %d\n", count);
1074
1075 for (;;) {
1076 uint32_t ce;
1077 UChar *e = u_strchr(s, 0x20);
1078 if (e == 0) {
1079 e = u_strchr(s, 0);
1080 }
1081 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1082 ce = ucol_next(resultiter, &status);
1083 if (U_FAILURE(status)) {
1084 log_err("Error manipulating collation iterator\n");
1085 return;
1086 }
1087 while (ce != UCOL_NULLORDER) {
1088 if (ce != (uint32_t)ucol_next(iter, &status) ||
1089 U_FAILURE(status)) {
1090 log_err("Discontiguos contraction test mismatch\n");
1091 return;
1092 }
1093 ce = ucol_next(resultiter, &status);
1094 if (U_FAILURE(status)) {
1095 log_err("Error getting next collation element\n");
1096 return;
1097 }
1098 }
1099 s = e + 1;
1100 if (*e == 0) {
1101 break;
1102 }
1103 }
1104 ucol_reset(iter);
1105 backAndForth(iter);
1106 count ++;
1107 }
1108 ucol_closeElements(resultiter);
1109 ucol_closeElements(iter);
1110 ucol_close(coll);
1111 }
1112
1113 /**
1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1115 * normalization on AND jamo tailoring, among other things.
1116 *
1117 * Note: This test is sensitive to changes of the root collator,
1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1121 * For example, the DUCET's artificial secondary CE in the ae-ligature
1122 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1123 */
1124 static const UChar tsceText[] = { /* Nothing in here should be ignorable */
1125 0x0020, 0xAC00, /* simple LV Hangul */
1126 0x0020, 0xAC01, /* simple LVT Hangul */
1127 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1128 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1129 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1130 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1131 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1132 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1133 0x0020, 0x00E6, /* small letter ae, expands */
1134 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1135 0x0020
1136 };
1137 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
1138
1139 static const int32_t rootStandardOffsets[] = {
1140 0, 1,2,
1141 2, 3,4,4,
1142 4, 5,6,6,
1143 6, 7,8,8,
1144 8, 9,10,11,
1145 12, 13,14,15,
1146 16, 17,18,19,
1147 20, 21,22,23,
1148 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1149 26, 27,28,28,
1150 28,
1151 29
1152 };
1153 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
1154
1155 static const int32_t rootSearchOffsets[] = {
1156 0, 1,2,
1157 2, 3,4,4,
1158 4, 5,6,6,6,
1159 6, 7,8,8,8,8,8,8,
1160 8, 9,10,11,
1161 12, 13,14,15,
1162 16, 17,18,19,20,
1163 20, 21,22,22,23,23,23,24,
1164 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1165 26, 27,28,28,
1166 28,
1167 29
1168 };
1169 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1170
/* One TestSearchCollatorElements case: a collator locale and its expected offsets. */
typedef struct {
    const char    *locale;      /* locale ID handed to ucol_open(); NULL marks the end of the table */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward iteration order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1176
1177 static const TSCEItem tsceItems[] = {
1178 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
1179 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
1180 { NULL, NULL, 0 }
1181 };
1182
1183 static void TestSearchCollatorElements(void)
1184 {
1185 const TSCEItem * tsceItemPtr;
1186 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1187 UErrorCode status = U_ZERO_ERROR;
1188 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1189 if ( U_SUCCESS(status) ) {
1190 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1191 if ( U_SUCCESS(status) ) {
1192 int32_t offset, element;
1193 const int32_t * nextOffsetPtr;
1194 const int32_t * limitOffsetPtr;
1195
1196 nextOffsetPtr = tsceItemPtr->offsets;
1197 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1198 do {
1199 offset = ucol_getOffset(uce);
1200 element = ucol_next(uce, &status);
1201 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
1202 if ( element == 0 ) {
1203 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1204 }
1205 if ( nextOffsetPtr < limitOffsetPtr ) {
1206 if (offset != *nextOffsetPtr) {
1207 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1208 tsceItemPtr->locale, *nextOffsetPtr, offset );
1209 nextOffsetPtr = limitOffsetPtr;
1210 break;
1211 }
1212 nextOffsetPtr++;
1213 } else {
1214 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1215 }
1216 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1217 if ( nextOffsetPtr < limitOffsetPtr ) {
1218 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1219 }
1220
1221 ucol_setOffset(uce, kLen_tsceText, &status);
1222 status = U_ZERO_ERROR;
1223 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1224 limitOffsetPtr = tsceItemPtr->offsets;
1225 do {
1226 offset = ucol_getOffset(uce);
1227 element = ucol_previous(uce, &status);
1228 if ( element == 0 ) {
1229 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1230 }
1231 if ( nextOffsetPtr > limitOffsetPtr ) {
1232 nextOffsetPtr--;
1233 if (offset != *nextOffsetPtr) {
1234 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1235 tsceItemPtr->locale, *nextOffsetPtr, offset );
1236 nextOffsetPtr = limitOffsetPtr;
1237 break;
1238 }
1239 } else {
1240 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1241 }
1242 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1243 if ( nextOffsetPtr > limitOffsetPtr ) {
1244 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1245 }
1246
1247 ucol_closeElements(uce);
1248 } else {
1249 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1250 }
1251 ucol_close(ucol);
1252 } else {
1253 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1254 }
1255 }
1256 }
1257
1258 #endif /* #if !UCONFIG_NO_COLLATION */