]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/citertst.c
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / citertst.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/********************************************************************
4 * COPYRIGHT:
2ca993e8 5 * Copyright (c) 1997-2016, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
8/********************************************************************************
9*
10* File CITERTST.C
11*
12* Modification History:
13* Date Name Description
14* Madhu Katragadda Ported for C API
15* 02/19/01 synwee Modified test case for new collation iterator
16*********************************************************************************/
17/*
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
20 */
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_COLLATION
25
26#include "unicode/ucol.h"
729e4ab9 27#include "unicode/ucoleitr.h"
b75a7d8f
A
28#include "unicode/uloc.h"
29#include "unicode/uchar.h"
30#include "unicode/ustring.h"
374ca955
A
31#include "unicode/putil.h"
32#include "callcoll.h"
b75a7d8f
A
33#include "cmemory.h"
34#include "cintltst.h"
35#include "citertst.h"
36#include "ccolltst.h"
37#include "filestrm.h"
38#include "cstring.h"
39#include "ucol_imp.h"
729e4ab9 40#include "uparse.h"
b75a7d8f
A
41#include <stdio.h>
42
43extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
44
45void addCollIterTest(TestNode** root)
46{
47 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
48 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
49 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
50 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
51 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
52 addTest(root, &TestNormalizedUnicodeChar,
53 "tscoll/citertst/TestNormalizedUnicodeChar");
54 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
55 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
56 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
57 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
b75a7d8f 58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
729e4ab9 59 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
b75a7d8f
A
60}
61
/* Locale IDs iterated over by TestBug672() and TestBug672Normalize(). */

static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
65
66static void TestBug672() {
67 UErrorCode status = U_ZERO_ERROR;
68 UChar pattern[20];
69 UChar text[50];
70 int i;
71 int result[3][3];
72
73 u_uastrcpy(pattern, "resume");
74 u_uastrcpy(text, "Time to resume updating my resume.");
75
76 for (i = 0; i < 3; ++ i) {
77 UCollator *coll = ucol_open(LOCALES[i], &status);
78 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
79 &status);
80 UCollationElements *titer = ucol_openElements(coll, text, -1,
81 &status);
82 if (U_FAILURE(status)) {
729e4ab9 83 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
84 myErrorName(status));
85 return;
86 }
87
88 log_verbose("locale tested %s\n", LOCALES[i]);
89
90 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
91 U_SUCCESS(status)) {
92 }
93 if (U_FAILURE(status)) {
94 log_err("ERROR: reversing collation iterator :%s\n",
95 myErrorName(status));
96 return;
97 }
98 ucol_reset(pitr);
99
100 ucol_setOffset(titer, u_strlen(pattern), &status);
101 if (U_FAILURE(status)) {
102 log_err("ERROR: setting offset in collator :%s\n",
103 myErrorName(status));
104 return;
105 }
106 result[i][0] = ucol_getOffset(titer);
107 log_verbose("Text iterator set to offset %d\n", result[i][0]);
108
109 /* Use previous() */
110 ucol_previous(titer, &status);
111 result[i][1] = ucol_getOffset(titer);
112 log_verbose("Current offset %d after previous\n", result[i][1]);
113
114 /* Add one to index */
115 log_verbose("Adding one to current offset...\n");
116 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
117 if (U_FAILURE(status)) {
118 log_err("ERROR: setting offset in collator :%s\n",
119 myErrorName(status));
120 return;
121 }
122 result[i][2] = ucol_getOffset(titer);
123 log_verbose("Current offset in text = %d\n", result[i][2]);
124 ucol_closeElements(pitr);
125 ucol_closeElements(titer);
126 ucol_close(coll);
127 }
128
129 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
130 uprv_memcmp(result[1], result[2], 3) != 0) {
131 log_err("ERROR: Different locales have different offsets at the same character\n");
132 }
133}
134
135
136
137/* Running this test with normalization enabled showed up a bug in the incremental
138 normalization code. */
139static void TestBug672Normalize() {
140 UErrorCode status = U_ZERO_ERROR;
141 UChar pattern[20];
142 UChar text[50];
143 int i;
144 int result[3][3];
145
146 u_uastrcpy(pattern, "resume");
147 u_uastrcpy(text, "Time to resume updating my resume.");
148
149 for (i = 0; i < 3; ++ i) {
150 UCollator *coll = ucol_open(LOCALES[i], &status);
151 UCollationElements *pitr = NULL;
152 UCollationElements *titer = NULL;
153
154 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
155
156 pitr = ucol_openElements(coll, pattern, -1, &status);
157 titer = ucol_openElements(coll, text, -1, &status);
158 if (U_FAILURE(status)) {
729e4ab9 159 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
160 myErrorName(status));
161 return;
162 }
163
164 log_verbose("locale tested %s\n", LOCALES[i]);
165
166 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
167 U_SUCCESS(status)) {
168 }
169 if (U_FAILURE(status)) {
170 log_err("ERROR: reversing collation iterator :%s\n",
171 myErrorName(status));
172 return;
173 }
174 ucol_reset(pitr);
175
176 ucol_setOffset(titer, u_strlen(pattern), &status);
177 if (U_FAILURE(status)) {
178 log_err("ERROR: setting offset in collator :%s\n",
179 myErrorName(status));
180 return;
181 }
182 result[i][0] = ucol_getOffset(titer);
183 log_verbose("Text iterator set to offset %d\n", result[i][0]);
184
185 /* Use previous() */
186 ucol_previous(titer, &status);
187 result[i][1] = ucol_getOffset(titer);
188 log_verbose("Current offset %d after previous\n", result[i][1]);
189
190 /* Add one to index */
191 log_verbose("Adding one to current offset...\n");
192 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
193 if (U_FAILURE(status)) {
194 log_err("ERROR: setting offset in collator :%s\n",
195 myErrorName(status));
196 return;
197 }
198 result[i][2] = ucol_getOffset(titer);
199 log_verbose("Current offset in text = %d\n", result[i][2]);
200 ucol_closeElements(pitr);
201 ucol_closeElements(titer);
202 ucol_close(coll);
203 }
204
205 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
206 uprv_memcmp(result[1], result[2], 3) != 0) {
207 log_err("ERROR: Different locales have different offsets at the same character\n");
208 }
209}
210
211
212
213
214/**
215 * Test for CollationElementIterator previous and next for the whole set of
216 * unicode characters.
217 */
218static void TestUnicodeChar()
219{
220 UChar source[0x100];
221 UCollator *en_us;
222 UCollationElements *iter;
223 UErrorCode status = U_ZERO_ERROR;
224 UChar codepoint;
225
226 UChar *test;
227 en_us = ucol_open("en_US", &status);
228 if (U_FAILURE(status)){
729e4ab9 229 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
b75a7d8f
A
230 myErrorName(status));
231 return;
232 }
233
234 for (codepoint = 1; codepoint < 0xFFFE;)
235 {
236 test = source;
237
238 while (codepoint % 0xFF != 0)
239 {
240 if (u_isdefined(codepoint))
241 *(test ++) = codepoint;
242 codepoint ++;
243 }
244
245 if (u_isdefined(codepoint))
246 *(test ++) = codepoint;
247
248 if (codepoint != 0xFFFF)
249 codepoint ++;
250
251 *test = 0;
252 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
253 if(U_FAILURE(status)){
254 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
255 myErrorName(status));
256 ucol_close(en_us);
257 return;
258 }
259 /* A basic test to see if it's working at all */
260 log_verbose("codepoint testing %x\n", codepoint);
261 backAndForth(iter);
262 ucol_closeElements(iter);
263
264 /* null termination test */
265 iter=ucol_openElements(en_us, source, -1, &status);
266 if(U_FAILURE(status)){
267 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
268 myErrorName(status));
269 ucol_close(en_us);
270 return;
271 }
272 /* A basic test to see if it's working at all */
273 backAndForth(iter);
274 ucol_closeElements(iter);
275 }
276
277 ucol_close(en_us);
278}
279
280/**
281 * Test for CollationElementIterator previous and next for the whole set of
282 * unicode characters with normalization on.
283 */
284static void TestNormalizedUnicodeChar()
285{
286 UChar source[0x100];
287 UCollator *th_th;
288 UCollationElements *iter;
289 UErrorCode status = U_ZERO_ERROR;
290 UChar codepoint;
291
292 UChar *test;
293 /* thai should have normalization on */
294 th_th = ucol_open("th_TH", &status);
295 if (U_FAILURE(status)){
729e4ab9 296 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
b75a7d8f
A
297 myErrorName(status));
298 return;
299 }
300
301 for (codepoint = 1; codepoint < 0xFFFE;)
302 {
303 test = source;
304
305 while (codepoint % 0xFF != 0)
306 {
307 if (u_isdefined(codepoint))
308 *(test ++) = codepoint;
309 codepoint ++;
310 }
311
312 if (u_isdefined(codepoint))
313 *(test ++) = codepoint;
314
315 if (codepoint != 0xFFFF)
316 codepoint ++;
317
318 *test = 0;
319 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
320 if(U_FAILURE(status)){
321 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
322 myErrorName(status));
323 ucol_close(th_th);
324 return;
325 }
326
327 backAndForth(iter);
328 ucol_closeElements(iter);
329
330 iter=ucol_openElements(th_th, source, -1, &status);
331 if(U_FAILURE(status)){
332 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
333 myErrorName(status));
334 ucol_close(th_th);
335 return;
336 }
337
338 backAndForth(iter);
339 ucol_closeElements(iter);
340 }
341
342 ucol_close(th_th);
343}
344
345/**
346* Test the incremental normalization
347*/
348static void TestNormalization()
349{
350 UErrorCode status = U_ZERO_ERROR;
351 const char *str =
352 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
353 UCollator *coll;
354 UChar rule[50];
355 int rulelen = u_unescape(str, rule, 50);
356 int count = 0;
357 const char *testdata[] =
358 {"\\u1ED9", "o\\u0323\\u0302",
359 "\\u0300\\u0315", "\\u0315\\u0300",
360 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
361 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
362 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
363 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
364 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
365 int32_t srclen;
366 UChar source[10];
367 UCollationElements *iter;
368
369 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
370 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
371 if (U_FAILURE(status)){
729e4ab9 372 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
b75a7d8f
A
373 myErrorName(status));
374 return;
375 }
376
377 srclen = u_unescape(testdata[0], source, 10);
378 iter = ucol_openElements(coll, source, srclen, &status);
379 backAndForth(iter);
380 ucol_closeElements(iter);
381
382 srclen = u_unescape(testdata[1], source, 10);
383 iter = ucol_openElements(coll, source, srclen, &status);
384 backAndForth(iter);
385 ucol_closeElements(iter);
386
387 while (count < 12) {
388 srclen = u_unescape(testdata[count], source, 10);
389 iter = ucol_openElements(coll, source, srclen, &status);
390
391 if (U_FAILURE(status)){
392 log_err("ERROR: in creation of collator element iterator\n %s\n",
393 myErrorName(status));
394 return;
395 }
396 backAndForth(iter);
397 ucol_closeElements(iter);
398
399 iter = ucol_openElements(coll, source, -1, &status);
400
401 if (U_FAILURE(status)){
402 log_err("ERROR: in creation of collator element iterator\n %s\n",
403 myErrorName(status));
404 return;
405 }
406 backAndForth(iter);
407 ucol_closeElements(iter);
408 count ++;
409 }
410 ucol_close(coll);
411}
412
413/**
414 * Test for CollationElementIterator.previous()
415 *
416 * @bug 4108758 - Make sure it works with contracting characters
417 *
418 */
419static void TestPrevious()
420{
421 UCollator *coll=NULL;
422 UChar rule[50];
423 UChar *source;
424 UCollator *c1, *c2, *c3;
425 UCollationElements *iter;
426 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
427 UChar test1[50];
428 UChar test2[50];
b75a7d8f 429
b75a7d8f
A
430 u_uastrcpy(test1, "What subset of all possible test cases?");
431 u_uastrcpy(test2, "has the highest probability of detecting");
432 coll = ucol_open("en_US", &status);
433
434 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
435 log_verbose("English locale testing back and forth\n");
436 if(U_FAILURE(status)){
729e4ab9 437 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
438 myErrorName(status));
439 ucol_close(coll);
440 return;
441 }
442 /* A basic test to see if it's working at all */
443 backAndForth(iter);
444 ucol_closeElements(iter);
445 ucol_close(coll);
446
447 /* Test with a contracting character sequence */
448 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
449 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
450
451 log_verbose("Contraction rule testing back and forth with no normalization\n");
452
453 if (c1 == NULL || U_FAILURE(status))
454 {
455 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
456 myErrorName(status));
457 return;
458 }
459 source=(UChar*)malloc(sizeof(UChar) * 20);
460 u_uastrcpy(source, "abchdcba");
461 iter=ucol_openElements(c1, source, u_strlen(source), &status);
462 if(U_FAILURE(status)){
463 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
464 myErrorName(status));
465 return;
466 }
467 backAndForth(iter);
468 ucol_closeElements(iter);
469 ucol_close(c1);
470
471 /* Test with an expanding character sequence */
472 u_uastrcpy(rule, "&a < b < c/abd < d");
473 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
474 log_verbose("Expansion rule testing back and forth with no normalization\n");
475 if (c2 == NULL || U_FAILURE(status))
476 {
477 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
478 myErrorName(status));
479 return;
480 }
481 u_uastrcpy(source, "abcd");
482 iter=ucol_openElements(c2, source, u_strlen(source), &status);
483 if(U_FAILURE(status)){
484 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
485 myErrorName(status));
486 return;
487 }
488 backAndForth(iter);
489 ucol_closeElements(iter);
490 ucol_close(c2);
491 /* Now try both */
492 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
493 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
494 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
495
496 if (c3 == NULL || U_FAILURE(status))
497 {
498 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
499 myErrorName(status));
500 return;
501 }
502 u_uastrcpy(source, "abcdbchdc");
503 iter=ucol_openElements(c3, source, u_strlen(source), &status);
504 if(U_FAILURE(status)){
505 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
506 myErrorName(status));
507 return;
508 }
509 backAndForth(iter);
510 ucol_closeElements(iter);
511 ucol_close(c3);
512 source[0] = 0x0e41;
513 source[1] = 0x0e02;
514 source[2] = 0x0e41;
515 source[3] = 0x0e02;
516 source[4] = 0x0e27;
517 source[5] = 0x61;
518 source[6] = 0x62;
519 source[7] = 0x63;
520 source[8] = 0;
521
522 coll = ucol_open("th_TH", &status);
523 log_verbose("Thai locale testing back and forth with normalization\n");
524 iter=ucol_openElements(coll, source, u_strlen(source), &status);
525 if(U_FAILURE(status)){
526 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
527 myErrorName(status));
528 return;
529 }
530 backAndForth(iter);
531 ucol_closeElements(iter);
532 ucol_close(coll);
533
534 /* prev test */
535 source[0] = 0x0061;
536 source[1] = 0x30CF;
537 source[2] = 0x3099;
538 source[3] = 0x30FC;
539 source[4] = 0;
540
541 coll = ucol_open("ja_JP", &status);
542 log_verbose("Japanese locale testing back and forth with normalization\n");
543 iter=ucol_openElements(coll, source, u_strlen(source), &status);
544 if(U_FAILURE(status)){
545 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
546 myErrorName(status));
547 return;
548 }
549 backAndForth(iter);
550 ucol_closeElements(iter);
551 ucol_close(coll);
552
553 free(source);
b75a7d8f
A
554}
555
556/**
557 * Test for getOffset() and setOffset()
558 */
559static void TestOffset()
560{
561 UErrorCode status= U_ZERO_ERROR;
562 UCollator *en_us=NULL;
563 UCollationElements *iter, *pristine;
564 int32_t offset;
46f4442e 565 OrderAndOffset *orders;
b75a7d8f
A
566 int32_t orderLength=0;
567 int count = 0;
46f4442e
A
568 UChar test1[50];
569 UChar test2[50];
570
b75a7d8f
A
571 u_uastrcpy(test1, "What subset of all possible test cases?");
572 u_uastrcpy(test2, "has the highest probability of detecting");
573 en_us = ucol_open("en_US", &status);
374ca955 574 log_verbose("Testing getOffset and setOffset for collations\n");
b75a7d8f
A
575 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
576 if(U_FAILURE(status)){
729e4ab9 577 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
578 myErrorName(status));
579 ucol_close(en_us);
580 return;
581 }
374ca955
A
582
583 /* testing boundaries */
584 ucol_setOffset(iter, 0, &status);
585 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
586 log_err("Error: After setting offset to 0, we should be at the end "
587 "of the backwards iteration");
588 }
589 ucol_setOffset(iter, u_strlen(test1), &status);
590 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
591 log_err("Error: After setting offset to end of the string, we should "
592 "be at the end of the backwards iteration");
593 }
594
b75a7d8f
A
595 /* Run all the way through the iterator, then get the offset */
596
597 orders = getOrders(iter, &orderLength);
598
599 offset = ucol_getOffset(iter);
600
601 if (offset != u_strlen(test1))
602 {
603 log_err("offset at end != length %d vs %d\n", offset,
604 u_strlen(test1) );
605 }
606
607 /* Now set the offset back to the beginning and see if it works */
608 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
609 if(U_FAILURE(status)){
610 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
611 myErrorName(status));
612 ucol_close(en_us);
613 return;
614 }
615 status = U_ZERO_ERROR;
616
617 ucol_setOffset(iter, 0, &status);
618 if (U_FAILURE(status))
619 {
620 log_err("setOffset failed. %s\n", myErrorName(status));
621 }
622 else
623 {
624 assertEqual(iter, pristine);
625 }
626
627 ucol_closeElements(pristine);
628 ucol_closeElements(iter);
629 free(orders);
630
631 /* testing offsets in normalization buffer */
632 test1[0] = 0x61;
633 test1[1] = 0x300;
634 test1[2] = 0x316;
635 test1[3] = 0x62;
636 test1[4] = 0;
637 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
638 iter = ucol_openElements(en_us, test1, 4, &status);
639 if(U_FAILURE(status)){
640 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
641 myErrorName(status));
642 ucol_close(en_us);
643 return;
644 }
645
646 count = 0;
647 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
648 U_SUCCESS(status)) {
649 switch (count) {
650 case 0:
651 if (ucol_getOffset(iter) != 1) {
46f4442e 652 log_err("ERROR: Offset of iteration should be 1\n");
b75a7d8f
A
653 }
654 break;
655 case 3:
656 if (ucol_getOffset(iter) != 4) {
657 log_err("ERROR: Offset of iteration should be 4\n");
658 }
659 break;
660 default:
661 if (ucol_getOffset(iter) != 3) {
662 log_err("ERROR: Offset of iteration should be 3\n");
663 }
664 }
665 count ++;
666 }
667
668 ucol_reset(iter);
669 count = 0;
670 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
671 U_SUCCESS(status)) {
672 switch (count) {
673 case 0:
46f4442e 674 case 1:
b75a7d8f
A
675 if (ucol_getOffset(iter) != 3) {
676 log_err("ERROR: Offset of iteration should be 3\n");
677 }
678 break;
46f4442e
A
679 case 2:
680 if (ucol_getOffset(iter) != 1) {
681 log_err("ERROR: Offset of iteration should be 1\n");
682 }
683 break;
b75a7d8f
A
684 default:
685 if (ucol_getOffset(iter) != 0) {
686 log_err("ERROR: Offset of iteration should be 0\n");
687 }
688 }
689 count ++;
690 }
691
692 if(U_FAILURE(status)){
693 log_err("ERROR: in iterating collation elements %s\n",
694 myErrorName(status));
695 }
696
697 ucol_closeElements(iter);
698 ucol_close(en_us);
b75a7d8f
A
699}
700
701/**
702 * Test for setText()
703 */
704static void TestSetText()
705{
706 int32_t c,i;
707 UErrorCode status = U_ZERO_ERROR;
708 UCollator *en_us=NULL;
709 UCollationElements *iter1, *iter2;
46f4442e
A
710 UChar test1[50];
711 UChar test2[50];
712
b75a7d8f
A
713 u_uastrcpy(test1, "What subset of all possible test cases?");
714 u_uastrcpy(test2, "has the highest probability of detecting");
715 en_us = ucol_open("en_US", &status);
716 log_verbose("testing setText for Collation elements\n");
717 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
718 if(U_FAILURE(status)){
729e4ab9 719 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
b75a7d8f
A
720 myErrorName(status));
721 ucol_close(en_us);
722 return;
723 }
724 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
725 if(U_FAILURE(status)){
726 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
727 myErrorName(status));
728 ucol_close(en_us);
729 return;
730 }
731
732 /* Run through the second iterator just to exercise it */
733 c = ucol_next(iter2, &status);
734 i = 0;
735
736 while ( ++i < 10 && (c != UCOL_NULLORDER))
737 {
738 if (U_FAILURE(status))
739 {
740 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
741 ucol_closeElements(iter2);
742 ucol_closeElements(iter1);
743 ucol_close(en_us);
744 return;
745 }
746
747 c = ucol_next(iter2, &status);
748 }
749
750 /* Now set it to point to the same string as the first iterator */
751 ucol_setText(iter2, test1, u_strlen(test1), &status);
752 if (U_FAILURE(status))
753 {
754 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
755 }
756 else
757 {
758 assertEqual(iter1, iter2);
759 }
760
761 /* Now set it to point to a null string with fake length*/
762 ucol_setText(iter2, NULL, 2, &status);
57a6839d 763 if (status != U_ILLEGAL_ARGUMENT_ERROR)
b75a7d8f 764 {
57a6839d
A
765 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
766 myErrorName(status));
b75a7d8f
A
767 }
768
769 ucol_closeElements(iter2);
770 ucol_closeElements(iter1);
771 ucol_close(en_us);
b75a7d8f
A
772}
773
b75a7d8f
A
774/** @bug 4108762
775 * Test for getMaxExpansion()
776 */
777static void TestMaxExpansion()
778{
779 UErrorCode status = U_ZERO_ERROR;
780 UCollator *coll ;/*= ucol_open("en_US", &status);*/
781 UChar ch = 0;
374ca955
A
782 UChar32 unassigned = 0xEFFFD;
783 UChar supplementary[2];
729e4ab9 784 uint32_t stringOffset = 0;
374ca955 785 UBool isError = FALSE;
b75a7d8f
A
786 uint32_t sorder = 0;
787 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
788 uint32_t temporder = 0;
789
790 UChar rule[256];
791 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
792 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
793 UCOL_DEFAULT_STRENGTH,NULL, &status);
794 if(U_SUCCESS(status) && coll) {
795 iter = ucol_openElements(coll, &ch, 1, &status);
796
797 while (ch < 0xFFFF && U_SUCCESS(status)) {
798 int count = 1;
799 uint32_t order;
800 int32_t size = 0;
801
802 ch ++;
803
804 ucol_setText(iter, &ch, 1, &status);
805 order = ucol_previous(iter, &status);
806
807 /* thai management */
808 if (order == 0)
809 order = ucol_previous(iter, &status);
810
811 while (U_SUCCESS(status) &&
812 ucol_previous(iter, &status) != UCOL_NULLORDER) {
813 count ++;
814 }
815
816 size = ucol_getMaxExpansion(iter, order);
817 if (U_FAILURE(status) || size < count) {
818 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
819 ch, count);
820 }
821 }
822
823 /* testing for exact max expansion */
824 ch = 0;
825 while (ch < 0x61) {
826 uint32_t order;
827 int32_t size;
828 ucol_setText(iter, &ch, 1, &status);
829 order = ucol_previous(iter, &status);
830 size = ucol_getMaxExpansion(iter, order);
831 if (U_FAILURE(status) || size != 1) {
832 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
833 ch, 1);
834 }
835 ch ++;
836 }
837
838 ch = 0x63;
839 ucol_setText(iter, &ch, 1, &status);
840 temporder = ucol_previous(iter, &status);
841
842 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
843 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
844 ch, 3);
845 }
846
847 ch = 0x64;
848 ucol_setText(iter, &ch, 1, &status);
849 temporder = ucol_previous(iter, &status);
850
851 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
852 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
853 ch, 3);
854 }
855
729e4ab9 856 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
57a6839d 857 (void)isError; /* Suppress set but not used warning. */
b75a7d8f
A
858 ucol_setText(iter, supplementary, 2, &status);
859 sorder = ucol_previous(iter, &status);
860
861 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
862 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
863 ch, 2);
864 }
865
866 /* testing jamo */
867 ch = 0x1165;
868
869 ucol_setText(iter, &ch, 1, &status);
870 temporder = ucol_previous(iter, &status);
871 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
872 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
873 ch, 3);
874 }
875
876 ucol_closeElements(iter);
877 ucol_close(coll);
878
879 /* testing special jamo &a<\u1160 */
880 rule[0] = 0x26;
881 rule[1] = 0x71;
882 rule[2] = 0x3c;
883 rule[3] = 0x1165;
884 rule[4] = 0x2f;
885 rule[5] = 0x71;
886 rule[6] = 0x71;
887 rule[7] = 0x71;
888 rule[8] = 0x71;
889 rule[9] = 0;
890
891 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
892 UCOL_DEFAULT_STRENGTH,NULL, &status);
893 iter = ucol_openElements(coll, &ch, 1, &status);
894
895 temporder = ucol_previous(iter, &status);
896 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
897 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
898 ch, 5);
899 }
900
901 ucol_closeElements(iter);
902 ucol_close(coll);
903 } else {
729e4ab9 904 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
905 }
906
907}
908
b75a7d8f
A
909
910static void assertEqual(UCollationElements *i1, UCollationElements *i2)
911{
912 int32_t c1, c2;
913 int32_t count = 0;
914 UErrorCode status = U_ZERO_ERROR;
915
916 do
917 {
918 c1 = ucol_next(i1, &status);
919 c2 = ucol_next(i2, &status);
920
921 if (c1 != c2)
922 {
923 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
924 break;
925 }
926
927 count += 1;
928 }
929 while (c1 != UCOL_NULLORDER);
930}
931
932/**
933 * Testing iterators with extremely small buffers
934 */
935static void TestSmallBuffer()
936{
937 UErrorCode status = U_ZERO_ERROR;
938 UCollator *coll;
939 UCollationElements *testiter,
940 *iter;
941 int32_t count = 0;
46f4442e 942 OrderAndOffset *testorders,
b75a7d8f
A
943 *orders;
944
945 UChar teststr[500];
946 UChar str[] = {0x300, 0x31A, 0};
947 /*
948 creating a long string of decomposable characters,
949 since by default the writable buffer is of size 256
950 */
951 while (count < 500) {
952 if ((count & 1) == 0) {
953 teststr[count ++] = 0x300;
954 }
955 else {
956 teststr[count ++] = 0x31A;
957 }
958 }
959
960 coll = ucol_open("th_TH", &status);
961 if(U_SUCCESS(status) && coll) {
962 testiter = ucol_openElements(coll, teststr, 500, &status);
963 iter = ucol_openElements(coll, str, 2, &status);
964
965 orders = getOrders(iter, &count);
966 if (count != 2) {
967 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
968 }
969
970 /*
971 this will rearrange the string data to 250 characters of 0x300 first then
972 250 characters of 0x031A
973 */
974 testorders = getOrders(testiter, &count);
975
976 if (count != 500) {
977 log_err("Error decomposition does not give the right sized collation elements\n");
978 }
979
980 while (count != 0) {
981 /* UCA collation element for 0x0F76 */
46f4442e
A
982 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
983 (count <= 250 && testorders[-- count].order != orders[0].order)) {
b75a7d8f
A
984 log_err("Error decomposition does not give the right collation element at %d count\n", count);
985 break;
986 }
987 }
988
989 free(testorders);
990 free(orders);
991
992 ucol_reset(testiter);
b75a7d8f
A
993
994 /* ensures closing of elements done properly to clear writable buffer */
995 ucol_next(testiter, &status);
996 ucol_next(testiter, &status);
997 ucol_closeElements(testiter);
998 ucol_closeElements(iter);
999 ucol_close(coll);
1000 } else {
729e4ab9 1001 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
1002 }
1003}
1004
b75a7d8f
A
1005/**
1006* Testing the discontigous contractions
1007*/
1008static void TestDiscontiguos() {
1009 const char *rulestr =
1010 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1011 UChar rule[50];
1012 int rulelen = u_unescape(rulestr, rule, 50);
1013 const char *src[] = {
1014 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1015 /* base character blocked */
1016 "XD\\u0300", "XD\\u0300\\u0315",
1017 /* non blocking combining character */
1018 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1019 /* blocking combining character */
1020 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1021 /* contraction prefix */
1022 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1023 "X\\u0300\\u031A\\u0315",
1024 /* ends not with a contraction character */
1025 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1026 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1027 };
1028 const char *tgt[] = {
1029 /* non blocking combining character */
1030 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1031 /* base character blocked */
1032 "X D \\u0300", "X D \\u0300\\u0315",
1033 /* non blocking combining character */
1034 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1035 /* blocking combining character */
1036 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1037 /* contraction prefix */
1038 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1039 "X\\u0300 \\u031A \\u0315",
1040 /* ends not with a contraction character */
1041 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1042 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1043 };
1044 int size = 20;
1045 UCollator *coll;
1046 UErrorCode status = U_ZERO_ERROR;
1047 int count = 0;
1048 UCollationElements *iter;
1049 UCollationElements *resultiter;
1050
1051 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1052 iter = ucol_openElements(coll, rule, 1, &status);
1053 resultiter = ucol_openElements(coll, rule, 1, &status);
1054
1055 if (U_FAILURE(status)) {
729e4ab9 1056 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
b75a7d8f
A
1057 return;
1058 }
1059
1060 while (count < size) {
1061 UChar str[20];
1062 UChar tstr[20];
1063 int strLen = u_unescape(src[count], str, 20);
1064 UChar *s;
1065
1066 ucol_setText(iter, str, strLen, &status);
1067 if (U_FAILURE(status)) {
1068 log_err("Error opening collation iterator\n");
1069 return;
1070 }
1071
1072 u_unescape(tgt[count], tstr, 20);
1073 s = tstr;
1074
1075 log_verbose("count %d\n", count);
1076
1077 for (;;) {
1078 uint32_t ce;
1079 UChar *e = u_strchr(s, 0x20);
1080 if (e == 0) {
1081 e = u_strchr(s, 0);
1082 }
1083 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1084 ce = ucol_next(resultiter, &status);
1085 if (U_FAILURE(status)) {
1086 log_err("Error manipulating collation iterator\n");
1087 return;
1088 }
1089 while (ce != UCOL_NULLORDER) {
1090 if (ce != (uint32_t)ucol_next(iter, &status) ||
1091 U_FAILURE(status)) {
1092 log_err("Discontiguos contraction test mismatch\n");
1093 return;
1094 }
1095 ce = ucol_next(resultiter, &status);
1096 if (U_FAILURE(status)) {
1097 log_err("Error getting next collation element\n");
1098 return;
1099 }
1100 }
1101 s = e + 1;
1102 if (*e == 0) {
1103 break;
1104 }
1105 }
1106 ucol_reset(iter);
1107 backAndForth(iter);
1108 count ++;
1109 }
1110 ucol_closeElements(resultiter);
1111 ucol_closeElements(iter);
1112 ucol_close(coll);
1113}
1114
729e4ab9
A
1115/**
1116* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1117* normalization on AND jamo tailoring, among other things.
57a6839d
A
1118*
1119* Note: This test is sensitive to changes of the root collator,
1120* for example whether the ae-ligature maps to three CEs (as in the DUCET)
1121* or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1122* It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1123* For example, the DUCET's artificial secondary CE in the ae-ligature
1124* may map to two 32-bit iterator CEs (as it did until ICU 52).
729e4ab9
A
1125*/
1126static const UChar tsceText[] = { /* Nothing in here should be ignorable */
1127 0x0020, 0xAC00, /* simple LV Hangul */
1128 0x0020, 0xAC01, /* simple LVT Hangul */
1129 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1130 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1131 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1132 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1133 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1134 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1135 0x0020, 0x00E6, /* small letter ae, expands */
1136 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1137 0x0020
1138};
2ca993e8 1139enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
729e4ab9
A
1140
1141static const int32_t rootStandardOffsets[] = {
1142 0, 1,2,
1143 2, 3,4,4,
1144 4, 5,6,6,
1145 6, 7,8,8,
1146 8, 9,10,11,
1147 12, 13,14,15,
1148 16, 17,18,19,
1149 20, 21,22,23,
57a6839d 1150 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
729e4ab9
A
1151 26, 27,28,28,
1152 28,
1153 29
1154};
2ca993e8 1155enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
729e4ab9
A
1156
1157static const int32_t rootSearchOffsets[] = {
1158 0, 1,2,
1159 2, 3,4,4,
1160 4, 5,6,6,6,
1161 6, 7,8,8,8,8,8,8,
1162 8, 9,10,11,
1163 12, 13,14,15,
1164 16, 17,18,19,20,
1165 20, 21,22,22,23,23,23,24,
57a6839d 1166 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
729e4ab9
A
1167 26, 27,28,28,
1168 28,
1169 29
1170};
2ca993e8 1171enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
729e4ab9
A
1172
/* One locale together with the iterator offsets expected for it. */
typedef struct {
    const char    *locale;      /* locale ID given to ucol_open(); NULL marks the end of a table */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward iteration order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1178
1179static const TSCEItem tsceItems[] = {
1180 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
729e4ab9 1181 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
729e4ab9
A
1182 { NULL, NULL, 0 }
1183};
1184
1185static void TestSearchCollatorElements(void)
1186{
1187 const TSCEItem * tsceItemPtr;
1188 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1189 UErrorCode status = U_ZERO_ERROR;
1190 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1191 if ( U_SUCCESS(status) ) {
1192 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1193 if ( U_SUCCESS(status) ) {
1194 int32_t offset, element;
1195 const int32_t * nextOffsetPtr;
1196 const int32_t * limitOffsetPtr;
1197
1198 nextOffsetPtr = tsceItemPtr->offsets;
1199 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1200 do {
1201 offset = ucol_getOffset(uce);
1202 element = ucol_next(uce, &status);
57a6839d 1203 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
729e4ab9
A
1204 if ( element == 0 ) {
1205 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1206 }
1207 if ( nextOffsetPtr < limitOffsetPtr ) {
1208 if (offset != *nextOffsetPtr) {
1209 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1210 tsceItemPtr->locale, *nextOffsetPtr, offset );
1211 nextOffsetPtr = limitOffsetPtr;
1212 break;
1213 }
1214 nextOffsetPtr++;
1215 } else {
1216 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1217 }
1218 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1219 if ( nextOffsetPtr < limitOffsetPtr ) {
1220 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1221 }
1222
1223 ucol_setOffset(uce, kLen_tsceText, &status);
1224 status = U_ZERO_ERROR;
1225 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1226 limitOffsetPtr = tsceItemPtr->offsets;
1227 do {
1228 offset = ucol_getOffset(uce);
1229 element = ucol_previous(uce, &status);
1230 if ( element == 0 ) {
1231 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1232 }
1233 if ( nextOffsetPtr > limitOffsetPtr ) {
1234 nextOffsetPtr--;
1235 if (offset != *nextOffsetPtr) {
1236 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1237 tsceItemPtr->locale, *nextOffsetPtr, offset );
1238 nextOffsetPtr = limitOffsetPtr;
1239 break;
1240 }
1241 } else {
1242 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1243 }
1244 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1245 if ( nextOffsetPtr > limitOffsetPtr ) {
1246 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1247 }
1248
1249 ucol_closeElements(uce);
1250 } else {
1251 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1252 }
1253 ucol_close(ucol);
1254 } else {
4388f060 1255 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
729e4ab9
A
1256 }
1257 }
1258}
1259
b75a7d8f 1260#endif /* #if !UCONFIG_NO_COLLATION */