]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/citertst.c
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / test / cintltst / citertst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
729e4ab9 3 * Copyright (c) 1997-2011, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date Name Description
12* Madhu Katragadda Ported for C API
13* 02/19/01 synwee Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
729e4ab9 25#include "unicode/ucoleitr.h"
b75a7d8f
A
26#include "unicode/uloc.h"
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
374ca955
A
29#include "unicode/putil.h"
30#include "callcoll.h"
b75a7d8f
A
31#include "cmemory.h"
32#include "cintltst.h"
33#include "citertst.h"
34#include "ccolltst.h"
35#include "filestrm.h"
36#include "cstring.h"
37#include "ucol_imp.h"
38#include "ucol_tok.h"
729e4ab9 39#include "uparse.h"
b75a7d8f
A
40#include <stdio.h>
41
42extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43
44void addCollIterTest(TestNode** root)
45{
46 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
47 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
48 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
49 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
50 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
51 addTest(root, &TestNormalizedUnicodeChar,
52 "tscoll/citertst/TestNormalizedUnicodeChar");
53 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
54 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
55 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
56 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
57 addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
60 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
61 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
729e4ab9 62 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
b75a7d8f
A
63}
64
/* Locale IDs that the TestBug672* cases open a collator for, one per pass. */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
68
69static void TestBug672() {
70 UErrorCode status = U_ZERO_ERROR;
71 UChar pattern[20];
72 UChar text[50];
73 int i;
74 int result[3][3];
75
76 u_uastrcpy(pattern, "resume");
77 u_uastrcpy(text, "Time to resume updating my resume.");
78
79 for (i = 0; i < 3; ++ i) {
80 UCollator *coll = ucol_open(LOCALES[i], &status);
81 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
82 &status);
83 UCollationElements *titer = ucol_openElements(coll, text, -1,
84 &status);
85 if (U_FAILURE(status)) {
729e4ab9 86 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
87 myErrorName(status));
88 return;
89 }
90
91 log_verbose("locale tested %s\n", LOCALES[i]);
92
93 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
94 U_SUCCESS(status)) {
95 }
96 if (U_FAILURE(status)) {
97 log_err("ERROR: reversing collation iterator :%s\n",
98 myErrorName(status));
99 return;
100 }
101 ucol_reset(pitr);
102
103 ucol_setOffset(titer, u_strlen(pattern), &status);
104 if (U_FAILURE(status)) {
105 log_err("ERROR: setting offset in collator :%s\n",
106 myErrorName(status));
107 return;
108 }
109 result[i][0] = ucol_getOffset(titer);
110 log_verbose("Text iterator set to offset %d\n", result[i][0]);
111
112 /* Use previous() */
113 ucol_previous(titer, &status);
114 result[i][1] = ucol_getOffset(titer);
115 log_verbose("Current offset %d after previous\n", result[i][1]);
116
117 /* Add one to index */
118 log_verbose("Adding one to current offset...\n");
119 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
120 if (U_FAILURE(status)) {
121 log_err("ERROR: setting offset in collator :%s\n",
122 myErrorName(status));
123 return;
124 }
125 result[i][2] = ucol_getOffset(titer);
126 log_verbose("Current offset in text = %d\n", result[i][2]);
127 ucol_closeElements(pitr);
128 ucol_closeElements(titer);
129 ucol_close(coll);
130 }
131
132 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
133 uprv_memcmp(result[1], result[2], 3) != 0) {
134 log_err("ERROR: Different locales have different offsets at the same character\n");
135 }
136}
137
138
139
140/* Running this test with normalization enabled showed up a bug in the incremental
141 normalization code. */
142static void TestBug672Normalize() {
143 UErrorCode status = U_ZERO_ERROR;
144 UChar pattern[20];
145 UChar text[50];
146 int i;
147 int result[3][3];
148
149 u_uastrcpy(pattern, "resume");
150 u_uastrcpy(text, "Time to resume updating my resume.");
151
152 for (i = 0; i < 3; ++ i) {
153 UCollator *coll = ucol_open(LOCALES[i], &status);
154 UCollationElements *pitr = NULL;
155 UCollationElements *titer = NULL;
156
157 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
158
159 pitr = ucol_openElements(coll, pattern, -1, &status);
160 titer = ucol_openElements(coll, text, -1, &status);
161 if (U_FAILURE(status)) {
729e4ab9 162 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
163 myErrorName(status));
164 return;
165 }
166
167 log_verbose("locale tested %s\n", LOCALES[i]);
168
169 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
170 U_SUCCESS(status)) {
171 }
172 if (U_FAILURE(status)) {
173 log_err("ERROR: reversing collation iterator :%s\n",
174 myErrorName(status));
175 return;
176 }
177 ucol_reset(pitr);
178
179 ucol_setOffset(titer, u_strlen(pattern), &status);
180 if (U_FAILURE(status)) {
181 log_err("ERROR: setting offset in collator :%s\n",
182 myErrorName(status));
183 return;
184 }
185 result[i][0] = ucol_getOffset(titer);
186 log_verbose("Text iterator set to offset %d\n", result[i][0]);
187
188 /* Use previous() */
189 ucol_previous(titer, &status);
190 result[i][1] = ucol_getOffset(titer);
191 log_verbose("Current offset %d after previous\n", result[i][1]);
192
193 /* Add one to index */
194 log_verbose("Adding one to current offset...\n");
195 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
196 if (U_FAILURE(status)) {
197 log_err("ERROR: setting offset in collator :%s\n",
198 myErrorName(status));
199 return;
200 }
201 result[i][2] = ucol_getOffset(titer);
202 log_verbose("Current offset in text = %d\n", result[i][2]);
203 ucol_closeElements(pitr);
204 ucol_closeElements(titer);
205 ucol_close(coll);
206 }
207
208 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
209 uprv_memcmp(result[1], result[2], 3) != 0) {
210 log_err("ERROR: Different locales have different offsets at the same character\n");
211 }
212}
213
214
215
216
217/**
218 * Test for CollationElementIterator previous and next for the whole set of
219 * unicode characters.
220 */
221static void TestUnicodeChar()
222{
223 UChar source[0x100];
224 UCollator *en_us;
225 UCollationElements *iter;
226 UErrorCode status = U_ZERO_ERROR;
227 UChar codepoint;
228
229 UChar *test;
230 en_us = ucol_open("en_US", &status);
231 if (U_FAILURE(status)){
729e4ab9 232 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
b75a7d8f
A
233 myErrorName(status));
234 return;
235 }
236
237 for (codepoint = 1; codepoint < 0xFFFE;)
238 {
239 test = source;
240
241 while (codepoint % 0xFF != 0)
242 {
243 if (u_isdefined(codepoint))
244 *(test ++) = codepoint;
245 codepoint ++;
246 }
247
248 if (u_isdefined(codepoint))
249 *(test ++) = codepoint;
250
251 if (codepoint != 0xFFFF)
252 codepoint ++;
253
254 *test = 0;
255 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
256 if(U_FAILURE(status)){
257 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
258 myErrorName(status));
259 ucol_close(en_us);
260 return;
261 }
262 /* A basic test to see if it's working at all */
263 log_verbose("codepoint testing %x\n", codepoint);
264 backAndForth(iter);
265 ucol_closeElements(iter);
266
267 /* null termination test */
268 iter=ucol_openElements(en_us, source, -1, &status);
269 if(U_FAILURE(status)){
270 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
271 myErrorName(status));
272 ucol_close(en_us);
273 return;
274 }
275 /* A basic test to see if it's working at all */
276 backAndForth(iter);
277 ucol_closeElements(iter);
278 }
279
280 ucol_close(en_us);
281}
282
283/**
284 * Test for CollationElementIterator previous and next for the whole set of
285 * unicode characters with normalization on.
286 */
287static void TestNormalizedUnicodeChar()
288{
289 UChar source[0x100];
290 UCollator *th_th;
291 UCollationElements *iter;
292 UErrorCode status = U_ZERO_ERROR;
293 UChar codepoint;
294
295 UChar *test;
296 /* thai should have normalization on */
297 th_th = ucol_open("th_TH", &status);
298 if (U_FAILURE(status)){
729e4ab9 299 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
b75a7d8f
A
300 myErrorName(status));
301 return;
302 }
303
304 for (codepoint = 1; codepoint < 0xFFFE;)
305 {
306 test = source;
307
308 while (codepoint % 0xFF != 0)
309 {
310 if (u_isdefined(codepoint))
311 *(test ++) = codepoint;
312 codepoint ++;
313 }
314
315 if (u_isdefined(codepoint))
316 *(test ++) = codepoint;
317
318 if (codepoint != 0xFFFF)
319 codepoint ++;
320
321 *test = 0;
322 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
323 if(U_FAILURE(status)){
324 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
325 myErrorName(status));
326 ucol_close(th_th);
327 return;
328 }
329
330 backAndForth(iter);
331 ucol_closeElements(iter);
332
333 iter=ucol_openElements(th_th, source, -1, &status);
334 if(U_FAILURE(status)){
335 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
336 myErrorName(status));
337 ucol_close(th_th);
338 return;
339 }
340
341 backAndForth(iter);
342 ucol_closeElements(iter);
343 }
344
345 ucol_close(th_th);
346}
347
348/**
349* Test the incremental normalization
350*/
351static void TestNormalization()
352{
353 UErrorCode status = U_ZERO_ERROR;
354 const char *str =
355 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
356 UCollator *coll;
357 UChar rule[50];
358 int rulelen = u_unescape(str, rule, 50);
359 int count = 0;
360 const char *testdata[] =
361 {"\\u1ED9", "o\\u0323\\u0302",
362 "\\u0300\\u0315", "\\u0315\\u0300",
363 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
364 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
365 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
366 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
367 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368 int32_t srclen;
369 UChar source[10];
370 UCollationElements *iter;
371
372 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
373 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
374 if (U_FAILURE(status)){
729e4ab9 375 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
b75a7d8f
A
376 myErrorName(status));
377 return;
378 }
379
380 srclen = u_unescape(testdata[0], source, 10);
381 iter = ucol_openElements(coll, source, srclen, &status);
382 backAndForth(iter);
383 ucol_closeElements(iter);
384
385 srclen = u_unescape(testdata[1], source, 10);
386 iter = ucol_openElements(coll, source, srclen, &status);
387 backAndForth(iter);
388 ucol_closeElements(iter);
389
390 while (count < 12) {
391 srclen = u_unescape(testdata[count], source, 10);
392 iter = ucol_openElements(coll, source, srclen, &status);
393
394 if (U_FAILURE(status)){
395 log_err("ERROR: in creation of collator element iterator\n %s\n",
396 myErrorName(status));
397 return;
398 }
399 backAndForth(iter);
400 ucol_closeElements(iter);
401
402 iter = ucol_openElements(coll, source, -1, &status);
403
404 if (U_FAILURE(status)){
405 log_err("ERROR: in creation of collator element iterator\n %s\n",
406 myErrorName(status));
407 return;
408 }
409 backAndForth(iter);
410 ucol_closeElements(iter);
411 count ++;
412 }
413 ucol_close(coll);
414}
415
416/**
417 * Test for CollationElementIterator.previous()
418 *
419 * @bug 4108758 - Make sure it works with contracting characters
420 *
421 */
422static void TestPrevious()
423{
424 UCollator *coll=NULL;
425 UChar rule[50];
426 UChar *source;
427 UCollator *c1, *c2, *c3;
428 UCollationElements *iter;
429 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
430 UChar test1[50];
431 UChar test2[50];
b75a7d8f 432
b75a7d8f
A
433 u_uastrcpy(test1, "What subset of all possible test cases?");
434 u_uastrcpy(test2, "has the highest probability of detecting");
435 coll = ucol_open("en_US", &status);
436
437 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
438 log_verbose("English locale testing back and forth\n");
439 if(U_FAILURE(status)){
729e4ab9 440 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
441 myErrorName(status));
442 ucol_close(coll);
443 return;
444 }
445 /* A basic test to see if it's working at all */
446 backAndForth(iter);
447 ucol_closeElements(iter);
448 ucol_close(coll);
449
450 /* Test with a contracting character sequence */
451 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
452 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
453
454 log_verbose("Contraction rule testing back and forth with no normalization\n");
455
456 if (c1 == NULL || U_FAILURE(status))
457 {
458 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
459 myErrorName(status));
460 return;
461 }
462 source=(UChar*)malloc(sizeof(UChar) * 20);
463 u_uastrcpy(source, "abchdcba");
464 iter=ucol_openElements(c1, source, u_strlen(source), &status);
465 if(U_FAILURE(status)){
466 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
467 myErrorName(status));
468 return;
469 }
470 backAndForth(iter);
471 ucol_closeElements(iter);
472 ucol_close(c1);
473
474 /* Test with an expanding character sequence */
475 u_uastrcpy(rule, "&a < b < c/abd < d");
476 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
477 log_verbose("Expansion rule testing back and forth with no normalization\n");
478 if (c2 == NULL || U_FAILURE(status))
479 {
480 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
481 myErrorName(status));
482 return;
483 }
484 u_uastrcpy(source, "abcd");
485 iter=ucol_openElements(c2, source, u_strlen(source), &status);
486 if(U_FAILURE(status)){
487 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
488 myErrorName(status));
489 return;
490 }
491 backAndForth(iter);
492 ucol_closeElements(iter);
493 ucol_close(c2);
494 /* Now try both */
495 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
496 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
497 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
498
499 if (c3 == NULL || U_FAILURE(status))
500 {
501 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
502 myErrorName(status));
503 return;
504 }
505 u_uastrcpy(source, "abcdbchdc");
506 iter=ucol_openElements(c3, source, u_strlen(source), &status);
507 if(U_FAILURE(status)){
508 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
509 myErrorName(status));
510 return;
511 }
512 backAndForth(iter);
513 ucol_closeElements(iter);
514 ucol_close(c3);
515 source[0] = 0x0e41;
516 source[1] = 0x0e02;
517 source[2] = 0x0e41;
518 source[3] = 0x0e02;
519 source[4] = 0x0e27;
520 source[5] = 0x61;
521 source[6] = 0x62;
522 source[7] = 0x63;
523 source[8] = 0;
524
525 coll = ucol_open("th_TH", &status);
526 log_verbose("Thai locale testing back and forth with normalization\n");
527 iter=ucol_openElements(coll, source, u_strlen(source), &status);
528 if(U_FAILURE(status)){
529 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
530 myErrorName(status));
531 return;
532 }
533 backAndForth(iter);
534 ucol_closeElements(iter);
535 ucol_close(coll);
536
537 /* prev test */
538 source[0] = 0x0061;
539 source[1] = 0x30CF;
540 source[2] = 0x3099;
541 source[3] = 0x30FC;
542 source[4] = 0;
543
544 coll = ucol_open("ja_JP", &status);
545 log_verbose("Japanese locale testing back and forth with normalization\n");
546 iter=ucol_openElements(coll, source, u_strlen(source), &status);
547 if(U_FAILURE(status)){
548 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
549 myErrorName(status));
550 return;
551 }
552 backAndForth(iter);
553 ucol_closeElements(iter);
554 ucol_close(coll);
555
556 free(source);
b75a7d8f
A
557}
558
559/**
560 * Test for getOffset() and setOffset()
561 */
562static void TestOffset()
563{
564 UErrorCode status= U_ZERO_ERROR;
565 UCollator *en_us=NULL;
566 UCollationElements *iter, *pristine;
567 int32_t offset;
46f4442e 568 OrderAndOffset *orders;
b75a7d8f
A
569 int32_t orderLength=0;
570 int count = 0;
46f4442e
A
571 UChar test1[50];
572 UChar test2[50];
573
b75a7d8f
A
574 u_uastrcpy(test1, "What subset of all possible test cases?");
575 u_uastrcpy(test2, "has the highest probability of detecting");
576 en_us = ucol_open("en_US", &status);
374ca955 577 log_verbose("Testing getOffset and setOffset for collations\n");
b75a7d8f
A
578 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
579 if(U_FAILURE(status)){
729e4ab9 580 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
581 myErrorName(status));
582 ucol_close(en_us);
583 return;
584 }
374ca955
A
585
586 /* testing boundaries */
587 ucol_setOffset(iter, 0, &status);
588 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
589 log_err("Error: After setting offset to 0, we should be at the end "
590 "of the backwards iteration");
591 }
592 ucol_setOffset(iter, u_strlen(test1), &status);
593 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
594 log_err("Error: After setting offset to end of the string, we should "
595 "be at the end of the backwards iteration");
596 }
597
b75a7d8f
A
598 /* Run all the way through the iterator, then get the offset */
599
600 orders = getOrders(iter, &orderLength);
601
602 offset = ucol_getOffset(iter);
603
604 if (offset != u_strlen(test1))
605 {
606 log_err("offset at end != length %d vs %d\n", offset,
607 u_strlen(test1) );
608 }
609
610 /* Now set the offset back to the beginning and see if it works */
611 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
612 if(U_FAILURE(status)){
613 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
614 myErrorName(status));
615 ucol_close(en_us);
616 return;
617 }
618 status = U_ZERO_ERROR;
619
620 ucol_setOffset(iter, 0, &status);
621 if (U_FAILURE(status))
622 {
623 log_err("setOffset failed. %s\n", myErrorName(status));
624 }
625 else
626 {
627 assertEqual(iter, pristine);
628 }
629
630 ucol_closeElements(pristine);
631 ucol_closeElements(iter);
632 free(orders);
633
634 /* testing offsets in normalization buffer */
635 test1[0] = 0x61;
636 test1[1] = 0x300;
637 test1[2] = 0x316;
638 test1[3] = 0x62;
639 test1[4] = 0;
640 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
641 iter = ucol_openElements(en_us, test1, 4, &status);
642 if(U_FAILURE(status)){
643 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
644 myErrorName(status));
645 ucol_close(en_us);
646 return;
647 }
648
649 count = 0;
650 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651 U_SUCCESS(status)) {
652 switch (count) {
653 case 0:
654 if (ucol_getOffset(iter) != 1) {
46f4442e 655 log_err("ERROR: Offset of iteration should be 1\n");
b75a7d8f
A
656 }
657 break;
658 case 3:
659 if (ucol_getOffset(iter) != 4) {
660 log_err("ERROR: Offset of iteration should be 4\n");
661 }
662 break;
663 default:
664 if (ucol_getOffset(iter) != 3) {
665 log_err("ERROR: Offset of iteration should be 3\n");
666 }
667 }
668 count ++;
669 }
670
671 ucol_reset(iter);
672 count = 0;
673 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
674 U_SUCCESS(status)) {
675 switch (count) {
676 case 0:
46f4442e 677 case 1:
b75a7d8f
A
678 if (ucol_getOffset(iter) != 3) {
679 log_err("ERROR: Offset of iteration should be 3\n");
680 }
681 break;
46f4442e
A
682 case 2:
683 if (ucol_getOffset(iter) != 1) {
684 log_err("ERROR: Offset of iteration should be 1\n");
685 }
686 break;
b75a7d8f
A
687 default:
688 if (ucol_getOffset(iter) != 0) {
689 log_err("ERROR: Offset of iteration should be 0\n");
690 }
691 }
692 count ++;
693 }
694
695 if(U_FAILURE(status)){
696 log_err("ERROR: in iterating collation elements %s\n",
697 myErrorName(status));
698 }
699
700 ucol_closeElements(iter);
701 ucol_close(en_us);
b75a7d8f
A
702}
703
704/**
705 * Test for setText()
706 */
707static void TestSetText()
708{
709 int32_t c,i;
710 UErrorCode status = U_ZERO_ERROR;
711 UCollator *en_us=NULL;
712 UCollationElements *iter1, *iter2;
46f4442e
A
713 UChar test1[50];
714 UChar test2[50];
715
b75a7d8f
A
716 u_uastrcpy(test1, "What subset of all possible test cases?");
717 u_uastrcpy(test2, "has the highest probability of detecting");
718 en_us = ucol_open("en_US", &status);
719 log_verbose("testing setText for Collation elements\n");
720 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
721 if(U_FAILURE(status)){
729e4ab9 722 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
b75a7d8f
A
723 myErrorName(status));
724 ucol_close(en_us);
725 return;
726 }
727 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
728 if(U_FAILURE(status)){
729 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
730 myErrorName(status));
731 ucol_close(en_us);
732 return;
733 }
734
735 /* Run through the second iterator just to exercise it */
736 c = ucol_next(iter2, &status);
737 i = 0;
738
739 while ( ++i < 10 && (c != UCOL_NULLORDER))
740 {
741 if (U_FAILURE(status))
742 {
743 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
744 ucol_closeElements(iter2);
745 ucol_closeElements(iter1);
746 ucol_close(en_us);
747 return;
748 }
749
750 c = ucol_next(iter2, &status);
751 }
752
753 /* Now set it to point to the same string as the first iterator */
754 ucol_setText(iter2, test1, u_strlen(test1), &status);
755 if (U_FAILURE(status))
756 {
757 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758 }
759 else
760 {
761 assertEqual(iter1, iter2);
762 }
763
764 /* Now set it to point to a null string with fake length*/
765 ucol_setText(iter2, NULL, 2, &status);
766 if (U_FAILURE(status))
767 {
768 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
769 }
770 else
771 {
772 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774 }
775 }
776
777 ucol_closeElements(iter2);
778 ucol_closeElements(iter1);
779 ucol_close(en_us);
b75a7d8f
A
780}
781
b75a7d8f
A
782/** @bug 4108762
783 * Test for getMaxExpansion()
784 */
785static void TestMaxExpansion()
786{
787 UErrorCode status = U_ZERO_ERROR;
788 UCollator *coll ;/*= ucol_open("en_US", &status);*/
789 UChar ch = 0;
374ca955
A
790 UChar32 unassigned = 0xEFFFD;
791 UChar supplementary[2];
729e4ab9 792 uint32_t stringOffset = 0;
374ca955 793 UBool isError = FALSE;
b75a7d8f
A
794 uint32_t sorder = 0;
795 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
796 uint32_t temporder = 0;
797
798 UChar rule[256];
799 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
800 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
801 UCOL_DEFAULT_STRENGTH,NULL, &status);
802 if(U_SUCCESS(status) && coll) {
803 iter = ucol_openElements(coll, &ch, 1, &status);
804
805 while (ch < 0xFFFF && U_SUCCESS(status)) {
806 int count = 1;
807 uint32_t order;
808 int32_t size = 0;
809
810 ch ++;
811
812 ucol_setText(iter, &ch, 1, &status);
813 order = ucol_previous(iter, &status);
814
815 /* thai management */
816 if (order == 0)
817 order = ucol_previous(iter, &status);
818
819 while (U_SUCCESS(status) &&
820 ucol_previous(iter, &status) != UCOL_NULLORDER) {
821 count ++;
822 }
823
824 size = ucol_getMaxExpansion(iter, order);
825 if (U_FAILURE(status) || size < count) {
826 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
827 ch, count);
828 }
829 }
830
831 /* testing for exact max expansion */
832 ch = 0;
833 while (ch < 0x61) {
834 uint32_t order;
835 int32_t size;
836 ucol_setText(iter, &ch, 1, &status);
837 order = ucol_previous(iter, &status);
838 size = ucol_getMaxExpansion(iter, order);
839 if (U_FAILURE(status) || size != 1) {
840 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
841 ch, 1);
842 }
843 ch ++;
844 }
845
846 ch = 0x63;
847 ucol_setText(iter, &ch, 1, &status);
848 temporder = ucol_previous(iter, &status);
849
850 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
851 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
852 ch, 3);
853 }
854
855 ch = 0x64;
856 ucol_setText(iter, &ch, 1, &status);
857 temporder = ucol_previous(iter, &status);
858
859 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
860 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
861 ch, 3);
862 }
863
729e4ab9 864 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
b75a7d8f
A
865 ucol_setText(iter, supplementary, 2, &status);
866 sorder = ucol_previous(iter, &status);
867
868 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
869 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870 ch, 2);
871 }
872
873 /* testing jamo */
874 ch = 0x1165;
875
876 ucol_setText(iter, &ch, 1, &status);
877 temporder = ucol_previous(iter, &status);
878 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
879 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
880 ch, 3);
881 }
882
883 ucol_closeElements(iter);
884 ucol_close(coll);
885
886 /* testing special jamo &a<\u1160 */
887 rule[0] = 0x26;
888 rule[1] = 0x71;
889 rule[2] = 0x3c;
890 rule[3] = 0x1165;
891 rule[4] = 0x2f;
892 rule[5] = 0x71;
893 rule[6] = 0x71;
894 rule[7] = 0x71;
895 rule[8] = 0x71;
896 rule[9] = 0;
897
898 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
899 UCOL_DEFAULT_STRENGTH,NULL, &status);
900 iter = ucol_openElements(coll, &ch, 1, &status);
901
902 temporder = ucol_previous(iter, &status);
903 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
904 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
905 ch, 5);
906 }
907
908 ucol_closeElements(iter);
909 ucol_close(coll);
910 } else {
729e4ab9 911 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
912 }
913
914}
915
b75a7d8f
A
916
917static void assertEqual(UCollationElements *i1, UCollationElements *i2)
918{
919 int32_t c1, c2;
920 int32_t count = 0;
921 UErrorCode status = U_ZERO_ERROR;
922
923 do
924 {
925 c1 = ucol_next(i1, &status);
926 c2 = ucol_next(i2, &status);
927
928 if (c1 != c2)
929 {
930 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
931 break;
932 }
933
934 count += 1;
935 }
936 while (c1 != UCOL_NULLORDER);
937}
938
939/**
940 * Testing iterators with extremely small buffers
941 */
942static void TestSmallBuffer()
943{
944 UErrorCode status = U_ZERO_ERROR;
945 UCollator *coll;
946 UCollationElements *testiter,
947 *iter;
948 int32_t count = 0;
46f4442e 949 OrderAndOffset *testorders,
b75a7d8f
A
950 *orders;
951
952 UChar teststr[500];
953 UChar str[] = {0x300, 0x31A, 0};
954 /*
955 creating a long string of decomposable characters,
956 since by default the writable buffer is of size 256
957 */
958 while (count < 500) {
959 if ((count & 1) == 0) {
960 teststr[count ++] = 0x300;
961 }
962 else {
963 teststr[count ++] = 0x31A;
964 }
965 }
966
967 coll = ucol_open("th_TH", &status);
968 if(U_SUCCESS(status) && coll) {
969 testiter = ucol_openElements(coll, teststr, 500, &status);
970 iter = ucol_openElements(coll, str, 2, &status);
971
972 orders = getOrders(iter, &count);
973 if (count != 2) {
974 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
975 }
976
977 /*
978 this will rearrange the string data to 250 characters of 0x300 first then
979 250 characters of 0x031A
980 */
981 testorders = getOrders(testiter, &count);
982
983 if (count != 500) {
984 log_err("Error decomposition does not give the right sized collation elements\n");
985 }
986
987 while (count != 0) {
988 /* UCA collation element for 0x0F76 */
46f4442e
A
989 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
990 (count <= 250 && testorders[-- count].order != orders[0].order)) {
b75a7d8f
A
991 log_err("Error decomposition does not give the right collation element at %d count\n", count);
992 break;
993 }
994 }
995
996 free(testorders);
997 free(orders);
998
999 ucol_reset(testiter);
b75a7d8f
A
1000
1001 /* ensures closing of elements done properly to clear writable buffer */
1002 ucol_next(testiter, &status);
1003 ucol_next(testiter, &status);
1004 ucol_closeElements(testiter);
1005 ucol_closeElements(iter);
1006 ucol_close(coll);
1007 } else {
729e4ab9 1008 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
1009 }
1010}
1011
/**
 * Snippet of code from genuca.
 *
 * Converts one hexadecimal digit to its numeric value.
 *
 * @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are digits
 * @return value of the digit (0..15), or 0 for any other character
 */
static int32_t hex2num(char hex) {
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    if ('a' <= hex && hex <= 'f') {
        return hex - 'a' + 10;
    }
    if ('A' <= hex && hex <= 'F') {
        return hex - 'A' + 10;
    }
    /* not a hexadecimal digit */
    return 0;
}
1026
1027/**
1028* Getting codepoints from a string
1029* @param str character string contain codepoints seperated by space and ended
1030* by a semicolon
1031* @param codepoints array for storage, assuming size > 5
1032* @return position at the end of the codepoint section
1033*/
729e4ab9
A
1034static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035 UErrorCode errorCode = U_ZERO_ERROR;
1036 char *semi = uprv_strchr(str, ';');
1037 char *pipe = uprv_strchr(str, '|');
1038 char *s;
1039 *codepoints = 0;
1040 *contextCPs = 0;
1041 if(semi == NULL) {
1042 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043 return str;
1044 }
1045 if(pipe != NULL) {
1046 int32_t contextLength;
1047 *pipe = 0;
1048 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049 *pipe = '|';
1050 if(U_FAILURE(errorCode)) {
1051 log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052 return str;
46f4442e 1053 }
729e4ab9
A
1054 /* prepend the precontext string to the codepoints */
1055 u_memcpy(codepoints, contextCPs, contextLength);
1056 codepoints += contextLength;
1057 /* start of the code point string */
1058 s = pipe + 1;
1059 } else {
1060 s = str;
46f4442e 1061 }
729e4ab9
A
1062 u_parseString(s, codepoints, 99, NULL, &errorCode);
1063 if(U_FAILURE(errorCode)) {
1064 log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065 return str;
b75a7d8f 1066 }
729e4ab9 1067 return semi + 1;
b75a7d8f
A
1068}
1069
1070/**
1071* Sniplets of code from genuca
1072*/
1073static int32_t
1074readElement(char **from, char *to, char separator, UErrorCode *status)
1075{
1076 if (U_SUCCESS(*status)) {
1077 char buffer[1024];
1078 int32_t i = 0;
1079 while (**from != separator) {
1080 if (**from != ' ') {
1081 *(buffer+i++) = **from;
1082 }
1083 (*from)++;
1084 }
1085 (*from)++;
1086 *(buffer + i) = 0;
1087 strcpy(to, buffer);
1088 return i/2;
1089 }
1090
1091 return 0;
1092}
1093
1094/**
1095* Sniplets of code from genuca
1096*/
1097static uint32_t
1098getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099 UErrorCode *status)
1100{
1101 if (U_SUCCESS(*status)) {
1102 uint32_t value = 0;
1103 char primsave = '\0';
1104 char secsave = '\0';
1105 char tersave = '\0';
1106 char *primend = primary+4;
1107 char *secend = secondary+2;
1108 char *terend = tertiary+2;
1109 uint32_t primvalue;
1110 uint32_t secvalue;
1111 uint32_t tervalue;
1112
1113 if (uprv_strlen(primary) > 4) {
1114 primsave = *primend;
1115 *primend = '\0';
1116 }
1117
1118 if (uprv_strlen(secondary) > 2) {
1119 secsave = *secend;
1120 *secend = '\0';
1121 }
1122
1123 if (uprv_strlen(tertiary) > 2) {
1124 tersave = *terend;
1125 *terend = '\0';
1126 }
1127
1128 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131 if(primvalue <= 0xFF) {
1132 primvalue <<= 8;
1133 }
1134
1135 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137 | (tervalue & UCOL_TERTIARYORDERMASK);
1138
1139 if(primsave!='\0') {
1140 *primend = primsave;
1141 }
1142 if(secsave!='\0') {
1143 *secend = secsave;
1144 }
1145 if(tersave!='\0') {
1146 *terend = tersave;
1147 }
1148 return value;
1149 }
1150 return 0;
1151}
1152
1153/**
1154* Getting collation elements generated from a string
1155* @param str character string contain collation elements contained in [] and
1156* seperated by space
1157* @param ce array for storage, assuming size > 20
1158* @param status error status
1159* @return position at the end of the codepoint section
1160*/
1161static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162 char *pStartCP = uprv_strchr(str, '[');
1163 int count = 0;
1164 char *pEndCP;
1165 char primary[100];
1166 char secondary[100];
1167 char tertiary[100];
1168
1169 while (*pStartCP == '[') {
1170 uint32_t primarycount = 0;
1171 uint32_t secondarycount = 0;
1172 uint32_t tertiarycount = 0;
1173 uint32_t CEi = 1;
1174 pEndCP = strchr(pStartCP, ']');
1175 if(pEndCP == NULL) {
1176 break;
1177 }
1178 pStartCP ++;
1179
1180 primarycount = readElement(&pStartCP, primary, ',', status);
1181 secondarycount = readElement(&pStartCP, secondary, ',', status);
1182 tertiarycount = readElement(&pStartCP, tertiary, ']', status);
1183
1184 /* I want to get the CEs entered right here, including continuation */
1185 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186 if (U_FAILURE(*status)) {
1187 break;
1188 }
1189
1190 while (2 * CEi < primarycount || CEi < secondarycount ||
1191 CEi < tertiarycount) {
1192 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193 if (2 * CEi < primarycount) {
1194 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196 }
1197
1198 if (2 * CEi + 1 < primarycount) {
1199 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201 }
1202
1203 if (CEi < secondarycount) {
1204 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206 }
1207
1208 if (CEi < tertiarycount) {
1209 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211 }
1212
1213 CEi ++;
1214 ces[count ++] = value;
1215 }
1216
1217 pStartCP = pEndCP + 1;
1218 }
1219 ces[count] = 0;
1220 return pStartCP;
1221}
1222
1223/**
1224* Getting the FractionalUCA.txt file stream
1225*/
1226static FileStream * getFractionalUCA(void)
1227{
1228 char newPath[256];
1229 char backupPath[256];
1230 FileStream *result = NULL;
1231
1232 /* Look inside ICU_DATA first */
374ca955 1233 uprv_strcpy(newPath, ctest_dataSrcDir());
b75a7d8f
A
1234 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235 uprv_strcat(newPath, "FractionalUCA.txt");
1236
1237 /* As a fallback, try to guess where the source data was located
1238 * at the time ICU was built, and look there.
1239 */
1240#if defined (U_TOPSRCDIR)
1241 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
1242#else
1243 {
1244 UErrorCode errorCode = U_ZERO_ERROR;
1245 strcpy(backupPath, loadTestData(&errorCode));
1246 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247 }
1248#endif
1249 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250
1251 result = T_FileStream_open(newPath, "rb");
1252
1253 if (result == NULL) {
1254 result = T_FileStream_open(backupPath, "rb");
1255 if (result == NULL) {
1256 log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257 }
1258 }
1259 return result;
1260}
1261
1262/**
1263* Testing the CEs returned by the iterator
1264*/
1265static void TestCEs() {
1266 FileStream *file = NULL;
729e4ab9 1267 char line[2048];
b75a7d8f 1268 char *str;
46f4442e 1269 UChar codepoints[10];
b75a7d8f
A
1270 uint32_t ces[20];
1271 UErrorCode status = U_ZERO_ERROR;
1272 UCollator *coll = ucol_open("", &status);
1273 uint32_t lineNo = 0;
46f4442e 1274 UChar contextCPs[5];
b75a7d8f
A
1275
1276 if (U_FAILURE(status)) {
729e4ab9 1277 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
b75a7d8f
A
1278 return;
1279 }
1280
1281 file = getFractionalUCA();
1282
1283 if (file == NULL) {
1284 log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285 return;
1286 }
1287
1288
1289 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290 int count = 0;
1291 UCollationElements *iter;
46f4442e 1292 int32_t preContextCeLen=0;
b75a7d8f
A
1293 lineNo++;
1294 /* skip this line if it is empty or a comment or is a return value
1295 or start of some variable section */
1296 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297 line[0] == 0x000D || line[0] == '[') {
1298 continue;
1299 }
1300
46f4442e 1301 str = getCodePoints(line, codepoints, contextCPs);
b75a7d8f
A
1302
1303 /* these are 'fake' codepoints in the fractional UCA, and are used just
1304 * for positioning of indirect values. They should not go through this
1305 * test.
1306 */
1307 if(*codepoints == 0xFDD0) {
1308 continue;
1309 }
46f4442e
A
1310 if (*contextCPs != 0) {
1311 iter = ucol_openElements(coll, contextCPs, -1, &status);
1312 if (U_FAILURE(status)) {
1313 log_err("Error in opening collation elements\n");
1314 break;
1315 }
1316 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317 preContextCeLen++;
1318 }
1319 ucol_closeElements(iter);
1320 }
b75a7d8f 1321
46f4442e 1322 getCEs(str, ces+preContextCeLen, &status);
b75a7d8f
A
1323 if (U_FAILURE(status)) {
1324 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325 break;
1326 }
1327 iter = ucol_openElements(coll, codepoints, -1, &status);
1328 if (U_FAILURE(status)) {
1329 log_err("Error in opening collation elements\n");
1330 break;
1331 }
1332 for (;;) {
1333 uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334 if (ce == 0xFFFFFFFF) {
1335 ce = 0;
1336 }
1337 /* we now unconditionally reorder Thai/Lao prevowels, so this
1338 * test would fail if we don't skip here.
1339 */
1340 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341 continue;
1342 }
1343 if (ce != ces[count] || U_FAILURE(status)) {
1344 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345 break;
1346 }
1347 if (ces[count] == 0) {
1348 break;
1349 }
1350 count ++;
1351 }
1352 ucol_closeElements(iter);
1353 }
1354
1355 T_FileStream_close(file);
1356 ucol_close(coll);
1357}
1358
1359/**
1360* Testing the discontigous contractions
1361*/
1362static void TestDiscontiguos() {
1363 const char *rulestr =
1364 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365 UChar rule[50];
1366 int rulelen = u_unescape(rulestr, rule, 50);
1367 const char *src[] = {
1368 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369 /* base character blocked */
1370 "XD\\u0300", "XD\\u0300\\u0315",
1371 /* non blocking combining character */
1372 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373 /* blocking combining character */
1374 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375 /* contraction prefix */
1376 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377 "X\\u0300\\u031A\\u0315",
1378 /* ends not with a contraction character */
1379 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381 };
1382 const char *tgt[] = {
1383 /* non blocking combining character */
1384 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385 /* base character blocked */
1386 "X D \\u0300", "X D \\u0300\\u0315",
1387 /* non blocking combining character */
1388 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389 /* blocking combining character */
1390 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391 /* contraction prefix */
1392 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393 "X\\u0300 \\u031A \\u0315",
1394 /* ends not with a contraction character */
1395 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397 };
1398 int size = 20;
1399 UCollator *coll;
1400 UErrorCode status = U_ZERO_ERROR;
1401 int count = 0;
1402 UCollationElements *iter;
1403 UCollationElements *resultiter;
1404
1405 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406 iter = ucol_openElements(coll, rule, 1, &status);
1407 resultiter = ucol_openElements(coll, rule, 1, &status);
1408
1409 if (U_FAILURE(status)) {
729e4ab9 1410 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
b75a7d8f
A
1411 return;
1412 }
1413
1414 while (count < size) {
1415 UChar str[20];
1416 UChar tstr[20];
1417 int strLen = u_unescape(src[count], str, 20);
1418 UChar *s;
1419
1420 ucol_setText(iter, str, strLen, &status);
1421 if (U_FAILURE(status)) {
1422 log_err("Error opening collation iterator\n");
1423 return;
1424 }
1425
1426 u_unescape(tgt[count], tstr, 20);
1427 s = tstr;
1428
1429 log_verbose("count %d\n", count);
1430
1431 for (;;) {
1432 uint32_t ce;
1433 UChar *e = u_strchr(s, 0x20);
1434 if (e == 0) {
1435 e = u_strchr(s, 0);
1436 }
1437 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438 ce = ucol_next(resultiter, &status);
1439 if (U_FAILURE(status)) {
1440 log_err("Error manipulating collation iterator\n");
1441 return;
1442 }
1443 while (ce != UCOL_NULLORDER) {
1444 if (ce != (uint32_t)ucol_next(iter, &status) ||
1445 U_FAILURE(status)) {
1446 log_err("Discontiguos contraction test mismatch\n");
1447 return;
1448 }
1449 ce = ucol_next(resultiter, &status);
1450 if (U_FAILURE(status)) {
1451 log_err("Error getting next collation element\n");
1452 return;
1453 }
1454 }
1455 s = e + 1;
1456 if (*e == 0) {
1457 break;
1458 }
1459 }
1460 ucol_reset(iter);
1461 backAndForth(iter);
1462 count ++;
1463 }
1464 ucol_closeElements(resultiter);
1465 ucol_closeElements(iter);
1466 ucol_close(coll);
1467}
1468
1469static void TestCEBufferOverflow()
1470{
1471 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472 UErrorCode status = U_ZERO_ERROR;
1473 UChar rule[10];
1474 UCollator *coll;
1475 UCollationElements *iter;
1476
1477 u_uastrcpy(rule, "&z < AB");
1478 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479 if (U_FAILURE(status)) {
729e4ab9 1480 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
b75a7d8f
A
1481 return;
1482 }
1483
1484 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485 test. this will cause an overflow in getPrev */
1486 str[0] = 0x0041; /* 'A' */
1487 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
1490 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491 &status);
46f4442e
A
1492 if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493 status == U_BUFFER_OVERFLOW_ERROR) {
1494 log_err("CE buffer should not overflow with long string of trail surrogates\n");
b75a7d8f
A
1495 }
1496 ucol_closeElements(iter);
1497 ucol_close(coll);
1498}
1499
1500/**
729e4ab9 1501* Checking collation element validity.
b75a7d8f 1502*/
729e4ab9
A
1503#define MAX_CODEPOINTS_TO_SHOW 10
1504static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1505 int i, lengthToUse = length;
1506 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1507 lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1508 }
1509 for (i = 0; i < lengthToUse; ++i) {
1510 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1511 if (bytesWritten <= 0) {
1512 break;
b75a7d8f 1513 }
729e4ab9 1514 codepointText += bytesWritten;
b75a7d8f 1515 }
729e4ab9
A
1516 if (i < length) {
1517 sprintf(codepointText, " ...");
b75a7d8f 1518 }
b75a7d8f 1519}
b75a7d8f 1520
b75a7d8f 1521static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
729e4ab9 1522 int length)
b75a7d8f
A
1523{
1524 UErrorCode status = U_ZERO_ERROR;
1525 UCollationElements *iter = ucol_openElements(coll, codepoints, length,
1526 &status);
729e4ab9
A
1527 UBool result = FALSE;
1528 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1529 const char * collLocale;
b75a7d8f
A
1530
1531 if (U_FAILURE(status)) {
1532 log_err("Error creating iterator for testing validity\n");
729e4ab9
A
1533 return FALSE;
1534 }
1535 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1536 if (U_FAILURE(status) || collLocale==NULL) {
1537 status = U_ZERO_ERROR;
1538 collLocale = "?";
b75a7d8f
A
1539 }
1540
729e4ab9
A
1541 for (;;) {
1542 uint32_t ce = ucol_next(iter, &status);
1543 uint32_t primary, p1, p2, secondary, tertiary;
1544 if (ce == UCOL_NULLORDER) {
1545 result = TRUE;
1546 break;
1547 }
1548 if (ce == 0) {
1549 continue;
1550 }
1551 if (ce == 0x02000202) {
1552 /* special CE for merge-sort character */
1553 if (*codepoints == 0xFFFE /* && length == 1 */) {
1554 /*
1555 * Note: We should check for length==1 but the token parser appears
1556 * to give us trailing NUL characters.
1557 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1558 * rather than the internal collation rule parser
1559 */
1560 continue;
1561 } else {
1562 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1563 (int)*codepoints, (int)length);
1564 break;
1565 }
1566 }
1567 primary = UCOL_PRIMARYORDER(ce);
1568 p1 = primary >> 8;
1569 p2 = primary & 0xFF;
1570 secondary = UCOL_SECONDARYORDER(ce);
1571 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1572
1573 if (!isContinuation(ce)) {
1574 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1575 log_err("Empty CE %08lX except for case bits\n", (long)ce);
1576 break;
1577 }
1578 if (p1 == 0) {
1579 if (p2 != 0) {
1580 log_err("Primary 00 xx in %08lX\n", (long)ce);
1581 break;
1582 }
1583 primaryDone = TRUE;
1584 } else {
1585 if (p1 <= 2 || p1 >= 0xF0) {
1586 /* Primary first bytes F0..FF are specials. */
1587 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1588 break;
1589 }
1590 if (p2 == 0) {
1591 primaryDone = TRUE;
1592 } else {
1593 if (p2 <= 3 || p2 >= 0xFF) {
1594 /* Primary second bytes 03 and FF are sort key compression terminators. */
1595 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1596 break;
1597 }
1598 primaryDone = FALSE;
1599 }
1600 }
1601 if (secondary == 0) {
1602 if (primary != 0) {
1603 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1604 break;
1605 }
1606 secondaryDone = TRUE;
1607 } else {
1608 if (secondary <= 2 ||
1609 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
1610 ) {
1611 /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
1612 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1613 break;
1614 }
1615 secondaryDone = FALSE;
1616 }
1617 if (tertiary == 0) {
1618 /* We know that ce != 0. */
1619 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1620 break;
1621 }
1622 if (tertiary <= 2) {
1623 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1624 break;
1625 }
1626 tertiaryDone = FALSE;
1627 } else {
1628 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1629 log_err("Empty continuation %08lX\n", (long)ce);
1630 break;
1631 }
1632 if (primaryDone && primary != 0) {
1633 log_err("Primary was done but continues in %08lX\n", (long)ce);
1634 break;
1635 }
1636 if (p1 == 0) {
1637 if (p2 != 0) {
1638 log_err("Primary 00 xx in %08lX\n", (long)ce);
1639 break;
1640 }
1641 primaryDone = TRUE;
1642 } else {
1643 if (p1 <= 2) {
1644 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1645 break;
1646 }
1647 if (p2 == 0) {
1648 primaryDone = TRUE;
1649 } else {
1650 if (p2 <= 3) {
1651 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1652 break;
1653 }
1654 }
1655 }
1656 if (secondaryDone && secondary != 0) {
1657 log_err("Secondary was done but continues in %08lX\n", (long)ce);
1658 break;
1659 }
1660 if (secondary == 0) {
1661 secondaryDone = TRUE;
1662 } else {
1663 if (secondary <= 2) {
1664 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1665 break;
1666 }
1667 }
1668 if (tertiaryDone && tertiary != 0) {
1669 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1670 break;
1671 }
1672 if (tertiary == 0) {
1673 tertiaryDone = TRUE;
1674 } else if (tertiary <= 2) {
1675 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1676 break;
1677 }
1678 }
1679 }
1680 if (!result) {
1681 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1682 showCodepoints(codepoints, length, codepointText);
1683 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);
1684 }
1685 ucol_closeElements(iter);
1686 return result;
b75a7d8f
A
1687}
1688
1689static void TestCEValidity()
1690{
1691 /* testing UCA collation elements */
1692 UErrorCode status = U_ZERO_ERROR;
1693 /* en_US has no tailorings */
374ca955 1694 UCollator *coll = ucol_open("root", &status);
b75a7d8f 1695 /* tailored locales */
374ca955
A
1696 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1697 const char *loc;
46f4442e 1698 FileStream *file = NULL;
729e4ab9
A
1699 char line[2048];
1700 UChar codepoints[11];
b75a7d8f 1701 int count = 0;
374ca955 1702 int maxCount = 0;
46f4442e 1703 UChar contextCPs[3];
729e4ab9 1704 UChar32 c;
b75a7d8f
A
1705 UParseError parseError;
1706 if (U_FAILURE(status)) {
729e4ab9 1707 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
b75a7d8f
A
1708 return;
1709 }
1710 log_verbose("Testing UCA elements\n");
46f4442e 1711 file = getFractionalUCA();
b75a7d8f
A
1712 if (file == NULL) {
1713 log_err("Fractional UCA data can not be opened\n");
1714 return;
1715 }
1716
1717 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1718 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1719 line[0] == 0x000D || line[0] == '[') {
1720 continue;
1721 }
1722
46f4442e 1723 getCodePoints(line, codepoints, contextCPs);
729e4ab9 1724 checkCEValidity(coll, codepoints, u_strlen(codepoints));
b75a7d8f
A
1725 }
1726
1727 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
729e4ab9
A
1728 for (c = 0; c <= 0xffff; ++c) {
1729 if (u_isdefined(c)) {
1730 codepoints[0] = (UChar)c;
1731 checkCEValidity(coll, codepoints, 1);
1732 }
1733 }
1734 for (; c <= 0x10ffff; ++c) {
1735 if (u_isdefined(c)) {
1736 int32_t i = 0;
1737 U16_APPEND_UNSAFE(codepoints, i, c);
1738 checkCEValidity(coll, codepoints, i);
b75a7d8f 1739 }
b75a7d8f
A
1740 }
1741
1742 ucol_close(coll);
1743
1744 /* testing tailored collation elements */
1745 log_verbose("Testing tailored elements\n");
729e4ab9 1746 if(getTestOption(QUICK_OPTION)) {
374ca955
A
1747 maxCount = sizeof(locale)/sizeof(locale[0]);
1748 } else {
1749 maxCount = uloc_countAvailable();
1750 }
1751 while (count < maxCount) {
b75a7d8f
A
1752 const UChar *rules = NULL,
1753 *current = NULL;
1754 UChar *rulesCopy = NULL;
1755 int32_t ruleLen = 0;
1756
1757 uint32_t chOffset = 0;
1758 uint32_t chLen = 0;
1759 uint32_t exOffset = 0;
1760 uint32_t exLen = 0;
1761 uint32_t prefixOffset = 0;
1762 uint32_t prefixLen = 0;
1763 UBool startOfRules = TRUE;
1764 UColOptionSet opts;
1765
1766 UColTokenParser src;
1767 uint32_t strength = 0;
1768 uint16_t specs = 0;
729e4ab9 1769 if(getTestOption(QUICK_OPTION)) {
374ca955
A
1770 loc = locale[count];
1771 } else {
1772 loc = uloc_getAvailable(count);
1773 if(!hasCollationElements(loc)) {
1774 count++;
1775 continue;
1776 }
1777 }
1778
729e4ab9
A
1779 uprv_memset(&src, 0, sizeof(UColTokenParser));
1780
374ca955 1781 log_verbose("Testing CEs for %s\n", loc);
b75a7d8f 1782
374ca955 1783 coll = ucol_open(loc, &status);
b75a7d8f 1784 if (U_FAILURE(status)) {
374ca955 1785 log_err("%s collator creation failed\n", loc);
b75a7d8f
A
1786 return;
1787 }
1788
1789 src.opts = &opts;
1790 rules = ucol_getRules(coll, &ruleLen);
1791
1792 if (ruleLen > 0) {
729e4ab9 1793 rulesCopy = (UChar *)uprv_malloc((ruleLen +
b75a7d8f
A
1794 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1795 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1796 src.current = src.source = rulesCopy;
1797 src.end = rulesCopy + ruleLen;
1798 src.extraCurrent = src.end;
1799 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1800
729e4ab9
A
1801 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1802 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
b75a7d8f
A
1803 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1804 strength = src.parsedToken.strength;
1805 chOffset = src.parsedToken.charsOffset;
1806 chLen = src.parsedToken.charsLen;
1807 exOffset = src.parsedToken.extensionOffset;
1808 exLen = src.parsedToken.extensionLen;
1809 prefixOffset = src.parsedToken.prefixOffset;
1810 prefixLen = src.parsedToken.prefixLen;
1811 specs = src.parsedToken.flags;
1812
1813 startOfRules = FALSE;
1814 uprv_memcpy(codepoints, src.source + chOffset,
1815 chLen * sizeof(UChar));
1816 codepoints[chLen] = 0;
729e4ab9 1817 checkCEValidity(coll, codepoints, chLen);
b75a7d8f 1818 }
729e4ab9 1819 uprv_free(src.source);
b75a7d8f
A
1820 }
1821
1822 ucol_close(coll);
1823 count ++;
1824 }
1825 T_FileStream_close(file);
1826}
1827
1828static void printSortKeyError(const UChar *codepoints, int length,
1829 uint8_t *sortkey, int sklen)
1830{
1831 int count = 0;
1832 log_err("Sortkey not valid for ");
1833 while (length > 0) {
1834 log_err("0x%04x ", *codepoints);
1835 length --;
1836 codepoints ++;
1837 }
1838 log_err("\nSortkey : ");
1839 while (count < sklen) {
1840 log_err("0x%02x ", sortkey[count]);
1841 count ++;
1842 }
1843 log_err("\n");
1844}
1845
1846/**
1847* Checking sort key validity for all levels
1848*/
1849static UBool checkSortKeyValidity(UCollator *coll,
1850 const UChar *codepoints,
1851 int length)
1852{
1853 UErrorCode status = U_ZERO_ERROR;
1854 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1855 UCOL_TERTIARY, UCOL_QUATERNARY,
1856 UCOL_IDENTICAL};
1857 int strengthlen = 5;
729e4ab9 1858 int strengthIndex = 0;
b75a7d8f
A
1859 int caselevel = 0;
1860
1861 while (caselevel < 1) {
1862 if (caselevel == 0) {
1863 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1864 }
1865 else {
1866 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1867 }
1868
729e4ab9 1869 while (strengthIndex < strengthlen) {
b75a7d8f
A
1870 int count01 = 0;
1871 uint32_t count = 0;
1872 uint8_t sortkey[128];
1873 uint32_t sklen;
1874
729e4ab9 1875 ucol_setStrength(coll, strength[strengthIndex]);
b75a7d8f
A
1876 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1877 while (sortkey[count] != 0) {
729e4ab9 1878 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
b75a7d8f
A
1879 printSortKeyError(codepoints, length, sortkey, sklen);
1880 return FALSE;
1881 }
1882 if (sortkey[count] == 1) {
1883 count01 ++;
1884 }
1885 count ++;
1886 }
1887
729e4ab9 1888 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
b75a7d8f
A
1889 printSortKeyError(codepoints, length, sortkey, sklen);
1890 return FALSE;
1891 }
729e4ab9 1892 strengthIndex ++;
b75a7d8f
A
1893 }
1894 caselevel ++;
1895 }
1896 return TRUE;
1897}
1898
1899static void TestSortKeyValidity(void)
1900{
1901 /* testing UCA collation elements */
1902 UErrorCode status = U_ZERO_ERROR;
1903 /* en_US has no tailorings */
1904 UCollator *coll = ucol_open("en_US", &status);
1905 /* tailored locales */
46f4442e
A
1906 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1907 FileStream *file = NULL;
729e4ab9 1908 char line[2048];
b75a7d8f
A
1909 UChar codepoints[10];
1910 int count = 0;
46f4442e 1911 UChar contextCPs[5];
b75a7d8f
A
1912 UParseError parseError;
1913 if (U_FAILURE(status)) {
729e4ab9 1914 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
b75a7d8f
A
1915 return;
1916 }
1917 log_verbose("Testing UCA elements\n");
46f4442e 1918 file = getFractionalUCA();
b75a7d8f
A
1919 if (file == NULL) {
1920 log_err("Fractional UCA data can not be opened\n");
1921 return;
1922 }
1923
1924 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1925 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1926 line[0] == 0x000D || line[0] == '[') {
1927 continue;
1928 }
1929
46f4442e 1930 getCodePoints(line, codepoints, contextCPs);
729e4ab9
A
1931 if(codepoints[0] == 0xFFFE) {
1932 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1933 continue;
1934 }
b75a7d8f
A
1935 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1936 }
1937
1938 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1939 codepoints[0] = 0;
1940
1941 while (codepoints[0] < 0xFFFF) {
1942 if (u_isdefined((UChar32)codepoints[0])) {
1943 checkSortKeyValidity(coll, codepoints, 1);
1944 }
1945 codepoints[0] ++;
1946 }
1947
1948 ucol_close(coll);
1949
1950 /* testing tailored collation elements */
1951 log_verbose("Testing tailored elements\n");
1952 while (count < 5) {
1953 const UChar *rules = NULL,
1954 *current = NULL;
1955 UChar *rulesCopy = NULL;
1956 int32_t ruleLen = 0;
1957
1958 uint32_t chOffset = 0;
1959 uint32_t chLen = 0;
1960 uint32_t exOffset = 0;
1961 uint32_t exLen = 0;
1962 uint32_t prefixOffset = 0;
1963 uint32_t prefixLen = 0;
1964 UBool startOfRules = TRUE;
1965 UColOptionSet opts;
1966
1967 UColTokenParser src;
1968 uint32_t strength = 0;
1969 uint16_t specs = 0;
1970
729e4ab9
A
1971 uprv_memset(&src, 0, sizeof(UColTokenParser));
1972
b75a7d8f
A
1973 coll = ucol_open(locale[count], &status);
1974 if (U_FAILURE(status)) {
1975 log_err("%s collator creation failed\n", locale[count]);
1976 return;
1977 }
1978
1979 src.opts = &opts;
1980 rules = ucol_getRules(coll, &ruleLen);
1981
1982 if (ruleLen > 0) {
729e4ab9 1983 rulesCopy = (UChar *)uprv_malloc((ruleLen +
b75a7d8f
A
1984 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1985 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1986 src.current = src.source = rulesCopy;
1987 src.end = rulesCopy + ruleLen;
1988 src.extraCurrent = src.end;
1989 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1990
729e4ab9
A
1991 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1992 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
b75a7d8f
A
1993 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1994 strength = src.parsedToken.strength;
1995 chOffset = src.parsedToken.charsOffset;
1996 chLen = src.parsedToken.charsLen;
1997 exOffset = src.parsedToken.extensionOffset;
1998 exLen = src.parsedToken.extensionLen;
1999 prefixOffset = src.parsedToken.prefixOffset;
2000 prefixLen = src.parsedToken.prefixLen;
2001 specs = src.parsedToken.flags;
2002
2003 startOfRules = FALSE;
2004 uprv_memcpy(codepoints, src.source + chOffset,
2005 chLen * sizeof(UChar));
2006 codepoints[chLen] = 0;
729e4ab9
A
2007 if(codepoints[0] == 0xFFFE) {
2008 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2009 continue;
2010 }
b75a7d8f
A
2011 checkSortKeyValidity(coll, codepoints, chLen);
2012 }
729e4ab9 2013 uprv_free(src.source);
b75a7d8f
A
2014 }
2015
2016 ucol_close(coll);
2017 count ++;
2018 }
2019 T_FileStream_close(file);
2020}
2021
729e4ab9
A
2022/**
2023* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
2024* normalization on AND jamo tailoring, among other things.
2025*/
/* Text that the collation element iterators are opened on. */
static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
    0x0020, 0xAC00,                 /* simple LV Hangul */
    0x0020, 0xAC01,                 /* simple LVT Hangul */
    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
    0x0020, 0x00E6,                 /* small letter ae, expands */
    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
    0x0020
};
/* number of UChars in tsceText */
enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };

/*
 * Expected ucol_getOffset() values, one per collation element, in iteration
 * order; each row corresponds to one row of tsceText.
 */
static const int32_t rootStandardOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,
    6,  7,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,
    20, 21,22,23,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };

/*
 * Expected offsets for a search collator with jamo tailorings; only
 * referenced from the disabled (#else) entry of tsceItems below.
 */
static const int32_t rootSearchOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,6,
    6,  7,8,8,8,8,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,20,
    20, 21,22,22,23,23,23,24,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };

/* One test case: a collator locale and the offsets expected for tsceText. */
typedef struct {
    const char *    locale;     /* locale ID passed to ucol_open() */
    const int32_t * offsets;    /* expected offsets */
    int32_t         offsetsLen; /* number of entries in offsets */
} TSCEItem;

/* Test cases; the list is terminated by an entry with a NULL locale. */
static const TSCEItem tsceItems[] = {
    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
#if 1
    /* No jamo tailorings in Apple version of search collator currently */
    { "root@collation=search", rootStandardOffsets, kLen_rootStandardOffsets },
#else
    /* Use this when we do have jamo tailorings */
    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
#endif
    { NULL,                    NULL,                0                        }
};
2090
2091static void TestSearchCollatorElements(void)
2092{
2093 const TSCEItem * tsceItemPtr;
2094 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2095 UErrorCode status = U_ZERO_ERROR;
2096 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2097 if ( U_SUCCESS(status) ) {
2098 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2099 if ( U_SUCCESS(status) ) {
2100 int32_t offset, element;
2101 const int32_t * nextOffsetPtr;
2102 const int32_t * limitOffsetPtr;
2103
2104 nextOffsetPtr = tsceItemPtr->offsets;
2105 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2106 do {
2107 offset = ucol_getOffset(uce);
2108 element = ucol_next(uce, &status);
2109 if ( element == 0 ) {
2110 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2111 }
2112 if ( nextOffsetPtr < limitOffsetPtr ) {
2113 if (offset != *nextOffsetPtr) {
2114 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2115 tsceItemPtr->locale, *nextOffsetPtr, offset );
2116 nextOffsetPtr = limitOffsetPtr;
2117 break;
2118 }
2119 nextOffsetPtr++;
2120 } else {
2121 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2122 }
2123 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2124 if ( nextOffsetPtr < limitOffsetPtr ) {
2125 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2126 }
2127
2128 ucol_setOffset(uce, kLen_tsceText, &status);
2129 status = U_ZERO_ERROR;
2130 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2131 limitOffsetPtr = tsceItemPtr->offsets;
2132 do {
2133 offset = ucol_getOffset(uce);
2134 element = ucol_previous(uce, &status);
2135 if ( element == 0 ) {
2136 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2137 }
2138 if ( nextOffsetPtr > limitOffsetPtr ) {
2139 nextOffsetPtr--;
2140 if (offset != *nextOffsetPtr) {
2141 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2142 tsceItemPtr->locale, *nextOffsetPtr, offset );
2143 nextOffsetPtr = limitOffsetPtr;
2144 break;
2145 }
2146 } else {
2147 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2148 }
2149 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2150 if ( nextOffsetPtr > limitOffsetPtr ) {
2151 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2152 }
2153
2154 ucol_closeElements(uce);
2155 } else {
2156 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2157 }
2158 ucol_close(ucol);
2159 } else {
2160 log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2161 }
2162 }
2163}
2164
b75a7d8f 2165#endif /* #if !UCONFIG_NO_COLLATION */