]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/citertst.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / citertst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
51004dcb 3 * Copyright (c) 1997-2013, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date Name Description
12* Madhu Katragadda Ported for C API
13* 02/19/01 synwee Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
729e4ab9 25#include "unicode/ucoleitr.h"
b75a7d8f
A
26#include "unicode/uloc.h"
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
374ca955
A
29#include "unicode/putil.h"
30#include "callcoll.h"
b75a7d8f
A
31#include "cmemory.h"
32#include "cintltst.h"
33#include "citertst.h"
34#include "ccolltst.h"
35#include "filestrm.h"
36#include "cstring.h"
37#include "ucol_imp.h"
38#include "ucol_tok.h"
729e4ab9 39#include "uparse.h"
b75a7d8f
A
40#include <stdio.h>
41
42extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43
44void addCollIterTest(TestNode** root)
45{
46 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
47 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
48 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
49 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
50 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
51 addTest(root, &TestNormalizedUnicodeChar,
52 "tscoll/citertst/TestNormalizedUnicodeChar");
53 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
54 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
55 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
56 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
57 addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
60 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
61 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
729e4ab9 62 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
b75a7d8f
A
63}
64
/* Locale IDs the TestBug672 tests in this file open a collator for, in order. */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
68
69static void TestBug672() {
70 UErrorCode status = U_ZERO_ERROR;
71 UChar pattern[20];
72 UChar text[50];
73 int i;
74 int result[3][3];
75
76 u_uastrcpy(pattern, "resume");
77 u_uastrcpy(text, "Time to resume updating my resume.");
78
79 for (i = 0; i < 3; ++ i) {
80 UCollator *coll = ucol_open(LOCALES[i], &status);
81 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
82 &status);
83 UCollationElements *titer = ucol_openElements(coll, text, -1,
84 &status);
85 if (U_FAILURE(status)) {
729e4ab9 86 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
87 myErrorName(status));
88 return;
89 }
90
91 log_verbose("locale tested %s\n", LOCALES[i]);
92
93 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
94 U_SUCCESS(status)) {
95 }
96 if (U_FAILURE(status)) {
97 log_err("ERROR: reversing collation iterator :%s\n",
98 myErrorName(status));
99 return;
100 }
101 ucol_reset(pitr);
102
103 ucol_setOffset(titer, u_strlen(pattern), &status);
104 if (U_FAILURE(status)) {
105 log_err("ERROR: setting offset in collator :%s\n",
106 myErrorName(status));
107 return;
108 }
109 result[i][0] = ucol_getOffset(titer);
110 log_verbose("Text iterator set to offset %d\n", result[i][0]);
111
112 /* Use previous() */
113 ucol_previous(titer, &status);
114 result[i][1] = ucol_getOffset(titer);
115 log_verbose("Current offset %d after previous\n", result[i][1]);
116
117 /* Add one to index */
118 log_verbose("Adding one to current offset...\n");
119 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
120 if (U_FAILURE(status)) {
121 log_err("ERROR: setting offset in collator :%s\n",
122 myErrorName(status));
123 return;
124 }
125 result[i][2] = ucol_getOffset(titer);
126 log_verbose("Current offset in text = %d\n", result[i][2]);
127 ucol_closeElements(pitr);
128 ucol_closeElements(titer);
129 ucol_close(coll);
130 }
131
132 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
133 uprv_memcmp(result[1], result[2], 3) != 0) {
134 log_err("ERROR: Different locales have different offsets at the same character\n");
135 }
136}
137
138
139
140/* Running this test with normalization enabled showed up a bug in the incremental
141 normalization code. */
142static void TestBug672Normalize() {
143 UErrorCode status = U_ZERO_ERROR;
144 UChar pattern[20];
145 UChar text[50];
146 int i;
147 int result[3][3];
148
149 u_uastrcpy(pattern, "resume");
150 u_uastrcpy(text, "Time to resume updating my resume.");
151
152 for (i = 0; i < 3; ++ i) {
153 UCollator *coll = ucol_open(LOCALES[i], &status);
154 UCollationElements *pitr = NULL;
155 UCollationElements *titer = NULL;
156
157 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
158
159 pitr = ucol_openElements(coll, pattern, -1, &status);
160 titer = ucol_openElements(coll, text, -1, &status);
161 if (U_FAILURE(status)) {
729e4ab9 162 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
b75a7d8f
A
163 myErrorName(status));
164 return;
165 }
166
167 log_verbose("locale tested %s\n", LOCALES[i]);
168
169 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
170 U_SUCCESS(status)) {
171 }
172 if (U_FAILURE(status)) {
173 log_err("ERROR: reversing collation iterator :%s\n",
174 myErrorName(status));
175 return;
176 }
177 ucol_reset(pitr);
178
179 ucol_setOffset(titer, u_strlen(pattern), &status);
180 if (U_FAILURE(status)) {
181 log_err("ERROR: setting offset in collator :%s\n",
182 myErrorName(status));
183 return;
184 }
185 result[i][0] = ucol_getOffset(titer);
186 log_verbose("Text iterator set to offset %d\n", result[i][0]);
187
188 /* Use previous() */
189 ucol_previous(titer, &status);
190 result[i][1] = ucol_getOffset(titer);
191 log_verbose("Current offset %d after previous\n", result[i][1]);
192
193 /* Add one to index */
194 log_verbose("Adding one to current offset...\n");
195 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
196 if (U_FAILURE(status)) {
197 log_err("ERROR: setting offset in collator :%s\n",
198 myErrorName(status));
199 return;
200 }
201 result[i][2] = ucol_getOffset(titer);
202 log_verbose("Current offset in text = %d\n", result[i][2]);
203 ucol_closeElements(pitr);
204 ucol_closeElements(titer);
205 ucol_close(coll);
206 }
207
208 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
209 uprv_memcmp(result[1], result[2], 3) != 0) {
210 log_err("ERROR: Different locales have different offsets at the same character\n");
211 }
212}
213
214
215
216
217/**
218 * Test for CollationElementIterator previous and next for the whole set of
219 * unicode characters.
220 */
221static void TestUnicodeChar()
222{
223 UChar source[0x100];
224 UCollator *en_us;
225 UCollationElements *iter;
226 UErrorCode status = U_ZERO_ERROR;
227 UChar codepoint;
228
229 UChar *test;
230 en_us = ucol_open("en_US", &status);
231 if (U_FAILURE(status)){
729e4ab9 232 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
b75a7d8f
A
233 myErrorName(status));
234 return;
235 }
236
237 for (codepoint = 1; codepoint < 0xFFFE;)
238 {
239 test = source;
240
241 while (codepoint % 0xFF != 0)
242 {
243 if (u_isdefined(codepoint))
244 *(test ++) = codepoint;
245 codepoint ++;
246 }
247
248 if (u_isdefined(codepoint))
249 *(test ++) = codepoint;
250
251 if (codepoint != 0xFFFF)
252 codepoint ++;
253
254 *test = 0;
255 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
256 if(U_FAILURE(status)){
257 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
258 myErrorName(status));
259 ucol_close(en_us);
260 return;
261 }
262 /* A basic test to see if it's working at all */
263 log_verbose("codepoint testing %x\n", codepoint);
264 backAndForth(iter);
265 ucol_closeElements(iter);
266
267 /* null termination test */
268 iter=ucol_openElements(en_us, source, -1, &status);
269 if(U_FAILURE(status)){
270 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
271 myErrorName(status));
272 ucol_close(en_us);
273 return;
274 }
275 /* A basic test to see if it's working at all */
276 backAndForth(iter);
277 ucol_closeElements(iter);
278 }
279
280 ucol_close(en_us);
281}
282
283/**
284 * Test for CollationElementIterator previous and next for the whole set of
285 * unicode characters with normalization on.
286 */
287static void TestNormalizedUnicodeChar()
288{
289 UChar source[0x100];
290 UCollator *th_th;
291 UCollationElements *iter;
292 UErrorCode status = U_ZERO_ERROR;
293 UChar codepoint;
294
295 UChar *test;
296 /* thai should have normalization on */
297 th_th = ucol_open("th_TH", &status);
298 if (U_FAILURE(status)){
729e4ab9 299 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
b75a7d8f
A
300 myErrorName(status));
301 return;
302 }
303
304 for (codepoint = 1; codepoint < 0xFFFE;)
305 {
306 test = source;
307
308 while (codepoint % 0xFF != 0)
309 {
310 if (u_isdefined(codepoint))
311 *(test ++) = codepoint;
312 codepoint ++;
313 }
314
315 if (u_isdefined(codepoint))
316 *(test ++) = codepoint;
317
318 if (codepoint != 0xFFFF)
319 codepoint ++;
320
321 *test = 0;
322 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
323 if(U_FAILURE(status)){
324 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
325 myErrorName(status));
326 ucol_close(th_th);
327 return;
328 }
329
330 backAndForth(iter);
331 ucol_closeElements(iter);
332
333 iter=ucol_openElements(th_th, source, -1, &status);
334 if(U_FAILURE(status)){
335 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
336 myErrorName(status));
337 ucol_close(th_th);
338 return;
339 }
340
341 backAndForth(iter);
342 ucol_closeElements(iter);
343 }
344
345 ucol_close(th_th);
346}
347
348/**
349* Test the incremental normalization
350*/
351static void TestNormalization()
352{
353 UErrorCode status = U_ZERO_ERROR;
354 const char *str =
355 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
356 UCollator *coll;
357 UChar rule[50];
358 int rulelen = u_unescape(str, rule, 50);
359 int count = 0;
360 const char *testdata[] =
361 {"\\u1ED9", "o\\u0323\\u0302",
362 "\\u0300\\u0315", "\\u0315\\u0300",
363 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
364 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
365 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
366 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
367 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368 int32_t srclen;
369 UChar source[10];
370 UCollationElements *iter;
371
372 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
373 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
374 if (U_FAILURE(status)){
729e4ab9 375 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
b75a7d8f
A
376 myErrorName(status));
377 return;
378 }
379
380 srclen = u_unescape(testdata[0], source, 10);
381 iter = ucol_openElements(coll, source, srclen, &status);
382 backAndForth(iter);
383 ucol_closeElements(iter);
384
385 srclen = u_unescape(testdata[1], source, 10);
386 iter = ucol_openElements(coll, source, srclen, &status);
387 backAndForth(iter);
388 ucol_closeElements(iter);
389
390 while (count < 12) {
391 srclen = u_unescape(testdata[count], source, 10);
392 iter = ucol_openElements(coll, source, srclen, &status);
393
394 if (U_FAILURE(status)){
395 log_err("ERROR: in creation of collator element iterator\n %s\n",
396 myErrorName(status));
397 return;
398 }
399 backAndForth(iter);
400 ucol_closeElements(iter);
401
402 iter = ucol_openElements(coll, source, -1, &status);
403
404 if (U_FAILURE(status)){
405 log_err("ERROR: in creation of collator element iterator\n %s\n",
406 myErrorName(status));
407 return;
408 }
409 backAndForth(iter);
410 ucol_closeElements(iter);
411 count ++;
412 }
413 ucol_close(coll);
414}
415
416/**
417 * Test for CollationElementIterator.previous()
418 *
419 * @bug 4108758 - Make sure it works with contracting characters
420 *
421 */
422static void TestPrevious()
423{
424 UCollator *coll=NULL;
425 UChar rule[50];
426 UChar *source;
427 UCollator *c1, *c2, *c3;
428 UCollationElements *iter;
429 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
430 UChar test1[50];
431 UChar test2[50];
b75a7d8f 432
b75a7d8f
A
433 u_uastrcpy(test1, "What subset of all possible test cases?");
434 u_uastrcpy(test2, "has the highest probability of detecting");
435 coll = ucol_open("en_US", &status);
436
437 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
438 log_verbose("English locale testing back and forth\n");
439 if(U_FAILURE(status)){
729e4ab9 440 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
441 myErrorName(status));
442 ucol_close(coll);
443 return;
444 }
445 /* A basic test to see if it's working at all */
446 backAndForth(iter);
447 ucol_closeElements(iter);
448 ucol_close(coll);
449
450 /* Test with a contracting character sequence */
451 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
452 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
453
454 log_verbose("Contraction rule testing back and forth with no normalization\n");
455
456 if (c1 == NULL || U_FAILURE(status))
457 {
458 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
459 myErrorName(status));
460 return;
461 }
462 source=(UChar*)malloc(sizeof(UChar) * 20);
463 u_uastrcpy(source, "abchdcba");
464 iter=ucol_openElements(c1, source, u_strlen(source), &status);
465 if(U_FAILURE(status)){
466 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
467 myErrorName(status));
468 return;
469 }
470 backAndForth(iter);
471 ucol_closeElements(iter);
472 ucol_close(c1);
473
474 /* Test with an expanding character sequence */
475 u_uastrcpy(rule, "&a < b < c/abd < d");
476 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
477 log_verbose("Expansion rule testing back and forth with no normalization\n");
478 if (c2 == NULL || U_FAILURE(status))
479 {
480 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
481 myErrorName(status));
482 return;
483 }
484 u_uastrcpy(source, "abcd");
485 iter=ucol_openElements(c2, source, u_strlen(source), &status);
486 if(U_FAILURE(status)){
487 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
488 myErrorName(status));
489 return;
490 }
491 backAndForth(iter);
492 ucol_closeElements(iter);
493 ucol_close(c2);
494 /* Now try both */
495 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
496 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
497 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
498
499 if (c3 == NULL || U_FAILURE(status))
500 {
501 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
502 myErrorName(status));
503 return;
504 }
505 u_uastrcpy(source, "abcdbchdc");
506 iter=ucol_openElements(c3, source, u_strlen(source), &status);
507 if(U_FAILURE(status)){
508 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
509 myErrorName(status));
510 return;
511 }
512 backAndForth(iter);
513 ucol_closeElements(iter);
514 ucol_close(c3);
515 source[0] = 0x0e41;
516 source[1] = 0x0e02;
517 source[2] = 0x0e41;
518 source[3] = 0x0e02;
519 source[4] = 0x0e27;
520 source[5] = 0x61;
521 source[6] = 0x62;
522 source[7] = 0x63;
523 source[8] = 0;
524
525 coll = ucol_open("th_TH", &status);
526 log_verbose("Thai locale testing back and forth with normalization\n");
527 iter=ucol_openElements(coll, source, u_strlen(source), &status);
528 if(U_FAILURE(status)){
529 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
530 myErrorName(status));
531 return;
532 }
533 backAndForth(iter);
534 ucol_closeElements(iter);
535 ucol_close(coll);
536
537 /* prev test */
538 source[0] = 0x0061;
539 source[1] = 0x30CF;
540 source[2] = 0x3099;
541 source[3] = 0x30FC;
542 source[4] = 0;
543
544 coll = ucol_open("ja_JP", &status);
545 log_verbose("Japanese locale testing back and forth with normalization\n");
546 iter=ucol_openElements(coll, source, u_strlen(source), &status);
547 if(U_FAILURE(status)){
548 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
549 myErrorName(status));
550 return;
551 }
552 backAndForth(iter);
553 ucol_closeElements(iter);
554 ucol_close(coll);
555
556 free(source);
b75a7d8f
A
557}
558
559/**
560 * Test for getOffset() and setOffset()
561 */
562static void TestOffset()
563{
564 UErrorCode status= U_ZERO_ERROR;
565 UCollator *en_us=NULL;
566 UCollationElements *iter, *pristine;
567 int32_t offset;
46f4442e 568 OrderAndOffset *orders;
b75a7d8f
A
569 int32_t orderLength=0;
570 int count = 0;
46f4442e
A
571 UChar test1[50];
572 UChar test2[50];
573
b75a7d8f
A
574 u_uastrcpy(test1, "What subset of all possible test cases?");
575 u_uastrcpy(test2, "has the highest probability of detecting");
576 en_us = ucol_open("en_US", &status);
374ca955 577 log_verbose("Testing getOffset and setOffset for collations\n");
b75a7d8f
A
578 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
579 if(U_FAILURE(status)){
729e4ab9 580 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
b75a7d8f
A
581 myErrorName(status));
582 ucol_close(en_us);
583 return;
584 }
374ca955
A
585
586 /* testing boundaries */
587 ucol_setOffset(iter, 0, &status);
588 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
589 log_err("Error: After setting offset to 0, we should be at the end "
590 "of the backwards iteration");
591 }
592 ucol_setOffset(iter, u_strlen(test1), &status);
593 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
594 log_err("Error: After setting offset to end of the string, we should "
595 "be at the end of the backwards iteration");
596 }
597
b75a7d8f
A
598 /* Run all the way through the iterator, then get the offset */
599
600 orders = getOrders(iter, &orderLength);
601
602 offset = ucol_getOffset(iter);
603
604 if (offset != u_strlen(test1))
605 {
606 log_err("offset at end != length %d vs %d\n", offset,
607 u_strlen(test1) );
608 }
609
610 /* Now set the offset back to the beginning and see if it works */
611 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
612 if(U_FAILURE(status)){
613 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
614 myErrorName(status));
615 ucol_close(en_us);
616 return;
617 }
618 status = U_ZERO_ERROR;
619
620 ucol_setOffset(iter, 0, &status);
621 if (U_FAILURE(status))
622 {
623 log_err("setOffset failed. %s\n", myErrorName(status));
624 }
625 else
626 {
627 assertEqual(iter, pristine);
628 }
629
630 ucol_closeElements(pristine);
631 ucol_closeElements(iter);
632 free(orders);
633
634 /* testing offsets in normalization buffer */
635 test1[0] = 0x61;
636 test1[1] = 0x300;
637 test1[2] = 0x316;
638 test1[3] = 0x62;
639 test1[4] = 0;
640 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
641 iter = ucol_openElements(en_us, test1, 4, &status);
642 if(U_FAILURE(status)){
643 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
644 myErrorName(status));
645 ucol_close(en_us);
646 return;
647 }
648
649 count = 0;
650 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651 U_SUCCESS(status)) {
652 switch (count) {
653 case 0:
654 if (ucol_getOffset(iter) != 1) {
46f4442e 655 log_err("ERROR: Offset of iteration should be 1\n");
b75a7d8f
A
656 }
657 break;
658 case 3:
659 if (ucol_getOffset(iter) != 4) {
660 log_err("ERROR: Offset of iteration should be 4\n");
661 }
662 break;
663 default:
664 if (ucol_getOffset(iter) != 3) {
665 log_err("ERROR: Offset of iteration should be 3\n");
666 }
667 }
668 count ++;
669 }
670
671 ucol_reset(iter);
672 count = 0;
673 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
674 U_SUCCESS(status)) {
675 switch (count) {
676 case 0:
46f4442e 677 case 1:
b75a7d8f
A
678 if (ucol_getOffset(iter) != 3) {
679 log_err("ERROR: Offset of iteration should be 3\n");
680 }
681 break;
46f4442e
A
682 case 2:
683 if (ucol_getOffset(iter) != 1) {
684 log_err("ERROR: Offset of iteration should be 1\n");
685 }
686 break;
b75a7d8f
A
687 default:
688 if (ucol_getOffset(iter) != 0) {
689 log_err("ERROR: Offset of iteration should be 0\n");
690 }
691 }
692 count ++;
693 }
694
695 if(U_FAILURE(status)){
696 log_err("ERROR: in iterating collation elements %s\n",
697 myErrorName(status));
698 }
699
700 ucol_closeElements(iter);
701 ucol_close(en_us);
b75a7d8f
A
702}
703
704/**
705 * Test for setText()
706 */
707static void TestSetText()
708{
709 int32_t c,i;
710 UErrorCode status = U_ZERO_ERROR;
711 UCollator *en_us=NULL;
712 UCollationElements *iter1, *iter2;
46f4442e
A
713 UChar test1[50];
714 UChar test2[50];
715
b75a7d8f
A
716 u_uastrcpy(test1, "What subset of all possible test cases?");
717 u_uastrcpy(test2, "has the highest probability of detecting");
718 en_us = ucol_open("en_US", &status);
719 log_verbose("testing setText for Collation elements\n");
720 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
721 if(U_FAILURE(status)){
729e4ab9 722 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
b75a7d8f
A
723 myErrorName(status));
724 ucol_close(en_us);
725 return;
726 }
727 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
728 if(U_FAILURE(status)){
729 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
730 myErrorName(status));
731 ucol_close(en_us);
732 return;
733 }
734
735 /* Run through the second iterator just to exercise it */
736 c = ucol_next(iter2, &status);
737 i = 0;
738
739 while ( ++i < 10 && (c != UCOL_NULLORDER))
740 {
741 if (U_FAILURE(status))
742 {
743 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
744 ucol_closeElements(iter2);
745 ucol_closeElements(iter1);
746 ucol_close(en_us);
747 return;
748 }
749
750 c = ucol_next(iter2, &status);
751 }
752
753 /* Now set it to point to the same string as the first iterator */
754 ucol_setText(iter2, test1, u_strlen(test1), &status);
755 if (U_FAILURE(status))
756 {
757 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758 }
759 else
760 {
761 assertEqual(iter1, iter2);
762 }
763
764 /* Now set it to point to a null string with fake length*/
765 ucol_setText(iter2, NULL, 2, &status);
766 if (U_FAILURE(status))
767 {
768 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
769 }
770 else
771 {
772 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774 }
775 }
776
777 ucol_closeElements(iter2);
778 ucol_closeElements(iter1);
779 ucol_close(en_us);
b75a7d8f
A
780}
781
b75a7d8f
A
782/** @bug 4108762
783 * Test for getMaxExpansion()
784 */
785static void TestMaxExpansion()
786{
787 UErrorCode status = U_ZERO_ERROR;
788 UCollator *coll ;/*= ucol_open("en_US", &status);*/
789 UChar ch = 0;
374ca955
A
790 UChar32 unassigned = 0xEFFFD;
791 UChar supplementary[2];
729e4ab9 792 uint32_t stringOffset = 0;
374ca955 793 UBool isError = FALSE;
b75a7d8f
A
794 uint32_t sorder = 0;
795 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
796 uint32_t temporder = 0;
797
798 UChar rule[256];
799 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
800 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
801 UCOL_DEFAULT_STRENGTH,NULL, &status);
802 if(U_SUCCESS(status) && coll) {
803 iter = ucol_openElements(coll, &ch, 1, &status);
804
805 while (ch < 0xFFFF && U_SUCCESS(status)) {
806 int count = 1;
807 uint32_t order;
808 int32_t size = 0;
809
810 ch ++;
811
812 ucol_setText(iter, &ch, 1, &status);
813 order = ucol_previous(iter, &status);
814
815 /* thai management */
816 if (order == 0)
817 order = ucol_previous(iter, &status);
818
819 while (U_SUCCESS(status) &&
820 ucol_previous(iter, &status) != UCOL_NULLORDER) {
821 count ++;
822 }
823
824 size = ucol_getMaxExpansion(iter, order);
825 if (U_FAILURE(status) || size < count) {
826 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
827 ch, count);
828 }
829 }
830
831 /* testing for exact max expansion */
832 ch = 0;
833 while (ch < 0x61) {
834 uint32_t order;
835 int32_t size;
836 ucol_setText(iter, &ch, 1, &status);
837 order = ucol_previous(iter, &status);
838 size = ucol_getMaxExpansion(iter, order);
839 if (U_FAILURE(status) || size != 1) {
840 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
841 ch, 1);
842 }
843 ch ++;
844 }
845
846 ch = 0x63;
847 ucol_setText(iter, &ch, 1, &status);
848 temporder = ucol_previous(iter, &status);
849
850 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
851 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
852 ch, 3);
853 }
854
855 ch = 0x64;
856 ucol_setText(iter, &ch, 1, &status);
857 temporder = ucol_previous(iter, &status);
858
859 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
860 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
861 ch, 3);
862 }
863
729e4ab9 864 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
b75a7d8f
A
865 ucol_setText(iter, supplementary, 2, &status);
866 sorder = ucol_previous(iter, &status);
867
868 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
869 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870 ch, 2);
871 }
872
873 /* testing jamo */
874 ch = 0x1165;
875
876 ucol_setText(iter, &ch, 1, &status);
877 temporder = ucol_previous(iter, &status);
878 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
879 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
880 ch, 3);
881 }
882
883 ucol_closeElements(iter);
884 ucol_close(coll);
885
886 /* testing special jamo &a<\u1160 */
887 rule[0] = 0x26;
888 rule[1] = 0x71;
889 rule[2] = 0x3c;
890 rule[3] = 0x1165;
891 rule[4] = 0x2f;
892 rule[5] = 0x71;
893 rule[6] = 0x71;
894 rule[7] = 0x71;
895 rule[8] = 0x71;
896 rule[9] = 0;
897
898 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
899 UCOL_DEFAULT_STRENGTH,NULL, &status);
900 iter = ucol_openElements(coll, &ch, 1, &status);
901
902 temporder = ucol_previous(iter, &status);
903 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
904 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
905 ch, 5);
906 }
907
908 ucol_closeElements(iter);
909 ucol_close(coll);
910 } else {
729e4ab9 911 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
912 }
913
914}
915
b75a7d8f
A
916
917static void assertEqual(UCollationElements *i1, UCollationElements *i2)
918{
919 int32_t c1, c2;
920 int32_t count = 0;
921 UErrorCode status = U_ZERO_ERROR;
922
923 do
924 {
925 c1 = ucol_next(i1, &status);
926 c2 = ucol_next(i2, &status);
927
928 if (c1 != c2)
929 {
930 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
931 break;
932 }
933
934 count += 1;
935 }
936 while (c1 != UCOL_NULLORDER);
937}
938
939/**
940 * Testing iterators with extremely small buffers
941 */
942static void TestSmallBuffer()
943{
944 UErrorCode status = U_ZERO_ERROR;
945 UCollator *coll;
946 UCollationElements *testiter,
947 *iter;
948 int32_t count = 0;
46f4442e 949 OrderAndOffset *testorders,
b75a7d8f
A
950 *orders;
951
952 UChar teststr[500];
953 UChar str[] = {0x300, 0x31A, 0};
954 /*
955 creating a long string of decomposable characters,
956 since by default the writable buffer is of size 256
957 */
958 while (count < 500) {
959 if ((count & 1) == 0) {
960 teststr[count ++] = 0x300;
961 }
962 else {
963 teststr[count ++] = 0x31A;
964 }
965 }
966
967 coll = ucol_open("th_TH", &status);
968 if(U_SUCCESS(status) && coll) {
969 testiter = ucol_openElements(coll, teststr, 500, &status);
970 iter = ucol_openElements(coll, str, 2, &status);
971
972 orders = getOrders(iter, &count);
973 if (count != 2) {
974 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
975 }
976
977 /*
978 this will rearrange the string data to 250 characters of 0x300 first then
979 250 characters of 0x031A
980 */
981 testorders = getOrders(testiter, &count);
982
983 if (count != 500) {
984 log_err("Error decomposition does not give the right sized collation elements\n");
985 }
986
987 while (count != 0) {
988 /* UCA collation element for 0x0F76 */
46f4442e
A
989 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
990 (count <= 250 && testorders[-- count].order != orders[0].order)) {
b75a7d8f
A
991 log_err("Error decomposition does not give the right collation element at %d count\n", count);
992 break;
993 }
994 }
995
996 free(testorders);
997 free(orders);
998
999 ucol_reset(testiter);
b75a7d8f
A
1000
1001 /* ensures closing of elements done properly to clear writable buffer */
1002 ucol_next(testiter, &status);
1003 ucol_next(testiter, &status);
1004 ucol_closeElements(testiter);
1005 ucol_closeElements(iter);
1006 ucol_close(coll);
1007 } else {
729e4ab9 1008 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
b75a7d8f
A
1009 }
1010}
1011
/**
 * Converts one hexadecimal digit character to its numeric value.
 * (Snippet of code from genuca.)
 *
 * @param hex character '0'-'9', 'a'-'f' or 'A'-'F'
 * @return the value 0..15 of the digit; 0 for any other character
 */
static int32_t hex2num(char hex) {
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    if ('a' <= hex && hex <= 'f') {
        return 10 + (hex - 'a');
    }
    if ('A' <= hex && hex <= 'F') {
        return 10 + (hex - 'A');
    }
    /* not a hex digit */
    return 0;
}
1026
1027/**
1028* Getting codepoints from a string
1029* @param str character string contain codepoints seperated by space and ended
1030* by a semicolon
1031* @param codepoints array for storage, assuming size > 5
1032* @return position at the end of the codepoint section
1033*/
729e4ab9
A
1034static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035 UErrorCode errorCode = U_ZERO_ERROR;
1036 char *semi = uprv_strchr(str, ';');
1037 char *pipe = uprv_strchr(str, '|');
1038 char *s;
1039 *codepoints = 0;
1040 *contextCPs = 0;
1041 if(semi == NULL) {
1042 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043 return str;
1044 }
1045 if(pipe != NULL) {
1046 int32_t contextLength;
1047 *pipe = 0;
1048 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049 *pipe = '|';
1050 if(U_FAILURE(errorCode)) {
1051 log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052 return str;
46f4442e 1053 }
729e4ab9
A
1054 /* prepend the precontext string to the codepoints */
1055 u_memcpy(codepoints, contextCPs, contextLength);
1056 codepoints += contextLength;
1057 /* start of the code point string */
1058 s = pipe + 1;
1059 } else {
1060 s = str;
46f4442e 1061 }
729e4ab9
A
1062 u_parseString(s, codepoints, 99, NULL, &errorCode);
1063 if(U_FAILURE(errorCode)) {
1064 log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065 return str;
b75a7d8f 1066 }
729e4ab9 1067 return semi + 1;
b75a7d8f
A
1068}
1069
1070/**
1071* Sniplets of code from genuca
1072*/
1073static int32_t
1074readElement(char **from, char *to, char separator, UErrorCode *status)
1075{
1076 if (U_SUCCESS(*status)) {
1077 char buffer[1024];
1078 int32_t i = 0;
1079 while (**from != separator) {
1080 if (**from != ' ') {
1081 *(buffer+i++) = **from;
1082 }
1083 (*from)++;
1084 }
1085 (*from)++;
1086 *(buffer + i) = 0;
1087 strcpy(to, buffer);
1088 return i/2;
1089 }
1090
1091 return 0;
1092}
1093
1094/**
1095* Sniplets of code from genuca
1096*/
1097static uint32_t
1098getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099 UErrorCode *status)
1100{
1101 if (U_SUCCESS(*status)) {
1102 uint32_t value = 0;
1103 char primsave = '\0';
1104 char secsave = '\0';
1105 char tersave = '\0';
1106 char *primend = primary+4;
1107 char *secend = secondary+2;
1108 char *terend = tertiary+2;
1109 uint32_t primvalue;
1110 uint32_t secvalue;
1111 uint32_t tervalue;
1112
1113 if (uprv_strlen(primary) > 4) {
1114 primsave = *primend;
1115 *primend = '\0';
1116 }
1117
1118 if (uprv_strlen(secondary) > 2) {
1119 secsave = *secend;
1120 *secend = '\0';
1121 }
1122
1123 if (uprv_strlen(tertiary) > 2) {
1124 tersave = *terend;
1125 *terend = '\0';
1126 }
1127
1128 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131 if(primvalue <= 0xFF) {
1132 primvalue <<= 8;
1133 }
1134
1135 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137 | (tervalue & UCOL_TERTIARYORDERMASK);
1138
1139 if(primsave!='\0') {
1140 *primend = primsave;
1141 }
1142 if(secsave!='\0') {
1143 *secend = secsave;
1144 }
1145 if(tersave!='\0') {
1146 *terend = tersave;
1147 }
1148 return value;
1149 }
1150 return 0;
1151}
1152
1153/**
1154* Getting collation elements generated from a string
1155* @param str character string contain collation elements contained in [] and
1156* seperated by space
1157* @param ce array for storage, assuming size > 20
1158* @param status error status
1159* @return position at the end of the codepoint section
1160*/
1161static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162 char *pStartCP = uprv_strchr(str, '[');
1163 int count = 0;
1164 char *pEndCP;
1165 char primary[100];
1166 char secondary[100];
1167 char tertiary[100];
1168
1169 while (*pStartCP == '[') {
1170 uint32_t primarycount = 0;
1171 uint32_t secondarycount = 0;
1172 uint32_t tertiarycount = 0;
1173 uint32_t CEi = 1;
1174 pEndCP = strchr(pStartCP, ']');
1175 if(pEndCP == NULL) {
1176 break;
1177 }
1178 pStartCP ++;
1179
1180 primarycount = readElement(&pStartCP, primary, ',', status);
1181 secondarycount = readElement(&pStartCP, secondary, ',', status);
1182 tertiarycount = readElement(&pStartCP, tertiary, ']', status);
1183
1184 /* I want to get the CEs entered right here, including continuation */
1185 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186 if (U_FAILURE(*status)) {
1187 break;
1188 }
1189
1190 while (2 * CEi < primarycount || CEi < secondarycount ||
1191 CEi < tertiarycount) {
1192 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193 if (2 * CEi < primarycount) {
1194 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196 }
1197
1198 if (2 * CEi + 1 < primarycount) {
1199 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201 }
1202
1203 if (CEi < secondarycount) {
1204 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206 }
1207
1208 if (CEi < tertiarycount) {
1209 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211 }
1212
1213 CEi ++;
1214 ces[count ++] = value;
1215 }
1216
1217 pStartCP = pEndCP + 1;
1218 }
1219 ces[count] = 0;
1220 return pStartCP;
1221}
1222
1223/**
1224* Getting the FractionalUCA.txt file stream
1225*/
1226static FileStream * getFractionalUCA(void)
1227{
1228 char newPath[256];
1229 char backupPath[256];
1230 FileStream *result = NULL;
1231
1232 /* Look inside ICU_DATA first */
374ca955 1233 uprv_strcpy(newPath, ctest_dataSrcDir());
b75a7d8f
A
1234 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235 uprv_strcat(newPath, "FractionalUCA.txt");
1236
1237 /* As a fallback, try to guess where the source data was located
1238 * at the time ICU was built, and look there.
1239 */
1240#if defined (U_TOPSRCDIR)
1241 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
1242#else
1243 {
1244 UErrorCode errorCode = U_ZERO_ERROR;
1245 strcpy(backupPath, loadTestData(&errorCode));
1246 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247 }
1248#endif
1249 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250
1251 result = T_FileStream_open(newPath, "rb");
1252
1253 if (result == NULL) {
1254 result = T_FileStream_open(backupPath, "rb");
1255 if (result == NULL) {
1256 log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257 }
1258 }
1259 return result;
1260}
1261
1262/**
1263* Testing the CEs returned by the iterator
1264*/
1265static void TestCEs() {
1266 FileStream *file = NULL;
729e4ab9 1267 char line[2048];
b75a7d8f 1268 char *str;
46f4442e 1269 UChar codepoints[10];
b75a7d8f
A
1270 uint32_t ces[20];
1271 UErrorCode status = U_ZERO_ERROR;
1272 UCollator *coll = ucol_open("", &status);
1273 uint32_t lineNo = 0;
46f4442e 1274 UChar contextCPs[5];
b75a7d8f
A
1275
1276 if (U_FAILURE(status)) {
729e4ab9 1277 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
b75a7d8f
A
1278 return;
1279 }
1280
1281 file = getFractionalUCA();
1282
1283 if (file == NULL) {
1284 log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285 return;
1286 }
1287
1288
1289 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290 int count = 0;
1291 UCollationElements *iter;
46f4442e 1292 int32_t preContextCeLen=0;
b75a7d8f
A
1293 lineNo++;
1294 /* skip this line if it is empty or a comment or is a return value
1295 or start of some variable section */
1296 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297 line[0] == 0x000D || line[0] == '[') {
1298 continue;
1299 }
1300
46f4442e 1301 str = getCodePoints(line, codepoints, contextCPs);
b75a7d8f
A
1302
1303 /* these are 'fake' codepoints in the fractional UCA, and are used just
1304 * for positioning of indirect values. They should not go through this
1305 * test.
1306 */
1307 if(*codepoints == 0xFDD0) {
1308 continue;
1309 }
46f4442e
A
1310 if (*contextCPs != 0) {
1311 iter = ucol_openElements(coll, contextCPs, -1, &status);
1312 if (U_FAILURE(status)) {
1313 log_err("Error in opening collation elements\n");
1314 break;
1315 }
1316 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317 preContextCeLen++;
1318 }
1319 ucol_closeElements(iter);
1320 }
b75a7d8f 1321
46f4442e 1322 getCEs(str, ces+preContextCeLen, &status);
b75a7d8f
A
1323 if (U_FAILURE(status)) {
1324 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325 break;
1326 }
1327 iter = ucol_openElements(coll, codepoints, -1, &status);
1328 if (U_FAILURE(status)) {
1329 log_err("Error in opening collation elements\n");
1330 break;
1331 }
1332 for (;;) {
1333 uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334 if (ce == 0xFFFFFFFF) {
1335 ce = 0;
1336 }
1337 /* we now unconditionally reorder Thai/Lao prevowels, so this
1338 * test would fail if we don't skip here.
1339 */
1340 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341 continue;
1342 }
1343 if (ce != ces[count] || U_FAILURE(status)) {
1344 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345 break;
1346 }
1347 if (ces[count] == 0) {
1348 break;
1349 }
1350 count ++;
1351 }
1352 ucol_closeElements(iter);
1353 }
1354
1355 T_FileStream_close(file);
1356 ucol_close(coll);
1357}
1358
1359/**
1360* Testing the discontigous contractions
1361*/
1362static void TestDiscontiguos() {
1363 const char *rulestr =
1364 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365 UChar rule[50];
1366 int rulelen = u_unescape(rulestr, rule, 50);
1367 const char *src[] = {
1368 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369 /* base character blocked */
1370 "XD\\u0300", "XD\\u0300\\u0315",
1371 /* non blocking combining character */
1372 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373 /* blocking combining character */
1374 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375 /* contraction prefix */
1376 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377 "X\\u0300\\u031A\\u0315",
1378 /* ends not with a contraction character */
1379 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381 };
1382 const char *tgt[] = {
1383 /* non blocking combining character */
1384 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385 /* base character blocked */
1386 "X D \\u0300", "X D \\u0300\\u0315",
1387 /* non blocking combining character */
1388 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389 /* blocking combining character */
1390 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391 /* contraction prefix */
1392 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393 "X\\u0300 \\u031A \\u0315",
1394 /* ends not with a contraction character */
1395 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397 };
1398 int size = 20;
1399 UCollator *coll;
1400 UErrorCode status = U_ZERO_ERROR;
1401 int count = 0;
1402 UCollationElements *iter;
1403 UCollationElements *resultiter;
1404
1405 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406 iter = ucol_openElements(coll, rule, 1, &status);
1407 resultiter = ucol_openElements(coll, rule, 1, &status);
1408
1409 if (U_FAILURE(status)) {
729e4ab9 1410 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
b75a7d8f
A
1411 return;
1412 }
1413
1414 while (count < size) {
1415 UChar str[20];
1416 UChar tstr[20];
1417 int strLen = u_unescape(src[count], str, 20);
1418 UChar *s;
1419
1420 ucol_setText(iter, str, strLen, &status);
1421 if (U_FAILURE(status)) {
1422 log_err("Error opening collation iterator\n");
1423 return;
1424 }
1425
1426 u_unescape(tgt[count], tstr, 20);
1427 s = tstr;
1428
1429 log_verbose("count %d\n", count);
1430
1431 for (;;) {
1432 uint32_t ce;
1433 UChar *e = u_strchr(s, 0x20);
1434 if (e == 0) {
1435 e = u_strchr(s, 0);
1436 }
1437 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438 ce = ucol_next(resultiter, &status);
1439 if (U_FAILURE(status)) {
1440 log_err("Error manipulating collation iterator\n");
1441 return;
1442 }
1443 while (ce != UCOL_NULLORDER) {
1444 if (ce != (uint32_t)ucol_next(iter, &status) ||
1445 U_FAILURE(status)) {
1446 log_err("Discontiguos contraction test mismatch\n");
1447 return;
1448 }
1449 ce = ucol_next(resultiter, &status);
1450 if (U_FAILURE(status)) {
1451 log_err("Error getting next collation element\n");
1452 return;
1453 }
1454 }
1455 s = e + 1;
1456 if (*e == 0) {
1457 break;
1458 }
1459 }
1460 ucol_reset(iter);
1461 backAndForth(iter);
1462 count ++;
1463 }
1464 ucol_closeElements(resultiter);
1465 ucol_closeElements(iter);
1466 ucol_close(coll);
1467}
1468
1469static void TestCEBufferOverflow()
1470{
1471 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472 UErrorCode status = U_ZERO_ERROR;
1473 UChar rule[10];
1474 UCollator *coll;
1475 UCollationElements *iter;
1476
1477 u_uastrcpy(rule, "&z < AB");
1478 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479 if (U_FAILURE(status)) {
729e4ab9 1480 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
b75a7d8f
A
1481 return;
1482 }
1483
1484 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485 test. this will cause an overflow in getPrev */
1486 str[0] = 0x0041; /* 'A' */
1487 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
1490 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491 &status);
46f4442e
A
1492 if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493 status == U_BUFFER_OVERFLOW_ERROR) {
1494 log_err("CE buffer should not overflow with long string of trail surrogates\n");
b75a7d8f
A
1495 }
1496 ucol_closeElements(iter);
1497 ucol_close(coll);
1498}
1499
1500/**
729e4ab9 1501* Checking collation element validity.
b75a7d8f 1502*/
729e4ab9
A
1503#define MAX_CODEPOINTS_TO_SHOW 10
1504static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1505 int i, lengthToUse = length;
1506 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1507 lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1508 }
1509 for (i = 0; i < lengthToUse; ++i) {
1510 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1511 if (bytesWritten <= 0) {
1512 break;
b75a7d8f 1513 }
729e4ab9 1514 codepointText += bytesWritten;
b75a7d8f 1515 }
729e4ab9
A
1516 if (i < length) {
1517 sprintf(codepointText, " ...");
b75a7d8f 1518 }
b75a7d8f 1519}
b75a7d8f 1520
/**
 * Iterates over the collation elements that coll produces for a string and
 * checks each of them against a set of well-formedness rules for the
 * primary, secondary and tertiary weights.
 * Zero CEs are skipped, and the special CE 0x02000202 is only accepted when
 * the string starts with U+FFFE. The first violation is logged and ends the
 * check; in that case the collator's valid locale and the code point string
 * are logged as well.
 * @param coll collator whose collation elements are checked
 * @param codepoints string to iterate over
 * @param length number of code units in codepoints
 * @return TRUE if the end of the iteration was reached without a violation,
 *         FALSE otherwise (also if the iterator could not be created)
 */
b75a7d8f 1521static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
729e4ab9 1522 int length)
b75a7d8f
A
1523{
1524 UErrorCode status = U_ZERO_ERROR;
1525 UCollationElements *iter = ucol_openElements(coll, codepoints, length,
1526 &status);
729e4ab9
A
1527 UBool result = FALSE;
1528 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1529 const char * collLocale;
b75a7d8f
A
1530
1531 if (U_FAILURE(status)) {
1532 log_err("Error creating iterator for testing validity\n");
729e4ab9
A
1533 return FALSE;
1534 }
1535 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1536 if (U_FAILURE(status) || collLocale==NULL) {
1537 status = U_ZERO_ERROR;
1538 collLocale = "?";
b75a7d8f
A
1539 }
1540
729e4ab9
A
/* Every break below leaves result == FALSE; only reaching UCOL_NULLORDER sets it to TRUE. */
1541 for (;;) {
1542 uint32_t ce = ucol_next(iter, &status);
1543 uint32_t primary, p1, p2, secondary, tertiary;
1544 if (ce == UCOL_NULLORDER) {
1545 result = TRUE;
1546 break;
1547 }
1548 if (ce == 0) {
1549 continue;
1550 }
1551 if (ce == 0x02000202) {
1552 /* special CE for merge-sort character */
1553 if (*codepoints == 0xFFFE /* && length == 1 */) {
1554 /*
1555 * Note: We should check for length==1 but the token parser appears
1556 * to give us trailing NUL characters.
1557 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1558 * rather than the internal collation rule parser
1559 */
1560 continue;
1561 } else {
1562 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1563 (int)*codepoints, (int)length);
1564 break;
1565 }
1566 }
/* Split the CE: p1/p2 are the upper and lower byte of the primary weight. */
1567 primary = UCOL_PRIMARYORDER(ce);
1568 p1 = primary >> 8;
1569 p2 = primary & 0xFF;
1570 secondary = UCOL_SECONDARYORDER(ce);
1571 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1572
/*
 * First branch: a CE that starts a new element. It (re)sets the xxxDone flags,
 * which record whether a weight has ended (zero byte seen) in this element.
 */
1573 if (!isContinuation(ce)) {
1574 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1575 log_err("Empty CE %08lX except for case bits\n", (long)ce);
1576 break;
1577 }
1578 if (p1 == 0) {
1579 if (p2 != 0) {
1580 log_err("Primary 00 xx in %08lX\n", (long)ce);
1581 break;
1582 }
1583 primaryDone = TRUE;
1584 } else {
1585 if (p1 <= 2 || p1 >= 0xF0) {
1586 /* Primary first bytes F0..FF are specials. */
1587 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1588 break;
1589 }
1590 if (p2 == 0) {
1591 primaryDone = TRUE;
1592 } else {
1593 if (p2 <= 3 || p2 >= 0xFF) {
1594 /* Primary second bytes 03 and FF are sort key compression terminators. */
1595 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1596 break;
1597 }
1598 primaryDone = FALSE;
1599 }
1600 }
1601 if (secondary == 0) {
1602 if (primary != 0) {
1603 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1604 break;
1605 }
1606 secondaryDone = TRUE;
1607 } else {
1608 if (secondary <= 2 ||
1609 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
1610 ) {
1611 /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
1612 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1613 break;
1614 }
1615 secondaryDone = FALSE;
1616 }
1617 if (tertiary == 0) {
1618 /* We know that ce != 0. */
1619 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1620 break;
1621 }
1622 if (tertiary <= 2) {
1623 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1624 break;
1625 }
1626 tertiaryDone = FALSE;
/*
 * Second branch: a continuation CE. A weight that was already marked as done
 * must not carry further non-zero bytes here.
 */
1627 } else {
1628 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1629 log_err("Empty continuation %08lX\n", (long)ce);
1630 break;
1631 }
1632 if (primaryDone && primary != 0) {
1633 log_err("Primary was done but continues in %08lX\n", (long)ce);
1634 break;
1635 }
1636 if (p1 == 0) {
1637 if (p2 != 0) {
1638 log_err("Primary 00 xx in %08lX\n", (long)ce);
1639 break;
1640 }
1641 primaryDone = TRUE;
1642 } else {
1643 if (p1 <= 2) {
1644 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1645 break;
1646 }
1647 if (p2 == 0) {
1648 primaryDone = TRUE;
1649 } else {
1650 if (p2 <= 3) {
1651 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1652 break;
1653 }
1654 }
1655 }
1656 if (secondaryDone && secondary != 0) {
1657 log_err("Secondary was done but continues in %08lX\n", (long)ce);
1658 break;
1659 }
1660 if (secondary == 0) {
1661 secondaryDone = TRUE;
1662 } else {
1663 if (secondary <= 2) {
1664 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1665 break;
1666 }
1667 }
1668 if (tertiaryDone && tertiary != 0) {
1669 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1670 break;
1671 }
1672 if (tertiary == 0) {
1673 tertiaryDone = TRUE;
1674 } else if (tertiary <= 2) {
1675 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1676 break;
1677 }
1678 }
1679 }
/* On failure, add the locale and the (possibly shortened) code point string to the log. */
1680 if (!result) {
1681 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1682 showCodepoints(codepoints, length, codepointText);
1683 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);
1684 }
1685 ucol_closeElements(iter);
1686 return result;
b75a7d8f
A
1687}
1688
51004dcb
A
/* NUL-terminated UChar form of the rule keyword prefix; the tests below search rule strings for it with u_strstr() to skip tailorings that import other rules. */
1689static const UChar IMPORT[] = { 0x5B, 0x69, 0x6D, 0x70, 0x6F, 0x72, 0x74, 0 }; /* "[import" */
1690
b75a7d8f
A
1691static void TestCEValidity()
1692{
1693 /* testing UCA collation elements */
1694 UErrorCode status = U_ZERO_ERROR;
1695 /* en_US has no tailorings */
374ca955 1696 UCollator *coll = ucol_open("root", &status);
b75a7d8f 1697 /* tailored locales */
374ca955
A
1698 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1699 const char *loc;
46f4442e 1700 FileStream *file = NULL;
729e4ab9
A
1701 char line[2048];
1702 UChar codepoints[11];
b75a7d8f 1703 int count = 0;
374ca955 1704 int maxCount = 0;
46f4442e 1705 UChar contextCPs[3];
729e4ab9 1706 UChar32 c;
b75a7d8f
A
1707 UParseError parseError;
1708 if (U_FAILURE(status)) {
729e4ab9 1709 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
b75a7d8f
A
1710 return;
1711 }
1712 log_verbose("Testing UCA elements\n");
46f4442e 1713 file = getFractionalUCA();
b75a7d8f
A
1714 if (file == NULL) {
1715 log_err("Fractional UCA data can not be opened\n");
1716 return;
1717 }
1718
1719 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1720 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1721 line[0] == 0x000D || line[0] == '[') {
1722 continue;
1723 }
1724
46f4442e 1725 getCodePoints(line, codepoints, contextCPs);
729e4ab9 1726 checkCEValidity(coll, codepoints, u_strlen(codepoints));
b75a7d8f
A
1727 }
1728
1729 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
729e4ab9
A
1730 for (c = 0; c <= 0xffff; ++c) {
1731 if (u_isdefined(c)) {
1732 codepoints[0] = (UChar)c;
1733 checkCEValidity(coll, codepoints, 1);
1734 }
1735 }
1736 for (; c <= 0x10ffff; ++c) {
1737 if (u_isdefined(c)) {
1738 int32_t i = 0;
1739 U16_APPEND_UNSAFE(codepoints, i, c);
1740 checkCEValidity(coll, codepoints, i);
b75a7d8f 1741 }
b75a7d8f
A
1742 }
1743
1744 ucol_close(coll);
1745
1746 /* testing tailored collation elements */
1747 log_verbose("Testing tailored elements\n");
729e4ab9 1748 if(getTestOption(QUICK_OPTION)) {
374ca955
A
1749 maxCount = sizeof(locale)/sizeof(locale[0]);
1750 } else {
1751 maxCount = uloc_countAvailable();
1752 }
1753 while (count < maxCount) {
b75a7d8f
A
1754 const UChar *rules = NULL,
1755 *current = NULL;
1756 UChar *rulesCopy = NULL;
1757 int32_t ruleLen = 0;
1758
1759 uint32_t chOffset = 0;
1760 uint32_t chLen = 0;
1761 uint32_t exOffset = 0;
1762 uint32_t exLen = 0;
1763 uint32_t prefixOffset = 0;
1764 uint32_t prefixLen = 0;
1765 UBool startOfRules = TRUE;
1766 UColOptionSet opts;
1767
1768 UColTokenParser src;
1769 uint32_t strength = 0;
1770 uint16_t specs = 0;
729e4ab9 1771 if(getTestOption(QUICK_OPTION)) {
374ca955
A
1772 loc = locale[count];
1773 } else {
1774 loc = uloc_getAvailable(count);
1775 if(!hasCollationElements(loc)) {
1776 count++;
1777 continue;
1778 }
1779 }
51004dcb 1780 status = U_ZERO_ERROR; // clear status from previous loop iteration
374ca955 1781
729e4ab9
A
1782 uprv_memset(&src, 0, sizeof(UColTokenParser));
1783
374ca955 1784 log_verbose("Testing CEs for %s\n", loc);
b75a7d8f 1785
374ca955 1786 coll = ucol_open(loc, &status);
b75a7d8f 1787 if (U_FAILURE(status)) {
51004dcb 1788 log_err("%s collator creation failed with status %s\n", loc, u_errorName(status));
b75a7d8f
A
1789 return;
1790 }
1791
1792 src.opts = &opts;
1793 rules = ucol_getRules(coll, &ruleLen);
1794
51004dcb
A
1795 /*
1796 * We have not set up the UColTokenParser with a callback function
1797 * to fetch [import] sub-rules,
1798 * so skip testing tailorings that import others.
1799 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1800 * rather than the internal collation rule parser
1801 */
1802 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
729e4ab9 1803 rulesCopy = (UChar *)uprv_malloc((ruleLen +
b75a7d8f
A
1804 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1805 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1806 src.current = src.source = rulesCopy;
1807 src.end = rulesCopy + ruleLen;
1808 src.extraCurrent = src.end;
1809 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1810
729e4ab9
A
1811 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1812 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
51004dcb 1813 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL && U_SUCCESS(status)) {
b75a7d8f
A
1814 strength = src.parsedToken.strength;
1815 chOffset = src.parsedToken.charsOffset;
1816 chLen = src.parsedToken.charsLen;
1817 exOffset = src.parsedToken.extensionOffset;
1818 exLen = src.parsedToken.extensionLen;
1819 prefixOffset = src.parsedToken.prefixOffset;
1820 prefixLen = src.parsedToken.prefixLen;
1821 specs = src.parsedToken.flags;
1822
1823 startOfRules = FALSE;
1824 uprv_memcpy(codepoints, src.source + chOffset,
1825 chLen * sizeof(UChar));
1826 codepoints[chLen] = 0;
729e4ab9 1827 checkCEValidity(coll, codepoints, chLen);
b75a7d8f 1828 }
51004dcb
A
1829 if (U_FAILURE(status)) {
1830 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", loc, u_errorName(status));
1831 }
729e4ab9 1832 uprv_free(src.source);
51004dcb 1833 uprv_free(src.reorderCodes);
b75a7d8f
A
1834 }
1835
1836 ucol_close(coll);
1837 count ++;
1838 }
1839 T_FileStream_close(file);
1840}
1841
1842static void printSortKeyError(const UChar *codepoints, int length,
1843 uint8_t *sortkey, int sklen)
1844{
1845 int count = 0;
1846 log_err("Sortkey not valid for ");
1847 while (length > 0) {
1848 log_err("0x%04x ", *codepoints);
1849 length --;
1850 codepoints ++;
1851 }
1852 log_err("\nSortkey : ");
1853 while (count < sklen) {
1854 log_err("0x%02x ", sortkey[count]);
1855 count ++;
1856 }
1857 log_err("\n");
1858}
1859
1860/**
1861* Checking sort key validity for all levels
1862*/
1863static UBool checkSortKeyValidity(UCollator *coll,
1864 const UChar *codepoints,
1865 int length)
1866{
1867 UErrorCode status = U_ZERO_ERROR;
1868 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1869 UCOL_TERTIARY, UCOL_QUATERNARY,
1870 UCOL_IDENTICAL};
1871 int strengthlen = 5;
729e4ab9 1872 int strengthIndex = 0;
b75a7d8f
A
1873 int caselevel = 0;
1874
1875 while (caselevel < 1) {
1876 if (caselevel == 0) {
1877 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1878 }
1879 else {
1880 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1881 }
1882
729e4ab9 1883 while (strengthIndex < strengthlen) {
b75a7d8f
A
1884 int count01 = 0;
1885 uint32_t count = 0;
1886 uint8_t sortkey[128];
1887 uint32_t sklen;
1888
729e4ab9 1889 ucol_setStrength(coll, strength[strengthIndex]);
b75a7d8f
A
1890 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1891 while (sortkey[count] != 0) {
729e4ab9 1892 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
b75a7d8f
A
1893 printSortKeyError(codepoints, length, sortkey, sklen);
1894 return FALSE;
1895 }
1896 if (sortkey[count] == 1) {
1897 count01 ++;
1898 }
1899 count ++;
1900 }
1901
729e4ab9 1902 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
b75a7d8f
A
1903 printSortKeyError(codepoints, length, sortkey, sklen);
1904 return FALSE;
1905 }
729e4ab9 1906 strengthIndex ++;
b75a7d8f
A
1907 }
1908 caselevel ++;
1909 }
1910 return TRUE;
1911}
1912
1913static void TestSortKeyValidity(void)
1914{
1915 /* testing UCA collation elements */
1916 UErrorCode status = U_ZERO_ERROR;
1917 /* en_US has no tailorings */
1918 UCollator *coll = ucol_open("en_US", &status);
1919 /* tailored locales */
46f4442e
A
1920 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1921 FileStream *file = NULL;
729e4ab9 1922 char line[2048];
b75a7d8f
A
1923 UChar codepoints[10];
1924 int count = 0;
46f4442e 1925 UChar contextCPs[5];
b75a7d8f
A
1926 UParseError parseError;
1927 if (U_FAILURE(status)) {
729e4ab9 1928 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
b75a7d8f
A
1929 return;
1930 }
1931 log_verbose("Testing UCA elements\n");
46f4442e 1932 file = getFractionalUCA();
b75a7d8f
A
1933 if (file == NULL) {
1934 log_err("Fractional UCA data can not be opened\n");
1935 return;
1936 }
1937
1938 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1939 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1940 line[0] == 0x000D || line[0] == '[') {
1941 continue;
1942 }
1943
46f4442e 1944 getCodePoints(line, codepoints, contextCPs);
729e4ab9
A
1945 if(codepoints[0] == 0xFFFE) {
1946 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1947 continue;
1948 }
b75a7d8f
A
1949 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1950 }
1951
1952 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1953 codepoints[0] = 0;
1954
1955 while (codepoints[0] < 0xFFFF) {
1956 if (u_isdefined((UChar32)codepoints[0])) {
1957 checkSortKeyValidity(coll, codepoints, 1);
1958 }
1959 codepoints[0] ++;
1960 }
1961
1962 ucol_close(coll);
1963
1964 /* testing tailored collation elements */
1965 log_verbose("Testing tailored elements\n");
1966 while (count < 5) {
1967 const UChar *rules = NULL,
1968 *current = NULL;
1969 UChar *rulesCopy = NULL;
1970 int32_t ruleLen = 0;
1971
1972 uint32_t chOffset = 0;
1973 uint32_t chLen = 0;
1974 uint32_t exOffset = 0;
1975 uint32_t exLen = 0;
1976 uint32_t prefixOffset = 0;
1977 uint32_t prefixLen = 0;
1978 UBool startOfRules = TRUE;
1979 UColOptionSet opts;
1980
1981 UColTokenParser src;
1982 uint32_t strength = 0;
1983 uint16_t specs = 0;
51004dcb 1984 status = U_ZERO_ERROR; // clear status from previous loop iteration
b75a7d8f 1985
729e4ab9
A
1986 uprv_memset(&src, 0, sizeof(UColTokenParser));
1987
b75a7d8f
A
1988 coll = ucol_open(locale[count], &status);
1989 if (U_FAILURE(status)) {
51004dcb 1990 log_err("%s collator creation failed with status %s\n", locale[count], u_errorName(status));
b75a7d8f
A
1991 return;
1992 }
1993
1994 src.opts = &opts;
1995 rules = ucol_getRules(coll, &ruleLen);
1996
51004dcb
A
1997 /*
1998 * We have not set up the UColTokenParser with a callback function
1999 * to fetch [import] sub-rules,
2000 * so skip testing tailorings that import others.
2001 * TODO: Ticket #8047: Change TestSortKeyValidity to use ucol_getTailoredSet()
2002 * rather than the internal collation rule parser
2003 */
2004 if (ruleLen > 0 && u_strstr(rules, IMPORT) == NULL) {
729e4ab9 2005 rulesCopy = (UChar *)uprv_malloc((ruleLen +
b75a7d8f
A
2006 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
2007 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
2008 src.current = src.source = rulesCopy;
2009 src.end = rulesCopy + ruleLen;
2010 src.extraCurrent = src.end;
2011 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2012
729e4ab9
A
2013 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
2014 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
51004dcb 2015 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL && U_SUCCESS(status)) {
b75a7d8f
A
2016 strength = src.parsedToken.strength;
2017 chOffset = src.parsedToken.charsOffset;
2018 chLen = src.parsedToken.charsLen;
2019 exOffset = src.parsedToken.extensionOffset;
2020 exLen = src.parsedToken.extensionLen;
2021 prefixOffset = src.parsedToken.prefixOffset;
2022 prefixLen = src.parsedToken.prefixLen;
2023 specs = src.parsedToken.flags;
2024
2025 startOfRules = FALSE;
2026 uprv_memcpy(codepoints, src.source + chOffset,
2027 chLen * sizeof(UChar));
2028 codepoints[chLen] = 0;
729e4ab9
A
2029 if(codepoints[0] == 0xFFFE) {
2030 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2031 continue;
2032 }
b75a7d8f
A
2033 checkSortKeyValidity(coll, codepoints, chLen);
2034 }
51004dcb
A
2035 if (U_FAILURE(status)) {
2036 log_err("%s collator, ucol_tok_parseNextToken failed with status %s\n", locale[count], u_errorName(status));
2037 }
729e4ab9 2038 uprv_free(src.source);
51004dcb 2039 uprv_free(src.reorderCodes);
b75a7d8f
A
2040 }
2041
2042 ucol_close(coll);
2043 count ++;
2044 }
2045 T_FileStream_close(file);
2046}
2047
729e4ab9
A
2048/**
2049* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
2050* normalization on AND jamo tailoring, among other things.
2051*/
/*
 * Text that TestSearchCollatorElements opens a collation element iterator on.
 * Each item is preceded by U+0020; the text is not NUL-terminated, its length
 * is kLen_tsceText.
 */
2052static const UChar tsceText[] = { /* Nothing in here should be ignorable */
2053 0x0020, 0xAC00, /* simple LV Hangul */
2054 0x0020, 0xAC01, /* simple LVT Hangul */
2055 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
2056 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
2057 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
2058 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
2059 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
2060 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
2061 0x0020, 0x00E6, /* small letter ae, expands */
2062 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
2063 0x0020
2064};
/* number of UChars in tsceText */
2065enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
2066
/*
 * Offsets expected from ucol_getOffset() for the "root" entry of tsceItems.
 * TestSearchCollatorElements compares one entry against the offset it reads
 * before each ucol_next() call, and walks the table in reverse for
 * ucol_previous().  Every row begins with the index of a U+0020 separator in
 * tsceText; the final row is the end-of-text offset.
 */
static const int32_t rootStandardOffsets[] = {
     0,    1,  2,
     2,    3,  4,  4,
     4,    5,  6,  6,
     6,    7,  8,  8,
     8,    9, 10, 11,
    12,   13, 14, 15,
    16,   17, 18, 19,
    20,   21, 22, 23,
    24,   25, 26, 26, 26,
    26,   27, 28, 28,
    28,
    29
};

/* Number of entries in rootStandardOffsets. */
enum { kLen_rootStandardOffsets = sizeof rootStandardOffsets / sizeof *rootStandardOffsets };
2082
/*
 * Offsets expected from ucol_getOffset() for the "root@collation=search"
 * entry of tsceItems.  TestSearchCollatorElements compares one entry against
 * the offset it reads before each ucol_next() call, and walks the table in
 * reverse for ucol_previous().  Every row begins with the index of a U+0020
 * separator in tsceText; the final row is the end-of-text offset.
 */
static const int32_t rootSearchOffsets[] = {
     0,    1,  2,
     2,    3,  4,  4,
     4,    5,  6,  6,  6,
     6,    7,  8,  8,  8,  8,  8,  8,
     8,    9, 10, 11,
    12,   13, 14, 15,
    16,   17, 18, 19, 20,
    20,   21, 22, 22, 23, 23, 23, 24,
    24,   25, 26, 26, 26,
    26,   27, 28, 28,
    28,
    29
};

/* Number of entries in rootSearchOffsets. */
enum { kLen_rootSearchOffsets = sizeof rootSearchOffsets / sizeof *rootSearchOffsets };
2098
/* One TestSearchCollatorElements case: a collator locale and its expected offsets. */
typedef struct {
    const char *    locale;      /* locale ID handed to ucol_open(); NULL ends the table */
    const int32_t * offsets;     /* expected ucol_getOffset() values, forward order */
    int32_t         offsetsLen;  /* number of entries in offsets */
} TSCEItem;
2104
2105static const TSCEItem tsceItems[] = {
2106 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
729e4ab9 2107 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
729e4ab9
A
2108 { NULL, NULL, 0 }
2109};
2110
2111static void TestSearchCollatorElements(void)
2112{
2113 const TSCEItem * tsceItemPtr;
2114 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2115 UErrorCode status = U_ZERO_ERROR;
2116 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2117 if ( U_SUCCESS(status) ) {
2118 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2119 if ( U_SUCCESS(status) ) {
2120 int32_t offset, element;
2121 const int32_t * nextOffsetPtr;
2122 const int32_t * limitOffsetPtr;
2123
2124 nextOffsetPtr = tsceItemPtr->offsets;
2125 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2126 do {
2127 offset = ucol_getOffset(uce);
2128 element = ucol_next(uce, &status);
2129 if ( element == 0 ) {
2130 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2131 }
2132 if ( nextOffsetPtr < limitOffsetPtr ) {
2133 if (offset != *nextOffsetPtr) {
2134 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2135 tsceItemPtr->locale, *nextOffsetPtr, offset );
2136 nextOffsetPtr = limitOffsetPtr;
2137 break;
2138 }
2139 nextOffsetPtr++;
2140 } else {
2141 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2142 }
2143 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2144 if ( nextOffsetPtr < limitOffsetPtr ) {
2145 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2146 }
2147
2148 ucol_setOffset(uce, kLen_tsceText, &status);
2149 status = U_ZERO_ERROR;
2150 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2151 limitOffsetPtr = tsceItemPtr->offsets;
2152 do {
2153 offset = ucol_getOffset(uce);
2154 element = ucol_previous(uce, &status);
2155 if ( element == 0 ) {
2156 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2157 }
2158 if ( nextOffsetPtr > limitOffsetPtr ) {
2159 nextOffsetPtr--;
2160 if (offset != *nextOffsetPtr) {
2161 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2162 tsceItemPtr->locale, *nextOffsetPtr, offset );
2163 nextOffsetPtr = limitOffsetPtr;
2164 break;
2165 }
2166 } else {
2167 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2168 }
2169 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2170 if ( nextOffsetPtr > limitOffsetPtr ) {
2171 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2172 }
2173
2174 ucol_closeElements(uce);
2175 } else {
2176 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2177 }
2178 ucol_close(ucol);
2179 } else {
4388f060 2180 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
729e4ab9
A
2181 }
2182 }
2183}
2184
b75a7d8f 2185#endif /* #if !UCONFIG_NO_COLLATION */