]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cmsccoll.c
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/********************************************************************
4 * COPYRIGHT:
2ca993e8 5 * Copyright (c) 2001-2016, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File cmsccoll.C
11*
12*******************************************************************************/
13/**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "cintltst.h"
28#include "ccolltst.h"
29#include "callcoll.h"
30#include "unicode/ustring.h"
31#include "string.h"
32#include "ucol_imp.h"
b75a7d8f
A
33#include "cmemory.h"
34#include "cstring.h"
374ca955 35#include "uassert.h"
b75a7d8f
A
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
46f4442e 38#include "unicode/ures.h"
729e4ab9 39#include "unicode/uscript.h"
4388f060 40#include "unicode/utf16.h"
b75a7d8f 41#include "uparse.h"
46f4442e
A
42#include "putilimp.h"
43
b75a7d8f 44
374ca955 45#define MAX_TOKEN_LEN 16
b75a7d8f 46
46f4442e 47typedef UCollationResult tst_strcoll(void *collator, const int object,
b75a7d8f
A
48 const UChar *source, const int sLen,
49 const UChar *target, const int tLen);
50
51
b75a7d8f
A
52
53const static char cnt1[][10] = {
54
55 "AA",
56 "AC",
57 "AZ",
58 "AQ",
59 "AB",
60 "ABZ",
61 "ABQ",
62 "Z",
63 "ABC",
64 "Q",
65 "B"
66};
67
68const static char cnt2[][10] = {
69 "DA",
70 "DAD",
71 "DAZ",
72 "MAR",
73 "Z",
74 "DAVIS",
75 "MARK",
76 "DAV",
77 "DAVI"
78};
79
80static void IncompleteCntTest(void)
81{
82 UErrorCode status = U_ZERO_ERROR;
83 UChar temp[90];
84 UChar t1[90];
85 UChar t2[90];
86
87 UCollator *coll = NULL;
88 uint32_t i = 0, j = 0;
89 uint32_t size = 0;
90
91 u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95 if(U_SUCCESS(status)) {
2ca993e8 96 size = UPRV_LENGTHOF(cnt1);
b75a7d8f
A
97 for(i = 0; i < size-1; i++) {
98 for(j = i+1; j < size; j++) {
99 UCollationElements *iter;
100 u_uastrcpy(t1, cnt1[i]);
101 u_uastrcpy(t2, cnt1[j]);
102 doTest(coll, t1, t2, UCOL_LESS);
103 /* synwee : added collation element iterator test */
104 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105 if (U_FAILURE(status)) {
106 log_err("Creation of iterator failed\n");
107 break;
108 }
109 backAndForth(iter);
110 ucol_closeElements(iter);
111 }
112 }
113 }
114
115 ucol_close(coll);
116
117
118 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121 if(U_SUCCESS(status)) {
2ca993e8 122 size = UPRV_LENGTHOF(cnt2);
b75a7d8f
A
123 for(i = 0; i < size-1; i++) {
124 for(j = i+1; j < size; j++) {
125 UCollationElements *iter;
126 u_uastrcpy(t1, cnt2[i]);
127 u_uastrcpy(t2, cnt2[j]);
128 doTest(coll, t1, t2, UCOL_LESS);
129
130 /* synwee : added collation element iterator test */
131 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132 if (U_FAILURE(status)) {
133 log_err("Creation of iterator failed\n");
134 break;
135 }
136 backAndForth(iter);
137 ucol_closeElements(iter);
138 }
139 }
140 }
141
142 ucol_close(coll);
143
144
145}
146
147const static char shifted[][20] = {
148 "black bird",
149 "black-bird",
150 "blackbird",
151 "black Bird",
152 "black-Bird",
153 "blackBird",
154 "black birds",
155 "black-birds",
156 "blackbirds"
157};
158
159const static UCollationResult shiftedTert[] = {
46f4442e 160 UCOL_EQUAL,
b75a7d8f
A
161 UCOL_EQUAL,
162 UCOL_EQUAL,
163 UCOL_LESS,
164 UCOL_EQUAL,
165 UCOL_EQUAL,
166 UCOL_LESS,
167 UCOL_EQUAL,
168 UCOL_EQUAL
169};
170
171const static char nonignorable[][20] = {
172 "black bird",
173 "black Bird",
174 "black birds",
175 "black-bird",
176 "black-Bird",
177 "black-birds",
178 "blackbird",
179 "blackBird",
180 "blackbirds"
181};
182
183static void BlackBirdTest(void) {
184 UErrorCode status = U_ZERO_ERROR;
185 UChar t1[90];
186 UChar t2[90];
187
188 uint32_t i = 0, j = 0;
189 uint32_t size = 0;
190 UCollator *coll = ucol_open("en_US", &status);
191
192 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195 if(U_SUCCESS(status)) {
2ca993e8 196 size = UPRV_LENGTHOF(nonignorable);
b75a7d8f
A
197 for(i = 0; i < size-1; i++) {
198 for(j = i+1; j < size; j++) {
199 u_uastrcpy(t1, nonignorable[i]);
200 u_uastrcpy(t2, nonignorable[j]);
201 doTest(coll, t1, t2, UCOL_LESS);
202 }
203 }
204 }
205
206 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209 if(U_SUCCESS(status)) {
2ca993e8 210 size = UPRV_LENGTHOF(shifted);
b75a7d8f
A
211 for(i = 0; i < size-1; i++) {
212 for(j = i+1; j < size; j++) {
213 u_uastrcpy(t1, shifted[i]);
214 u_uastrcpy(t2, shifted[j]);
215 doTest(coll, t1, t2, UCOL_LESS);
216 }
217 }
218 }
219
220 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221 if(U_SUCCESS(status)) {
2ca993e8 222 size = UPRV_LENGTHOF(shifted);
b75a7d8f
A
223 for(i = 1; i < size; i++) {
224 u_uastrcpy(t1, shifted[i-1]);
225 u_uastrcpy(t2, shifted[i]);
226 doTest(coll, t1, t2, shiftedTert[i]);
227 }
228 }
229
230 ucol_close(coll);
231}
232
233const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0000},
237 {0x00C0, 0x0301, 0x0000},
238 /* this would work with forced normalization */
239 {0x00C0, 0x0316, 0x0000}
240};
241
242const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245 {0x00C0, 0},
246 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247 /* this would work with forced normalization */
248 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249};
250
251const static UCollationResult results[] = {
252 UCOL_GREATER,
253 UCOL_EQUAL,
254 UCOL_EQUAL,
255 UCOL_GREATER,
256 UCOL_EQUAL
257};
258
259static void FunkyATest(void)
260{
261
262 int32_t i;
263 UErrorCode status = U_ZERO_ERROR;
264 UCollator *myCollation;
265 myCollation = ucol_open("en_US", &status);
266 if(U_FAILURE(status)){
729e4ab9 267 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
b75a7d8f
A
268 return;
269 }
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272 ucol_setStrength(myCollation, UCOL_TERTIARY);
273 for (i = 0; i < 4 ; i++)
274 {
275 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276 }
277 ucol_close(myCollation);
278}
279
280UColAttributeValue caseFirst[] = {
281 UCOL_OFF,
282 UCOL_LOWER_FIRST,
283 UCOL_UPPER_FIRST
284};
285
286
287UColAttributeValue alternateHandling[] = {
288 UCOL_NON_IGNORABLE,
289 UCOL_SHIFTED
290};
291
292UColAttributeValue caseLevel[] = {
293 UCOL_OFF,
294 UCOL_ON
295};
296
297UColAttributeValue strengths[] = {
298 UCOL_PRIMARY,
299 UCOL_SECONDARY,
300 UCOL_TERTIARY,
301 UCOL_QUATERNARY,
302 UCOL_IDENTICAL
303};
304
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * case-first, alternate handling, case level and strength.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h, i, j, k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator for en_US: %s\n", myErrorName(status));
        return;
    }

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    m[1] = 0xe4;

    for (i = 0; i < sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for (h = 0; h < UPRV_LENGTHOF(caseFirst); h++) {
        /* Index with h: i still holds sizem from the loop above, which is
         * outside caseFirst[]. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for (i = 0; i < UPRV_LENGTHOF(alternateHandling); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

            for (j = 0; j < UPRV_LENGTHOF(caseLevel); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

                for (k = 0; k < UPRV_LENGTHOF(strengths); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    (void)sortkeysize;
                    fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
                    /* len is passed by pointer and may be updated by the
                     * callee, so restore the full capacity every time. */
                    len = (uint32_t)sizeof(buffer);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }
            }
        }
    }

    ucol_close(coll);
}
#endif
382
383static void BillFairmanTest(void) {
384/*
385** check for actual locale via ICU resource bundles
386**
387** lp points to the original locale ("fr_FR_....")
388*/
389
390 UResourceBundle *lr,*cr;
391 UErrorCode lec = U_ZERO_ERROR;
392 const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394 log_verbose("BillFairmanTest\n");
395
396 lr = ures_open(NULL,lp,&lec);
397 if (lr) {
374ca955 398 cr = ures_getByKey(lr,"collations",0,&lec);
b75a7d8f 399 if (cr) {
729e4ab9 400 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
b75a7d8f
A
401 if (lp) {
402 if (U_SUCCESS(lec)) {
403 if(strcmp(lp, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405 }
406 }
407 }
408 ures_close(cr);
409 }
410 ures_close(lr);
411 }
412}
413
b75a7d8f
A
414const static char chTest[][20] = {
415 "c",
416 "C",
417 "ca", "cb", "cx", "cy", "CZ",
418 "c\\u030C", "C\\u030C",
419 "h",
420 "H",
421 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
422 "ch", "cH", "Ch", "CH",
423 "cha", "charly", "che", "chh", "chch", "chr",
424 "i", "I", "iarly",
425 "r", "R",
426 "r\\u030C", "R\\u030C",
427 "s",
428 "S",
429 "s\\u030C", "S\\u030C",
430 "z", "Z",
431 "z\\u030C", "Z\\u030C"
432};
433
434static void TestChMove(void) {
46f4442e
A
435 UChar t1[256] = {0};
436 UChar t2[256] = {0};
b75a7d8f 437
46f4442e
A
438 uint32_t i = 0, j = 0;
439 uint32_t size = 0;
440 UErrorCode status = U_ZERO_ERROR;
b75a7d8f 441
46f4442e 442 UCollator *coll = ucol_open("cs", &status);
b75a7d8f 443
46f4442e 444 if(U_SUCCESS(status)) {
2ca993e8 445 size = UPRV_LENGTHOF(chTest);
46f4442e
A
446 for(i = 0; i < size-1; i++) {
447 for(j = i+1; j < size; j++) {
448 u_unescape(chTest[i], t1, 256);
449 u_unescape(chTest[j], t2, 256);
450 doTest(coll, t1, t2, UCOL_LESS);
451 }
452 }
b75a7d8f 453 }
46f4442e 454 else {
729e4ab9 455 log_data_err("Can't open collator");
46f4442e
A
456 }
457 ucol_close(coll);
b75a7d8f
A
458}
459
374ca955
A
460
461
462
57a6839d 463/*
b75a7d8f
A
464const static char impTest[][20] = {
465 "\\u4e00",
466 "a",
467 "A",
468 "b",
469 "B",
470 "\\u4e01"
471};
57a6839d 472*/
b75a7d8f
A
473
474
475static void TestImplicitTailoring(void) {
46f4442e 476 static const struct {
374ca955 477 const char *rules;
46f4442e 478 const char *data[10];
374ca955
A
479 const uint32_t len;
480 } tests[] = {
57a6839d
A
481 {
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
374ca955
A
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490 };
491
492 int32_t i = 0;
493
2ca993e8 494 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
495 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496 }
497
498/*
b75a7d8f
A
499 UChar t1[256] = {0};
500 UChar t2[256] = {0};
501
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504 uint32_t i = 0, j = 0;
505 uint32_t size = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
510
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513 if(U_SUCCESS(status)) {
2ca993e8 514 size = UPRV_LENGTHOF(impTest);
b75a7d8f
A
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
520 }
521 }
522 }
523 else {
524 log_err("Can't open collator");
525 }
526 ucol_close(coll);
374ca955 527 */
b75a7d8f
A
528}
529
530static void TestFCDProblem(void) {
531 UChar t1[256] = {0};
532 UChar t2[256] = {0};
533
534 const char *s1 = "\\u0430\\u0306\\u0325";
535 const char *s2 = "\\u04D1\\u0325";
536
537 UErrorCode status = U_ZERO_ERROR;
538 UCollator *coll = ucol_open("", &status);
539 u_unescape(s1, t1, 256);
540 u_unescape(s2, t2, 256);
541
542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543 doTest(coll, t1, t2, UCOL_EQUAL);
544
545 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546 doTest(coll, t1, t2, UCOL_EQUAL);
547
548 ucol_close(coll);
549}
550
46f4442e
A
551/*
552The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553We're only using NFC/NFD in this test.
554*/
555#define NORM_BUFFER_TEST_LEN 18
b75a7d8f
A
556typedef struct {
557 UChar32 u;
558 UChar NFC[NORM_BUFFER_TEST_LEN];
559 UChar NFD[NORM_BUFFER_TEST_LEN];
560} tester;
561
562static void TestComposeDecompose(void) {
46f4442e
A
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 };
b75a7d8f
A
569 int32_t noOfLoc;
570 int32_t i = 0, j = 0;
571
572 UErrorCode status = U_ZERO_ERROR;
b75a7d8f 573 const char *locName = NULL;
b75a7d8f
A
574 uint32_t nfcSize;
575 uint32_t nfdSize;
576 tester **t;
577 uint32_t noCases = 0;
578 UCollator *coll = NULL;
579 UChar32 u = 0;
580 UChar comp[NORM_BUFFER_TEST_LEN];
581 uint32_t len = 0;
374ca955 582 UCollationElements *iter;
46f4442e
A
583 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584 int32_t charsToTestSize;
b75a7d8f
A
585
586 noOfLoc = uloc_countAvailable();
587
b75a7d8f 588 coll = ucol_open("", &status);
729e4ab9
A
589 if (U_FAILURE(status)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
340931cb 591 uset_close(charsToTest);
46f4442e 592 return;
b75a7d8f 593 }
46f4442e
A
594 charsToTestSize = uset_size(charsToTest);
595 if (charsToTestSize <= 0) {
596 log_err("Set was zero. Missing data?\n");
340931cb 597 uset_close(charsToTest);
46f4442e
A
598 return;
599 }
4388f060 600 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
46f4442e
A
601 t[0] = (tester *)malloc(sizeof(tester));
602 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
b75a7d8f 603
46f4442e
A
604 for(u = 0; u < charsToTestSize; u++) {
605 UChar32 ch = uset_charAt(charsToTest, u);
606 len = 0;
4388f060 607 U16_APPEND_UNSAFE(comp, len, ch);
b75a7d8f
A
608 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
609 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
610
374ca955 611 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
b75a7d8f 612 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
46f4442e 613 t[noCases]->u = ch;
b75a7d8f 614 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
46f4442e
A
615 u_strncpy(t[noCases]->NFC, comp, len);
616 t[noCases]->NFC[len] = 0;
b75a7d8f
A
617 }
618 noCases++;
619 t[noCases] = (tester *)malloc(sizeof(tester));
620 uprv_memset(t[noCases], 0, sizeof(tester));
374ca955 621 }
b75a7d8f 622 }
46f4442e
A
623 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
624 uset_close(charsToTest);
625 charsToTest = NULL;
b75a7d8f
A
626
627 for(u=0; u<(UChar32)noCases; u++) {
46f4442e
A
628 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
629 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
630 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
631 }
b75a7d8f
A
632 }
633 /*
46f4442e 634 for(u = 0; u < charsToTestSize; u++) {
b75a7d8f
A
635 if(!(u&0xFFFF)) {
636 log_verbose("%08X ", u);
637 }
638 uprv_memset(t[noCases], 0, sizeof(tester));
639 t[noCases]->u = u;
640 len = 0;
4388f060 641 U16_APPEND_UNSAFE(comp, len, u);
b75a7d8f
A
642 comp[len] = 0;
643 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
644 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
645 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
646 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
647 }
648 */
649
650 ucol_close(coll);
651
652 log_verbose("Testing locales, number of cases = %i\n", noCases);
653 for(i = 0; i<noOfLoc; i++) {
654 status = U_ZERO_ERROR;
655 locName = uloc_getAvailable(i);
656 if(hasCollationElements(locName)) {
657 char cName[256];
658 UChar name[256];
659 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
660
661 for(j = 0; j<nameSize; j++) {
662 cName[j] = (char)name[j];
663 }
664 cName[nameSize] = 0;
665 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
666
667 coll = ucol_open(locName, &status);
668 ucol_setStrength(coll, UCOL_IDENTICAL);
374ca955 669 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
b75a7d8f
A
670
671 for(u=0; u<(UChar32)noCases; u++) {
46f4442e
A
672 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
673 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
674 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
675 log_verbose("Testing NFC\n");
676 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
677 backAndForth(iter);
678 log_verbose("Testing NFD\n");
679 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
680 backAndForth(iter);
681 }
b75a7d8f 682 }
374ca955 683 ucol_closeElements(iter);
b75a7d8f
A
684 ucol_close(coll);
685 }
686 }
687 for(u = 0; u <= (UChar32)noCases; u++) {
688 free(t[u]);
689 }
690 free(t);
691}
692
693static void TestEmptyRule(void) {
694 UErrorCode status = U_ZERO_ERROR;
695 UChar rulez[] = { 0 };
696 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
697
698 ucol_close(coll);
699}
700
701static void TestUCARules(void) {
702 UErrorCode status = U_ZERO_ERROR;
703 UChar b[256];
704 UChar *rules = b;
705 uint32_t ruleLen = 0;
706 UCollator *UCAfromRules = NULL;
707 UCollator *coll = ucol_open("", &status);
708 if(status == U_FILE_ACCESS_ERROR) {
709 log_data_err("Is your data around?\n");
710 return;
711 } else if(U_FAILURE(status)) {
712 log_err("Error opening collator\n");
713 return;
714 }
715 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
716
717 log_verbose("TestUCARules\n");
718 if(ruleLen > 256) {
719 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
720 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
721 }
722 log_verbose("Rules length is %d\n", ruleLen);
723 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
724 if(U_SUCCESS(status)) {
725 ucol_close(UCAfromRules);
726 } else {
727 log_verbose("Unable to create a collator from UCARules!\n");
728 }
729/*
730 u_unescape(blah, b, 256);
731 ucol_getSortKey(coll, b, 1, res, 256);
732*/
733 ucol_close(coll);
734 if(rules != b) {
735 free(rules);
736 }
737}
738
739
740/* Pinyin tonal order */
741/*
742 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
743 (w/macron)< (w/acute)< (w/caron)< (w/grave)
744 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
745 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
746 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
747 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
748 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
749.. (\u00fc)
750
751However, in testing we got the following order:
752 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
753 (w/acute)< (w/grave)< (w/caron)< (w/macron)
754 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
755.. (\u0113)
756 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
757 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
758 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
759.. (\u01d8)
760 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
761*/
762
/*
 * Pinyin tonal order: places the toned vowels before their base letter with
 * [before 1] rules, and the toned forms of u-diaeresis after 'u', then
 * checks the expected ordering through genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *const rules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    /* Expected order, one vowel family per line. */
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };

    genericRulesStarter(rules, data, UPRV_LENGTHOF(data));
}
781
57a6839d
A
#if 0
/* superseded by TestBeforePinyin */
/* Checks the expected order of toned vowels under the "zh" locale. */
static void TestJ784(void) {
    static const char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    const uint32_t count = UPRV_LENGTHOF(data);

    genericLocaleStarter("zh", data, count);
}
#endif
b75a7d8f 797
0f5d89e8 798static void TestUpperCaseFirst(void) {
57a6839d
A
799 const static char *data[] = {
800 "I",
801 "i",
802 "Y",
803 "y"
b75a7d8f 804 };
0f5d89e8 805 genericLocaleStarter("da", data, UPRV_LENGTHOF(data));
57a6839d 806}
b75a7d8f 807
57a6839d
A
808static void TestJ815(void) {
809 const static char *data[] = {
810 "aa",
811 "Aa",
812 "ab",
813 "Ab",
814 "ad",
815 "Ad",
816 "ae",
817 "Ae",
818 "\\u00e6",
819 "\\u00c6",
820 "af",
821 "Af",
822 "b",
823 "B"
b75a7d8f 824 };
2ca993e8
A
825 genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
826 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
b75a7d8f
A
827}
828
57a6839d 829
b75a7d8f
A
830static void TestCase(void)
831{
832 const static UChar gRules[MAX_TOKEN_LEN] =
833 /*" & 0 < 1,\u2461<a,A"*/
834 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
835
836 const static UChar testCase[][MAX_TOKEN_LEN] =
837 {
838 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
839 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
840 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
841 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
842 };
843
844 const static UCollationResult caseTestResults[][9] =
845 {
46f4442e
A
846 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
847 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
848 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
b75a7d8f
A
850 };
851
852 const static UColAttributeValue caseTestAttributes[][2] =
853 {
46f4442e
A
854 { UCOL_LOWER_FIRST, UCOL_OFF},
855 { UCOL_UPPER_FIRST, UCOL_OFF},
856 { UCOL_LOWER_FIRST, UCOL_ON},
857 { UCOL_UPPER_FIRST, UCOL_ON}
b75a7d8f
A
858 };
859 int32_t i,j,k;
860 UErrorCode status = U_ZERO_ERROR;
374ca955 861 UCollationElements *iter;
b75a7d8f
A
862 UCollator *myCollation;
863 myCollation = ucol_open("en_US", &status);
374ca955 864
b75a7d8f 865 if(U_FAILURE(status)){
729e4ab9 866 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
b75a7d8f
A
867 return;
868 }
869 log_verbose("Testing different case settings\n");
870 ucol_setStrength(myCollation, UCOL_TERTIARY);
871
872 for(k = 0; k<4; k++) {
873 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
874 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
875 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
876 for (i = 0; i < 3 ; i++) {
877 for(j = i+1; j<4; j++) {
878 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
879 }
880 }
881 }
882 ucol_close(myCollation);
883
884 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
885 if(U_FAILURE(status)){
886 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
887 return;
888 }
889 log_verbose("Testing different case settings with custom rules\n");
890 ucol_setStrength(myCollation, UCOL_TERTIARY);
891
892 for(k = 0; k<4; k++) {
893 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
894 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
895 for (i = 0; i < 3 ; i++) {
896 for(j = i+1; j<4; j++) {
897 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
898 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
374ca955
A
899 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
900 backAndForth(iter);
901 ucol_closeElements(iter);
902 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
903 backAndForth(iter);
904 ucol_closeElements(iter);
b75a7d8f
A
905 }
906 }
907 }
908 ucol_close(myCollation);
909 {
910 const static char *lowerFirst[] = {
911 "h",
912 "H",
913 "ch",
914 "Ch",
915 "CH",
916 "cha",
917 "chA",
918 "Cha",
919 "ChA",
920 "CHa",
921 "CHA",
922 "i",
923 "I"
924 };
925
926 const static char *upperFirst[] = {
927 "H",
928 "h",
929 "CH",
930 "Ch",
931 "ch",
932 "CHA",
933 "CHa",
934 "ChA",
935 "Cha",
936 "chA",
937 "cha",
938 "I",
939 "i"
940 };
941 log_verbose("mixed case test\n");
942 log_verbose("lower first, case level off\n");
2ca993e8 943 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
b75a7d8f 944 log_verbose("upper first, case level off\n");
2ca993e8 945 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
b75a7d8f 946 log_verbose("lower first, case level on\n");
2ca993e8 947 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
b75a7d8f 948 log_verbose("upper first, case level on\n");
2ca993e8 949 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
b75a7d8f
A
950 }
951
952}
953
954static void TestIncrementalNormalize(void) {
955
956 /*UChar baseA =0x61;*/
957 UChar baseA =0x41;
958/* UChar baseB = 0x42;*/
46f4442e 959 static const UChar ccMix[] = {0x316, 0x321, 0x300};
b75a7d8f
A
960 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
961 /*
962 0x316 is combining grave accent below, cc=220
963 0x321 is combining palatalized hook below, cc=202
964 0x300 is combining grave accent, cc=230
965 */
966
46f4442e
A
967#define MAXSLEN 2000
968 /*int maxSLen = 64000;*/
b75a7d8f
A
969 int sLen;
970 int i;
971
972 UCollator *coll;
973 UErrorCode status = U_ZERO_ERROR;
974 UCollationResult result;
975
729e4ab9 976 int32_t myQ = getTestOption(QUICK_OPTION);
b75a7d8f 977
729e4ab9
A
978 if(getTestOption(QUICK_OPTION) < 0) {
979 setTestOption(QUICK_OPTION, 1);
b75a7d8f
A
980 }
981
982 {
983 /* Test 1. Run very long unnormalized strings, to force overflow of*/
984 /* most buffers along the way.*/
46f4442e
A
985 UChar strA[MAXSLEN+1];
986 UChar strB[MAXSLEN+1];
b75a7d8f
A
987
988 coll = ucol_open("en_US", &status);
989 if(status == U_FILE_ACCESS_ERROR) {
990 log_data_err("Is your data around?\n");
991 return;
992 } else if(U_FAILURE(status)) {
993 log_err("Error opening collator\n");
994 return;
995 }
996 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
997
46f4442e
A
998 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
999 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
b75a7d8f
A
1000 /*for (sLen = 1000; sLen<1001; sLen++) {*/
1001 for (sLen = 500; sLen<501; sLen++) {
1002 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1003 strA[0] = baseA;
1004 strB[0] = baseA;
1005 for (i=1; i<=sLen-1; i++) {
1006 strA[i] = ccMix[i % 3];
1007 strB[sLen-i] = ccMix[i % 3];
1008 }
1009 strA[sLen] = 0;
1010 strB[sLen] = 0;
1011
1012 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
1013 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
1014 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
1015 doTest(coll, strA, strB, UCOL_EQUAL);
1016 }
b75a7d8f
A
1017 }
1018
729e4ab9 1019 setTestOption(QUICK_OPTION, myQ);
b75a7d8f
A
1020
1021
1022 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1023 /* of the string. Checks a couple of edge cases.*/
1024
1025 {
46f4442e
A
1026 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1027 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
b75a7d8f
A
1028 ucol_setStrength(coll, UCOL_TERTIARY);
1029 doTest(coll, strA, strB, UCOL_EQUAL);
1030 }
1031
1032 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1033
1034 {
374ca955
A
1035 /* New UCA 3.1.1.
1036 * test below used a code point from Desseret, which sorts differently
b75a7d8f
A
1037 * than d800 dc00
1038 */
1039 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
46f4442e
A
1040 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1041 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
b75a7d8f
A
1042 ucol_setStrength(coll, UCOL_TERTIARY);
1043 doTest(coll, strA, strB, UCOL_GREATER);
1044 }
1045
1046 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1047
1048 {
46f4442e
A
1049 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1050 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
b75a7d8f
A
1051 char sortKeyA[50];
1052 char sortKeyAz[50];
1053 char sortKeyB[50];
1054 char sortKeyBz[50];
1055 int r;
1056
1057 /* there used to be -3 here. Hmmmm.... */
1058 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1059 result = ucol_strcoll(coll, strA, 3, strB, 3);
1060 if (result != UCOL_GREATER) {
1061 log_err("ERROR 1 in test 4\n");
1062 }
1063 result = ucol_strcoll(coll, strA, -1, strB, -1);
1064 if (result != UCOL_EQUAL) {
1065 log_err("ERROR 2 in test 4\n");
1066 }
1067
1068 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1069 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1070 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1071 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1072
1073 r = strcmp(sortKeyA, sortKeyAz);
1074 if (r <= 0) {
1075 log_err("Error 3 in test 4\n");
1076 }
1077 r = strcmp(sortKeyA, sortKeyB);
1078 if (r <= 0) {
1079 log_err("Error 4 in test 4\n");
1080 }
1081 r = strcmp(sortKeyAz, sortKeyBz);
1082 if (r != 0) {
1083 log_err("Error 5 in test 4\n");
1084 }
1085
1086 ucol_setStrength(coll, UCOL_IDENTICAL);
1087 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1088 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1089 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1090 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1091
1092 r = strcmp(sortKeyA, sortKeyAz);
1093 if (r <= 0) {
1094 log_err("Error 6 in test 4\n");
1095 }
1096 r = strcmp(sortKeyA, sortKeyB);
1097 if (r <= 0) {
1098 log_err("Error 7 in test 4\n");
1099 }
1100 r = strcmp(sortKeyAz, sortKeyBz);
1101 if (r != 0) {
1102 log_err("Error 8 in test 4\n");
1103 }
1104 ucol_setStrength(coll, UCOL_TERTIARY);
1105 }
1106
1107
1108 /* Test 5: Null characters in non-normal source strings.*/
1109
1110 {
46f4442e
A
1111 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1112 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
b75a7d8f
A
1113 char sortKeyA[50];
1114 char sortKeyAz[50];
1115 char sortKeyB[50];
1116 char sortKeyBz[50];
1117 int r;
1118
1119 result = ucol_strcoll(coll, strA, 6, strB, 6);
1120 if (result != UCOL_GREATER) {
1121 log_err("ERROR 1 in test 5\n");
1122 }
1123 result = ucol_strcoll(coll, strA, -1, strB, -1);
1124 if (result != UCOL_EQUAL) {
1125 log_err("ERROR 2 in test 5\n");
1126 }
1127
1128 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1129 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1130 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1131 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1132
1133 r = strcmp(sortKeyA, sortKeyAz);
1134 if (r <= 0) {
1135 log_err("Error 3 in test 5\n");
1136 }
1137 r = strcmp(sortKeyA, sortKeyB);
1138 if (r <= 0) {
1139 log_err("Error 4 in test 5\n");
1140 }
1141 r = strcmp(sortKeyAz, sortKeyBz);
1142 if (r != 0) {
1143 log_err("Error 5 in test 5\n");
1144 }
1145
1146 ucol_setStrength(coll, UCOL_IDENTICAL);
1147 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1148 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1149 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1150 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1151
1152 r = strcmp(sortKeyA, sortKeyAz);
1153 if (r <= 0) {
1154 log_err("Error 6 in test 5\n");
1155 }
1156 r = strcmp(sortKeyA, sortKeyB);
1157 if (r <= 0) {
1158 log_err("Error 7 in test 5\n");
1159 }
1160 r = strcmp(sortKeyAz, sortKeyBz);
1161 if (r != 0) {
1162 log_err("Error 8 in test 5\n");
1163 }
1164 ucol_setStrength(coll, UCOL_TERTIARY);
1165 }
1166
1167
1168 /* Test 6: Null character as base of a non-normal combining sequence.*/
1169
1170 {
46f4442e
A
1171 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1172 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
b75a7d8f
A
1173
1174 result = ucol_strcoll(coll, strA, 5, strB, 5);
1175 if (result != UCOL_LESS) {
1176 log_err("Error 1 in test 6\n");
1177 }
1178 result = ucol_strcoll(coll, strA, -1, strB, -1);
1179 if (result != UCOL_EQUAL) {
1180 log_err("Error 2 in test 6\n");
1181 }
1182 }
1183
1184 ucol_close(coll);
1185}
1186
1187
1188
#if 0
/*
 * Disabled test: unescapes each entry of caseBitData and compares the value
 * returned by ucol_uprv_getCaseBits() for it against the parallel results[]
 * table, logging an error on every mismatch.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    /* Expected case bits, index-aligned with caseBitData. */
    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* Do not query a collator that failed to open. */
    if(U_FAILURE(status)) {
        log_data_err("Couldn't open UCA\n");
        return;
    }

    for(i = 0; i<UPRV_LENGTHOF(results); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator was previously leaked. */
    ucol_close(UCA);
}
#endif
1216
1217static void TestHangulTailoring(void) {
1218 static const char *koreanData[] = {
1219 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1220 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1221 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1222 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1223 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1224 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1225 };
1226
1227 const char *rules =
1228 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1229 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1230 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1231 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1232 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1233 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1234
1235
1236 UErrorCode status = U_ZERO_ERROR;
1237 UChar rlz[2048] = { 0 };
1238 uint32_t rlen = u_unescape(rules, rlz, 2048);
1239
1240 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1241 if(status == U_FILE_ACCESS_ERROR) {
1242 log_data_err("Is your data around?\n");
1243 return;
1244 } else if(U_FAILURE(status)) {
1245 log_err("Error opening collator\n");
1246 return;
1247 }
1248
1249 log_verbose("Using start of korean rules\n");
1250
1251 if(U_SUCCESS(status)) {
2ca993e8 1252 genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
b75a7d8f
A
1253 } else {
1254 log_err("Unable to open collator with rules %s\n", rules);
1255 }
1256
b75a7d8f
A
1257 ucol_close(coll);
1258
1259 log_verbose("Using ko__LOTUS locale\n");
2ca993e8 1260 genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
b75a7d8f
A
1261}
1262
57a6839d
A
/*
 * Middle bytes of the secondary and tertiary compression ranges, as the
 * current implementation uses them. These values follow the sort key
 * compression scheme and may change with it; see class CollationKeys.
 */
enum {
    /* secondary: range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,
    /* tertiary: range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1273
b75a7d8f
A
1274static void TestCompressOverlap(void) {
1275 UChar secstr[150];
1276 UChar tertstr[150];
1277 UErrorCode status = U_ZERO_ERROR;
1278 UCollator *coll;
57a6839d 1279 uint8_t result[500];
b75a7d8f
A
1280 uint32_t resultlen;
1281 int count = 0;
57a6839d 1282 uint8_t *tempptr;
b75a7d8f
A
1283
1284 coll = ucol_open("", &status);
1285
1286 if (U_FAILURE(status)) {
729e4ab9 1287 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
b75a7d8f
A
1288 return;
1289 }
1290 while (count < 149) {
1291 secstr[count] = 0x0020; /* [06, 05, 05] */
1292 tertstr[count] = 0x0020;
1293 count ++;
1294 }
1295
1296 /* top down compression ----------------------------------- */
1297 secstr[count] = 0x0332; /* [, 87, 05] */
1298 tertstr[count] = 0x3000; /* [06, 05, 07] */
1299
1300 /* no compression secstr should have 150 secondary bytes, tertstr should
1301 have 150 tertiary bytes.
57a6839d
A
1302 with correct compression, secstr should have 6 secondary
1303 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
2ca993e8 1304 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
57a6839d
A
1305 (void)resultlen; /* Suppress set but not used warning. */
1306 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
b75a7d8f
A
1307 while (*(tempptr + 1) != 1) {
1308 /* the last secondary collation element is not checked since it is not
1309 part of the compression */
57a6839d
A
1310 if (*tempptr < SEC_COMMON_MIDDLE) {
1311 log_err("Secondary top down compression overlapped\n");
b75a7d8f
A
1312 }
1313 tempptr ++;
1314 }
1315
1316 /* tertiary top/bottom/common for en_US is similar to the secondary
1317 top/bottom/common */
2ca993e8 1318 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1319 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
b75a7d8f
A
1320 while (*(tempptr + 1) != 0) {
1321 /* the last secondary collation element is not checked since it is not
1322 part of the compression */
57a6839d
A
1323 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1324 log_err("Tertiary top down compression overlapped\n");
b75a7d8f
A
1325 }
1326 tempptr ++;
1327 }
1328
1329 /* bottom up compression ------------------------------------- */
1330 secstr[count] = 0;
1331 tertstr[count] = 0;
2ca993e8 1332 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1333 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
b75a7d8f
A
1334 while (*(tempptr + 1) != 1) {
1335 /* the last secondary collation element is not checked since it is not
1336 part of the compression */
57a6839d
A
1337 if (*tempptr > SEC_COMMON_MIDDLE) {
1338 log_err("Secondary bottom up compression overlapped\n");
b75a7d8f
A
1339 }
1340 tempptr ++;
1341 }
1342
1343 /* tertiary top/bottom/common for en_US is similar to the secondary
1344 top/bottom/common */
2ca993e8 1345 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1346 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
b75a7d8f
A
1347 while (*(tempptr + 1) != 0) {
1348 /* the last secondary collation element is not checked since it is not
1349 part of the compression */
57a6839d
A
1350 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1351 log_err("Tertiary bottom up compression overlapped\n");
b75a7d8f
A
1352 }
1353 tempptr ++;
1354 }
1355
1356 ucol_close(coll);
1357}
1358
1359static void TestCyrillicTailoring(void) {
1360 static const char *test[] = {
1361 "\\u0410b",
1362 "\\u0410\\u0306a",
1363 "\\u04d0A"
1364 };
1365
1366 /* Russian overrides contractions, so this test is not valid anymore */
374ca955 1367 /*genericLocaleStarter("ru", test, 3);*/
b75a7d8f 1368
b331163b
A
1369 // Most of the following are commented out because UCA 8.0
1370 // drops most of the Cyrillic contractions from the default order.
1371 // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
1372
1373 // genericLocaleStarter("root", test, 3);
1374 // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
1375 // genericRulesStarter("&Z < \\u0410", test, 3);
b75a7d8f
A
1376 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
1377 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
b331163b
A
1378 // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
1379 // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
b75a7d8f
A
1380}
1381
/*
 * Passes two three-string lists to genericRulesStarter() with a rule that
 * suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* The rule string is shared by both runs. */
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
1398
1399static void TestContraction(void) {
1400 const static char *testrules[] = {
1401 "&A = AB / B",
1402 "&A = A\\u0306/\\u0306",
1403 "&c = ch / h"
1404 };
1405 const static UChar testdata[][2] = {
1406 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1407 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1408 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1409 };
1410 const static UChar testdata2[][2] = {
1411 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1412 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1413 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1414 };
57a6839d
A
1415#if 0
1416 /*
1417 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1418 * In fact, LDML 24 recommends an improved way of creating mappings
1419 * which always yields different mappings for such pairs. See
1420 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1421 */
b75a7d8f
A
1422 const static char *testrules3[] = {
1423 "&z < xyz &xyzw << B",
1424 "&z < xyz &xyz << B / w",
1425 "&z < ch &achm << B",
1426 "&z < ch &a << B / chm",
1427 "&\\ud800\\udc00w << B",
1428 "&\\ud800\\udc00 << B / w",
1429 "&a\\ud800\\udc00m << B",
1430 "&a << B / \\ud800\\udc00m",
1431 };
57a6839d 1432#endif
b75a7d8f
A
1433
1434 UErrorCode status = U_ZERO_ERROR;
1435 UCollator *coll;
1436 UChar rule[256] = {0};
1437 uint32_t rlen = 0;
1438 int i;
1439
2ca993e8 1440 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
b75a7d8f
A
1441 UCollationElements *iter1;
1442 int j = 0;
1443 log_verbose("Rule %s for testing\n", testrules[i]);
1444 rlen = u_unescape(testrules[i], rule, 32);
1445 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1446 if (U_FAILURE(status)) {
729e4ab9 1447 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
b75a7d8f
A
1448 return;
1449 }
1450 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1451 if (U_FAILURE(status)) {
1452 log_err("Collation iterator creation failed\n");
1453 return;
1454 }
1455 while (j < 2) {
1456 UCollationElements *iter2 = ucol_openElements(coll,
1457 &(testdata[i][j]),
1458 1, &status);
340931cb 1459 int32_t ce;
b75a7d8f
A
1460 if (U_FAILURE(status)) {
1461 log_err("Collation iterator creation failed\n");
1462 return;
1463 }
1464 ce = ucol_next(iter2, &status);
1465 while (ce != UCOL_NULLORDER) {
340931cb 1466 if (ucol_next(iter1, &status) != ce) {
b75a7d8f
A
1467 log_err("Collation elements in contraction split does not match\n");
1468 return;
1469 }
1470 ce = ucol_next(iter2, &status);
1471 }
1472 j ++;
1473 ucol_closeElements(iter2);
1474 }
1475 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1476 log_err("Collation elements not exhausted\n");
1477 return;
1478 }
1479 ucol_closeElements(iter1);
1480 ucol_close(coll);
1481 }
1482
1483 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1484 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1485 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1486 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1487 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1488 testdata2[1][1]);
1489 return;
1490 }
1491 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1492 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1493 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1494 testdata2[2][1]);
1495 return;
1496 }
1497 ucol_close(coll);
57a6839d 1498#if 0 /* see above */
2ca993e8 1499 for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
57a6839d 1500 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
b75a7d8f
A
1501 UCollator *coll1,
1502 *coll2;
1503 UCollationElements *iter1,
1504 *iter2;
1505 UChar ch = 0x0042 /* 'B' */;
1506 uint32_t ce;
1507 rlen = u_unescape(testrules3[i], rule, 32);
1508 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509 rlen = u_unescape(testrules3[i + 1], rule, 32);
1510 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1511 if (U_FAILURE(status)) {
1512 log_err("Collator creation failed %s\n", testrules[i]);
1513 return;
1514 }
1515 iter1 = ucol_openElements(coll1, &ch, 1, &status);
1516 iter2 = ucol_openElements(coll2, &ch, 1, &status);
1517 if (U_FAILURE(status)) {
1518 log_err("Collation iterator creation failed\n");
1519 return;
1520 }
1521 ce = ucol_next(iter1, &status);
1522 if (U_FAILURE(status)) {
1523 log_err("Retrieving ces failed\n");
1524 return;
1525 }
1526 while (ce != UCOL_NULLORDER) {
57a6839d
A
1527 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1528 if (ce == ce2) {
1529 log_verbose("CEs match: %08x\n", ce);
1530 } else {
1531 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
b75a7d8f
A
1532 return;
1533 }
1534 ce = ucol_next(iter1, &status);
1535 if (U_FAILURE(status)) {
1536 log_err("Retrieving ces failed\n");
1537 return;
1538 }
1539 }
1540 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1541 log_err("CEs not exhausted\n");
1542 return;
1543 }
1544 ucol_closeElements(iter1);
1545 ucol_closeElements(iter2);
1546 ucol_close(coll1);
1547 ucol_close(coll2);
1548 }
57a6839d 1549#endif
b75a7d8f
A
1550}
1551
1552static void TestExpansion(void) {
1553 const static char *testrules[] = {
57a6839d
A
1554#if 0
1555 /*
1556 * This seems to have tested that M was not mapped to an expansion.
1557 * I believe the old builder just did that because it computed the extension CEs
1558 * at the very end, which was a bug.
1559 * Among other problems, it violated the core tailoring principle
1560 * by making an earlier rule depend on a later one.
1561 * And, of course, if M did not get an expansion, then it was primary different from K,
1562 * unlike what the rule &K<<M says.
1563 */
b75a7d8f 1564 "&J << K / B & K << M",
57a6839d 1565#endif
b75a7d8f
A
1566 "&J << K / B << M"
1567 };
1568 const static UChar testdata[][3] = {
1569 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1570 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1571 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1572 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1573 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1574 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1575 };
1576
1577 UErrorCode status = U_ZERO_ERROR;
1578 UCollator *coll;
1579 UChar rule[256] = {0};
1580 uint32_t rlen = 0;
1581 int i;
1582
2ca993e8 1583 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
b75a7d8f
A
1584 int j = 0;
1585 log_verbose("Rule %s for testing\n", testrules[i]);
1586 rlen = u_unescape(testrules[i], rule, 32);
1587 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1588 if (U_FAILURE(status)) {
729e4ab9 1589 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
b75a7d8f
A
1590 return;
1591 }
1592
1593 for (j = 0; j < 5; j ++) {
1594 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1595 }
1596 ucol_close(coll);
1597 }
1598}
1599
b75a7d8f
A
#if 0
/*
 * Exercises the current limitations of the engine.
 * It always fails, which is why it is compiled out by default.
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rules = "&a=b/c&d=c/e";
        static const char *orderA[] = {"add","b","adf"};
        static const char *orderB[] = {"aa","b","af"};

        log_verbose("recursive expansions\n");
        genericRulesStarter(rules, orderA, UPRV_LENGTHOF(orderA));
        genericRulesStarter(rules, orderB, UPRV_LENGTHOF(orderB));
    }

    /* contractions spanning expansions */
    {
        static const char *rules = "&a<<<c/e&g<<<eh";
        static const char *orderA[] = {"ad","c","af","f","ch","h"};
        static const char *orderB[] = {"ad","c","ch","af","f","h"};

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rules, orderA, UPRV_LENGTHOF(orderA));
        genericRulesStarter(rules, orderB, UPRV_LENGTHOF(orderB));
    }

    /* normalization: nulls in contractions */
    {
        static const char *rules = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue on[] = { UCOL_ON };
        static const UColAttributeValue off[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        genericRulesStarterWithOptions(rules, orderA, 2, attrs, on, 1);
        genericRulesStarterWithOptions(rules, orderB, 2, attrs, on, 1);
        genericRulesStarterWithOptions(rules, orderA, 2, attrs, off, 1);
        genericRulesStarterWithOptions(rules, orderB, 2, attrs, off, 1);
    }

    /* normalization: contractions spanning normalization */
    {
        static const char *rules = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue on[] = { UCOL_ON };
        static const UColAttributeValue off[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rules, orderA, 2, attrs, on, 1);
        genericRulesStarterWithOptions(rules, orderB, 2, attrs, on, 1);
        genericRulesStarterWithOptions(rules, orderA, 2, attrs, off, 1);
        genericRulesStarterWithOptions(rules, orderB, 2, attrs, off, 1);
    }

    /* variable top: */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rules = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue on[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue off[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rules, orderC, UPRV_LENGTHOF(orderC), attrs, on, UPRV_LENGTHOF(attrs));
        genericRulesStarterWithOptions(rules, orderA, UPRV_LENGTHOF(orderA), attrs, on, UPRV_LENGTHOF(attrs));
        genericRulesStarterWithOptions(rules, orderB, UPRV_LENGTHOF(orderB), attrs, on, UPRV_LENGTHOF(attrs));
        genericRulesStarterWithOptions(rules, orderA, UPRV_LENGTHOF(orderA), attrs, off, UPRV_LENGTHOF(attrs));
        genericRulesStarterWithOptions(rules, orderB, UPRV_LENGTHOF(orderB), attrs, off, UPRV_LENGTHOF(attrs));
    }

    /* case level */
    {
        static const char *rules = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *orderA[] = {"c","CH","Ch","cH","ch"};
        static const char *orderB[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue on[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue off[] = { UCOL_OFF};*/

        log_verbose("case level\n");
        genericRulesStarterWithOptions(rules, orderA, UPRV_LENGTHOF(orderA), attrs, on, UPRV_LENGTHOF(attrs));
        genericRulesStarterWithOptions(rules, orderB, UPRV_LENGTHOF(orderB), attrs, on, UPRV_LENGTHOF(attrs));
        /*genericRulesStarterWithOptions(rules, orderA, UPRV_LENGTHOF(orderA), attrs, off, UPRV_LENGTHOF(attrs));*/
        /*genericRulesStarterWithOptions(rules, orderB, UPRV_LENGTHOF(orderB), attrs, off, UPRV_LENGTHOF(attrs));*/
    }
}
#endif
1691
1692static void TestBocsuCoverage(void) {
1693 UErrorCode status = U_ZERO_ERROR;
1694 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1695 UChar test[256] = {0};
1696 uint32_t tlen = u_unescape(testString, test, 32);
1697 uint8_t key[256] = {0};
1698 uint32_t klen = 0;
1699
1700 UCollator *coll = ucol_open("", &status);
1701 if(U_SUCCESS(status)) {
1702 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1703
1704 klen = ucol_getSortKey(coll, test, tlen, key, 256);
57a6839d 1705 (void)klen; /* Suppress set but not used warning. */
b75a7d8f
A
1706
1707 ucol_close(coll);
1708 } else {
1709 log_data_err("Couldn't open UCA\n");
1710 }
1711}
1712
1713static void TestVariableTopSetting(void) {
1714 UErrorCode status = U_ZERO_ERROR;
b75a7d8f
A
1715 uint32_t varTopOriginal = 0, varTop1, varTop2;
1716 UCollator *coll = ucol_open("", &status);
1717 if(U_SUCCESS(status)) {
1718
57a6839d
A
1719 static const UChar nul = 0;
1720 static const UChar space = 0x20;
1721 static const UChar dot = 0x2e; /* punctuation */
1722 static const UChar degree = 0xb0; /* symbol */
1723 static const UChar dollar = 0x24; /* currency symbol */
1724 static const UChar zero = 0x30; /* digit */
729e4ab9 1725
57a6839d
A
1726 varTopOriginal = ucol_getVariableTop(coll, &status);
1727 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1728 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
b75a7d8f 1729
57a6839d
A
1730 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1731 varTop2 = ucol_getVariableTop(coll, &status);
1732 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1733 if(U_FAILURE(status) || varTop1 != varTop2 ||
1734 !ucol_equal(coll, &nul, 0, &space, 1) ||
1735 ucol_equal(coll, &nul, 0, &dot, 1) ||
1736 ucol_equal(coll, &nul, 0, &degree, 1) ||
1737 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1738 ucol_equal(coll, &nul, 0, &zero, 1) ||
1739 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1740 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
b75a7d8f
A
1741 }
1742
57a6839d
A
1743 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1744 varTop2 = ucol_getVariableTop(coll, &status);
1745 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1746 if(U_FAILURE(status) || varTop1 != varTop2 ||
1747 !ucol_equal(coll, &nul, 0, &space, 1) ||
1748 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1749 ucol_equal(coll, &nul, 0, &degree, 1) ||
1750 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1751 ucol_equal(coll, &nul, 0, &zero, 1) ||
1752 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1753 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1754 }
b75a7d8f 1755
57a6839d
A
1756 varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1757 varTop2 = ucol_getVariableTop(coll, &status);
1758 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1759 if(U_FAILURE(status) || varTop1 != varTop2 ||
1760 !ucol_equal(coll, &nul, 0, &space, 1) ||
1761 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1762 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1763 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1764 ucol_equal(coll, &nul, 0, &zero, 1) ||
1765 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1766 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
b75a7d8f
A
1767 }
1768
57a6839d
A
1769 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1770 varTop2 = ucol_getVariableTop(coll, &status);
1771 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1772 if(U_FAILURE(status) || varTop1 != varTop2 ||
1773 !ucol_equal(coll, &nul, 0, &space, 1) ||
1774 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1775 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1776 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1777 ucol_equal(coll, &nul, 0, &zero, 1) ||
1778 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1779 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1780 }
b75a7d8f
A
1781
1782 log_verbose("Testing setting variable top to contractions\n");
1783 {
57a6839d 1784 UChar first[4] = { 0 };
b75a7d8f
A
1785 first[0] = 0x0040;
1786 first[1] = 0x0050;
1787 first[2] = 0x0000;
1788
57a6839d 1789 status = U_ZERO_ERROR;
b75a7d8f
A
1790 ucol_setVariableTop(coll, first, -1, &status);
1791
1792 if(U_SUCCESS(status)) {
1793 log_err("Invalid contraction succeded in setting variable top!\n");
1794 }
1795
1796 }
1797
1798 log_verbose("Test restoring variable top\n");
1799
1800 status = U_ZERO_ERROR;
1801 ucol_restoreVariableTop(coll, varTopOriginal, &status);
1802 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1803 log_err("Couldn't restore old variable top\n");
1804 }
1805
1806 log_verbose("Testing calling with error set\n");
1807
1808 status = U_INTERNAL_PROGRAM_ERROR;
57a6839d 1809 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
b75a7d8f
A
1810 varTop2 = ucol_getVariableTop(coll, &status);
1811 ucol_restoreVariableTop(coll, varTop2, &status);
57a6839d 1812 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
b75a7d8f
A
1813 varTop2 = ucol_getVariableTop(NULL, &status);
1814 ucol_restoreVariableTop(NULL, varTop2, &status);
1815 if(status != U_INTERNAL_PROGRAM_ERROR) {
1816 log_err("Bad reaction to passed error!\n");
1817 }
b75a7d8f
A
1818 ucol_close(coll);
1819 } else {
1820 log_data_err("Couldn't open UCA collator\n");
1821 }
57a6839d
A
1822}
1823
3d1f044b 1824static void TestMaxVariable(void) {
57a6839d
A
1825 UErrorCode status = U_ZERO_ERROR;
1826 UColReorderCode oldMax, max;
1827 UCollator *coll;
1828
1829 static const UChar nul = 0;
1830 static const UChar space = 0x20;
1831 static const UChar dot = 0x2e; /* punctuation */
1832 static const UChar degree = 0xb0; /* symbol */
1833 static const UChar dollar = 0x24; /* currency symbol */
1834 static const UChar zero = 0x30; /* digit */
1835
1836 coll = ucol_open("", &status);
1837 if(U_FAILURE(status)) {
1838 log_data_err("Couldn't open root collator\n");
1839 return;
1840 }
1841
1842 oldMax = ucol_getMaxVariable(coll);
1843 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1844 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1845
1846 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1847 max = ucol_getMaxVariable(coll);
1848 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1849 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1850 !ucol_equal(coll, &nul, 0, &space, 1) ||
1851 ucol_equal(coll, &nul, 0, &dot, 1) ||
1852 ucol_equal(coll, &nul, 0, &degree, 1) ||
1853 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1854 ucol_equal(coll, &nul, 0, &zero, 1) ||
1855 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1856 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1857 }
1858
1859 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1860 max = ucol_getMaxVariable(coll);
1861 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1862 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1863 !ucol_equal(coll, &nul, 0, &space, 1) ||
1864 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1865 ucol_equal(coll, &nul, 0, &degree, 1) ||
1866 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1867 ucol_equal(coll, &nul, 0, &zero, 1) ||
1868 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1869 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1870 }
1871
1872 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1873 max = ucol_getMaxVariable(coll);
1874 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1875 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1876 !ucol_equal(coll, &nul, 0, &space, 1) ||
1877 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1878 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1879 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1880 ucol_equal(coll, &nul, 0, &zero, 1) ||
1881 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1882 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1883 }
1884
1885 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1886 max = ucol_getMaxVariable(coll);
1887 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1888 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1889 !ucol_equal(coll, &nul, 0, &space, 1) ||
1890 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1891 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1892 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1893 ucol_equal(coll, &nul, 0, &zero, 1) ||
1894 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1895 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1896 }
1897
1898 log_verbose("Test restoring maxVariable\n");
1899 status = U_ZERO_ERROR;
1900 ucol_setMaxVariable(coll, oldMax, &status);
1901 if(oldMax != ucol_getMaxVariable(coll)) {
1902 log_err("Couldn't restore old maxVariable\n");
1903 }
b75a7d8f 1904
57a6839d
A
1905 log_verbose("Testing calling with error set\n");
1906 status = U_INTERNAL_PROGRAM_ERROR;
1907 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1908 max = ucol_getMaxVariable(coll);
1909 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1910 log_err("Bad reaction to passed error!\n");
1911 }
1912 ucol_close(coll);
b75a7d8f
A
1913}
1914
1915static void TestNonChars(void) {
1916 static const char *test[] = {
729e4ab9
A
1917 "\\u0000", /* ignorable */
1918 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1919 "\\uFDD0", "\\uFDEF",
1920 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1921 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
b75a7d8f
A
1922 "\\U0003FFFE", "\\U0003FFFF",
1923 "\\U0004FFFE", "\\U0004FFFF",
1924 "\\U0005FFFE", "\\U0005FFFF",
1925 "\\U0006FFFE", "\\U0006FFFF",
1926 "\\U0007FFFE", "\\U0007FFFF",
1927 "\\U0008FFFE", "\\U0008FFFF",
1928 "\\U0009FFFE", "\\U0009FFFF",
1929 "\\U000AFFFE", "\\U000AFFFF",
1930 "\\U000BFFFE", "\\U000BFFFF",
1931 "\\U000CFFFE", "\\U000CFFFF",
1932 "\\U000DFFFE", "\\U000DFFFF",
1933 "\\U000EFFFE", "\\U000EFFFF",
1934 "\\U000FFFFE", "\\U000FFFFF",
729e4ab9
A
1935 "\\U0010FFFE", "\\U0010FFFF",
1936 "\\uFFFF" /* special character with maximum primary weight */
b75a7d8f
A
1937 };
1938 UErrorCode status = U_ZERO_ERROR;
1939 UCollator *coll = ucol_open("en_US", &status);
1940
1941 log_verbose("Test non characters\n");
1942
1943 if(U_SUCCESS(status)) {
729e4ab9 1944 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
b75a7d8f 1945 } else {
729e4ab9 1946 log_err_status(status, "Unable to open collator\n");
b75a7d8f
A
1947 }
1948
1949 ucol_close(coll);
1950}
1951
/*
 * Builds four strings that consist of a long run of 'a' followed by one
 * distinguishing letter ('a'..'d') and hands them to genericLocaleStarter()
 * for en_US, once for every total length from 20 to 499.
 */
static void TestExtremeCompression(void) {
    enum { STRING_COUNT = 4, BUFFER_SIZE = 2048, MIN_LENGTH = 20, MAX_LENGTH = 500 };
    /* Not static: the pointers are only valid between malloc() and free() below. */
    char *test[STRING_COUNT] = { NULL, NULL, NULL, NULL };
    int32_t j = 0, i = 0;

    for(i = 0; i<STRING_COUNT; i++) {
        test[i] = (char *)malloc(BUFFER_SIZE*sizeof(char));
        if(test[i] == NULL) {
            /* Do not write through a NULL pointer; release what we already have. */
            log_err("TestExtremeCompression: out of memory\n");
            while(--i >= 0) {
                free(test[i]);
            }
            return;
        }
    }

    for(j = MIN_LENGTH; j < MAX_LENGTH; j++) {
        for(i = 0; i<STRING_COUNT; i++) {
            /* j-1 copies of 'a', then the letter that makes string i unique. */
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, STRING_COUNT);
    }

    for(i = 0; i<STRING_COUNT; i++) {
        free(test[i]);
    }
}
1974
1975#if 0
1976static void TestExtremeCompression(void) {
1977 static char *test[4];
1978 int32_t j = 0, i = 0;
1979 UErrorCode status = U_ZERO_ERROR;
1980 UCollator *coll = ucol_open("en_US", status);
1981 for(i = 0; i<4; i++) {
1982 test[i] = (char *)malloc(2048*sizeof(char));
1983 }
1984 for(j = 10; j < 2048; j++) {
1985 for(i = 0; i<4; i++) {
1986 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1987 test[i][j-1] = (char)('a'+i);
1988 test[i][j] = 0;
1989 }
1990 }
1991 genericLocaleStarter("en_US", (const char **)test, 4);
1992
1993 for(j = 10; j < 2048; j++) {
1994 for(i = 0; i<1; i++) {
1995 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1996 test[i][j] = 0;
1997 }
1998 }
1999 for(i = 0; i<4; i++) {
2000 free(test[i]);
2001 }
2002}
2003#endif
2004
/*
 * Tailors a handful of supplementary code points (written as surrogate pairs)
 * after 'z' and passes the rule together with the 14 test strings to
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *tailoring =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    static const char *strings[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    genericRulesStarter(tailoring, strings, 14);
}
2025
/*
 * Exercises prefix ("context before") rules, the mechanism that the
 * JIS X 4061 collation rules rely on. Every table entry is run through
 * genericRulesStarter().
 */
static void TestPrefix(void) {
    static const struct {
        const char *rules;
        const char *data[50];
        const uint32_t len;
    } tests[] = {
        { "&z <<< z|a",
          {"zz", "za"}, 2 },

        { "&z <<< z| a",
          {"zz", "za"}, 2 },

        /* NOTE(review): five strings are listed here but len is 4, so "zz" is not checked. */
        { "[strength I]"
          "&a=\\ud900\\udc25"
          "&z<<<\\ud900\\udc25|a",
          {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
    };
    uint32_t idx = 0;

    while(idx<UPRV_LENGTHOF(tests)) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
2051
2052/* This test uses data suplied by Masashiko Maedera to test the implementation */
2053/* JIS X 4061 collation order implementation */
2054static void TestNewJapanese(void) {
2055
46f4442e 2056 static const char * const test1[] = {
b75a7d8f
A
2057 "\\u30b7\\u30e3\\u30fc\\u30ec",
2058 "\\u30b7\\u30e3\\u30a4",
2059 "\\u30b7\\u30e4\\u30a3",
2060 "\\u30b7\\u30e3\\u30ec",
2061 "\\u3061\\u3087\\u3053",
2062 "\\u3061\\u3088\\u3053",
2063 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2064 "\\u3066\\u30fc\\u305f",
2065 "\\u30c6\\u30fc\\u30bf",
2066 "\\u30c6\\u30a7\\u30bf",
2067 "\\u3066\\u3048\\u305f",
374ca955 2068 "\\u3067\\u30fc\\u305f",
b75a7d8f
A
2069 "\\u30c7\\u30fc\\u30bf",
2070 "\\u30c7\\u30a7\\u30bf",
2071 "\\u3067\\u3048\\u305f",
2072 "\\u3066\\u30fc\\u305f\\u30fc",
2073 "\\u30c6\\u30fc\\u30bf\\u30a1",
2074 "\\u30c6\\u30a7\\u30bf\\u30fc",
2075 "\\u3066\\u3047\\u305f\\u3041",
2076 "\\u3066\\u3048\\u305f\\u30fc",
2077 "\\u3067\\u30fc\\u305f\\u30fc",
2078 "\\u30c7\\u30fc\\u30bf\\u30a1",
2079 "\\u3067\\u30a7\\u305f\\u30a1",
2080 "\\u30c7\\u3047\\u30bf\\u3041",
2081 "\\u30c7\\u30a8\\u30bf\\u30a2",
2082 "\\u3072\\u3086",
2083 "\\u3073\\u3085\\u3042",
2084 "\\u3074\\u3085\\u3042",
2085 "\\u3073\\u3085\\u3042\\u30fc",
2086 "\\u30d3\\u30e5\\u30a2\\u30fc",
2087 "\\u3074\\u3085\\u3042\\u30fc",
2088 "\\u30d4\\u30e5\\u30a2\\u30fc",
2089 "\\u30d2\\u30e5\\u30a6",
2090 "\\u30d2\\u30e6\\u30a6",
2091 "\\u30d4\\u30e5\\u30a6\\u30a2",
374ca955 2092 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
b75a7d8f
A
2093 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2094 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2095 "\\u3072\\u3085\\u3093",
2096 "\\u3074\\u3085\\u3093",
2097 "\\u3075\\u30fc\\u308a",
2098 "\\u30d5\\u30fc\\u30ea",
2099 "\\u3075\\u3045\\u308a",
2100 "\\u3075\\u30a5\\u308a",
2101 "\\u3075\\u30a5\\u30ea",
2102 "\\u30d5\\u30a6\\u30ea",
2103 "\\u3076\\u30fc\\u308a",
2104 "\\u30d6\\u30fc\\u30ea",
2105 "\\u3076\\u3045\\u308a",
2106 "\\u30d6\\u30a5\\u308a",
2107 "\\u3077\\u3046\\u308a",
2108 "\\u30d7\\u30a6\\u30ea",
2109 "\\u3075\\u30fc\\u308a\\u30fc",
2110 "\\u30d5\\u30a5\\u30ea\\u30fc",
2111 "\\u3075\\u30a5\\u308a\\u30a3",
2112 "\\u30d5\\u3045\\u308a\\u3043",
2113 "\\u30d5\\u30a6\\u30ea\\u30fc",
2114 "\\u3075\\u3046\\u308a\\u3043",
2115 "\\u30d6\\u30a6\\u30ea\\u30a4",
2116 "\\u3077\\u30fc\\u308a\\u30fc",
2117 "\\u3077\\u30a5\\u308a\\u30a4",
2118 "\\u3077\\u3046\\u308a\\u30fc",
2119 "\\u30d7\\u30a6\\u30ea\\u30a4",
2120 "\\u30d5\\u30fd",
2121 "\\u3075\\u309e",
2122 "\\u3076\\u309d",
2123 "\\u3076\\u3075",
2124 "\\u3076\\u30d5",
2125 "\\u30d6\\u3075",
2126 "\\u30d6\\u30d5",
2127 "\\u3076\\u309e",
2128 "\\u3076\\u3077",
2129 "\\u30d6\\u3077",
2130 "\\u3077\\u309d",
2131 "\\u30d7\\u30fd",
2132 "\\u3077\\u3075",
2133};
2134
2135 static const char *test2[] = {
2136 "\\u306f\\u309d", /* H\\u309d */
73c04bcf 2137 "\\u30cf\\u30fd", /* K\\u30fd */
b75a7d8f
A
2138 "\\u306f\\u306f", /* HH */
2139 "\\u306f\\u30cf", /* HK */
2140 "\\u30cf\\u30cf", /* KK */
2141 "\\u306f\\u309e", /* H\\u309e */
2142 "\\u30cf\\u30fe", /* K\\u30fe */
2143 "\\u306f\\u3070", /* HH\\u309b */
2144 "\\u30cf\\u30d0", /* KK\\u309b */
2145 "\\u306f\\u3071", /* HH\\u309c */
2146 "\\u30cf\\u3071", /* KH\\u309c */
2147 "\\u30cf\\u30d1", /* KK\\u309c */
2148 "\\u3070\\u309d", /* H\\u309b\\u309d */
2149 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2150 "\\u3070\\u306f", /* H\\u309bH */
2151 "\\u30d0\\u30cf", /* K\\u309bK */
2152 "\\u3070\\u309e", /* H\\u309b\\u309e */
2153 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2154 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2155 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2156 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2157 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2158 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2159 "\\u3071\\u309d", /* H\\u309c\\u309d */
2160 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2161 "\\u3071\\u306f", /* H\\u309cH */
2162 "\\u30d1\\u30cf", /* K\\u309cK */
2163 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2164 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2165 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2166 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2167 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2168 };
2169 /*
2170 static const char *test3[] = {
2171 "\\u221er\\u221e",
2172 "\\u221eR#",
2173 "\\u221et\\u221e",
2174 "#r\\u221e",
2175 "#R#",
2176 "#t%",
2177 "#T%",
2178 "8t\\u221e",
2179 "8T\\u221e",
2180 "8t#",
2181 "8T#",
2182 "8t%",
2183 "8T%",
2184 "8t8",
2185 "8T8",
2186 "\\u03c9r\\u221e",
2187 "\\u03a9R%",
2188 "rr\\u221e",
2189 "rR\\u221e",
2190 "Rr\\u221e",
2191 "RR\\u221e",
2192 "RT%",
2193 "rt8",
2194 "tr\\u221e",
2195 "tr8",
2196 "TR8",
2197 "tt8",
2198 "\\u30b7\\u30e3\\u30fc\\u30ec",
2199 };
2200 */
2201 static const UColAttribute att[] = { UCOL_STRENGTH };
2202 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2203
2204 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2205 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2206
2ca993e8
A
2207 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2208 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2209 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2210 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2211 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
b75a7d8f
A
2212}
2213
2214static void TestStrCollIdenticalPrefix(void) {
2215 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2216 const char* test[] = {
2217 "ab\\ud9b0\\udc70",
2218 "ab\\ud9b0\\udc71"
2219 };
2ca993e8 2220 genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
b75a7d8f
A
2221}
2222/* Contractions should have all their canonically equivalent */
2223/* strings included */
2224static void TestContractionClosure(void) {
46f4442e 2225 static const struct {
b75a7d8f 2226 const char *rules;
46f4442e 2227 const char *data[10];
b75a7d8f 2228 const uint32_t len;
374ca955 2229 } tests[] = {
b75a7d8f
A
2230 { "&b=\\u00e4\\u00e4",
2231 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2232 { "&b=\\u00C5",
2233 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2234 };
2235 uint32_t i;
2236
2237
2ca993e8 2238 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
73c04bcf 2239 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
b75a7d8f
A
2240 }
2241}
2242
/*
 * Runs rules that combine [before 3] with further tailorings through
 * genericRulesStarter(). The variants that add a prefix ('|') to the
 * [before 3] rule, together with a collation-element dump for diagnosing
 * them, are compiled out below.
 */
static void TestBeforePrefixFailure(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "&g <<< a"
          "&[before 3]\\uff41 <<< x",
          {"x", "\\uff41"}, 2 },
        { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74"
          "&[before 3]\\u30a7<<<\\u30a9",
          {"\\u30a9", "\\u30a7"}, 2 },
        { "&[before 3]\\u30a7<<<\\u30a9"
          "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74",
          {"\\u30a9", "\\u30a7"}, 2 },
    };
    uint32_t idx = 0;

    while(idx<UPRV_LENGTHOF(tests)) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }

#if 0
    const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
    const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
    const char* test[] = {
        "\\u30c6\\u30fc\\u30bf",
        "\\u30c6\\u30a7\\u30bf",
    };
    genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
    genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
    /*
     * Diagnostic aid that belongs in some sort of verbose mode: it prints the
     * collation elements produced for every test string.
     */
    {
        UErrorCode status = U_ZERO_ERROR;
        uint32_t i = 0;
        UCollationElements *it = NULL;
        uint32_t CE;
        UChar string[256];
        uint32_t uStringLen;
        UCollator *coll = NULL;

        uStringLen = u_unescape(rule1, string, 256);

        coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

        /*coll = ucol_open("ja_JP_JIS", &status);*/
        it = ucol_openElements(coll, string, 0, &status);

        for(i = 0; i < UPRV_LENGTHOF(test); i++) {
            log_verbose("%s\n", test[i]);
            uStringLen = u_unescape(test[i], string, 256);
            ucol_setText(it, string, uStringLen, &status);

            while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
                log_verbose("%08X\n", CE);
            }
            log_verbose("\n");

        }

        ucol_closeElements(it);
        ucol_close(coll);
    }
#endif
}
2320
2321static void TestPrefixCompose(void) {
374ca955 2322 const char* rule1 =
b75a7d8f
A
2323 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2324 /*
2325 const char* test[] = {
374ca955 2326 "\\u30c6\\u30fc\\u30bf",
b75a7d8f
A
2327 "\\u30c6\\u30a7\\u30bf",
2328 };
2329 */
374ca955 2330 {
b75a7d8f
A
2331 UErrorCode status = U_ZERO_ERROR;
2332 /*uint32_t i = 0;*/
2333 /*UCollationElements *it = NULL;*/
2334/* uint32_t CE;*/
2335 UChar string[256];
2336 uint32_t uStringLen;
2337 UCollator *coll = NULL;
2338
2339 uStringLen = u_unescape(rule1, string, 256);
2340
2341 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2342 ucol_close(coll);
2343 }
2344
2345
2346}
2347
2348/*
374ca955
A
2349[last variable] last variable value
2350[last primary ignorable] largest CE for primary ignorable
2351[last secondary ignorable] largest CE for secondary ignorable
2352[last tertiary ignorable] largest CE for tertiary ignorable
2353[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
b75a7d8f
A
2354*/
2355
/*
 * Checks tailoring relative to the special reset positions such as
 * [first variable] or [last regular], with and without [before n].
 * Every active table entry is run through genericRulesStarter().
 */
static void TestRuleOptions(void) {
    /*
     * The values below are hard-coded and match the current UCA; they may
     * have to be adjusted whenever the UCA changes.
     */

    /*
     * These strings name the last character before [variable top] and the
     * first and second characters (by primary weight) after it.
     * See FractionalUCA.txt, for example:
         [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
         [variable top = 0C FE]
         [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
       and
         00B4; [0D 0C, 05, 05]
     *
     * Note: Starting with UCA 6.0, the [variable top] collation element
     * is not the weight of any character or string,
     * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
     */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

    /*
     * This string has to name the character that carries the [last regular]
     * weight, which changes with each UCA version.
     * The bottom of FractionalUCA.txt says something like
         [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
     *
     * Note: Starting with UCA 6.0, the [last regular] collation element
     * is not the weight of any character or string,
     * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
     */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
#if 0
        /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
        /* - all befores here amount to zero */
        { "&[before 3][first tertiary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* you cannot go before first tertiary ignorable */

        { "&[before 3][last tertiary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* you cannot go before last tertiary ignorable */
#endif
        /*
         * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
         * and it *is* possible to "go before" that.
         */
        { "&[before 3][first secondary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        },

        { "&[before 3][last secondary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        },

        /* 'normal' befores */

        /*
         * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
         * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
         * because there is no tailoring space before that boundary.
         * Made the tests work by tailoring to a space instead.
         */
        { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
          { "c", "b", "\\u0332", "a" }, 4
        },

        /* there is no code point that corresponds to the last primary ignorable */
        { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
          { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
        },

        { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
          { "c", "b", "\\u0009", "a", "\\u000a" }, 5
        },

        { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
          { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
        },

        { "&[first regular]<a"
          "&[before 1][first regular]<b",
          { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
        },

        { "&[before 1][last regular]<b"
          "&[last regular]<a",
          { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
        },

        { "&[before 1][first implicit]<b"
          "&[first implicit]<a",
          { "b", "\\u4e00", "a", "\\u4e01"}, 4
        },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
        { "&[before 1][last implicit]<b"
          "&[last implicit]<a",
          { "b", "\\U0010FFFD", "a" }, 3
        },
#endif
        /* NOTE(review): eight strings are listed below but len is 7, so "u" is not checked. */
        { "&[last variable]<z"
          "&' '<x"  /* was &[last primary ignorable]<x, see above */
          "&[last secondary ignorable]<<y"
          "&[last tertiary ignorable]<<<w"
          "&[top]<u",
          {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
        }

    };
    uint32_t idx = 0;

    while(idx<UPRV_LENGTHOF(tests)) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
2482
2483
/*
 * Smoke test rather than a real test: it only tries out whether copying of
 * UCA contents, as requested by an [optimize ...] rule, fails. The resulting
 * order cannot really be checked because the functionality stays the same.
 */
static void TestOptimize(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "[optimize [\\uAC00-\\uD7FF]]",
          { "a", "b"}, 2}
    };
    uint32_t idx = 0;

    while(idx<UPRV_LENGTHOF(tests)) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
2505
2506/*
374ca955
A
2507cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2508weiv ucol_strcollIter?
2509cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2510weiv these are the input strings?
2511cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2512weiv will check - could be a problem with utf-8 iterator
2513cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2514weiv hmmm
2515cycheng@ca.ibm.c... note that we have a standalone high surrogate
2516weiv that doesn't sound right
2517cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2518weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2519cycheng@ca.ibm.c... yes
2520weiv and then do the comparison
2521cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2522weiv utf-16 strings look like a little endian ones in the example you sent me
2523weiv It could be a bug - let me try to test it out
2524cycheng@ca.ibm.c... ok
2525cycheng@ca.ibm.c... we can wait till the conf. call
2526cycheng@ca.ibm.c... next weke
2527weiv that would be great
2528weiv hmmm
2529weiv I might be wrong
2530weiv let me play with it some more
2531cycheng@ca.ibm.c... ok
2532cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2533cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2534cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2535weiv ok
2536cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2537weiv thanks
2538cycheng@ca.ibm.c... the 4 strings we sent are just samples
b75a7d8f
A
2539*/
2540#if 0
2541static void Alexis(void) {
2542 UErrorCode status = U_ZERO_ERROR;
2543 UCollator *coll = ucol_open("", &status);
2544
2545
2546 const char utf16be[2][4] = {
2547 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2548 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2549 };
2550
2551 const char utf8[2][4] = {
2552 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2553 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2554 };
2555
2556 UCharIterator iterU161, iterU162;
2557 UCharIterator iterU81, iterU82;
2558
2559 UCollationResult resU16, resU8;
2560
2561 uiter_setUTF16BE(&iterU161, utf16be[0], 4);
2562 uiter_setUTF16BE(&iterU162, utf16be[1], 4);
2563
2564 uiter_setUTF8(&iterU81, utf8[0], 4);
2565 uiter_setUTF8(&iterU82, utf8[1], 4);
2566
2567 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2568
2569 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
2570 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
2571
2572
2573 if(resU16 != resU8) {
2574 log_err("different results\n");
2575 }
2576
2577 ucol_close(coll);
2578}
2579#endif
2580
2581#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2582static void Alexis2(void) {
2583 UErrorCode status = U_ZERO_ERROR;
2584 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
374ca955 2586 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
b75a7d8f
A
2587 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2588
2589 UConverter *conv = NULL;
2590
2591 UCharIterator U16BEItS, U16BEItT;
2592 UCharIterator U8ItS, U8ItT;
2593
2594 UCollationResult resU16, resU16BE, resU8;
2595
46f4442e 2596 static const char* const pairs[][2] = {
b75a7d8f
A
2597 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2598 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2599 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2600 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2601 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2602 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2603 { "\\u0020", "\\u0020\\u0000"}
2604/*
26055F20 (my result here)
26065F204E008E3F
26075F20 (your result here)
2608*/
2609 };
2610
2611 int32_t i = 0;
2612
2613 UCollator *coll = ucol_open("", &status);
2614 if(status == U_FILE_ACCESS_ERROR) {
2615 log_data_err("Is your data around?\n");
2616 return;
2617 } else if(U_FAILURE(status)) {
2618 log_err("Error opening collator\n");
2619 return;
2620 }
2621 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2622 conv = ucnv_open("UTF16BE", &status);
2ca993e8 2623 for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
b75a7d8f
A
2624 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2625 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626
2627 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2628
2629 log_verbose("Result of strcoll is %i\n", resU16);
2630
2631 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2632 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
57a6839d
A
2633 (void)U16BELenS; /* Suppress set but not used warnings. */
2634 (void)U16BELenT;
b75a7d8f
A
2635
2636 /* use the original sizes, as the result from converter is in bytes */
2637 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2638 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2639
2640 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2641
2642 log_verbose("Result of U16BE is %i\n", resU16BE);
2643
2644 if(resU16 != resU16BE) {
2645 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2646 }
2647
2648 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2649 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2650
2651 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2652 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2653
2654 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2655
2656 if(resU16 != resU8) {
2657 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2658 }
2659
2660 }
2661
2662 ucol_close(coll);
2663 ucnv_close(conv);
2664}
2665
2666static void TestHebrewUCA(void) {
2667 UErrorCode status = U_ZERO_ERROR;
46f4442e 2668 static const char *first[] = {
b75a7d8f
A
2669 "d790d6b8d79cd795d6bcd7a9",
2670 "d790d79cd79ed7a7d799d799d7a1",
2671 "d790d6b4d79ed795d6bcd7a9",
2672 };
2673
2674 char utf8String[3][256];
2675 UChar utf16String[3][256];
2676
2677 int32_t i = 0, j = 0;
2678 int32_t sizeUTF8[3];
2679 int32_t sizeUTF16[3];
2680
2681 UCollator *coll = ucol_open("", &status);
46f4442e 2682 if (U_FAILURE(status)) {
729e4ab9 2683 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
46f4442e
A
2684 return;
2685 }
b75a7d8f
A
2686 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2687
2ca993e8 2688 for(i = 0; i < UPRV_LENGTHOF(first); i++) {
b75a7d8f
A
2689 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2690 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2691 log_verbose("%i: ");
2692 for(j = 0; j < sizeUTF16[i]; j++) {
2693 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2694 log_verbose("%04X", utf16String[i][j]);
2695 }
2696 log_verbose("\n");
2697 }
2ca993e8
A
2698 for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2699 for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
b75a7d8f
A
2700 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2701 }
2702 }
2703
2704 ucol_close(coll);
2705
2706}
2707
2708static void TestPartialSortKeyTermination(void) {
46f4442e 2709 static const char* cases[] = {
b75a7d8f 2710 "\\u1234\\u1234\\udc00",
374ca955 2711 "\\udc00\\ud800\\ud800"
b75a7d8f
A
2712 };
2713
57a6839d 2714 int32_t i;
b75a7d8f
A
2715
2716 UErrorCode status = U_ZERO_ERROR;
2717
2718 UCollator *coll = ucol_open("", &status);
2719
2720 UCharIterator iter;
2721
2722 UChar currCase[256];
2723 int32_t length = 0;
2724 int32_t pKeyLen = 0;
2725
2726 uint8_t key[256];
2727
2ca993e8 2728 for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
b75a7d8f
A
2729 uint32_t state[2] = {0, 0};
2730 length = u_unescape(cases[i], currCase, 256);
2731 uiter_setString(&iter, currCase, length);
2732 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
57a6839d 2733 (void)pKeyLen; /* Suppress set but not used warning. */
b75a7d8f
A
2734
2735 log_verbose("Done\n");
2736
2737 }
2738 ucol_close(coll);
2739}
2740
2741static void TestSettings(void) {
46f4442e 2742 static const char* cases[] = {
b75a7d8f
A
2743 "apple",
2744 "Apple"
2745 };
2746
46f4442e 2747 static const char* locales[] = {
b75a7d8f
A
2748 "",
2749 "en"
2750 };
2751
2752 UErrorCode status = U_ZERO_ERROR;
2753
2754 int32_t i = 0, j = 0;
2755
2756 UChar source[256], target[256];
2757 int32_t sLen = 0, tLen = 0;
2758
2759 UCollator *collateObject = NULL;
2ca993e8 2760 for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
b75a7d8f
A
2761 collateObject = ucol_open(locales[i], &status);
2762 ucol_setStrength(collateObject, UCOL_PRIMARY);
2763 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2ca993e8 2764 for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
b75a7d8f
A
2765 sLen = u_unescape(cases[j-1], source, 256);
2766 source[sLen] = 0;
2767 tLen = u_unescape(cases[j], target, 256);
2768 source[tLen] = 0;
2769 doTest(collateObject, source, target, UCOL_EQUAL);
2770 }
2771 ucol_close(collateObject);
2772 }
2773}
2774
2775static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
46f4442e
A
2776 UErrorCode status = U_ZERO_ERROR;
2777 int32_t errorNo = 0;
57a6839d
A
2778 const UChar *sourceRules = NULL;
2779 int32_t sourceRulesLen = 0;
2780 UParseError parseError;
46f4442e 2781 UColAttributeValue french = UCOL_OFF;
46f4442e 2782
b75a7d8f 2783 if(!ucol_equals(source, target)) {
46f4442e
A
2784 log_err("Same collators, different address not equal\n");
2785 errorNo++;
b75a7d8f
A
2786 }
2787 ucol_close(target);
b331163b 2788 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
57a6839d 2789 target = ucol_safeClone(source, NULL, NULL, &status);
46f4442e
A
2790 if(U_FAILURE(status)) {
2791 log_err("Error creating clone\n");
2792 errorNo++;
2793 return errorNo;
2794 }
2795 if(!ucol_equals(source, target)) {
2796 log_err("Collator different from it's clone\n");
2797 errorNo++;
2798 }
2799 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2800 if(french == UCOL_ON) {
2801 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2802 } else {
2803 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2804 }
2805 if(U_FAILURE(status)) {
2806 log_err("Error setting attributes\n");
2807 errorNo++;
2808 return errorNo;
2809 }
2810 if(ucol_equals(source, target)) {
2811 log_err("Collators same even when options changed\n");
2812 errorNo++;
2813 }
2814 ucol_close(target);
57a6839d 2815
46f4442e
A
2816 sourceRules = ucol_getRules(source, &sourceRulesLen);
2817 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2818 if(U_FAILURE(status)) {
57a6839d
A
2819 log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2820 errorNo++;
2821 return errorNo;
46f4442e 2822 }
b331163b
A
2823 /* Note: The tailoring rule string is an optional data item. */
2824 if(!ucol_equals(source, target) && sourceRulesLen != 0) {
57a6839d
A
2825 log_err("Collator different from collator that was created from the same rules\n");
2826 errorNo++;
46f4442e
A
2827 }
2828 ucol_close(target);
b75a7d8f 2829 }
46f4442e 2830 return errorNo;
b75a7d8f
A
2831}
2832
2833
2834static void TestEquals(void) {
46f4442e 2835 /* ucol_equals is not currently a public API. There is a chance that it will become
b331163b 2836 * something like this.
46f4442e
A
2837 */
2838 /* test whether the two collators instantiated from the same locale are equal */
2839 UErrorCode status = U_ZERO_ERROR;
2840 UParseError parseError;
2841 int32_t noOfLoc = uloc_countAvailable();
2842 const char *locName = NULL;
2843 UCollator *source = NULL, *target = NULL;
2844 int32_t i = 0;
b75a7d8f 2845
46f4442e
A
2846 const char* rules[] = {
2847 "&l < lj <<< Lj <<< LJ",
2848 "&n < nj <<< Nj <<< NJ",
2849 "&ae <<< \\u00e4",
2850 "&AE <<< \\u00c4"
2851 };
2852 /*
2853 const char* badRules[] = {
b75a7d8f 2854 "&l <<< Lj",
46f4442e
A
2855 "&n < nj <<< nJ <<< NJ",
2856 "&a <<< \\u00e4",
2857 "&AE <<< \\u00c4 <<< x"
2858 };
2859 */
b75a7d8f 2860
46f4442e
A
2861 UChar sourceRules[1024], targetRules[1024];
2862 int32_t sourceRulesSize = 0, targetRulesSize = 0;
2ca993e8 2863 int32_t rulesSize = UPRV_LENGTHOF(rules);
b75a7d8f 2864
46f4442e
A
2865 for(i = 0; i < rulesSize; i++) {
2866 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2867 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2868 }
b75a7d8f 2869
46f4442e
A
2870 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2871 if(status == U_FILE_ACCESS_ERROR) {
2872 log_data_err("Is your data around?\n");
2873 return;
2874 } else if(U_FAILURE(status)) {
2875 log_err("Error opening collator\n");
2876 return;
2877 }
2878 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2879 if(!ucol_equals(source, target)) {
2880 log_err("Equivalent collators not equal!\n");
2881 }
2882 ucol_close(source);
2883 ucol_close(target);
b75a7d8f 2884
46f4442e
A
2885 source = ucol_open("root", &status);
2886 target = ucol_open("root", &status);
2887 log_verbose("Testing root\n");
2888 if(!ucol_equals(source, source)) {
2889 log_err("Same collator not equal\n");
2890 }
b331163b
A
2891 if(TestEqualsForCollator("root", source, target)) {
2892 log_err("Errors for root\n");
46f4442e
A
2893 }
2894 ucol_close(source);
2895
2896 for(i = 0; i<noOfLoc; i++) {
2897 status = U_ZERO_ERROR;
2898 locName = uloc_getAvailable(i);
2899 /*if(hasCollationElements(locName)) {*/
2900 log_verbose("Testing equality for locale %s\n", locName);
2901 source = ucol_open(locName, &status);
2902 target = ucol_open(locName, &status);
2903 if (U_FAILURE(status)) {
2904 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
2905 continue;
2906 }
2907 if(TestEqualsForCollator(locName, source, target)) {
2908 log_err("Errors for locale %s\n", locName);
2909 }
2910 ucol_close(source);
2911 /*}*/
2912 }
b75a7d8f
A
2913}
2914
2915static void TestJ2726(void) {
46f4442e
A
2916 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2917 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2918 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2919 UErrorCode status = U_ZERO_ERROR;
2920 UCollator *coll = ucol_open("en", &status);
2921 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2922 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2923 doTest(coll, a, aSpace, UCOL_EQUAL);
2924 doTest(coll, aSpace, a, UCOL_EQUAL);
2925 doTest(coll, a, spaceA, UCOL_EQUAL);
2926 doTest(coll, spaceA, a, UCOL_EQUAL);
2927 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2928 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2929 ucol_close(coll);
b75a7d8f
A
2930}
2931
374ca955 2932static void NullRule(void) {
46f4442e
A
2933 UChar r[3] = {0};
2934 UErrorCode status = U_ZERO_ERROR;
2935 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2936 if(U_SUCCESS(status)) {
2937 log_err("This should have been an error!\n");
2938 ucol_close(coll);
2939 } else {
2940 status = U_ZERO_ERROR;
2941 }
2942 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2943 if(U_FAILURE(status)) {
729e4ab9 2944 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
46f4442e
A
2945 } else {
2946 ucol_close(coll);
2947 }
374ca955
A
2948}
2949
/**
 * Test numeric collation (UCOL_NUMERIC_COLLATION set to UCOL_ON): the string
 * lists below are expected to sort in the order given, and strings whose
 * numbers differ only in leading zeroes are expected to compare as equal.
 */
2954static void TestNumericCollation(void)
2955{
2956 UErrorCode status = U_ZERO_ERROR;
2957
2958 const static char *basicTestStrings[]={
2959 "hello1",
2960 "hello2",
2961 "hello2002",
2962 "hello2003",
2963 "hello123456",
2964 "hello1234567",
2965 "hello10000000",
2966 "hello100000000",
2967 "hello1000000000",
2968 "hello10000000000",
2969 };
2970
2971 const static char *preZeroTestStrings[]={
2972 "avery10000",
2973 "avery010000",
2974 "avery0010000",
2975 "avery00010000",
2976 "avery000010000",
2977 "avery0000010000",
2978 "avery00000010000",
2979 "avery000000010000",
2980 };
2981
2982 const static char *thirtyTwoBitNumericStrings[]={
2983 "avery42949672960",
2984 "avery42949672961",
2985 "avery42949672962",
2986 "avery429496729610"
46f4442e
A
2987 };
2988
2989 const static char *longNumericStrings[]={
2990 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2991 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2992 are treated as multiple collation elements. */
2993 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2994 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2995 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2996 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2997 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2998 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2999 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3000 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3001 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3002 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
374ca955
A
3003 };
3004
3005 const static char *supplementaryDigits[] = {
3006 "\\uD835\\uDFCE", /* 0 */
3007 "\\uD835\\uDFCF", /* 1 */
3008 "\\uD835\\uDFD0", /* 2 */
3009 "\\uD835\\uDFD1", /* 3 */
3010 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3011 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3012 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3013 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3014 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3015 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3016 };
3017
3018 const static char *foreignDigits[] = {
3019 "\\u0661",
3020 "\\u0662",
3021 "\\u0663",
3022 "\\u0661\\u0660",
3023 "\\u0661\\u0662",
3024 "\\u0661\\u0663",
3025 "\\u0662\\u0660",
3026 "\\u0662\\u0662",
3027 "\\u0662\\u0663",
3028 "\\u0663\\u0660",
3029 "\\u0663\\u0662",
3030 "\\u0663\\u0663"
3031 };
3032
3033 const static char *evenZeroes[] = {
3034 "2000",
3035 "2001",
3036 "2002",
3037 "2003"
3038 };
3039
3040 UColAttribute att = UCOL_NUMERIC_COLLATION;
3041 UColAttributeValue val = UCOL_ON;
3042
3043 /* Open our collator. */
3044 UCollator* coll = ucol_open("root", &status);
3045 if (U_FAILURE(status)){
729e4ab9 3046 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
374ca955
A
3047 myErrorName(status));
3048 return;
3049 }
2ca993e8
A
3050 genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3051 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3052 genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3053 genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3054 genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3055 genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
374ca955
A
3056
3057 /* Setting up our collator to do digits. */
3058 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3059 if (U_FAILURE(status)){
3060 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3061 myErrorName(status));
3062 return;
3063 }
3064
3065 /*
3066 Testing that prepended zeroes still yield the correct collation behavior.
3067 We expect that every element in our strings array will be equal.
3068 */
2ca993e8 3069 genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
374ca955
A
3070
3071 ucol_close(coll);
3072}
3073
3074static void TestTibetanConformance(void)
3075{
3076 const char* test[] = {
3077 "\\u0FB2\\u0591\\u0F71\\u0061",
3078 "\\u0FB2\\u0F71\\u0061"
3079 };
3080
3081 UErrorCode status = U_ZERO_ERROR;
3082 UCollator *coll = ucol_open("", &status);
3083 UChar source[100];
3084 UChar target[100];
3085 int result;
3086 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3087 if (U_SUCCESS(status)) {
3088 u_unescape(test[0], source, 100);
3089 u_unescape(test[1], target, 100);
3090 doTest(coll, source, target, UCOL_EQUAL);
3091 result = ucol_strcoll(coll, source, -1, target, -1);
3092 log_verbose("result %d\n", result);
3093 if (UCOL_EQUAL != result) {
3094 log_err("Tibetan comparison error\n");
3095 }
3096 }
3097 ucol_close(coll);
3098
3099 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3100}
3101
/**
 * Runs the generic ordering test on two Han strings with the "zh__PINYIN"
 * locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
}
3106
374ca955
A
3107/**
3108 * Iterate through the given iterator, checking to see that all the strings
3109 * in the expected array are present.
3110 * @param expected array of strings we expect to see, or NULL
3111 * @param expectedCount number of elements of expected, or 0
3112 */
3113static int32_t checkUEnumeration(const char* msg,
3114 UEnumeration* iter,
3115 const char** expected,
3116 int32_t expectedCount) {
3117 UErrorCode ec = U_ZERO_ERROR;
3118 int32_t i = 0, n, j, bit;
3119 int32_t seenMask = 0;
3120
3121 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3122 n = uenum_count(iter, &ec);
3123 if (!assertSuccess("count", &ec)) return -1;
3124 log_verbose("%s = [", msg);
3125 for (;; ++i) {
3126 const char* s = uenum_next(iter, NULL, &ec);
3127 if (!assertSuccess("snext", &ec) || s == NULL) break;
3128 if (i != 0) log_verbose(",");
3129 log_verbose("%s", s);
3130 /* check expected list */
3131 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3132 if ((seenMask&bit) == 0 &&
3133 uprv_strcmp(s, expected[j]) == 0) {
3134 seenMask |= bit;
3135 break;
3136 }
3137 }
3138 }
3139 log_verbose("] (%d)\n", i);
3140 assertTrue("count verified", i==n);
3141 /* did we see all expected strings? */
3142 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3143 if ((seenMask&bit)!=0) {
3144 log_verbose("Ok: \"%s\" seen\n", expected[j]);
3145 } else {
3146 log_err("FAIL: \"%s\" not seen\n", expected[j]);
3147 }
3148 }
3149 return n;
3150}
3151
3152/**
3153 * Test new API added for separate collation tree.
3154 */
3155static void TestSeparateTrees(void) {
3156 UErrorCode ec = U_ZERO_ERROR;
3157 UEnumeration *e = NULL;
3158 int32_t n = -1;
3159 UBool isAvailable;
3160 char loc[256];
3161
3162 static const char* AVAIL[] = { "en", "de" };
3163
3164 static const char* KW[] = { "collation" };
3165
3166 static const char* KWVAL[] = { "phonebook", "stroke" };
3167
3168#if !UCONFIG_NO_SERVICE
3169 e = ucol_openAvailableLocales(&ec);
729e4ab9
A
3170 if (e != NULL) {
3171 assertSuccess("ucol_openAvailableLocales", &ec);
3172 assertTrue("ucol_openAvailableLocales!=0", e!=0);
2ca993e8 3173 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
57a6839d 3174 (void)n; /* Suppress set but not used warnings. */
729e4ab9
A
3175 /* Don't need to check n because we check list */
3176 uenum_close(e);
3177 } else {
3178 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3179 }
374ca955
A
3180#endif
3181
3182 e = ucol_getKeywords(&ec);
729e4ab9
A
3183 if (e != NULL) {
3184 assertSuccess("ucol_getKeywords", &ec);
3185 assertTrue("ucol_getKeywords!=0", e!=0);
2ca993e8 3186 n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
729e4ab9
A
3187 /* Don't need to check n because we check list */
3188 uenum_close(e);
3189 } else {
3190 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3191 }
374ca955
A
3192
3193 e = ucol_getKeywordValues(KW[0], &ec);
729e4ab9
A
3194 if (e != NULL) {
3195 assertSuccess("ucol_getKeywordValues", &ec);
3196 assertTrue("ucol_getKeywordValues!=0", e!=0);
2ca993e8 3197 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
729e4ab9
A
3198 /* Don't need to check n because we check list */
3199 uenum_close(e);
3200 } else {
3201 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3202 }
374ca955
A
3203
3204 /* Try setting a warning before calling ucol_getKeywordValues */
3205 ec = U_USING_FALLBACK_WARNING;
3206 e = ucol_getKeywordValues(KW[0], &ec);
729e4ab9
A
3207 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3208 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
2ca993e8 3209 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
729e4ab9
A
3210 /* Don't need to check n because we check list */
3211 uenum_close(e);
3212 }
374ca955
A
3213
3214 /*
3215U_DRAFT int32_t U_EXPORT2
3216ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3217 const char* locale, UBool* isAvailable,
3218 UErrorCode* status);
3219}
3220*/
729e4ab9 3221 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
374ca955 3222 &isAvailable, &ec);
729e4ab9 3223 if (assertSuccess("getFunctionalEquivalent", &ec)) {
57a6839d 3224 assertEquals("getFunctionalEquivalent(de)", "root", loc);
729e4ab9
A
3225 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3226 isAvailable == TRUE);
3227 }
374ca955 3228
729e4ab9 3229 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
374ca955 3230 &isAvailable, &ec);
729e4ab9 3231 if (assertSuccess("getFunctionalEquivalent", &ec)) {
57a6839d
A
3232 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3233 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3234 isAvailable == FALSE);
729e4ab9 3235 }
374ca955
A
3236}
3237
3238/* supercedes TestJ784 */
3239static void TestBeforePinyin(void) {
3240 const static char rules[] = {
3241 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
3242 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
3243 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
3244 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
3245 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
3246 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
3247 };
3248
3249 const static char *test[] = {
3250 "l\\u0101",
3251 "la",
3252 "l\\u0101n",
3253 "lan ",
3254 "l\\u0113",
3255 "le",
3256 "l\\u0113n",
3257 "len"
3258 };
3259
3260 const static char *test2[] = {
3261 "x\\u0101",
3262 "x\\u0100",
3263 "X\\u0101",
3264 "X\\u0100",
3265 "x\\u00E1",
3266 "x\\u00C1",
3267 "X\\u00E1",
3268 "X\\u00C1",
3269 "x\\u01CE",
3270 "x\\u01CD",
3271 "X\\u01CE",
3272 "X\\u01CD",
3273 "x\\u00E0",
3274 "x\\u00C0",
3275 "X\\u00E0",
3276 "X\\u00C0",
3277 "xa",
3278 "xA",
3279 "Xa",
3280 "XA",
3281 "x\\u0101x",
3282 "x\\u0100x",
3283 "x\\u00E1x",
3284 "x\\u00C1x",
3285 "x\\u01CEx",
3286 "x\\u01CDx",
3287 "x\\u00E0x",
3288 "x\\u00C0x",
3289 "xax",
3290 "xAx"
3291 };
3292
3d1f044b
A
3293 const static char *test3[] = { // rdar://53741390
3294 "\\u85CF", // 藏 cáng
3295 "\\u92BA", // 銺 zàng
3296 "\\u85CF\\u6587", // 藏文 zàngwén
3297 "\\u85CF\\u8BED", // 藏语 zàngyǔ
3298 "\\u81D3", // 臓 zàng
3299 };
3300
2ca993e8
A
3301 genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
3302 genericLocaleStarter("zh", test, UPRV_LENGTHOF(test));
3303 genericRulesStarter(rules, test2, UPRV_LENGTHOF(test2));
3304 genericLocaleStarter("zh", test2, UPRV_LENGTHOF(test2));
3d1f044b 3305 genericLocaleStarter("zh", test3, UPRV_LENGTHOF(test3));
374ca955
A
3306}
3307
3308static void TestBeforeTightening(void) {
46f4442e 3309 static const struct {
374ca955
A
3310 const char *rules;
3311 UErrorCode expectedStatus;
3312 } tests[] = {
3313 { "&[before 1]a<x", U_ZERO_ERROR },
3314 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3315 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3316 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3317 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3318 { "&[before 2]a<<x",U_ZERO_ERROR },
3319 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3320 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3321 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
3322 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
3323 { "&[before 3]a<<<x",U_ZERO_ERROR },
3324 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
3325 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3326 };
3327
3328 int32_t i = 0;
3329
3330 UErrorCode status = U_ZERO_ERROR;
3331 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3332 uint32_t rlen = 0;
3333
3334 UCollator *coll = NULL;
3335
3336
2ca993e8 3337 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
3338 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3339 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3340 if(status != tests[i].expectedStatus) {
729e4ab9 3341 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
374ca955
A
3342 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3343 }
3344 ucol_close(coll);
3345 status = U_ZERO_ERROR;
3346 }
3347
3348}
3349
51004dcb 3350/*
374ca955
A
3351&m < a
3352&[before 1] a < x <<< X << q <<< Q < z
3353assert: m <<< M < x <<< X << q <<< Q < z < a < n
3354
3355&m < a
3356&[before 2] a << x <<< X << q <<< Q < z
3357assert: m <<< M < x <<< X << q <<< Q << a < z < n
3358
3359&m < a
3360&[before 3] a <<< x <<< X << q <<< Q < z
3361assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3362
3363
3364&m << a
3365&[before 1] a < x <<< X << q <<< Q < z
3366assert: x <<< X << q <<< Q < z < m <<< M << a < n
3367
3368&m << a
3369&[before 2] a << x <<< X << q <<< Q < z
3370assert: m <<< M << x <<< X << q <<< Q << a < z < n
3371
3372&m << a
3373&[before 3] a <<< x <<< X << q <<< Q < z
3374assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3375
3376
3377&m <<< a
3378&[before 1] a < x <<< X << q <<< Q < z
3379assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3380
3381&m <<< a
3382&[before 2] a << x <<< X << q <<< Q < z
3383assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3384
3385&m <<< a
3386&[before 3] a <<< x <<< X << q <<< Q < z
3387assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3388
3389
3390&[before 1] s < x <<< X << q <<< Q < z
3391assert: r <<< R < x <<< X << q <<< Q < z < s < n
3392
3393&[before 2] s << x <<< X << q <<< Q < z
3394assert: r <<< R < x <<< X << q <<< Q << s < z < n
3395
3396&[before 3] s <<< x <<< X << q <<< Q < z
3397assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3398
3399
3400&[before 1] \u24DC < x <<< X << q <<< Q < z
3401assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3402
3403&[before 2] \u24DC << x <<< X << q <<< Q < z
3404assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3405
3406&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3407assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
51004dcb 3408*/
374ca955
A
3409
3410
3411#if 0
3412/* requires features not yet supported */
3413static void TestMoreBefore(void) {
46f4442e 3414 static const struct {
374ca955 3415 const char* rules;
46f4442e 3416 const char* order[16];
374ca955
A
3417 int32_t size;
3418 } tests[] = {
3419 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
3420 { "m","M","x","X","q","Q","z","a","n" }, 9},
3421 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
3422 { "m","M","x","X","q","Q","a","z","n" }, 9},
3423 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
3424 { "m","M","x","X","a","q","Q","z","n" }, 9},
3425 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
3426 { "x","X","q","Q","z","m","M","a","n" }, 9},
3427 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
3428 { "m","M","x","X","q","Q","a","z","n" }, 9},
3429 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
3430 { "m","M","x","X","a","q","Q","z","n" }, 9},
3431 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
3432 { "x","X","q","Q","z","n","m","a","M" }, 9},
3433 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
3434 { "x","X","q","Q","m","a","M","z","n" }, 9},
3435 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
3436 { "m","x","X","a","M","q","Q","z","n" }, 9},
3437 { "&[before 1] s < x <<< X << q <<< Q < z",
3438 { "r","R","x","X","q","Q","z","s","n" }, 9},
3439 { "&[before 2] s << x <<< X << q <<< Q < z",
3440 { "r","R","x","X","q","Q","s","z","n" }, 9},
3441 { "&[before 3] s <<< x <<< X << q <<< Q < z",
3442 { "r","R","x","X","s","q","Q","z","n" }, 9},
3443 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
3444 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
3445 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
3446 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
3447 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
3448 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
3449 };
3450
3451 int32_t i = 0;
3452
2ca993e8 3453 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
3454 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
3455 }
3456}
3457#endif
3458
3459static void TestTailorNULL( void ) {
3460 const static char* rule = "&a <<< '\\u0000'";
3461 UErrorCode status = U_ZERO_ERROR;
3462 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3463 uint32_t rlen = 0;
3464 UChar a = 1, null = 0;
3465 UCollationResult res = UCOL_EQUAL;
3466
3467 UCollator *coll = NULL;
3468
3469
3470 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3471 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
73c04bcf
A
3472
3473 if(U_FAILURE(status)) {
729e4ab9 3474 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
73c04bcf
A
3475 } else {
3476 res = ucol_strcoll(coll, &a, 1, &null, 1);
3477
3478 if(res != UCOL_LESS) {
3479 log_err("NULL was not tailored properly!\n");
3480 }
374ca955 3481 }
73c04bcf 3482
374ca955
A
3483 ucol_close(coll);
3484}
b75a7d8f 3485
73c04bcf
A
3486static void
3487TestUpperFirstQuaternary(void)
3488{
3489 const char* tests[] = { "B", "b", "Bb", "bB" };
3490 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3491 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
2ca993e8 3492 genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
73c04bcf
A
3493}
3494
3495static void
3496TestJ4960(void)
3497{
3498 const char* tests[] = { "\\u00e2T", "aT" };
3499 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3500 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3501 const char* tests2[] = { "a", "A" };
3502 const char* rule = "&[first tertiary ignorable]=A=a";
3503 UColAttribute att2[] = { UCOL_CASE_LEVEL };
3504 UColAttributeValue attVals2[] = { UCOL_ON };
3505 /* Test whether we correctly ignore primary ignorables on case level when */
3506 /* we have only primary & case level */
2ca993e8 3507 genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
73c04bcf
A
3508 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3509 /* and case level */
2ca993e8 3510 genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
73c04bcf 3511 /* Test whether completely ignorable letters have case level info (they shouldn't) */
2ca993e8 3512 genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
73c04bcf
A
3513}
3514
3515static void
3516TestJ5223(void)
3517{
3518 static const char *test = "this is a test string";
3519 UChar ustr[256];
3520 int32_t ustr_length = u_unescape(test, ustr, 256);
3521 unsigned char sortkey[256];
3522 int32_t sortkey_length;
3523 UErrorCode status = U_ZERO_ERROR;
3524 static UCollator *coll = NULL;
3525 coll = ucol_open("root", &status);
3526 if(U_FAILURE(status)) {
729e4ab9 3527 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
73c04bcf
A
3528 return;
3529 }
3530 ucol_setStrength(coll, UCOL_PRIMARY);
3531 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3532 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3533 if (U_FAILURE(status)) {
3534 log_err("Failed setting atributes\n");
3535 return;
729e4ab9 3536 }
73c04bcf
A
3537 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3538 if (sortkey_length > 256) return;
3539
3540 /* we mark the position where the null byte should be written in advance */
3541 sortkey[sortkey_length-1] = 0xAA;
3542
3543 /* we set the buffer size one byte higher than needed */
3544 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3545 sortkey_length+1);
3546
3547 /* no error occurs (for me) */
3548 if (sortkey[sortkey_length-1] == 0xAA) {
3549 log_err("Hit bug at first try\n");
3550 }
3551
3552 /* we mark the position where the null byte should be written again */
3553 sortkey[sortkey_length-1] = 0xAA;
3554
3555 /* this time we set the buffer size to the exact amount needed */
3556 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3557 sortkey_length);
3558
3559 /* now the trailing null byte is not written */
3560 if (sortkey[sortkey_length-1] == 0xAA) {
3561 log_err("Hit bug at second try\n");
3562 }
3563
3564 ucol_close(coll);
3565}
3566
3567/* Regression test for Thai partial sort key problem */
3568static void
3569TestJ5232(void)
3570{
3571 const static char *test[] = {
3572 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3573 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3574 };
729e4ab9 3575
2ca993e8 3576 genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
73c04bcf
A
3577}
3578
46f4442e
A
/**
 * Regression test J5367: runs the generic ordering test on "a" and "y" with
 * a tailoring that resets to [first secondary ignorable] after an earlier
 * reset in the same rule string.
 */
static void
TestJ5367(void)
{
    static const char *test[] = {
        "a",
        "y"
    };
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
}
3586
3587static void
3588TestVI5913(void)
3589{
3590 UErrorCode status = U_ZERO_ERROR;
3591 int32_t i, j;
3592 UCollator *coll =NULL;
3593 uint8_t resColl[100], expColl[100];
3594 int32_t rLen, tLen, ruleLen, sLen, kLen;
57a6839d 3595 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
46f4442e 3596 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
57a6839d
A
3597 /*
3598 * Note: Just tailoring &z<ae^ does not work as expected:
3599 * The UCA spec requires for discontiguous contractions that they
3600 * extend an *existing match* by one combining mark at a time.
3601 * Therefore, ae must be a contraction so that the builder finds
3602 * discontiguous contractions for ae^, for example with an intervening underdot.
3603 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3604 */
3605 UChar rule3[256]={
3606 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3607 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3608 0};
46f4442e
A
3609 static const UChar tData[][20]={
3610 {0x1EAC, 0},
3611 {0x0041, 0x0323, 0x0302, 0},
3612 {0x1EA0, 0x0302, 0},
3613 {0x00C2, 0x0323, 0},
3614 {0x1ED8, 0}, /* O with dot and circumflex */
3615 {0x1ECC, 0x0302, 0},
3616 {0x1EB7, 0},
3617 {0x1EA1, 0x0306, 0},
3618 };
3619 static const UChar tailorData[][20]={
3620 {0x1FA2, 0}, /* Omega with 3 combining marks */
3621 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3622 {0x1FF3, 0x0313, 0x0300, 0},
3623 {0x1F60, 0x0300, 0x0345, 0},
3624 {0x1F62, 0x0345, 0},
3625 {0x1FA0, 0x0300, 0},
3626 };
3627 static const UChar tailorData2[][20]={
3628 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3629 {0x0073, 0x0323, 0x030C, 0},
3630 {0x0073, 0x030C, 0x0323, 0},
3631 };
3632 static const UChar tailorData3[][20]={
3633 {0x007a, 0}, /* z */
3634 {0x0061, 0x0065, 0}, /* a + e */
3635 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3636 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3637 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3638 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3639 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3640 {0x00EA, 0}, /* e with circumflex */
3641 };
73c04bcf 3642
46f4442e
A
3643 /* Test Vietnamese sort. */
3644 coll = ucol_open("vi", &status);
3645 if(U_FAILURE(status)) {
729e4ab9 3646 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
46f4442e
A
3647 return;
3648 }
3649 log_verbose("\n\nVI collation:");
3650 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3651 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3652 }
3653 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3654 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3655 }
3656 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3657 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3658 }
3659 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3660 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3661 }
3662
3663 for (j=0; j<8; j++) {
3664 tLen = u_strlen(tData[j]);
3665 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3666 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3667 for(i = 0; i<rLen; i++) {
3668 log_verbose(" %02X", resColl[i]);
3669 }
3670 }
3671
3672 ucol_close(coll);
3673
3674 /* Test Romanian sort. */
3675 coll = ucol_open("ro", &status);
3676 log_verbose("\n\nRO collation:");
3677 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3678 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3679 }
3680 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3681 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3682 }
3683 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3684 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3685 }
3686
3687 for (j=4; j<8; j++) {
3688 tLen = u_strlen(tData[j]);
3689 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3690 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3691 for(i = 0; i<rLen; i++) {
3692 log_verbose(" %02X", resColl[i]);
3693 }
3694 }
3695 ucol_close(coll);
3696
3697 /* Test the precomposed Greek character with 3 combining marks. */
3698 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3699 ruleLen = u_strlen(rule);
3700 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3701 if (U_FAILURE(status)) {
3702 log_err("ucol_openRules failed with %s\n", u_errorName(status));
3703 return;
3704 }
3705 sLen = u_strlen(tailorData[0]);
3706 for (j=1; j<6; j++) {
3707 tLen = u_strlen(tailorData[j]);
3708 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
3709 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3710 }
3711 }
3712 /* Test getSortKey. */
3713 tLen = u_strlen(tailorData[0]);
3714 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3715 for (j=0; j<6; j++) {
3716 tLen = u_strlen(tailorData[j]);
3717 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3718 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3719 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3720 for(i = 0; i<rLen; i++) {
3721 log_err(" %02X", resColl[i]);
3722 }
3723 }
3724 }
3725 ucol_close(coll);
3726
3727 log_verbose("\n\nTailoring test for s with caron:");
3728 ruleLen = u_strlen(rule2);
3729 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3730 tLen = u_strlen(tailorData2[0]);
3731 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3732 for (j=1; j<3; j++) {
3733 tLen = u_strlen(tailorData2[j]);
3734 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3735 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3736 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3737 for(i = 0; i<rLen; i++) {
3738 log_err(" %02X", resColl[i]);
3739 }
3740 }
3741 }
3742 ucol_close(coll);
3743
3744 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3745 ruleLen = u_strlen(rule3);
3746 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3747 tLen = u_strlen(tailorData3[3]);
3748 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
57a6839d
A
3749 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3750 for(i = 0; i<kLen; i++) {
3751 log_verbose(" %02X", expColl[i]);
3752 }
46f4442e
A
3753 for (j=4; j<6; j++) {
3754 tLen = u_strlen(tailorData3[j]);
3755 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3756
3757 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
57a6839d 3758 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
46f4442e
A
3759 for(i = 0; i<rLen; i++) {
3760 log_err(" %02X", resColl[i]);
3761 }
3762 }
3763
57a6839d 3764 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
46f4442e
A
3765 for(i = 0; i<rLen; i++) {
3766 log_verbose(" %02X", resColl[i]);
3767 }
3768 }
3769 ucol_close(coll);
3770}
3771
3772static void
3773TestTailor6179(void)
3774{
3775 UErrorCode status = U_ZERO_ERROR;
3776 int32_t i;
3777 UCollator *coll =NULL;
3778 uint8_t resColl[100];
3779 int32_t rLen, tLen, ruleLen;
3780 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
4388f060
A
3781 static const UChar rule1[]={
3782 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
46f4442e
A
3783 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3784 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3785 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3786 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
4388f060
A
3787 static const UChar rule2[]={
3788 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
46f4442e
A
3789 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3790 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3791 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3792 0x3C,0x3C,0x20,0x62,0};
3793
4388f060 3794 static const UChar tData1[][4]={
46f4442e
A
3795 {0x61, 0},
3796 {0x62, 0},
3797 { 0xFDD0,0x009E, 0}
3798 };
4388f060
A
3799 static const UChar tData2[][4]={
3800 {0x61, 0},
3801 {0x62, 0},
3802 { 0xFDD0,0x009E, 0}
46f4442e
A
3803 };
3804
729e4ab9
A
3805 /*
3806 * These values from FractionalUCA.txt will change,
3807 * and need to be updated here.
57a6839d
A
3808 * TODO: Make this not check for particular sort keys.
3809 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
729e4ab9 3810 */
57a6839d
A
3811 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3812 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3813 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3814 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3815
3816 UParseError parseError;
46f4442e
A
3817
3818 /* Test [Last Primary ignorable] */
729e4ab9 3819
4388f060 3820 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
46f4442e
A
3821 ruleLen = u_strlen(rule1);
3822 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3823 if (U_FAILURE(status)) {
729e4ab9 3824 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
46f4442e
A
3825 return;
3826 }
3827 tLen = u_strlen(tData1[0]);
3828 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
2ca993e8 3829 if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
4388f060 3830 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
46f4442e
A
3831 for(i = 0; i<rLen; i++) {
3832 log_err(" %02X", resColl[i]);
3833 }
4388f060 3834 log_err("\n");
46f4442e
A
3835 }
3836 tLen = u_strlen(tData1[1]);
3837 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
2ca993e8 3838 if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
4388f060 3839 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
46f4442e
A
3840 for(i = 0; i<rLen; i++) {
3841 log_err(" %02X", resColl[i]);
3842 }
4388f060 3843 log_err("\n");
46f4442e
A
3844 }
3845 ucol_close(coll);
729e4ab9 3846
46f4442e
A
3847
3848 /* Test [Last Secondary ignorable] */
4388f060 3849 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
57a6839d
A
3850 ruleLen = u_strlen(rule2);
3851 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
46f4442e 3852 if (U_FAILURE(status)) {
4388f060 3853 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
57a6839d
A
3854 log_info(" offset=%d \"%s\" | \"%s\"\n",
3855 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
46f4442e
A
3856 return;
3857 }
3858 tLen = u_strlen(tData2[0]);
3859 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
2ca993e8 3860 if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
4388f060 3861 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
46f4442e
A
3862 for(i = 0; i<rLen; i++) {
3863 log_err(" %02X", resColl[i]);
3864 }
4388f060 3865 log_err("\n");
46f4442e
A
3866 }
3867 tLen = u_strlen(tData2[1]);
3868 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
2ca993e8 3869 if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
57a6839d
A
3870 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
3871 for(i = 0; i<rLen; i++) {
3872 log_err(" %02X", resColl[i]);
3873 }
3874 log_err("\n");
46f4442e
A
3875 }
3876 ucol_close(coll);
3877}
3878
3879static void
3880TestUCAPrecontext(void)
3881{
3882 UErrorCode status = U_ZERO_ERROR;
3883 int32_t i, j;
3884 UCollator *coll =NULL;
3885 uint8_t resColl[100], prevColl[100];
3886 int32_t rLen, tLen, ruleLen;
3887 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
729e4ab9 3888 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
46f4442e 3889 /* & l middle-dot << a a is an expansion. */
729e4ab9 3890
46f4442e
A
3891 UChar tData1[][20]={
3892 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3893 { 0x387, 0}, /* standalone middle dot(0x387) */
3894 { 0x61, 0}, /* a */
3895 { 0x6C, 0}, /* l */
729e4ab9 3896 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
46f4442e
A
3897 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3898 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3899 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3900 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3901 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3902 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3903 };
729e4ab9 3904
46f4442e
A
3905 log_verbose("\n\nEN collation:");
3906 coll = ucol_open("en", &status);
3907 if (U_FAILURE(status)) {
729e4ab9 3908 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
46f4442e
A
3909 return;
3910 }
3911 for (j=0; j<11; j++) {
3912 tLen = u_strlen(tData1[j]);
3913 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3914 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3915 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3916 j, tData1[j]);
3917 }
3918 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3919 for(i = 0; i<rLen; i++) {
3920 log_verbose(" %02X", resColl[i]);
3921 }
3922 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3923 }
3924 ucol_close(coll);
729e4ab9
A
3925
3926
46f4442e
A
3927 log_verbose("\n\nJA collation:");
3928 coll = ucol_open("ja", &status);
3929 if (U_FAILURE(status)) {
3930 log_err("Tailoring test: &z <<a|- failed!");
3931 return;
3932 }
3933 for (j=0; j<11; j++) {
3934 tLen = u_strlen(tData1[j]);
3935 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3936 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3937 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3938 j, tData1[j]);
3939 }
3940 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3941 for(i = 0; i<rLen; i++) {
3942 log_verbose(" %02X", resColl[i]);
3943 }
3944 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3945 }
3946 ucol_close(coll);
729e4ab9 3947
46f4442e
A
3948
3949 log_verbose("\n\nTailoring test: & middle dot < a ");
3950 ruleLen = u_strlen(rule1);
3951 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3952 if (U_FAILURE(status)) {
3953 log_err("Tailoring test: & middle dot < a failed!");
3954 return;
3955 }
3956 for (j=0; j<11; j++) {
3957 tLen = u_strlen(tData1[j]);
3958 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3959 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3960 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3961 j, tData1[j]);
3962 }
3963 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3964 for(i = 0; i<rLen; i++) {
3965 log_verbose(" %02X", resColl[i]);
3966 }
3967 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3968 }
3969 ucol_close(coll);
729e4ab9 3970
46f4442e
A
3971
3972 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3973 ruleLen = u_strlen(rule2);
3974 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3975 if (U_FAILURE(status)) {
3976 log_err("Tailoring test: & l middle-dot << a failed!");
3977 return;
3978 }
3979 for (j=0; j<11; j++) {
3980 tLen = u_strlen(tData1[j]);
3981 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3982 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3983 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3984 j, tData1[j]);
3985 }
3986 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
729e4ab9 3987 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
46f4442e
A
3988 j, tData1[j]);
3989 }
3990 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3991 for(i = 0; i<rLen; i++) {
3992 log_verbose(" %02X", resColl[i]);
3993 }
3994 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3995 }
3996 ucol_close(coll);
3997}
3998
3999static void
4000TestOutOfBuffer5468(void)
4001{
4002 static const char *test = "\\u4e00";
4003 UChar ustr[256];
4004 int32_t ustr_length = u_unescape(test, ustr, 256);
4005 unsigned char shortKeyBuf[1];
4006 int32_t sortkey_length;
4007 UErrorCode status = U_ZERO_ERROR;
4008 static UCollator *coll = NULL;
729e4ab9 4009
46f4442e
A
4010 coll = ucol_open("root", &status);
4011 if(U_FAILURE(status)) {
729e4ab9 4012 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
46f4442e
A
4013 return;
4014 }
4015 ucol_setStrength(coll, UCOL_PRIMARY);
4016 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4017 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4018 if (U_FAILURE(status)) {
4019 log_err("Failed setting atributes\n");
4020 return;
729e4ab9
A
4021 }
4022
46f4442e
A
4023 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4024 if (sortkey_length != 4) {
4025 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
4026 }
4027 log_verbose("length of sortKey is %d", sortkey_length);
4028 ucol_close(coll);
4029}
4030
4031#define TSKC_DATA_SIZE 5
4032#define TSKC_BUF_SIZE 50
4033static void
4034TestSortKeyConsistency(void)
4035{
4036 UErrorCode icuRC = U_ZERO_ERROR;
4037 UCollator* ucol;
4038 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4039
4040 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4041 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4042 int32_t i, j, i2;
4043
4044 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4045 if (U_FAILURE(icuRC))
4046 {
729e4ab9 4047 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
46f4442e
A
4048 return;
4049 }
4050
4051 for (i = 0; i < TSKC_DATA_SIZE; i++)
4052 {
4053 UCharIterator uiter;
4054 uint32_t state[2] = { 0, 0 };
4055 int32_t dataLen = i+1;
4056 for (j=0; j<TSKC_BUF_SIZE; j++)
4057 bufFull[i][j] = bufPart[i][j] = 0;
4058
4059 /* Full sort key */
4060 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4061
4062 /* Partial sort key */
4063 uiter_setString(&uiter, data, dataLen);
4064 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4065 if (U_FAILURE(icuRC))
4066 {
4067 log_err("ucol_nextSortKeyPart failed\n");
4068 ucol_close(ucol);
4069 return;
4070 }
4071
4072 for (i2=0; i2<i; i2++)
4073 {
4074 UBool fullMatch = TRUE;
4075 UBool partMatch = TRUE;
4076 for (j=0; j<TSKC_BUF_SIZE; j++)
4077 {
4078 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4079 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4080 }
4081 if (fullMatch != partMatch) {
4082 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4083 : "partial key was consistent, but full key changed\n");
4084 ucol_close(ucol);
4085 return;
4086 }
4087 }
4088 }
4089
4090 /*=============================================*/
4091 ucol_close(ucol);
4092}
4093
4094/* ticket: 6101 */
4095static void TestCroatianSortKey(void) {
4096 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4097 UErrorCode status = U_ZERO_ERROR;
4098 UCollator *ucol;
4099 UCharIterator iter;
4100
4101 static const UChar text[] = { 0x0044, 0xD81A };
4102
2ca993e8 4103 size_t length = UPRV_LENGTHOF(text);
46f4442e
A
4104
4105 uint8_t textSortKey[32];
4106 size_t lenSortKey = 32;
4107 size_t actualSortKeyLen;
4108 uint32_t uStateInfo[2] = { 0, 0 };
4109
4110 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4111 if (U_FAILURE(status)) {
729e4ab9 4112 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
46f4442e
A
4113 return;
4114 }
4115
3d1f044b 4116 uiter_setString(&iter, text, (int32_t)length);
46f4442e
A
4117
4118 actualSortKeyLen = ucol_nextSortKeyPart(
4119 ucol, &iter, (uint32_t*)uStateInfo,
3d1f044b 4120 textSortKey, (int32_t)lenSortKey, &status
46f4442e
A
4121 );
4122
4123 if (actualSortKeyLen == lenSortKey) {
4124 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4125 }
4126
4127 ucol_close(ucol);
4128}
4129
4130/* ticket: 6140 */
4131/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
729e4ab9 4132 * they are both Hiragana and Katakana
46f4442e
A
4133 */
4134#define SORTKEYLEN 50
4135static void TestHiragana(void) {
4136 UErrorCode status = U_ZERO_ERROR;
4137 UCollator* ucol;
4138 UCollationResult strcollresult;
4139 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4140 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
2ca993e8
A
4141 int32_t data1Len = UPRV_LENGTHOF(data1);
4142 int32_t data2Len = UPRV_LENGTHOF(data2);
46f4442e
A
4143 int32_t i, j;
4144 uint8_t sortKey1[SORTKEYLEN];
4145 uint8_t sortKey2[SORTKEYLEN];
4146
4147 UCharIterator uiter1;
4148 UCharIterator uiter2;
4149 uint32_t state1[2] = { 0, 0 };
4150 uint32_t state2[2] = { 0, 0 };
4151 int32_t keySize1;
4152 int32_t keySize2;
4153
4154 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4155 &status);
4156 if (U_FAILURE(status)) {
729e4ab9 4157 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
46f4442e
A
4158 return;
4159 }
4160
4161 /* Start of full sort keys */
4162 /* Full sort key1 */
4163 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4164 /* Full sort key2 */
4165 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4166 if (keySize1 == keySize2) {
4167 for (i = 0; i < keySize1; i++) {
4168 if (sortKey1[i] != sortKey2[i]) {
4169 log_err("Full sort keys are different. Should be equal.");
4170 }
4171 }
4172 } else {
4173 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4174 }
4175 /* End of full sort keys */
4176
4177 /* Start of partial sort keys */
4178 /* Partial sort key1 */
4179 uiter_setString(&uiter1, data1, data1Len);
4180 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4181 /* Partial sort key2 */
4182 uiter_setString(&uiter2, data2, data2Len);
4183 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4184 if (U_SUCCESS(status) && keySize1 == keySize2) {
4185 for (j = 0; j < keySize1; j++) {
4186 if (sortKey1[j] != sortKey2[j]) {
4187 log_err("Partial sort keys are different. Should be equal");
4188 }
4189 }
4190 } else {
4191 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4192 }
4193 /* End of partial sort keys */
4194
4195 /* Start of strcoll */
4196 /* Use ucol_strcoll() to determine ordering */
4197 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4198 if (strcollresult != UCOL_EQUAL) {
4199 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4200 }
729e4ab9 4201
46f4442e
A
4202 ucol_close(ucol);
4203}
73c04bcf 4204
729e4ab9
A
4205/* Convenient struct for running collation tests */
4206typedef struct {
4207 const UChar source[MAX_TOKEN_LEN]; /* String on left */
4208 const UChar target[MAX_TOKEN_LEN]; /* String on right */
4209 UCollationResult result; /* -1, 0 or +1, depending on collation */
4210} OneTestCase;
4211
4212/*
4213 * Utility function to test one collation test case.
4214 * @param testcases Array of test cases.
4215 * @param n_testcases Size of the array testcases.
4216 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4217 * @param n_rules Size of the array str_rules.
4218 */
4219static void doTestOneTestCase(const OneTestCase testcases[],
4220 int n_testcases,
4221 const char* str_rules[],
4222 int n_rules)
4223{
4224 int rule_no, testcase_no;
4225 UChar rule[500];
4226 int32_t length = 0;
4227 UErrorCode status = U_ZERO_ERROR;
4228 UParseError parse_error;
4229 UCollator *myCollation;
4230
4231 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4232
4233 length = u_unescape(str_rules[rule_no], rule, 500);
4234 if (length == 0) {
4235 log_err("ERROR: The rule cannot be unescaped: %s\n");
4236 return;
4237 }
4238 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4239 if(U_FAILURE(status)){
4240 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
57a6839d
A
4241 log_info(" offset=%d \"%s\" | \"%s\"\n",
4242 parse_error.offset,
4243 aescstrdup(parse_error.preContext, -1),
4244 aescstrdup(parse_error.postContext, -1));
729e4ab9
A
4245 return;
4246 }
4247 log_verbose("Testing the <<* syntax\n");
4248 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4249 ucol_setStrength(myCollation, UCOL_TERTIARY);
4250 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4251 doTest(myCollation,
4252 testcases[testcase_no].source,
4253 testcases[testcase_no].target,
4254 testcases[testcase_no].result
4255 );
4256 }
4257 ucol_close(myCollation);
4258 }
4259}
4260
4261const static OneTestCase rangeTestcases[] = {
4262 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
4263 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
4264 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
4265
4266 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
4267 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
4268 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
4269 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
4270 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
4271
4272 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
4273 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
4274 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
4275 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
4276
4277 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
4278 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
4279 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
4280 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
4281 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
4282 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
4283 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
4284 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
4285};
4286
2ca993e8 4287static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
729e4ab9
A
4288
4289const static OneTestCase rangeTestcasesSupplemental[] = {
57a6839d
A
4290 { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
4291 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
729e4ab9 4292 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
57a6839d 4293 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
729e4ab9
A
4294 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
4295 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
57a6839d 4296 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10001 */
729e4ab9
A
4297};
4298
2ca993e8 4299static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
729e4ab9
A
4300
4301const static OneTestCase rangeTestcasesQwerty[] = {
4302 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
4303 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
4304
4305 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
4306 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
4307
4308 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
4309 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
4310
4311 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
4312 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
4313
4314 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4315 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
4316 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4317 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
4318};
4319
2ca993e8 4320static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
729e4ab9
A
4321
4322static void TestSameStrengthList(void)
4323{
4324 const char* strRules[] = {
4325 /* Normal */
4326 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4327
4328 /* Lists */
4329 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4330 };
2ca993e8 4331 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4332}
4333
4334static void TestSameStrengthListQuoted(void)
4335{
4336 const char* strRules[] = {
4337 /* Lists with quoted characters */
4338 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4339 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4340
4341 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4342 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4343
4344 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4345 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4346 };
2ca993e8 4347 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4348}
4349
4350static void TestSameStrengthListSupplemental(void)
4351{
4352 const char* strRules[] = {
57a6839d
A
4353 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4354 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4355 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4356 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
729e4ab9 4357 };
2ca993e8 4358 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4359}
4360
4361static void TestSameStrengthListQwerty(void)
4362{
4363 const char* strRules[] = {
4364 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4365 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4366 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4367 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4368 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4369
4370 /* Quoted characters also will work if two quoted characters are not consecutive. */
4371 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4372
4373 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4374 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4375
4376 };
2ca993e8 4377 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4378}
4379
4380static void TestSameStrengthListQuotedQwerty(void)
4381{
4382 const char* strRules[] = {
4383 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4384 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4385 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4386
4387 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4388 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4389 };
2ca993e8 4390 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4391}
4392
4393static void TestSameStrengthListRanges(void)
4394{
4395 const char* strRules[] = {
4396 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4397 };
2ca993e8 4398 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4399}
4400
4401static void TestSameStrengthListSupplementalRanges(void)
4402{
4403 const char* strRules[] = {
57a6839d
A
4404 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4405 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
729e4ab9 4406 };
2ca993e8 4407 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4408}
4409
4410static void TestSpecialCharacters(void)
4411{
4412 const char* strRules[] = {
4413 /* Normal */
4414 "&';'<'+'<','<'-'<'&'<'*'",
4415
4416 /* List */
4417 "&';'<*'+,-&*'",
4418
4419 /* Range */
4420 "&';'<*'+'-'-&*'",
4421 };
4422
4423 const static OneTestCase specialCharacterStrings[] = {
4424 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
4425 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
4426 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
4427 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
4428 };
2ca993e8 4429 doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4430}
4431
4432static void TestPrivateUseCharacters(void)
4433{
4434 const char* strRules[] = {
4435 /* Normal */
4436 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4437 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4438 };
4439
4440 const static OneTestCase privateUseCharacterStrings[] = {
4441 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4442 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4443 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4444 { {0xe2da}, {0xe2db}, UCOL_LESS },
4445 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4446 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4447 };
2ca993e8 4448 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4449}
4450
4451static void TestPrivateUseCharactersInList(void)
4452{
4453 const char* strRules[] = {
4454 /* List */
4455 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4456 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4457 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4458 };
4459
4460 const static OneTestCase privateUseCharacterStrings[] = {
4461 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4462 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4463 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4464 { {0xe2da}, {0xe2db}, UCOL_LESS },
4465 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4466 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4467 };
2ca993e8 4468 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4469}
4470
4471static void TestPrivateUseCharactersInRange(void)
4472{
4473 const char* strRules[] = {
4474 /* Range */
4475 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4476 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4477 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4478 };
4479
4480 const static OneTestCase privateUseCharacterStrings[] = {
4481 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4482 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4483 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4484 { {0xe2da}, {0xe2db}, UCOL_LESS },
4485 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4486 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4487 };
2ca993e8 4488 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4489}
4490
4491static void TestInvalidListsAndRanges(void)
4492{
4493 const char* invalidRules[] = {
4494 /* Range not in starred expression */
4495 "&\\ufffe<\\uffff-\\U00010002",
4496
4497 /* Range without start */
4498 "&a<*-c",
4499
4500 /* Range without end */
4501 "&a<*b-",
4502
4503 /* More than one hyphen */
4504 "&a<*b-g-l",
4505
4506 /* Range in the wrong order */
4507 "&a<*k-b",
4508
4509 };
4510
4511 UChar rule[500];
4512 UErrorCode status = U_ZERO_ERROR;
4513 UParseError parse_error;
2ca993e8 4514 int n_rules = UPRV_LENGTHOF(invalidRules);
729e4ab9
A
4515 int rule_no;
4516 int length;
4517 UCollator *myCollation;
4518
4519 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4520
4521 length = u_unescape(invalidRules[rule_no], rule, 500);
4522 if (length == 0) {
4523 log_err("ERROR: The rule cannot be unescaped: %s\n");
4524 return;
4525 }
4526 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
57a6839d 4527 (void)myCollation; /* Suppress set but not used warning. */
729e4ab9
A
4528 if(!U_FAILURE(status)){
4529 log_err("ERROR: Could not cause a failure as expected: \n");
4530 }
4531 status = U_ZERO_ERROR;
4532 }
4533}
4534
4535/*
4536 * This test ensures that characters placed before a character in a different script have the same lead byte
4537 * in their collation key before and after script reordering.
4538 */
4539static void TestBeforeRuleWithScriptReordering(void)
4540{
4541 UParseError error;
4542 UErrorCode status = U_ZERO_ERROR;
4543 UCollator *myCollation;
4544 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4545 UChar rules[500];
4546 uint32_t rulesLength = 0;
4547 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4548 UCollationResult collResult;
4549
4550 uint8_t baseKey[256];
4551 uint32_t baseKeyLength;
4552 uint8_t beforeKey[256];
4553 uint32_t beforeKeyLength;
4554
4555 UChar base[] = { 0x03b1 }; /* base */
2ca993e8 4556 int32_t baseLen = UPRV_LENGTHOF(base);
729e4ab9
A
4557
4558 UChar before[] = { 0x0e01 }; /* ko kai */
2ca993e8 4559 int32_t beforeLen = UPRV_LENGTHOF(before);
729e4ab9
A
4560
4561 /*UChar *data[] = { before, base };
4562 genericRulesStarter(srules, data, 2);*/
4563
4564 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4565
57a6839d
A
4566 (void)beforeKeyLength; /* Suppress set but not used warnings. */
4567 (void)baseKeyLength;
729e4ab9
A
4568
4569 /* build collator */
4570 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4571
2ca993e8 4572 rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
729e4ab9
A
4573 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4574 if(U_FAILURE(status)) {
4575 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4576 return;
4577 }
4578
4579 /* check collation results - before rule applied but not script reordering */
4580 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4581 if (collResult != UCOL_GREATER) {
4582 log_err("Collation result not correct before script reordering = %d\n", collResult);
4583 }
4584
4585 /* check the lead byte of the collation keys before script reordering */
4586 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4587 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4588 if (baseKey[0] != beforeKey[0]) {
4589 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4590 }
4591
4592 /* reorder the scripts */
4593 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4594 if(U_FAILURE(status)) {
4595 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4596 return;
4597 }
4598
4599 /* check collation results - before rule applied and after script reordering */
4600 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4601 if (collResult != UCOL_GREATER) {
4602 log_err("Collation result not correct after script reordering = %d\n", collResult);
4603 }
4604
4605 /* check the lead byte of the collation keys after script reordering */
4606 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4607 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4608 if (baseKey[0] != beforeKey[0]) {
4609 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4610 }
4611
4612 ucol_close(myCollation);
4613}
4614
4615/*
4616 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4617 */
4618static void TestNonLeadBytesDuringCollationReordering(void)
4619{
4620 UErrorCode status = U_ZERO_ERROR;
4621 UCollator *myCollation;
4622 int32_t reorderCodes[1] = {USCRIPT_GREEK};
729e4ab9
A
4623
4624 uint8_t baseKey[256];
4625 uint32_t baseKeyLength;
4626 uint8_t reorderKey[256];
4627 uint32_t reorderKeyLength;
4628
4629 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4630
4388f060 4631 uint32_t i;
729e4ab9
A
4632
4633
4634 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4635
4636 /* build collator tertiary */
4637 myCollation = ucol_open("", &status);
4638 ucol_setStrength(myCollation, UCOL_TERTIARY);
4639 if(U_FAILURE(status)) {
4640 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4641 return;
4642 }
2ca993e8 4643 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
729e4ab9 4644
2ca993e8 4645 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4646 if(U_FAILURE(status)) {
4647 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4648 return;
4649 }
2ca993e8 4650 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
729e4ab9
A
4651
4652 if (baseKeyLength != reorderKeyLength) {
4388f060 4653 log_err("Key lengths not the same during reordering.\n");
729e4ab9
A
4654 return;
4655 }
4656
4657 for (i = 1; i < baseKeyLength; i++) {
4658 if (baseKey[i] != reorderKey[i]) {
4659 log_err("Collation key bytes not the same at position %d.\n", i);
4660 return;
4661 }
4662 }
4663 ucol_close(myCollation);
4664
4665 /* build collator quaternary */
4666 myCollation = ucol_open("", &status);
4667 ucol_setStrength(myCollation, UCOL_QUATERNARY);
4668 if(U_FAILURE(status)) {
4669 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4670 return;
4671 }
2ca993e8 4672 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
729e4ab9 4673
2ca993e8 4674 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4675 if(U_FAILURE(status)) {
4676 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4677 return;
4678 }
2ca993e8 4679 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
729e4ab9
A
4680
4681 if (baseKeyLength != reorderKeyLength) {
4388f060 4682 log_err("Key lengths not the same during reordering.\n");
729e4ab9
A
4683 return;
4684 }
4685
4686 for (i = 1; i < baseKeyLength; i++) {
4687 if (baseKey[i] != reorderKey[i]) {
4688 log_err("Collation key bytes not the same at position %d.\n", i);
4689 return;
4690 }
4691 }
4692 ucol_close(myCollation);
4693}
4694
4695/*
4696 * Test reordering API.
4697 */
4698static void TestReorderingAPI(void)
4699{
4700 UErrorCode status = U_ZERO_ERROR;
4701 UCollator *myCollation;
4702 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
b331163b 4703 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4388f060 4704 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
b331163b 4705 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
729e4ab9
A
4706 UCollationResult collResult;
4707 int32_t retrievedReorderCodesLength;
4388f060 4708 int32_t retrievedReorderCodes[10];
729e4ab9
A
4709 UChar greekString[] = { 0x03b1 };
4710 UChar punctuationString[] = { 0x203e };
4388f060 4711 int loopIndex;
729e4ab9
A
4712
4713 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4714
4715 /* build collator tertiary */
4716 myCollation = ucol_open("", &status);
4717 ucol_setStrength(myCollation, UCOL_TERTIARY);
4718 if(U_FAILURE(status)) {
4719 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4720 return;
4721 }
4722
4723 /* set the reorderding */
2ca993e8 4724 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4725 if (U_FAILURE(status)) {
4726 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4727 return;
4728 }
4729
4388f060 4730 /* get the reordering */
729e4ab9
A
4731 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4732 if (status != U_BUFFER_OVERFLOW_ERROR) {
4733 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4734 return;
4735 }
4736 status = U_ZERO_ERROR;
2ca993e8
A
4737 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4738 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
729e4ab9
A
4739 return;
4740 }
4388f060 4741 /* now let's really get it */
2ca993e8 4742 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4743 if (U_FAILURE(status)) {
4744 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4745 return;
4746 }
2ca993e8
A
4747 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4748 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4749 return;
4750 }
4751 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4752 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4753 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4754 return;
4755 }
4756 }
2ca993e8 4757 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
729e4ab9
A
4758 if (collResult != UCOL_LESS) {
4759 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4760 return;
4761 }
4762
4763 /* clear the reordering */
4764 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4765 if (U_FAILURE(status)) {
4766 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4767 return;
4768 }
4769
4388f060 4770 /* get the reordering again */
729e4ab9
A
4771 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4772 if (retrievedReorderCodesLength != 0) {
4773 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4774 return;
4775 }
4776
2ca993e8 4777 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
729e4ab9
A
4778 if (collResult != UCOL_GREATER) {
4779 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4780 return;
4781 }
4782
b331163b
A
4783 /* clear the reordering using [NONE] */
4784 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4785 if (U_FAILURE(status)) {
4786 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4787 return;
4788 }
4789
4790 /* get the reordering again */
4791 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4792 if (retrievedReorderCodesLength != 0) {
4793 log_err_status(status,
4794 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4795 retrievedReorderCodesLength);
4796 return;
4797 }
4798
4388f060 4799 /* test for error condition on duplicate reorder codes */
2ca993e8 4800 ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4388f060
A
4801 if (!U_FAILURE(status)) {
4802 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4803 return;
4804 }
4805
4806 status = U_ZERO_ERROR;
4807 /* test for reorder codes after a reset code */
2ca993e8 4808 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4388f060
A
4809 if (!U_FAILURE(status)) {
4810 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4811 return;
4812 }
4813
729e4ab9
A
4814 ucol_close(myCollation);
4815}
4816
4817/*
4388f060 4818 * Test reordering API.
729e4ab9 4819 */
4388f060 4820static void TestReorderingAPIWithRuleCreatedCollator(void)
729e4ab9 4821{
729e4ab9
A
4822 UErrorCode status = U_ZERO_ERROR;
4823 UCollator *myCollation;
4388f060 4824 UChar rules[90];
57a6839d
A
4825 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4826 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4827 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4388f060
A
4828 UCollationResult collResult;
4829 int32_t retrievedReorderCodesLength;
4830 int32_t retrievedReorderCodes[10];
57a6839d
A
4831 static const UChar greekString[] = { 0x03b1 };
4832 static const UChar punctuationString[] = { 0x203e };
4833 static const UChar hanString[] = { 0x65E5, 0x672C };
4388f060 4834 int loopIndex;
729e4ab9 4835
4388f060
A
4836 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4837
4838 /* build collator from rules */
4839 u_uastrcpy(rules, "[reorder Hani Grek]");
4840 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4841 if(U_FAILURE(status)) {
4842 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4843 return;
4844 }
4845
4846 /* get the reordering */
2ca993e8 4847 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4848 if (U_FAILURE(status)) {
4849 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4850 return;
4851 }
2ca993e8
A
4852 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4853 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4388f060
A
4854 return;
4855 }
4856 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4857 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4858 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
729e4ab9
A
4859 return;
4860 }
4388f060 4861 }
2ca993e8 4862 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4388f060 4863 if (collResult != UCOL_GREATER) {
57a6839d 4864 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4388f060
A
4865 return;
4866 }
4388f060 4867
57a6839d 4868 /* set the reordering */
2ca993e8 4869 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
4870 if (U_FAILURE(status)) {
4871 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4872 return;
4873 }
57a6839d 4874
4388f060
A
4875 /* get the reordering */
4876 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4877 if (status != U_BUFFER_OVERFLOW_ERROR) {
4878 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4879 return;
4880 }
4881 status = U_ZERO_ERROR;
2ca993e8
A
4882 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4883 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4884 return;
4885 }
4886 /* now let's really get it */
2ca993e8 4887 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4888 if (U_FAILURE(status)) {
4889 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4890 return;
4891 }
2ca993e8
A
4892 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4893 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4894 return;
4895 }
4896 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4897 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4898 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
729e4ab9
A
4899 return;
4900 }
4388f060 4901 }
2ca993e8 4902 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4388f060
A
4903 if (collResult != UCOL_LESS) {
4904 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4905 return;
4906 }
57a6839d 4907
4388f060
A
4908 /* clear the reordering */
4909 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4910 if (U_FAILURE(status)) {
4911 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4912 return;
4913 }
4914
4915 /* get the reordering again */
4916 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4917 if (retrievedReorderCodesLength != 0) {
4918 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4919 return;
4920 }
4921
2ca993e8 4922 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4388f060
A
4923 if (collResult != UCOL_GREATER) {
4924 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4925 return;
4926 }
4927
57a6839d
A
4928 /* reset the reordering */
4929 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4930 if (U_FAILURE(status)) {
4931 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4932 return;
4933 }
2ca993e8 4934 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
57a6839d
A
4935 if (U_FAILURE(status)) {
4936 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4937 return;
4938 }
2ca993e8
A
4939 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4940 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
57a6839d
A
4941 return;
4942 }
4943 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4944 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4945 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4946 return;
4947 }
4948 }
4949
4388f060
A
4950 ucol_close(myCollation);
4951}
4952
b331163b
A
4953static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4954 int32_t i;
4955 for (i = 0; i < length; ++i) {
4956 if (expectedScript == scripts[i]) { return TRUE; }
4957 }
4958 return FALSE;
4388f060
A
4959}
4960
4961static void TestEquivalentReorderingScripts(void) {
b331163b
A
4962 // Beginning with ICU 55, collation reordering moves single scripts
4963 // rather than groups of scripts,
4964 // except where scripts share a range and sort primary-equal.
4388f060 4965 UErrorCode status = U_ZERO_ERROR;
b331163b
A
4966 int32_t equivalentScripts[100];
4967 int32_t length;
4968 int i;
4969 int32_t prevScript;
4970 /* These scripts are expected to be equivalent. */
4971 static const int32_t expectedScripts[] = {
4972 USCRIPT_HIRAGANA,
4973 USCRIPT_KATAKANA,
4974 USCRIPT_KATAKANA_OR_HIRAGANA
4388f060
A
4975 };
4976
b331163b
A
4977 equivalentScripts[0] = 0;
4978 length = ucol_getEquivalentReorderCodes(
2ca993e8 4979 USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4388f060 4980 if (U_FAILURE(status)) {
b331163b 4981 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
4982 return;
4983 }
b331163b
A
4984 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4985 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4986 "length expected 1, was = %d; expected [%d] was [%d]\n",
4987 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4388f060 4988 }
b331163b
A
4989
4990 length = ucol_getEquivalentReorderCodes(
2ca993e8 4991 USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
4992 if (U_FAILURE(status)) {
4993 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
4994 return;
4995 }
2ca993e8 4996 if (length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
4997 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4998 "expected %d, was = %d\n",
2ca993e8 4999 UPRV_LENGTHOF(expectedScripts), length);
b331163b
A
5000 }
5001 prevScript = -1;
5002 for (i = 0; i < length; ++i) {
5003 int32_t script = equivalentScripts[i];
5004 if (script <= prevScript) {
5005 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
5006 }
5007 prevScript = script;
5008 }
2ca993e8 5009 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
b331163b
A
5010 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5011 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5012 expectedScripts[i]);
4388f060
A
5013 }
5014 }
5015
b331163b 5016 length = ucol_getEquivalentReorderCodes(
2ca993e8 5017 USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4388f060 5018 if (U_FAILURE(status)) {
b331163b 5019 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
5020 return;
5021 }
2ca993e8 5022 if (length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
5023 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5024 "expected %d, was = %d\n",
2ca993e8 5025 UPRV_LENGTHOF(expectedScripts), length);
4388f060 5026 }
2ca993e8 5027 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
b331163b
A
5028 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5029 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5030 expectedScripts[i]);
729e4ab9 5031 }
729e4ab9 5032 }
b331163b
A
5033
5034 length = ucol_getEquivalentReorderCodes(
2ca993e8
A
5035 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5036 if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
5037 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5038 "expected %d, was = %d\n",
2ca993e8 5039 UPRV_LENGTHOF(expectedScripts), length);
b331163b
A
5040 }
5041
5042 length = ucol_getEquivalentReorderCodes(
2ca993e8 5043 USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5044 if (U_FAILURE(status) || length != 3) {
5045 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5046 "expected 3, was = %d\n", length);
5047 }
5048 length = ucol_getEquivalentReorderCodes(
2ca993e8 5049 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5050 if (U_FAILURE(status) || length != 3) {
5051 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5052 "expected 3, was = %d\n", length);
5053 }
5054 length = ucol_getEquivalentReorderCodes(
2ca993e8 5055 USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5056 if (U_FAILURE(status) || length != 3) {
5057 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5058 "expected 3, was = %d\n", length);
5059 }
5060
5061 length = ucol_getEquivalentReorderCodes(
2ca993e8 5062 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5063 if (U_FAILURE(status) || length != 2) {
5064 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5065 "expected 2, was = %d\n", length);
5066 }
5067 length = ucol_getEquivalentReorderCodes(
2ca993e8 5068 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5069 if (U_FAILURE(status) || length != 2) {
5070 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5071 "expected 2, was = %d\n", length);
5072 }
729e4ab9
A
5073}
5074
4388f060
A
5075static void TestReorderingAcrossCloning(void)
5076{
5077 UErrorCode status = U_ZERO_ERROR;
5078 UCollator *myCollation;
5079 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5080 UCollator *clonedCollation;
4388f060
A
5081 int32_t retrievedReorderCodesLength;
5082 int32_t retrievedReorderCodes[10];
5083 int loopIndex;
5084
5085 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5086
5087 /* build collator tertiary */
5088 myCollation = ucol_open("", &status);
5089 ucol_setStrength(myCollation, UCOL_TERTIARY);
5090 if(U_FAILURE(status)) {
5091 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5092 return;
5093 }
5094
5095 /* set the reorderding */
2ca993e8 5096 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
5097 if (U_FAILURE(status)) {
5098 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5099 return;
5100 }
5101
5102 /* clone the collator */
57a6839d 5103 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
4388f060
A
5104 if (U_FAILURE(status)) {
5105 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5106 return;
5107 }
5108
5109 /* get the reordering */
2ca993e8 5110 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
5111 if (U_FAILURE(status)) {
5112 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5113 return;
5114 }
2ca993e8
A
5115 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5116 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
5117 return;
5118 }
5119 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5120 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5121 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5122 return;
5123 }
5124 }
5125
5126 /*uprv_free(buffer);*/
5127 ucol_close(myCollation);
5128 ucol_close(clonedCollation);
5129}
5130
5131/*
5132 * Utility function to test one collation reordering test case set.
5133 * @param testcases Array of test cases.
5134 * @param n_testcases Size of the array testcases.
5135 * @param reorderTokens Array of reordering codes.
5136 * @param reorderTokensLen Size of the array reorderTokens.
5137 */
5138static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5139{
5140 uint32_t testCaseNum;
5141 UErrorCode status = U_ZERO_ERROR;
5142 UCollator *myCollation;
5143
5144 myCollation = ucol_open("", &status);
5145 if (U_FAILURE(status)) {
5146 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5147 return;
5148 }
5149 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5150 if(U_FAILURE(status)) {
5151 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5152 return;
5153 }
5154
5155 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5156 doTest(myCollation,
5157 testCases[testCaseNum].source,
5158 testCases[testCaseNum].target,
5159 testCases[testCaseNum].result
5160 );
5161 }
5162 ucol_close(myCollation);
5163}
5164
729e4ab9
A
5165static void TestGreekFirstReorder(void)
5166{
5167 const char* strRules[] = {
5168 "[reorder Grek]"
5169 };
5170
5171 const int32_t apiRules[] = {
5172 USCRIPT_GREEK
5173 };
5174
5175 const static OneTestCase privateUseCharacterStrings[] = {
5176 { {0x0391}, {0x0391}, UCOL_EQUAL },
5177 { {0x0041}, {0x0391}, UCOL_GREATER },
5178 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5179 { {0x0060}, {0x0391}, UCOL_LESS },
5180 { {0x0391}, {0xe2dc}, UCOL_LESS },
5181 { {0x0391}, {0x0060}, UCOL_GREATER },
5182 };
5183
5184 /* Test rules creation */
2ca993e8 5185 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5186
5187 /* Test collation reordering API */
2ca993e8 5188 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5189}
5190
5191static void TestGreekLastReorder(void)
5192{
5193 const char* strRules[] = {
5194 "[reorder Zzzz Grek]"
5195 };
5196
5197 const int32_t apiRules[] = {
5198 USCRIPT_UNKNOWN, USCRIPT_GREEK
5199 };
5200
5201 const static OneTestCase privateUseCharacterStrings[] = {
5202 { {0x0391}, {0x0391}, UCOL_EQUAL },
5203 { {0x0041}, {0x0391}, UCOL_LESS },
5204 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5205 { {0x0060}, {0x0391}, UCOL_LESS },
5206 { {0x0391}, {0xe2dc}, UCOL_GREATER },
5207 };
5208
5209 /* Test rules creation */
2ca993e8 5210 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5211
5212 /* Test collation reordering API */
2ca993e8 5213 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5214}
5215
5216static void TestNonScriptReorder(void)
5217{
5218 const char* strRules[] = {
5219 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5220 };
5221
5222 const int32_t apiRules[] = {
5223 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5224 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5225 UCOL_REORDER_CODE_CURRENCY
5226 };
5227
5228 const static OneTestCase privateUseCharacterStrings[] = {
5229 { {0x0391}, {0x0041}, UCOL_LESS },
5230 { {0x0041}, {0x0391}, UCOL_GREATER },
5231 { {0x0060}, {0x0041}, UCOL_LESS },
5232 { {0x0060}, {0x0391}, UCOL_GREATER },
5233 { {0x0024}, {0x0041}, UCOL_GREATER },
5234 };
5235
5236 /* Test rules creation */
2ca993e8 5237 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5238
5239 /* Test collation reordering API */
2ca993e8 5240 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5241}
5242
5243static void TestHaniReorder(void)
5244{
5245 const char* strRules[] = {
5246 "[reorder Hani]"
5247 };
5248 const int32_t apiRules[] = {
5249 USCRIPT_HAN
5250 };
5251
5252 const static OneTestCase privateUseCharacterStrings[] = {
5253 { {0x4e00}, {0x0041}, UCOL_LESS },
5254 { {0x4e00}, {0x0060}, UCOL_GREATER },
5255 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5256 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5257 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5258 { {0xfa27}, {0x0041}, UCOL_LESS },
5259 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5260 };
5261
5262 /* Test rules creation */
2ca993e8 5263 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5264
5265 /* Test collation reordering API */
2ca993e8 5266 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5267}
5268
4388f060
A
5269static void TestHaniReorderWithOtherRules(void)
5270{
5271 const char* strRules[] = {
5272 "[reorder Hani] &b<a"
5273 };
51004dcb 5274 /*const int32_t apiRules[] = {
4388f060 5275 USCRIPT_HAN
51004dcb 5276 };*/
4388f060
A
5277
5278 const static OneTestCase privateUseCharacterStrings[] = {
5279 { {0x4e00}, {0x0041}, UCOL_LESS },
5280 { {0x4e00}, {0x0060}, UCOL_GREATER },
5281 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5282 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5283 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5284 { {0xfa27}, {0x0041}, UCOL_LESS },
5285 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5286 { {0x0062}, {0x0061}, UCOL_LESS },
5287 };
5288
5289 /* Test rules creation */
2ca993e8 5290 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4388f060
A
5291}
5292
5293static void TestMultipleReorder(void)
729e4ab9
A
5294{
5295 const char* strRules[] = {
5296 "[reorder Grek Zzzz DIGIT Latn Hani]"
5297 };
5298
5299 const int32_t apiRules[] = {
5300 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5301 };
5302
5303 const static OneTestCase collationTestCases[] = {
5304 { {0x0391}, {0x0041}, UCOL_LESS},
5305 { {0x0031}, {0x0041}, UCOL_LESS},
5306 { {0x0041}, {0x4e00}, UCOL_LESS},
5307 };
5308
5309 /* Test rules creation */
2ca993e8 5310 doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5311
5312 /* Test collation reordering API */
2ca993e8 5313 doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5314}
5315
4388f060
A
5316/*
5317 * Test that covers issue reported in ticket 8814
5318 */
51004dcb 5319static void TestReorderWithNumericCollation(void)
4388f060
A
5320{
5321 UErrorCode status = U_ZERO_ERROR;
5322 UCollator *myCollation;
5323 UCollator *myReorderCollation;
5324 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5325 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5326 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5327 UChar fortyS[] = { 0x0053 };
5328 UChar fortyThreeP[] = { 0x0050 };
5329 uint8_t fortyS_sortKey[128];
5330 int32_t fortyS_sortKey_Length;
5331 uint8_t fortyThreeP_sortKey[128];
5332 int32_t fortyThreeP_sortKey_Length;
5333 uint8_t fortyS_sortKey_reorder[128];
5334 int32_t fortyS_sortKey_reorder_Length;
5335 uint8_t fortyThreeP_sortKey_reorder[128];
5336 int32_t fortyThreeP_sortKey_reorder_Length;
5337 UCollationResult collResult;
5338 UCollationResult collResultReorder;
4388f060
A
5339
5340 log_verbose("Testing reordering with and without numeric collation\n");
5341
5342 /* build collator tertiary with numeric */
5343 myCollation = ucol_open("", &status);
5344 /*
5345 ucol_setStrength(myCollation, UCOL_TERTIARY);
5346 */
5347 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5348 if(U_FAILURE(status)) {
5349 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5350 return;
5351 }
5352
5353 /* build collator tertiary with numeric and reordering */
5354 myReorderCollation = ucol_open("", &status);
5355 /*
5356 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5357 */
5358 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
2ca993e8 5359 ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
5360 if(U_FAILURE(status)) {
5361 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5362 return;
5363 }
5364
2ca993e8
A
5365 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5366 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5367 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5368 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
4388f060
A
5369
5370 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5371 log_err_status(status, "ERROR: couldn't generate sort keys\n");
5372 return;
5373 }
2ca993e8
A
5374 collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5375 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
4388f060
A
5376 /*
5377 fprintf(stderr, "\tcollResult = %x\n", collResult);
5378 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5379 fprintf(stderr, "\nfortyS\n");
5380 for (i = 0; i < fortyS_sortKey_Length; i++) {
5381 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5382 }
5383 fprintf(stderr, "\nfortyThreeP\n");
5384 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5385 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5386 }
5387 */
5388 if (collResult != collResultReorder) {
5389 log_err_status(status, "ERROR: collation results should have been the same.\n");
5390 return;
5391 }
5392
5393 ucol_close(myCollation);
5394 ucol_close(myReorderCollation);
5395}
5396
729e4ab9
A
/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both are identical up to and including the terminating zero,
 * -1 when the first differing byte of a is smaller than that of b, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences without a difference. */
            return 0;
        }
        ++a;
        ++b;
    }
    if (*a < *b) {
        return -1;
    }
    return 1;
}
5406
4388f060
A
5407static void TestImportRulesDeWithPhonebook(void)
5408{
5409 const char* normalRules[] = {
5410 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5411 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5412 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5413 };
5414 const OneTestCase normalTests[] = {
5415 { {0x00e6}, {0x00c6}, UCOL_LESS},
5416 { {0x00fc}, {0x00dc}, UCOL_GREATER},
5417 };
5418
5419 const char* importRules[] = {
5420 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5421 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5422 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5423 };
5424 const OneTestCase importTests[] = {
5425 { {0x00e6}, {0x00c6}, UCOL_LESS},
5426 { {0x00fc}, {0x00dc}, UCOL_LESS},
5427 };
5428
2ca993e8
A
5429 doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5430 doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
4388f060
A
5431}
5432
#if 0
/*
 * Disabled test (compiled out): import of the root "eor" and fi "standard"
 * collation types, separately and combined.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b",                                /* Dummy rule. */
    };
    const OneTestCase defaultTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* eorRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase eorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard rules. */
    const char* fiStdRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase fiStdTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* eorFiStdRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    const char* fiEorRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expected results shared by eorFiStdRules and (once enabled) fiEorRules. */
    const OneTestCase fiEorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
    doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
    doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
    doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
            eor{
                Sequence{
                    "[import root-u-co-eor][import fi-u-co-standard]"
                }
                Version{"21.0"}
            }
    */
    /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
#endif
4388f060
A
5502
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b",                                /* Dummy rule. */
    };
    const OneTestCase defaultTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Rules imported from the ko "unihan" collation type. */
    const char* unihanRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
    doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));

}
#endif
5534
729e4ab9
A
5535static void TestImport(void)
5536{
5537 UCollator* vicoll;
5538 UCollator* escoll;
5539 UCollator* viescoll;
5540 UCollator* importviescoll;
5541 UParseError error;
5542 UErrorCode status = U_ZERO_ERROR;
5543 UChar* virules;
5544 int32_t viruleslength;
5545 UChar* esrules;
5546 int32_t esruleslength;
5547 UChar* viesrules;
5548 int32_t viesruleslength;
5549 char srules[500] = "[import vi][import es]";
5550 UChar rules[500];
5551 uint32_t length = 0;
5552 int32_t itemCount;
5553 int32_t i, k;
5554 UChar32 start;
5555 UChar32 end;
5556 UChar str[500];
5557 int32_t strLength;
5558
5559 uint8_t sk1[500];
5560 uint8_t sk2[500];
5561
5562 UBool b;
5563 USet* tailoredSet;
5564 USet* importTailoredSet;
5565
5566
5567 vicoll = ucol_open("vi", &status);
5568 if(U_FAILURE(status)){
5569 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5570 return;
5571 }
5572
5573 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
b331163b
A
5574 if(viruleslength == 0) {
5575 log_data_err("missing vi tailoring rule string\n");
5576 ucol_close(vicoll);
5577 return;
5578 }
729e4ab9
A
5579 escoll = ucol_open("es", &status);
5580 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5581 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5582 viesrules[0] = 0;
5583 u_strcat(viesrules, virules);
5584 u_strcat(viesrules, esrules);
5585 viesruleslength = viruleslength + esruleslength;
5586 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5587
5588 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5589 length = u_unescape(srules, rules, 500);
5590 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5591 if(U_FAILURE(status)){
5592 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5593 return;
5594 }
5595
5596 tailoredSet = ucol_getTailoredSet(viescoll, &status);
5597 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5598
5599 if(!uset_equals(tailoredSet, importTailoredSet)){
5600 log_err("Tailored sets not equal");
5601 }
5602
5603 uset_close(importTailoredSet);
5604
5605 itemCount = uset_getItemCount(tailoredSet);
5606
5607 for( i = 0; i < itemCount; i++){
5608 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5609 if(strLength < 2){
5610 for (; start <= end; start++){
5611 k = 0;
5612 U16_APPEND(str, k, 500, start, b);
57a6839d 5613 (void)b; /* Suppress set but not used warning. */
729e4ab9
A
5614 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5615 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5616 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5617 log_err("Sort key for %s not equal\n", str);
5618 break;
5619 }
5620 }
5621 }else{
5622 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5623 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5624 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5625 log_err("ZZSort key for %s not equal\n", str);
5626 break;
5627 }
5628
5629 }
5630 }
5631
5632 uset_close(tailoredSet);
5633
5634 uprv_free(viesrules);
5635
5636 ucol_close(vicoll);
5637 ucol_close(escoll);
5638 ucol_close(viescoll);
5639 ucol_close(importviescoll);
5640}
5641
5642static void TestImportWithType(void)
5643{
5644 UCollator* vicoll;
5645 UCollator* decoll;
5646 UCollator* videcoll;
5647 UCollator* importvidecoll;
5648 UParseError error;
5649 UErrorCode status = U_ZERO_ERROR;
5650 const UChar* virules;
5651 int32_t viruleslength;
5652 const UChar* derules;
5653 int32_t deruleslength;
5654 UChar* viderules;
5655 int32_t videruleslength;
5656 const char srules[500] = "[import vi][import de-u-co-phonebk]";
5657 UChar rules[500];
5658 uint32_t length = 0;
5659 int32_t itemCount;
5660 int32_t i, k;
5661 UChar32 start;
5662 UChar32 end;
5663 UChar str[500];
5664 int32_t strLength;
5665
5666 uint8_t sk1[500];
5667 uint8_t sk2[500];
5668
5669 USet* tailoredSet;
5670 USet* importTailoredSet;
5671
5672 vicoll = ucol_open("vi", &status);
5673 if(U_FAILURE(status)){
5674 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5675 return;
5676 }
5677 virules = ucol_getRules(vicoll, &viruleslength);
b331163b
A
5678 if(viruleslength == 0) {
5679 log_data_err("missing vi tailoring rule string\n");
5680 ucol_close(vicoll);
5681 return;
5682 }
729e4ab9
A
5683 /* decoll = ucol_open("de@collation=phonebook", &status); */
5684 decoll = ucol_open("de-u-co-phonebk", &status);
5685 if(U_FAILURE(status)){
5686 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5687 return;
5688 }
5689
5690
5691 derules = ucol_getRules(decoll, &deruleslength);
5692 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5693 viderules[0] = 0;
5694 u_strcat(viderules, virules);
5695 u_strcat(viderules, derules);
5696 videruleslength = viruleslength + deruleslength;
5697 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5698
5699 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5700 length = u_unescape(srules, rules, 500);
5701 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5702 if(U_FAILURE(status)){
5703 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5704 return;
5705 }
5706
5707 tailoredSet = ucol_getTailoredSet(videcoll, &status);
5708 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5709
5710 if(!uset_equals(tailoredSet, importTailoredSet)){
5711 log_err("Tailored sets not equal");
5712 }
5713
5714 uset_close(importTailoredSet);
5715
5716 itemCount = uset_getItemCount(tailoredSet);
5717
5718 for( i = 0; i < itemCount; i++){
5719 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5720 if(strLength < 2){
5721 for (; start <= end; start++){
5722 k = 0;
5723 U16_APPEND_UNSAFE(str, k, start);
5724 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5725 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5726 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5727 log_err("Sort key for %s not equal\n", str);
5728 break;
5729 }
5730 }
5731 }else{
5732 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5733 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5734 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5735 log_err("Sort key for %s not equal\n", str);
5736 break;
5737 }
5738
5739 }
5740 }
5741
5742 uset_close(tailoredSet);
5743
5744 uprv_free(viderules);
5745
5746 ucol_close(videcoll);
5747 ucol_close(importvidecoll);
5748 ucol_close(vicoll);
5749 ucol_close(decoll);
4388f060
A
5750}
5751
5752/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5753static const UChar longUpperStr1[]= { /* 155 chars */
5754 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5755 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5756 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5757 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5758 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5759 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5760 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5761 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5762 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5763 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5764};
5765
5766/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5767static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5768 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5769 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5770 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5771 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5772 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5773};
5774
5775/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5776static const UChar longUpperStr3[]= { /* 324 chars */
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5781 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5782 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5783 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5784 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5785 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5786 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5787 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5788 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5789};
729e4ab9 5790
4388f060
A
5791typedef struct {
5792 const UChar * longUpperStrPtr;
5793 int32_t longUpperStrLen;
5794} LongUpperStrItem;
5795
5796/* String pointers must be in reverse collation order of the corresponding strings */
5797static const LongUpperStrItem longUpperStrItems[] = {
2ca993e8
A
5798 { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5799 { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5800 { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
4388f060
A
5801 { NULL, 0 }
5802};
5803
57a6839d 5804enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
4388f060
A
5805
5806/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5807static void TestCaseLevelBufferOverflow(void)
5808{
5809 UErrorCode status = U_ZERO_ERROR;
5810 UCollator * ucol = ucol_open("root", &status);
5811 if ( U_SUCCESS(status) ) {
5812 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5813 if ( U_SUCCESS(status) ) {
5814 const LongUpperStrItem * itemPtr;
5815 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5816 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5817 int32_t sortKeyLen;
5818 if (itemPtr > longUpperStrItems) {
5819 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5820 }
5821 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5822 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5823 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5824 break;
5825 }
5826 if ( itemPtr > longUpperStrItems ) {
5827 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5828 if (compareResult >= 0) {
5829 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5830 }
5831 }
5832 }
5833 } else {
5834 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5835 }
5836 ucol_close(ucol);
5837 } else {
5838 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5839 }
729e4ab9
A
5840}
5841
57a6839d
A
5842/* Test for #10595 */
5843static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5844#define KEY_PART_SIZE 16
5845
5846static void TestNextSortKeyPartJaIdentical(void)
5847{
5848 UErrorCode status = U_ZERO_ERROR;
5849 UCollator *coll;
5850 uint8_t keyPart[KEY_PART_SIZE];
5851 UCharIterator iter;
5852 uint32_t state[2] = {0, 0};
5853 int32_t keyPartLen;
5854
5855 coll = ucol_open("ja", &status);
5856 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5857 if (U_FAILURE(status)) {
5858 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5859 return;
5860 }
5861
5862 uiter_setString(&iter, testJapaneseName, 5);
5863 keyPartLen = KEY_PART_SIZE;
5864 while (keyPartLen == KEY_PART_SIZE) {
5865 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5866 if (U_FAILURE(status)) {
5867 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5868 break;
5869 }
5870 }
5871
5872 ucol_close(coll);
5873}
729e4ab9 5874
b75a7d8f
A
5875#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5876
5877void addMiscCollTest(TestNode** root)
5878{
374ca955
A
5879 TEST(TestRuleOptions);
5880 TEST(TestBeforePrefixFailure);
5881 TEST(TestContractionClosure);
5882 TEST(TestPrefixCompose);
5883 TEST(TestStrCollIdenticalPrefix);
5884 TEST(TestPrefix);
5885 TEST(TestNewJapanese);
5886 /*TEST(TestLimitations);*/
5887 TEST(TestNonChars);
5888 TEST(TestExtremeCompression);
5889 TEST(TestSurrogates);
5890 TEST(TestVariableTopSetting);
57a6839d 5891 TEST(TestMaxVariable);
374ca955
A
5892 TEST(TestBocsuCoverage);
5893 TEST(TestCyrillicTailoring);
5894 TEST(TestCase);
5895 TEST(IncompleteCntTest);
5896 TEST(BlackBirdTest);
5897 TEST(FunkyATest);
5898 TEST(BillFairmanTest);
374ca955
A
5899 TEST(TestChMove);
5900 TEST(TestImplicitTailoring);
5901 TEST(TestFCDProblem);
5902 TEST(TestEmptyRule);
5903 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5904 TEST(TestJ815);
0f5d89e8 5905 TEST(TestUpperCaseFirst);
374ca955 5906 TEST(TestBefore);
374ca955
A
5907 TEST(TestHangulTailoring);
5908 TEST(TestUCARules);
5909 TEST(TestIncrementalNormalize);
5910 TEST(TestComposeDecompose);
5911 TEST(TestCompressOverlap);
5912 TEST(TestContraction);
5913 TEST(TestExpansion);
5914 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5915 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
b75a7d8f
A
5916 TEST(TestOptimize);
5917 TEST(TestSuppressContractions);
5918 TEST(Alexis2);
5919 TEST(TestHebrewUCA);
5920 TEST(TestPartialSortKeyTermination);
5921 TEST(TestSettings);
5922 TEST(TestEquals);
5923 TEST(TestJ2726);
374ca955
A
5924 TEST(NullRule);
5925 TEST(TestNumericCollation);
5926 TEST(TestTibetanConformance);
5927 TEST(TestPinyinProblem);
374ca955
A
5928 TEST(TestSeparateTrees);
5929 TEST(TestBeforePinyin);
5930 TEST(TestBeforeTightening);
5931 /*TEST(TestMoreBefore);*/
5932 TEST(TestTailorNULL);
73c04bcf
A
5933 TEST(TestUpperFirstQuaternary);
5934 TEST(TestJ4960);
5935 TEST(TestJ5223);
5936 TEST(TestJ5232);
46f4442e
A
5937 TEST(TestJ5367);
5938 TEST(TestHiragana);
5939 TEST(TestSortKeyConsistency);
5940 TEST(TestVI5913); /* VI, RO tailored rules */
5941 TEST(TestCroatianSortKey);
5942 TEST(TestTailor6179);
5943 TEST(TestUCAPrecontext);
5944 TEST(TestOutOfBuffer5468);
729e4ab9
A
5945 TEST(TestSameStrengthList);
5946
5947 TEST(TestSameStrengthListQuoted);
5948 TEST(TestSameStrengthListSupplemental);
5949 TEST(TestSameStrengthListQwerty);
5950 TEST(TestSameStrengthListQuotedQwerty);
5951 TEST(TestSameStrengthListRanges);
5952 TEST(TestSameStrengthListSupplementalRanges);
5953 TEST(TestSpecialCharacters);
5954 TEST(TestPrivateUseCharacters);
5955 TEST(TestPrivateUseCharactersInList);
5956 TEST(TestPrivateUseCharactersInRange);
5957 TEST(TestInvalidListsAndRanges);
4388f060
A
5958 TEST(TestImportRulesDeWithPhonebook);
5959 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5960 /* TEST(TestImportRulesCJKWithUnihan); */
729e4ab9
A
5961 TEST(TestImport);
5962 TEST(TestImportWithType);
5963
5964 TEST(TestBeforeRuleWithScriptReordering);
5965 TEST(TestNonLeadBytesDuringCollationReordering);
5966 TEST(TestReorderingAPI);
4388f060
A
5967 TEST(TestReorderingAPIWithRuleCreatedCollator);
5968 TEST(TestEquivalentReorderingScripts);
729e4ab9
A
5969 TEST(TestGreekFirstReorder);
5970 TEST(TestGreekLastReorder);
5971 TEST(TestNonScriptReorder);
5972 TEST(TestHaniReorder);
4388f060 5973 TEST(TestHaniReorderWithOtherRules);
729e4ab9 5974 TEST(TestMultipleReorder);
4388f060 5975 TEST(TestReorderingAcrossCloning);
51004dcb 5976 TEST(TestReorderWithNumericCollation);
4388f060
A
5977
5978 TEST(TestCaseLevelBufferOverflow);
57a6839d 5979 TEST(TestNextSortKeyPartJaIdentical);
b75a7d8f
A
5980}
5981
5982#endif /* #if !UCONFIG_NO_COLLATION */