]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cmsccoll.c
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cmsccoll.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/********************************************************************
4 * COPYRIGHT:
2ca993e8 5 * Copyright (c) 2001-2016, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File cmsccoll.C
11*
12*******************************************************************************/
13/**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "cintltst.h"
28#include "ccolltst.h"
29#include "callcoll.h"
30#include "unicode/ustring.h"
31#include "string.h"
32#include "ucol_imp.h"
b75a7d8f
A
33#include "cmemory.h"
34#include "cstring.h"
374ca955 35#include "uassert.h"
b75a7d8f
A
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
46f4442e 38#include "unicode/ures.h"
729e4ab9 39#include "unicode/uscript.h"
4388f060 40#include "unicode/utf16.h"
b75a7d8f 41#include "uparse.h"
46f4442e
A
42#include "putilimp.h"
43
b75a7d8f 44
374ca955 45#define MAX_TOKEN_LEN 16
b75a7d8f 46
46f4442e 47typedef UCollationResult tst_strcoll(void *collator, const int object,
b75a7d8f
A
48 const UChar *source, const int sLen,
49 const UChar *target, const int tLen);
50
51
b75a7d8f
A
52
53const static char cnt1[][10] = {
54
55 "AA",
56 "AC",
57 "AZ",
58 "AQ",
59 "AB",
60 "ABZ",
61 "ABQ",
62 "Z",
63 "ABC",
64 "Q",
65 "B"
66};
67
68const static char cnt2[][10] = {
69 "DA",
70 "DAD",
71 "DAZ",
72 "MAR",
73 "Z",
74 "DAVIS",
75 "MARK",
76 "DAV",
77 "DAVI"
78};
79
80static void IncompleteCntTest(void)
81{
82 UErrorCode status = U_ZERO_ERROR;
83 UChar temp[90];
84 UChar t1[90];
85 UChar t2[90];
86
87 UCollator *coll = NULL;
88 uint32_t i = 0, j = 0;
89 uint32_t size = 0;
90
91 u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95 if(U_SUCCESS(status)) {
2ca993e8 96 size = UPRV_LENGTHOF(cnt1);
b75a7d8f
A
97 for(i = 0; i < size-1; i++) {
98 for(j = i+1; j < size; j++) {
99 UCollationElements *iter;
100 u_uastrcpy(t1, cnt1[i]);
101 u_uastrcpy(t2, cnt1[j]);
102 doTest(coll, t1, t2, UCOL_LESS);
103 /* synwee : added collation element iterator test */
104 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105 if (U_FAILURE(status)) {
106 log_err("Creation of iterator failed\n");
107 break;
108 }
109 backAndForth(iter);
110 ucol_closeElements(iter);
111 }
112 }
113 }
114
115 ucol_close(coll);
116
117
118 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121 if(U_SUCCESS(status)) {
2ca993e8 122 size = UPRV_LENGTHOF(cnt2);
b75a7d8f
A
123 for(i = 0; i < size-1; i++) {
124 for(j = i+1; j < size; j++) {
125 UCollationElements *iter;
126 u_uastrcpy(t1, cnt2[i]);
127 u_uastrcpy(t2, cnt2[j]);
128 doTest(coll, t1, t2, UCOL_LESS);
129
130 /* synwee : added collation element iterator test */
131 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132 if (U_FAILURE(status)) {
133 log_err("Creation of iterator failed\n");
134 break;
135 }
136 backAndForth(iter);
137 ucol_closeElements(iter);
138 }
139 }
140 }
141
142 ucol_close(coll);
143
144
145}
146
147const static char shifted[][20] = {
148 "black bird",
149 "black-bird",
150 "blackbird",
151 "black Bird",
152 "black-Bird",
153 "blackBird",
154 "black birds",
155 "black-birds",
156 "blackbirds"
157};
158
159const static UCollationResult shiftedTert[] = {
46f4442e 160 UCOL_EQUAL,
b75a7d8f
A
161 UCOL_EQUAL,
162 UCOL_EQUAL,
163 UCOL_LESS,
164 UCOL_EQUAL,
165 UCOL_EQUAL,
166 UCOL_LESS,
167 UCOL_EQUAL,
168 UCOL_EQUAL
169};
170
171const static char nonignorable[][20] = {
172 "black bird",
173 "black Bird",
174 "black birds",
175 "black-bird",
176 "black-Bird",
177 "black-birds",
178 "blackbird",
179 "blackBird",
180 "blackbirds"
181};
182
183static void BlackBirdTest(void) {
184 UErrorCode status = U_ZERO_ERROR;
185 UChar t1[90];
186 UChar t2[90];
187
188 uint32_t i = 0, j = 0;
189 uint32_t size = 0;
190 UCollator *coll = ucol_open("en_US", &status);
191
192 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195 if(U_SUCCESS(status)) {
2ca993e8 196 size = UPRV_LENGTHOF(nonignorable);
b75a7d8f
A
197 for(i = 0; i < size-1; i++) {
198 for(j = i+1; j < size; j++) {
199 u_uastrcpy(t1, nonignorable[i]);
200 u_uastrcpy(t2, nonignorable[j]);
201 doTest(coll, t1, t2, UCOL_LESS);
202 }
203 }
204 }
205
206 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209 if(U_SUCCESS(status)) {
2ca993e8 210 size = UPRV_LENGTHOF(shifted);
b75a7d8f
A
211 for(i = 0; i < size-1; i++) {
212 for(j = i+1; j < size; j++) {
213 u_uastrcpy(t1, shifted[i]);
214 u_uastrcpy(t2, shifted[j]);
215 doTest(coll, t1, t2, UCOL_LESS);
216 }
217 }
218 }
219
220 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221 if(U_SUCCESS(status)) {
2ca993e8 222 size = UPRV_LENGTHOF(shifted);
b75a7d8f
A
223 for(i = 1; i < size; i++) {
224 u_uastrcpy(t1, shifted[i-1]);
225 u_uastrcpy(t2, shifted[i]);
226 doTest(coll, t1, t2, shiftedTert[i]);
227 }
228 }
229
230 ucol_close(coll);
231}
232
233const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0000},
237 {0x00C0, 0x0301, 0x0000},
238 /* this would work with forced normalization */
239 {0x00C0, 0x0316, 0x0000}
240};
241
242const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245 {0x00C0, 0},
246 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247 /* this would work with forced normalization */
248 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249};
250
251const static UCollationResult results[] = {
252 UCOL_GREATER,
253 UCOL_EQUAL,
254 UCOL_EQUAL,
255 UCOL_GREATER,
256 UCOL_EQUAL
257};
258
259static void FunkyATest(void)
260{
261
262 int32_t i;
263 UErrorCode status = U_ZERO_ERROR;
264 UCollator *myCollation;
265 myCollation = ucol_open("en_US", &status);
266 if(U_FAILURE(status)){
729e4ab9 267 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
b75a7d8f
A
268 return;
269 }
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272 ucol_setStrength(myCollation, UCOL_TERTIARY);
273 for (i = 0; i < 4 ; i++)
274 {
275 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276 }
277 ucol_close(myCollation);
278}
279
280UColAttributeValue caseFirst[] = {
281 UCOL_OFF,
282 UCOL_LOWER_FIRST,
283 UCOL_UPPER_FIRST
284};
285
286
287UColAttributeValue alternateHandling[] = {
288 UCOL_NON_IGNORABLE,
289 UCOL_SHIFTED
290};
291
292UColAttributeValue caseLevel[] = {
293 UCOL_OFF,
294 UCOL_ON
295};
296
297UColAttributeValue strengths[] = {
298 UCOL_PRIMARY,
299 UCOL_SECONDARY,
300 UCOL_TERTIARY,
301 UCOL_QUATERNARY,
302 UCOL_IDENTICAL
303};
304
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * caseFirst, alternate handling, caseLevel and strength on the en_US collator.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    if(U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);


    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
        /* Index with h: i still holds sizem from the dump loop above. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    /* The key length is not needed; only the bytes are printed. */
                    (void)ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    ucol_close(coll);
}
#endif
382
383static void BillFairmanTest(void) {
384/*
385** check for actual locale via ICU resource bundles
386**
387** lp points to the original locale ("fr_FR_....")
388*/
389
390 UResourceBundle *lr,*cr;
391 UErrorCode lec = U_ZERO_ERROR;
392 const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394 log_verbose("BillFairmanTest\n");
395
396 lr = ures_open(NULL,lp,&lec);
397 if (lr) {
374ca955 398 cr = ures_getByKey(lr,"collations",0,&lec);
b75a7d8f 399 if (cr) {
729e4ab9 400 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
b75a7d8f
A
401 if (lp) {
402 if (U_SUCCESS(lec)) {
403 if(strcmp(lp, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405 }
406 }
407 }
408 ures_close(cr);
409 }
410 ures_close(lr);
411 }
412}
413
b75a7d8f
A
414const static char chTest[][20] = {
415 "c",
416 "C",
417 "ca", "cb", "cx", "cy", "CZ",
418 "c\\u030C", "C\\u030C",
419 "h",
420 "H",
421 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
422 "ch", "cH", "Ch", "CH",
423 "cha", "charly", "che", "chh", "chch", "chr",
424 "i", "I", "iarly",
425 "r", "R",
426 "r\\u030C", "R\\u030C",
427 "s",
428 "S",
429 "s\\u030C", "S\\u030C",
430 "z", "Z",
431 "z\\u030C", "Z\\u030C"
432};
433
434static void TestChMove(void) {
46f4442e
A
435 UChar t1[256] = {0};
436 UChar t2[256] = {0};
b75a7d8f 437
46f4442e
A
438 uint32_t i = 0, j = 0;
439 uint32_t size = 0;
440 UErrorCode status = U_ZERO_ERROR;
b75a7d8f 441
46f4442e 442 UCollator *coll = ucol_open("cs", &status);
b75a7d8f 443
46f4442e 444 if(U_SUCCESS(status)) {
2ca993e8 445 size = UPRV_LENGTHOF(chTest);
46f4442e
A
446 for(i = 0; i < size-1; i++) {
447 for(j = i+1; j < size; j++) {
448 u_unescape(chTest[i], t1, 256);
449 u_unescape(chTest[j], t2, 256);
450 doTest(coll, t1, t2, UCOL_LESS);
451 }
452 }
b75a7d8f 453 }
46f4442e 454 else {
729e4ab9 455 log_data_err("Can't open collator");
46f4442e
A
456 }
457 ucol_close(coll);
b75a7d8f
A
458}
459
374ca955
A
460
461
462
57a6839d 463/*
b75a7d8f
A
464const static char impTest[][20] = {
465 "\\u4e00",
466 "a",
467 "A",
468 "b",
469 "B",
470 "\\u4e01"
471};
57a6839d 472*/
b75a7d8f
A
473
474
475static void TestImplicitTailoring(void) {
46f4442e 476 static const struct {
374ca955 477 const char *rules;
46f4442e 478 const char *data[10];
374ca955
A
479 const uint32_t len;
480 } tests[] = {
57a6839d
A
481 {
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
374ca955
A
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490 };
491
492 int32_t i = 0;
493
2ca993e8 494 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
495 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496 }
497
498/*
b75a7d8f
A
499 UChar t1[256] = {0};
500 UChar t2[256] = {0};
501
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504 uint32_t i = 0, j = 0;
505 uint32_t size = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
510
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513 if(U_SUCCESS(status)) {
2ca993e8 514 size = UPRV_LENGTHOF(impTest);
b75a7d8f
A
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
520 }
521 }
522 }
523 else {
524 log_err("Can't open collator");
525 }
526 ucol_close(coll);
374ca955 527 */
b75a7d8f
A
528}
529
530static void TestFCDProblem(void) {
531 UChar t1[256] = {0};
532 UChar t2[256] = {0};
533
534 const char *s1 = "\\u0430\\u0306\\u0325";
535 const char *s2 = "\\u04D1\\u0325";
536
537 UErrorCode status = U_ZERO_ERROR;
538 UCollator *coll = ucol_open("", &status);
539 u_unescape(s1, t1, 256);
540 u_unescape(s2, t2, 256);
541
542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543 doTest(coll, t1, t2, UCOL_EQUAL);
544
545 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546 doTest(coll, t1, t2, UCOL_EQUAL);
547
548 ucol_close(coll);
549}
550
46f4442e
A
551/*
552The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553We're only using NFC/NFD in this test.
554*/
555#define NORM_BUFFER_TEST_LEN 18
b75a7d8f
A
556typedef struct {
557 UChar32 u;
558 UChar NFC[NORM_BUFFER_TEST_LEN];
559 UChar NFD[NORM_BUFFER_TEST_LEN];
560} tester;
561
562static void TestComposeDecompose(void) {
46f4442e
A
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 };
b75a7d8f
A
569 int32_t noOfLoc;
570 int32_t i = 0, j = 0;
571
572 UErrorCode status = U_ZERO_ERROR;
b75a7d8f 573 const char *locName = NULL;
b75a7d8f
A
574 uint32_t nfcSize;
575 uint32_t nfdSize;
576 tester **t;
577 uint32_t noCases = 0;
578 UCollator *coll = NULL;
579 UChar32 u = 0;
580 UChar comp[NORM_BUFFER_TEST_LEN];
581 uint32_t len = 0;
374ca955 582 UCollationElements *iter;
46f4442e
A
583 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584 int32_t charsToTestSize;
b75a7d8f
A
585
586 noOfLoc = uloc_countAvailable();
587
b75a7d8f 588 coll = ucol_open("", &status);
729e4ab9
A
589 if (U_FAILURE(status)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
46f4442e 591 return;
b75a7d8f 592 }
46f4442e
A
593 charsToTestSize = uset_size(charsToTest);
594 if (charsToTestSize <= 0) {
595 log_err("Set was zero. Missing data?\n");
596 return;
597 }
4388f060 598 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
46f4442e
A
599 t[0] = (tester *)malloc(sizeof(tester));
600 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
b75a7d8f 601
46f4442e
A
602 for(u = 0; u < charsToTestSize; u++) {
603 UChar32 ch = uset_charAt(charsToTest, u);
604 len = 0;
4388f060 605 U16_APPEND_UNSAFE(comp, len, ch);
b75a7d8f
A
606 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608
374ca955 609 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
b75a7d8f 610 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
46f4442e 611 t[noCases]->u = ch;
b75a7d8f 612 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
46f4442e
A
613 u_strncpy(t[noCases]->NFC, comp, len);
614 t[noCases]->NFC[len] = 0;
b75a7d8f
A
615 }
616 noCases++;
617 t[noCases] = (tester *)malloc(sizeof(tester));
618 uprv_memset(t[noCases], 0, sizeof(tester));
374ca955 619 }
b75a7d8f 620 }
46f4442e
A
621 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622 uset_close(charsToTest);
623 charsToTest = NULL;
b75a7d8f
A
624
625 for(u=0; u<(UChar32)noCases; u++) {
46f4442e
A
626 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629 }
b75a7d8f
A
630 }
631 /*
46f4442e 632 for(u = 0; u < charsToTestSize; u++) {
b75a7d8f
A
633 if(!(u&0xFFFF)) {
634 log_verbose("%08X ", u);
635 }
636 uprv_memset(t[noCases], 0, sizeof(tester));
637 t[noCases]->u = u;
638 len = 0;
4388f060 639 U16_APPEND_UNSAFE(comp, len, u);
b75a7d8f
A
640 comp[len] = 0;
641 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645 }
646 */
647
648 ucol_close(coll);
649
650 log_verbose("Testing locales, number of cases = %i\n", noCases);
651 for(i = 0; i<noOfLoc; i++) {
652 status = U_ZERO_ERROR;
653 locName = uloc_getAvailable(i);
654 if(hasCollationElements(locName)) {
655 char cName[256];
656 UChar name[256];
657 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658
659 for(j = 0; j<nameSize; j++) {
660 cName[j] = (char)name[j];
661 }
662 cName[nameSize] = 0;
663 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664
665 coll = ucol_open(locName, &status);
666 ucol_setStrength(coll, UCOL_IDENTICAL);
374ca955 667 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
b75a7d8f
A
668
669 for(u=0; u<(UChar32)noCases; u++) {
46f4442e
A
670 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673 log_verbose("Testing NFC\n");
674 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675 backAndForth(iter);
676 log_verbose("Testing NFD\n");
677 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678 backAndForth(iter);
679 }
b75a7d8f 680 }
374ca955 681 ucol_closeElements(iter);
b75a7d8f
A
682 ucol_close(coll);
683 }
684 }
685 for(u = 0; u <= (UChar32)noCases; u++) {
686 free(t[u]);
687 }
688 free(t);
689}
690
691static void TestEmptyRule(void) {
692 UErrorCode status = U_ZERO_ERROR;
693 UChar rulez[] = { 0 };
694 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695
696 ucol_close(coll);
697}
698
699static void TestUCARules(void) {
700 UErrorCode status = U_ZERO_ERROR;
701 UChar b[256];
702 UChar *rules = b;
703 uint32_t ruleLen = 0;
704 UCollator *UCAfromRules = NULL;
705 UCollator *coll = ucol_open("", &status);
706 if(status == U_FILE_ACCESS_ERROR) {
707 log_data_err("Is your data around?\n");
708 return;
709 } else if(U_FAILURE(status)) {
710 log_err("Error opening collator\n");
711 return;
712 }
713 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714
715 log_verbose("TestUCARules\n");
716 if(ruleLen > 256) {
717 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719 }
720 log_verbose("Rules length is %d\n", ruleLen);
721 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722 if(U_SUCCESS(status)) {
723 ucol_close(UCAfromRules);
724 } else {
725 log_verbose("Unable to create a collator from UCARules!\n");
726 }
727/*
728 u_unescape(blah, b, 256);
729 ucol_getSortKey(coll, b, 1, res, 256);
730*/
731 ucol_close(coll);
732 if(rules != b) {
733 free(rules);
734 }
735}
736
737
738/* Pinyin tonal order */
739/*
740 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741 (w/macron)< (w/acute)< (w/caron)< (w/grave)
742 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747.. (\u00fc)
748
749However, in testing we got the following order:
750 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751 (w/acute)< (w/grave)< (w/caron)< (w/macron)
752 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753.. (\u0113)
754 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757.. (\u01d8)
758 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759*/
760
/*
 * Tailors accented vowels with [before 1] so that each group sorts ahead of
 * its base letter, then hands the rules and the expected ascending order to
 * genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *const rules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    /* One row per vowel; the last row holds the u-diaeresis variants. */
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };

    genericRulesStarter(rules, data, UPRV_LENGTHOF(data));
}
779
57a6839d
A
#if 0
/* superceded by TestBeforePinyin */
/* Disabled: passes the expected "zh" ordering of toned vowels to genericLocaleStarter(). */
static void TestJ784(void) {
    static const char *expectedOrder[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    genericLocaleStarter("zh", expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
#endif
b75a7d8f 795
/* Passes four strings, upper case before lower case, to genericLocaleStarter() for "da". */
static void TestUpperCaseFirst(void) {
    static const char *expectedOrder[] = { "I", "i", "Y", "y" };

    genericLocaleStarter("da", expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
b75a7d8f 805
57a6839d
A
806static void TestJ815(void) {
807 const static char *data[] = {
808 "aa",
809 "Aa",
810 "ab",
811 "Ab",
812 "ad",
813 "Ad",
814 "ae",
815 "Ae",
816 "\\u00e6",
817 "\\u00c6",
818 "af",
819 "Af",
820 "b",
821 "B"
b75a7d8f 822 };
2ca993e8
A
823 genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
824 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
b75a7d8f
A
825}
826
57a6839d 827
b75a7d8f
A
828static void TestCase(void)
829{
830 const static UChar gRules[MAX_TOKEN_LEN] =
831 /*" & 0 < 1,\u2461<a,A"*/
832 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
833
834 const static UChar testCase[][MAX_TOKEN_LEN] =
835 {
836 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
837 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
838 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
839 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
840 };
841
842 const static UCollationResult caseTestResults[][9] =
843 {
46f4442e
A
844 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
845 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
846 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
847 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
b75a7d8f
A
848 };
849
850 const static UColAttributeValue caseTestAttributes[][2] =
851 {
46f4442e
A
852 { UCOL_LOWER_FIRST, UCOL_OFF},
853 { UCOL_UPPER_FIRST, UCOL_OFF},
854 { UCOL_LOWER_FIRST, UCOL_ON},
855 { UCOL_UPPER_FIRST, UCOL_ON}
b75a7d8f
A
856 };
857 int32_t i,j,k;
858 UErrorCode status = U_ZERO_ERROR;
374ca955 859 UCollationElements *iter;
b75a7d8f
A
860 UCollator *myCollation;
861 myCollation = ucol_open("en_US", &status);
374ca955 862
b75a7d8f 863 if(U_FAILURE(status)){
729e4ab9 864 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
b75a7d8f
A
865 return;
866 }
867 log_verbose("Testing different case settings\n");
868 ucol_setStrength(myCollation, UCOL_TERTIARY);
869
870 for(k = 0; k<4; k++) {
871 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
872 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
873 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
874 for (i = 0; i < 3 ; i++) {
875 for(j = i+1; j<4; j++) {
876 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
877 }
878 }
879 }
880 ucol_close(myCollation);
881
882 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
883 if(U_FAILURE(status)){
884 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
885 return;
886 }
887 log_verbose("Testing different case settings with custom rules\n");
888 ucol_setStrength(myCollation, UCOL_TERTIARY);
889
890 for(k = 0; k<4; k++) {
891 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
892 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
893 for (i = 0; i < 3 ; i++) {
894 for(j = i+1; j<4; j++) {
895 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
896 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
374ca955
A
897 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
898 backAndForth(iter);
899 ucol_closeElements(iter);
900 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
901 backAndForth(iter);
902 ucol_closeElements(iter);
b75a7d8f
A
903 }
904 }
905 }
906 ucol_close(myCollation);
907 {
908 const static char *lowerFirst[] = {
909 "h",
910 "H",
911 "ch",
912 "Ch",
913 "CH",
914 "cha",
915 "chA",
916 "Cha",
917 "ChA",
918 "CHa",
919 "CHA",
920 "i",
921 "I"
922 };
923
924 const static char *upperFirst[] = {
925 "H",
926 "h",
927 "CH",
928 "Ch",
929 "ch",
930 "CHA",
931 "CHa",
932 "ChA",
933 "Cha",
934 "chA",
935 "cha",
936 "I",
937 "i"
938 };
939 log_verbose("mixed case test\n");
940 log_verbose("lower first, case level off\n");
2ca993e8 941 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
b75a7d8f 942 log_verbose("upper first, case level off\n");
2ca993e8 943 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
b75a7d8f 944 log_verbose("lower first, case level on\n");
2ca993e8 945 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
b75a7d8f 946 log_verbose("upper first, case level on\n");
2ca993e8 947 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
b75a7d8f
A
948 }
949
950}
951
952static void TestIncrementalNormalize(void) {
953
954 /*UChar baseA =0x61;*/
955 UChar baseA =0x41;
956/* UChar baseB = 0x42;*/
46f4442e 957 static const UChar ccMix[] = {0x316, 0x321, 0x300};
b75a7d8f
A
958 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
959 /*
960 0x316 is combining grave accent below, cc=220
961 0x321 is combining palatalized hook below, cc=202
962 0x300 is combining grave accent, cc=230
963 */
964
46f4442e
A
965#define MAXSLEN 2000
966 /*int maxSLen = 64000;*/
b75a7d8f
A
967 int sLen;
968 int i;
969
970 UCollator *coll;
971 UErrorCode status = U_ZERO_ERROR;
972 UCollationResult result;
973
729e4ab9 974 int32_t myQ = getTestOption(QUICK_OPTION);
b75a7d8f 975
729e4ab9
A
976 if(getTestOption(QUICK_OPTION) < 0) {
977 setTestOption(QUICK_OPTION, 1);
b75a7d8f
A
978 }
979
980 {
981 /* Test 1. Run very long unnormalized strings, to force overflow of*/
982 /* most buffers along the way.*/
46f4442e
A
983 UChar strA[MAXSLEN+1];
984 UChar strB[MAXSLEN+1];
b75a7d8f
A
985
986 coll = ucol_open("en_US", &status);
987 if(status == U_FILE_ACCESS_ERROR) {
988 log_data_err("Is your data around?\n");
989 return;
990 } else if(U_FAILURE(status)) {
991 log_err("Error opening collator\n");
992 return;
993 }
994 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
995
46f4442e
A
996 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
997 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
b75a7d8f
A
998 /*for (sLen = 1000; sLen<1001; sLen++) {*/
999 for (sLen = 500; sLen<501; sLen++) {
1000 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1001 strA[0] = baseA;
1002 strB[0] = baseA;
1003 for (i=1; i<=sLen-1; i++) {
1004 strA[i] = ccMix[i % 3];
1005 strB[sLen-i] = ccMix[i % 3];
1006 }
1007 strA[sLen] = 0;
1008 strB[sLen] = 0;
1009
1010 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
1011 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
1012 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
1013 doTest(coll, strA, strB, UCOL_EQUAL);
1014 }
b75a7d8f
A
1015 }
1016
729e4ab9 1017 setTestOption(QUICK_OPTION, myQ);
b75a7d8f
A
1018
1019
1020 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1021 /* of the string. Checks a couple of edge cases.*/
1022
1023 {
46f4442e
A
1024 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1025 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
b75a7d8f
A
1026 ucol_setStrength(coll, UCOL_TERTIARY);
1027 doTest(coll, strA, strB, UCOL_EQUAL);
1028 }
1029
1030 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1031
1032 {
374ca955
A
1033 /* New UCA 3.1.1.
1034 * test below used a code point from Desseret, which sorts differently
b75a7d8f
A
1035 * than d800 dc00
1036 */
1037 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
46f4442e
A
1038 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1039 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
b75a7d8f
A
1040 ucol_setStrength(coll, UCOL_TERTIARY);
1041 doTest(coll, strA, strB, UCOL_GREATER);
1042 }
1043
1044 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1045
1046 {
46f4442e
A
1047 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1048 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
b75a7d8f
A
1049 char sortKeyA[50];
1050 char sortKeyAz[50];
1051 char sortKeyB[50];
1052 char sortKeyBz[50];
1053 int r;
1054
1055 /* there used to be -3 here. Hmmmm.... */
1056 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1057 result = ucol_strcoll(coll, strA, 3, strB, 3);
1058 if (result != UCOL_GREATER) {
1059 log_err("ERROR 1 in test 4\n");
1060 }
1061 result = ucol_strcoll(coll, strA, -1, strB, -1);
1062 if (result != UCOL_EQUAL) {
1063 log_err("ERROR 2 in test 4\n");
1064 }
1065
1066 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1067 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1068 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1069 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1070
1071 r = strcmp(sortKeyA, sortKeyAz);
1072 if (r <= 0) {
1073 log_err("Error 3 in test 4\n");
1074 }
1075 r = strcmp(sortKeyA, sortKeyB);
1076 if (r <= 0) {
1077 log_err("Error 4 in test 4\n");
1078 }
1079 r = strcmp(sortKeyAz, sortKeyBz);
1080 if (r != 0) {
1081 log_err("Error 5 in test 4\n");
1082 }
1083
1084 ucol_setStrength(coll, UCOL_IDENTICAL);
1085 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1086 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1087 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1088 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1089
1090 r = strcmp(sortKeyA, sortKeyAz);
1091 if (r <= 0) {
1092 log_err("Error 6 in test 4\n");
1093 }
1094 r = strcmp(sortKeyA, sortKeyB);
1095 if (r <= 0) {
1096 log_err("Error 7 in test 4\n");
1097 }
1098 r = strcmp(sortKeyAz, sortKeyBz);
1099 if (r != 0) {
1100 log_err("Error 8 in test 4\n");
1101 }
1102 ucol_setStrength(coll, UCOL_TERTIARY);
1103 }
1104
1105
1106 /* Test 5: Null characters in non-normal source strings.*/
1107
1108 {
46f4442e
A
1109 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1110 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
b75a7d8f
A
1111 char sortKeyA[50];
1112 char sortKeyAz[50];
1113 char sortKeyB[50];
1114 char sortKeyBz[50];
1115 int r;
1116
1117 result = ucol_strcoll(coll, strA, 6, strB, 6);
1118 if (result != UCOL_GREATER) {
1119 log_err("ERROR 1 in test 5\n");
1120 }
1121 result = ucol_strcoll(coll, strA, -1, strB, -1);
1122 if (result != UCOL_EQUAL) {
1123 log_err("ERROR 2 in test 5\n");
1124 }
1125
1126 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1127 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1128 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1129 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1130
1131 r = strcmp(sortKeyA, sortKeyAz);
1132 if (r <= 0) {
1133 log_err("Error 3 in test 5\n");
1134 }
1135 r = strcmp(sortKeyA, sortKeyB);
1136 if (r <= 0) {
1137 log_err("Error 4 in test 5\n");
1138 }
1139 r = strcmp(sortKeyAz, sortKeyBz);
1140 if (r != 0) {
1141 log_err("Error 5 in test 5\n");
1142 }
1143
1144 ucol_setStrength(coll, UCOL_IDENTICAL);
1145 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1146 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1147 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1148 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1149
1150 r = strcmp(sortKeyA, sortKeyAz);
1151 if (r <= 0) {
1152 log_err("Error 6 in test 5\n");
1153 }
1154 r = strcmp(sortKeyA, sortKeyB);
1155 if (r <= 0) {
1156 log_err("Error 7 in test 5\n");
1157 }
1158 r = strcmp(sortKeyAz, sortKeyBz);
1159 if (r != 0) {
1160 log_err("Error 8 in test 5\n");
1161 }
1162 ucol_setStrength(coll, UCOL_TERTIARY);
1163 }
1164
1165
1166 /* Test 6: Null character as base of a non-normal combining sequence.*/
1167
1168 {
46f4442e
A
1169 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1170 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
b75a7d8f
A
1171
1172 result = ucol_strcoll(coll, strA, 5, strB, 5);
1173 if (result != UCOL_LESS) {
1174 log_err("Error 1 in test 6\n");
1175 }
1176 result = ucol_strcoll(coll, strA, -1, strB, -1);
1177 if (result != UCOL_EQUAL) {
1178 log_err("Error 2 in test 6\n");
1179 }
1180 }
1181
1182 ucol_close(coll);
1183}
1184
1185
1186
#if 0
/*
 * Disabled test: unescapes each sample string and compares the value returned
 * by ucol_uprv_getCaseBits() with the expected case classification.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    int32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* Without a collator there is nothing to query. */
    if(U_FAILURE(status)) {
        log_data_err("Couldn't open UCA collator -> %s\n", u_errorName(status));
        return;
    }

    for(i = 0; i<UPRV_LENGTHOF(results); i++) {
        blen = u_unescape(caseBitData[i], b, UPRV_LENGTHOF(b));
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* Release the collator opened above. */
    ucol_close(UCA);
}
#endif
1214
1215static void TestHangulTailoring(void) {
1216 static const char *koreanData[] = {
1217 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1218 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1219 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1220 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1221 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1222 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1223 };
1224
1225 const char *rules =
1226 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1227 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1228 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1229 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1230 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1231 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1232
1233
1234 UErrorCode status = U_ZERO_ERROR;
1235 UChar rlz[2048] = { 0 };
1236 uint32_t rlen = u_unescape(rules, rlz, 2048);
1237
1238 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1239 if(status == U_FILE_ACCESS_ERROR) {
1240 log_data_err("Is your data around?\n");
1241 return;
1242 } else if(U_FAILURE(status)) {
1243 log_err("Error opening collator\n");
1244 return;
1245 }
1246
1247 log_verbose("Using start of korean rules\n");
1248
1249 if(U_SUCCESS(status)) {
2ca993e8 1250 genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
b75a7d8f
A
1251 } else {
1252 log_err("Unable to open collator with rules %s\n", rules);
1253 }
1254
b75a7d8f
A
1255 ucol_close(coll);
1256
1257 log_verbose("Using ko__LOTUS locale\n");
2ca993e8 1258 genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
b75a7d8f
A
1259}
1260
57a6839d
A
/*
 * Middle bytes used for secondary/tertiary sort key compression by the
 * current implementation. These values may change whenever the sort key
 * compression changes; see class CollationKeys.
 */
enum {
    /* secondary: range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary: range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1271
b75a7d8f
A
1272static void TestCompressOverlap(void) {
1273 UChar secstr[150];
1274 UChar tertstr[150];
1275 UErrorCode status = U_ZERO_ERROR;
1276 UCollator *coll;
57a6839d 1277 uint8_t result[500];
b75a7d8f
A
1278 uint32_t resultlen;
1279 int count = 0;
57a6839d 1280 uint8_t *tempptr;
b75a7d8f
A
1281
1282 coll = ucol_open("", &status);
1283
1284 if (U_FAILURE(status)) {
729e4ab9 1285 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
b75a7d8f
A
1286 return;
1287 }
1288 while (count < 149) {
1289 secstr[count] = 0x0020; /* [06, 05, 05] */
1290 tertstr[count] = 0x0020;
1291 count ++;
1292 }
1293
1294 /* top down compression ----------------------------------- */
1295 secstr[count] = 0x0332; /* [, 87, 05] */
1296 tertstr[count] = 0x3000; /* [06, 05, 07] */
1297
1298 /* no compression secstr should have 150 secondary bytes, tertstr should
1299 have 150 tertiary bytes.
57a6839d
A
1300 with correct compression, secstr should have 6 secondary
1301 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
2ca993e8 1302 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
57a6839d
A
1303 (void)resultlen; /* Suppress set but not used warning. */
1304 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
b75a7d8f
A
1305 while (*(tempptr + 1) != 1) {
1306 /* the last secondary collation element is not checked since it is not
1307 part of the compression */
57a6839d
A
1308 if (*tempptr < SEC_COMMON_MIDDLE) {
1309 log_err("Secondary top down compression overlapped\n");
b75a7d8f
A
1310 }
1311 tempptr ++;
1312 }
1313
1314 /* tertiary top/bottom/common for en_US is similar to the secondary
1315 top/bottom/common */
2ca993e8 1316 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1317 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
b75a7d8f
A
1318 while (*(tempptr + 1) != 0) {
1319 /* the last secondary collation element is not checked since it is not
1320 part of the compression */
57a6839d
A
1321 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1322 log_err("Tertiary top down compression overlapped\n");
b75a7d8f
A
1323 }
1324 tempptr ++;
1325 }
1326
1327 /* bottom up compression ------------------------------------- */
1328 secstr[count] = 0;
1329 tertstr[count] = 0;
2ca993e8 1330 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1331 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
b75a7d8f
A
1332 while (*(tempptr + 1) != 1) {
1333 /* the last secondary collation element is not checked since it is not
1334 part of the compression */
57a6839d
A
1335 if (*tempptr > SEC_COMMON_MIDDLE) {
1336 log_err("Secondary bottom up compression overlapped\n");
b75a7d8f
A
1337 }
1338 tempptr ++;
1339 }
1340
1341 /* tertiary top/bottom/common for en_US is similar to the secondary
1342 top/bottom/common */
2ca993e8 1343 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
57a6839d 1344 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
b75a7d8f
A
1345 while (*(tempptr + 1) != 0) {
1346 /* the last secondary collation element is not checked since it is not
1347 part of the compression */
57a6839d
A
1348 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1349 log_err("Tertiary bottom up compression overlapped\n");
b75a7d8f
A
1350 }
1351 tempptr ++;
1352 }
1353
1354 ucol_close(coll);
1355}
1356
/*
 * Runs the three-string Cyrillic ordering check against the tailorings that
 * are still meaningful.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Cases that are no longer run:
     *  - locale "ru": Russian overrides contractions, so the test is not
     *    valid there anymore.
     *  - locale "root", and the rules
     *      "&\\u0410 = \\u0410"
     *      "&Z < \\u0410"
     *      "&\\u0410 = \\u0410 < \\u0410\\u0301"
     *      "&Z < \\u0410 < \\u0410\\u0301"
     *    because UCA 8.0 drops most of the Cyrillic contractions from the
     *    default order. See CLDR ticket #7246
     *    "root collation: remove Cyrillic contractions".
     */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < UPRV_LENGTHOF(activeRules); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], test, UPRV_LENGTHOF(test));
    }
}
1379
/*
 * Checks the ordering of two string triples under a tailoring that
 * suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* Both data sets are run with the same rule string. */
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1396
1397static void TestContraction(void) {
1398 const static char *testrules[] = {
1399 "&A = AB / B",
1400 "&A = A\\u0306/\\u0306",
1401 "&c = ch / h"
1402 };
1403 const static UChar testdata[][2] = {
1404 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1405 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1406 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1407 };
1408 const static UChar testdata2[][2] = {
1409 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1411 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1412 };
57a6839d
A
1413#if 0
1414 /*
1415 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1416 * In fact, LDML 24 recommends an improved way of creating mappings
1417 * which always yields different mappings for such pairs. See
1418 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419 */
b75a7d8f
A
1420 const static char *testrules3[] = {
1421 "&z < xyz &xyzw << B",
1422 "&z < xyz &xyz << B / w",
1423 "&z < ch &achm << B",
1424 "&z < ch &a << B / chm",
1425 "&\\ud800\\udc00w << B",
1426 "&\\ud800\\udc00 << B / w",
1427 "&a\\ud800\\udc00m << B",
1428 "&a << B / \\ud800\\udc00m",
1429 };
57a6839d 1430#endif
b75a7d8f
A
1431
1432 UErrorCode status = U_ZERO_ERROR;
1433 UCollator *coll;
1434 UChar rule[256] = {0};
1435 uint32_t rlen = 0;
1436 int i;
1437
2ca993e8 1438 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
b75a7d8f
A
1439 UCollationElements *iter1;
1440 int j = 0;
1441 log_verbose("Rule %s for testing\n", testrules[i]);
1442 rlen = u_unescape(testrules[i], rule, 32);
1443 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1444 if (U_FAILURE(status)) {
729e4ab9 1445 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
b75a7d8f
A
1446 return;
1447 }
1448 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1449 if (U_FAILURE(status)) {
1450 log_err("Collation iterator creation failed\n");
1451 return;
1452 }
1453 while (j < 2) {
1454 UCollationElements *iter2 = ucol_openElements(coll,
1455 &(testdata[i][j]),
1456 1, &status);
1457 uint32_t ce;
1458 if (U_FAILURE(status)) {
1459 log_err("Collation iterator creation failed\n");
1460 return;
1461 }
1462 ce = ucol_next(iter2, &status);
1463 while (ce != UCOL_NULLORDER) {
1464 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1465 log_err("Collation elements in contraction split does not match\n");
1466 return;
1467 }
1468 ce = ucol_next(iter2, &status);
1469 }
1470 j ++;
1471 ucol_closeElements(iter2);
1472 }
1473 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1474 log_err("Collation elements not exhausted\n");
1475 return;
1476 }
1477 ucol_closeElements(iter1);
1478 ucol_close(coll);
1479 }
1480
1481 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1482 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1483 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1484 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1485 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1486 testdata2[1][1]);
1487 return;
1488 }
1489 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1490 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1491 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1492 testdata2[2][1]);
1493 return;
1494 }
1495 ucol_close(coll);
57a6839d 1496#if 0 /* see above */
2ca993e8 1497 for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
57a6839d 1498 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
b75a7d8f
A
1499 UCollator *coll1,
1500 *coll2;
1501 UCollationElements *iter1,
1502 *iter2;
1503 UChar ch = 0x0042 /* 'B' */;
1504 uint32_t ce;
1505 rlen = u_unescape(testrules3[i], rule, 32);
1506 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1507 rlen = u_unescape(testrules3[i + 1], rule, 32);
1508 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509 if (U_FAILURE(status)) {
1510 log_err("Collator creation failed %s\n", testrules[i]);
1511 return;
1512 }
1513 iter1 = ucol_openElements(coll1, &ch, 1, &status);
1514 iter2 = ucol_openElements(coll2, &ch, 1, &status);
1515 if (U_FAILURE(status)) {
1516 log_err("Collation iterator creation failed\n");
1517 return;
1518 }
1519 ce = ucol_next(iter1, &status);
1520 if (U_FAILURE(status)) {
1521 log_err("Retrieving ces failed\n");
1522 return;
1523 }
1524 while (ce != UCOL_NULLORDER) {
57a6839d
A
1525 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1526 if (ce == ce2) {
1527 log_verbose("CEs match: %08x\n", ce);
1528 } else {
1529 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
b75a7d8f
A
1530 return;
1531 }
1532 ce = ucol_next(iter1, &status);
1533 if (U_FAILURE(status)) {
1534 log_err("Retrieving ces failed\n");
1535 return;
1536 }
1537 }
1538 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1539 log_err("CEs not exhausted\n");
1540 return;
1541 }
1542 ucol_closeElements(iter1);
1543 ucol_closeElements(iter2);
1544 ucol_close(coll1);
1545 ucol_close(coll2);
1546 }
57a6839d 1547#endif
b75a7d8f
A
1548}
1549
1550static void TestExpansion(void) {
1551 const static char *testrules[] = {
57a6839d
A
1552#if 0
1553 /*
1554 * This seems to have tested that M was not mapped to an expansion.
1555 * I believe the old builder just did that because it computed the extension CEs
1556 * at the very end, which was a bug.
1557 * Among other problems, it violated the core tailoring principle
1558 * by making an earlier rule depend on a later one.
1559 * And, of course, if M did not get an expansion, then it was primary different from K,
1560 * unlike what the rule &K<<M says.
1561 */
b75a7d8f 1562 "&J << K / B & K << M",
57a6839d 1563#endif
b75a7d8f
A
1564 "&J << K / B << M"
1565 };
1566 const static UChar testdata[][3] = {
1567 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1568 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1569 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1570 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1571 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1572 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1573 };
1574
1575 UErrorCode status = U_ZERO_ERROR;
1576 UCollator *coll;
1577 UChar rule[256] = {0};
1578 uint32_t rlen = 0;
1579 int i;
1580
2ca993e8 1581 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
b75a7d8f
A
1582 int j = 0;
1583 log_verbose("Rule %s for testing\n", testrules[i]);
1584 rlen = u_unescape(testrules[i], rule, 32);
1585 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1586 if (U_FAILURE(status)) {
729e4ab9 1587 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
b75a7d8f
A
1588 return;
1589 }
1590
1591 for (j = 0; j < 5; j ++) {
1592 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1593 }
1594 ucol_close(coll);
1595 }
1596}
1597
b75a7d8f
A
#if 0
/* This test exercises the current limitations of the engine. */
/* It always fails, so it is disabled by default. */
static void TestLimitations(void) {
    /* Shared by the two normalization sections, which use identical data. */
    static const char *nullContractionRule = "&a<<<\\u0000\\u0302";
    static const char *nullContractionFwd[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *nullContractionRev[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute normAtt[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *first[] = {"add","b","adf"};
        static const char *second[] = {"aa","b","af"};

        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, first, UPRV_LENGTHOF(first));
        genericRulesStarter(rule, second, UPRV_LENGTHOF(second));
    }

    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *first[] = {"ad","c","af","f","ch","h"};
        static const char *second[] = {"ad","c","ch","af","f","h"};

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, first, UPRV_LENGTHOF(first));
        genericRulesStarter(rule, second, UPRV_LENGTHOF(second));
    }

    /* normalization: nulls in contractions */
    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(nullContractionRule, nullContractionFwd, 2, normAtt, normOn, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionRev, 2, normAtt, normOn, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionFwd, 2, normAtt, normOff, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionRev, 2, normAtt, normOff, 1);

    /* normalization: contractions spanning normalization */
    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(nullContractionRule, nullContractionFwd, 2, normAtt, normOn, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionRev, 2, normAtt, normOn, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionFwd, 2, normAtt, normOff, 1);
    genericRulesStarterWithOptions(nullContractionRule, nullContractionRev, 2, normAtt, normOff, 1);

    /* variable top: */
    {
        /*
         * Alternative rules that were tried here:
         *   "&\\u2010<x=[variable top]<z"
         *   "&' '<x<[variable top]=z"
         */
        static const char *rule = "&\\u2010<x<[variable top]=z";
        static const char *first[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *second[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *third[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, third, UPRV_LENGTHOF(third), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, first, UPRV_LENGTHOF(first), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, second, UPRV_LENGTHOF(second), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, first, UPRV_LENGTHOF(first), att, valOff, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, second, UPRV_LENGTHOF(second), att, valOff, UPRV_LENGTHOF(att));
    }

    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *first[] = {"c","CH","Ch","cH","ch"};
        static const char *second[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};

        /* The runs with UCOL_CASE_FIRST set to UCOL_OFF are not performed. */
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, first, UPRV_LENGTHOF(first), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, second, UPRV_LENGTHOF(second), att, valOn, UPRV_LENGTHOF(att));
    }
}
#endif
1689
1690static void TestBocsuCoverage(void) {
1691 UErrorCode status = U_ZERO_ERROR;
1692 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1693 UChar test[256] = {0};
1694 uint32_t tlen = u_unescape(testString, test, 32);
1695 uint8_t key[256] = {0};
1696 uint32_t klen = 0;
1697
1698 UCollator *coll = ucol_open("", &status);
1699 if(U_SUCCESS(status)) {
1700 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1701
1702 klen = ucol_getSortKey(coll, test, tlen, key, 256);
57a6839d 1703 (void)klen; /* Suppress set but not used warning. */
b75a7d8f
A
1704
1705 ucol_close(coll);
1706 } else {
1707 log_data_err("Couldn't open UCA\n");
1708 }
1709}
1710
1711static void TestVariableTopSetting(void) {
1712 UErrorCode status = U_ZERO_ERROR;
b75a7d8f
A
1713 uint32_t varTopOriginal = 0, varTop1, varTop2;
1714 UCollator *coll = ucol_open("", &status);
1715 if(U_SUCCESS(status)) {
1716
57a6839d
A
1717 static const UChar nul = 0;
1718 static const UChar space = 0x20;
1719 static const UChar dot = 0x2e; /* punctuation */
1720 static const UChar degree = 0xb0; /* symbol */
1721 static const UChar dollar = 0x24; /* currency symbol */
1722 static const UChar zero = 0x30; /* digit */
729e4ab9 1723
57a6839d
A
1724 varTopOriginal = ucol_getVariableTop(coll, &status);
1725 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1726 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
b75a7d8f 1727
57a6839d
A
1728 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1729 varTop2 = ucol_getVariableTop(coll, &status);
1730 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1731 if(U_FAILURE(status) || varTop1 != varTop2 ||
1732 !ucol_equal(coll, &nul, 0, &space, 1) ||
1733 ucol_equal(coll, &nul, 0, &dot, 1) ||
1734 ucol_equal(coll, &nul, 0, &degree, 1) ||
1735 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1736 ucol_equal(coll, &nul, 0, &zero, 1) ||
1737 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1738 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
b75a7d8f
A
1739 }
1740
57a6839d
A
1741 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1742 varTop2 = ucol_getVariableTop(coll, &status);
1743 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1744 if(U_FAILURE(status) || varTop1 != varTop2 ||
1745 !ucol_equal(coll, &nul, 0, &space, 1) ||
1746 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1747 ucol_equal(coll, &nul, 0, &degree, 1) ||
1748 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1749 ucol_equal(coll, &nul, 0, &zero, 1) ||
1750 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1751 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1752 }
b75a7d8f 1753
57a6839d
A
1754 varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1755 varTop2 = ucol_getVariableTop(coll, &status);
1756 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757 if(U_FAILURE(status) || varTop1 != varTop2 ||
1758 !ucol_equal(coll, &nul, 0, &space, 1) ||
1759 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1761 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762 ucol_equal(coll, &nul, 0, &zero, 1) ||
1763 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1764 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
b75a7d8f
A
1765 }
1766
57a6839d
A
1767 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768 varTop2 = ucol_getVariableTop(coll, &status);
1769 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770 if(U_FAILURE(status) || varTop1 != varTop2 ||
1771 !ucol_equal(coll, &nul, 0, &space, 1) ||
1772 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1774 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775 ucol_equal(coll, &nul, 0, &zero, 1) ||
1776 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1778 }
b75a7d8f
A
1779
1780 log_verbose("Testing setting variable top to contractions\n");
1781 {
57a6839d 1782 UChar first[4] = { 0 };
b75a7d8f
A
1783 first[0] = 0x0040;
1784 first[1] = 0x0050;
1785 first[2] = 0x0000;
1786
57a6839d 1787 status = U_ZERO_ERROR;
b75a7d8f
A
1788 ucol_setVariableTop(coll, first, -1, &status);
1789
1790 if(U_SUCCESS(status)) {
1791 log_err("Invalid contraction succeded in setting variable top!\n");
1792 }
1793
1794 }
1795
1796 log_verbose("Test restoring variable top\n");
1797
1798 status = U_ZERO_ERROR;
1799 ucol_restoreVariableTop(coll, varTopOriginal, &status);
1800 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1801 log_err("Couldn't restore old variable top\n");
1802 }
1803
1804 log_verbose("Testing calling with error set\n");
1805
1806 status = U_INTERNAL_PROGRAM_ERROR;
57a6839d 1807 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
b75a7d8f
A
1808 varTop2 = ucol_getVariableTop(coll, &status);
1809 ucol_restoreVariableTop(coll, varTop2, &status);
57a6839d 1810 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
b75a7d8f
A
1811 varTop2 = ucol_getVariableTop(NULL, &status);
1812 ucol_restoreVariableTop(NULL, varTop2, &status);
1813 if(status != U_INTERNAL_PROGRAM_ERROR) {
1814 log_err("Bad reaction to passed error!\n");
1815 }
b75a7d8f
A
1816 ucol_close(coll);
1817 } else {
1818 log_data_err("Couldn't open UCA collator\n");
1819 }
57a6839d
A
1820}
1821
3d1f044b 1822static void TestMaxVariable(void) {
57a6839d
A
1823 UErrorCode status = U_ZERO_ERROR;
1824 UColReorderCode oldMax, max;
1825 UCollator *coll;
1826
1827 static const UChar nul = 0;
1828 static const UChar space = 0x20;
1829 static const UChar dot = 0x2e; /* punctuation */
1830 static const UChar degree = 0xb0; /* symbol */
1831 static const UChar dollar = 0x24; /* currency symbol */
1832 static const UChar zero = 0x30; /* digit */
1833
1834 coll = ucol_open("", &status);
1835 if(U_FAILURE(status)) {
1836 log_data_err("Couldn't open root collator\n");
1837 return;
1838 }
1839
1840 oldMax = ucol_getMaxVariable(coll);
1841 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845 max = ucol_getMaxVariable(coll);
1846 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848 !ucol_equal(coll, &nul, 0, &space, 1) ||
1849 ucol_equal(coll, &nul, 0, &dot, 1) ||
1850 ucol_equal(coll, &nul, 0, &degree, 1) ||
1851 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852 ucol_equal(coll, &nul, 0, &zero, 1) ||
1853 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1855 }
1856
1857 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858 max = ucol_getMaxVariable(coll);
1859 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861 !ucol_equal(coll, &nul, 0, &space, 1) ||
1862 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863 ucol_equal(coll, &nul, 0, &degree, 1) ||
1864 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865 ucol_equal(coll, &nul, 0, &zero, 1) ||
1866 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1867 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1868 }
1869
1870 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871 max = ucol_getMaxVariable(coll);
1872 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874 !ucol_equal(coll, &nul, 0, &space, 1) ||
1875 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1877 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878 ucol_equal(coll, &nul, 0, &zero, 1) ||
1879 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1880 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1881 }
1882
1883 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884 max = ucol_getMaxVariable(coll);
1885 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887 !ucol_equal(coll, &nul, 0, &space, 1) ||
1888 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1890 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891 ucol_equal(coll, &nul, 0, &zero, 1) ||
1892 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1894 }
1895
1896 log_verbose("Test restoring maxVariable\n");
1897 status = U_ZERO_ERROR;
1898 ucol_setMaxVariable(coll, oldMax, &status);
1899 if(oldMax != ucol_getMaxVariable(coll)) {
1900 log_err("Couldn't restore old maxVariable\n");
1901 }
b75a7d8f 1902
57a6839d
A
1903 log_verbose("Testing calling with error set\n");
1904 status = U_INTERNAL_PROGRAM_ERROR;
1905 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906 max = ucol_getMaxVariable(coll);
1907 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908 log_err("Bad reaction to passed error!\n");
1909 }
1910 ucol_close(coll);
b75a7d8f
A
1911}
1912
1913static void TestNonChars(void) {
1914 static const char *test[] = {
729e4ab9
A
1915 "\\u0000", /* ignorable */
1916 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1917 "\\uFDD0", "\\uFDEF",
1918 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1919 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
b75a7d8f
A
1920 "\\U0003FFFE", "\\U0003FFFF",
1921 "\\U0004FFFE", "\\U0004FFFF",
1922 "\\U0005FFFE", "\\U0005FFFF",
1923 "\\U0006FFFE", "\\U0006FFFF",
1924 "\\U0007FFFE", "\\U0007FFFF",
1925 "\\U0008FFFE", "\\U0008FFFF",
1926 "\\U0009FFFE", "\\U0009FFFF",
1927 "\\U000AFFFE", "\\U000AFFFF",
1928 "\\U000BFFFE", "\\U000BFFFF",
1929 "\\U000CFFFE", "\\U000CFFFF",
1930 "\\U000DFFFE", "\\U000DFFFF",
1931 "\\U000EFFFE", "\\U000EFFFF",
1932 "\\U000FFFFE", "\\U000FFFFF",
729e4ab9
A
1933 "\\U0010FFFE", "\\U0010FFFF",
1934 "\\uFFFF" /* special character with maximum primary weight */
b75a7d8f
A
1935 };
1936 UErrorCode status = U_ZERO_ERROR;
1937 UCollator *coll = ucol_open("en_US", &status);
1938
1939 log_verbose("Test non characters\n");
1940
1941 if(U_SUCCESS(status)) {
729e4ab9 1942 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
b75a7d8f 1943 } else {
729e4ab9 1944 log_err_status(status, "Unable to open collator\n");
b75a7d8f
A
1945 }
1946
1947 ucol_close(coll);
1948}
1949
1950static void TestExtremeCompression(void) {
1951 static char *test[4];
1952 int32_t j = 0, i = 0;
1953
1954 for(i = 0; i<4; i++) {
1955 test[i] = (char *)malloc(2048*sizeof(char));
1956 }
1957
1958 for(j = 20; j < 500; j++) {
1959 for(i = 0; i<4; i++) {
1960 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1961 test[i][j-1] = (char)('a'+i);
1962 test[i][j] = 0;
1963 }
1964 genericLocaleStarter("en_US", (const char **)test, 4);
1965 }
1966
1967
1968 for(i = 0; i<4; i++) {
1969 free(test[i]);
1970 }
1971}
1972
1973#if 0
1974static void TestExtremeCompression(void) {
1975 static char *test[4];
1976 int32_t j = 0, i = 0;
1977 UErrorCode status = U_ZERO_ERROR;
1978 UCollator *coll = ucol_open("en_US", status);
1979 for(i = 0; i<4; i++) {
1980 test[i] = (char *)malloc(2048*sizeof(char));
1981 }
1982 for(j = 10; j < 2048; j++) {
1983 for(i = 0; i<4; i++) {
1984 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1985 test[i][j-1] = (char)('a'+i);
1986 test[i][j] = 0;
1987 }
1988 }
1989 genericLocaleStarter("en_US", (const char **)test, 4);
1990
1991 for(j = 10; j < 2048; j++) {
1992 for(i = 0; i<1; i++) {
1993 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1994 test[i][j] = 0;
1995 }
1996 }
1997 for(i = 0; i<4; i++) {
1998 free(test[i]);
1999 }
2000}
2001#endif
2002
/*
 * Tailors several supplementary code points (written as surrogate pairs), and
 * strings that start with one, after 'z'.  The rule string and the table of
 * test strings are then passed to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *tailoring =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    static const char *orderedStrings[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* The table holds 14 strings; all of them take part in the test. */
    genericRulesStarter(tailoring, orderedStrings, UPRV_LENGTHOF(orderedStrings));
}
2023
2024/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2025static void TestPrefix(void) {
2026 uint32_t i;
2027
46f4442e 2028 static const struct {
b75a7d8f
A
2029 const char *rules;
2030 const char *data[50];
2031 const uint32_t len;
374ca955
A
2032 } tests[] = {
2033 { "&z <<< z|a",
b75a7d8f
A
2034 {"zz", "za"}, 2 },
2035
374ca955 2036 { "&z <<< z| a",
b75a7d8f
A
2037 {"zz", "za"}, 2 },
2038 { "[strength I]"
2039 "&a=\\ud900\\udc25"
374ca955 2040 "&z<<<\\ud900\\udc25|a",
b75a7d8f
A
2041 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2042 };
2043
2044
2ca993e8 2045 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
b75a7d8f
A
2046 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2047 }
2048}
2049
2050/* This test uses data supplied by Masashiko Maedera to test the implementation */
2051/* JIS X 4061 collation order implementation */
/* The two active string tables below are each run through */
/* genericLocaleStarterWithOptions() for the "ja" locale, first with strength */
/* set to quaternary only, then with quaternary strength plus shifted */
/* alternate handling. */
2052static void TestNewJapanese(void) {
2053
46f4442e 2054 static const char * const test1[] = {
b75a7d8f
A
2055 "\\u30b7\\u30e3\\u30fc\\u30ec",
2056 "\\u30b7\\u30e3\\u30a4",
2057 "\\u30b7\\u30e4\\u30a3",
2058 "\\u30b7\\u30e3\\u30ec",
2059 "\\u3061\\u3087\\u3053",
2060 "\\u3061\\u3088\\u3053",
2061 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2062 "\\u3066\\u30fc\\u305f",
2063 "\\u30c6\\u30fc\\u30bf",
2064 "\\u30c6\\u30a7\\u30bf",
2065 "\\u3066\\u3048\\u305f",
374ca955 2066 "\\u3067\\u30fc\\u305f",
b75a7d8f
A
2067 "\\u30c7\\u30fc\\u30bf",
2068 "\\u30c7\\u30a7\\u30bf",
2069 "\\u3067\\u3048\\u305f",
2070 "\\u3066\\u30fc\\u305f\\u30fc",
2071 "\\u30c6\\u30fc\\u30bf\\u30a1",
2072 "\\u30c6\\u30a7\\u30bf\\u30fc",
2073 "\\u3066\\u3047\\u305f\\u3041",
2074 "\\u3066\\u3048\\u305f\\u30fc",
2075 "\\u3067\\u30fc\\u305f\\u30fc",
2076 "\\u30c7\\u30fc\\u30bf\\u30a1",
2077 "\\u3067\\u30a7\\u305f\\u30a1",
2078 "\\u30c7\\u3047\\u30bf\\u3041",
2079 "\\u30c7\\u30a8\\u30bf\\u30a2",
2080 "\\u3072\\u3086",
2081 "\\u3073\\u3085\\u3042",
2082 "\\u3074\\u3085\\u3042",
2083 "\\u3073\\u3085\\u3042\\u30fc",
2084 "\\u30d3\\u30e5\\u30a2\\u30fc",
2085 "\\u3074\\u3085\\u3042\\u30fc",
2086 "\\u30d4\\u30e5\\u30a2\\u30fc",
2087 "\\u30d2\\u30e5\\u30a6",
2088 "\\u30d2\\u30e6\\u30a6",
2089 "\\u30d4\\u30e5\\u30a6\\u30a2",
374ca955 2090 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
b75a7d8f
A
2091 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2092 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2093 "\\u3072\\u3085\\u3093",
2094 "\\u3074\\u3085\\u3093",
2095 "\\u3075\\u30fc\\u308a",
2096 "\\u30d5\\u30fc\\u30ea",
2097 "\\u3075\\u3045\\u308a",
2098 "\\u3075\\u30a5\\u308a",
2099 "\\u3075\\u30a5\\u30ea",
2100 "\\u30d5\\u30a6\\u30ea",
2101 "\\u3076\\u30fc\\u308a",
2102 "\\u30d6\\u30fc\\u30ea",
2103 "\\u3076\\u3045\\u308a",
2104 "\\u30d6\\u30a5\\u308a",
2105 "\\u3077\\u3046\\u308a",
2106 "\\u30d7\\u30a6\\u30ea",
2107 "\\u3075\\u30fc\\u308a\\u30fc",
2108 "\\u30d5\\u30a5\\u30ea\\u30fc",
2109 "\\u3075\\u30a5\\u308a\\u30a3",
2110 "\\u30d5\\u3045\\u308a\\u3043",
2111 "\\u30d5\\u30a6\\u30ea\\u30fc",
2112 "\\u3075\\u3046\\u308a\\u3043",
2113 "\\u30d6\\u30a6\\u30ea\\u30a4",
2114 "\\u3077\\u30fc\\u308a\\u30fc",
2115 "\\u3077\\u30a5\\u308a\\u30a4",
2116 "\\u3077\\u3046\\u308a\\u30fc",
2117 "\\u30d7\\u30a6\\u30ea\\u30a4",
2118 "\\u30d5\\u30fd",
2119 "\\u3075\\u309e",
2120 "\\u3076\\u309d",
2121 "\\u3076\\u3075",
2122 "\\u3076\\u30d5",
2123 "\\u30d6\\u3075",
2124 "\\u30d6\\u30d5",
2125 "\\u3076\\u309e",
2126 "\\u3076\\u3077",
2127 "\\u30d6\\u3077",
2128 "\\u3077\\u309d",
2129 "\\u30d7\\u30fd",
2130 "\\u3077\\u3075",
2131};
2132
2133 static const char *test2[] = {
2134 "\\u306f\\u309d", /* H\\u309d */
73c04bcf 2135 "\\u30cf\\u30fd", /* K\\u30fd */
b75a7d8f
A
2136 "\\u306f\\u306f", /* HH */
2137 "\\u306f\\u30cf", /* HK */
2138 "\\u30cf\\u30cf", /* KK */
2139 "\\u306f\\u309e", /* H\\u309e */
2140 "\\u30cf\\u30fe", /* K\\u30fe */
2141 "\\u306f\\u3070", /* HH\\u309b */
2142 "\\u30cf\\u30d0", /* KK\\u309b */
2143 "\\u306f\\u3071", /* HH\\u309c */
2144 "\\u30cf\\u3071", /* KH\\u309c */
2145 "\\u30cf\\u30d1", /* KK\\u309c */
2146 "\\u3070\\u309d", /* H\\u309b\\u309d */
2147 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2148 "\\u3070\\u306f", /* H\\u309bH */
2149 "\\u30d0\\u30cf", /* K\\u309bK */
2150 "\\u3070\\u309e", /* H\\u309b\\u309e */
2151 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2152 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2153 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2154 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2155 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2156 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2157 "\\u3071\\u309d", /* H\\u309c\\u309d */
2158 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2159 "\\u3071\\u306f", /* H\\u309cH */
2160 "\\u30d1\\u30cf", /* K\\u309cK */
2161 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2162 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2163 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2164 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2165 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2166 };
2167 /*
2168 static const char *test3[] = {
2169 "\\u221er\\u221e",
2170 "\\u221eR#",
2171 "\\u221et\\u221e",
2172 "#r\\u221e",
2173 "#R#",
2174 "#t%",
2175 "#T%",
2176 "8t\\u221e",
2177 "8T\\u221e",
2178 "8t#",
2179 "8T#",
2180 "8t%",
2181 "8T%",
2182 "8t8",
2183 "8T8",
2184 "\\u03c9r\\u221e",
2185 "\\u03a9R%",
2186 "rr\\u221e",
2187 "rR\\u221e",
2188 "Rr\\u221e",
2189 "RR\\u221e",
2190 "RT%",
2191 "rt8",
2192 "tr\\u221e",
2193 "tr8",
2194 "TR8",
2195 "tt8",
2196 "\\u30b7\\u30e3\\u30fc\\u30ec",
2197 };
2198 */
/* First run: a single attribute, strength = quaternary. */
2199 static const UColAttribute att[] = { UCOL_STRENGTH };
2200 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2201
/* Second run: quaternary strength plus shifted alternate handling. */
2202 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2203 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2204
2ca993e8
A
2205 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2206 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2207 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2208 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2209 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
b75a7d8f
A
2210}
2211
2212static void TestStrCollIdenticalPrefix(void) {
2213 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2214 const char* test[] = {
2215 "ab\\ud9b0\\udc70",
2216 "ab\\ud9b0\\udc71"
2217 };
2ca993e8 2218 genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
b75a7d8f
A
2219}
2220/* Contractions should have all their canonically equivalent */
2221/* strings included */
2222static void TestContractionClosure(void) {
46f4442e 2223 static const struct {
b75a7d8f 2224 const char *rules;
46f4442e 2225 const char *data[10];
b75a7d8f 2226 const uint32_t len;
374ca955 2227 } tests[] = {
b75a7d8f
A
2228 { "&b=\\u00e4\\u00e4",
2229 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2230 { "&b=\\u00C5",
2231 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2232 };
2233 uint32_t i;
2234
2235
2ca993e8 2236 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
73c04bcf 2237 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
b75a7d8f
A
2238 }
2239}
2240
2241/* This test also fails */
/* Runs genericRulesStarter() on three tailorings that use "[before 3]"; */
/* the second and third contain the same rules in a different order. */
/* The "#if 0" part below is disabled code that builds a collator from */
/* rule1 and logs collation elements. */
2242static void TestBeforePrefixFailure(void) {
46f4442e 2243 static const struct {
b75a7d8f 2244 const char *rules;
46f4442e 2245 const char *data[10];
b75a7d8f 2246 const uint32_t len;
374ca955 2247 } tests[] = {
b75a7d8f
A
2248 { "&g <<< a"
2249 "&[before 3]\\uff41 <<< x",
2250 {"x", "\\uff41"}, 2 },
2251 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2252 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
374ca955 2253 "&[before 3]\\u30a7<<<\\u30a9",
b75a7d8f
A
2254 {"\\u30a9", "\\u30a7"}, 2 },
2255 { "&[before 3]\\u30a7<<<\\u30a9"
2256 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2257 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2258 {"\\u30a9", "\\u30a7"}, 2 },
2259 };
2260 uint32_t i;
2261
2262
2ca993e8 2263 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
b75a7d8f
A
2264 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2265 }
2266
2267#if 0
374ca955 2268 const char* rule1 =
b75a7d8f
A
2269 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2270 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2271 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
374ca955 2272 const char* rule2 =
b75a7d8f
A
2273 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2274 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2275 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2276 const char* test[] = {
374ca955 2277 "\\u30c6\\u30fc\\u30bf",
b75a7d8f
A
2278 "\\u30c6\\u30a7\\u30bf",
2279 };
2ca993e8
A
2280 genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2281 genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
b75a7d8f
A
2282/* this piece of code should be in some sort of verbose mode */
2283/* it gets the collation elements for elements and prints them */
2284/* This is useful when trying to see whether the problem is */
374ca955 2285 {
b75a7d8f
A
2286 UErrorCode status = U_ZERO_ERROR;
2287 uint32_t i = 0;
2288 UCollationElements *it = NULL;
2289 uint32_t CE;
2290 UChar string[256];
2291 uint32_t uStringLen;
2292 UCollator *coll = NULL;
2293
2294 uStringLen = u_unescape(rule1, string, 256);
2295
2296 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2297
2298 /*coll = ucol_open("ja_JP_JIS", &status);*/
2299 it = ucol_openElements(coll, string, 0, &status);
2300
2ca993e8 2301 for(i = 0; i < UPRV_LENGTHOF(test); i++) {
b75a7d8f
A
2302 log_verbose("%s\n", test[i]);
2303 uStringLen = u_unescape(test[i], string, 256);
2304 ucol_setText(it, string, uStringLen, &status);
2305
2306 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2307 log_verbose("%08X\n", CE);
2308 }
2309 log_verbose("\n");
2310
2311 }
2312
2313 ucol_closeElements(it);
2314 ucol_close(coll);
2315 }
2316#endif
2317}
2318
2319static void TestPrefixCompose(void) {
374ca955 2320 const char* rule1 =
b75a7d8f
A
2321 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2322 /*
2323 const char* test[] = {
374ca955 2324 "\\u30c6\\u30fc\\u30bf",
b75a7d8f
A
2325 "\\u30c6\\u30a7\\u30bf",
2326 };
2327 */
374ca955 2328 {
b75a7d8f
A
2329 UErrorCode status = U_ZERO_ERROR;
2330 /*uint32_t i = 0;*/
2331 /*UCollationElements *it = NULL;*/
2332/* uint32_t CE;*/
2333 UChar string[256];
2334 uint32_t uStringLen;
2335 UCollator *coll = NULL;
2336
2337 uStringLen = u_unescape(rule1, string, 256);
2338
2339 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2340 ucol_close(coll);
2341 }
2342
2343
2344}
2345
2346/*
374ca955
A
2347[last variable] last variable value
2348[last primary ignorable] largest CE for primary ignorable
2349[last secondary ignorable] largest CE for secondary ignorable
2350[last tertiary ignorable] largest CE for tertiary ignorable
2351[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
b75a7d8f
A
2352*/
2353
/*
 * Exercises rule resets such as [first variable], [last regular], [top] and
 * the [before N] modifier.  Each table entry pairs a rule string with the
 * strings and count that are passed to genericRulesStarter().
 */
2354static void TestRuleOptions(void) {
374ca955
A
2355 /* values here are hardcoded and are correct for the current UCA
2356 * when the UCA changes, one might be forced to change these
729e4ab9
A
2357 * values.
2358 */
2359
2360 /*
2361 * These strings contain the last character before [variable top]
2362 * and the first and second characters (by primary weights) after it.
2363 * See FractionalUCA.txt. For example:
2364 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2365 [variable top = 0C FE]
2366 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2367 and
2368 00B4; [0D 0C, 05, 05]
2369 *
2370 * Note: Starting with UCA 6.0, the [variable top] collation element
2371 * is not the weight of any character or string,
2372 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2373 */
2374#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2375#define FIRST_REGULAR_CHAR_STRING "\\u0060"
2376#define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2377
2378 /*
2379 * This string has to match the character that has the [last regular] weight
2380 * which changes with each UCA version.
2381 * See the bottom of FractionalUCA.txt which says something like
2382 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2383 *
2384 * Note: Starting with UCA 6.0, the [last regular] collation element
2385 * is not the weight of any character or string,
2386 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
b75a7d8f 2387 */
729e4ab9
A
2388#define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2389
46f4442e 2390 static const struct {
b75a7d8f 2391 const char *rules;
46f4442e 2392 const char *data[10];
b75a7d8f 2393 const uint32_t len;
374ca955 2394 } tests[] = {
57a6839d
A
2395#if 0
2396 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
b75a7d8f 2397 /* - all befores here amount to zero */
374ca955
A
2398 { "&[before 3][first tertiary ignorable]<<<a",
2399 { "\\u0000", "a"}, 2
2400 }, /* you cannot go before first tertiary ignorable */
b75a7d8f 2401
374ca955
A
2402 { "&[before 3][last tertiary ignorable]<<<a",
2403 { "\\u0000", "a"}, 2
2404 }, /* you cannot go before last tertiary ignorable */
57a6839d
A
2405#endif
2406 /*
2407 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2408 * and it *is* possible to "go before" that.
2409 */
374ca955
A
2410 { "&[before 3][first secondary ignorable]<<<a",
2411 { "\\u0000", "a"}, 2
57a6839d 2412 },
b75a7d8f 2413
374ca955
A
2414 { "&[before 3][last secondary ignorable]<<<a",
2415 { "\\u0000", "a"}, 2
57a6839d 2416 },
b75a7d8f
A
2417
2418 /* 'normal' befores */
2419
57a6839d
A
2420 /*
2421 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2422 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2423 * because there is no tailoring space before that boundary.
2424 * Made the tests work by tailoring to a space instead.
2425 */
2426 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
374ca955
A
2427 { "c", "b", "\\u0332", "a" }, 4
2428 },
b75a7d8f 2429
374ca955 2430 /* we don't have a code point that corresponds to
b75a7d8f
A
2431 * the last primary ignorable
2432 */
57a6839d 2433 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
374ca955
A
2434 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2435 },
b75a7d8f 2436
374ca955
A
2437 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2438 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
2439 },
b75a7d8f 2440
374ca955 2441 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
729e4ab9 2442 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
374ca955 2443 },
b75a7d8f
A
2444
2445 { "&[first regular]<a"
2446 "&[before 1][first regular]<b",
729e4ab9 2447 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
374ca955 2448 },
b75a7d8f
A
2449
2450 { "&[before 1][last regular]<b"
2451 "&[last regular]<a",
729e4ab9 2452 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
374ca955 2453 },
b75a7d8f
A
2454
2455 { "&[before 1][first implicit]<b"
2456 "&[first implicit]<a",
374ca955
A
2457 { "b", "\\u4e00", "a", "\\u4e01"}, 4
2458 },
57a6839d 2459#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
b75a7d8f
A
2460 { "&[before 1][last implicit]<b"
2461 "&[last implicit]<a",
374ca955
A
2462 { "b", "\\U0010FFFD", "a" }, 3
2463 },
57a6839d 2464#endif
 /* NOTE(review): the entry below lists 8 strings but passes len 7, so the final "u" is not handed to genericRulesStarter - confirm intent. */
b75a7d8f 2465 { "&[last variable]<z"
57a6839d 2466 "&' '<x" /* was &[last primary ignorable]<x, see above */
b75a7d8f
A
2467 "&[last secondary ignorable]<<y"
2468 "&[last tertiary ignorable]<<<w"
2469 "&[top]<u",
729e4ab9 2470 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
374ca955 2471 }
b75a7d8f
A
2472
2473 };
2474 uint32_t i;
2475
 /* Run every active table entry through the generic rules test. */
2ca993e8 2476 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
b75a7d8f
A
2477 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2478 }
2479}
2480
2481
2482static void TestOptimize(void) {
374ca955
A
2483 /* this is not really a test - just trying out
2484 * whether copying of UCA contents will fail
2485 * Cannot really test, since the functionality
b75a7d8f
A
2486 * remains the same.
2487 */
46f4442e 2488 static const struct {
b75a7d8f 2489 const char *rules;
46f4442e 2490 const char *data[10];
b75a7d8f 2491 const uint32_t len;
374ca955 2492 } tests[] = {
b75a7d8f 2493 /* - all befores here amount to zero */
374ca955
A
2494 { "[optimize [\\uAC00-\\uD7FF]]",
2495 { "a", "b"}, 2}
b75a7d8f
A
2496 };
2497 uint32_t i;
2498
2ca993e8 2499 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
b75a7d8f
A
2500 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2501 }
2502}
2503
2504/*
374ca955
A
2505cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2506weiv ucol_strcollIter?
2507cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2508weiv these are the input strings?
2509cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2510weiv will check - could be a problem with utf-8 iterator
2511cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2512weiv hmmm
2513cycheng@ca.ibm.c... note that we have a standalone high surrogate
2514weiv that doesn't sound right
2515cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2516weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2517cycheng@ca.ibm.c... yes
2518weiv and then do the comparison
2519cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2520weiv utf-16 strings look like a little endian ones in the example you sent me
2521weiv It could be a bug - let me try to test it out
2522cycheng@ca.ibm.c... ok
2523cycheng@ca.ibm.c... we can wait till the conf. call
2524cycheng@ca.ibm.c... next weke
2525weiv that would be great
2526weiv hmmm
2527weiv I might be wrong
2528weiv let me play with it some more
2529cycheng@ca.ibm.c... ok
2530cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2531cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2532cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2533weiv ok
2534cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2535weiv thanks
2536cycheng@ca.ibm.c... the 4 strings we sent are just samples
b75a7d8f
A
2537*/
2538#if 0
2539static void Alexis(void) {
2540 UErrorCode status = U_ZERO_ERROR;
2541 UCollator *coll = ucol_open("", &status);
2542
2543
2544 const char utf16be[2][4] = {
2545 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2546 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2547 };
2548
2549 const char utf8[2][4] = {
2550 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2551 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2552 };
2553
2554 UCharIterator iterU161, iterU162;
2555 UCharIterator iterU81, iterU82;
2556
2557 UCollationResult resU16, resU8;
2558
2559 uiter_setUTF16BE(&iterU161, utf16be[0], 4);
2560 uiter_setUTF16BE(&iterU162, utf16be[1], 4);
2561
2562 uiter_setUTF8(&iterU81, utf8[0], 4);
2563 uiter_setUTF8(&iterU82, utf8[1], 4);
2564
2565 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2566
2567 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
2568 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
2569
2570
2571 if(resU16 != resU8) {
2572 log_err("different results\n");
2573 }
2574
2575 ucol_close(coll);
2576}
2577#endif
2578
2579#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2580static void Alexis2(void) {
2581 UErrorCode status = U_ZERO_ERROR;
2582 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2583 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
374ca955 2584 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
b75a7d8f
A
2585 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2586
2587 UConverter *conv = NULL;
2588
2589 UCharIterator U16BEItS, U16BEItT;
2590 UCharIterator U8ItS, U8ItT;
2591
2592 UCollationResult resU16, resU16BE, resU8;
2593
46f4442e 2594 static const char* const pairs[][2] = {
b75a7d8f
A
2595 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2596 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2597 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2598 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2599 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2600 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2601 { "\\u0020", "\\u0020\\u0000"}
2602/*
26035F20 (my result here)
26045F204E008E3F
26055F20 (your result here)
2606*/
2607 };
2608
2609 int32_t i = 0;
2610
2611 UCollator *coll = ucol_open("", &status);
2612 if(status == U_FILE_ACCESS_ERROR) {
2613 log_data_err("Is your data around?\n");
2614 return;
2615 } else if(U_FAILURE(status)) {
2616 log_err("Error opening collator\n");
2617 return;
2618 }
2619 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2620 conv = ucnv_open("UTF16BE", &status);
2ca993e8 2621 for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
b75a7d8f
A
2622 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2623 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624
2625 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2626
2627 log_verbose("Result of strcoll is %i\n", resU16);
2628
2629 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2630 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
57a6839d
A
2631 (void)U16BELenS; /* Suppress set but not used warnings. */
2632 (void)U16BELenT;
b75a7d8f
A
2633
2634 /* use the original sizes, as the result from converter is in bytes */
2635 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2636 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2637
2638 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2639
2640 log_verbose("Result of U16BE is %i\n", resU16BE);
2641
2642 if(resU16 != resU16BE) {
2643 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2644 }
2645
2646 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2647 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2648
2649 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2650 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2651
2652 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2653
2654 if(resU16 != resU8) {
2655 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2656 }
2657
2658 }
2659
2660 ucol_close(coll);
2661 ucnv_close(conv);
2662}
2663
2664static void TestHebrewUCA(void) {
2665 UErrorCode status = U_ZERO_ERROR;
46f4442e 2666 static const char *first[] = {
b75a7d8f
A
2667 "d790d6b8d79cd795d6bcd7a9",
2668 "d790d79cd79ed7a7d799d799d7a1",
2669 "d790d6b4d79ed795d6bcd7a9",
2670 };
2671
2672 char utf8String[3][256];
2673 UChar utf16String[3][256];
2674
2675 int32_t i = 0, j = 0;
2676 int32_t sizeUTF8[3];
2677 int32_t sizeUTF16[3];
2678
2679 UCollator *coll = ucol_open("", &status);
46f4442e 2680 if (U_FAILURE(status)) {
729e4ab9 2681 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
46f4442e
A
2682 return;
2683 }
b75a7d8f
A
2684 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2685
2ca993e8 2686 for(i = 0; i < UPRV_LENGTHOF(first); i++) {
b75a7d8f
A
2687 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2688 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2689 log_verbose("%i: ");
2690 for(j = 0; j < sizeUTF16[i]; j++) {
2691 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2692 log_verbose("%04X", utf16String[i][j]);
2693 }
2694 log_verbose("\n");
2695 }
2ca993e8
A
2696 for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2697 for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
b75a7d8f
A
2698 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2699 }
2700 }
2701
2702 ucol_close(coll);
2703
2704}
2705
2706static void TestPartialSortKeyTermination(void) {
46f4442e 2707 static const char* cases[] = {
b75a7d8f 2708 "\\u1234\\u1234\\udc00",
374ca955 2709 "\\udc00\\ud800\\ud800"
b75a7d8f
A
2710 };
2711
57a6839d 2712 int32_t i;
b75a7d8f
A
2713
2714 UErrorCode status = U_ZERO_ERROR;
2715
2716 UCollator *coll = ucol_open("", &status);
2717
2718 UCharIterator iter;
2719
2720 UChar currCase[256];
2721 int32_t length = 0;
2722 int32_t pKeyLen = 0;
2723
2724 uint8_t key[256];
2725
2ca993e8 2726 for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
b75a7d8f
A
2727 uint32_t state[2] = {0, 0};
2728 length = u_unescape(cases[i], currCase, 256);
2729 uiter_setString(&iter, currCase, length);
2730 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
57a6839d 2731 (void)pKeyLen; /* Suppress set but not used warning. */
b75a7d8f
A
2732
2733 log_verbose("Done\n");
2734
2735 }
2736 ucol_close(coll);
2737}
2738
2739static void TestSettings(void) {
46f4442e 2740 static const char* cases[] = {
b75a7d8f
A
2741 "apple",
2742 "Apple"
2743 };
2744
46f4442e 2745 static const char* locales[] = {
b75a7d8f
A
2746 "",
2747 "en"
2748 };
2749
2750 UErrorCode status = U_ZERO_ERROR;
2751
2752 int32_t i = 0, j = 0;
2753
2754 UChar source[256], target[256];
2755 int32_t sLen = 0, tLen = 0;
2756
2757 UCollator *collateObject = NULL;
2ca993e8 2758 for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
b75a7d8f
A
2759 collateObject = ucol_open(locales[i], &status);
2760 ucol_setStrength(collateObject, UCOL_PRIMARY);
2761 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2ca993e8 2762 for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
b75a7d8f
A
2763 sLen = u_unescape(cases[j-1], source, 256);
2764 source[sLen] = 0;
2765 tLen = u_unescape(cases[j], target, 256);
2766 source[tLen] = 0;
2767 doTest(collateObject, source, target, UCOL_EQUAL);
2768 }
2769 ucol_close(collateObject);
2770 }
2771}
2772
2773static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
46f4442e
A
2774 UErrorCode status = U_ZERO_ERROR;
2775 int32_t errorNo = 0;
57a6839d
A
2776 const UChar *sourceRules = NULL;
2777 int32_t sourceRulesLen = 0;
2778 UParseError parseError;
46f4442e 2779 UColAttributeValue french = UCOL_OFF;
46f4442e 2780
b75a7d8f 2781 if(!ucol_equals(source, target)) {
46f4442e
A
2782 log_err("Same collators, different address not equal\n");
2783 errorNo++;
b75a7d8f
A
2784 }
2785 ucol_close(target);
b331163b 2786 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
57a6839d 2787 target = ucol_safeClone(source, NULL, NULL, &status);
46f4442e
A
2788 if(U_FAILURE(status)) {
2789 log_err("Error creating clone\n");
2790 errorNo++;
2791 return errorNo;
2792 }
2793 if(!ucol_equals(source, target)) {
2794 log_err("Collator different from it's clone\n");
2795 errorNo++;
2796 }
2797 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2798 if(french == UCOL_ON) {
2799 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2800 } else {
2801 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2802 }
2803 if(U_FAILURE(status)) {
2804 log_err("Error setting attributes\n");
2805 errorNo++;
2806 return errorNo;
2807 }
2808 if(ucol_equals(source, target)) {
2809 log_err("Collators same even when options changed\n");
2810 errorNo++;
2811 }
2812 ucol_close(target);
57a6839d 2813
46f4442e
A
2814 sourceRules = ucol_getRules(source, &sourceRulesLen);
2815 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2816 if(U_FAILURE(status)) {
57a6839d
A
2817 log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2818 errorNo++;
2819 return errorNo;
46f4442e 2820 }
b331163b
A
2821 /* Note: The tailoring rule string is an optional data item. */
2822 if(!ucol_equals(source, target) && sourceRulesLen != 0) {
57a6839d
A
2823 log_err("Collator different from collator that was created from the same rules\n");
2824 errorNo++;
46f4442e
A
2825 }
2826 ucol_close(target);
b75a7d8f 2827 }
46f4442e 2828 return errorNo;
b75a7d8f
A
2829}
2830
2831
2832static void TestEquals(void) {
46f4442e 2833 /* ucol_equals is not currently a public API. There is a chance that it will become
b331163b 2834 * something like this.
46f4442e
A
2835 */
2836 /* test whether the two collators instantiated from the same locale are equal */
2837 UErrorCode status = U_ZERO_ERROR;
2838 UParseError parseError;
2839 int32_t noOfLoc = uloc_countAvailable();
2840 const char *locName = NULL;
2841 UCollator *source = NULL, *target = NULL;
2842 int32_t i = 0;
b75a7d8f 2843
46f4442e
A
2844 const char* rules[] = {
2845 "&l < lj <<< Lj <<< LJ",
2846 "&n < nj <<< Nj <<< NJ",
2847 "&ae <<< \\u00e4",
2848 "&AE <<< \\u00c4"
2849 };
2850 /*
2851 const char* badRules[] = {
b75a7d8f 2852 "&l <<< Lj",
46f4442e
A
2853 "&n < nj <<< nJ <<< NJ",
2854 "&a <<< \\u00e4",
2855 "&AE <<< \\u00c4 <<< x"
2856 };
2857 */
b75a7d8f 2858
46f4442e
A
2859 UChar sourceRules[1024], targetRules[1024];
2860 int32_t sourceRulesSize = 0, targetRulesSize = 0;
2ca993e8 2861 int32_t rulesSize = UPRV_LENGTHOF(rules);
b75a7d8f 2862
46f4442e
A
2863 for(i = 0; i < rulesSize; i++) {
2864 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2865 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2866 }
b75a7d8f 2867
46f4442e
A
2868 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2869 if(status == U_FILE_ACCESS_ERROR) {
2870 log_data_err("Is your data around?\n");
2871 return;
2872 } else if(U_FAILURE(status)) {
2873 log_err("Error opening collator\n");
2874 return;
2875 }
2876 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2877 if(!ucol_equals(source, target)) {
2878 log_err("Equivalent collators not equal!\n");
2879 }
2880 ucol_close(source);
2881 ucol_close(target);
b75a7d8f 2882
46f4442e
A
2883 source = ucol_open("root", &status);
2884 target = ucol_open("root", &status);
2885 log_verbose("Testing root\n");
2886 if(!ucol_equals(source, source)) {
2887 log_err("Same collator not equal\n");
2888 }
b331163b
A
2889 if(TestEqualsForCollator("root", source, target)) {
2890 log_err("Errors for root\n");
46f4442e
A
2891 }
2892 ucol_close(source);
2893
2894 for(i = 0; i<noOfLoc; i++) {
2895 status = U_ZERO_ERROR;
2896 locName = uloc_getAvailable(i);
2897 /*if(hasCollationElements(locName)) {*/
2898 log_verbose("Testing equality for locale %s\n", locName);
2899 source = ucol_open(locName, &status);
2900 target = ucol_open(locName, &status);
2901 if (U_FAILURE(status)) {
2902 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
2903 continue;
2904 }
2905 if(TestEqualsForCollator(locName, source, target)) {
2906 log_err("Errors for locale %s\n", locName);
2907 }
2908 ucol_close(source);
2909 /*}*/
2910 }
b75a7d8f
A
2911}
2912
2913static void TestJ2726(void) {
46f4442e
A
2914 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2915 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2916 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2917 UErrorCode status = U_ZERO_ERROR;
2918 UCollator *coll = ucol_open("en", &status);
2919 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2920 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2921 doTest(coll, a, aSpace, UCOL_EQUAL);
2922 doTest(coll, aSpace, a, UCOL_EQUAL);
2923 doTest(coll, a, spaceA, UCOL_EQUAL);
2924 doTest(coll, spaceA, a, UCOL_EQUAL);
2925 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2926 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2927 ucol_close(coll);
b75a7d8f
A
2928}
2929
374ca955 2930static void NullRule(void) {
46f4442e
A
2931 UChar r[3] = {0};
2932 UErrorCode status = U_ZERO_ERROR;
2933 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2934 if(U_SUCCESS(status)) {
2935 log_err("This should have been an error!\n");
2936 ucol_close(coll);
2937 } else {
2938 status = U_ZERO_ERROR;
2939 }
2940 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2941 if(U_FAILURE(status)) {
729e4ab9 2942 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
46f4442e
A
2943 } else {
2944 ucol_close(coll);
2945 }
374ca955
A
2946}
2947
/**
 * Test numeric collation (UCOL_NUMERIC_COLLATION turned on): strings that
 * contain runs of digits, including very long runs, leading zeros, and
 * non-ASCII and supplementary-plane digits, must sort in the listed order.
 */
2952static void TestNumericCollation(void)
2953{
2954 UErrorCode status = U_ZERO_ERROR;
2955
2956 const static char *basicTestStrings[]={
2957 "hello1",
2958 "hello2",
2959 "hello2002",
2960 "hello2003",
2961 "hello123456",
2962 "hello1234567",
2963 "hello10000000",
2964 "hello100000000",
2965 "hello1000000000",
2966 "hello10000000000",
2967 };
2968
2969 const static char *preZeroTestStrings[]={
2970 "avery10000",
2971 "avery010000",
2972 "avery0010000",
2973 "avery00010000",
2974 "avery000010000",
2975 "avery0000010000",
2976 "avery00000010000",
2977 "avery000000010000",
2978 };
2979
2980 const static char *thirtyTwoBitNumericStrings[]={
2981 "avery42949672960",
2982 "avery42949672961",
2983 "avery42949672962",
2984 "avery429496729610"
46f4442e
A
2985 };
2986
2987 const static char *longNumericStrings[]={
2988 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2989 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2990 are treated as multiple collation elements. */
2991 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2992 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2993 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2994 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2995 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2996 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2997 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2998 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
2999 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3000 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
374ca955
A
3001 };
3002
3003 const static char *supplementaryDigits[] = {
3004 "\\uD835\\uDFCE", /* 0 */
3005 "\\uD835\\uDFCF", /* 1 */
3006 "\\uD835\\uDFD0", /* 2 */
3007 "\\uD835\\uDFD1", /* 3 */
3008 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3009 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3010 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3011 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3012 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3013 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3014 };
3015
3016 const static char *foreignDigits[] = {
3017 "\\u0661",
3018 "\\u0662",
3019 "\\u0663",
3020 "\\u0661\\u0660",
3021 "\\u0661\\u0662",
3022 "\\u0661\\u0663",
3023 "\\u0662\\u0660",
3024 "\\u0662\\u0662",
3025 "\\u0662\\u0663",
3026 "\\u0663\\u0660",
3027 "\\u0663\\u0662",
3028 "\\u0663\\u0663"
3029 };
3030
3031 const static char *evenZeroes[] = {
3032 "2000",
3033 "2001",
3034 "2002",
3035 "2003"
3036 };
3037
3038 UColAttribute att = UCOL_NUMERIC_COLLATION;
3039 UColAttributeValue val = UCOL_ON;
3040
3041 /* Open our collator. */
3042 UCollator* coll = ucol_open("root", &status);
3043 if (U_FAILURE(status)){
729e4ab9 3044 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
374ca955
A
3045 myErrorName(status));
3046 return;
3047 }
2ca993e8
A
3048 genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3049 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3050 genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3051 genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3052 genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3053 genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
374ca955
A
3054
3055 /* Setting up our collator to do digits. */
3056 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3057 if (U_FAILURE(status)){
3058 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3059 myErrorName(status));
3060 return;
3061 }
3062
3063 /*
3064 Testing that prepended zeroes still yield the correct collation behavior.
3065 We expect that every element in our strings array will be equal.
3066 */
2ca993e8 3067 genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
374ca955
A
3068
3069 ucol_close(coll);
3070}
3071
3072static void TestTibetanConformance(void)
3073{
3074 const char* test[] = {
3075 "\\u0FB2\\u0591\\u0F71\\u0061",
3076 "\\u0FB2\\u0F71\\u0061"
3077 };
3078
3079 UErrorCode status = U_ZERO_ERROR;
3080 UCollator *coll = ucol_open("", &status);
3081 UChar source[100];
3082 UChar target[100];
3083 int result;
3084 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3085 if (U_SUCCESS(status)) {
3086 u_unescape(test[0], source, 100);
3087 u_unescape(test[1], target, 100);
3088 doTest(coll, source, target, UCOL_EQUAL);
3089 result = ucol_strcoll(coll, source, -1, target, -1);
3090 log_verbose("result %d\n", result);
3091 if (UCOL_EQUAL != result) {
3092 log_err("Tibetan comparison error\n");
3093 }
3094 }
3095 ucol_close(coll);
3096
3097 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3098}
3099
/** Runs the generic ordering check on two Han strings with the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *pinyinPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", pinyinPair, UPRV_LENGTHOF(pinyinPair));
}
3104
374ca955
A
3105/**
3106 * Iterate through the given iterator, checking to see that all the strings
3107 * in the expected array are present.
3108 * @param expected array of strings we expect to see, or NULL
3109 * @param expectedCount number of elements of expected, or 0
3110 */
3111static int32_t checkUEnumeration(const char* msg,
3112 UEnumeration* iter,
3113 const char** expected,
3114 int32_t expectedCount) {
3115 UErrorCode ec = U_ZERO_ERROR;
3116 int32_t i = 0, n, j, bit;
3117 int32_t seenMask = 0;
3118
3119 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3120 n = uenum_count(iter, &ec);
3121 if (!assertSuccess("count", &ec)) return -1;
3122 log_verbose("%s = [", msg);
3123 for (;; ++i) {
3124 const char* s = uenum_next(iter, NULL, &ec);
3125 if (!assertSuccess("snext", &ec) || s == NULL) break;
3126 if (i != 0) log_verbose(",");
3127 log_verbose("%s", s);
3128 /* check expected list */
3129 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3130 if ((seenMask&bit) == 0 &&
3131 uprv_strcmp(s, expected[j]) == 0) {
3132 seenMask |= bit;
3133 break;
3134 }
3135 }
3136 }
3137 log_verbose("] (%d)\n", i);
3138 assertTrue("count verified", i==n);
3139 /* did we see all expected strings? */
3140 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3141 if ((seenMask&bit)!=0) {
3142 log_verbose("Ok: \"%s\" seen\n", expected[j]);
3143 } else {
3144 log_err("FAIL: \"%s\" not seen\n", expected[j]);
3145 }
3146 }
3147 return n;
3148}
3149
3150/**
3151 * Test new API added for separate collation tree.
3152 */
3153static void TestSeparateTrees(void) {
3154 UErrorCode ec = U_ZERO_ERROR;
3155 UEnumeration *e = NULL;
3156 int32_t n = -1;
3157 UBool isAvailable;
3158 char loc[256];
3159
3160 static const char* AVAIL[] = { "en", "de" };
3161
3162 static const char* KW[] = { "collation" };
3163
3164 static const char* KWVAL[] = { "phonebook", "stroke" };
3165
3166#if !UCONFIG_NO_SERVICE
3167 e = ucol_openAvailableLocales(&ec);
729e4ab9
A
3168 if (e != NULL) {
3169 assertSuccess("ucol_openAvailableLocales", &ec);
3170 assertTrue("ucol_openAvailableLocales!=0", e!=0);
2ca993e8 3171 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
57a6839d 3172 (void)n; /* Suppress set but not used warnings. */
729e4ab9
A
3173 /* Don't need to check n because we check list */
3174 uenum_close(e);
3175 } else {
3176 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3177 }
374ca955
A
3178#endif
3179
3180 e = ucol_getKeywords(&ec);
729e4ab9
A
3181 if (e != NULL) {
3182 assertSuccess("ucol_getKeywords", &ec);
3183 assertTrue("ucol_getKeywords!=0", e!=0);
2ca993e8 3184 n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
729e4ab9
A
3185 /* Don't need to check n because we check list */
3186 uenum_close(e);
3187 } else {
3188 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3189 }
374ca955
A
3190
3191 e = ucol_getKeywordValues(KW[0], &ec);
729e4ab9
A
3192 if (e != NULL) {
3193 assertSuccess("ucol_getKeywordValues", &ec);
3194 assertTrue("ucol_getKeywordValues!=0", e!=0);
2ca993e8 3195 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
729e4ab9
A
3196 /* Don't need to check n because we check list */
3197 uenum_close(e);
3198 } else {
3199 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3200 }
374ca955
A
3201
3202 /* Try setting a warning before calling ucol_getKeywordValues */
3203 ec = U_USING_FALLBACK_WARNING;
3204 e = ucol_getKeywordValues(KW[0], &ec);
729e4ab9
A
3205 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3206 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
2ca993e8 3207 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
729e4ab9
A
3208 /* Don't need to check n because we check list */
3209 uenum_close(e);
3210 }
374ca955
A
3211
3212 /*
3213U_DRAFT int32_t U_EXPORT2
3214ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3215 const char* locale, UBool* isAvailable,
3216 UErrorCode* status);
3217}
3218*/
729e4ab9 3219 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
374ca955 3220 &isAvailable, &ec);
729e4ab9 3221 if (assertSuccess("getFunctionalEquivalent", &ec)) {
57a6839d 3222 assertEquals("getFunctionalEquivalent(de)", "root", loc);
729e4ab9
A
3223 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3224 isAvailable == TRUE);
3225 }
374ca955 3226
729e4ab9 3227 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
374ca955 3228 &isAvailable, &ec);
729e4ab9 3229 if (assertSuccess("getFunctionalEquivalent", &ec)) {
57a6839d
A
3230 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3231 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3232 isAvailable == FALSE);
729e4ab9 3233 }
374ca955
A
3234}
3235
/* supersedes TestJ784 */
/**
 * Checks the expected ordering of pinyin-style accented vowels, both with an
 * explicit "[before 2]" rule set and with the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables: macron form before the plain form, for "a" and "e". */
    static const char *syllables[] = {
        "l\\u0101",  "la",
        "l\\u0101n", "lan ",
        "l\\u0113",  "le",
        "l\\u0113n", "len"
    };

    /* Tone and case variants of "a" after x/X, then in the middle of x..x. */
    static const char *toneAndCase[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    static const char *hanOrder[] = { // rdar://53741390
        "\\u85CF", // 藏 cáng
        "\\u92BA", // 銺 zàng
        "\\u85CF\\u6587", // 藏文 zàngwén
        "\\u85CF\\u8BED", // 藏语 zàngyǔ
        "\\u81D3", // 臓 zàng
    };

    genericRulesStarter(rules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));
    genericRulesStarter(rules, toneAndCase, UPRV_LENGTHOF(toneAndCase));
    genericLocaleStarter("zh", toneAndCase, UPRV_LENGTHOF(toneAndCase));
    genericLocaleStarter("zh", hanOrder, UPRV_LENGTHOF(hanOrder));
}
3305
3306static void TestBeforeTightening(void) {
46f4442e 3307 static const struct {
374ca955
A
3308 const char *rules;
3309 UErrorCode expectedStatus;
3310 } tests[] = {
3311 { "&[before 1]a<x", U_ZERO_ERROR },
3312 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3313 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3314 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3315 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3316 { "&[before 2]a<<x",U_ZERO_ERROR },
3317 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3318 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3319 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
3320 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
3321 { "&[before 3]a<<<x",U_ZERO_ERROR },
3322 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
3323 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3324 };
3325
3326 int32_t i = 0;
3327
3328 UErrorCode status = U_ZERO_ERROR;
3329 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3330 uint32_t rlen = 0;
3331
3332 UCollator *coll = NULL;
3333
3334
2ca993e8 3335 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
3336 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3337 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3338 if(status != tests[i].expectedStatus) {
729e4ab9 3339 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
374ca955
A
3340 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3341 }
3342 ucol_close(coll);
3343 status = U_ZERO_ERROR;
3344 }
3345
3346}
3347
51004dcb 3348/*
374ca955
A
3349&m < a
3350&[before 1] a < x <<< X << q <<< Q < z
3351assert: m <<< M < x <<< X << q <<< Q < z < a < n
3352
3353&m < a
3354&[before 2] a << x <<< X << q <<< Q < z
3355assert: m <<< M < x <<< X << q <<< Q << a < z < n
3356
3357&m < a
3358&[before 3] a <<< x <<< X << q <<< Q < z
3359assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3360
3361
3362&m << a
3363&[before 1] a < x <<< X << q <<< Q < z
3364assert: x <<< X << q <<< Q < z < m <<< M << a < n
3365
3366&m << a
3367&[before 2] a << x <<< X << q <<< Q < z
3368assert: m <<< M << x <<< X << q <<< Q << a < z < n
3369
3370&m << a
3371&[before 3] a <<< x <<< X << q <<< Q < z
3372assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3373
3374
3375&m <<< a
3376&[before 1] a < x <<< X << q <<< Q < z
3377assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3378
3379&m <<< a
3380&[before 2] a << x <<< X << q <<< Q < z
3381assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3382
3383&m <<< a
3384&[before 3] a <<< x <<< X << q <<< Q < z
3385assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3386
3387
3388&[before 1] s < x <<< X << q <<< Q < z
3389assert: r <<< R < x <<< X << q <<< Q < z < s < n
3390
3391&[before 2] s << x <<< X << q <<< Q < z
3392assert: r <<< R < x <<< X << q <<< Q << s < z < n
3393
3394&[before 3] s <<< x <<< X << q <<< Q < z
3395assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3396
3397
3398&[before 1] \u24DC < x <<< X << q <<< Q < z
3399assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3400
3401&[before 2] \u24DC << x <<< X << q <<< Q < z
3402assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3403
3404&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3405assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
51004dcb 3406*/
374ca955
A
3407
3408
3409#if 0
3410/* requires features not yet supported */
3411static void TestMoreBefore(void) {
46f4442e 3412 static const struct {
374ca955 3413 const char* rules;
46f4442e 3414 const char* order[16];
374ca955
A
3415 int32_t size;
3416 } tests[] = {
3417 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
3418 { "m","M","x","X","q","Q","z","a","n" }, 9},
3419 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
3420 { "m","M","x","X","q","Q","a","z","n" }, 9},
3421 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
3422 { "m","M","x","X","a","q","Q","z","n" }, 9},
3423 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
3424 { "x","X","q","Q","z","m","M","a","n" }, 9},
3425 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
3426 { "m","M","x","X","q","Q","a","z","n" }, 9},
3427 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
3428 { "m","M","x","X","a","q","Q","z","n" }, 9},
3429 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
3430 { "x","X","q","Q","z","n","m","a","M" }, 9},
3431 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
3432 { "x","X","q","Q","m","a","M","z","n" }, 9},
3433 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
3434 { "m","x","X","a","M","q","Q","z","n" }, 9},
3435 { "&[before 1] s < x <<< X << q <<< Q < z",
3436 { "r","R","x","X","q","Q","z","s","n" }, 9},
3437 { "&[before 2] s << x <<< X << q <<< Q < z",
3438 { "r","R","x","X","q","Q","s","z","n" }, 9},
3439 { "&[before 3] s <<< x <<< X << q <<< Q < z",
3440 { "r","R","x","X","s","q","Q","z","n" }, 9},
3441 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
3442 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
3443 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
3444 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
3445 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
3446 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
3447 };
3448
3449 int32_t i = 0;
3450
2ca993e8 3451 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
374ca955
A
3452 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
3453 }
3454}
3455#endif
3456
3457static void TestTailorNULL( void ) {
3458 const static char* rule = "&a <<< '\\u0000'";
3459 UErrorCode status = U_ZERO_ERROR;
3460 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3461 uint32_t rlen = 0;
3462 UChar a = 1, null = 0;
3463 UCollationResult res = UCOL_EQUAL;
3464
3465 UCollator *coll = NULL;
3466
3467
3468 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3469 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
73c04bcf
A
3470
3471 if(U_FAILURE(status)) {
729e4ab9 3472 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
73c04bcf
A
3473 } else {
3474 res = ucol_strcoll(coll, &a, 1, &null, 1);
3475
3476 if(res != UCOL_LESS) {
3477 log_err("NULL was not tailored properly!\n");
3478 }
374ca955 3479 }
73c04bcf 3480
374ca955
A
3481 ucol_close(coll);
3482}
b75a7d8f 3483
73c04bcf
A
3484static void
3485TestUpperFirstQuaternary(void)
3486{
3487 const char* tests[] = { "B", "b", "Bb", "bB" };
3488 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3489 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
2ca993e8 3490 genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
73c04bcf
A
3491}
3492
3493static void
3494TestJ4960(void)
3495{
3496 const char* tests[] = { "\\u00e2T", "aT" };
3497 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3498 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3499 const char* tests2[] = { "a", "A" };
3500 const char* rule = "&[first tertiary ignorable]=A=a";
3501 UColAttribute att2[] = { UCOL_CASE_LEVEL };
3502 UColAttributeValue attVals2[] = { UCOL_ON };
3503 /* Test whether we correctly ignore primary ignorables on case level when */
3504 /* we have only primary & case level */
2ca993e8 3505 genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
73c04bcf
A
3506 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3507 /* and case level */
2ca993e8 3508 genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
73c04bcf 3509 /* Test whether completely ignorable letters have case level info (they shouldn't) */
2ca993e8 3510 genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
73c04bcf
A
3511}
3512
3513static void
3514TestJ5223(void)
3515{
3516 static const char *test = "this is a test string";
3517 UChar ustr[256];
3518 int32_t ustr_length = u_unescape(test, ustr, 256);
3519 unsigned char sortkey[256];
3520 int32_t sortkey_length;
3521 UErrorCode status = U_ZERO_ERROR;
3522 static UCollator *coll = NULL;
3523 coll = ucol_open("root", &status);
3524 if(U_FAILURE(status)) {
729e4ab9 3525 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
73c04bcf
A
3526 return;
3527 }
3528 ucol_setStrength(coll, UCOL_PRIMARY);
3529 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3530 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3531 if (U_FAILURE(status)) {
3532 log_err("Failed setting atributes\n");
3533 return;
729e4ab9 3534 }
73c04bcf
A
3535 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3536 if (sortkey_length > 256) return;
3537
3538 /* we mark the position where the null byte should be written in advance */
3539 sortkey[sortkey_length-1] = 0xAA;
3540
3541 /* we set the buffer size one byte higher than needed */
3542 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3543 sortkey_length+1);
3544
3545 /* no error occurs (for me) */
3546 if (sortkey[sortkey_length-1] == 0xAA) {
3547 log_err("Hit bug at first try\n");
3548 }
3549
3550 /* we mark the position where the null byte should be written again */
3551 sortkey[sortkey_length-1] = 0xAA;
3552
3553 /* this time we set the buffer size to the exact amount needed */
3554 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3555 sortkey_length);
3556
3557 /* now the trailing null byte is not written */
3558 if (sortkey[sortkey_length-1] == 0xAA) {
3559 log_err("Hit bug at second try\n");
3560 }
3561
3562 ucol_close(coll);
3563}
3564
3565/* Regression test for Thai partial sort key problem */
3566static void
3567TestJ5232(void)
3568{
3569 const static char *test[] = {
3570 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3571 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3572 };
729e4ab9 3573
2ca993e8 3574 genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
73c04bcf
A
3575}
3576
46f4442e
A
/**
 * J5367: runs the generic ordering check on "a", "y" with rules that tailor
 * "a" after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *ordered[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, ordered, UPRV_LENGTHOF(ordered));
}
3584
3585static void
3586TestVI5913(void)
3587{
3588 UErrorCode status = U_ZERO_ERROR;
3589 int32_t i, j;
3590 UCollator *coll =NULL;
3591 uint8_t resColl[100], expColl[100];
3592 int32_t rLen, tLen, ruleLen, sLen, kLen;
57a6839d 3593 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
46f4442e 3594 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
57a6839d
A
3595 /*
3596 * Note: Just tailoring &z<ae^ does not work as expected:
3597 * The UCA spec requires for discontiguous contractions that they
3598 * extend an *existing match* by one combining mark at a time.
3599 * Therefore, ae must be a contraction so that the builder finds
3600 * discontiguous contractions for ae^, for example with an intervening underdot.
3601 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3602 */
3603 UChar rule3[256]={
3604 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3605 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3606 0};
46f4442e
A
3607 static const UChar tData[][20]={
3608 {0x1EAC, 0},
3609 {0x0041, 0x0323, 0x0302, 0},
3610 {0x1EA0, 0x0302, 0},
3611 {0x00C2, 0x0323, 0},
3612 {0x1ED8, 0}, /* O with dot and circumflex */
3613 {0x1ECC, 0x0302, 0},
3614 {0x1EB7, 0},
3615 {0x1EA1, 0x0306, 0},
3616 };
3617 static const UChar tailorData[][20]={
3618 {0x1FA2, 0}, /* Omega with 3 combining marks */
3619 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3620 {0x1FF3, 0x0313, 0x0300, 0},
3621 {0x1F60, 0x0300, 0x0345, 0},
3622 {0x1F62, 0x0345, 0},
3623 {0x1FA0, 0x0300, 0},
3624 };
3625 static const UChar tailorData2[][20]={
3626 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3627 {0x0073, 0x0323, 0x030C, 0},
3628 {0x0073, 0x030C, 0x0323, 0},
3629 };
3630 static const UChar tailorData3[][20]={
3631 {0x007a, 0}, /* z */
3632 {0x0061, 0x0065, 0}, /* a + e */
3633 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3634 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3635 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3636 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3637 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3638 {0x00EA, 0}, /* e with circumflex */
3639 };
73c04bcf 3640
46f4442e
A
3641 /* Test Vietnamese sort. */
3642 coll = ucol_open("vi", &status);
3643 if(U_FAILURE(status)) {
729e4ab9 3644 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
46f4442e
A
3645 return;
3646 }
3647 log_verbose("\n\nVI collation:");
3648 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3649 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3650 }
3651 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3652 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3653 }
3654 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3655 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3656 }
3657 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3658 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3659 }
3660
3661 for (j=0; j<8; j++) {
3662 tLen = u_strlen(tData[j]);
3663 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3664 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3665 for(i = 0; i<rLen; i++) {
3666 log_verbose(" %02X", resColl[i]);
3667 }
3668 }
3669
3670 ucol_close(coll);
3671
3672 /* Test Romanian sort. */
3673 coll = ucol_open("ro", &status);
3674 log_verbose("\n\nRO collation:");
3675 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3676 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3677 }
3678 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3679 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3680 }
3681 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3682 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3683 }
3684
3685 for (j=4; j<8; j++) {
3686 tLen = u_strlen(tData[j]);
3687 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3688 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3689 for(i = 0; i<rLen; i++) {
3690 log_verbose(" %02X", resColl[i]);
3691 }
3692 }
3693 ucol_close(coll);
3694
3695 /* Test the precomposed Greek character with 3 combining marks. */
3696 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3697 ruleLen = u_strlen(rule);
3698 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3699 if (U_FAILURE(status)) {
3700 log_err("ucol_openRules failed with %s\n", u_errorName(status));
3701 return;
3702 }
3703 sLen = u_strlen(tailorData[0]);
3704 for (j=1; j<6; j++) {
3705 tLen = u_strlen(tailorData[j]);
3706 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
3707 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3708 }
3709 }
3710 /* Test getSortKey. */
3711 tLen = u_strlen(tailorData[0]);
3712 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3713 for (j=0; j<6; j++) {
3714 tLen = u_strlen(tailorData[j]);
3715 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3716 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3717 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3718 for(i = 0; i<rLen; i++) {
3719 log_err(" %02X", resColl[i]);
3720 }
3721 }
3722 }
3723 ucol_close(coll);
3724
3725 log_verbose("\n\nTailoring test for s with caron:");
3726 ruleLen = u_strlen(rule2);
3727 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3728 tLen = u_strlen(tailorData2[0]);
3729 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3730 for (j=1; j<3; j++) {
3731 tLen = u_strlen(tailorData2[j]);
3732 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3733 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3734 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3735 for(i = 0; i<rLen; i++) {
3736 log_err(" %02X", resColl[i]);
3737 }
3738 }
3739 }
3740 ucol_close(coll);
3741
3742 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3743 ruleLen = u_strlen(rule3);
3744 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3745 tLen = u_strlen(tailorData3[3]);
3746 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
57a6839d
A
3747 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3748 for(i = 0; i<kLen; i++) {
3749 log_verbose(" %02X", expColl[i]);
3750 }
46f4442e
A
3751 for (j=4; j<6; j++) {
3752 tLen = u_strlen(tailorData3[j]);
3753 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3754
3755 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
57a6839d 3756 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
46f4442e
A
3757 for(i = 0; i<rLen; i++) {
3758 log_err(" %02X", resColl[i]);
3759 }
3760 }
3761
57a6839d 3762 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
46f4442e
A
3763 for(i = 0; i<rLen; i++) {
3764 log_verbose(" %02X", resColl[i]);
3765 }
3766 }
3767 ucol_close(coll);
3768}
3769
3770static void
3771TestTailor6179(void)
3772{
3773 UErrorCode status = U_ZERO_ERROR;
3774 int32_t i;
3775 UCollator *coll =NULL;
3776 uint8_t resColl[100];
3777 int32_t rLen, tLen, ruleLen;
3778 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
4388f060
A
3779 static const UChar rule1[]={
3780 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
46f4442e
A
3781 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3782 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3783 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3784 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
4388f060
A
3785 static const UChar rule2[]={
3786 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
46f4442e
A
3787 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3788 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3789 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3790 0x3C,0x3C,0x20,0x62,0};
3791
4388f060 3792 static const UChar tData1[][4]={
46f4442e
A
3793 {0x61, 0},
3794 {0x62, 0},
3795 { 0xFDD0,0x009E, 0}
3796 };
4388f060
A
3797 static const UChar tData2[][4]={
3798 {0x61, 0},
3799 {0x62, 0},
3800 { 0xFDD0,0x009E, 0}
46f4442e
A
3801 };
3802
729e4ab9
A
3803 /*
3804 * These values from FractionalUCA.txt will change,
3805 * and need to be updated here.
57a6839d
A
3806 * TODO: Make this not check for particular sort keys.
3807 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
729e4ab9 3808 */
57a6839d
A
3809 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3810 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3811 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3812 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3813
3814 UParseError parseError;
46f4442e
A
3815
3816 /* Test [Last Primary ignorable] */
729e4ab9 3817
4388f060 3818 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
46f4442e
A
3819 ruleLen = u_strlen(rule1);
3820 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3821 if (U_FAILURE(status)) {
729e4ab9 3822 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
46f4442e
A
3823 return;
3824 }
3825 tLen = u_strlen(tData1[0]);
3826 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
2ca993e8 3827 if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
4388f060 3828 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
46f4442e
A
3829 for(i = 0; i<rLen; i++) {
3830 log_err(" %02X", resColl[i]);
3831 }
4388f060 3832 log_err("\n");
46f4442e
A
3833 }
3834 tLen = u_strlen(tData1[1]);
3835 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
2ca993e8 3836 if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
4388f060 3837 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
46f4442e
A
3838 for(i = 0; i<rLen; i++) {
3839 log_err(" %02X", resColl[i]);
3840 }
4388f060 3841 log_err("\n");
46f4442e
A
3842 }
3843 ucol_close(coll);
729e4ab9 3844
46f4442e
A
3845
3846 /* Test [Last Secondary ignorable] */
4388f060 3847 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
57a6839d
A
3848 ruleLen = u_strlen(rule2);
3849 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
46f4442e 3850 if (U_FAILURE(status)) {
4388f060 3851 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
57a6839d
A
3852 log_info(" offset=%d \"%s\" | \"%s\"\n",
3853 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
46f4442e
A
3854 return;
3855 }
3856 tLen = u_strlen(tData2[0]);
3857 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
2ca993e8 3858 if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
4388f060 3859 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
46f4442e
A
3860 for(i = 0; i<rLen; i++) {
3861 log_err(" %02X", resColl[i]);
3862 }
4388f060 3863 log_err("\n");
46f4442e
A
3864 }
3865 tLen = u_strlen(tData2[1]);
3866 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
2ca993e8 3867 if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
57a6839d
A
3868 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
3869 for(i = 0; i<rLen; i++) {
3870 log_err(" %02X", resColl[i]);
3871 }
3872 log_err("\n");
46f4442e
A
3873 }
3874 ucol_close(coll);
3875}
3876
3877static void
3878TestUCAPrecontext(void)
3879{
3880 UErrorCode status = U_ZERO_ERROR;
3881 int32_t i, j;
3882 UCollator *coll =NULL;
3883 uint8_t resColl[100], prevColl[100];
3884 int32_t rLen, tLen, ruleLen;
3885 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
729e4ab9 3886 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
46f4442e 3887 /* & l middle-dot << a a is an expansion. */
729e4ab9 3888
46f4442e
A
3889 UChar tData1[][20]={
3890 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3891 { 0x387, 0}, /* standalone middle dot(0x387) */
3892 { 0x61, 0}, /* a */
3893 { 0x6C, 0}, /* l */
729e4ab9 3894 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
46f4442e
A
3895 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3896 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3897 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3898 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3899 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3900 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3901 };
729e4ab9 3902
46f4442e
A
3903 log_verbose("\n\nEN collation:");
3904 coll = ucol_open("en", &status);
3905 if (U_FAILURE(status)) {
729e4ab9 3906 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
46f4442e
A
3907 return;
3908 }
3909 for (j=0; j<11; j++) {
3910 tLen = u_strlen(tData1[j]);
3911 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3912 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3913 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3914 j, tData1[j]);
3915 }
3916 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3917 for(i = 0; i<rLen; i++) {
3918 log_verbose(" %02X", resColl[i]);
3919 }
3920 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3921 }
3922 ucol_close(coll);
729e4ab9
A
3923
3924
46f4442e
A
3925 log_verbose("\n\nJA collation:");
3926 coll = ucol_open("ja", &status);
3927 if (U_FAILURE(status)) {
3928 log_err("Tailoring test: &z <<a|- failed!");
3929 return;
3930 }
3931 for (j=0; j<11; j++) {
3932 tLen = u_strlen(tData1[j]);
3933 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3934 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3935 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3936 j, tData1[j]);
3937 }
3938 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3939 for(i = 0; i<rLen; i++) {
3940 log_verbose(" %02X", resColl[i]);
3941 }
3942 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3943 }
3944 ucol_close(coll);
729e4ab9 3945
46f4442e
A
3946
3947 log_verbose("\n\nTailoring test: & middle dot < a ");
3948 ruleLen = u_strlen(rule1);
3949 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3950 if (U_FAILURE(status)) {
3951 log_err("Tailoring test: & middle dot < a failed!");
3952 return;
3953 }
3954 for (j=0; j<11; j++) {
3955 tLen = u_strlen(tData1[j]);
3956 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3957 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3958 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3959 j, tData1[j]);
3960 }
3961 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3962 for(i = 0; i<rLen; i++) {
3963 log_verbose(" %02X", resColl[i]);
3964 }
3965 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3966 }
3967 ucol_close(coll);
729e4ab9 3968
46f4442e
A
3969
3970 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3971 ruleLen = u_strlen(rule2);
3972 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3973 if (U_FAILURE(status)) {
3974 log_err("Tailoring test: & l middle-dot << a failed!");
3975 return;
3976 }
3977 for (j=0; j<11; j++) {
3978 tLen = u_strlen(tData1[j]);
3979 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3980 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
729e4ab9 3981 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
46f4442e
A
3982 j, tData1[j]);
3983 }
3984 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
729e4ab9 3985 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
46f4442e
A
3986 j, tData1[j]);
3987 }
3988 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3989 for(i = 0; i<rLen; i++) {
3990 log_verbose(" %02X", resColl[i]);
3991 }
3992 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3993 }
3994 ucol_close(coll);
3995}
3996
3997static void
3998TestOutOfBuffer5468(void)
3999{
4000 static const char *test = "\\u4e00";
4001 UChar ustr[256];
4002 int32_t ustr_length = u_unescape(test, ustr, 256);
4003 unsigned char shortKeyBuf[1];
4004 int32_t sortkey_length;
4005 UErrorCode status = U_ZERO_ERROR;
4006 static UCollator *coll = NULL;
729e4ab9 4007
46f4442e
A
4008 coll = ucol_open("root", &status);
4009 if(U_FAILURE(status)) {
729e4ab9 4010 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
46f4442e
A
4011 return;
4012 }
4013 ucol_setStrength(coll, UCOL_PRIMARY);
4014 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4015 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4016 if (U_FAILURE(status)) {
4017 log_err("Failed setting atributes\n");
4018 return;
729e4ab9
A
4019 }
4020
46f4442e
A
4021 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4022 if (sortkey_length != 4) {
4023 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
4024 }
4025 log_verbose("length of sortKey is %d", sortkey_length);
4026 ucol_close(coll);
4027}
4028
4029#define TSKC_DATA_SIZE 5
4030#define TSKC_BUF_SIZE 50
4031static void
4032TestSortKeyConsistency(void)
4033{
4034 UErrorCode icuRC = U_ZERO_ERROR;
4035 UCollator* ucol;
4036 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4037
4038 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4039 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4040 int32_t i, j, i2;
4041
4042 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4043 if (U_FAILURE(icuRC))
4044 {
729e4ab9 4045 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
46f4442e
A
4046 return;
4047 }
4048
4049 for (i = 0; i < TSKC_DATA_SIZE; i++)
4050 {
4051 UCharIterator uiter;
4052 uint32_t state[2] = { 0, 0 };
4053 int32_t dataLen = i+1;
4054 for (j=0; j<TSKC_BUF_SIZE; j++)
4055 bufFull[i][j] = bufPart[i][j] = 0;
4056
4057 /* Full sort key */
4058 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4059
4060 /* Partial sort key */
4061 uiter_setString(&uiter, data, dataLen);
4062 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4063 if (U_FAILURE(icuRC))
4064 {
4065 log_err("ucol_nextSortKeyPart failed\n");
4066 ucol_close(ucol);
4067 return;
4068 }
4069
4070 for (i2=0; i2<i; i2++)
4071 {
4072 UBool fullMatch = TRUE;
4073 UBool partMatch = TRUE;
4074 for (j=0; j<TSKC_BUF_SIZE; j++)
4075 {
4076 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4077 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4078 }
4079 if (fullMatch != partMatch) {
4080 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4081 : "partial key was consistent, but full key changed\n");
4082 ucol_close(ucol);
4083 return;
4084 }
4085 }
4086 }
4087
4088 /*=============================================*/
4089 ucol_close(ucol);
4090}
4091
4092/* ticket: 6101 */
4093static void TestCroatianSortKey(void) {
4094 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4095 UErrorCode status = U_ZERO_ERROR;
4096 UCollator *ucol;
4097 UCharIterator iter;
4098
4099 static const UChar text[] = { 0x0044, 0xD81A };
4100
2ca993e8 4101 size_t length = UPRV_LENGTHOF(text);
46f4442e
A
4102
4103 uint8_t textSortKey[32];
4104 size_t lenSortKey = 32;
4105 size_t actualSortKeyLen;
4106 uint32_t uStateInfo[2] = { 0, 0 };
4107
4108 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4109 if (U_FAILURE(status)) {
729e4ab9 4110 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
46f4442e
A
4111 return;
4112 }
4113
3d1f044b 4114 uiter_setString(&iter, text, (int32_t)length);
46f4442e
A
4115
4116 actualSortKeyLen = ucol_nextSortKeyPart(
4117 ucol, &iter, (uint32_t*)uStateInfo,
3d1f044b 4118 textSortKey, (int32_t)lenSortKey, &status
46f4442e
A
4119 );
4120
4121 if (actualSortKeyLen == lenSortKey) {
4122 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4123 }
4124
4125 ucol_close(ucol);
4126}
4127
4128/* ticket: 6140 */
4129/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
729e4ab9 4130 * they are both Hiragana and Katakana
46f4442e
A
4131 */
4132#define SORTKEYLEN 50
4133static void TestHiragana(void) {
4134 UErrorCode status = U_ZERO_ERROR;
4135 UCollator* ucol;
4136 UCollationResult strcollresult;
4137 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4138 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
2ca993e8
A
4139 int32_t data1Len = UPRV_LENGTHOF(data1);
4140 int32_t data2Len = UPRV_LENGTHOF(data2);
46f4442e
A
4141 int32_t i, j;
4142 uint8_t sortKey1[SORTKEYLEN];
4143 uint8_t sortKey2[SORTKEYLEN];
4144
4145 UCharIterator uiter1;
4146 UCharIterator uiter2;
4147 uint32_t state1[2] = { 0, 0 };
4148 uint32_t state2[2] = { 0, 0 };
4149 int32_t keySize1;
4150 int32_t keySize2;
4151
4152 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4153 &status);
4154 if (U_FAILURE(status)) {
729e4ab9 4155 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
46f4442e
A
4156 return;
4157 }
4158
4159 /* Start of full sort keys */
4160 /* Full sort key1 */
4161 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4162 /* Full sort key2 */
4163 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4164 if (keySize1 == keySize2) {
4165 for (i = 0; i < keySize1; i++) {
4166 if (sortKey1[i] != sortKey2[i]) {
4167 log_err("Full sort keys are different. Should be equal.");
4168 }
4169 }
4170 } else {
4171 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4172 }
4173 /* End of full sort keys */
4174
4175 /* Start of partial sort keys */
4176 /* Partial sort key1 */
4177 uiter_setString(&uiter1, data1, data1Len);
4178 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4179 /* Partial sort key2 */
4180 uiter_setString(&uiter2, data2, data2Len);
4181 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4182 if (U_SUCCESS(status) && keySize1 == keySize2) {
4183 for (j = 0; j < keySize1; j++) {
4184 if (sortKey1[j] != sortKey2[j]) {
4185 log_err("Partial sort keys are different. Should be equal");
4186 }
4187 }
4188 } else {
4189 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4190 }
4191 /* End of partial sort keys */
4192
4193 /* Start of strcoll */
4194 /* Use ucol_strcoll() to determine ordering */
4195 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4196 if (strcollresult != UCOL_EQUAL) {
4197 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4198 }
729e4ab9 4199
46f4442e
A
4200 ucol_close(ucol);
4201}
73c04bcf 4202
729e4ab9
A
4203/* Convenient struct for running collation tests */
4204typedef struct {
4205 const UChar source[MAX_TOKEN_LEN]; /* String on left */
4206 const UChar target[MAX_TOKEN_LEN]; /* String on right */
4207 UCollationResult result; /* -1, 0 or +1, depending on collation */
4208} OneTestCase;
4209
4210/*
4211 * Utility function to test one collation test case.
4212 * @param testcases Array of test cases.
4213 * @param n_testcases Size of the array testcases.
4214 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4215 * @param n_rules Size of the array str_rules.
4216 */
4217static void doTestOneTestCase(const OneTestCase testcases[],
4218 int n_testcases,
4219 const char* str_rules[],
4220 int n_rules)
4221{
4222 int rule_no, testcase_no;
4223 UChar rule[500];
4224 int32_t length = 0;
4225 UErrorCode status = U_ZERO_ERROR;
4226 UParseError parse_error;
4227 UCollator *myCollation;
4228
4229 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4230
4231 length = u_unescape(str_rules[rule_no], rule, 500);
4232 if (length == 0) {
4233 log_err("ERROR: The rule cannot be unescaped: %s\n");
4234 return;
4235 }
4236 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4237 if(U_FAILURE(status)){
4238 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
57a6839d
A
4239 log_info(" offset=%d \"%s\" | \"%s\"\n",
4240 parse_error.offset,
4241 aescstrdup(parse_error.preContext, -1),
4242 aescstrdup(parse_error.postContext, -1));
729e4ab9
A
4243 return;
4244 }
4245 log_verbose("Testing the <<* syntax\n");
4246 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4247 ucol_setStrength(myCollation, UCOL_TERTIARY);
4248 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4249 doTest(myCollation,
4250 testcases[testcase_no].source,
4251 testcases[testcase_no].target,
4252 testcases[testcase_no].result
4253 );
4254 }
4255 ucol_close(myCollation);
4256 }
4257}
4258
4259const static OneTestCase rangeTestcases[] = {
4260 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
4261 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
4262 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
4263
4264 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
4265 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
4266 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
4267 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
4268 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
4269
4270 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
4271 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
4272 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
4273 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
4274
4275 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
4276 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
4277 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
4278 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
4279 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
4280 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
4281 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
4282 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
4283};
4284
2ca993e8 4285static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
729e4ab9
A
4286
4287const static OneTestCase rangeTestcasesSupplemental[] = {
57a6839d
A
4288 { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
4289 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
729e4ab9 4290 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
57a6839d 4291 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
729e4ab9
A
4292 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
4293 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
57a6839d 4294 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10001 */
729e4ab9
A
4295};
4296
2ca993e8 4297static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
729e4ab9
A
4298
4299const static OneTestCase rangeTestcasesQwerty[] = {
4300 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
4301 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
4302
4303 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
4304 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
4305
4306 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
4307 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
4308
4309 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
4310 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
4311
4312 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4313 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
4314 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4315 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
4316};
4317
2ca993e8 4318static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
729e4ab9
A
4319
4320static void TestSameStrengthList(void)
4321{
4322 const char* strRules[] = {
4323 /* Normal */
4324 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4325
4326 /* Lists */
4327 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4328 };
2ca993e8 4329 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4330}
4331
4332static void TestSameStrengthListQuoted(void)
4333{
4334 const char* strRules[] = {
4335 /* Lists with quoted characters */
4336 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4337 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4338
4339 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4340 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4341
4342 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4343 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4344 };
2ca993e8 4345 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4346}
4347
4348static void TestSameStrengthListSupplemental(void)
4349{
4350 const char* strRules[] = {
57a6839d
A
4351 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4352 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4353 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4354 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
729e4ab9 4355 };
2ca993e8 4356 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4357}
4358
4359static void TestSameStrengthListQwerty(void)
4360{
4361 const char* strRules[] = {
4362 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4363 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4364 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4365 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4366 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4367
4368 /* Quoted characters also will work if two quoted characters are not consecutive. */
4369 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4370
4371 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4372 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4373
4374 };
2ca993e8 4375 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4376}
4377
4378static void TestSameStrengthListQuotedQwerty(void)
4379{
4380 const char* strRules[] = {
4381 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4382 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4383 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4384
4385 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4386 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4387 };
2ca993e8 4388 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4389}
4390
4391static void TestSameStrengthListRanges(void)
4392{
4393 const char* strRules[] = {
4394 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4395 };
2ca993e8 4396 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4397}
4398
4399static void TestSameStrengthListSupplementalRanges(void)
4400{
4401 const char* strRules[] = {
57a6839d
A
4402 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4403 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
729e4ab9 4404 };
2ca993e8 4405 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4406}
4407
4408static void TestSpecialCharacters(void)
4409{
4410 const char* strRules[] = {
4411 /* Normal */
4412 "&';'<'+'<','<'-'<'&'<'*'",
4413
4414 /* List */
4415 "&';'<*'+,-&*'",
4416
4417 /* Range */
4418 "&';'<*'+'-'-&*'",
4419 };
4420
4421 const static OneTestCase specialCharacterStrings[] = {
4422 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
4423 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
4424 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
4425 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
4426 };
2ca993e8 4427 doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4428}
4429
4430static void TestPrivateUseCharacters(void)
4431{
4432 const char* strRules[] = {
4433 /* Normal */
4434 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4435 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4436 };
4437
4438 const static OneTestCase privateUseCharacterStrings[] = {
4439 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4440 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4441 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4442 { {0xe2da}, {0xe2db}, UCOL_LESS },
4443 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4444 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4445 };
2ca993e8 4446 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4447}
4448
4449static void TestPrivateUseCharactersInList(void)
4450{
4451 const char* strRules[] = {
4452 /* List */
4453 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4454 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4455 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4456 };
4457
4458 const static OneTestCase privateUseCharacterStrings[] = {
4459 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4460 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4461 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4462 { {0xe2da}, {0xe2db}, UCOL_LESS },
4463 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4464 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4465 };
2ca993e8 4466 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4467}
4468
4469static void TestPrivateUseCharactersInRange(void)
4470{
4471 const char* strRules[] = {
4472 /* Range */
4473 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4474 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4475 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4476 };
4477
4478 const static OneTestCase privateUseCharacterStrings[] = {
4479 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4480 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4481 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4482 { {0xe2da}, {0xe2db}, UCOL_LESS },
4483 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4484 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4485 };
2ca993e8 4486 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
4487}
4488
4489static void TestInvalidListsAndRanges(void)
4490{
4491 const char* invalidRules[] = {
4492 /* Range not in starred expression */
4493 "&\\ufffe<\\uffff-\\U00010002",
4494
4495 /* Range without start */
4496 "&a<*-c",
4497
4498 /* Range without end */
4499 "&a<*b-",
4500
4501 /* More than one hyphen */
4502 "&a<*b-g-l",
4503
4504 /* Range in the wrong order */
4505 "&a<*k-b",
4506
4507 };
4508
4509 UChar rule[500];
4510 UErrorCode status = U_ZERO_ERROR;
4511 UParseError parse_error;
2ca993e8 4512 int n_rules = UPRV_LENGTHOF(invalidRules);
729e4ab9
A
4513 int rule_no;
4514 int length;
4515 UCollator *myCollation;
4516
4517 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4518
4519 length = u_unescape(invalidRules[rule_no], rule, 500);
4520 if (length == 0) {
4521 log_err("ERROR: The rule cannot be unescaped: %s\n");
4522 return;
4523 }
4524 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
57a6839d 4525 (void)myCollation; /* Suppress set but not used warning. */
729e4ab9
A
4526 if(!U_FAILURE(status)){
4527 log_err("ERROR: Could not cause a failure as expected: \n");
4528 }
4529 status = U_ZERO_ERROR;
4530 }
4531}
4532
4533/*
4534 * This test ensures that characters placed before a character in a different script have the same lead byte
4535 * in their collation key before and after script reordering.
4536 */
4537static void TestBeforeRuleWithScriptReordering(void)
4538{
4539 UParseError error;
4540 UErrorCode status = U_ZERO_ERROR;
4541 UCollator *myCollation;
4542 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4543 UChar rules[500];
4544 uint32_t rulesLength = 0;
4545 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4546 UCollationResult collResult;
4547
4548 uint8_t baseKey[256];
4549 uint32_t baseKeyLength;
4550 uint8_t beforeKey[256];
4551 uint32_t beforeKeyLength;
4552
4553 UChar base[] = { 0x03b1 }; /* base */
2ca993e8 4554 int32_t baseLen = UPRV_LENGTHOF(base);
729e4ab9
A
4555
4556 UChar before[] = { 0x0e01 }; /* ko kai */
2ca993e8 4557 int32_t beforeLen = UPRV_LENGTHOF(before);
729e4ab9
A
4558
4559 /*UChar *data[] = { before, base };
4560 genericRulesStarter(srules, data, 2);*/
4561
4562 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4563
57a6839d
A
4564 (void)beforeKeyLength; /* Suppress set but not used warnings. */
4565 (void)baseKeyLength;
729e4ab9
A
4566
4567 /* build collator */
4568 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4569
2ca993e8 4570 rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
729e4ab9
A
4571 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4572 if(U_FAILURE(status)) {
4573 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4574 return;
4575 }
4576
4577 /* check collation results - before rule applied but not script reordering */
4578 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4579 if (collResult != UCOL_GREATER) {
4580 log_err("Collation result not correct before script reordering = %d\n", collResult);
4581 }
4582
4583 /* check the lead byte of the collation keys before script reordering */
4584 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4585 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4586 if (baseKey[0] != beforeKey[0]) {
4587 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4588 }
4589
4590 /* reorder the scripts */
4591 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4592 if(U_FAILURE(status)) {
4593 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4594 return;
4595 }
4596
4597 /* check collation results - before rule applied and after script reordering */
4598 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4599 if (collResult != UCOL_GREATER) {
4600 log_err("Collation result not correct after script reordering = %d\n", collResult);
4601 }
4602
4603 /* check the lead byte of the collation keys after script reordering */
4604 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4605 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4606 if (baseKey[0] != beforeKey[0]) {
4607 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4608 }
4609
4610 ucol_close(myCollation);
4611}
4612
4613/*
4614 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4615 */
4616static void TestNonLeadBytesDuringCollationReordering(void)
4617{
4618 UErrorCode status = U_ZERO_ERROR;
4619 UCollator *myCollation;
4620 int32_t reorderCodes[1] = {USCRIPT_GREEK};
729e4ab9
A
4621
4622 uint8_t baseKey[256];
4623 uint32_t baseKeyLength;
4624 uint8_t reorderKey[256];
4625 uint32_t reorderKeyLength;
4626
4627 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4628
4388f060 4629 uint32_t i;
729e4ab9
A
4630
4631
4632 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4633
4634 /* build collator tertiary */
4635 myCollation = ucol_open("", &status);
4636 ucol_setStrength(myCollation, UCOL_TERTIARY);
4637 if(U_FAILURE(status)) {
4638 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4639 return;
4640 }
2ca993e8 4641 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
729e4ab9 4642
2ca993e8 4643 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4644 if(U_FAILURE(status)) {
4645 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4646 return;
4647 }
2ca993e8 4648 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
729e4ab9
A
4649
4650 if (baseKeyLength != reorderKeyLength) {
4388f060 4651 log_err("Key lengths not the same during reordering.\n");
729e4ab9
A
4652 return;
4653 }
4654
4655 for (i = 1; i < baseKeyLength; i++) {
4656 if (baseKey[i] != reorderKey[i]) {
4657 log_err("Collation key bytes not the same at position %d.\n", i);
4658 return;
4659 }
4660 }
4661 ucol_close(myCollation);
4662
4663 /* build collator quaternary */
4664 myCollation = ucol_open("", &status);
4665 ucol_setStrength(myCollation, UCOL_QUATERNARY);
4666 if(U_FAILURE(status)) {
4667 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4668 return;
4669 }
2ca993e8 4670 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
729e4ab9 4671
2ca993e8 4672 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4673 if(U_FAILURE(status)) {
4674 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4675 return;
4676 }
2ca993e8 4677 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
729e4ab9
A
4678
4679 if (baseKeyLength != reorderKeyLength) {
4388f060 4680 log_err("Key lengths not the same during reordering.\n");
729e4ab9
A
4681 return;
4682 }
4683
4684 for (i = 1; i < baseKeyLength; i++) {
4685 if (baseKey[i] != reorderKey[i]) {
4686 log_err("Collation key bytes not the same at position %d.\n", i);
4687 return;
4688 }
4689 }
4690 ucol_close(myCollation);
4691}
4692
4693/*
4694 * Test reordering API.
4695 */
4696static void TestReorderingAPI(void)
4697{
4698 UErrorCode status = U_ZERO_ERROR;
4699 UCollator *myCollation;
4700 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
b331163b 4701 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4388f060 4702 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
b331163b 4703 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
729e4ab9
A
4704 UCollationResult collResult;
4705 int32_t retrievedReorderCodesLength;
4388f060 4706 int32_t retrievedReorderCodes[10];
729e4ab9
A
4707 UChar greekString[] = { 0x03b1 };
4708 UChar punctuationString[] = { 0x203e };
4388f060 4709 int loopIndex;
729e4ab9
A
4710
4711 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4712
4713 /* build collator tertiary */
4714 myCollation = ucol_open("", &status);
4715 ucol_setStrength(myCollation, UCOL_TERTIARY);
4716 if(U_FAILURE(status)) {
4717 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4718 return;
4719 }
4720
4721 /* set the reorderding */
2ca993e8 4722 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
729e4ab9
A
4723 if (U_FAILURE(status)) {
4724 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4725 return;
4726 }
4727
4388f060 4728 /* get the reordering */
729e4ab9
A
4729 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4730 if (status != U_BUFFER_OVERFLOW_ERROR) {
4731 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4732 return;
4733 }
4734 status = U_ZERO_ERROR;
2ca993e8
A
4735 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4736 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
729e4ab9
A
4737 return;
4738 }
4388f060 4739 /* now let's really get it */
2ca993e8 4740 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4741 if (U_FAILURE(status)) {
4742 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4743 return;
4744 }
2ca993e8
A
4745 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4746 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4747 return;
4748 }
4749 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4750 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4751 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4752 return;
4753 }
4754 }
2ca993e8 4755 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
729e4ab9
A
4756 if (collResult != UCOL_LESS) {
4757 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4758 return;
4759 }
4760
4761 /* clear the reordering */
4762 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4763 if (U_FAILURE(status)) {
4764 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4765 return;
4766 }
4767
4388f060 4768 /* get the reordering again */
729e4ab9
A
4769 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4770 if (retrievedReorderCodesLength != 0) {
4771 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4772 return;
4773 }
4774
2ca993e8 4775 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
729e4ab9
A
4776 if (collResult != UCOL_GREATER) {
4777 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4778 return;
4779 }
4780
b331163b
A
4781 /* clear the reordering using [NONE] */
4782 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4783 if (U_FAILURE(status)) {
4784 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4785 return;
4786 }
4787
4788 /* get the reordering again */
4789 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4790 if (retrievedReorderCodesLength != 0) {
4791 log_err_status(status,
4792 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4793 retrievedReorderCodesLength);
4794 return;
4795 }
4796
4388f060 4797 /* test for error condition on duplicate reorder codes */
2ca993e8 4798 ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4388f060
A
4799 if (!U_FAILURE(status)) {
4800 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4801 return;
4802 }
4803
4804 status = U_ZERO_ERROR;
4805 /* test for reorder codes after a reset code */
2ca993e8 4806 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4388f060
A
4807 if (!U_FAILURE(status)) {
4808 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4809 return;
4810 }
4811
729e4ab9
A
4812 ucol_close(myCollation);
4813}
4814
4815/*
4388f060 4816 * Test reordering API.
729e4ab9 4817 */
4388f060 4818static void TestReorderingAPIWithRuleCreatedCollator(void)
729e4ab9 4819{
729e4ab9
A
4820 UErrorCode status = U_ZERO_ERROR;
4821 UCollator *myCollation;
4388f060 4822 UChar rules[90];
57a6839d
A
4823 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4824 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4825 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4388f060
A
4826 UCollationResult collResult;
4827 int32_t retrievedReorderCodesLength;
4828 int32_t retrievedReorderCodes[10];
57a6839d
A
4829 static const UChar greekString[] = { 0x03b1 };
4830 static const UChar punctuationString[] = { 0x203e };
4831 static const UChar hanString[] = { 0x65E5, 0x672C };
4388f060 4832 int loopIndex;
729e4ab9 4833
4388f060
A
4834 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4835
4836 /* build collator from rules */
4837 u_uastrcpy(rules, "[reorder Hani Grek]");
4838 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4839 if(U_FAILURE(status)) {
4840 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4841 return;
4842 }
4843
4844 /* get the reordering */
2ca993e8 4845 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4846 if (U_FAILURE(status)) {
4847 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4848 return;
4849 }
2ca993e8
A
4850 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4851 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4388f060
A
4852 return;
4853 }
4854 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4855 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4856 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
729e4ab9
A
4857 return;
4858 }
4388f060 4859 }
2ca993e8 4860 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4388f060 4861 if (collResult != UCOL_GREATER) {
57a6839d 4862 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4388f060
A
4863 return;
4864 }
4388f060 4865
57a6839d 4866 /* set the reordering */
2ca993e8 4867 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
4868 if (U_FAILURE(status)) {
4869 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4870 return;
4871 }
57a6839d 4872
4388f060
A
4873 /* get the reordering */
4874 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4875 if (status != U_BUFFER_OVERFLOW_ERROR) {
4876 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4877 return;
4878 }
4879 status = U_ZERO_ERROR;
2ca993e8
A
4880 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4881 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4882 return;
4883 }
4884 /* now let's really get it */
2ca993e8 4885 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
4886 if (U_FAILURE(status)) {
4887 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4888 return;
4889 }
2ca993e8
A
4890 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4891 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
4892 return;
4893 }
4894 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4895 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4896 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
729e4ab9
A
4897 return;
4898 }
4388f060 4899 }
2ca993e8 4900 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4388f060
A
4901 if (collResult != UCOL_LESS) {
4902 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4903 return;
4904 }
57a6839d 4905
4388f060
A
4906 /* clear the reordering */
4907 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4908 if (U_FAILURE(status)) {
4909 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4910 return;
4911 }
4912
4913 /* get the reordering again */
4914 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4915 if (retrievedReorderCodesLength != 0) {
4916 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4917 return;
4918 }
4919
2ca993e8 4920 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4388f060
A
4921 if (collResult != UCOL_GREATER) {
4922 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4923 return;
4924 }
4925
57a6839d
A
4926 /* reset the reordering */
4927 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4928 if (U_FAILURE(status)) {
4929 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4930 return;
4931 }
2ca993e8 4932 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
57a6839d
A
4933 if (U_FAILURE(status)) {
4934 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4935 return;
4936 }
2ca993e8
A
4937 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4938 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
57a6839d
A
4939 return;
4940 }
4941 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4942 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4943 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4944 return;
4945 }
4946 }
4947
4388f060
A
4948 ucol_close(myCollation);
4949}
4950
b331163b
A
4951static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4952 int32_t i;
4953 for (i = 0; i < length; ++i) {
4954 if (expectedScript == scripts[i]) { return TRUE; }
4955 }
4956 return FALSE;
4388f060
A
4957}
4958
4959static void TestEquivalentReorderingScripts(void) {
b331163b
A
4960 // Beginning with ICU 55, collation reordering moves single scripts
4961 // rather than groups of scripts,
4962 // except where scripts share a range and sort primary-equal.
4388f060 4963 UErrorCode status = U_ZERO_ERROR;
b331163b
A
4964 int32_t equivalentScripts[100];
4965 int32_t length;
4966 int i;
4967 int32_t prevScript;
4968 /* These scripts are expected to be equivalent. */
4969 static const int32_t expectedScripts[] = {
4970 USCRIPT_HIRAGANA,
4971 USCRIPT_KATAKANA,
4972 USCRIPT_KATAKANA_OR_HIRAGANA
4388f060
A
4973 };
4974
b331163b
A
4975 equivalentScripts[0] = 0;
4976 length = ucol_getEquivalentReorderCodes(
2ca993e8 4977 USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4388f060 4978 if (U_FAILURE(status)) {
b331163b 4979 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
4980 return;
4981 }
b331163b
A
4982 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4983 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4984 "length expected 1, was = %d; expected [%d] was [%d]\n",
4985 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4388f060 4986 }
b331163b
A
4987
4988 length = ucol_getEquivalentReorderCodes(
2ca993e8 4989 USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
4990 if (U_FAILURE(status)) {
4991 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
4992 return;
4993 }
2ca993e8 4994 if (length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
4995 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4996 "expected %d, was = %d\n",
2ca993e8 4997 UPRV_LENGTHOF(expectedScripts), length);
b331163b
A
4998 }
4999 prevScript = -1;
5000 for (i = 0; i < length; ++i) {
5001 int32_t script = equivalentScripts[i];
5002 if (script <= prevScript) {
5003 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
5004 }
5005 prevScript = script;
5006 }
2ca993e8 5007 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
b331163b
A
5008 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5009 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5010 expectedScripts[i]);
4388f060
A
5011 }
5012 }
5013
b331163b 5014 length = ucol_getEquivalentReorderCodes(
2ca993e8 5015 USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4388f060 5016 if (U_FAILURE(status)) {
b331163b 5017 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4388f060
A
5018 return;
5019 }
2ca993e8 5020 if (length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
5021 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5022 "expected %d, was = %d\n",
2ca993e8 5023 UPRV_LENGTHOF(expectedScripts), length);
4388f060 5024 }
2ca993e8 5025 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
b331163b
A
5026 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5027 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5028 expectedScripts[i]);
729e4ab9 5029 }
729e4ab9 5030 }
b331163b
A
5031
5032 length = ucol_getEquivalentReorderCodes(
2ca993e8
A
5033 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5034 if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
b331163b
A
5035 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5036 "expected %d, was = %d\n",
2ca993e8 5037 UPRV_LENGTHOF(expectedScripts), length);
b331163b
A
5038 }
5039
5040 length = ucol_getEquivalentReorderCodes(
2ca993e8 5041 USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5042 if (U_FAILURE(status) || length != 3) {
5043 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5044 "expected 3, was = %d\n", length);
5045 }
5046 length = ucol_getEquivalentReorderCodes(
2ca993e8 5047 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5048 if (U_FAILURE(status) || length != 3) {
5049 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5050 "expected 3, was = %d\n", length);
5051 }
5052 length = ucol_getEquivalentReorderCodes(
2ca993e8 5053 USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5054 if (U_FAILURE(status) || length != 3) {
5055 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5056 "expected 3, was = %d\n", length);
5057 }
5058
5059 length = ucol_getEquivalentReorderCodes(
2ca993e8 5060 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5061 if (U_FAILURE(status) || length != 2) {
5062 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5063 "expected 2, was = %d\n", length);
5064 }
5065 length = ucol_getEquivalentReorderCodes(
2ca993e8 5066 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
b331163b
A
5067 if (U_FAILURE(status) || length != 2) {
5068 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5069 "expected 2, was = %d\n", length);
5070 }
729e4ab9
A
5071}
5072
4388f060
A
5073static void TestReorderingAcrossCloning(void)
5074{
5075 UErrorCode status = U_ZERO_ERROR;
5076 UCollator *myCollation;
5077 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5078 UCollator *clonedCollation;
4388f060
A
5079 int32_t retrievedReorderCodesLength;
5080 int32_t retrievedReorderCodes[10];
5081 int loopIndex;
5082
5083 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5084
5085 /* build collator tertiary */
5086 myCollation = ucol_open("", &status);
5087 ucol_setStrength(myCollation, UCOL_TERTIARY);
5088 if(U_FAILURE(status)) {
5089 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5090 return;
5091 }
5092
5093 /* set the reorderding */
2ca993e8 5094 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
5095 if (U_FAILURE(status)) {
5096 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5097 return;
5098 }
5099
5100 /* clone the collator */
57a6839d 5101 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
4388f060
A
5102 if (U_FAILURE(status)) {
5103 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5104 return;
5105 }
5106
5107 /* get the reordering */
2ca993e8 5108 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4388f060
A
5109 if (U_FAILURE(status)) {
5110 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5111 return;
5112 }
2ca993e8
A
5113 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5114 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4388f060
A
5115 return;
5116 }
5117 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5118 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5119 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5120 return;
5121 }
5122 }
5123
5124 /*uprv_free(buffer);*/
5125 ucol_close(myCollation);
5126 ucol_close(clonedCollation);
5127}
5128
5129/*
5130 * Utility function to test one collation reordering test case set.
5131 * @param testcases Array of test cases.
5132 * @param n_testcases Size of the array testcases.
5133 * @param reorderTokens Array of reordering codes.
5134 * @param reorderTokensLen Size of the array reorderTokens.
5135 */
5136static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5137{
5138 uint32_t testCaseNum;
5139 UErrorCode status = U_ZERO_ERROR;
5140 UCollator *myCollation;
5141
5142 myCollation = ucol_open("", &status);
5143 if (U_FAILURE(status)) {
5144 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5145 return;
5146 }
5147 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5148 if(U_FAILURE(status)) {
5149 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5150 return;
5151 }
5152
5153 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5154 doTest(myCollation,
5155 testCases[testCaseNum].source,
5156 testCases[testCaseNum].target,
5157 testCases[testCaseNum].result
5158 );
5159 }
5160 ucol_close(myCollation);
5161}
5162
729e4ab9
A
5163static void TestGreekFirstReorder(void)
5164{
5165 const char* strRules[] = {
5166 "[reorder Grek]"
5167 };
5168
5169 const int32_t apiRules[] = {
5170 USCRIPT_GREEK
5171 };
5172
5173 const static OneTestCase privateUseCharacterStrings[] = {
5174 { {0x0391}, {0x0391}, UCOL_EQUAL },
5175 { {0x0041}, {0x0391}, UCOL_GREATER },
5176 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5177 { {0x0060}, {0x0391}, UCOL_LESS },
5178 { {0x0391}, {0xe2dc}, UCOL_LESS },
5179 { {0x0391}, {0x0060}, UCOL_GREATER },
5180 };
5181
5182 /* Test rules creation */
2ca993e8 5183 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5184
5185 /* Test collation reordering API */
2ca993e8 5186 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5187}
5188
5189static void TestGreekLastReorder(void)
5190{
5191 const char* strRules[] = {
5192 "[reorder Zzzz Grek]"
5193 };
5194
5195 const int32_t apiRules[] = {
5196 USCRIPT_UNKNOWN, USCRIPT_GREEK
5197 };
5198
5199 const static OneTestCase privateUseCharacterStrings[] = {
5200 { {0x0391}, {0x0391}, UCOL_EQUAL },
5201 { {0x0041}, {0x0391}, UCOL_LESS },
5202 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5203 { {0x0060}, {0x0391}, UCOL_LESS },
5204 { {0x0391}, {0xe2dc}, UCOL_GREATER },
5205 };
5206
5207 /* Test rules creation */
2ca993e8 5208 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5209
5210 /* Test collation reordering API */
2ca993e8 5211 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5212}
5213
5214static void TestNonScriptReorder(void)
5215{
5216 const char* strRules[] = {
5217 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5218 };
5219
5220 const int32_t apiRules[] = {
5221 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5222 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5223 UCOL_REORDER_CODE_CURRENCY
5224 };
5225
5226 const static OneTestCase privateUseCharacterStrings[] = {
5227 { {0x0391}, {0x0041}, UCOL_LESS },
5228 { {0x0041}, {0x0391}, UCOL_GREATER },
5229 { {0x0060}, {0x0041}, UCOL_LESS },
5230 { {0x0060}, {0x0391}, UCOL_GREATER },
5231 { {0x0024}, {0x0041}, UCOL_GREATER },
5232 };
5233
5234 /* Test rules creation */
2ca993e8 5235 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5236
5237 /* Test collation reordering API */
2ca993e8 5238 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5239}
5240
5241static void TestHaniReorder(void)
5242{
5243 const char* strRules[] = {
5244 "[reorder Hani]"
5245 };
5246 const int32_t apiRules[] = {
5247 USCRIPT_HAN
5248 };
5249
5250 const static OneTestCase privateUseCharacterStrings[] = {
5251 { {0x4e00}, {0x0041}, UCOL_LESS },
5252 { {0x4e00}, {0x0060}, UCOL_GREATER },
5253 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5254 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5255 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5256 { {0xfa27}, {0x0041}, UCOL_LESS },
5257 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5258 };
5259
5260 /* Test rules creation */
2ca993e8 5261 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5262
5263 /* Test collation reordering API */
2ca993e8 5264 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5265}
5266
4388f060
A
5267static void TestHaniReorderWithOtherRules(void)
5268{
5269 const char* strRules[] = {
5270 "[reorder Hani] &b<a"
5271 };
51004dcb 5272 /*const int32_t apiRules[] = {
4388f060 5273 USCRIPT_HAN
51004dcb 5274 };*/
4388f060
A
5275
5276 const static OneTestCase privateUseCharacterStrings[] = {
5277 { {0x4e00}, {0x0041}, UCOL_LESS },
5278 { {0x4e00}, {0x0060}, UCOL_GREATER },
5279 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5280 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5281 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5282 { {0xfa27}, {0x0041}, UCOL_LESS },
5283 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5284 { {0x0062}, {0x0061}, UCOL_LESS },
5285 };
5286
5287 /* Test rules creation */
2ca993e8 5288 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4388f060
A
5289}
5290
5291static void TestMultipleReorder(void)
729e4ab9
A
5292{
5293 const char* strRules[] = {
5294 "[reorder Grek Zzzz DIGIT Latn Hani]"
5295 };
5296
5297 const int32_t apiRules[] = {
5298 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5299 };
5300
5301 const static OneTestCase collationTestCases[] = {
5302 { {0x0391}, {0x0041}, UCOL_LESS},
5303 { {0x0031}, {0x0041}, UCOL_LESS},
5304 { {0x0041}, {0x4e00}, UCOL_LESS},
5305 };
5306
5307 /* Test rules creation */
2ca993e8 5308 doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
729e4ab9
A
5309
5310 /* Test collation reordering API */
2ca993e8 5311 doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
729e4ab9
A
5312}
5313
4388f060
A
5314/*
5315 * Test that covers issue reported in ticket 8814
5316 */
51004dcb 5317static void TestReorderWithNumericCollation(void)
4388f060
A
5318{
5319 UErrorCode status = U_ZERO_ERROR;
5320 UCollator *myCollation;
5321 UCollator *myReorderCollation;
5322 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5323 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5324 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5325 UChar fortyS[] = { 0x0053 };
5326 UChar fortyThreeP[] = { 0x0050 };
5327 uint8_t fortyS_sortKey[128];
5328 int32_t fortyS_sortKey_Length;
5329 uint8_t fortyThreeP_sortKey[128];
5330 int32_t fortyThreeP_sortKey_Length;
5331 uint8_t fortyS_sortKey_reorder[128];
5332 int32_t fortyS_sortKey_reorder_Length;
5333 uint8_t fortyThreeP_sortKey_reorder[128];
5334 int32_t fortyThreeP_sortKey_reorder_Length;
5335 UCollationResult collResult;
5336 UCollationResult collResultReorder;
4388f060
A
5337
5338 log_verbose("Testing reordering with and without numeric collation\n");
5339
5340 /* build collator tertiary with numeric */
5341 myCollation = ucol_open("", &status);
5342 /*
5343 ucol_setStrength(myCollation, UCOL_TERTIARY);
5344 */
5345 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5346 if(U_FAILURE(status)) {
5347 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5348 return;
5349 }
5350
5351 /* build collator tertiary with numeric and reordering */
5352 myReorderCollation = ucol_open("", &status);
5353 /*
5354 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5355 */
5356 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
2ca993e8 5357 ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4388f060
A
5358 if(U_FAILURE(status)) {
5359 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5360 return;
5361 }
5362
2ca993e8
A
5363 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5364 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5365 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5366 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
4388f060
A
5367
5368 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5369 log_err_status(status, "ERROR: couldn't generate sort keys\n");
5370 return;
5371 }
2ca993e8
A
5372 collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5373 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
4388f060
A
5374 /*
5375 fprintf(stderr, "\tcollResult = %x\n", collResult);
5376 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5377 fprintf(stderr, "\nfortyS\n");
5378 for (i = 0; i < fortyS_sortKey_Length; i++) {
5379 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5380 }
5381 fprintf(stderr, "\nfortyThreeP\n");
5382 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5383 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5384 }
5385 */
5386 if (collResult != collResultReorder) {
5387 log_err_status(status, "ERROR: collation results should have been the same.\n");
5388 return;
5389 }
5390
5391 ucol_close(myCollation);
5392 ucol_close(myReorderCollation);
5393}
5394
729e4ab9
A
/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both are identical up to and including the terminating
 * zero byte, -1 when the first differing byte of a is smaller than that of
 * b, and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    for (;;) {
        const uint8_t lhs = *a++;
        const uint8_t rhs = *b++;

        if (lhs != rhs) {
            return (lhs < rhs) ? -1 : 1;
        }
        if (lhs == 0) {
            /* Both sequences ended at the same position. */
            return 0;
        }
    }
}
5404
4388f060
A
5405static void TestImportRulesDeWithPhonebook(void)
5406{
5407 const char* normalRules[] = {
5408 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5409 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5410 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5411 };
5412 const OneTestCase normalTests[] = {
5413 { {0x00e6}, {0x00c6}, UCOL_LESS},
5414 { {0x00fc}, {0x00dc}, UCOL_GREATER},
5415 };
5416
5417 const char* importRules[] = {
5418 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5419 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5420 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5421 };
5422 const OneTestCase importTests[] = {
5423 { {0x00e6}, {0x00c6}, UCOL_LESS},
5424 { {0x00fc}, {0x00dc}, UCOL_LESS},
5425 };
5426
2ca993e8
A
5427 doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5428 doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
4388f060
A
5429}
5430
51004dcb 5431#if 0
4388f060
A
5432static void TestImportRulesFiWithEor(void)
5433{
5434 /* DUCET. */
5435 const char* defaultRules[] = {
5436 "&a<b", /* Dummy rule. */
5437 };
5438
5439 const OneTestCase defaultTests[] = {
5440 { {0x0110}, {0x00F0}, UCOL_LESS},
5441 { {0x00a3}, {0x00a5}, UCOL_LESS},
5442 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5443 };
5444
5445 /* European Ordering rules: ignore currency characters. */
5446 const char* eorRules[] = {
5447 "[import root-u-co-eor]",
5448 };
5449
5450 const OneTestCase eorTests[] = {
5451 { {0x0110}, {0x00F0}, UCOL_LESS},
5452 { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5453 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5454 };
5455
5456 const char* fiStdRules[] = {
5457 "[import fi-u-co-standard]",
5458 };
5459
5460 const OneTestCase fiStdTests[] = {
5461 { {0x0110}, {0x00F0}, UCOL_GREATER},
5462 { {0x00a3}, {0x00a5}, UCOL_LESS},
5463 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5464 };
5465
5466 /* Both European Ordering Rules and Fi Standard Rules. */
5467 const char* eorFiStdRules[] = {
5468 "[import root-u-co-eor][import fi-u-co-standard]",
5469 };
5470
5471 /* This is essentially same as the one before once fi.txt is updated with import. */
5472 const char* fiEorRules[] = {
5473 "[import fi-u-co-eor]",
5474 };
5475
5476 const OneTestCase fiEorTests[] = {
5477 { {0x0110}, {0x00F0}, UCOL_GREATER},
5478 { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5479 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5480 };
5481
2ca993e8
A
5482 doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
5483 doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
5484 doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
5485 doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));
4388f060 5486
57a6839d 5487 log_knownIssue("8962", NULL);
4388f060
A
5488 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
5489 eor{
5490 Sequence{
5491 "[import root-u-co-eor][import fi-u-co-standard]"
5492 }
5493 Version{"21.0"}
5494 }
5495 */
2ca993e8 5496 /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */
4388f060
A
5497
5498}
51004dcb 5499#endif
4388f060
A
5500
5501#if 0
5502/*
5503 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5504 * the resource files are built with -includeUnihanColl option.
5505 * TODO: Uncomment this function and make it work when unihan rules are built by default.
5506 */
5507static void TestImportRulesCJKWithUnihan(void)
5508{
5509 /* DUCET. */
5510 const char* defaultRules[] = {
5511 "&a<b", /* Dummy rule. */
5512 };
5513
5514 const OneTestCase defaultTests[] = {
5515 { {0x3402}, {0x4e1e}, UCOL_GREATER},
5516 };
5517
5518 /* European Ordering rules: ignore currency characters. */
5519 const char* unihanRules[] = {
5520 "[import ko-u-co-unihan]",
5521 };
5522
5523 const OneTestCase unihanTests[] = {
5524 { {0x3402}, {0x4e1e}, UCOL_LESS},
5525 };
5526
2ca993e8
A
5527 doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
5528 doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));
4388f060
A
5529
5530}
5531#endif
5532
729e4ab9
A
5533static void TestImport(void)
5534{
5535 UCollator* vicoll;
5536 UCollator* escoll;
5537 UCollator* viescoll;
5538 UCollator* importviescoll;
5539 UParseError error;
5540 UErrorCode status = U_ZERO_ERROR;
5541 UChar* virules;
5542 int32_t viruleslength;
5543 UChar* esrules;
5544 int32_t esruleslength;
5545 UChar* viesrules;
5546 int32_t viesruleslength;
5547 char srules[500] = "[import vi][import es]";
5548 UChar rules[500];
5549 uint32_t length = 0;
5550 int32_t itemCount;
5551 int32_t i, k;
5552 UChar32 start;
5553 UChar32 end;
5554 UChar str[500];
5555 int32_t strLength;
5556
5557 uint8_t sk1[500];
5558 uint8_t sk2[500];
5559
5560 UBool b;
5561 USet* tailoredSet;
5562 USet* importTailoredSet;
5563
5564
5565 vicoll = ucol_open("vi", &status);
5566 if(U_FAILURE(status)){
5567 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5568 return;
5569 }
5570
5571 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
b331163b
A
5572 if(viruleslength == 0) {
5573 log_data_err("missing vi tailoring rule string\n");
5574 ucol_close(vicoll);
5575 return;
5576 }
729e4ab9
A
5577 escoll = ucol_open("es", &status);
5578 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5579 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5580 viesrules[0] = 0;
5581 u_strcat(viesrules, virules);
5582 u_strcat(viesrules, esrules);
5583 viesruleslength = viruleslength + esruleslength;
5584 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5585
5586 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5587 length = u_unescape(srules, rules, 500);
5588 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5589 if(U_FAILURE(status)){
5590 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5591 return;
5592 }
5593
5594 tailoredSet = ucol_getTailoredSet(viescoll, &status);
5595 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5596
5597 if(!uset_equals(tailoredSet, importTailoredSet)){
5598 log_err("Tailored sets not equal");
5599 }
5600
5601 uset_close(importTailoredSet);
5602
5603 itemCount = uset_getItemCount(tailoredSet);
5604
5605 for( i = 0; i < itemCount; i++){
5606 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5607 if(strLength < 2){
5608 for (; start <= end; start++){
5609 k = 0;
5610 U16_APPEND(str, k, 500, start, b);
57a6839d 5611 (void)b; /* Suppress set but not used warning. */
729e4ab9
A
5612 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5613 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5614 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5615 log_err("Sort key for %s not equal\n", str);
5616 break;
5617 }
5618 }
5619 }else{
5620 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5621 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5622 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5623 log_err("ZZSort key for %s not equal\n", str);
5624 break;
5625 }
5626
5627 }
5628 }
5629
5630 uset_close(tailoredSet);
5631
5632 uprv_free(viesrules);
5633
5634 ucol_close(vicoll);
5635 ucol_close(escoll);
5636 ucol_close(viescoll);
5637 ucol_close(importviescoll);
5638}
5639
5640static void TestImportWithType(void)
5641{
5642 UCollator* vicoll;
5643 UCollator* decoll;
5644 UCollator* videcoll;
5645 UCollator* importvidecoll;
5646 UParseError error;
5647 UErrorCode status = U_ZERO_ERROR;
5648 const UChar* virules;
5649 int32_t viruleslength;
5650 const UChar* derules;
5651 int32_t deruleslength;
5652 UChar* viderules;
5653 int32_t videruleslength;
5654 const char srules[500] = "[import vi][import de-u-co-phonebk]";
5655 UChar rules[500];
5656 uint32_t length = 0;
5657 int32_t itemCount;
5658 int32_t i, k;
5659 UChar32 start;
5660 UChar32 end;
5661 UChar str[500];
5662 int32_t strLength;
5663
5664 uint8_t sk1[500];
5665 uint8_t sk2[500];
5666
5667 USet* tailoredSet;
5668 USet* importTailoredSet;
5669
5670 vicoll = ucol_open("vi", &status);
5671 if(U_FAILURE(status)){
5672 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5673 return;
5674 }
5675 virules = ucol_getRules(vicoll, &viruleslength);
b331163b
A
5676 if(viruleslength == 0) {
5677 log_data_err("missing vi tailoring rule string\n");
5678 ucol_close(vicoll);
5679 return;
5680 }
729e4ab9
A
5681 /* decoll = ucol_open("de@collation=phonebook", &status); */
5682 decoll = ucol_open("de-u-co-phonebk", &status);
5683 if(U_FAILURE(status)){
5684 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5685 return;
5686 }
5687
5688
5689 derules = ucol_getRules(decoll, &deruleslength);
5690 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5691 viderules[0] = 0;
5692 u_strcat(viderules, virules);
5693 u_strcat(viderules, derules);
5694 videruleslength = viruleslength + deruleslength;
5695 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5696
5697 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5698 length = u_unescape(srules, rules, 500);
5699 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5700 if(U_FAILURE(status)){
5701 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5702 return;
5703 }
5704
5705 tailoredSet = ucol_getTailoredSet(videcoll, &status);
5706 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5707
5708 if(!uset_equals(tailoredSet, importTailoredSet)){
5709 log_err("Tailored sets not equal");
5710 }
5711
5712 uset_close(importTailoredSet);
5713
5714 itemCount = uset_getItemCount(tailoredSet);
5715
5716 for( i = 0; i < itemCount; i++){
5717 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5718 if(strLength < 2){
5719 for (; start <= end; start++){
5720 k = 0;
5721 U16_APPEND_UNSAFE(str, k, start);
5722 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5723 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5724 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5725 log_err("Sort key for %s not equal\n", str);
5726 break;
5727 }
5728 }
5729 }else{
5730 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5731 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5732 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5733 log_err("Sort key for %s not equal\n", str);
5734 break;
5735 }
5736
5737 }
5738 }
5739
5740 uset_close(tailoredSet);
5741
5742 uprv_free(viderules);
5743
5744 ucol_close(videcoll);
5745 ucol_close(importvidecoll);
5746 ucol_close(vicoll);
5747 ucol_close(decoll);
4388f060
A
5748}
5749
5750/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5751static const UChar longUpperStr1[]= { /* 155 chars */
5752 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5753 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5754 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5755 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5756 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5757 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5758 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5759 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5760 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5761 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5762};
5763
5764/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5765static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5766 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5767 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5768 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5769 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5770 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5771};
5772
5773/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5774static const UChar longUpperStr3[]= { /* 324 chars */
5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5781 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5782 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5783 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5784 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5785 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5786 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5787};
729e4ab9 5788
4388f060
A
5789typedef struct {
5790 const UChar * longUpperStrPtr;
5791 int32_t longUpperStrLen;
5792} LongUpperStrItem;
5793
5794/* String pointers must be in reverse collation order of the corresponding strings */
5795static const LongUpperStrItem longUpperStrItems[] = {
2ca993e8
A
5796 { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5797 { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5798 { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
4388f060
A
5799 { NULL, 0 }
5800};
5801
57a6839d 5802enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
4388f060
A
5803
5804/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5805static void TestCaseLevelBufferOverflow(void)
5806{
5807 UErrorCode status = U_ZERO_ERROR;
5808 UCollator * ucol = ucol_open("root", &status);
5809 if ( U_SUCCESS(status) ) {
5810 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5811 if ( U_SUCCESS(status) ) {
5812 const LongUpperStrItem * itemPtr;
5813 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5814 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5815 int32_t sortKeyLen;
5816 if (itemPtr > longUpperStrItems) {
5817 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5818 }
5819 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5820 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5821 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5822 break;
5823 }
5824 if ( itemPtr > longUpperStrItems ) {
5825 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5826 if (compareResult >= 0) {
5827 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5828 }
5829 }
5830 }
5831 } else {
5832 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5833 }
5834 ucol_close(ucol);
5835 } else {
5836 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5837 }
729e4ab9
A
5838}
5839
57a6839d
A
5840/* Test for #10595 */
5841static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5842#define KEY_PART_SIZE 16
5843
5844static void TestNextSortKeyPartJaIdentical(void)
5845{
5846 UErrorCode status = U_ZERO_ERROR;
5847 UCollator *coll;
5848 uint8_t keyPart[KEY_PART_SIZE];
5849 UCharIterator iter;
5850 uint32_t state[2] = {0, 0};
5851 int32_t keyPartLen;
5852
5853 coll = ucol_open("ja", &status);
5854 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5855 if (U_FAILURE(status)) {
5856 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5857 return;
5858 }
5859
5860 uiter_setString(&iter, testJapaneseName, 5);
5861 keyPartLen = KEY_PART_SIZE;
5862 while (keyPartLen == KEY_PART_SIZE) {
5863 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5864 if (U_FAILURE(status)) {
5865 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5866 break;
5867 }
5868 }
5869
5870 ucol_close(coll);
5871}
729e4ab9 5872
b75a7d8f
A
5873#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5874
5875void addMiscCollTest(TestNode** root)
5876{
374ca955
A
5877 TEST(TestRuleOptions);
5878 TEST(TestBeforePrefixFailure);
5879 TEST(TestContractionClosure);
5880 TEST(TestPrefixCompose);
5881 TEST(TestStrCollIdenticalPrefix);
5882 TEST(TestPrefix);
5883 TEST(TestNewJapanese);
5884 /*TEST(TestLimitations);*/
5885 TEST(TestNonChars);
5886 TEST(TestExtremeCompression);
5887 TEST(TestSurrogates);
5888 TEST(TestVariableTopSetting);
57a6839d 5889 TEST(TestMaxVariable);
374ca955
A
5890 TEST(TestBocsuCoverage);
5891 TEST(TestCyrillicTailoring);
5892 TEST(TestCase);
5893 TEST(IncompleteCntTest);
5894 TEST(BlackBirdTest);
5895 TEST(FunkyATest);
5896 TEST(BillFairmanTest);
374ca955
A
5897 TEST(TestChMove);
5898 TEST(TestImplicitTailoring);
5899 TEST(TestFCDProblem);
5900 TEST(TestEmptyRule);
5901 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5902 TEST(TestJ815);
0f5d89e8 5903 TEST(TestUpperCaseFirst);
374ca955 5904 TEST(TestBefore);
374ca955
A
5905 TEST(TestHangulTailoring);
5906 TEST(TestUCARules);
5907 TEST(TestIncrementalNormalize);
5908 TEST(TestComposeDecompose);
5909 TEST(TestCompressOverlap);
5910 TEST(TestContraction);
5911 TEST(TestExpansion);
5912 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5913 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
b75a7d8f
A
5914 TEST(TestOptimize);
5915 TEST(TestSuppressContractions);
5916 TEST(Alexis2);
5917 TEST(TestHebrewUCA);
5918 TEST(TestPartialSortKeyTermination);
5919 TEST(TestSettings);
5920 TEST(TestEquals);
5921 TEST(TestJ2726);
374ca955
A
5922 TEST(NullRule);
5923 TEST(TestNumericCollation);
5924 TEST(TestTibetanConformance);
5925 TEST(TestPinyinProblem);
374ca955
A
5926 TEST(TestSeparateTrees);
5927 TEST(TestBeforePinyin);
5928 TEST(TestBeforeTightening);
5929 /*TEST(TestMoreBefore);*/
5930 TEST(TestTailorNULL);
73c04bcf
A
5931 TEST(TestUpperFirstQuaternary);
5932 TEST(TestJ4960);
5933 TEST(TestJ5223);
5934 TEST(TestJ5232);
46f4442e
A
5935 TEST(TestJ5367);
5936 TEST(TestHiragana);
5937 TEST(TestSortKeyConsistency);
5938 TEST(TestVI5913); /* VI, RO tailored rules */
5939 TEST(TestCroatianSortKey);
5940 TEST(TestTailor6179);
5941 TEST(TestUCAPrecontext);
5942 TEST(TestOutOfBuffer5468);
729e4ab9
A
5943 TEST(TestSameStrengthList);
5944
5945 TEST(TestSameStrengthListQuoted);
5946 TEST(TestSameStrengthListSupplemental);
5947 TEST(TestSameStrengthListQwerty);
5948 TEST(TestSameStrengthListQuotedQwerty);
5949 TEST(TestSameStrengthListRanges);
5950 TEST(TestSameStrengthListSupplementalRanges);
5951 TEST(TestSpecialCharacters);
5952 TEST(TestPrivateUseCharacters);
5953 TEST(TestPrivateUseCharactersInList);
5954 TEST(TestPrivateUseCharactersInRange);
5955 TEST(TestInvalidListsAndRanges);
4388f060
A
5956 TEST(TestImportRulesDeWithPhonebook);
5957 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5958 /* TEST(TestImportRulesCJKWithUnihan); */
729e4ab9
A
5959 TEST(TestImport);
5960 TEST(TestImportWithType);
5961
5962 TEST(TestBeforeRuleWithScriptReordering);
5963 TEST(TestNonLeadBytesDuringCollationReordering);
5964 TEST(TestReorderingAPI);
4388f060
A
5965 TEST(TestReorderingAPIWithRuleCreatedCollator);
5966 TEST(TestEquivalentReorderingScripts);
729e4ab9
A
5967 TEST(TestGreekFirstReorder);
5968 TEST(TestGreekLastReorder);
5969 TEST(TestNonScriptReorder);
5970 TEST(TestHaniReorder);
4388f060 5971 TEST(TestHaniReorderWithOtherRules);
729e4ab9 5972 TEST(TestMultipleReorder);
4388f060 5973 TEST(TestReorderingAcrossCloning);
51004dcb 5974 TEST(TestReorderWithNumericCollation);
4388f060
A
5975
5976 TEST(TestCaseLevelBufferOverflow);
57a6839d 5977 TEST(TestNextSortKeyPartJaIdentical);
b75a7d8f
A
5978}
5979
5980#endif /* #if !UCONFIG_NO_COLLATION */