git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cmsccoll.c

2 // License & terms of use: http://www.unicode.org/copyright.html

3 /********************************************************************

4 * COPYRIGHT:

7 ********************************************************************/

8 /*******************************************************************************

9 *

10 * File cmsccoll.C

11 *

12 *******************************************************************************/

13 /**

14 * These are the tests specific to ICU 1.8 and above, that I didn't know where

15 * to fit.

16 */

18 #include <stdio.h>

20 #include "unicode/utypes.h"

22 #if !UCONFIG_NO_COLLATION

24 #include "unicode/ucol.h"

25 #include "unicode/ucoleitr.h"

26 #include "unicode/uloc.h"

27 #include "cintltst.h"

28 #include "ccolltst.h"

29 #include "callcoll.h"

30 #include "unicode/ustring.h"

31 #include "string.h"

32 #include "ucol_imp.h"

33 #include "cmemory.h"

34 #include "cstring.h"

35 #include "uassert.h"

36 #include "unicode/parseerr.h"

37 #include "unicode/ucnv.h"

38 #include "unicode/ures.h"

39 #include "unicode/uscript.h"

40 #include "unicode/utf16.h"

41 #include "uparse.h"

42 #include "putilimp.h"

45 #define MAX_TOKEN_LEN 16

 /*
  * Function type for a string-comparison callback: it receives an opaque
  * collator pointer, an integer `object`, and two UChar strings with their
  * lengths, and returns a UCollationResult.
  * NOTE(review): the meaning of `object` is not visible in this file.
  * Top-level const on by-value parameters is omitted; it does not take part
  * in the function type.
  */
typedef UCollationResult tst_strcoll(void *collator,
                                     int object,
                                     const UChar *source,
                                     int sLen,
                                     const UChar *target,
                                     int tLen);

/*
 * Inputs for IncompleteCntTest. Within each table, every entry is expected to
 * compare UCOL_LESS against every later entry.
 *
 * cnt1 is used with the rules " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/* cnt2 is used with the rules " & Z < DAVIS < MARK <DAV". */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};

 static void IncompleteCntTest(void)

81 {

82 UErrorCode status = U_ZERO_ERROR;

83 UChar temp[90];

84 UChar t1[90];

85 UChar t2[90];

87 UCollator *coll = NULL;

   uint32_t i = 0, j = 0;

89 uint32_t size = 0;

   u_uastrcpy(temp, " & Z < ABC < Q < B");

   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);

   if(U_SUCCESS(status)) {

96 size = UPRV_LENGTHOF(cnt1);

     for(i = 0; i < size-1; i++) {

       for(j = i+1; j < size; j++) {

99 UCollationElements *iter;

         u_uastrcpy(t1, cnt1[i]);

         u_uastrcpy(t2, cnt1[j]);

         doTest(coll, t1, t2, UCOL_LESS);

103 /* synwee : added collation element iterator test */

         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

         if (U_FAILURE(status)) {

           log_err("Creation of iterator failed\n");

107 break;

108 }

109 backAndForth(iter);

110 ucol_closeElements(iter);

111 }

112 }

113 }

114

115 ucol_close(coll);

116

117

   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");

   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);

120

   if(U_SUCCESS(status)) {

122 size = UPRV_LENGTHOF(cnt2);

     for(i = 0; i < size-1; i++) {

       for(j = i+1; j < size; j++) {

125 UCollationElements *iter;

         u_uastrcpy(t1, cnt2[i]);

         u_uastrcpy(t2, cnt2[j]);

         doTest(coll, t1, t2, UCOL_LESS);

129

130 /* synwee : added collation element iterator test */

         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

         if (U_FAILURE(status)) {

           log_err("Creation of iterator failed\n");

134 break;

135 }

136 backAndForth(iter);

137 ucol_closeElements(iter);

138 }

139 }

140 }

141

142 ucol_close(coll);

143

144

145 }

146

147 const static char shifted[][20] = {

148 "black bird",

149 "black-bird",

150 "blackbird",

151 "black Bird",

152 "black-Bird",

153 "blackBird",

154 "black birds",

155 "black-birds",

156 "blackbirds"

157 };

158

159 const static UCollationResult shiftedTert[] = {

160 UCOL_EQUAL,

161 UCOL_EQUAL,

162 UCOL_EQUAL,

163 UCOL_LESS,

164 UCOL_EQUAL,

165 UCOL_EQUAL,

166 UCOL_LESS,

167 UCOL_EQUAL,

168 UCOL_EQUAL

169 };

170

171 const static char nonignorable[][20] = {

172 "black bird",

173 "black Bird",

174 "black birds",

175 "black-bird",

176 "black-Bird",

177 "black-birds",

178 "blackbird",

179 "blackBird",

180 "blackbirds"

181 };

182

 static void BlackBirdTest(void) {

184 UErrorCode status = U_ZERO_ERROR;

185 UChar t1[90];

186 UChar t2[90];

187

   uint32_t i = 0, j = 0;

189 uint32_t size = 0;

   UCollator *coll = ucol_open("en_US", &status);

191

   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);

194

   if(U_SUCCESS(status)) {

196 size = UPRV_LENGTHOF(nonignorable);

     for(i = 0; i < size-1; i++) {

       for(j = i+1; j < size; j++) {

         u_uastrcpy(t1, nonignorable[i]);

         u_uastrcpy(t2, nonignorable[j]);

         doTest(coll, t1, t2, UCOL_LESS);

202 }

203 }

204 }

205

   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);

208

   if(U_SUCCESS(status)) {

210 size = UPRV_LENGTHOF(shifted);

     for(i = 0; i < size-1; i++) {

       for(j = i+1; j < size; j++) {

         u_uastrcpy(t1, shifted[i]);

         u_uastrcpy(t2, shifted[j]);

         doTest(coll, t1, t2, UCOL_LESS);

216 }

217 }

218 }

219

   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);

   if(U_SUCCESS(status)) {

222 size = UPRV_LENGTHOF(shifted);

     for(i = 1; i < size; i++) {

       u_uastrcpy(t1, shifted[i-1]);

       u_uastrcpy(t2, shifted[i]);

       doTest(coll, t1, t2, shiftedTert[i]);

227 }

228 }

229

230 ucol_close(coll);

231 }

232

233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {

     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},

     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},

     {0x0041/*'A'*/, 0x0300, 0x0000},

     {0x00C0, 0x0301, 0x0000},

238 /* this would work with forced normalization */

     {0x00C0, 0x0316, 0x0000}

240 };

241

242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {

     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},

     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},

     {0x00C0, 0},

     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},

247 /* this would work with forced normalization */

     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}

249 };

250

251 const static UCollationResult results[] = {

252 UCOL_GREATER,

253 UCOL_EQUAL,

254 UCOL_EQUAL,

255 UCOL_GREATER,

256 UCOL_EQUAL

257 };

258

 static void FunkyATest(void)

260 {

261

262 int32_t i;

263 UErrorCode status = U_ZERO_ERROR;

264 UCollator *myCollation;

     myCollation = ucol_open("en_US", &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

268 return;

269 }

     log_verbose("Testing some A letters, for some reason\n");

     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

272 ucol_setStrength(myCollation, UCOL_TERTIARY);

     for (i = 0; i < 4 ; i++)

274 {

         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);

276 }

277 ucol_close(myCollation);

278 }

279

280 UColAttributeValue caseFirst[] = {

281 UCOL_OFF,

282 UCOL_LOWER_FIRST,

283 UCOL_UPPER_FIRST

284 };

285

286

287 UColAttributeValue alternateHandling[] = {

288 UCOL_NON_IGNORABLE,

289 UCOL_SHIFTED

290 };

291

292 UColAttributeValue caseLevel[] = {

293 UCOL_OFF,

294 UCOL_ON

295 };

296

297 UColAttributeValue strengths[] = {

298 UCOL_PRIMARY,

299 UCOL_SECONDARY,

300 UCOL_TERTIARY,

301 UCOL_QUATERNARY,

302 UCOL_IDENTICAL

303 };

304

305 #if 0

/*
 * Printable names for the attribute value lists; entry N of each table names
 * entry N of the value list with the same stem (strengths, caseFirst,
 * alternateHandling, caseLevel).
 */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

static const char * caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };

static const char * alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };

static const char * caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };

330

331 /* not used currently - does not test only prints */

 static void PrintMarkDavis(void)

333 {

334 UErrorCode status = U_ZERO_ERROR;

335 UChar m[256];

336 uint8_t sortkey[256];

   UCollator *coll = ucol_open("en_US", &status);

   uint32_t h,i,j,k, sortkeysize;

339 uint32_t sizem = 0;

340 char buffer[512];

341 uint32_t len = 512;

342

343 log_verbose("PrintMarkDavis");

344

   u_uastrcpy(m, "Mark Davis");

346 sizem = u_strlen(m);

347

348

   m[1] = 0xe4;

350

   for(i = 0; i<sizem; i++) {

     fprintf(stderr, "\\u%04X ", m[i]);

353 }

   fprintf(stderr, "\n");

355

   for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {

     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);

     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

359

     for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {

       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);

       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

363

       for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {

         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);

         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

367

         for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {

           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);

           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);

           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);

           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));

373 }

374

375 }

376

377 }

378

379 }

380 }

381 #endif

382

 static void BillFairmanTest(void) {

384 /*

385 ** check for actual locale via ICU resource bundles

386 **

387 ** lp points to the original locale ("fr_FR_....")

388 */

389

390 UResourceBundle *lr,*cr;

391 UErrorCode lec = U_ZERO_ERROR;

     const char *lp = "fr_FR_you_ll_never_find_this_locale";

393

     log_verbose("BillFairmanTest\n");

395

     lr = ures_open(NULL,lp,&lec);

397 if (lr) {

         cr = ures_getByKey(lr,"collations",0,&lec);

399 if (cr) {

             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);

401 if (lp) {

                 if (U_SUCCESS(lec)) {

                     if(strcmp(lp, "fr") != 0) {

                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);

405 }

406 }

407 }

408 ures_close(cr);

409 }

410 ures_close(lr);

411 }

412 }

413

/*
 * Strings for TestChMove, which expects every entry to compare UCOL_LESS
 * against every later entry under the "cs" collator.
 * Entries are stored as escaped text ("\\u030C") and are expanded with
 * u_unescape() by the test.
 */
static const char chTest[][20] = {
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h", "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s", "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};

433

 static void TestChMove(void) {

     UChar t1[256] = {0};

     UChar t2[256] = {0};

437

     uint32_t i = 0, j = 0;

439 uint32_t size = 0;

440 UErrorCode status = U_ZERO_ERROR;

441

     UCollator *coll = ucol_open("cs", &status);

443

     if(U_SUCCESS(status)) {

445 size = UPRV_LENGTHOF(chTest);

         for(i = 0; i < size-1; i++) {

             for(j = i+1; j < size; j++) {

                 u_unescape(chTest[i], t1, 256);

                 u_unescape(chTest[j], t2, 256);

                 doTest(coll, t1, t2, UCOL_LESS);

451 }

452 }

453 }

454 else {

455 log_data_err("Can't open collator");

456 }

457 ucol_close(coll);

458 }

463 /*

464 const static char impTest[][20] = {

465 "\\u4e00",

466 "a",

467 "A",

468 "b",

469 "B",

470 "\\u4e01"

471 };

472 */

473

474

 static void TestImplicitTailoring(void) {

476 static const struct {

477 const char *rules;

     const char *data[10];

479 const uint32_t len;

480 } tests[] = {

481 {

482 /* Tailor b and c before U+4E00. */

483 "&[before 1]\\u4e00 < b < c "

484 /* Now, before U+4E00 is c; put d and e after that. */

485 "&[before 1]\\u4e00 < d < e",

         { "b", "c", "d", "e", "\\u4e00"}, 5 },

       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },

       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},

       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}

490 };

491

492 int32_t i = 0;

493

   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {

       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

496 }

497

498 /*

499 UChar t1[256] = {0};

500 UChar t2[256] = {0};

501

502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";

503

504 uint32_t i = 0, j = 0;

505 uint32_t size = 0;

506 uint32_t ruleLen = 0;

507 UErrorCode status = U_ZERO_ERROR;

508 UCollator *coll = NULL;

509 ruleLen = u_unescape(rule, t1, 256);

510

511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

512

513 if(U_SUCCESS(status)) {

514 size = UPRV_LENGTHOF(impTest);

515 for(i = 0; i < size-1; i++) {

516 for(j = i+1; j < size; j++) {

517 u_unescape(impTest[i], t1, 256);

518 u_unescape(impTest[j], t2, 256);

519 doTest(coll, t1, t2, UCOL_LESS);

520 }

521 }

522 }

523 else {

524 log_err("Can't open collator");

525 }

526 ucol_close(coll);

527 */

528 }

529

 static void TestFCDProblem(void) {

   UChar t1[256] = {0};

   UChar t2[256] = {0};

533

   const char *s1 = "\\u0430\\u0306\\u0325";

   const char *s2 = "\\u04D1\\u0325";

536

537 UErrorCode status = U_ZERO_ERROR;

   UCollator *coll = ucol_open("", &status);

   u_unescape(s1, t1, 256);

   u_unescape(s2, t2, 256);

541

   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

   doTest(coll, t1, t2, UCOL_EQUAL);

544

   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

   doTest(coll, t1, t2, UCOL_EQUAL);

547

548 ucol_close(coll);

549 }

550

551 /*

552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC

553 We're only using NFC/NFD in this test.

554 */

555 #define NORM_BUFFER_TEST_LEN 18

556 typedef struct {

557 UChar32 u;

558 UChar NFC[NORM_BUFFER_TEST_LEN];

559 UChar NFD[NORM_BUFFER_TEST_LEN];

560 } tester;

561

 static void TestComposeDecompose(void) {

563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */

564 static const UChar UNICODESET_STR[] = {

         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,

         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,

         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0

568 };

569 int32_t noOfLoc;

     int32_t i = 0, j = 0;

571

572 UErrorCode status = U_ZERO_ERROR;

573 const char *locName = NULL;

574 uint32_t nfcSize;

575 uint32_t nfdSize;

576 tester **t;

577 uint32_t noCases = 0;

578 UCollator *coll = NULL;

579 UChar32 u = 0;

580 UChar comp[NORM_BUFFER_TEST_LEN];

581 uint32_t len = 0;

582 UCollationElements *iter;

     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);

584 int32_t charsToTestSize;

585

586 noOfLoc = uloc_countAvailable();

587

     coll = ucol_open("", &status);

     if (U_FAILURE(status)) {

         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));

591 uset_close(charsToTest);

592 return;

593 }

594 charsToTestSize = uset_size(charsToTest);

     if (charsToTestSize <= 0) {

         log_err("Set was zero. Missing data?\n");

597 uset_close(charsToTest);

598 return;

599 }

     t = (tester **)malloc(charsToTestSize * sizeof(tester *));

     t[0] = (tester *)malloc(sizeof(tester));

     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);

603

     for(u = 0; u < charsToTestSize; u++) {

         UChar32 ch = uset_charAt(charsToTest, u);

606 len = 0;

         U16_APPEND_UNSAFE(comp, len, ch);

         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);

         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);

610

         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)

           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {

613 t[noCases]->u = ch;

             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {

                 u_strncpy(t[noCases]->NFC, comp, len);

                 t[noCases]->NFC[len] = 0;

617 }

618 noCases++;

             t[noCases] = (tester *)malloc(sizeof(tester));

             uprv_memset(t[noCases], 0, sizeof(tester));

621 }

622 }

     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);

624 uset_close(charsToTest);

625 charsToTest = NULL;

626

     for(u=0; u<(UChar32)noCases; u++) {

         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);

             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

631 }

632 }

633 /*

634 for(u = 0; u < charsToTestSize; u++) {

635 if(!(u&0xFFFF)) {

636 log_verbose("%08X ", u);

637 }

638 uprv_memset(t[noCases], 0, sizeof(tester));

639 t[noCases]->u = u;

640 len = 0;

641 U16_APPEND_UNSAFE(comp, len, u);

642 comp[len] = 0;

643 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);

644 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);

645 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);

646 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);

647 }

648 */

649

650 ucol_close(coll);

651

     log_verbose("Testing locales, number of cases = %i\n", noCases);

     for(i = 0; i<noOfLoc; i++) {

654 status = U_ZERO_ERROR;

655 locName = uloc_getAvailable(i);

         if(hasCollationElements(locName)) {

657 char cName[256];

658 UChar name[256];

             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);

660

             for(j = 0; j<nameSize; j++) {

                 cName[j] = (char)name[j];

663 }

664 cName[nameSize] = 0;

             log_verbose("\nTesting locale %s (%s)\n", locName, cName);

666

             coll = ucol_open(locName, &status);

668 ucol_setStrength(coll, UCOL_IDENTICAL);

             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);

670

             for(u=0; u<(UChar32)noCases; u++) {

                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);

                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

                     log_verbose("Testing NFC\n");

                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);

677 backAndForth(iter);

                     log_verbose("Testing NFD\n");

                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);

680 backAndForth(iter);

681 }

682 }

683 ucol_closeElements(iter);

684 ucol_close(coll);

685 }

686 }

     for(u = 0; u <= (UChar32)noCases; u++) {

688 free(t[u]);

689 }

690 free(t);

691 }

692

 static void TestEmptyRule(void) {

694 UErrorCode status = U_ZERO_ERROR;

695 UChar rulez[] = { 0 };

   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

697

698 ucol_close(coll);

699 }

700

 static void TestUCARules(void) {

702 UErrorCode status = U_ZERO_ERROR;

703 UChar b[256];

704 UChar *rules = b;

705 uint32_t ruleLen = 0;

706 UCollator *UCAfromRules = NULL;

   UCollator *coll = ucol_open("", &status);

708 if(status == U_FILE_ACCESS_ERROR) {

     log_data_err("Is your data around?\n");

710 return;

   } else if(U_FAILURE(status)) {

     log_err("Error opening collator\n");

713 return;

714 }

   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);

716

   log_verbose("TestUCARules\n");

   if(ruleLen > 256) {

     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));

     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);

721 }

   log_verbose("Rules length is %d\n", ruleLen);

   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

   if(U_SUCCESS(status)) {

725 ucol_close(UCAfromRules);

726 } else {

     log_verbose("Unable to create a collator from UCARules!\n");

728 }

729 /*

730 u_unescape(blah, b, 256);

731 ucol_getSortKey(coll, b, 1, res, 256);

732 */

733 ucol_close(coll);

734 if(rules != b) {

735 free(rules);

736 }

737 }

738

739

740 /* Pinyin tonal order */

741 /*

742 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)

743 (w/macron)< (w/acute)< (w/caron)< (w/grave)

744 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)

745 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)

746 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)

747 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)

748 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <

749 .. (\u00fc)

750

751 However, in testing we got the following order:

752 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)

753 (w/acute)< (w/grave)< (w/caron)< (w/macron)

754 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <

755 .. (\u0113)

756 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)

757 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)

758 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <

759 .. (\u01d8)

760 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)

761 */

762

 /*
  * Hands the Pinyin tonal-order rules, built from [before 1] resets on
  * a/e/i/o/u plus a plain reset on u, to genericRulesStarter() together
  * with the vowel forms listed in `expected`.
  */
static void TestBefore(void) {
    static const char *expected[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    static const char *tonalRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tonalRules, expected, UPRV_LENGTHOF(expected));
}

781

782 #if 0

/* superseded by TestBeforePinyin */
/*
 * Passes the Pinyin vowel forms (base letter first, then its tonal
 * variants) to genericLocaleStarter() for the "zh" locale.
 */
static void TestJ784(void) {
    static const char *vowels[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    genericLocaleStarter("zh", vowels, UPRV_LENGTHOF(vowels));
}

796 #endif

797

 /*
  * Passes "I", "i", "Y", "y" (upper case ahead of lower case) to
  * genericLocaleStarter() for the "da" locale.
  * NOTE(review): genericLocaleStarter() is defined elsewhere; presumably it
  * verifies that the strings sort in the listed order - confirm.
  */
static void TestUpperCaseFirst(void) {
    static const char *upperBeforeLower[] = { "I", "i", "Y", "y" };

    genericLocaleStarter("da", upperBeforeLower, UPRV_LENGTHOF(upperBeforeLower));
}

807

 /*
  * Runs one list of a-/ae-/b- strings through two starters: the "fr" locale,
  * and a rule set that turns on [backwards 2] and tailors U+00E6/U+00C6 as
  * expansions after A.
  */
static void TestJ815(void) {
    static const char *words[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };
    const uint32_t wordCount = UPRV_LENGTHOF(words);

    genericLocaleStarter("fr", words, wordCount);
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", words, wordCount);
}

828

829

 static void TestCase(void)

831 {

832 const static UChar gRules[MAX_TOKEN_LEN] =

833 /*" & 0 < 1,\u2461<a,A"*/

     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };

835

836 const static UChar testCase[][MAX_TOKEN_LEN] =

837 {

         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},

         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},

         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},

         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}

842 };

843

844 const static UCollationResult caseTestResults[][9] =

845 {

         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },

         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }

850 };

851

852 const static UColAttributeValue caseTestAttributes[][2] =

853 {

854 { UCOL_LOWER_FIRST, UCOL_OFF},

855 { UCOL_UPPER_FIRST, UCOL_OFF},

856 { UCOL_LOWER_FIRST, UCOL_ON},

857 { UCOL_UPPER_FIRST, UCOL_ON}

858 };

859 int32_t i,j,k;

860 UErrorCode status = U_ZERO_ERROR;

861 UCollationElements *iter;

862 UCollator *myCollation;

     myCollation = ucol_open("en_US", &status);

864

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

867 return;

868 }

     log_verbose("Testing different case settings\n");

870 ucol_setStrength(myCollation, UCOL_TERTIARY);

871

     for(k = 0; k<4; k++) {

       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);

       for (i = 0; i < 3 ; i++) {

         for(j = i+1; j<4; j++) {

           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);

879 }

880 }

881 }

882 ucol_close(myCollation);

883

     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);

     if(U_FAILURE(status)){

         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));

887 return;

888 }

     log_verbose("Testing different case settings with custom rules\n");

890 ucol_setStrength(myCollation, UCOL_TERTIARY);

891

     for(k = 0; k<4; k++) {

       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

       for (i = 0; i < 3 ; i++) {

         for(j = i+1; j<4; j++) {

           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);

           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);

           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);

900 backAndForth(iter);

901 ucol_closeElements(iter);

           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);

903 backAndForth(iter);

904 ucol_closeElements(iter);

905 }

906 }

907 }

908 ucol_close(myCollation);

909 {

910 const static char *lowerFirst[] = {

911 "h",

912 "H",

913 "ch",

914 "Ch",

915 "CH",

916 "cha",

917 "chA",

918 "Cha",

919 "ChA",

920 "CHa",

921 "CHA",

922 "i",

923 "I"

924 };

925

926 const static char *upperFirst[] = {

927 "H",

928 "h",

929 "CH",

930 "Ch",

931 "ch",

932 "CHA",

933 "CHa",

934 "ChA",

935 "Cha",

936 "chA",

937 "cha",

938 "I",

939 "i"

940 };

       log_verbose("mixed case test\n");

       log_verbose("lower first, case level off\n");

       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));

       log_verbose("upper first, case level off\n");

       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));

       log_verbose("lower first, case level on\n");

       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));

       log_verbose("upper first, case level on\n");

       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));

950 }

951

952 }

953

 static void TestIncrementalNormalize(void) {

955

956 /*UChar baseA =0x61;*/

957 UChar baseA =0x41;

958 /* UChar baseB = 0x42;*/

     static const UChar ccMix[]   = {0x316, 0x321, 0x300};

960 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/

961 /*

962 0x316 is combining grave accent below, cc=220

963 0x321 is combining palatalized hook below, cc=202

964 0x300 is combining grave accent, cc=230

965 */

966

967 #define MAXSLEN 2000

968 /*int maxSLen = 64000;*/

969 int sLen;

970 int i;

971

972 UCollator *coll;

973 UErrorCode status = U_ZERO_ERROR;

974 UCollationResult result;

975

     int32_t myQ = getTestOption(QUICK_OPTION);

977

     if(getTestOption(QUICK_OPTION) < 0) {

         setTestOption(QUICK_OPTION, 1);

980 }

981

982 {

983 /* Test 1. Run very long unnormalized strings, to force overflow of*/

984 /* most buffers along the way.*/

985 UChar strA[MAXSLEN+1];

986 UChar strB[MAXSLEN+1];

987

         coll = ucol_open("en_US", &status);

989 if(status == U_FILE_ACCESS_ERROR) {

           log_data_err("Is your data around?\n");

991 return;

         } else if(U_FAILURE(status)) {

           log_err("Error opening collator\n");

994 return;

995 }

         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

997

998 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/

999 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/

1000 /*for (sLen = 1000; sLen<1001; sLen++) {*/

         for (sLen = 500; sLen<501; sLen++) {

1002 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/

1003 strA[0] = baseA;

1004 strB[0] = baseA;

             for (i=1; i<=sLen-1; i++) {

                 strA[i] = ccMix[i % 3];

                 strB[sLen-i] = ccMix[i % 3];

1008 }

1009 strA[sLen] = 0;

1010 strB[sLen] = 0;

1011

             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/

             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/

             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/

             doTest(coll, strA, strB, UCOL_EQUAL);

1016 }

1017 }

1018

1019 setTestOption(QUICK_OPTION, myQ);

1020

1021

1022 /* Test 2: Non-normal sequence in a string that extends to the last character*/

1023 /* of the string. Checks a couple of edge cases.*/

1024

1025 {

         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};

         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};

1028 ucol_setStrength(coll, UCOL_TERTIARY);

         doTest(coll, strA, strB, UCOL_EQUAL);

1030 }

1031

1032 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/

1033

1034 {

1035 /* New UCA 3.1.1.

1036 * test below used a code point from Desseret, which sorts differently

1037 * than d800 dc00

1038 */

1039 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/

         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};

         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};

1042 ucol_setStrength(coll, UCOL_TERTIARY);

         doTest(coll, strA, strB, UCOL_GREATER);

1044 }

1045

1046 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/

1047

1048 {

         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};

         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};

1051 char sortKeyA[50];

1052 char sortKeyAz[50];

1053 char sortKeyB[50];

1054 char sortKeyBz[50];

1055 int r;

1056

1057 /* there used to be -3 here. Hmmmm.... */

1058 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/

         result = ucol_strcoll(coll, strA, 3, strB, 3);

1060 if (result != UCOL_GREATER) {

             log_err("ERROR 1 in test 4\n");

1062 }

         result = ucol_strcoll(coll, strA, -1, strB, -1);

1064 if (result != UCOL_EQUAL) {

             log_err("ERROR 2 in test 4\n");

1066 }

1067

         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));

         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));

1072

         r = strcmp(sortKeyA, sortKeyAz);

         if (r <= 0) {

             log_err("Error 3 in test 4\n");

1076 }

         r = strcmp(sortKeyA, sortKeyB);

         if (r <= 0) {

             log_err("Error 4 in test 4\n");

1080 }

         r = strcmp(sortKeyAz, sortKeyBz);

         if (r != 0) {

             log_err("Error 5 in test 4\n");

1084 }

1085

1086 ucol_setStrength(coll, UCOL_IDENTICAL);

         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));

         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));

1091

         r = strcmp(sortKeyA, sortKeyAz);

         if (r <= 0) {

             log_err("Error 6 in test 4\n");

1095 }

         r = strcmp(sortKeyA, sortKeyB);

         if (r <= 0) {

             log_err("Error 7 in test 4\n");

1099 }

         r = strcmp(sortKeyAz, sortKeyBz);

         if (r != 0) {

             log_err("Error 8 in test 4\n");

1103 }

1104 ucol_setStrength(coll, UCOL_TERTIARY);

1105 }

1106

1107

1108 /* Test 5: Null characters in non-normal source strings.*/

1109

1110 {

         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};

         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};

1113 char sortKeyA[50];

1114 char sortKeyAz[50];

1115 char sortKeyB[50];

1116 char sortKeyBz[50];

1117 int r;

1118

         result = ucol_strcoll(coll, strA, 6, strB, 6);

1120 if (result != UCOL_GREATER) {

             log_err("ERROR 1 in test 5\n");

1122 }

         result = ucol_strcoll(coll, strA, -1, strB, -1);

1124 if (result != UCOL_EQUAL) {

             log_err("ERROR 2 in test 5\n");

1126 }

1127

         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));

         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));

1132

         r = strcmp(sortKeyA, sortKeyAz);

         if (r <= 0) {

             log_err("Error 3 in test 5\n");

1136 }

         r = strcmp(sortKeyA, sortKeyB);

         if (r <= 0) {

             log_err("Error 4 in test 5\n");

1140 }

         r = strcmp(sortKeyAz, sortKeyBz);

         if (r != 0) {

             log_err("Error 5 in test 5\n");

1144 }

1145

1146 ucol_setStrength(coll, UCOL_IDENTICAL);

         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));

         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));

1151

         r = strcmp(sortKeyA, sortKeyAz);

         if (r <= 0) {

             log_err("Error 6 in test 5\n");

1155 }

         r = strcmp(sortKeyA, sortKeyB);

         if (r <= 0) {

             log_err("Error 7 in test 5\n");

1159 }

         r = strcmp(sortKeyAz, sortKeyBz);

         if (r != 0) {

             log_err("Error 8 in test 5\n");

1163 }

1164 ucol_setStrength(coll, UCOL_TERTIARY);

1165 }

1166

1167

1168 /* Test 6: Null character as base of a non-normal combining sequence.*/

1169

1170 {

         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};

         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};

1173

         result = ucol_strcoll(coll, strA, 5, strB, 5);

1175 if (result != UCOL_LESS) {

             log_err("Error 1 in test 6\n");

1177 }

         result = ucol_strcoll(coll, strA, -1, strB, -1);

1179 if (result != UCOL_EQUAL) {

             log_err("Error 2 in test 6\n");

1181 }

1182 }

1183

1184 ucol_close(coll);

1185 }

1186

1187

1188

#if 0
/*
 * Disabled test. For every escaped sample in caseBitData[], unescapes it,
 * asks ucol_uprv_getCaseBits() for its case bits using the collator opened
 * for locale "" and compares the answer with the entry at the same index
 * in results[].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
    "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
    UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Without a collator there is nothing to query. */
  if(U_FAILURE(status)) {
    log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator; the original never closed it. */
  ucol_close(UCA);
}
#endif

1216

 static void TestHangulTailoring(void) {

1218 static const char *koreanData[] = {

         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",

             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",

             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",

             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",

             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",

             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"

1225 };

1226

1227 const char *rules =

         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "

         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "

         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "

         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "

         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "

         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";

1234

1235

1236 UErrorCode status = U_ZERO_ERROR;

   UChar rlz[2048] = { 0 };

   uint32_t rlen = u_unescape(rules, rlz, 2048);

1239

   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

1241 if(status == U_FILE_ACCESS_ERROR) {

     log_data_err("Is your data around?\n");

1243 return;

   } else if(U_FAILURE(status)) {

     log_err("Error opening collator\n");

1246 return;

1247 }

1248

   log_verbose("Using start of korean rules\n");

1250

   if(U_SUCCESS(status)) {

     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));

1253 } else {

     log_err("Unable to open collator with rules %s\n", rules);

1255 }

1256

1257 ucol_close(coll);

1258

   log_verbose("Using ko__LOTUS locale\n");

   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));

1261 }

1262

/*
 * Middle bytes of the "common weight" compression ranges for the secondary
 * level and for the tertiary level, as used by the current implementation.
 * These values follow the sort key compression scheme and are subject to
 * change whenever that scheme changes. See class CollationKeys.
 */
enum {
    /* secondary level: range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,
    /* tertiary level: range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};

1273

 static void TestCompressOverlap(void) {

1275 UChar secstr[150];

1276 UChar tertstr[150];

1277 UErrorCode status = U_ZERO_ERROR;

1278 UCollator *coll;

1279 uint8_t result[500];

1280 uint32_t resultlen;

1281 int count = 0;

1282 uint8_t *tempptr;

1283

     coll = ucol_open("", &status);

1285

     if (U_FAILURE(status)) {

         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));

1288 return;

1289 }

     while (count < 149) {

         secstr[count] = 0x0020; /* [06, 05, 05] */

1292 tertstr[count] = 0x0020;

1293 count ++;

1294 }

1295

1296 /* top down compression ----------------------------------- */

     secstr[count] = 0x0332; /* [, 87, 05] */

     tertstr[count] = 0x3000; /* [06, 05, 07] */

1299

1300 /* no compression secstr should have 150 secondary bytes, tertstr should

1301 have 150 tertiary bytes.

1302 with correct compression, secstr should have 6 secondary

1303 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */

     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));

     (void)resultlen;    /* Suppress set but not used warning. */

     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;

     while (*(tempptr + 1) != 1) {

1308 /* the last secondary collation element is not checked since it is not

1309 part of the compression */

1310 if (*tempptr < SEC_COMMON_MIDDLE) {

             log_err("Secondary top down compression overlapped\n");

1312 }

1313 tempptr ++;

1314 }

1315

1316 /* tertiary top/bottom/common for en_US is similar to the secondary

1317 top/bottom/common */

     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));

     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;

     while (*(tempptr + 1) != 0) {

1321 /* the last secondary collation element is not checked since it is not

1322 part of the compression */

1323 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {

             log_err("Tertiary top down compression overlapped\n");

1325 }

1326 tempptr ++;

1327 }

1328

1329 /* bottom up compression ------------------------------------- */

1330 secstr[count] = 0;

1331 tertstr[count] = 0;

     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));

     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;

     while (*(tempptr + 1) != 1) {

1335 /* the last secondary collation element is not checked since it is not

1336 part of the compression */

1337 if (*tempptr > SEC_COMMON_MIDDLE) {

             log_err("Secondary bottom up compression overlapped\n");

1339 }

1340 tempptr ++;

1341 }

1342

1343 /* tertiary top/bottom/common for en_US is similar to the secondary

1344 top/bottom/common */

     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));

     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;

     while (*(tempptr + 1) != 0) {

1348 /* the last secondary collation element is not checked since it is not

1349 part of the compression */

1350 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {

             log_err("Tertiary bottom up compression overlapped\n");

1352 }

1353 tempptr ++;

1354 }

1355

1356 ucol_close(coll);

1357 }

1358

 static void TestCyrillicTailoring(void) {

1360 static const char *test[] = {

1361 "\\u0410b",

       "\\u0410\\u0306a",

1363 "\\u04d0A"

1364 };

1365

1366 /* Russian overrides contractions, so this test is not valid anymore */

1367 /*genericLocaleStarter("ru", test, 3);*/

1368

1369 // Most of the following are commented out because UCA 8.0

1370 // drops most of the Cyrillic contractions from the default order.

1371 // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".

1372

1373 // genericLocaleStarter("root", test, 3);

1374 // genericRulesStarter("&\\u0410 = \\u0410", test, 3);

1375 // genericRulesStarter("&Z < \\u0410", test, 3);

     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);

     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);

1378 // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);

1379 // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);

1380 }

1381

 /*
 * Passes two three-string lists to genericRulesStarter() with a rule string
 * that suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* One rule string shared by both calls. */
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}

1398

 static void TestContraction(void) {

1400 const static char *testrules[] = {

1401 "&A = AB / B",

         "&A = A\\u0306/\\u0306",

1403 "&c = ch / h"

1404 };

1405 const static UChar testdata[][2] = {

         {0x0041 /* 'A' */, 0x0042 /* 'B' */},

         {0x0041 /* 'A' */, 0x0306 /* combining breve */},

         {0x0063 /* 'c' */, 0x0068 /* 'h' */}

1409 };

1410 const static UChar testdata2[][2] = {

         {0x0063 /* 'c' */, 0x0067 /* 'g' */},

         {0x0063 /* 'c' */, 0x0068 /* 'h' */},

         {0x0063 /* 'c' */, 0x006C /* 'l' */}

1414 };

1415 #if 0

1416 /*

1417 * These pairs of rule strings are not guaranteed to yield the very same mappings.

1418 * In fact, LDML 24 recommends an improved way of creating mappings

1419 * which always yields different mappings for such pairs. See

1420 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings

1421 */

1422 const static char *testrules3[] = {

1423 "&z < xyz &xyzw << B",

1424 "&z < xyz &xyz << B / w",

1425 "&z < ch &achm << B",

1426 "&z < ch &a << B / chm",

         "&\\ud800\\udc00w << B",

         "&\\ud800\\udc00 << B / w",

         "&a\\ud800\\udc00m << B",

         "&a << B / \\ud800\\udc00m",

1431 };

1432 #endif

1433

1434 UErrorCode status = U_ZERO_ERROR;

1435 UCollator *coll;

     UChar       rule[256] = {0};

1437 uint32_t rlen = 0;

1438 int i;

1439

     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {

1441 UCollationElements *iter1;

1442 int j = 0;

         log_verbose("Rule %s for testing\n", testrules[i]);

         rlen = u_unescape(testrules[i], rule, 32);

         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

         if (U_FAILURE(status)) {

             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));

1448 return;

1449 }

         iter1 = ucol_openElements(coll, testdata[i], 2, &status);

         if (U_FAILURE(status)) {

             log_err("Collation iterator creation failed\n");

1453 return;

1454 }

         while (j < 2) {

             UCollationElements *iter2 = ucol_openElements(coll,

1457 &(testdata[i][j]),

1458 1, &status);

1459 int32_t ce;

             if (U_FAILURE(status)) {

                 log_err("Collation iterator creation failed\n");

1462 return;

1463 }

             ce = ucol_next(iter2, &status);

1465 while (ce != UCOL_NULLORDER) {

                 if (ucol_next(iter1, &status) != ce) {

                     log_err("Collation elements in contraction split does not match\n");

1468 return;

1469 }

                 ce = ucol_next(iter2, &status);

1471 }

1472 j ++;

1473 ucol_closeElements(iter2);

1474 }

         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {

             log_err("Collation elements not exhausted\n");

1477 return;

1478 }

1479 ucol_closeElements(iter1);

1480 ucol_close(coll);

1481 }

1482

     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);

     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {

         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

                 testdata2[0][0], testdata2[0][1], testdata2[1][0],

                 testdata2[1][1]);

1489 return;

1490 }

     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {

         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

                 testdata2[1][0], testdata2[1][1], testdata2[2][0],

                 testdata2[2][1]);

1495 return;

1496 }

1497 ucol_close(coll);

1498 #if 0 /* see above */

     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {

         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);

1501 UCollator *coll1,

1502 *coll2;

1503 UCollationElements *iter1,

1504 *iter2;

1505 UChar ch = 0x0042 /* 'B' */;

1506 uint32_t ce;

         rlen = u_unescape(testrules3[i], rule, 32);

         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

         rlen = u_unescape(testrules3[i + 1], rule, 32);

         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

         if (U_FAILURE(status)) {

             log_err("Collator creation failed %s\n", testrules[i]);

1513 return;

1514 }

         iter1 = ucol_openElements(coll1, &ch, 1, &status);

         iter2 = ucol_openElements(coll2, &ch, 1, &status);

         if (U_FAILURE(status)) {

             log_err("Collation iterator creation failed\n");

1519 return;

1520 }

         ce = ucol_next(iter1, &status);

         if (U_FAILURE(status)) {

             log_err("Retrieving ces failed\n");

1524 return;

1525 }

1526 while (ce != UCOL_NULLORDER) {

             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);

1528 if (ce == ce2) {

                 log_verbose("CEs match: %08x\n", ce);

1530 } else {

                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);

1532 return;

1533 }

             ce = ucol_next(iter1, &status);

             if (U_FAILURE(status)) {

                 log_err("Retrieving ces failed\n");

1537 return;

1538 }

1539 }

         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {

             log_err("CEs not exhausted\n");

1542 return;

1543 }

1544 ucol_closeElements(iter1);

1545 ucol_closeElements(iter2);

1546 ucol_close(coll1);

1547 ucol_close(coll2);

1548 }

1549 #endif

1550 }

1551

 static void TestExpansion(void) {

1553 const static char *testrules[] = {

1554 #if 0

1555 /*

1556 * This seems to have tested that M was not mapped to an expansion.

1557 * I believe the old builder just did that because it computed the extension CEs

1558 * at the very end, which was a bug.

1559 * Among other problems, it violated the core tailoring principle

1560 * by making an earlier rule depend on a later one.

1561 * And, of course, if M did not get an expansion, then it was primary different from K,

1562 * unlike what the rule &K<<M says.

1563 */

1564 "&J << K / B & K << M",

1565 #endif

1566 "&J << K / B << M"

1567 };

1568 const static UChar testdata[][3] = {

         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},

         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},

         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},

         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},

         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},

         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}

1575 };

1576

1577 UErrorCode status = U_ZERO_ERROR;

1578 UCollator *coll;

     UChar       rule[256] = {0};

1580 uint32_t rlen = 0;

1581 int i;

1582

     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {

1584 int j = 0;

         log_verbose("Rule %s for testing\n", testrules[i]);

         rlen = u_unescape(testrules[i], rule, 32);

         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

         if (U_FAILURE(status)) {

             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));

1590 return;

1591 }

1592

         for (j = 0; j < 5; j ++) {

             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);

1595 }

1596 ucol_close(coll);

1597 }

1598 }

1599

#if 0
/*
 * Disabled: this test exercises known limitations of the engine and always
 * fails, so it is compiled out. Each section pairs one rule string with
 * string lists and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *listA[] = {"add","b","adf"};
    static const char *listB[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, listA, UPRV_LENGTHOF(listA));
    genericRulesStarter(rule, listB, UPRV_LENGTHOF(listB));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *listA[] = {"ad","c","af","f","ch","h"};
    static const char *listB[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, listA, UPRV_LENGTHOF(listA));
    genericRulesStarter(rule, listB, UPRV_LENGTHOF(listB));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *listA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *listB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, normOff, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, normOff, UPRV_LENGTHOF(attrs));
  }
  /* normalization: contractions spanning normalization */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *listA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *listB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, normOff, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, normOff, UPRV_LENGTHOF(attrs));
  }
  /* variable top: */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *listA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *listB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *listC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, listC, UPRV_LENGTHOF(listC), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, nonIgnorable, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, nonIgnorable, UPRV_LENGTHOF(attrs));
  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *listA[] = {"c","CH","Ch","cH","ch"};
    static const char *listB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/

    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, listA, UPRV_LENGTHOF(listA), attrs, upperFirst, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(rule, listB, UPRV_LENGTHOF(listB), attrs, upperFirst, UPRV_LENGTHOF(attrs));
    /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
  }

}
#endif

1691

 static void TestBocsuCoverage(void) {

1693 UErrorCode status = U_ZERO_ERROR;

   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";

   UChar       test[256] = {0};

   uint32_t    tlen     = u_unescape(testString, test, 32);

   uint8_t key[256]     = {0};

1698 uint32_t klen = 0;

1699

   UCollator *coll = ucol_open("", &status);

   if(U_SUCCESS(status)) {

   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

1703

   klen = ucol_getSortKey(coll, test, tlen, key, 256);

   (void)klen;    /* Suppress set but not used warning. */

1706

1707 ucol_close(coll);

1708 } else {

     log_data_err("Couldn't open UCA\n");

1710 }

1711 }

1712

 static void TestVariableTopSetting(void) {

1714 UErrorCode status = U_ZERO_ERROR;

   uint32_t varTopOriginal = 0, varTop1, varTop2;

   UCollator *coll = ucol_open("", &status);

   if(U_SUCCESS(status)) {

1718

1719 static const UChar nul = 0;

1720 static const UChar space = 0x20;

   static const UChar dot = 0x2e;  /* punctuation */

   static const UChar degree = 0xb0;  /* symbol */

   static const UChar dollar = 0x24;  /* currency symbol */

   static const UChar zero = 0x30;  /* digit */

1725

   varTopOriginal = ucol_getVariableTop(coll, &status);

   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);

   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

1729

   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);

   varTop2 = ucol_getVariableTop(coll, &status);

   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);

   if(U_FAILURE(status) || varTop1 != varTop2 ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       ucol_equal(coll, &nul, 0, &dot, 1) ||

       ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {

     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));

1741 }

1742

   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);

   varTop2 = ucol_getVariableTop(coll, &status);

   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);

   if(U_FAILURE(status) || varTop1 != varTop2 ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {

     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));

1754 }

1755

   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);

   varTop2 = ucol_getVariableTop(coll, &status);

   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);

   if(U_FAILURE(status) || varTop1 != varTop2 ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       !ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {

     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));

1767 }

1768

   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);

   varTop2 = ucol_getVariableTop(coll, &status);

   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);

   if(U_FAILURE(status) || varTop1 != varTop2 ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       !ucol_equal(coll, &nul, 0, &degree, 1) ||

       !ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {

     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));

1780 }

1781

   log_verbose("Testing setting variable top to contractions\n");

1783 {

     UChar first[4] = { 0 };

     first[0] = 0x0040;

     first[1] = 0x0050;

     first[2] = 0x0000;

1788

1789 status = U_ZERO_ERROR;

     ucol_setVariableTop(coll, first, -1, &status);

1791

     if(U_SUCCESS(status)) {

       log_err("Invalid contraction succeded in setting variable top!\n");

1794 }

1795

1796 }

1797

   log_verbose("Test restoring variable top\n");

1799

1800 status = U_ZERO_ERROR;

   ucol_restoreVariableTop(coll, varTopOriginal, &status);

   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {

     log_err("Couldn't restore old variable top\n");

1804 }

1805

   log_verbose("Testing calling with error set\n");

1807

1808 status = U_INTERNAL_PROGRAM_ERROR;

   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);

   varTop2 = ucol_getVariableTop(coll, &status);

   ucol_restoreVariableTop(coll, varTop2, &status);

   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);

   varTop2 = ucol_getVariableTop(NULL, &status);

   ucol_restoreVariableTop(NULL, varTop2, &status);

1815 if(status != U_INTERNAL_PROGRAM_ERROR) {

     log_err("Bad reaction to passed error!\n");

1817 }

1818 ucol_close(coll);

1819 } else {

     log_data_err("Couldn't open UCA collator\n");

1821 }

1822 }

1823

 static void TestMaxVariable(void) {

1825 UErrorCode status = U_ZERO_ERROR;

1826 UColReorderCode oldMax, max;

1827 UCollator *coll;

1828

1829 static const UChar nul = 0;

1830 static const UChar space = 0x20;

   static const UChar dot = 0x2e;  /* punctuation */

   static const UChar degree = 0xb0;  /* symbol */

   static const UChar dollar = 0x24;  /* currency symbol */

   static const UChar zero = 0x30;  /* digit */

1835

   coll = ucol_open("", &status);

   if(U_FAILURE(status)) {

     log_data_err("Couldn't open root collator\n");

1839 return;

1840 }

1841

1842 oldMax = ucol_getMaxVariable(coll);

   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);

   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

1845

   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);

1847 max = ucol_getMaxVariable(coll);

   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);

   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       ucol_equal(coll, &nul, 0, &dot, 1) ||

       ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {

     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));

1857 }

1858

   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);

1860 max = ucol_getMaxVariable(coll);

   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);

   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {

     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));

1870 }

1871

   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);

1873 max = ucol_getMaxVariable(coll);

   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);

   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       !ucol_equal(coll, &nul, 0, &degree, 1) ||

       ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {

     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));

1883 }

1884

   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);

1886 max = ucol_getMaxVariable(coll);

   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);

   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||

       !ucol_equal(coll, &nul, 0, &space, 1) ||

       !ucol_equal(coll, &nul, 0, &dot, 1) ||

       !ucol_equal(coll, &nul, 0, &degree, 1) ||

       !ucol_equal(coll, &nul, 0, &dollar, 1) ||

       ucol_equal(coll, &nul, 0, &zero, 1) ||

       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {

     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));

1896 }

1897

   log_verbose("Test restoring maxVariable\n");

1899 status = U_ZERO_ERROR;

   ucol_setMaxVariable(coll, oldMax, &status);

   if(oldMax != ucol_getMaxVariable(coll)) {

     log_err("Couldn't restore old maxVariable\n");

1903 }

1904

   log_verbose("Testing calling with error set\n");

1906 status = U_INTERNAL_PROGRAM_ERROR;

   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);

1908 max = ucol_getMaxVariable(coll);

   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {

     log_err("Bad reaction to passed error!\n");

1911 }

1912 ucol_close(coll);

1913 }

1914

 static void TestNonChars(void) {

1916 static const char *test[] = {

       "\\u0000",  /* ignorable */

       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */

       "\\uFDD0", "\\uFDEF",

       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */

       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */

       "\\U0003FFFE", "\\U0003FFFF",

       "\\U0004FFFE", "\\U0004FFFF",

       "\\U0005FFFE", "\\U0005FFFF",

       "\\U0006FFFE", "\\U0006FFFF",

       "\\U0007FFFE", "\\U0007FFFF",

       "\\U0008FFFE", "\\U0008FFFF",

       "\\U0009FFFE", "\\U0009FFFF",

       "\\U000AFFFE", "\\U000AFFFF",

       "\\U000BFFFE", "\\U000BFFFF",

       "\\U000CFFFE", "\\U000CFFFF",

       "\\U000DFFFE", "\\U000DFFFF",

       "\\U000EFFFE", "\\U000EFFFF",

       "\\U000FFFFE", "\\U000FFFFF",

       "\\U0010FFFE", "\\U0010FFFF",

1936 "\\uFFFF" /* special character with maximum primary weight */

1937 };

1938 UErrorCode status = U_ZERO_ERROR;

   UCollator *coll = ucol_open("en_US", &status);

1940

   log_verbose("Test non characters\n");

1942

   if(U_SUCCESS(status)) {

     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);

1945 } else {

     log_err_status(status, "Unable to open collator\n");

1947 }

1948

1949 ucol_close(coll);

1950 }

1951

 static void TestExtremeCompression(void) {
    /*
     * Builds four strings of length j (for j = 20..499) that consist of j-1
     * 'a' characters followed by 'a', 'b', 'c' or 'd' respectively, and runs
     * them through genericLocaleStarter() for "en_US" at every length.
     *
     * The buffers are heap-allocated; an allocation failure is reported and
     * the test stops instead of writing through a NULL pointer.
     */
    char *test[4] = { NULL, NULL, NULL, NULL };
    int32_t j = 0, i = 0;

    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
        if(test[i] == NULL) {
            log_err("TestExtremeCompression: out of memory\n");
            goto cleanup;
        }
    }

    for(j = 20; j < 500; j++) {
        for(i = 0; i < UPRV_LENGTHOF(test); i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j-1] = (char)('a'+i);   /* distinguishing last character */
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, UPRV_LENGTHOF(test));
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
        free(test[i]);
    }
}

1974

#if 0
/*
 * Disabled variant of TestExtremeCompression that works with strings of up
 * to 2047 characters. It has the same name as the live function, so it can
 * only be enabled in place of it.
 *
 * The former ucol_open("en_US", status) call passed the UErrorCode by value
 * and the collator was neither used nor closed; it has been removed.
 * NOTE(review): the second loop only rewrites test[0] and is not followed by
 * any check - confirm what it was meant to exercise before enabling.
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;

    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif

2004

 static void TestSurrogates(void) {
    /*
     * Tailors a number of supplementary characters (written as escaped
     * surrogate pairs) after 'z' and hands the expected ordering of the
     * strings below to genericRulesStarter().
     */
    static const char *tailoring =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
           "< \\ud800\\udc00y  < \\ud800\\udc00r"
           "< \\ud800\\udc00f  << \\ud800\\udc00"
           "< \\ud800\\udc00fa << \\ud800\\udc00fb"
           "< \\ud800\\udc00a  < c < b" ;

    static const char *expectedOrder[] = {
        "z","\\ud900\\udc25",  "\\ud805\\udc50",
        "\\ud800\\udc00y",  "\\ud800\\udc00r",
        "\\ud800\\udc00f",  "\\ud800\\udc00",
        "\\ud800\\udc00c", "\\ud800\\udc00b",
        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c", "b"
    };

    /* All 14 strings take part in the comparison. */
    genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}

2025

2026 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */

 static void TestPrefix(void) {

2028 uint32_t i;

2029

2030 static const struct {

2031 const char *rules;

     const char *data[50];

2033 const uint32_t len;

2034 } tests[] = {

2035 { "&z <<< z|a",

       {"zz", "za"}, 2 },

2037

2038 { "&z <<< z| a",

       {"zz", "za"}, 2 },

2040 { "[strength I]"

       "&a=\\ud900\\udc25"

       "&z<<<\\ud900\\udc25|a",

       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },

2044 };

2045

2046

   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {

     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2049 }

2050 }

2051

2052 /* This test uses data suplied by Masashiko Maedera to test the implementation */

2053 /* JIS X 4061 collation order implementation */

 static void TestNewJapanese(void) {

2055

2056 static const char * const test1[] = {

       "\\u30b7\\u30e3\\u30fc\\u30ec",

       "\\u30b7\\u30e3\\u30a4",

       "\\u30b7\\u30e4\\u30a3",

       "\\u30b7\\u30e3\\u30ec",

       "\\u3061\\u3087\\u3053",

       "\\u3061\\u3088\\u3053",

       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",

       "\\u3066\\u30fc\\u305f",

       "\\u30c6\\u30fc\\u30bf",

       "\\u30c6\\u30a7\\u30bf",

       "\\u3066\\u3048\\u305f",

       "\\u3067\\u30fc\\u305f",

       "\\u30c7\\u30fc\\u30bf",

       "\\u30c7\\u30a7\\u30bf",

       "\\u3067\\u3048\\u305f",

       "\\u3066\\u30fc\\u305f\\u30fc",

       "\\u30c6\\u30fc\\u30bf\\u30a1",

       "\\u30c6\\u30a7\\u30bf\\u30fc",

       "\\u3066\\u3047\\u305f\\u3041",

       "\\u3066\\u3048\\u305f\\u30fc",

       "\\u3067\\u30fc\\u305f\\u30fc",

       "\\u30c7\\u30fc\\u30bf\\u30a1",

       "\\u3067\\u30a7\\u305f\\u30a1",

       "\\u30c7\\u3047\\u30bf\\u3041",

       "\\u30c7\\u30a8\\u30bf\\u30a2",

       "\\u3072\\u3086",

       "\\u3073\\u3085\\u3042",

       "\\u3074\\u3085\\u3042",

       "\\u3073\\u3085\\u3042\\u30fc",

       "\\u30d3\\u30e5\\u30a2\\u30fc",

       "\\u3074\\u3085\\u3042\\u30fc",

       "\\u30d4\\u30e5\\u30a2\\u30fc",

       "\\u30d2\\u30e5\\u30a6",

       "\\u30d2\\u30e6\\u30a6",

       "\\u30d4\\u30e5\\u30a6\\u30a2",

       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",

       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",

       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",

       "\\u3072\\u3085\\u3093",

       "\\u3074\\u3085\\u3093",

       "\\u3075\\u30fc\\u308a",

       "\\u30d5\\u30fc\\u30ea",

       "\\u3075\\u3045\\u308a",

       "\\u3075\\u30a5\\u308a",

       "\\u3075\\u30a5\\u30ea",

       "\\u30d5\\u30a6\\u30ea",

       "\\u3076\\u30fc\\u308a",

       "\\u30d6\\u30fc\\u30ea",

       "\\u3076\\u3045\\u308a",

       "\\u30d6\\u30a5\\u308a",

       "\\u3077\\u3046\\u308a",

       "\\u30d7\\u30a6\\u30ea",

       "\\u3075\\u30fc\\u308a\\u30fc",

       "\\u30d5\\u30a5\\u30ea\\u30fc",

       "\\u3075\\u30a5\\u308a\\u30a3",

       "\\u30d5\\u3045\\u308a\\u3043",

       "\\u30d5\\u30a6\\u30ea\\u30fc",

       "\\u3075\\u3046\\u308a\\u3043",

       "\\u30d6\\u30a6\\u30ea\\u30a4",

       "\\u3077\\u30fc\\u308a\\u30fc",

       "\\u3077\\u30a5\\u308a\\u30a4",

       "\\u3077\\u3046\\u308a\\u30fc",

       "\\u30d7\\u30a6\\u30ea\\u30a4",

       "\\u30d5\\u30fd",

       "\\u3075\\u309e",

       "\\u3076\\u309d",

       "\\u3076\\u3075",

       "\\u3076\\u30d5",

       "\\u30d6\\u3075",

       "\\u30d6\\u30d5",

       "\\u3076\\u309e",

       "\\u3076\\u3077",

       "\\u30d6\\u3077",

       "\\u3077\\u309d",

       "\\u30d7\\u30fd",

       "\\u3077\\u3075",

2133 };

2134

2135 static const char *test2[] = {

     "\\u306f\\u309d", /* H\\u309d */

     "\\u30cf\\u30fd", /* K\\u30fd */

     "\\u306f\\u306f", /* HH */

     "\\u306f\\u30cf", /* HK */

     "\\u30cf\\u30cf", /* KK */

     "\\u306f\\u309e", /* H\\u309e */

     "\\u30cf\\u30fe", /* K\\u30fe */

     "\\u306f\\u3070", /* HH\\u309b */

     "\\u30cf\\u30d0", /* KK\\u309b */

     "\\u306f\\u3071", /* HH\\u309c */

     "\\u30cf\\u3071", /* KH\\u309c */

     "\\u30cf\\u30d1", /* KK\\u309c */

     "\\u3070\\u309d", /* H\\u309b\\u309d */

     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */

     "\\u3070\\u306f", /* H\\u309bH */

     "\\u30d0\\u30cf", /* K\\u309bK */

     "\\u3070\\u309e", /* H\\u309b\\u309e */

     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */

     "\\u3070\\u3070", /* H\\u309bH\\u309b */

     "\\u30d0\\u3070", /* K\\u309bH\\u309b */

     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */

     "\\u3070\\u3071", /* H\\u309bH\\u309c */

     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */

     "\\u3071\\u309d", /* H\\u309c\\u309d */

     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */

     "\\u3071\\u306f", /* H\\u309cH */

     "\\u30d1\\u30cf", /* K\\u309cK */

     "\\u3071\\u3070", /* H\\u309cH\\u309b */

     "\\u3071\\u30d0", /* H\\u309cK\\u309b */

     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */

     "\\u3071\\u3071", /* H\\u309cH\\u309c */

     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */

2168 };

2169 /*

2170 static const char *test3[] = {

2171 "\\u221er\\u221e",

2172 "\\u221eR#",

2173 "\\u221et\\u221e",

2174 "#r\\u221e",

2175 "#R#",

2176 "#t%",

2177 "#T%",

2178 "8t\\u221e",

2179 "8T\\u221e",

2180 "8t#",

2181 "8T#",

2182 "8t%",

2183 "8T%",

2184 "8t8",

2185 "8T8",

2186 "\\u03c9r\\u221e",

2187 "\\u03a9R%",

2188 "rr\\u221e",

2189 "rR\\u221e",

2190 "Rr\\u221e",

2191 "RR\\u221e",

2192 "RT%",

2193 "rt8",

2194 "tr\\u221e",

2195 "tr8",

2196 "TR8",

2197 "tt8",

2198 "\\u30b7\\u30e3\\u30fc\\u30ec",

2199 };

2200 */

2201 static const UColAttribute att[] = { UCOL_STRENGTH };

2202 static const UColAttributeValue val[] = { UCOL_QUATERNARY };

2203

2204 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};

2205 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };

2206

   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);

   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);

2209 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/

   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);

   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);

2212 }

2213

 static void TestStrCollIdenticalPrefix(void) {
    /*
     * Two supplementary characters are tailored as equal; two strings that
     * share the prefix "ab" and end in one of them each must then compare
     * UCOL_EQUAL.
     */
    const char* tailoring = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
    const char* equalStrings[] = {
        "ab\\ud9b0\\udc70",
        "ab\\ud9b0\\udc71"
    };

    genericRulesStarterWithResult(tailoring, equalStrings,
                                  UPRV_LENGTHOF(equalStrings), UCOL_EQUAL);
}

2222 /* Contractions should have all their canonically equivalent */

2223 /* strings included */

 static void TestContractionClosure(void) {

2225 static const struct {

2226 const char *rules;

     const char *data[10];

2228 const uint32_t len;

2229 } tests[] = {

     {   "&b=\\u00e4\\u00e4",

       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},

     {   "&b=\\u00C5",

       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},

2234 };

2235 uint32_t i;

2236

2237

   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {

     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);

2240 }

2241 }

2242

2243 /* This tests also fails*/

 static void TestBeforePrefixFailure(void) {

2245 static const struct {

2246 const char *rules;

     const char *data[10];

2248 const uint32_t len;

2249 } tests[] = {

2250 { "&g <<< a"

2251 "&[before 3]\\uff41 <<< x",

       {"x", "\\uff41"}, 2 },

     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

         "&\\u30A8=\\u30A8=\\u3048=\\uff74"

         "&[before 3]\\u30a7<<<\\u30a9",

       {"\\u30a9", "\\u30a7"}, 2 },

     {   "&[before 3]\\u30a7<<<\\u30a9"

         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

         "&\\u30A8=\\u30A8=\\u3048=\\uff74",

       {"\\u30a9", "\\u30a7"}, 2 },

2261 };

2262 uint32_t i;

2263

2264

   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {

     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2267 }

2268

2269 #if 0

2270 const char* rule1 =

         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

         "&\\u30A8=\\u30A8=\\u3048=\\uff74"

         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";

2274 const char* rule2 =

         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"

         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

         "&\\u30A8=\\u30A8=\\u3048=\\uff74";

2278 const char* test[] = {

       "\\u30c6\\u30fc\\u30bf",

       "\\u30c6\\u30a7\\u30bf",

2281 };

   genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));

   genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));

2284 /* this piece of code should be in some sort of verbose mode */

2285 /* it gets the collation elements for elements and prints them */

2286 /* This is useful when trying to see whether the problem is */

2287 {

2288 UErrorCode status = U_ZERO_ERROR;

2289 uint32_t i = 0;

2290 UCollationElements *it = NULL;

2291 uint32_t CE;

2292 UChar string[256];

2293 uint32_t uStringLen;

2294 UCollator *coll = NULL;

2295

     uStringLen = u_unescape(rule1, string, 256);

2297

     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

2299

2300 /*coll = ucol_open("ja_JP_JIS", &status);*/

     it = ucol_openElements(coll, string, 0, &status);

2302

     for(i = 0; i < UPRV_LENGTHOF(test); i++) {

       log_verbose("%s\n", test[i]);

       uStringLen = u_unescape(test[i], string, 256);

       ucol_setText(it, string, uStringLen, &status);

2307

       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {

         log_verbose("%08X\n", CE);

2310 }

       log_verbose("\n");

2312

2313 }

2314

2315 ucol_closeElements(it);

2316 ucol_close(coll);

2317 }

2318 #endif

2319 }

2320

 static void TestPrefixCompose(void) {

2322 const char* rule1 =

         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";

2324 /*

2325 const char* test[] = {

2326 "\\u30c6\\u30fc\\u30bf",

2327 "\\u30c6\\u30a7\\u30bf",

2328 };

2329 */

2330 {

2331 UErrorCode status = U_ZERO_ERROR;

2332 /*uint32_t i = 0;*/

2333 /*UCollationElements *it = NULL;*/

2334 /* uint32_t CE;*/

2335 UChar string[256];

2336 uint32_t uStringLen;

2337 UCollator *coll = NULL;

2338

     uStringLen = u_unescape(rule1, string, 256);

2340

     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

2342 ucol_close(coll);

2343 }

2344

2345

2346 }

2347

2348 /*

2349 [last variable] last variable value

2350 [last primary ignorable] largest CE for primary ignorable

2351 [last secondary ignorable] largest CE for secondary ignorable

2352 [last tertiary ignorable] largest CE for tertiary ignorable

2353 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)

2354 */

2355

 static void TestRuleOptions(void) {

2357 /* values here are hardcoded and are correct for the current UCA

2358 * when the UCA changes, one might be forced to change these

2359 * values.

2360 */

2361

2362 /*

2363 * These strings contain the last character before [variable top]

2364 * and the first and second characters (by primary weights) after it.

2365 * See FractionalUCA.txt. For example:

2366 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR

2367 [variable top = 0C FE]

2368 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT

2369 and

2370 00B4; [0D 0C, 05, 05]

2371 *

2372 * Note: Starting with UCA 6.0, the [variable top] collation element

2373 * is not the weight of any character or string,

2374 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].

2375 */

 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"

 #define FIRST_REGULAR_CHAR_STRING "\\u0060"

 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"

2379

2380 /*

2381 * This string has to match the character that has the [last regular] weight

2382 * which changes with each UCA version.

2383 * See the bottom of FractionalUCA.txt which says something like

2384 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032

2385 *

2386 * Note: Starting with UCA 6.0, the [last regular] collation element

2387 * is not the weight of any character or string,

2388 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].

2389 */

 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"

2391

2392 static const struct {

2393 const char *rules;

     const char *data[10];

2395 const uint32_t len;

2396 } tests[] = {

2397 #if 0

2398 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */

2399 /* - all befores here amount to zero */

2400 { "&[before 3][first tertiary ignorable]<<<a",

         { "\\u0000", "a"}, 2

2402 }, /* you cannot go before first tertiary ignorable */

2403

2404 { "&[before 3][last tertiary ignorable]<<<a",

         { "\\u0000", "a"}, 2

2406 }, /* you cannot go before last tertiary ignorable */

2407 #endif

2408 /*

2409 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),

2410 * and it *is* possible to "go before" that.

2411 */

2412 { "&[before 3][first secondary ignorable]<<<a",

         { "\\u0000", "a"}, 2

2414 },

2415

2416 { "&[before 3][last secondary ignorable]<<<a",

         { "\\u0000", "a"}, 2

2418 },

2419

2420 /* 'normal' befores */

2421

2422 /*

2423 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,

2424 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a

2425 * because there is no tailoring space before that boundary.

2426 * Made the tests work by tailoring to a space instead.

2427 */

2428 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */

         {  "c", "b", "\\u0332", "a" }, 4

2430 },

2431

2432 /* we don't have a code point that corresponds to

2433 * the last primary ignorable

2434 */

2435 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */

         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5

2437 },

2438

2439 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",

         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5

2441 },

2442

2443 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",

         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5

2445 },

2446

2447 { "&[first regular]<a"

2448 "&[before 1][first regular]<b",

       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4

2450 },

2451

2452 { "&[before 1][last regular]<b"

2453 "&[last regular]<a",

         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4

2455 },

2456

2457 { "&[before 1][first implicit]<b"

2458 "&[first implicit]<a",

         { "b", "\\u4e00", "a", "\\u4e01"}, 4

2460 },

2461 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */

2462 { "&[before 1][last implicit]<b"

2463 "&[last implicit]<a",

         { "b", "\\U0010FFFD", "a" }, 3

2465 },

2466 #endif

2467 { "&[last variable]<z"

2468 "&' '<x" /* was &[last primary ignorable]<x, see above */

2469 "&[last secondary ignorable]<<y"

2470 "&[last tertiary ignorable]<<<w"

2471 "&[top]<u",

       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7

2473 }

2474

2475 };

2476 uint32_t i;

2477

   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {

     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2480 }

2481 }

2482

2483

 static void TestOptimize(void) {

2485 /* this is not really a test - just trying out

2486 * whether copying of UCA contents will fail

2487 * Cannot really test, since the functionality

2488 * remains the same.

2489 */

2490 static const struct {

2491 const char *rules;

     const char *data[10];

2493 const uint32_t len;

2494 } tests[] = {

2495 /* - all befores here amount to zero */

     { "[optimize [\\uAC00-\\uD7FF]]",

     { "a", "b"}, 2}

2498 };

2499 uint32_t i;

2500

   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {

     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2503 }

2504 }

2505

2506 /*

2507 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.

2508 weiv ucol_strcollIter?

2509 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021

2510 weiv these are the input strings?

2511 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2

2512 weiv will check - could be a problem with utf-8 iterator

2513 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2

2514 weiv hmmm

2515 cycheng@ca.ibm.c... note that we have a standalone high surrogate

2516 weiv that doesn't sound right

2517 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000

2518 weiv so you have two strings, you convert them to utf-8 and to utf-16BE

2519 cycheng@ca.ibm.c... yes

2520 weiv and then do the comparison

2521 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be

2522 weiv utf-16 strings look like a little endian ones in the example you sent me

2523 weiv It could be a bug - let me try to test it out

2524 cycheng@ca.ibm.c... ok

2525 cycheng@ca.ibm.c... we can wait till the conf. call

2526 cycheng@ca.ibm.c... next weke

2527 weiv that would be great

2528 weiv hmmm

2529 weiv I might be wrong

2530 weiv let me play with it some more

2531 cycheng@ca.ibm.c... ok

2532 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be

2533 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2

2534 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be

2535 weiv ok

2536 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data

2537 weiv thanks

2538 cycheng@ca.ibm.c... the 4 strings we sent are just samples

2539 */

#if 0
/*
 * Disabled reproduction of the report quoted above: the same two strings are
 * compared once through UTF-16BE iterators and once through UTF-8 iterators
 * (normalization on), and the two results are expected to agree.
 */
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);

    /* The two strings as UTF-16BE bytes ... */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* ... and as UTF-8 bytes. */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif

2580

2581 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256

 static void Alexis2(void) {

2583 UErrorCode status = U_ZERO_ERROR;

2584 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];

   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];

   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];

   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;

2588

2589 UConverter *conv = NULL;

2590

2591 UCharIterator U16BEItS, U16BEItT;

2592 UCharIterator U8ItS, U8ItT;

2593

2594 UCollationResult resU16, resU16BE, resU8;

2595

   static const char* const pairs[][2] = {

     { "\\ud800\\u0021", "\\uFFFC\\u0062"},

     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },

     { "\\u0E40\\u0021", "\\u00A1\\u0021"},

     { "\\u0E40\\u0021", "\\uFE57\\u0062"},

     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},

     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},

     { "\\u0020", "\\u0020\\u0000"}

2604 /*

2605 5F20 (my result here)

2606 5F204E008E3F

2607 5F20 (your result here)

2608 */

2609 };

2610

2611 int32_t i = 0;

2612

   UCollator *coll = ucol_open("", &status);

2614 if(status == U_FILE_ACCESS_ERROR) {

     log_data_err("Is your data around?\n");

2616 return;

   } else if(U_FAILURE(status)) {

     log_err("Error opening collator\n");

2619 return;

2620 }

   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

   conv = ucnv_open("UTF16BE", &status);

   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {

     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);

     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);

2626

     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);

2628

     log_verbose("Result of strcoll is %i\n", resU16);

2630

     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);

     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);

     (void)U16BELenS;    /* Suppress set but not used warnings. */

2634 (void)U16BELenT;

2635

2636 /* use the original sizes, as the result from converter is in bytes */

     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);

     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);

2639

     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);

2641

     log_verbose("Result of U16BE is %i\n", resU16BE);

2643

2644 if(resU16 != resU16BE) {

       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);

2646 }

2647

     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);

     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);

2650

     uiter_setUTF8(&U8ItS, U8Source, U8LenS);

     uiter_setUTF8(&U8ItT, U8Target, U8LenT);

2653

     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);

2655

2656 if(resU16 != resU8) {

       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);

2658 }

2659

2660 }

2661

2662 ucol_close(coll);

2663 ucnv_close(conv);

2664 }

2665

 static void TestHebrewUCA(void) {

2667 UErrorCode status = U_ZERO_ERROR;

2668 static const char *first[] = {

2669 "d790d6b8d79cd795d6bcd7a9",

2670 "d790d79cd79ed7a7d799d799d7a1",

2671 "d790d6b4d79ed795d6bcd7a9",

2672 };

2673

   char utf8String[3][256];

   UChar utf16String[3][256];

2676

   int32_t i = 0, j = 0;

2678 int32_t sizeUTF8[3];

2679 int32_t sizeUTF16[3];

2680

   UCollator *coll = ucol_open("", &status);

   if (U_FAILURE(status)) {

       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));

2684 return;

2685 }

2686 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/

2687

   for(i = 0; i < UPRV_LENGTHOF(first); i++) {

     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);

     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);

     log_verbose("%i: ");

     for(j = 0; j < sizeUTF16[i]; j++) {

2693 /*log_verbose("\\u%04X", utf16String[i][j]);*/

       log_verbose("%04X", utf16String[i][j]);

2695 }

     log_verbose("\n");

2697 }

   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {

     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {

       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);

2701 }

2702 }

2703

2704 ucol_close(coll);

2705

2706 }

2707

 static void TestPartialSortKeyTermination(void) {

2709 static const char* cases[] = {

     "\\u1234\\u1234\\udc00",

     "\\udc00\\ud800\\ud800"

2712 };

2713

2714 int32_t i;

2715

2716 UErrorCode status = U_ZERO_ERROR;

2717

   UCollator *coll = ucol_open("", &status);

2719

2720 UCharIterator iter;

2721

2722 UChar currCase[256];

2723 int32_t length = 0;

2724 int32_t pKeyLen = 0;

2725

2726 uint8_t key[256];

2727

   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {

     uint32_t state[2] = {0, 0};

     length = u_unescape(cases[i], currCase, 256);

     uiter_setString(&iter, currCase, length);

     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);

     (void)pKeyLen;   /* Suppress set but not used warning. */

2734

     log_verbose("Done\n");

2736

2737 }

2738 ucol_close(coll);

2739 }

2740

 static void TestSettings(void) {

2742 static const char* cases[] = {

2743 "apple",

2744 "Apple"

2745 };

2746

2747 static const char* locales[] = {

2748 "",

2749 "en"

2750 };

2751

2752 UErrorCode status = U_ZERO_ERROR;

2753

   int32_t i = 0, j = 0;

2755

   UChar source[256], target[256];

   int32_t sLen = 0, tLen = 0;

2758

2759 UCollator *collateObject = NULL;

   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {

     collateObject = ucol_open(locales[i], &status);

2762 ucol_setStrength(collateObject, UCOL_PRIMARY);

     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);

     for(j = 1; j < UPRV_LENGTHOF(cases); j++) {

       sLen = u_unescape(cases[j-1], source, 256);

2766 source[sLen] = 0;

       tLen = u_unescape(cases[j], target, 256);

2768 source[tLen] = 0;

       doTest(collateObject, source, target, UCOL_EQUAL);

2770 }

2771 ucol_close(collateObject);

2772 }

2773 }

2774

 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
    /*
     * Runs the ucol_equals() checks for one locale and returns the number of
     * problems found.
     *
     * locName  locale name the collators were opened with
     * source   collator to compare against; not closed here
     * target   second collator for the same locale; this function closes it
     *
     * Checks performed:
     *  1. source and target compare equal;
     *  2. only if locName is also the actual locale of source:
     *     a clone of source is equal to it, the clone with French collation
     *     toggled is not, and a collator built from source's rule string is
     *     equal to it (skipped when the rule string is empty).
     *
     * Fixes: the clone is now closed on the "Error setting attributes" path
     * (it used to leak), and a NULL actual-locale name is no longer passed to
     * uprv_strcmp().
     */
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    const UChar *sourceRules = NULL;
    int32_t sourceRulesLen = 0;
    const char *actualLocale = NULL;
    UParseError parseError;
    UColAttributeValue french = UCOL_OFF;

    if(!ucol_equals(source, target)) {
        log_err("Same collators, different address not equal\n");
        errorNo++;
    }
    ucol_close(target);
    target = NULL;

    actualLocale = ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status);
    if(actualLocale == NULL || uprv_strcmp(locName, actualLocale) != 0) {
        /* The remaining checks only apply when locName is the actual locale. */
        return errorNo;
    }

    /* A clone must be equal to its original ... */
    target = ucol_safeClone(source, NULL, NULL, &status);
    if(U_FAILURE(status)) {
        log_err("Error creating clone\n");
        errorNo++;
        ucol_close(target);
        return errorNo;
    }
    if(!ucol_equals(source, target)) {
        log_err("Collator different from it's clone\n");
        errorNo++;
    }

    /* ... until one of its attributes is changed. */
    french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
    if(french == UCOL_ON) {
        ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
    } else {
        ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    }
    if(U_FAILURE(status)) {
        log_err("Error setting attributes\n");
        errorNo++;
        ucol_close(target);
        return errorNo;
    }
    if(ucol_equals(source, target)) {
        log_err("Collators same even when options changed\n");
        errorNo++;
    }
    ucol_close(target);
    target = NULL;

    /* A collator built from the same rules must be equal as well. */
    sourceRules = ucol_getRules(source, &sourceRulesLen);
    target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
    if(U_FAILURE(status)) {
        log_err("Error instantiating target from rules - %s\n", u_errorName(status));
        errorNo++;
        ucol_close(target);
        return errorNo;
    }
    /* Note: The tailoring rule string is an optional data item. */
    if(!ucol_equals(source, target) && sourceRulesLen != 0) {
        log_err("Collator different from collator that was created from the same rules\n");
        errorNo++;
    }
    ucol_close(target);
    return errorNo;
}

2832

2833

 static void TestEquals(void) {

2835 /* ucol_equals is not currently a public API. There is a chance that it will become

2836 * something like this.

2837 */

2838 /* test whether the two collators instantiated from the same locale are equal */

2839 UErrorCode status = U_ZERO_ERROR;

2840 UParseError parseError;

2841 int32_t noOfLoc = uloc_countAvailable();

2842 const char *locName = NULL;

     UCollator *source = NULL, *target = NULL;

2844 int32_t i = 0;

2845

2846 const char* rules[] = {

2847 "&l < lj <<< Lj <<< LJ",

2848 "&n < nj <<< Nj <<< NJ",

2849 "&ae <<< \\u00e4",

2850 "&AE <<< \\u00c4"

2851 };

2852 /*

2853 const char* badRules[] = {

2854 "&l <<< Lj",

2855 "&n < nj <<< nJ <<< NJ",

2856 "&a <<< \\u00e4",

2857 "&AE <<< \\u00c4 <<< x"

2858 };

2859 */

2860

     UChar sourceRules[1024], targetRules[1024];

     int32_t sourceRulesSize = 0, targetRulesSize = 0;

     int32_t rulesSize = UPRV_LENGTHOF(rules);

2864

     for(i = 0; i < rulesSize; i++) {

         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);

         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);

2868 }

2869

     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);

2871 if(status == U_FILE_ACCESS_ERROR) {

         log_data_err("Is your data around?\n");

2873 return;

     } else if(U_FAILURE(status)) {

         log_err("Error opening collator\n");

2876 return;

2877 }

     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);

     if(!ucol_equals(source, target)) {

         log_err("Equivalent collators not equal!\n");

2881 }

2882 ucol_close(source);

2883 ucol_close(target);

2884

     source = ucol_open("root", &status);

     target = ucol_open("root", &status);

     log_verbose("Testing root\n");

     if(!ucol_equals(source, source)) {

         log_err("Same collator not equal\n");

2890 }

     if(TestEqualsForCollator("root", source, target)) {

         log_err("Errors for root\n");

2893 }

2894 ucol_close(source);

2895

     for(i = 0; i<noOfLoc; i++) {

2897 status = U_ZERO_ERROR;

2898 locName = uloc_getAvailable(i);

2899 /*if(hasCollationElements(locName)) {*/

         log_verbose("Testing equality for locale %s\n", locName);

         source = ucol_open(locName, &status);

         target = ucol_open(locName, &status);

         if (U_FAILURE(status)) {

             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));

2905 continue;

2906 }

         if(TestEqualsForCollator(locName, source, target)) {

             log_err("Errors for locale %s\n", locName);

2909 }

2910 ucol_close(source);

2911 /*}*/

2912 }

2913 }

2914

 static void TestJ2726(void) {

     UChar a[2] = { 0x61, 0x00 }; /*"a"*/

     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/

     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/

2919 UErrorCode status = U_ZERO_ERROR;

     UCollator *coll = ucol_open("en", &status);

     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

     doTest(coll, a, aSpace, UCOL_EQUAL);

     doTest(coll, aSpace, a, UCOL_EQUAL);

     doTest(coll, a, spaceA, UCOL_EQUAL);

     doTest(coll, spaceA, a, UCOL_EQUAL);

     doTest(coll, spaceA, aSpace, UCOL_EQUAL);

     doTest(coll, aSpace, spaceA, UCOL_EQUAL);

2929 ucol_close(coll);

2930 }

2931

 static void NullRule(void) {

     UChar r[3] = {0};

2934 UErrorCode status = U_ZERO_ERROR;

     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

     if(U_SUCCESS(status)) {

         log_err("This should have been an error!\n");

2938 ucol_close(coll);

2939 } else {

2940 status = U_ZERO_ERROR;

2941 }

     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));

2945 } else {

2946 ucol_close(coll);

2947 }

2948 }

2949

2950 /**

2951 * Test for CollationElementIterator previous and next for the whole set of

2952 * unicode characters with normalization on.

2953 */

 static void TestNumericCollation(void)

2955 {

2956 UErrorCode status = U_ZERO_ERROR;

2957

2958 const static char *basicTestStrings[]={

2959 "hello1",

2960 "hello2",

2961 "hello2002",

2962 "hello2003",

2963 "hello123456",

2964 "hello1234567",

2965 "hello10000000",

2966 "hello100000000",

2967 "hello1000000000",

2968 "hello10000000000",

2969 };

2970

2971 const static char *preZeroTestStrings[]={

2972 "avery10000",

2973 "avery010000",

2974 "avery0010000",

2975 "avery00010000",

2976 "avery000010000",

2977 "avery0000010000",

2978 "avery00000010000",

2979 "avery000000010000",

2980 };

2981

2982 const static char *thirtyTwoBitNumericStrings[]={

2983 "avery42949672960",

2984 "avery42949672961",

2985 "avery42949672962",

2986 "avery429496729610"

2987 };

2988

2989 const static char *longNumericStrings[]={

2990 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.

2991 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that

2992 are treated as multiple collation elements. */

2993 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */

2994 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */

2995 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */

2996 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */

2997 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */

2998 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */

2999 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */

3000 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */

3001 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */

3002 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */

3003 };

3004

3005 const static char *supplementaryDigits[] = {

       "\\uD835\\uDFCE", /* 0 */

       "\\uD835\\uDFCF", /* 1 */

       "\\uD835\\uDFD0", /* 2 */

       "\\uD835\\uDFD1", /* 3 */

       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */

       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */

       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */

       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */

       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */

       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */

3016 };

3017

3018 const static char *foreignDigits[] = {

3019 "\\u0661",

3020 "\\u0662",

3021 "\\u0663",

       "\\u0661\\u0660",

       "\\u0661\\u0662",

       "\\u0661\\u0663",

       "\\u0662\\u0660",

       "\\u0662\\u0662",

       "\\u0662\\u0663",

       "\\u0663\\u0660",

       "\\u0663\\u0662",

       "\\u0663\\u0663"

3031 };

3032

3033 const static char *evenZeroes[] = {

3034 "2000",

3035 "2001",

3036 "2002",

3037 "2003"

3038 };

3039

3040 UColAttribute att = UCOL_NUMERIC_COLLATION;

3041 UColAttributeValue val = UCOL_ON;

3042

3043 /* Open our collator. */

     UCollator* coll = ucol_open("root", &status);

     if (U_FAILURE(status)){

         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",

3047 myErrorName(status));

3048 return;

3049 }

     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);

     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);

     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);

     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);

     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);

     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);

3056

3057 /* Setting up our collator to do digits. */

     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

     if (U_FAILURE(status)){

         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",

3061 myErrorName(status));

3062 return;

3063 }

3064

3065 /*

3066 Testing that prepended zeroes still yield the correct collation behavior.

3067 We expect that every element in our strings array will be equal.

3068 */

     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);

3070

3071 ucol_close(coll);

3072 }

3073

 static void TestTibetanConformance(void)

3075 {

3076 const char* test[] = {

         "\\u0FB2\\u0591\\u0F71\\u0061",

         "\\u0FB2\\u0F71\\u0061"

3079 };

3080

3081 UErrorCode status = U_ZERO_ERROR;

     UCollator *coll = ucol_open("", &status);

3083 UChar source[100];

3084 UChar target[100];

3085 int result;

     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

     if (U_SUCCESS(status)) {

         u_unescape(test[0], source, 100);

         u_unescape(test[1], target, 100);

         doTest(coll, source, target, UCOL_EQUAL);

         result = ucol_strcoll(coll, source, -1,   target, -1);

         log_verbose("result %d\n", result);

3093 if (UCOL_EQUAL != result) {

             log_err("Tibetan comparison error\n");

3095 }

3096 }

3097 ucol_close(coll);

3098

     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);

3100 }

3101

 static void TestPinyinProblem(void) {

     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };

     genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));

3105 }

3106

3107 /**

3108 * Iterate through the given iterator, checking to see that all the strings

3109 * in the expected array are present.

3110 * @param expected array of strings we expect to see, or NULL

3111 * @param expectedCount number of elements of expected, or 0

3112 */

 static int32_t checkUEnumeration(const char* msg,

3114 UEnumeration* iter,

3115 const char** expected,

3116 int32_t expectedCount) {

3117 UErrorCode ec = U_ZERO_ERROR;

     int32_t i = 0, n, j, bit;

3119 int32_t seenMask = 0;

3120

     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */

     n = uenum_count(iter, &ec);

     if (!assertSuccess("count", &ec)) return -1;

     log_verbose("%s = [", msg);

3125 for (;; ++i) {

         const char* s = uenum_next(iter, NULL, &ec);

         if (!assertSuccess("snext", &ec) || s == NULL) break;

         if (i != 0) log_verbose(",");

         log_verbose("%s", s);

3130 /* check expected list */

         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

             if ((seenMask&bit) == 0 &&

                 uprv_strcmp(s, expected[j]) == 0) {

3134 seenMask |= bit;

3135 break;

3136 }

3137 }

3138 }

     log_verbose("] (%d)\n", i);

     assertTrue("count verified", i==n);

3141 /* did we see all expected strings? */

     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

         if ((seenMask&bit)!=0) {

             log_verbose("Ok: \"%s\" seen\n", expected[j]);

3145 } else {

             log_err("FAIL: \"%s\" not seen\n", expected[j]);

3147 }

3148 }

3149 return n;

3150 }

3151

3152 /**

3153 * Test new API added for separate collation tree.

3154 */

 static void TestSeparateTrees(void) {

3156 UErrorCode ec = U_ZERO_ERROR;

3157 UEnumeration *e = NULL;

3158 int32_t n = -1;

3159 UBool isAvailable;

3160 char loc[256];

3161

     static const char* AVAIL[] = { "en", "de" };

3163

     static const char* KW[] = { "collation" };

3165

     static const char* KWVAL[] = { "phonebook", "stroke" };

3167

3168 #if !UCONFIG_NO_SERVICE

3169 e = ucol_openAvailableLocales(&ec);

3170 if (e != NULL) {

         assertSuccess("ucol_openAvailableLocales", &ec);

         assertTrue("ucol_openAvailableLocales!=0", e!=0);

         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));

         (void)n;    /* Suppress set but not used warnings. */

3175 /* Don't need to check n because we check list */

3176 uenum_close(e);

3177 } else {

         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));

3179 }

3180 #endif

3181

3182 e = ucol_getKeywords(&ec);

3183 if (e != NULL) {

         assertSuccess("ucol_getKeywords", &ec);

         assertTrue("ucol_getKeywords!=0", e!=0);

         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));

3187 /* Don't need to check n because we check list */

3188 uenum_close(e);

3189 } else {

         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));

3191 }

3192

     e = ucol_getKeywordValues(KW[0], &ec);

3194 if (e != NULL) {

         assertSuccess("ucol_getKeywordValues", &ec);

         assertTrue("ucol_getKeywordValues!=0", e!=0);

         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));

3198 /* Don't need to check n because we check list */

3199 uenum_close(e);

3200 } else {

         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));

3202 }

3203

3204 /* Try setting a warning before calling ucol_getKeywordValues */

3205 ec = U_USING_FALLBACK_WARNING;

     e = ucol_getKeywordValues(KW[0], &ec);

     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {

         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);

         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));

3210 /* Don't need to check n because we check list */

3211 uenum_close(e);

3212 }

3213

3214 /*

3215 U_DRAFT int32_t U_EXPORT2

3216 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,

3217 const char* locale, UBool* isAvailable,

3218 UErrorCode* status);

3219 }

3220 */

     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",

3222 &isAvailable, &ec);

     if (assertSuccess("getFunctionalEquivalent", &ec)) {

         assertEquals("getFunctionalEquivalent(de)", "root", loc);

3225 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",

3226 isAvailable == TRUE);

3227 }

3228

     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",

3230 &isAvailable, &ec);

     if (assertSuccess("getFunctionalEquivalent", &ec)) {

         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);

3233 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",

3234 isAvailable == FALSE);

3235 }

3236 }

3237

3238 /* supercedes TestJ784 */

 static void TestBeforePinyin(void) {

3240 const static char rules[] = {

         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"

         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"

         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"

         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"

         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"

         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"

3247 };

3248

3249 const static char *test[] = {

3250 "l\\u0101",

3251 "la",

3252 "l\\u0101n",

3253 "lan ",

3254 "l\\u0113",

3255 "le",

3256 "l\\u0113n",

3257 "len"

3258 };

3259

3260 const static char *test2[] = {

3261 "x\\u0101",

3262 "x\\u0100",

3263 "X\\u0101",

3264 "X\\u0100",

3265 "x\\u00E1",

3266 "x\\u00C1",

3267 "X\\u00E1",

3268 "X\\u00C1",

3269 "x\\u01CE",

3270 "x\\u01CD",

3271 "X\\u01CE",

3272 "X\\u01CD",

3273 "x\\u00E0",

3274 "x\\u00C0",

3275 "X\\u00E0",

3276 "X\\u00C0",

3277 "xa",

3278 "xA",

3279 "Xa",

3280 "XA",

3281 "x\\u0101x",

3282 "x\\u0100x",

3283 "x\\u00E1x",

3284 "x\\u00C1x",

3285 "x\\u01CEx",

3286 "x\\u01CDx",

3287 "x\\u00E0x",

3288 "x\\u00C0x",

3289 "xax",

3290 "xAx"

3291 };

3292

3293 const static char *test3[] = { // rdar://53741390

         "\\u85CF", // 藏 cáng

         "\\u92BA", // 銺 zàng

         "\\u85CF\\u6587", // 藏文 zàngwén

         "\\u85CF\\u8BED", // 藏语 zàngyǔ

         "\\u81D3", // 臓 zàng

3299 };

3300

     genericRulesStarter(rules, test, UPRV_LENGTHOF(test));

     genericLocaleStarter("zh", test, UPRV_LENGTHOF(test));

     genericRulesStarter(rules, test2, UPRV_LENGTHOF(test2));

     genericLocaleStarter("zh", test2, UPRV_LENGTHOF(test2));

     genericLocaleStarter("zh", test3, UPRV_LENGTHOF(test3));

3306 }

3307

 static void TestBeforeTightening(void) {

3309 static const struct {

3310 const char *rules;

3311 UErrorCode expectedStatus;

3312 } tests[] = {

3313 { "&[before 1]a<x", U_ZERO_ERROR },

3314 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },

3315 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },

3316 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },

3317 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },

3318 { "&[before 2]a<<x",U_ZERO_ERROR },

3319 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },

3320 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },

3321 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },

3322 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },

3323 { "&[before 3]a<<<x",U_ZERO_ERROR },

3324 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },

3325 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }

3326 };

3327

3328 int32_t i = 0;

3329

3330 UErrorCode status = U_ZERO_ERROR;

3331 UChar rlz[RULE_BUFFER_LEN] = { 0 };

3332 uint32_t rlen = 0;

3333

3334 UCollator *coll = NULL;

3335

3336

     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {

         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);

         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

         if(status != tests[i].expectedStatus) {

             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",

                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));

3343 }

3344 ucol_close(coll);

3345 status = U_ZERO_ERROR;

3346 }

3347

3348 }

3349

3350 /*

3351 &m < a

3352 &[before 1] a < x <<< X << q <<< Q < z

3353 assert: m <<< M < x <<< X << q <<< Q < z < a < n

3354

3355 &m < a

3356 &[before 2] a << x <<< X << q <<< Q < z

3357 assert: m <<< M < x <<< X << q <<< Q << a < z < n

3358

3359 &m < a

3360 &[before 3] a <<< x <<< X << q <<< Q < z

3361 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n

3362

3363

3364 &m << a

3365 &[before 1] a < x <<< X << q <<< Q < z

3366 assert: x <<< X << q <<< Q < z < m <<< M << a < n

3367

3368 &m << a

3369 &[before 2] a << x <<< X << q <<< Q < z

3370 assert: m <<< M << x <<< X << q <<< Q << a < z < n

3371

3372 &m << a

3373 &[before 3] a <<< x <<< X << q <<< Q < z

3374 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n

3375

3376

3377 &m <<< a

3378 &[before 1] a < x <<< X << q <<< Q < z

3379 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M

3380

3381 &m <<< a

3382 &[before 2] a << x <<< X << q <<< Q < z

3383 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n

3384

3385 &m <<< a

3386 &[before 3] a <<< x <<< X << q <<< Q < z

3387 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n

3388

3389

3390 &[before 1] s < x <<< X << q <<< Q < z

3391 assert: r <<< R < x <<< X << q <<< Q < z < s < n

3392

3393 &[before 2] s << x <<< X << q <<< Q < z

3394 assert: r <<< R < x <<< X << q <<< Q << s < z < n

3395

3396 &[before 3] s <<< x <<< X << q <<< Q < z

3397 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n

3398

3399

3400 &[before 1] \u24DC < x <<< X << q <<< Q < z

3401 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M

3402

3403 &[before 2] \u24DC << x <<< X << q <<< Q < z

3404 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n

3405

3406 &[before 3] \u24DC <<< x <<< X << q <<< Q < z

3407 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n

3408 */

3409

3410

3411 #if 0

3412 /* requires features not yet supported */

 static void TestMoreBefore(void) {

3414 static const struct {

3415 const char* rules;

         const char* order[16];

3417 int32_t size;

3418 } tests[] = {

3419 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",

         { "m","M","x","X","q","Q","z","a","n" }, 9},

3421 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",

         { "m","M","x","X","q","Q","a","z","n" }, 9},

3423 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",

         { "m","M","x","X","a","q","Q","z","n" }, 9},

3425 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",

         { "x","X","q","Q","z","m","M","a","n" }, 9},

3427 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",

         { "m","M","x","X","q","Q","a","z","n" }, 9},

3429 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",

         { "m","M","x","X","a","q","Q","z","n" }, 9},

3431 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",

         { "x","X","q","Q","z","n","m","a","M" }, 9},

3433 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",

         { "x","X","q","Q","m","a","M","z","n" }, 9},

3435 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",

         { "m","x","X","a","M","q","Q","z","n" }, 9},

3437 { "&[before 1] s < x <<< X << q <<< Q < z",

         { "r","R","x","X","q","Q","z","s","n" }, 9},

3439 { "&[before 2] s << x <<< X << q <<< Q < z",

         { "r","R","x","X","q","Q","s","z","n" }, 9},

3441 { "&[before 3] s <<< x <<< X << q <<< Q < z",

         { "r","R","x","X","s","q","Q","z","n" }, 9},

         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",

         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},

         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",

         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},

         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",

         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}

3449 };

3450

3451 int32_t i = 0;

3452

     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {

         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);

3455 }

3456 }

3457 #endif

3458

 static void TestTailorNULL( void ) {

     const static char* rule = "&a <<< '\\u0000'";

3461 UErrorCode status = U_ZERO_ERROR;

3462 UChar rlz[RULE_BUFFER_LEN] = { 0 };

3463 uint32_t rlen = 0;

     UChar a = 1, null = 0;

3465 UCollationResult res = UCOL_EQUAL;

3466

3467 UCollator *coll = NULL;

3468

3469

     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);

     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

3472

     if(U_FAILURE(status)) {

         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));

3475 } else {

         res = ucol_strcoll(coll, &a, 1, &null, 1);

3477

3478 if(res != UCOL_LESS) {

             log_err("NULL was not tailored properly!\n");

3480 }

3481 }

3482

3483 ucol_close(coll);

3484 }

3485

3486 static void

3487 TestUpperFirstQuaternary(void)

3488 {

   const char* tests[] = { "B", "b", "Bb", "bB" };

3490 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };

3491 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };

   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));

3493 }

3494

3495 static void

3496 TestJ4960(void)

3497 {

   const char* tests[] = { "\\u00e2T", "aT" };

3499 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };

3500 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };

   const char* tests2[] = { "a", "A" };

   const char* rule = "&[first tertiary ignorable]=A=a";

3503 UColAttribute att2[] = { UCOL_CASE_LEVEL };

3504 UColAttributeValue attVals2[] = { UCOL_ON };

3505 /* Test whether we correctly ignore primary ignorables on case level when */

3506 /* we have only primary & case level */

   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);

3508 /* Test whether ICU4J will make case level for sortkeys that have primary strength */

3509 /* and case level */

   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));

3511 /* Test whether completely ignorable letters have case level info (they shouldn't) */

   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);

3513 }

3514

3515 static void

3516 TestJ5223(void)

3517 {

   static const char *test = "this is a test string";

3519 UChar ustr[256];

   int32_t ustr_length = u_unescape(test, ustr, 256);

3521 unsigned char sortkey[256];

3522 int32_t sortkey_length;

3523 UErrorCode status = U_ZERO_ERROR;

3524 static UCollator *coll = NULL;

   coll = ucol_open("root", &status);

   if(U_FAILURE(status)) {

     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

3528 return;

3529 }

3530 ucol_setStrength(coll, UCOL_PRIMARY);

   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

   if (U_FAILURE(status)) {

     log_err("Failed setting atributes\n");

3535 return;

3536 }

   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);

   if (sortkey_length > 256) return;

3539

3540 /* we mark the position where the null byte should be written in advance */

   sortkey[sortkey_length-1] = 0xAA;

3542

3543 /* we set the buffer size one byte higher than needed */

   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

3545 sortkey_length+1);

3546

3547 /* no error occurs (for me) */

   if (sortkey[sortkey_length-1] == 0xAA) {

     log_err("Hit bug at first try\n");

3550 }

3551

3552 /* we mark the position where the null byte should be written again */

   sortkey[sortkey_length-1] = 0xAA;

3554

3555 /* this time we set the buffer size to the exact amount needed */

   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

3557 sortkey_length);

3558

3559 /* now the trailing null byte is not written */

   if (sortkey[sortkey_length-1] == 0xAA) {

     log_err("Hit bug at second try\n");

3562 }

3563

3564 ucol_close(coll);

3565 }

3566

3567 /* Regression test for Thai partial sort key problem */

3568 static void

3569 TestJ5232(void)

3570 {

3571 const static char *test[] = {

         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",

         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"

3574 };

3575

     genericLocaleStarter("th", test, UPRV_LENGTHOF(test));

3577 }

3578

3579 static void

3580 TestJ5367(void)

3581 {

     const static char *test[] = { "a", "y" };

     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";

     genericRulesStarter(rules, test, UPRV_LENGTHOF(test));

3585 }

3586

3587 static void

3588 TestVI5913(void)

3589 {

3590 UErrorCode status = U_ZERO_ERROR;

3591 int32_t i, j;

3592 UCollator *coll =NULL;

     uint8_t  resColl[100], expColl[100];

     int32_t  rLen, tLen, ruleLen, sLen, kLen;

     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/

     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/

3597 /*

3598 * Note: Just tailoring &z<ae^ does not work as expected:

3599 * The UCA spec requires for discontiguous contractions that they

3600 * extend an *existing match* by one combining mark at a time.

3601 * Therefore, ae must be a contraction so that the builder finds

3602 * discontiguous contractions for ae^, for example with an intervening underdot.

3603 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.

3604 */

3605 UChar rule3[256]={

         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */

         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/

3608 0};

3609 static const UChar tData[][20]={

         {0x1EAC, 0},

         {0x0041, 0x0323, 0x0302, 0},

         {0x1EA0, 0x0302, 0},

         {0x00C2, 0x0323, 0},

         {0x1ED8, 0},  /* O with dot and circumflex */

         {0x1ECC, 0x0302, 0},

         {0x1EB7, 0},

         {0x1EA1, 0x0306, 0},

3618 };

3619 static const UChar tailorData[][20]={

         {0x1FA2, 0},  /* Omega with 3 combining marks */

         {0x03C9, 0x0313, 0x0300, 0x0345, 0},

         {0x1FF3, 0x0313, 0x0300, 0},

         {0x1F60, 0x0300, 0x0345, 0},

         {0x1F62, 0x0345, 0},

         {0x1FA0, 0x0300, 0},

3626 };

3627 static const UChar tailorData2[][20]={

         {0x1E63, 0x030C, 0},  /* s with dot below + caron */

         {0x0073, 0x0323, 0x030C, 0},

         {0x0073, 0x030C, 0x0323, 0},

3631 };

3632 static const UChar tailorData3[][20]={

         {0x007a, 0},  /*  z */

         {0x0061, 0x0065, 0},  /*  a + e */

         {0x0061, 0x00ea, 0}, /* a + e with circumflex */

         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */

         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */

         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */

         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */

         {0x00EA, 0},  /* e with circumflex  */

3641 };

3642

3643 /* Test Vietnamese sort. */

     coll = ucol_open("vi", &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));

3647 return;

3648 }

     log_verbose("\n\nVI collation:");

     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {

         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

3652 }

     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {

         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

3655 }

     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {

         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");

3658 }

     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {

         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

3661 }

3662

     for (j=0; j<8; j++) {

         tLen = u_strlen(tData[j]);

         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);

         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

         for(i = 0; i<rLen; i++) {

             log_verbose(" %02X", resColl[i]);

3669 }

3670 }

3671

3672 ucol_close(coll);

3673

3674 /* Test Romanian sort. */

     coll = ucol_open("ro", &status);

     log_verbose("\n\nRO collation:");

     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {

         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

3679 }

     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {

         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

3682 }

     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {

         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

3685 }

3686

     for (j=4; j<8; j++) {

         tLen = u_strlen(tData[j]);

         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);

         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

         for(i = 0; i<rLen; i++) {

             log_verbose(" %02X", resColl[i]);

3693 }

3694 }

3695 ucol_close(coll);

3696

3697 /* Test the precomposed Greek character with 3 combining marks. */

     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");

3699 ruleLen = u_strlen(rule);

     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

     if (U_FAILURE(status)) {

         log_err("ucol_openRules failed with %s\n", u_errorName(status));

3703 return;

3704 }

     sLen = u_strlen(tailorData[0]);

     for (j=1; j<6; j++) {

         tLen = u_strlen(tailorData[j]);

         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {

             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);

3710 }

3711 }

3712 /* Test getSortKey. */

     tLen = u_strlen(tailorData[0]);

     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);

     for (j=0; j<6; j++) {

         tLen = u_strlen(tailorData[j]);

         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);

         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {

             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);

             for(i = 0; i<rLen; i++) {

                 log_err(" %02X", resColl[i]);

3722 }

3723 }

3724 }

3725 ucol_close(coll);

3726

     log_verbose("\n\nTailoring test for s with caron:");

3728 ruleLen = u_strlen(rule2);

     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

     tLen = u_strlen(tailorData2[0]);

     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);

     for (j=1; j<3; j++) {

         tLen = u_strlen(tailorData2[j]);

         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);

         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {

             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);

             for(i = 0; i<rLen; i++) {

                 log_err(" %02X", resColl[i]);

3739 }

3740 }

3741 }

3742 ucol_close(coll);

3743

     log_verbose("\n\nTailoring test for &z< ae with circumflex:");

3745 ruleLen = u_strlen(rule3);

     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

     tLen = u_strlen(tailorData3[3]);

     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);

     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);

     for(i = 0; i<kLen; i++) {

         log_verbose(" %02X", expColl[i]);

3752 }

     for (j=4; j<6; j++) {

         tLen = u_strlen(tailorData3[j]);

         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);

3756

         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {

             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);

             for(i = 0; i<rLen; i++) {

                 log_err(" %02X", resColl[i]);

3761 }

3762 }

3763

         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);

          for(i = 0; i<rLen; i++) {

              log_verbose(" %02X", resColl[i]);

3767 }

3768 }

3769 ucol_close(coll);

3770 }

3771

3772 static void

3773 TestTailor6179(void)

3774 {

3775 UErrorCode status = U_ZERO_ERROR;

3776 int32_t i;

3777 UCollator *coll =NULL;

3778 uint8_t resColl[100];

3779 int32_t rLen, tLen, ruleLen;

3780 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */

3781 static const UChar rule1[]={

             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,

             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,

             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,

             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};

3786 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */

3787 static const UChar rule2[]={

             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,

             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,

             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,

             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,

             0x3C,0x3C,0x20,0x62,0};

3793

3794 static const UChar tData1[][4]={

         {0x61, 0},

         {0x62, 0},

         { 0xFDD0,0x009E, 0}

3798 };

3799 static const UChar tData2[][4]={

         {0x61, 0},

         {0x62, 0},

         { 0xFDD0,0x009E, 0}

3803 };

3804

3805 /*

3806 * These values from FractionalUCA.txt will change,

3807 * and need to be updated here.

3808 * TODO: Make this not check for particular sort keys.

3809 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.

3810 */

     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};

     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};

     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};

     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};

3815

3816 UParseError parseError;

3817

3818 /* Test [Last Primary ignorable] */

3819

     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");

3821 ruleLen = u_strlen(rule1);

     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

     if (U_FAILURE(status)) {

         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));

3825 return;

3826 }

     tLen = u_strlen(tData1[0]);

     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);

     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {

         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);

         for(i = 0; i<rLen; i++) {

             log_err(" %02X", resColl[i]);

3833 }

         log_err("\n");

3835 }

     tLen = u_strlen(tData1[1]);

     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);

     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {

         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);

         for(i = 0; i<rLen; i++) {

             log_err(" %02X", resColl[i]);

3842 }

         log_err("\n");

3844 }

3845 ucol_close(coll);

3846

3847

3848 /* Test [Last Secondary ignorable] */

     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");

3850 ruleLen = u_strlen(rule2);

     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);

     if (U_FAILURE(status)) {

         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));

         log_info("  offset=%d  \"%s\" | \"%s\"\n",

                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));

3856 return;

3857 }

     tLen = u_strlen(tData2[0]);

     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);

     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {

         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);

         for(i = 0; i<rLen; i++) {

             log_err(" %02X", resColl[i]);

3864 }

         log_err("\n");

3866 }

     tLen = u_strlen(tData2[1]);

     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);

     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {

       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);

       for(i = 0; i<rLen; i++) {

         log_err(" %02X", resColl[i]);

3873 }

       log_err("\n");

3875 }

3876 ucol_close(coll);

3877 }

3878

3879 static void

3880 TestUCAPrecontext(void)

3881 {

3882 UErrorCode status = U_ZERO_ERROR;

3883 int32_t i, j;

3884 UCollator *coll =NULL;

     uint8_t  resColl[100], prevColl[100];

3886 int32_t rLen, tLen, ruleLen;

     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */

     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};

3889 /* & l middle-dot << a a is an expansion. */

3890

3891 UChar tData1[][20]={

             { 0xb7, 0},  /* standalone middle dot(0xb7) */

             { 0x387, 0}, /* standalone middle dot(0x387) */

             { 0x61, 0},  /* a */

             { 0x6C, 0},  /* l */

             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */

             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */

             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */

             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */

             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */

             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */

             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */

3903 };

3904

     log_verbose("\n\nEN collation:");

     coll = ucol_open("en", &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));

3909 return;

3910 }

     for (j=0; j<11; j++) {

         tLen = u_strlen(tData1[j]);

         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {

             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3916 j, tData1[j]);

3917 }

         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);

         for(i = 0; i<rLen; i++) {

             log_verbose(" %02X", resColl[i]);

3921 }

         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3923 }

3924 ucol_close(coll);

3925

3926

      log_verbose("\n\nJA collation:");

      coll = ucol_open("ja", &status);

      if (U_FAILURE(status)) {

3930 log_err("Tailoring test: &z <<a|- failed!");

3931 return;

3932 }

      for (j=0; j<11; j++) {

          tLen = u_strlen(tData1[j]);

          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {

              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3938 j, tData1[j]);

3939 }

          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);

          for(i = 0; i<rLen; i++) {

              log_verbose(" %02X", resColl[i]);

3943 }

          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3945 }

3946 ucol_close(coll);

3947

3948

       log_verbose("\n\nTailoring test: & middle dot < a ");

3950 ruleLen = u_strlen(rule1);

       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

       if (U_FAILURE(status)) {

3953 log_err("Tailoring test: & middle dot < a failed!");

3954 return;

3955 }

       for (j=0; j<11; j++) {

           tLen = u_strlen(tData1[j]);

           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {

               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3961 j, tData1[j]);

3962 }

           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);

           for(i = 0; i<rLen; i++) {

               log_verbose(" %02X", resColl[i]);

3966 }

           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3968 }

3969 ucol_close(coll);

3970

3971

        log_verbose("\n\nTailoring test: & l middle-dot << a ");

3973 ruleLen = u_strlen(rule2);

        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

        if (U_FAILURE(status)) {

3976 log_err("Tailoring test: & l middle-dot << a failed!");

3977 return;

3978 }

        for (j=0; j<11; j++) {

            tLen = u_strlen(tData1[j]);

            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {

                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3984 j, tData1[j]);

3985 }

            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {

                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",

3988 j, tData1[j]);

3989 }

            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);

            for(i = 0; i<rLen; i++) {

                log_verbose(" %02X", resColl[i]);

3993 }

            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3995 }

3996 ucol_close(coll);

3997 }

3998

3999 static void

4000 TestOutOfBuffer5468(void)

4001 {

     static const char *test = "\\u4e00";

4003 UChar ustr[256];

     int32_t ustr_length = u_unescape(test, ustr, 256);

4005 unsigned char shortKeyBuf[1];

4006 int32_t sortkey_length;

4007 UErrorCode status = U_ZERO_ERROR;

4008 static UCollator *coll = NULL;

4009

     coll = ucol_open("root", &status);

     if(U_FAILURE(status)) {

       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

4013 return;

4014 }

4015 ucol_setStrength(coll, UCOL_PRIMARY);

     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

     if (U_FAILURE(status)) {

       log_err("Failed setting atributes\n");

4020 return;

4021 }

4022

     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));

     if (sortkey_length != 4) {

         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);

4026 }

     log_verbose("length of sortKey is %d", sortkey_length);

4028 ucol_close(coll);

4029 }

4030

4031 #define TSKC_DATA_SIZE 5

4032 #define TSKC_BUF_SIZE 50

4033 static void

4034 TestSortKeyConsistency(void)

4035 {

4036 UErrorCode icuRC = U_ZERO_ERROR;

4037 UCollator* ucol;

     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};

4039

4040 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

4041 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

4042 int32_t i, j, i2;

4043

     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);

     if (U_FAILURE(icuRC))

4046 {

         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));

4048 return;

4049 }

4050

     for (i = 0; i < TSKC_DATA_SIZE; i++)

4052 {

4053 UCharIterator uiter;

         uint32_t state[2] = { 0, 0 };

         int32_t dataLen = i+1;

         for (j=0; j<TSKC_BUF_SIZE; j++)

             bufFull[i][j] = bufPart[i][j] = 0;

4058

4059 /* Full sort key */

         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);

4061

4062 /* Partial sort key */

         uiter_setString(&uiter, data, dataLen);

         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);

         if (U_FAILURE(icuRC))

4066 {

             log_err("ucol_nextSortKeyPart failed\n");

4068 ucol_close(ucol);

4069 return;

4070 }

4071

         for (i2=0; i2<i; i2++)

4073 {

4074 UBool fullMatch = TRUE;

4075 UBool partMatch = TRUE;

             for (j=0; j<TSKC_BUF_SIZE; j++)

4077 {

                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);

                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);

4080 }

4081 if (fullMatch != partMatch) {

                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"

                                   : "partial key was consistent, but full key changed\n");

4084 ucol_close(ucol);

4085 return;

4086 }

4087 }

4088 }

4089

4090 /*=============================================*/

4091 ucol_close(ucol);

4092 }

4093

4094 /* ticket: 6101 */

 static void TestCroatianSortKey(void) {

     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";

4097 UErrorCode status = U_ZERO_ERROR;

4098 UCollator *ucol;

4099 UCharIterator iter;

4100

     static const UChar text[] = { 0x0044, 0xD81A };

4102

     size_t length = UPRV_LENGTHOF(text);

4104

4105 uint8_t textSortKey[32];

4106 size_t lenSortKey = 32;

4107 size_t actualSortKeyLen;

     uint32_t uStateInfo[2] = { 0, 0 };

4109

     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));

4113 return;

4114 }

4115

     uiter_setString(&iter, text, (int32_t)length);

4117

4118 actualSortKeyLen = ucol_nextSortKeyPart(

         ucol, &iter, (uint32_t*)uStateInfo,

4120 textSortKey, (int32_t)lenSortKey, &status

4121 );

4122

4123 if (actualSortKeyLen == lenSortKey) {

         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");

4125 }

4126

4127 ucol_close(ucol);

4128 }

4129

4130 /* ticket: 6140 */

4131 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since

4132 * they are both Hiragana and Katakana

4133 */

4134 #define SORTKEYLEN 50

 static void TestHiragana(void) {

4136 UErrorCode status = U_ZERO_ERROR;

4137 UCollator* ucol;

4138 UCollationResult strcollresult;

     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */

     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };

     int32_t data1Len = UPRV_LENGTHOF(data1);

     int32_t data2Len = UPRV_LENGTHOF(data2);

4143 int32_t i, j;

4144 uint8_t sortKey1[SORTKEYLEN];

4145 uint8_t sortKey2[SORTKEYLEN];

4146

4147 UCharIterator uiter1;

4148 UCharIterator uiter2;

     uint32_t state1[2] = { 0, 0 };

     uint32_t state2[2] = { 0, 0 };

4151 int32_t keySize1;

4152 int32_t keySize2;

4153

     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,

4155 &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));

4158 return;

4159 }

4160

4161 /* Start of full sort keys */

4162 /* Full sort key1 */

     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);

4164 /* Full sort key2 */

     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);

4166 if (keySize1 == keySize2) {

         for (i = 0; i < keySize1; i++) {

             if (sortKey1[i] != sortKey2[i]) {

4169 log_err("Full sort keys are different. Should be equal.");

4170 }

4171 }

4172 } else {

         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);

4174 }

4175 /* End of full sort keys */

4176

4177 /* Start of partial sort keys */

4178 /* Partial sort key1 */

     uiter_setString(&uiter1, data1, data1Len);

     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);

4181 /* Partial sort key2 */

     uiter_setString(&uiter2, data2, data2Len);

     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);

     if (U_SUCCESS(status) && keySize1 == keySize2) {

         for (j = 0; j < keySize1; j++) {

             if (sortKey1[j] != sortKey2[j]) {

4187 log_err("Partial sort keys are different. Should be equal");

4188 }

4189 }

4190 } else {

         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);

4192 }

4193 /* End of partial sort keys */

4194

4195 /* Start of strcoll */

4196 /* Use ucol_strcoll() to determine ordering */

     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);

4198 if (strcollresult != UCOL_EQUAL) {

4199 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");

4200 }

4201

4202 ucol_close(ucol);

4203 }

4204

4205 /* Convenient struct for running collation tests */

4206 typedef struct {

4207 const UChar source[MAX_TOKEN_LEN]; /* String on left */

4208 const UChar target[MAX_TOKEN_LEN]; /* String on right */

4209 UCollationResult result; /* -1, 0 or +1, depending on collation */

4210 } OneTestCase;

4211

4212 /*

4213 * Utility function to test one collation test case.

4214 * @param testcases Array of test cases.

4215 * @param n_testcases Size of the array testcases.

4216 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.

4217 * @param n_rules Size of the array str_rules.

4218 */

 static void doTestOneTestCase(const OneTestCase testcases[],

4220 int n_testcases,

4221 const char* str_rules[],

4222 int n_rules)

4223 {

4224 int rule_no, testcase_no;

4225 UChar rule[500];

4226 int32_t length = 0;

4227 UErrorCode status = U_ZERO_ERROR;

4228 UParseError parse_error;

4229 UCollator *myCollation;

4230

   for (rule_no = 0; rule_no < n_rules; ++rule_no) {

4232

     length = u_unescape(str_rules[rule_no], rule, 500);

     if (length == 0) {

         log_err("ERROR: The rule cannot be unescaped: %s\n");

4236 return;

4237 }

     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

         log_info("  offset=%d  \"%s\" | \"%s\"\n",

4242 parse_error.offset,

                  aescstrdup(parse_error.preContext, -1),

                  aescstrdup(parse_error.postContext, -1));

4245 return;

4246 }

     log_verbose("Testing the <<* syntax\n");

     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

4249 ucol_setStrength(myCollation, UCOL_TERTIARY);

     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {

4251 doTest(myCollation,

4252 testcases[testcase_no].source,

4253 testcases[testcase_no].target,

4254 testcases[testcase_no].result

4255 );

4256 }

4257 ucol_close(myCollation);

4258 }

4259 }

4260

4261 const static OneTestCase rangeTestcases[] = {

   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */

   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */

   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

4265

   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */

   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */

   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */

   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */

   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

4271

   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */

   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */

   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */

   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */

4276

   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */

   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */

   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */

   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */

   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */

   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */

   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */

   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */

4285 };

4286

 static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);

4288

4289 const static OneTestCase rangeTestcasesSupplemental[] = {

   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */

   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */

   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */

   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */

   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */

   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */

   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */

4297 };

4298

 static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);

4300

4301 const static OneTestCase rangeTestcasesQwerty[] = {

   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */

   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

4304

   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */

   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */

4307

   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */

   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */

4310

   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */

   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */

4313

   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},

     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */

   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},

     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */

4318 };

4319

 static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);

4321

 static void TestSameStrengthList(void)

4323 {

4324 const char* strRules[] = {

4325 /* Normal */

4326 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",

4327

4328 /* Lists */

4329 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",

4330 };

   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));

4332 }

4333

 static void TestSameStrengthListQuoted(void)

4335 {

4336 const char* strRules[] = {

4337 /* Lists with quoted characters */

     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",

     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",

4340

     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",

     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",

4343

     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033", 

     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'", 

4346 };

   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));

4348 }

4349

 static void TestSameStrengthListSupplemental(void)

4351 {

4352 const char* strRules[] = {

     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",

     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",

     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",

     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",

4357 };

   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));

4359 }

4360

 static void TestSameStrengthListQwerty(void)

4362 {

4363 const char* strRules[] = {

4364 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

4365 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */

     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",

     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",

     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",

4369

4370 /* Quoted characters also will work if two quoted characters are not consecutive. */

     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",

4372

4373 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */

4374 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/

4375

4376 };

   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));

4378 }

4379

 static void TestSameStrengthListQuotedQwerty(void)

4381 {

4382 const char* strRules[] = {

4383 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

4384 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */

4385 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */

4386

4387 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */

4388 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */

4389 };

   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));

4391 }

4392

 static void TestSameStrengthListRanges(void)

4394 {

4395 const char* strRules[] = {

4396 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",

4397 };

   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));

4399 }

4400

 static void TestSameStrengthListSupplementalRanges(void)

4402 {

4403 const char* strRules[] = {

4404 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */

     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",

4406 };

   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));

4408 }

4409

 static void TestSpecialCharacters(void)

4411 {

4412 const char* strRules[] = {

4413 /* Normal */

4414 "&';'<'+'<','<'-'<'&'<'*'",

4415

4416 /* List */

4417 "&';'<*'+,-&*'",

4418

4419 /* Range */

4420 "&';'<*'+'-'-&*'",

4421 };

4422

4423 const static OneTestCase specialCharacterStrings[] = {

     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */

     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */

     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */

     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */

4428 };

   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

4430 }

4431

 static void TestPrivateUseCharacters(void)

4433 {

4434 const char* strRules[] = {

4435 /* Normal */

     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",

     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", 

4438 };

4439

4440 const static OneTestCase privateUseCharacterStrings[] = {

     { {0x5ea7}, {0xe2d8}, UCOL_LESS },

     { {0xe2d8}, {0xe2d9}, UCOL_LESS },

     { {0xe2d9}, {0xe2da}, UCOL_LESS },

     { {0xe2da}, {0xe2db}, UCOL_LESS },

     { {0xe2db}, {0xe2dc}, UCOL_LESS },

     { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4447 };

   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

4449 }

4450

 static void TestPrivateUseCharactersInList(void)

4452 {

4453 const char* strRules[] = {

4454 /* List */

     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",

4456 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */

     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",

4458 };

4459

4460 const static OneTestCase privateUseCharacterStrings[] = {

     { {0x5ea7}, {0xe2d8}, UCOL_LESS },

     { {0xe2d8}, {0xe2d9}, UCOL_LESS },

     { {0xe2d9}, {0xe2da}, UCOL_LESS },

     { {0xe2da}, {0xe2db}, UCOL_LESS },

     { {0xe2db}, {0xe2dc}, UCOL_LESS },

     { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4467 };

   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

4469 }

4470

 static void TestPrivateUseCharactersInRange(void)

4472 {

4473 const char* strRules[] = {

4474 /* Range */

     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",

     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",

4477 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */

4478 };

4479

4480 const static OneTestCase privateUseCharacterStrings[] = {

     { {0x5ea7}, {0xe2d8}, UCOL_LESS },

     { {0xe2d8}, {0xe2d9}, UCOL_LESS },

     { {0xe2d9}, {0xe2da}, UCOL_LESS },

     { {0xe2da}, {0xe2db}, UCOL_LESS },

     { {0xe2db}, {0xe2dc}, UCOL_LESS },

     { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4487 };

   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

4489 }

4490

 static void TestInvalidListsAndRanges(void)

4492 {

4493 const char* invalidRules[] = {

4494 /* Range not in starred expression */

     "&\\ufffe<\\uffff-\\U00010002",

4496

4497 /* Range without start */

4498 "&a<*-c",

4499

4500 /* Range without end */

4501 "&a<*b-",

4502

4503 /* More than one hyphen */

4504 "&a<*b-g-l",

4505

4506 /* Range in the wrong order */

4507 "&a<*k-b",

4508

4509 };

4510

4511 UChar rule[500];

4512 UErrorCode status = U_ZERO_ERROR;

4513 UParseError parse_error;

   int n_rules = UPRV_LENGTHOF(invalidRules);

4515 int rule_no;

4516 int length;

4517 UCollator *myCollation;

4518

   for (rule_no = 0; rule_no < n_rules; ++rule_no) {

4520

     length = u_unescape(invalidRules[rule_no], rule, 500);

     if (length == 0) {

         log_err("ERROR: The rule cannot be unescaped: %s\n");

4524 return;

4525 }

     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);

     (void)myCollation;      /* Suppress set but not used warning. */

     if(!U_FAILURE(status)){

       log_err("ERROR: Could not cause a failure as expected: \n");

4530 }

4531 status = U_ZERO_ERROR;

4532 }

4533 }

4534

4535 /*

4536 * This test ensures that characters placed before a character in a different script have the same lead byte

4537 * in their collation key before and after script reordering.

4538 */

 static void TestBeforeRuleWithScriptReordering(void)

4540 {

4541 UParseError error;

4542 UErrorCode status = U_ZERO_ERROR;

4543 UCollator *myCollation;

     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";

4545 UChar rules[500];

4546 uint32_t rulesLength = 0;

     int32_t reorderCodes[1] = {USCRIPT_GREEK};

4548 UCollationResult collResult;

4549

4550 uint8_t baseKey[256];

4551 uint32_t baseKeyLength;

4552 uint8_t beforeKey[256];

4553 uint32_t beforeKeyLength;

4554

4555 UChar base[] = { 0x03b1 }; /* base */

     int32_t baseLen = UPRV_LENGTHOF(base);

4557

4558 UChar before[] = { 0x0e01 }; /* ko kai */

     int32_t beforeLen = UPRV_LENGTHOF(before);

4560

4561 /*UChar *data[] = { before, base };

4562 genericRulesStarter(srules, data, 2);*/

4563

     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");

4565

     (void)beforeKeyLength;   /* Suppress set but not used warnings. */

4567 (void)baseKeyLength;

4568

4569 /* build collator */

     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");

4571

     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));

     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

4576 return;

4577 }

4578

4579 /* check collation results - before rule applied but not script reordering */

     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

4581 if (collResult != UCOL_GREATER) {

         log_err("Collation result not correct before script reordering = %d\n", collResult);

4583 }

4584

4585 /* check the lead byte of the collation keys before script reordering */

     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

     if (baseKey[0] != beforeKey[0]) {

       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);

4590 }

4591

4592 /* reorder the scripts */

     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));

4596 return;

4597 }

4598

4599 /* check collation results - before rule applied and after script reordering */

     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

4601 if (collResult != UCOL_GREATER) {

         log_err("Collation result not correct after script reordering = %d\n", collResult);

4603 }

4604

4605 /* check the lead byte of the collation keys after script reordering */

     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

     if (baseKey[0] != beforeKey[0]) {

         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);

4610 }

4611

4612 ucol_close(myCollation);

4613 }

4614

4615 /*

4616 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.

4617 */

 static void TestNonLeadBytesDuringCollationReordering(void)

4619 {

4620 UErrorCode status = U_ZERO_ERROR;

4621 UCollator *myCollation;

     int32_t reorderCodes[1] = {USCRIPT_GREEK};

4623

4624 uint8_t baseKey[256];

4625 uint32_t baseKeyLength;

4626 uint8_t reorderKey[256];

4627 uint32_t reorderKeyLength;

4628

     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };

4630

4631 uint32_t i;

4632

4633

     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");

4635

4636 /* build collator tertiary */

     myCollation = ucol_open("", &status);

4638 ucol_setStrength(myCollation, UCOL_TERTIARY);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

4641 return;

4642 }

     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);

4644

     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));

4648 return;

4649 }

     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);

4651

4652 if (baseKeyLength != reorderKeyLength) {

         log_err("Key lengths not the same during reordering.\n");

4654 return;

4655 }

4656

     for (i = 1; i < baseKeyLength; i++) {

         if (baseKey[i] != reorderKey[i]) {

             log_err("Collation key bytes not the same at position %d.\n", i);

4660 return;

4661 }

4662 }

4663 ucol_close(myCollation);

4664

4665 /* build collator quaternary */

     myCollation = ucol_open("", &status);

4667 ucol_setStrength(myCollation, UCOL_QUATERNARY);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

4670 return;

4671 }

     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);

4673

     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));

4677 return;

4678 }

     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);

4680

4681 if (baseKeyLength != reorderKeyLength) {

         log_err("Key lengths not the same during reordering.\n");

4683 return;

4684 }

4685

     for (i = 1; i < baseKeyLength; i++) {

         if (baseKey[i] != reorderKey[i]) {

             log_err("Collation key bytes not the same at position %d.\n", i);

4689 return;

4690 }

4691 }

4692 ucol_close(myCollation);

4693 }

4694

4695 /*

4696 * Test reordering API.

4697 */

 static void TestReorderingAPI(void)

4699 {

4700 UErrorCode status = U_ZERO_ERROR;

4701 UCollator *myCollation;

     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};

     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};

     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};

4705 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;

4706 UCollationResult collResult;

4707 int32_t retrievedReorderCodesLength;

4708 int32_t retrievedReorderCodes[10];

4709 UChar greekString[] = { 0x03b1 };

4710 UChar punctuationString[] = { 0x203e };

4711 int loopIndex;

4712

     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");

4714

4715 /* build collator tertiary */

     myCollation = ucol_open("", &status);

4717 ucol_setStrength(myCollation, UCOL_TERTIARY);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

4720 return;

4721 }

4722

4723 /* set the reorderding */

     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));

4727 return;

4728 }

4729

4730 /* get the reordering */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);

4732 if (status != U_BUFFER_OVERFLOW_ERROR) {

         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));

4734 return;

4735 }

4736 status = U_ZERO_ERROR;

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));

4739 return;

4740 }

4741 /* now let's really get it */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));

4745 return;

4746 }

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));

4749 return;

4750 }

     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4754 return;

4755 }

4756 }

     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));

4758 if (collResult != UCOL_LESS) {

         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");

4760 return;

4761 }

4762

4763 /* clear the reordering */

     ucol_setReorderCodes(myCollation, NULL, 0, &status);    

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));

4767 return;

4768 }

4769

4770 /* get the reordering again */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);

     if (retrievedReorderCodesLength != 0) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);

4774 return;

4775 }

4776

     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));

4778 if (collResult != UCOL_GREATER) {

         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");

4780 return;

4781 }

4782

4783 /* clear the reordering using [NONE] */

     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);    

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));

4787 return;

4788 }

4789

4790 /* get the reordering again */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);

     if (retrievedReorderCodesLength != 0) {

4793 log_err_status(status,

                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",

4795 retrievedReorderCodesLength);

4796 return;

4797 }

4798

4799 /* test for error condition on duplicate reorder codes */

     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);

     if (!U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");

4803 return;

4804 }

4805

4806 status = U_ZERO_ERROR;

4807 /* test for reorder codes after a reset code */

     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);

     if (!U_FAILURE(status)) {

         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");

4811 return;

4812 }

4813

4814 ucol_close(myCollation);

4815 }

4816

4817 /*

4818 * Test reordering API.

4819 */

 static void TestReorderingAPIWithRuleCreatedCollator(void)

4821 {

4822 UErrorCode status = U_ZERO_ERROR;

4823 UCollator *myCollation;

4824 UChar rules[90];

     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};

     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};

     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};

4828 UCollationResult collResult;

4829 int32_t retrievedReorderCodesLength;

4830 int32_t retrievedReorderCodes[10];

4831 static const UChar greekString[] = { 0x03b1 };

4832 static const UChar punctuationString[] = { 0x203e };

     static const UChar hanString[] = { 0x65E5, 0x672C };

4834 int loopIndex;

4835

     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");

4837

4838 /* build collator from rules */

     u_uastrcpy(rules, "[reorder Hani Grek]");

     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

4843 return;

4844 }

4845

4846 /* get the reordering */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));

4850 return;

4851 }

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));

4854 return;

4855 }

     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {

             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4859 return;

4860 }

4861 }

     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));

4863 if (collResult != UCOL_GREATER) {

         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");

4865 return;

4866 }

4867

4868 /* set the reordering */

     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));

4872 return;

4873 }

4874

4875 /* get the reordering */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);

4877 if (status != U_BUFFER_OVERFLOW_ERROR) {

         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));

4879 return;

4880 }

4881 status = U_ZERO_ERROR;

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));

4884 return;

4885 }

4886 /* now let's really get it */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));

4890 return;

4891 }

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));

4894 return;

4895 }

     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4899 return;

4900 }

4901 }

     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));

4903 if (collResult != UCOL_LESS) {

         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");

4905 return;

4906 }

4907

4908 /* clear the reordering */

     ucol_setReorderCodes(myCollation, NULL, 0, &status);    

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));

4912 return;

4913 }

4914

4915 /* get the reordering again */

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);

     if (retrievedReorderCodesLength != 0) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);

4919 return;

4920 }

4921

     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));

4923 if (collResult != UCOL_GREATER) {

         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");

4925 return;

4926 }

4927

4928 /* reset the reordering */

     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));

4932 return;

4933 }

     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));

4937 return;

4938 }

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));

4941 return;

4942 }

     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {

             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4946 return;

4947 }

4948 }

4949

4950 ucol_close(myCollation);

4951 }

4952

 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {

4954 int32_t i;

     for (i = 0; i < length; ++i) {

         if (expectedScript == scripts[i]) { return TRUE; }

4957 }

4958 return FALSE;

4959 }

4960

 static void TestEquivalentReorderingScripts(void) {

4962 // Beginning with ICU 55, collation reordering moves single scripts

4963 // rather than groups of scripts,

4964 // except where scripts share a range and sort primary-equal.

4965 UErrorCode status = U_ZERO_ERROR;

4966 int32_t equivalentScripts[100];

4967 int32_t length;

4968 int i;

4969 int32_t prevScript;

4970 /* These scripts are expected to be equivalent. */

4971 static const int32_t expectedScripts[] = {

4972 USCRIPT_HIRAGANA,

4973 USCRIPT_KATAKANA,

4974 USCRIPT_KATAKANA_OR_HIRAGANA

4975 };

4976

     equivalentScripts[0] = 0;

4978 length = ucol_getEquivalentReorderCodes(

             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));

4982 return;

4983 }

     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {

4985 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "

                 "length expected 1, was = %d; expected [%d] was [%d]\n",

                 length, USCRIPT_GOTHIC, equivalentScripts[0]);

4988 }

4989

4990 length = ucol_getEquivalentReorderCodes(

             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));

4994 return;

4995 }

     if (length != UPRV_LENGTHOF(expectedScripts)) {

4997 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "

                 "expected %d, was = %d\n",

4999 UPRV_LENGTHOF(expectedScripts), length);

5000 }

5001 prevScript = -1;

     for (i = 0; i < length; ++i) {

5003 int32_t script = equivalentScripts[i];

5004 if (script <= prevScript) {

             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);

5006 }

5007 prevScript = script;

5008 }

     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {

         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {

             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",

5012 expectedScripts[i]);

5013 }

5014 }

5015

5016 length = ucol_getEquivalentReorderCodes(

             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));

5020 return;

5021 }

     if (length != UPRV_LENGTHOF(expectedScripts)) {

5023 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "

                 "expected %d, was = %d\n",

5025 UPRV_LENGTHOF(expectedScripts), length);

5026 }

     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {

         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {

             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",

5030 expectedScripts[i]);

5031 }

5032 }

5033

5034 length = ucol_getEquivalentReorderCodes(

             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {

5037 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "

                 "expected %d, was = %d\n",

5039 UPRV_LENGTHOF(expectedScripts), length);

5040 }

5041

5042 length = ucol_getEquivalentReorderCodes(

             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != 3) {

5045 log_err("ERROR/Hani: retrieved equivalent script length wrong: "

                 "expected 3, was = %d\n", length);

5047 }

5048 length = ucol_getEquivalentReorderCodes(

             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != 3) {

5051 log_err("ERROR/Hans: retrieved equivalent script length wrong: "

                 "expected 3, was = %d\n", length);

5053 }

5054 length = ucol_getEquivalentReorderCodes(

             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != 3) {

5057 log_err("ERROR/Hant: retrieved equivalent script length wrong: "

                 "expected 3, was = %d\n", length);

5059 }

5060

5061 length = ucol_getEquivalentReorderCodes(

             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != 2) {

5064 log_err("ERROR/Merc: retrieved equivalent script length wrong: "

                 "expected 2, was = %d\n", length);

5066 }

5067 length = ucol_getEquivalentReorderCodes(

             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);

     if (U_FAILURE(status) || length != 2) {

5070 log_err("ERROR/Mero: retrieved equivalent script length wrong: "

                 "expected 2, was = %d\n", length);

5072 }

5073 }

5074

 static void TestReorderingAcrossCloning(void) 

5076 {

5077 UErrorCode status = U_ZERO_ERROR;

5078 UCollator *myCollation;

     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};

5080 UCollator *clonedCollation;

5081 int32_t retrievedReorderCodesLength;

5082 int32_t retrievedReorderCodes[10];

5083 int loopIndex;

5084

     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");

5086

5087 /* build collator tertiary */

     myCollation = ucol_open("", &status);

5089 ucol_setStrength(myCollation, UCOL_TERTIARY);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

5092 return;

5093 }

5094

5095 /* set the reorderding */

     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));

5099 return;

5100 }

5101

5102 /* clone the collator */

     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));

5106 return;

5107 }

5108

5109 /* get the reordering */

     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));

5113 return;

5114 }

     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {

         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));

5117 return;

5118 }

     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

5122 return;

5123 }

5124 }

5125

5126 /*uprv_free(buffer);*/

5127 ucol_close(myCollation);

5128 ucol_close(clonedCollation);

5129 }

5130

5131 /*

5132 * Utility function to test one collation reordering test case set.

5133 * @param testcases Array of test cases.

5134 * @param n_testcases Size of the array testcases.

5135 * @param reorderTokens Array of reordering codes.

5136 * @param reorderTokensLen Size of the array reorderTokens.

5137 */

 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)

5139 {

5140 uint32_t testCaseNum;

5141 UErrorCode status = U_ZERO_ERROR;

5142 UCollator *myCollation;

5143

     myCollation = ucol_open("", &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

5147 return;

5148 }

     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));

5152 return;

5153 }

5154

     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {

5156 doTest(myCollation,

5157 testCases[testCaseNum].source,

5158 testCases[testCaseNum].target,

5159 testCases[testCaseNum].result

5160 );

5161 }

5162 ucol_close(myCollation);

5163 }

5164

 static void TestGreekFirstReorder(void)

5166 {

5167 const char* strRules[] = {

5168 "[reorder Grek]"

5169 };

5170

5171 const int32_t apiRules[] = {

5172 USCRIPT_GREEK

5173 };

5174

5175 const static OneTestCase privateUseCharacterStrings[] = {

         { {0x0391}, {0x0391}, UCOL_EQUAL },

         { {0x0041}, {0x0391}, UCOL_GREATER },

         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },

         { {0x0060}, {0x0391}, UCOL_LESS },

         { {0x0391}, {0xe2dc}, UCOL_LESS },

         { {0x0391}, {0x0060}, UCOL_GREATER },

5182 };

5183

5184 /* Test rules creation */

     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

5186

5187 /* Test collation reordering API */

     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));

5189 }

5190

 static void TestGreekLastReorder(void)

5192 {

5193 const char* strRules[] = {

5194 "[reorder Zzzz Grek]"

5195 };

5196

5197 const int32_t apiRules[] = {

5198 USCRIPT_UNKNOWN, USCRIPT_GREEK

5199 };

5200

5201 const static OneTestCase privateUseCharacterStrings[] = {

         { {0x0391}, {0x0391}, UCOL_EQUAL },

         { {0x0041}, {0x0391}, UCOL_LESS },

         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },

         { {0x0060}, {0x0391}, UCOL_LESS },

         { {0x0391}, {0xe2dc}, UCOL_GREATER },

5207 };

5208

5209 /* Test rules creation */

     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

5211

5212 /* Test collation reordering API */

     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));

5214 }

5215

 static void TestNonScriptReorder(void)

5217 {

5218 const char* strRules[] = {

5219 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"

5220 };

5221

5222 const int32_t apiRules[] = {

5223 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,

5224 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,

5225 UCOL_REORDER_CODE_CURRENCY

5226 };

5227

5228 const static OneTestCase privateUseCharacterStrings[] = {

         { {0x0391}, {0x0041}, UCOL_LESS },

         { {0x0041}, {0x0391}, UCOL_GREATER },

         { {0x0060}, {0x0041}, UCOL_LESS },

         { {0x0060}, {0x0391}, UCOL_GREATER },

         { {0x0024}, {0x0041}, UCOL_GREATER },

5234 };

5235

5236 /* Test rules creation */

     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

5238

5239 /* Test collation reordering API */

     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));

5241 }

5242

 static void TestHaniReorder(void)

5244 {

5245 const char* strRules[] = {

5246 "[reorder Hani]"

5247 };

5248 const int32_t apiRules[] = {

5249 USCRIPT_HAN

5250 };

5251

5252 const static OneTestCase privateUseCharacterStrings[] = {

         { {0x4e00}, {0x0041}, UCOL_LESS },

         { {0x4e00}, {0x0060}, UCOL_GREATER },

         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },

         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },

         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },

         { {0xfa27}, {0x0041}, UCOL_LESS },

         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },

5260 };

5261

5262 /* Test rules creation */

     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

5264

5265 /* Test collation reordering API */

     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));

5267 }

5268

 static void TestHaniReorderWithOtherRules(void)

5270 {

5271 const char* strRules[] = {

5272 "[reorder Hani] &b<a"

5273 };

5274 /*const int32_t apiRules[] = {

5275 USCRIPT_HAN

5276 };*/

5277

5278 const static OneTestCase privateUseCharacterStrings[] = {

         { {0x4e00}, {0x0041}, UCOL_LESS },

         { {0x4e00}, {0x0060}, UCOL_GREATER },

         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },

         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },

         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },

         { {0xfa27}, {0x0041}, UCOL_LESS },

         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },

         { {0x0062}, {0x0061}, UCOL_LESS },

5287 };

5288

5289 /* Test rules creation */

     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));

5291 }

5292

 static void TestMultipleReorder(void)

5294 {

5295 const char* strRules[] = {

5296 "[reorder Grek Zzzz DIGIT Latn Hani]"

5297 };

5298

5299 const int32_t apiRules[] = {

5300 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN

5301 };

5302

5303 const static OneTestCase collationTestCases[] = {

         { {0x0391}, {0x0041}, UCOL_LESS},

         { {0x0031}, {0x0041}, UCOL_LESS},

         { {0x0041}, {0x4e00}, UCOL_LESS},

5307 };

5308

5309 /* Test rules creation */

     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));

5311

5312 /* Test collation reordering API */

     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));

5314 }

5315

5316 /*

5317 * Test that covers issue reported in ticket 8814

5318 */

 static void TestReorderWithNumericCollation(void)

5320 {

5321 UErrorCode status = U_ZERO_ERROR;

5322 UCollator *myCollation;

5323 UCollator *myReorderCollation;

     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};

5325 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };

5326 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */

5327 UChar fortyS[] = { 0x0053 };

5328 UChar fortyThreeP[] = { 0x0050 };

5329 uint8_t fortyS_sortKey[128];

5330 int32_t fortyS_sortKey_Length;

5331 uint8_t fortyThreeP_sortKey[128];

5332 int32_t fortyThreeP_sortKey_Length;

5333 uint8_t fortyS_sortKey_reorder[128];

5334 int32_t fortyS_sortKey_reorder_Length;

5335 uint8_t fortyThreeP_sortKey_reorder[128];

5336 int32_t fortyThreeP_sortKey_reorder_Length;

5337 UCollationResult collResult;

5338 UCollationResult collResultReorder;

5339

     log_verbose("Testing reordering with and without numeric collation\n");

5341

5342 /* build collator tertiary with numeric */

     myCollation = ucol_open("", &status);

5344 /*

5345 ucol_setStrength(myCollation, UCOL_TERTIARY);

5346 */

     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

5350 return;

5351 }

5352

5353 /* build collator tertiary with numeric and reordering */

     myReorderCollation = ucol_open("", &status);

5355 /*

5356 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);

5357 */

     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);

     if(U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));

5362 return;

5363 }

5364

     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);

     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);

     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);

     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);

5369

     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {

         log_err_status(status, "ERROR: couldn't generate sort keys\n");

5372 return;

5373 }

     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));

     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));

5376 /*

5377 fprintf(stderr, "\tcollResult = %x\n", collResult);

5378 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);

5379 fprintf(stderr, "\nfortyS\n");

5380 for (i = 0; i < fortyS_sortKey_Length; i++) {

5381 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);

5382 }

5383 fprintf(stderr, "\nfortyThreeP\n");

5384 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {

5385 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);

5386 }

5387 */

5388 if (collResult != collResultReorder) {

         log_err_status(status, "ERROR: collation results should have been the same.\n");

5390 return;

5391 }

5392

5393 ucol_close(myCollation);

5394 ucol_close(myReorderCollation);

5395 }

/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both are identical up to and including the terminating 0,
 * -1 when the first differing byte of a is smaller than that of b,
 * and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences without a difference. */
            return 0;
        }
        a++;
        b++;
    }
    if (*a < *b) {
        return -1;
    }
    return 1;
}

5406

 static void TestImportRulesDeWithPhonebook(void)

5408 {

5409 const char* normalRules[] = {

     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",

     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",

     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",

5413 };

5414 const OneTestCase normalTests[] = {

     { {0x00e6}, {0x00c6}, UCOL_LESS},

     { {0x00fc}, {0x00dc}, UCOL_GREATER},

5417 };

5418

5419 const char* importRules[] = {

     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",

     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",

     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",

5423 };

5424 const OneTestCase importTests[] = {

     { {0x00e6}, {0x00c6}, UCOL_LESS},

     { {0x00fc}, {0x00dc}, UCOL_LESS},

5427 };

5428

   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));

   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));

5431 }

5432

#if 0
/*
 * Disabled test (compiled out): compares default ordering, the
 * "root-u-co-eor" import, the "fi-u-co-standard" import and the combination
 * of both on three string pairs.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* ducetRules[] = {
      "&a<b",                                                   /* Dummy rule. */
  };
  const OneTestCase ducetCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* europeanRules[] = {
      "[import root-u-co-eor]",
  };
  const OneTestCase europeanCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules. */
  const char* finnishRules[] = {
      "[import fi-u-co-standard]",
  };
  const OneTestCase finnishCases[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* europeanFinnishRules[] = {
      "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  const char* fiEorRules[] = {
      "[import fi-u-co-eor]",
  };
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases), ducetRules, UPRV_LENGTHOF(ducetRules));
  doTestOneTestCase(europeanCases, UPRV_LENGTHOF(europeanCases), europeanRules, UPRV_LENGTHOF(europeanRules));
  doTestOneTestCase(finnishCases, UPRV_LENGTHOF(finnishCases), finnishRules, UPRV_LENGTHOF(finnishRules));
  doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), europeanFinnishRules, UPRV_LENGTHOF(europeanFinnishRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
#endif

5502

#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* ducetRules[] = {
      "&a<b",                                                   /* Dummy rule. */
  };
  const OneTestCase ducetCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Rules imported from the Korean unihan collation type. */
  const char* importedUnihanRules[] = {
      "[import ko-u-co-unihan]",
  };
  const OneTestCase importedUnihanCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases), ducetRules, UPRV_LENGTHOF(ducetRules));
  doTestOneTestCase(importedUnihanCases, UPRV_LENGTHOF(importedUnihanCases), importedUnihanRules, UPRV_LENGTHOF(importedUnihanRules));

}
#endif

5534

 static void TestImport(void)

5536 {

5537 UCollator* vicoll;

5538 UCollator* escoll;

5539 UCollator* viescoll;

5540 UCollator* importviescoll;

5541 UParseError error;

5542 UErrorCode status = U_ZERO_ERROR;

5543 UChar* virules;

5544 int32_t viruleslength;

5545 UChar* esrules;

5546 int32_t esruleslength;

5547 UChar* viesrules;

5548 int32_t viesruleslength;

     char srules[500] = "[import vi][import es]";

5550 UChar rules[500];

5551 uint32_t length = 0;

5552 int32_t itemCount;

5553 int32_t i, k;

5554 UChar32 start;

5555 UChar32 end;

5556 UChar str[500];

5557 int32_t strLength;

5558

5559 uint8_t sk1[500];

5560 uint8_t sk2[500];

5561

5562 UBool b;

5563 USet* tailoredSet;

5564 USet* importTailoredSet;

5565

5566

     vicoll = ucol_open("vi", &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));

5570 return;

5571 }

5572

     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);

     if(viruleslength == 0) {

         log_data_err("missing vi tailoring rule string\n");

5576 ucol_close(vicoll);

5577 return;

5578 }

     escoll = ucol_open("es", &status);

     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);

     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));

     viesrules[0] = 0;

5583 u_strcat(viesrules, virules);

5584 u_strcat(viesrules, esrules);

5585 viesruleslength = viruleslength + esruleslength;

     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);

5587

5588 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

     length = u_unescape(srules, rules, 500);

     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

5593 return;

5594 }

5595

     tailoredSet = ucol_getTailoredSet(viescoll, &status);

     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);

5598

     if(!uset_equals(tailoredSet, importTailoredSet)){

5600 log_err("Tailored sets not equal");

5601 }

5602

5603 uset_close(importTailoredSet);

5604

5605 itemCount = uset_getItemCount(tailoredSet);

5606

     for( i = 0; i < itemCount; i++){

         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);

         if(strLength < 2){

             for (; start <= end; start++){

5611 k = 0;

                 U16_APPEND(str, k, 500, start, b);

                 (void)b;    /* Suppress set but not used warning. */

                 ucol_getSortKey(viescoll, str, 1, sk1, 500);

                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);

                 if(compare_uint8_t_arrays(sk1, sk2) != 0){

                     log_err("Sort key for %s not equal\n", str);

5618 break;

5619 }

5620 }

5621 }else{

             ucol_getSortKey(viescoll, str, strLength, sk1, 500);

             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);

             if(compare_uint8_t_arrays(sk1, sk2) != 0){

                 log_err("ZZSort key for %s not equal\n", str);

5626 break;

5627 }

5628

5629 }

5630 }

5631

5632 uset_close(tailoredSet);

5633

5634 uprv_free(viesrules);

5635

5636 ucol_close(vicoll);

5637 ucol_close(escoll);

5638 ucol_close(viescoll);

5639 ucol_close(importviescoll);

5640 }

5641

 static void TestImportWithType(void)

5643 {

5644 UCollator* vicoll;

5645 UCollator* decoll;

5646 UCollator* videcoll;

5647 UCollator* importvidecoll;

5648 UParseError error;

5649 UErrorCode status = U_ZERO_ERROR;

5650 const UChar* virules;

5651 int32_t viruleslength;

5652 const UChar* derules;

5653 int32_t deruleslength;

5654 UChar* viderules;

5655 int32_t videruleslength;

     const char srules[500] = "[import vi][import de-u-co-phonebk]";

5657 UChar rules[500];

5658 uint32_t length = 0;

5659 int32_t itemCount;

5660 int32_t i, k;

5661 UChar32 start;

5662 UChar32 end;

5663 UChar str[500];

5664 int32_t strLength;

5665

5666 uint8_t sk1[500];

5667 uint8_t sk2[500];

5668

5669 USet* tailoredSet;

5670 USet* importTailoredSet;

5671

     vicoll = ucol_open("vi", &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

5675 return;

5676 }

     virules = ucol_getRules(vicoll, &viruleslength);

     if(viruleslength == 0) {

         log_data_err("missing vi tailoring rule string\n");

5680 ucol_close(vicoll);

5681 return;

5682 }

5683 /* decoll = ucol_open("de@collation=phonebook", &status); */

     decoll = ucol_open("de-u-co-phonebk", &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

5687 return;

5688 }

5689

5690

     derules = ucol_getRules(decoll, &deruleslength);

     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));

     viderules[0] = 0;

5694 u_strcat(viderules, virules);

5695 u_strcat(viderules, derules);

5696 videruleslength = viruleslength + deruleslength;

     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);

5698

5699 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

     length = u_unescape(srules, rules, 500);

     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);

     if(U_FAILURE(status)){

         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));

5704 return;

5705 }

5706

     tailoredSet = ucol_getTailoredSet(videcoll, &status);

     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);

5709

     if(!uset_equals(tailoredSet, importTailoredSet)){

5711 log_err("Tailored sets not equal");

5712 }

5713

5714 uset_close(importTailoredSet);

5715

5716 itemCount = uset_getItemCount(tailoredSet);

5717

     for( i = 0; i < itemCount; i++){

         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);

         if(strLength < 2){

             for (; start <= end; start++){

5722 k = 0;

                 U16_APPEND_UNSAFE(str, k, start);

                 ucol_getSortKey(videcoll, str, 1, sk1, 500);

                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);

                 if(compare_uint8_t_arrays(sk1, sk2) != 0){

                     log_err("Sort key for %s not equal\n", str);

5728 break;

5729 }

5730 }

5731 }else{

             ucol_getSortKey(videcoll, str, strLength, sk1, 500);

             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);

             if(compare_uint8_t_arrays(sk1, sk2) != 0){

                 log_err("Sort key for %s not equal\n", str);

5736 break;

5737 }

5738

5739 }

5740 }

5741

5742 uset_close(tailoredSet);

5743

5744 uprv_free(viderules);

5745

5746 ucol_close(videcoll);

5747 ucol_close(importvidecoll);

5748 ucol_close(vicoll);

5749 ucol_close(decoll);

5750 }

5751

5752 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */

5753 static const UChar longUpperStr1[]= { /* 155 chars */

     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,

     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,

     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,

     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,

     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,

     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,

     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,

     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,

     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,

     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61

5764 };

5765

5766 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */

5767 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */

     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20

5773 };

5774

5775 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */

5776 static const UChar longUpperStr3[]= { /* 324 chars */

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20

5789 };

5790

5791 typedef struct {

5792 const UChar * longUpperStrPtr;

5793 int32_t longUpperStrLen;

5794 } LongUpperStrItem;

5795

5796 /* String pointers must be in reverse collation order of the corresponding strings */

5797 static const LongUpperStrItem longUpperStrItems[] = {

     { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },

     { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },

     { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },

5801 { NULL, 0 }

5802 };

5803

 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */

5805

5806 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */

 static void TestCaseLevelBufferOverflow(void)

5808 {

5809 UErrorCode status = U_ZERO_ERROR;

     UCollator * ucol = ucol_open("root", &status);

     if ( U_SUCCESS(status) ) {

         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);

         if ( U_SUCCESS(status) ) {

5814 const LongUpperStrItem * itemPtr;

             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];

             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {

5817 int32_t sortKeyLen;

5818 if (itemPtr > longUpperStrItems) {

                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);

5820 }

                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);

                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {

                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);

5824 break;

5825 }

5826 if ( itemPtr > longUpperStrItems ) {

                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);

                     if (compareResult >= 0) {

                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);

5830 }

5831 }

5832 }

5833 } else {

             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));

5835 }

5836 ucol_close(ucol);

5837 } else {

         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));

5839 }

5840 }

5841

5842 /* Test for #10595 */

 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */

5844 #define KEY_PART_SIZE 16

5845

 static void TestNextSortKeyPartJaIdentical(void)

5847 {

5848 UErrorCode status = U_ZERO_ERROR;

5849 UCollator *coll;

5850 uint8_t keyPart[KEY_PART_SIZE];

5851 UCharIterator iter;

     uint32_t state[2] = {0, 0};

5853 int32_t keyPartLen;

5854

     coll = ucol_open("ja", &status);

     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

     if (U_FAILURE(status)) {

         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));

5859 return;

5860 }

5861

     uiter_setString(&iter, testJapaneseName, 5);

5863 keyPartLen = KEY_PART_SIZE;

5864 while (keyPartLen == KEY_PART_SIZE) {

         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);

         if (U_FAILURE(status)) {

             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));

5868 break;

5869 }

5870 }

5871

5872 ucol_close(coll);

5873 }

5874

5875 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

5876

 void addMiscCollTest(TestNode** root)

5878 {

5879 TEST(TestRuleOptions);

5880 TEST(TestBeforePrefixFailure);

5881 TEST(TestContractionClosure);

5882 TEST(TestPrefixCompose);

5883 TEST(TestStrCollIdenticalPrefix);

5884 TEST(TestPrefix);

5885 TEST(TestNewJapanese);

5886 /*TEST(TestLimitations);*/

5887 TEST(TestNonChars);

5888 TEST(TestExtremeCompression);

5889 TEST(TestSurrogates);

5890 TEST(TestVariableTopSetting);

5891 TEST(TestMaxVariable);

5892 TEST(TestBocsuCoverage);

5893 TEST(TestCyrillicTailoring);

5894 TEST(TestCase);

5895 TEST(IncompleteCntTest);

5896 TEST(BlackBirdTest);

5897 TEST(FunkyATest);

5898 TEST(BillFairmanTest);

5899 TEST(TestChMove);

5900 TEST(TestImplicitTailoring);

5901 TEST(TestFCDProblem);

5902 TEST(TestEmptyRule);

5903 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */

5904 TEST(TestJ815);

5905 TEST(TestUpperCaseFirst);

5906 TEST(TestBefore);

5907 TEST(TestHangulTailoring);

5908 TEST(TestUCARules);

5909 TEST(TestIncrementalNormalize);

5910 TEST(TestComposeDecompose);

5911 TEST(TestCompressOverlap);

5912 TEST(TestContraction);

5913 TEST(TestExpansion);

5914 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */

5915 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */

5916 TEST(TestOptimize);

5917 TEST(TestSuppressContractions);

5918 TEST(Alexis2);

5919 TEST(TestHebrewUCA);

5920 TEST(TestPartialSortKeyTermination);

5921 TEST(TestSettings);

5922 TEST(TestEquals);

5923 TEST(TestJ2726);

5924 TEST(NullRule);

5925 TEST(TestNumericCollation);

5926 TEST(TestTibetanConformance);

5927 TEST(TestPinyinProblem);

5928 TEST(TestSeparateTrees);

5929 TEST(TestBeforePinyin);

5930 TEST(TestBeforeTightening);

5931 /*TEST(TestMoreBefore);*/

5932 TEST(TestTailorNULL);

5933 TEST(TestUpperFirstQuaternary);

5934 TEST(TestJ4960);

5935 TEST(TestJ5223);

5936 TEST(TestJ5232);

5937 TEST(TestJ5367);

5938 TEST(TestHiragana);

5939 TEST(TestSortKeyConsistency);

5940 TEST(TestVI5913); /* VI, RO tailored rules */

5941 TEST(TestCroatianSortKey);

5942 TEST(TestTailor6179);

5943 TEST(TestUCAPrecontext);

5944 TEST(TestOutOfBuffer5468);

5945 TEST(TestSameStrengthList);

5946

5947 TEST(TestSameStrengthListQuoted);

5948 TEST(TestSameStrengthListSupplemental);

5949 TEST(TestSameStrengthListQwerty);

5950 TEST(TestSameStrengthListQuotedQwerty);

5951 TEST(TestSameStrengthListRanges);

5952 TEST(TestSameStrengthListSupplementalRanges);

5953 TEST(TestSpecialCharacters);

5954 TEST(TestPrivateUseCharacters);

5955 TEST(TestPrivateUseCharactersInList);

5956 TEST(TestPrivateUseCharactersInRange);

5957 TEST(TestInvalidListsAndRanges);

5958 TEST(TestImportRulesDeWithPhonebook);

5959 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */

5960 /* TEST(TestImportRulesCJKWithUnihan); */

5961 TEST(TestImport);

5962 TEST(TestImportWithType);

5963

5964 TEST(TestBeforeRuleWithScriptReordering);

5965 TEST(TestNonLeadBytesDuringCollationReordering);

5966 TEST(TestReorderingAPI);

5967 TEST(TestReorderingAPIWithRuleCreatedCollator);

5968 TEST(TestEquivalentReorderingScripts);

5969 TEST(TestGreekFirstReorder);

5970 TEST(TestGreekLastReorder);

5971 TEST(TestNonScriptReorder);

5972 TEST(TestHaniReorder);

5973 TEST(TestHaniReorderWithOtherRules);

5974 TEST(TestMultipleReorder);

5975 TEST(TestReorderingAcrossCloning);

5976 TEST(TestReorderWithNumericCollation);

5977

5978 TEST(TestCaseLevelBufferOverflow);

5979 TEST(TestNextSortKeyPartJaIdentical);

5980 }

5981

5982 #endif /* #if !UCONFIG_NO_COLLATION */