icuSources/test/testdata/collationtest.txt

   1 # Copyright (c) 2012-2015 International Business Machines
   2 # Corporation and others. All Rights Reserved.
   3 #
   4 # This file should be in UTF-8 with a signature byte sequence ("BOM").
   5 #
   6 # collationtest.txt: Collation test data.
   7 #
   8 # created on: 2012apr13
   9 # created by: Markus W. Scherer
  10
  11 # A line with "** test: description" is used for verbose and error output.
  12
  13 # A collator can be set with "@ root" or "@ locale language-tag",
  14 # for example "@ locale de-u-co-phonebk".
  15 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
  16
  17 # A collator can be built with "@ rules".
  18 # An "@ rules" line is followed by the lines with the tailoring rules (possibly none, for empty rules).
  19
  20 # A collator can be modified with "% attribute=value" or "% reorder codes", for example "% reorder Hani Zzzz digit".
  21
  22 # "* compare" tests the order (= or <) of the following strings.
  23 # The relation can be "=" or "<" (the level of the difference is not specified)
  24 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference; "<i" is the identical level).
  25
  26 # Test sections ("* compare") are terminated by
  27 # definitions of new collators, changing attributes, or new test sections.
  28
  29 ** test: simple CEs & expansions
  30 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
  31 # Here we mostly cover a few unusual mappings.
  32 @ rules
  33 &\x01                           # most control codes are ignorable
  34 <<<\u0300                       # tertiary CE
  35 &9<\x00                         # NUL not ignorable
  36 &\uA00A\uA00B=\uA002            # two long-primary CEs
  37 &\uA00A\uA00B\u00050005=\uA003  # three CEs, require 64 bits
  38
  39 * compare
  40 =  \x01
  41 =  \x02
  42 <3 \u0300
  43 <1 9
  44 <1 \x00
  45 =  \x01\x00\x02
  46 <1 a
  47 <3 a\u0300
  48 <2 a\u0308
  49 =  ä
  50 <1 b
  51 <1 か        # Hiragana Ka (U+304B)
  52 <2 か\u3099  # plus voiced sound mark
  53 =  が        # Hiragana Ga (U+304C)
  54 <1 \uA00A\uA00B
  55 =  \uA002
  56 <1 \uA00A\uA00B\u00050004
  57 <1 \uA00A\uA00B\u00050005
  58 =  \uA003
  59 <1 \uA00A\uA00B\u00050006
  60
  61 ** test: contractions
  62 # Create some interesting mappings, and map some normalization-inert characters
  63 # (which are not subject to canonical reordering)
  64 # to some of the same CEs to check the sequence of CEs.
  65 @ rules
  66
  67 # Contractions starting with 'a' should not continue with any character < U+0300
  68 # so that we can test a shortcut for that.
  69 &a=ⓐ
  70 &b<bz=ⓑ
  71 &d<dz\u0301=ⓓ           # d+z+acute
  72 &z
  73 <a\u0301=Ⓐ              # a+acute sorts after z
  74 <a\u0301\u0301=Ⓑ        # a+acute+acute
  75 <a\u0301\u0301\u0358=Ⓒ  # a+acute+acute+dot above right
  76 <a\u030a=Ⓓ              # a+ring
  77 <a\u0323=Ⓔ              # a+dot below
  78 <a\u0323\u0358=Ⓕ        # a+dot below+dot above right
  79 <a\u0327\u0323\u030a=Ⓖ  # a+cedilla+dot below+ring
  80 <a\u0327\u0323bz=Ⓗ      # a+cedilla+dot below+b+z
  81
  82 &\U0001D158=⁰           # musical notehead black (has a symbol primary)
  83 <\U0001D158\U0001D165=¼ # musical quarter note
  84
  85 # deliberately missing prefix contractions:
  86 # dz
  87 # a\u0327
  88 # a\u0327\u0323
  89 # a\u0327\u0323b
  90
  91 &\x01
  92 <<<\U0001D165=¹         # musical stem (ccc=216)
  93 <<<\U0001D16D=²         # musical augmentation dot (ccc=226)
  94 <<<\U0001D165\U0001D16D=³  # stem+dot (ccc=216 226)
  95 &\u0301=❶               # acute (ccc=230)
  96 &\u030a=❷               # ring (ccc=230)
  97 &\u0308=❸               # diaeresis (ccc=230)
  98 <<\u0308\u0301=❹        # diaeresis+acute (=dialytika tonos) (ccc=230 230)
  99 &\u0327=❺               # cedilla (ccc=202)
 100 &\u0323=❻               # dot below (ccc=220)
 101 &\u0331=❼               # macron below (ccc=220)
 102 <<\u0331\u0358=❽        # macron below+dot above right (ccc=220 232)
 103 &\u0334=❾               # tilde overlay (ccc=1)
 104 &\u0358=❿               # dot above right (ccc=232)
 105
 106 &\u0f71=①               # tibetan vowel sign aa
 107 &\u0f72=②               # tibetan vowel sign i
 108 #  \u0f71\u0f72         # tibetan vowel sign aa + i = ii = U+0F73
 109 &\u0f73=③               # tibetan vowel sign ii (ccc=0 but lccc=129)
 110
 111 ** test: simple contractions
 112
 113 # Some strings are chosen to cause incremental contiguous contraction matching to
 114 # go into partial matches for prefixes of contractions
 115 # (where the prefixes are deliberately not also contractions).
 116 # When there is no complete match, then the matching code must back out of those
 117 # so that discontiguous contractions work as specified.
 118
 119 * compare
 120 # contraction starter with no following text, or mismatch, or blocked
 121 <1 a
 122 =  ⓐ
 123 <1 aa
 124 =  ⓐⓐ
 125 <1 ab
 126 =  ⓐb
 127 <1 az
 128 =  ⓐz
 129
 130 * compare
 131 <1 a
 132 <2 a\u0308\u030a  # ring blocked by diaeresis
 133 =  ⓐ❸❷
 134 <2 a\u0327
 135 =  ⓐ❺
 136
 137 * compare
 138 <2 \u0308
 139 =  ❸
 140 <2 \u0308\u030a\u0301  # acute blocked by ring
 141 =  ❸❷❶
 142
 143 * compare
 144 <1 \U0001D158
 145 =  ⁰
 146 <1 \U0001D158\U0001D165
 147 =  ¼
 148
 149 # no discontiguous contraction because of missing prefix contraction d+z,
 150 # and a starter ('z') after the 'd'
 151 * compare
 152 <1 dz\u0323\u0301
 153 =  dz❻❶
 154
 155 # contiguous contractions
 156 * compare
 157 <1 abz
 158 =  ⓐⓑ
 159 <1 abzz
 160 =  ⓐⓑz
 161
 162 * compare
 163 <1 a
 164 <1 z
 165 <1 a\u0301
 166 =  Ⓐ
 167 <1 a\u0301\u0301
 168 =  Ⓑ
 169 <1 a\u0301\u0301\u0358
 170 =  Ⓒ
 171 <1 a\u030a
 172 =  Ⓓ
 173 <1 a\u0323\u0358
 174 =  Ⓕ
 175 <1 a\u0327\u0323\u030a  # match despite missing prefix
 176 =  Ⓖ
 177 <1 a\u0327\u0323bz
 178 =  Ⓗ
 179
 180 * compare
 181 <2 \u0308\u0308\u0301  # acute blocked from first diaeresis, contracts with second
 182 =  ❸❹
 183
 184 * compare
 185 <1 \U0001D158\U0001D165
 186 =  ¼
 187
 188 * compare
 189 <3 \U0001D165\U0001D16D
 190 =  ³
 191
 192 ** test: discontiguous contractions
 193 * compare
 194 <1 a\u0327\u030a                # a+ring skips cedilla
 195 =  Ⓓ❺
 196 <2 a\u0327\u0327\u030a          # a+ring skips 2 cedillas
 197 =  Ⓓ❺❺
 198 <2 a\u0327\u0327\u0327\u030a    # a+ring skips 3 cedillas
 199 =  Ⓓ❺❺❺
 200 <2 a\u0334\u0327\u0327\u030a    # a+ring skips tilde overlay & 2 cedillas
 201 =  Ⓓ❾❺❺
 202 <1 a\u0327\u0323                # a+dot below skips cedilla
 203 =  Ⓔ❺
 204 <1 a\u0323\u0301\u0358          # a+dot below+dot above right: 2-char match, then skips acute
 205 =  Ⓕ❶
 206 <2 a\u0334\u0323\u0358          # a+dot below skips tilde overlay
 207 =  Ⓕ❾
 208
 209 * compare
 210 <2 \u0331\u0331\u0358           # macron below+dot above right skips the second macron below
 211 =  ❽❼
 212
 213 * compare
 214 <1 a\u0327\u0331\u0323\u030a    # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
 215 =  Ⓓ❺❼❻
 216 <1 a\u0327\u0323\U0001D16D\u030a  # a+dot below skips cedilla
 217 =  Ⓔ❺²❷
 218 <2 a\u0327\u0327\u0323\u030a    # a+dot below skips 2 cedillas
 219 =  Ⓔ❺❺❷
 220 <2 a\u0327\u0323\u0323\u030a    # a+dot below skips cedilla
 221 =  Ⓔ❺❻❷
 222 <2 a\u0334\u0327\u0323\u030a    # a+dot below skips tilde overlay & cedilla
 223 =  Ⓔ❾❺❷
 224
 225 * compare
 226 <1 \U0001D158\u0327\U0001D165   # quarter note skips cedilla
 227 =  ¼❺
 228 <1 a\U0001D165\u0323            # a+dot below skips stem
 229 =  Ⓔ¹
 230
 231 # partial contiguous match, backs up, matches discontiguous contraction
 232 <1 a\u0327\u0323b
 233 =  Ⓔ❺b
 234 <1 a\u0327\u0323ba
 235 =  Ⓔ❺bⓐ
 236
 237 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
 238 * compare
 239 <1 a\u0327\u0301\u0301\u0358
 240 =  Ⓒ❺
 241
 242 # FCD but not NFD
 243 * compare
 244 <1 a\u0f73\u0301                # a+acute skips tibetan ii
 245 =  Ⓐ③
 246
 247 # FCD but the 0f71 inside the 0f73 must be skipped
 248 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
 249 * compare
 250 <1 \u0f71\u0f73                 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
 251 =  ③①
 252
 253 ** test: discontiguous contractions with nested contractions
 254 * compare
 255 <1 a\u0323\u0308\u0301\u0358
 256 =  Ⓕ❹
 257 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
 258 =  Ⓕ❹❹
 259
 260 ** test: discontiguous contractions with interleaved contractions
 261 * compare
 262 # a+ring & cedilla & macron below+dot above right
 263 <1 a\u0327\u0331\u030a\u0358
 264 =  Ⓓ❺❽
 265
 266 # a+ring & 1x..3x macron below+dot above right
 267 <2 a\u0331\u030a\u0358
 268 =  Ⓓ❽
 269 <2 a\u0331\u0331\u030a\u0358\u0358
 270 =  Ⓓ❽❽
 271 # also skips acute
 272 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
 273 =  Ⓓ❽❽❽❶
 274
 275 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
 276 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
 277 =  Ⓔ³ⓓ
 278
 279 ** test: some simple string comparisons
 280 @ root
 281 * compare
 282 # first string compares against ""
 283 = \u0000
 284 < a
 285 <1 b
 286 <3 B
 287 = \u0000B\u0000
 288
 289 ** test: compare with strength=primary
 290 % strength=primary
 291 * compare
 292 <1 a
 293 <1 b
 294 = B
 295
 296 ** test: compare with strength=secondary
 297 % strength=secondary
 298 * compare
 299 <1 a
 300 <1 b
 301 = B
 302
 303 ** test: compare with strength=tertiary
 304 % strength=tertiary
 305 * compare
 306 <1 a
 307 <1 b
 308 <3 B
 309
 310 ** test: compare with strength=quaternary
 311 % strength=quaternary
 312 * compare
 313 <1 a
 314 <1 b
 315 <3 B
 316
 317 ** test: compare with strength=identical
 318 % strength=identical
 319 * compare
 320 <1 a
 321 <1 b
 322 <3 B
 323
 324 ** test: côté with forwards secondary
 325 @ root
 326 * compare
 327 <1 cote
 328 <2 coté
 329 <2 côte
 330 <2 côté
 331
 332 ** test: côté with forwards secondary vs. U+FFFE merge separator
 333 # Merged sort keys: On each level, any difference in the first segment
 334 # must trump any further difference.
 335 * compare
 336 <1 cote\uFFFEcôté
 337 <2 coté\uFFFEcôte
 338 <2 côte\uFFFEcoté
 339 <2 côté\uFFFEcote
 340
 341 ** test: côté with backwards secondary
 342 % backwards=on
 343 * compare
 344 <1 cote
 345 <2 côte
 346 <2 coté
 347 <2 côté
 348
 349 ** test: côté with backwards secondary vs. U+FFFE merge separator
 350 # Merged sort keys: On each level, any difference in the first segment
 351 # must trump any further difference.
 352 * compare
 353 <1 cote\uFFFEcôté
 354 <2 côte\uFFFEcoté
 355 <2 coté\uFFFEcôte
 356 <2 côté\uFFFEcote
 357
 358 ** test: U+FFFE on identical level
 359 @ root
 360 % strength=identical
 361 * compare
 362 # All of these control codes are completely-ignorable, so that
 363 # their low code points are compared with the merge separator.
 364 # The merge separator must compare less than any other character.
 365 <1 \uFFFE\u0001\u0002\u0003
 366 <i \u0001\uFFFE\u0002\u0003
 367 <i \u0001\u0002\uFFFE\u0003
 368 <i \u0001\u0002\u0003\uFFFE
 369
 370 * compare
 371 # The merge separator must even compare less than U+0000.
 372 <1 \uFFFE\u0000\u0000
 373 <i \u0000\uFFFE\u0000
 374 <i \u0000\u0000\uFFFE
 375
 376 ** test: Hani < surrogates < U+FFFD
 377 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
 378 # so when compared that way the strings with unpaired surrogates compare equal to each other
 379 # and equal to the string with U+FFFD.
 380 @ root
 381 % strength=identical
 382 * compare
 383 <1 abz
 384 <1 a\u4e00z
 385 <1 a\U00020000z
 386 <1 a\ud800z
 387 <1 a\udbffz
 388 <1 a\udc00z
 389 <1 a\udfffz
 390 <1 a\ufffdz
 391
 392 ** test: script reordering
 393 @ root
 394 % reorder Hani Zzzz digit
 395 * compare
 396 <1 ?
 397 <1 +
 398 <1 丂
 399 <1 a
 400 <1 α
 401 <1 5
 402
 403 % reorder default
 404 * compare
 405 <1 ?
 406 <1 +
 407 <1 5
 408 <1 a
 409 <1 α
 410 <1 丂
 411
 412 ** test: empty rules
 413 @ rules
 414 * compare
 415 <1 a
 416 <2 ä
 417 <3 Ä
 418 <1 b
 419
 420 ** test: very simple rules
 421 @ rules
 422 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
 423 % strength=quaternary
 424 * compare
 425 <1 a
 426 =  e
 427 <4 q
 428 <4 r
 429 <1 x
 430 <3 X
 431 <2 y
 432 <3 Y
 433 <2 z
 434 <3 Z
 435
 436 ** test: tailoring twice before a root position: primary
 437 @ rules
 438 &[before 1]b<p
 439 &[before 1]b<q
 440 * compare
 441 <1 a
 442 <1 p
 443 <1 q
 444 <1 b
 445
 446 ** test: tailoring twice before a root position: secondary
 447 @ rules
 448 &[before 2]ſ<<p
 449 &[before 2]ſ<<q
 450 * compare
 451 <1 s
 452 <2 p
 453 <2 q
 454 <2 ſ
 455
 456 # secondary-before common weight
 457 @ rules
 458 &[before 2]b<<p
 459 &[before 2]b<<q
 460 * compare
 461 <1 a
 462 <1 p
 463 <2 q
 464 <2 b
 465
 466 ** test: tailoring twice before a root position: tertiary
 467 @ rules
 468 &[before 3]B<<<p
 469 &[before 3]B<<<q
 470 * compare
 471 <1 b
 472 <3 p
 473 <3 q
 474 <3 B
 475
 476 # tertiary-before common weight
 477 @ rules
 478 &[before 3]b<<<p
 479 &[before 3]b<<<q
 480 * compare
 481 <1 a
 482 <1 p
 483 <3 q
 484 <3 b
 485
 486 @ rules
 487 &[before 2]b<<s
 488 &[before 3]s<<<p
 489 &[before 3]s<<<q
 490 * compare
 491 <1 a
 492 <1 p
 493 <3 q
 494 <3 s
 495 <2 b
 496
 497 ** test: tailor after completely ignorable
 498 @ rules
 499 &\x00<<<x<<y
 500 * compare
 501 = \x00
 502 = \x1F
 503 <3 x
 504 <2 y
 505
 506 ** test: secondary tailoring gaps, ICU ticket 9362
 507 @ rules
 508 &[before 2]s<<'_'
 509 &s<<r  # secondary between s and ſ (long s)
 510 &ſ<<*a-q  # more than 15 between ſ and secondary CE boundary
 511 &[before 2][first primary ignorable]<<u<<v  # between secondary CE boundary & lowest secondary CE
 512 &[last primary ignorable]<<y<<z
 513
 514 * compare
 515 <2 u
 516 <2 v
 517 <2 \u0332  # lowest secondary CE
 518 <2 \u0308
 519 <2 y
 520 <2 z
 521 <1 s_
 522 <2 ss
 523 <2 sr
 524 <2 sſ
 525 <2 sa
 526 <2 sb
 527 <2 sp
 528 <2 sq
 529 <2 sus
 530 <2 svs
 531 <2 rs
 532
 533 ** test: tertiary tailoring gaps, ICU ticket 9362
 534 @ rules
 535 &[before 3]t<<<'_'
 536 &t<<<r  # tertiary between t and fullwidth t
 537 &ᵀ<<<*a-q  # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
 538 &[before 3][first secondary ignorable]<<<u<<<v  # between tertiary CE boundary & lowest tertiary CE
 539 &[last secondary ignorable]<<<y<<<z
 540
 541 * compare
 542 <3 u
 543 <3 v
 544 # Note: The root collator currently does not map any characters to tertiary CEs.
 545 <3 y
 546 <3 z
 547 <1 t_
 548 <3 tt
 549 <3 tr
 550 <3 tｔ
 551 <3 tᵀ
 552 <3 ta
 553 <3 tb
 554 <3 tp
 555 <3 tq
 556 <3 tut
 557 <3 tvt
 558 <3 rt
 559
 560 ** test: secondary & tertiary around root character
 561 @ rules
 562 &[before 2]m<<r
 563 &m<<s
 564 &[before 3]m<<<u
 565 &m<<<v
 566 * compare
 567 <1 l
 568 <1 r
 569 <2 u
 570 <3 m
 571 <3 v
 572 <2 s
 573 <1 n
 574
 575 ** test: secondary & tertiary around tailored item
 576 @ rules
 577 &m<x
 578 &[before 2]x<<r
 579 &x<<s
 580 &[before 3]x<<<u
 581 &x<<<v
 582 * compare
 583 <1 m
 584 <1 r
 585 <2 u
 586 <3 x
 587 <3 v
 588 <2 s
 589 <1 n
 590
 591 ** test: more nesting of secondary & tertiary before
 592 @ rules
 593 &[before 3]m<<<u
 594 &[before 2]m<<r
 595 &[before 3]r<<<q
 596 &m<<<w
 597 &m<<t
 598 &[before 3]w<<<v
 599 &w<<<x
 600 &w<<s
 601 * compare
 602 <1 l
 603 <1 q
 604 <3 r
 605 <2 u
 606 <3 m
 607 <3 v
 608 <3 w
 609 <3 x
 610 <2 s
 611 <2 t
 612 <1 n
 613
 614 ** test: case bits
 615 @ rules
 616 &w<x  # tailored CE getting case bits
 617   =uv=uV=Uv=UV  # 2 chars -> 1 CE
 618 &ae=ch=cH=Ch=CH  # 2 chars -> 2 CEs
 619 &rst=yz=yZ=Yz=YZ  # 2 chars -> 3 CEs
 620 % caseFirst=lower
 621 * compare
 622 <1 ae
 623 =  ch
 624 <3 cH
 625 <3 Ch
 626 <3 CH
 627 <1 rst
 628 =  yz
 629 <3 yZ
 630 <3 Yz
 631 <3 YZ
 632 <1 w
 633 <1 x
 634 =  uv
 635 <3 uV
 636 =  Uv  # mixed case on single CE cannot distinguish variations
 637 <3 UV
 638
 639 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
 640 @ rules
 641 &\u0001<<<t<<<T  # tertiary CEs
 642 % caseFirst=lower
 643 * compare
 644 <1 aa
 645 <3 aat
 646 <3 aaT
 647 <3 aA
 648 <3 aAt
 649 <3 ata
 650 <3 aTa
 651
 652 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
 653 % caseFirst=upper
 654 * compare
 655 <1 aA
 656 <3 aAt
 657 <3 aa
 658 <3 aat
 659 <3 aaT
 660 <3 ata
 661 <3 aTa
 662
 663 ** test: reset on expansion, ICU tickets 9415 & 9593
 664 @ rules
 665 &æ<x    # tailor the last primary CE so that x sorts between ae and af
 666 &æb=bæ  # copy all reset CEs to make bæ sort the same
 667 &각<h    # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
 668 &⒀<<y   # copy/tailor 4 CEs to make y sort with only a secondary difference
 669 &l·=z   # handle the pre-context for · when fetching reset CEs
 670    <<u  # copy/tailor 2 CEs
 671
 672 * compare
 673 <1 ae
 674 <2 æ
 675 <1 x
 676 <1 af
 677
 678 * compare
 679 <1 aeb
 680 <2 æb
 681 =  bæ
 682
 683 * compare
 684 <1 각
 685 <1 h
 686 <1 갂
 687 <1 갃
 688
 689 * compare
 690 <1 ·    # by itself: primary CE
 691 <1 l
 692 <2 l·   # l+middle dot has only a secondary difference from l
 693 =  z
 694 <2 u
 695
 696 * compare
 697 <1 (13)
 698 <3 ⒀  # DUCET sets special tertiary weights in all CEs
 699 <2 y
 700 <1 (13[
 701
 702 % alternate=shifted
 703 * compare
 704 <1 (13)
 705 =  13
 706 <3 ⒀
 707 =  y  # alternate=shifted removes the tailoring difference on the last CE
 708 <1 14
 709
 710 ** test: contraction inside extension, ICU ticket 9378
 711 @ rules
 712 &а<<х/й     # all letters are Cyrillic
 713 * compare
 714 <1 ай
 715 <2 х
 716
 717 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
 718 @ rules
 719 &t<x &ᵀ<y           # same primary weights
 720 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
 721 * compare
 722 <1 q
 723 <1 u
 724 <1 v
 725 <1 ꝗ
 726 <1 t
 727 <3 ᵀ
 728 <1 y
 729 <1 x
 730
 731 # Principle: Each rule builds on the state of preceding rules and ignores following rules.
 732
 733 ** test: later rule does not affect earlier reset position, ICU ticket 10105
 734 @ rules
 735 &a < u < v < w  &ov < x  &b < v
 736 * compare
 737 <1 oa
 738 <1 ou
 739 <1 x    # CE(o) followed by CE between u and w
 740 <1 ow
 741 <1 ob
 742 <1 ov
 743
 744 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
 745 @ rules
 746 &a=x/b &v=b
 747 % strength=secondary
 748 * compare
 749 <1 B
 750 <1 c
 751 <1 v
 752 =  b
 753 * compare
 754 <1 AB
 755 =  x
 756 <1 ac
 757 <1 av
 758 =  ab
 759
 760 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
 761 @ rules
 762 &a <<< c / e &g <<< e / l
 763 % strength=secondary
 764 * compare
 765 <1 AE
 766 =  c
 767 <2 æ
 768 <1 agl
 769 =  ae
 770
 771 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
 772 @ rules
 773 &a = b / c  &d = c / e
 774 % strength=secondary
 775 * compare
 776 <1 AC  # C is still only tertiary different from the original c
 777 =  b
 778 <1 ade
 779 =  ac
 780
 781 ** test: extension contains tailored character, ICU ticket 10105
 782 @ rules
 783 &a=e &b=u/e
 784 * compare
 785 <1 a
 786 =  e
 787 <1 ba
 788 =  be
 789 =  u
 790
 791 ** test: add simple mappings for characters with root context
 792 @ rules
 793 &z=·    # middle dot has a prefix mapping in the CLDR root
 794 &n=и    # и (U+0438) has contractions in the root
 795 * compare
 796 <1 l
 797 <2 l·   # root mapping for l|· still works
 798 <1 z
 799 =  ·
 800 * compare
 801 <1 n
 802 =  и
 803 <1 И
 804 <1 и\u0306  # root mapping for й=и\u0306 still works
 805 =  й
 806 <3 Й
 807
 808 ** test: add context mappings around characters with root context
 809 @ rules
 810 &z=·h   # middle dot has a prefix mapping in the CLDR root
 811 &n=ә|и  # и (U+0438) has contractions in the root
 812 * compare
 813 <1 l
 814 <2 l·   # root mapping for l|· still works
 815 <1 z
 816 =  ·h
 817 * compare
 818 <1 и
 819 <3 И
 820 <1 и\u0306  # root mapping for й=и\u0306 still works
 821 =  й
 822 * compare
 823 <1 әn
 824 =  әи
 825 <1 әo
 826
 827 ** test: many secondary CEs at the top of their range
 828 @ rules
 829 &[last primary ignorable]<<*\u2801-\u28ff
 830 * compare
 831 <2 \u0308
 832 <2 \u2801
 833 <2 \u2802
 834 <2 \u2803
 835 <2 \u2804
 836 <2 \u28fd
 837 <2 \u28fe
 838 <2 \u28ff
 839 <1 \x20
 840
 841 ** test: many tertiary CEs at the top of their range
 842 @ rules
 843 &[last secondary ignorable]<<<*a-z
 844 * compare
 845 <3 a
 846 <3 b
 847 <3 c
 848 <3 d
 849 # e..w
 850 <3 x
 851 <3 y
 852 <3 z
 853 <2 \u0308
 854
 855 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
 856 @ rules
 857 &a=p|x &b=px &c=op
 858 * compare
 859 <1 b
 860 =  px
 861 <3 B
 862 <1 c
 863 =  op
 864 <3 C
 865 * compare
 866 <1 ca
 867 =  opx  # first contraction op, then prefix p|x
 868 <3 cA
 869 <3 Ca
 870
 871 ** test: reset position with prefix (pre-context), ICU ticket 10102
 872 @ rules
 873 &a=p|x &px=y
 874 * compare
 875 <1 pa
 876 =  px
 877 =  y
 878 <3 pA
 879 <1 q
 880 <1 x
 881
 882 ** test: prefix+contraction together (1), ICU ticket 10071
 883 @ rules
 884 &x=a|bc
 885 * compare
 886 <1 ab
 887 <1 Abc
 888 <1 abd
 889 <1 ac
 890 <1 aw
 891 <1 ax
 892 =  abc
 893 <3 aX
 894 <3 Ax
 895 <1 b
 896 <1 bb
 897 <1 bc
 898 <3 bC
 899 <3 Bc
 900 <1 bd
 901
 902 ** test: prefix+contraction together (2), ICU ticket 10071
 903 @ rules
 904 &w=bc &x=a|b
 905 * compare
 906 <1 w
 907 =  bc
 908 <3 W
 909 * compare
 910 <1 aw
 911 <1 ax
 912 =  ab
 913 <3 aX
 914 <1 axb
 915 <1 axc
 916 =  abc  # prefix match a|b takes precedence over contraction match bc
 917 <3 abC
 918 <1 abd
 919 <1 ay
 920
 921 ** test: prefix+contraction together (3), ICU ticket 10071
 922 @ rules
 923 &x=a|b &w=bc    # reverse order of rules as previous test, order should not matter here
 924 * compare       # same "compare" sequences as previous test
 925 <1 w
 926 =  bc
 927 <3 W
 928 * compare
 929 <1 aw
 930 <1 ax
 931 =  ab
 932 <3 aX
 933 <1 axb
 934 <1 axc
 935 =  abc  # prefix match a|b takes precedence over contraction match bc
 936 <3 abC
 937 <1 abd
 938 <1 ay
 939
 940 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
 941 @ rules
 942 &d=ch &v=p|ci
 943 * compare
 944 <1 pc
 945 <3 pC
 946 <1 pcH
 947 <1 pcI
 948 <1 pd
 949 =  pch  # no-prefix contraction ch matches
 950 <3 pD
 951 <1 pv
 952 =  pci  # prefix+contraction p|ci matches
 953 <3 pV
 954
 955 ** test: tailor in & around compact ranges of root primaries
 956 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
 957 # which should be reliably encoded as one range in the root elements data.
 958 @ rules
 959 &[before 1]ᚁ<a
 960 &ᚁ<b
 961 &[before 1]ᚂ<c
 962 &ᚂ<d
 963 &[before 1]ᚚ<y
 964 &ᚚ<z
 965 &[before 2]ᚁ<<r
 966 &ᚁ<<s
 967 &[before 3]ᚚ<<<t
 968 &ᚚ<<<u
 969 * compare
 970 <1 ᣵ    # U+18F5 last Canadian Aboriginal
 971 <1 a
 972 <1 r
 973 <2 ᚁ
 974 <2 s
 975 <1 b
 976 <1 c
 977 <1 ᚂ
 978 <1 d
 979 <1 ᚃ
 980 <1 ᚙ
 981 <1 y
 982 <1 t
 983 <3 ᚚ
 984 <3 u
 985 <1 z
 986 <1 ᚠ    # U+16A0 first Runic
 987
 988 ** test: suppressContractions
 989 @ rules
 990 &z<ch<әж [suppressContractions [·cә]]
 991 * compare
 992 <1 ch
 993 <3 cH   # ch was suppressed
 994 <1 l
 995 <1 l·   # primary difference, not secondary, because l|· was suppressed
 996 <1 ә
 997 <2 ә\u0308  # secondary difference, not primary, because contractions for ә were suppressed
 998 <1 әж
 999 <3 әЖ
1000
1001 ** test: Hangul & Jamo
1002 @ rules
1003 &L=\u1100  # first Jamo L
1004 &V=\u1161  # first Jamo V
1005 &T=\u11A8  # first Jamo T
1006 &\uAC01<<*\u4E00-\u4EFF  # first Hangul LVT syllable & lots of secondary diffs
1007 * compare
1008 <1 Lv
1009 <3 LV
1010 =  \u1100\u1161
1011 =  \uAC00
1012 <1 LVt
1013 <3 LVT
1014 =  \u1100\u1161\u11A8
1015 =  \uAC00\u11A8
1016 =  \uAC01
1017 <2 LVT\u0308
1018 <2 \u4E00
1019 <2 \u4E01
1020 <2 \u4E80
1021 <2 \u4EFF
1022 <2 LV\u0308T
1023 <1 \uAC02
1024
1025 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
1026 @ rules
1027 &[last variable]<x
1028 [maxVariable space]  # has effect only after building, no effect on following rules
1029 &[last variable]<y
1030 &[before 1][first regular]<z
1031 * compare
1032 <1 ?  # some punctuation
1033 <1 x
1034 <1 y
1035 <1 z
1036 <1 $  # some symbol
1037
1038 @ rules
1039 &[last primary ignorable]<<x<<<y
1040 &[last primary ignorable]<<z
1041 * compare
1042 <2 \u0358
1043 <2 x
1044 <3 y
1045 <2 z
1046 <1 \x20
1047
1048 @ rules
1049 &[last secondary ignorable]<<<x
1050 &[last secondary ignorable]<<<y
1051 * compare
1052 <3 x
1053 <3 y
1054 <2 \u0358
1055
1056 @ rules
1057 &[before 2][first variable]<<z
1058 &[before 2][first variable]<<y
1059 &[before 3][first variable]<<<x
1060 &[before 3][first variable]<<<w
1061 &[before 1][first variable]<v
1062 &[before 2][first variable]<<u
1063 &[before 3][first variable]<<<t
1064 &[before 2]\uFDD1\xA0<<s  # FractionalUCA.txt: FDD1 00A0, SPACE first primary
1065 * compare
1066 <2 \u0358
1067 <1 s
1068 <2 \uFDD1\xA0
1069 <1 t
1070 <3 u
1071 <2 v
1072 <1 w
1073 <3 x
1074 <3 y
1075 <2 z
1076 <2 \t
1077
1078 @ rules
1079 &[before 2][first regular]<<z
1080 &[before 3][first regular]<<<y
1081 &[before 1][first regular]<x
1082 &[before 3][first regular]<<<w
1083 &[before 2]\uFDD1\u263A<<v  # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
1084 &[before 3][first regular]<<<u
1085 &[before 1][first regular]<p  # primary before the boundary: becomes variable
1086 &[before 3][first regular]<<<t  # not affected by p
1087 &[last variable]<q              # after p!
1088 * compare
1089 <1 ?
1090 <1 p
1091 <1 q
1092 <1 t
1093 <3 u
1094 <3 v
1095 <1 w
1096 <3 x
1097 <1 y
1098 <3 z
1099 <1 $
1100
1101 # check that p & q are indeed variable
1102 % alternate=shifted
1103 * compare
1104 =  ?
1105 =  p
1106 =  q
1107 <1 t
1108 <3 u
1109 <3 v
1110 <1 w
1111 <3 x
1112 <1 y
1113 <3 z
1114 <1 $
1115
1116 @ rules
1117 &[before 2][first trailing]<<z
1118 &[before 1][first trailing]<y
1119 &[before 3][first trailing]<<<x
1120 * compare
1121 <1 \u4E00  # first Han, first implicit
1122 <1 \uFDD1\uFDD0  # FractionalUCA.txt: unassigned first primary
1123 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
1124 <1 x
1125 <3 y
1126 <1 z
1127 <2 \uFFFD  # The root collator currently maps U+FFFD to the first real trailing primary.
1128
1129 @ rules
1130 &[before 2][first primary ignorable]<<z
1131 &[before 2][first primary ignorable]<<y
1132 &[before 3][first primary ignorable]<<<x
1133 &[before 3][first primary ignorable]<<<w
1134 * compare
1135 =  \x01
1136 <2 w
1137 <3 x
1138 <3 y
1139 <2 z
1140 <2 \u0301
1141
1142 @ rules
1143 &[before 3][first secondary ignorable]<<<y
1144 &[before 3][first secondary ignorable]<<<x
1145 * compare
1146 =  \x01
1147 <3 x
1148 <3 y
1149 <2 \u0301
1150
1151 ** test: canonical closure
1152 @ rules
1153 &X=A &U=Â
1154 * compare
1155 <1 U
1156 =  Â
1157 =  A\u0302
1158 <2 Ú  # U with acute
1159 =  U\u0301
1160 =  Ấ  # A with circumflex & acute
1161 =  Â\u0301
1162 =  A\u0302\u0301
1163 <1 X
1164 =  A
1165 <2 X\u030A  # with ring above
1166 =  Å
1167 =  A\u030A
1168 =  \u212B  # Angstrom sign
1169
1170 @ rules
1171 &x=\u5140\u55C0
1172 * compare
1173 <1 x
1174 =  \u5140\u55C0
1175 =  \u5140\uFA0D
1176 =  \uFA0C\u55C0
1177 =  \uFA0C\uFA0D  # CJK compatibility characters
1178 <3 X
1179
1180 # canonical closure on prefix rules, ICU ticket 9444
1181 @ rules
1182 &x=ä|ŝ
1183 * compare
1184 <1 äs  # not tailored
1185 <1 äx
1186 =  äŝ
1187 =  a\u0308s\u0302
1188 =  a\u0308ŝ
1189 =  äs\u0302
1190 <3 äX
1191
1192 ** test: conjoining Jamo map to expansions
1193 @ rules
1194 &gg=\u1101  # Jamo Lead consonant GG
1195 &nj=\u11AC  # Jamo Trail consonant NJ
1196 * compare
1197 <1 gg\u1161nj
1198 =  \u1101\u1161\u11AC
1199 =  \uAE4C\u11AC
1200 =  \uAE51
1201 <3 gg\u1161nJ
1202 <1 \u1100\u1100
1203
1204 ** test: canonical tail closure, ICU ticket 5913
1205 @ rules
1206 &a<â
1207 * compare
1208 <1 a
1209 <1 â              # tailored
1210 =  a\u0302
1211 <2 a\u0323\u0302  # discontiguous contraction
1212 =  ạ\u0302        # equivalent
1213 =  ậ              # equivalent
1214 <1 b
1215
1216 @ rules
1217 &a<ạ
1218 * compare
1219 <1 a
1220 <1 ạ              # tailored
1221 =  a\u0323
1222 <2 a\u0323\u0302  # contiguous contraction plus extra diacritic
1223 =  ạ\u0302        # equivalent
1224 =  ậ              # equivalent
1225 <1 b
1226
1227 # Tail closure should work even if there is a prefix and/or contraction.
1228 @ rules
1229 &a<\u5140|câ
1230 # In order to find discontiguous contractions for \u5140|câ
1231 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
1232 # (This follows from the UCA spec.)
1233 &x=\u5140|ca
1234 * compare
1235 <1 \u5140a
1236 =  \uFA0Ca
1237 <1 \u5140câ              # tailored
1238 =  \uFA0Ccâ
1239 =  \u5140ca\u0302
1240 =  \uFA0Cca\u0302
1241 <2 \u5140ca\u0323\u0302  # discontiguous contraction
1242 =  \uFA0Cca\u0323\u0302
1243 =  \u5140cạ\u0302
1244 =  \uFA0Ccạ\u0302
1245 =  \u5140cậ
1246 =  \uFA0Ccậ
1247 <1 \u5140b
1248 =  \uFA0Cb
1249 <1 \u5140x
1250 =  \u5140ca
1251
1252 # Double-check that without the extra mapping there will be no discontiguous match.
1253 @ rules
1254 &a<\u5140|câ
1255 * compare
1256 <1 \u5140a
1257 =  \uFA0Ca
1258 <1 \u5140câ              # tailored
1259 =  \uFA0Ccâ
1260 =  \u5140ca\u0302
1261 =  \uFA0Cca\u0302
1262 <1 \u5140b
1263 =  \uFA0Cb
1264 <1 \u5140ca\u0323\u0302  # no discontiguous contraction
1265 =  \uFA0Cca\u0323\u0302
1266 =  \u5140cạ\u0302
1267 =  \uFA0Ccạ\u0302
1268 =  \u5140cậ
1269 =  \uFA0Ccậ
1270
1271 @ rules
1272 &a<cạ
1273 * compare
1274 <1 a
1275 <1 cạ              # tailored
1276 =  ca\u0323
1277 <2 ca\u0323\u0302  # contiguous contraction plus extra diacritic
1278 =  cạ\u0302        # equivalent
1279 =  cậ              # equivalent
1280 <1 b
1281
1282 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1283 #   = 03C9 0313 0300 0345
1284 # ccc = 0, 230, 230, 240
1285 @ rules
1286 &δ=αῳ
1287 # In order to find discontiguous contractions for αῳ
1288 # there must exist a mapping for αω, regardless of what it maps to.
1289 # (This follows from the UCA spec.)
1290 &ε=αω
1291 * compare
1292 <1 δ
1293 =  αῳ
1294 =  αω\u0345
1295 <2 αω\u0313\u0300\u0345  # discontiguous contraction
1296 =  αὠ\u0300\u0345
1297 =  αὢ\u0345
1298 =  αᾢ
1299 <2 αω\u0300\u0313\u0345
1300 =  αὼ\u0313\u0345
1301 =  αῲ\u0313  # not FCD
1302 <1 ε
1303 =  αω
1304
1305 # Double-check that without the extra mapping there will be no discontiguous match.
1306 @ rules
1307 &δ=αῳ
1308 * compare
1309 <1 αω\u0313\u0300\u0345  # no discontiguous contraction
1310 =  αὠ\u0300\u0345
1311 =  αὢ\u0345
1312 =  αᾢ
1313 <2 αω\u0300\u0313\u0345
1314 =  αὼ\u0313\u0345
1315 =  αῲ\u0313  # not FCD
1316 <1 δ
1317 =  αῳ
1318 =  αω\u0345
1319
1320 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
1321 # Tests code paths where the tailored string has a combining mark
1322 # that does not occur in any composite's decomposition.
1323 @ rules
1324 &δ=αὼ\u0315
1325 * compare
1326 <1 αω\u0313\u0300\u0315  # Not tailored: The grave accent blocks the comma above.
1327 =  αὠ\u0300\u0315
1328 =  αὢ\u0315
1329 <1 δ
1330 =  αὼ\u0315
1331 =  αω\u0300\u0315
1332 <2 αω\u0300\u0315\u0345
1333 =  αὼ\u0315\u0345
1334 =  αῲ\u0315  # not FCD
1335
1336 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
1337 @ rules
1338 &z<aa
1339 * compare
1340 <1 z
1341 <1 aa
1342 <2 aa\u0308
1343 =  aä
1344
1345 ** test: Jamo L with and in prefix
1346 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
1347 @ rules
1348 # Jamo Lead consonant G after G or GG
1349 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
1350 # Jamo Lead consonant GG sorts like G+G
1351 &\u1100\u1100=\u1101
1352 # Note: Making G|GG and GG|GG sort the same as G|G+G
1353 # would require the ability to reset on G|G+G,
1354 # or we could make G-after-G equal to some secondary-CE character,
1355 # and reset on a pair of those.
1356 # (It does not matter much if there are at most two G in a row in real text.)
1357 * compare
1358 <1 \u1100
1359 <2 \u1100\u1100  # only one primary from a sequence of G lead consonants
1360 =  \u1101
1361 <2 \u1100\u1100\u1100
1362 =  \u1101\u1100
1363 # but not = \u1100\u1101, see above
1364 <1 \u1100\u1161
1365 =  \uAC00
1366 <2 \u1100\u1100\u1161
1367 =  \u1100\uAC00  # prefix match from the L of the LV syllable
1368 =  \u1101\u1161
1369 =  \uAE4C
1370
1371 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
1372 @ rules
1373 # Low secondary CEs for Jamo V & T.
1374 # Note: T should sort before V for proper syllable order.
1375 &\u0332  # COMBINING LOW LINE (first primary ignorable)
1376 <<\u1161<<\u1162
1377
1378 # Korean Jamo lead consonant search rules, part 2:
1379 # Make modern compound L jamo primary equivalent to non-compound forms.
1380
1381 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
1382 &\u0313  # COMBINING COMMA ABOVE (second primary ignorable)
1383 =\u1100|\u1100
1384 =\u1103|\u1103
1385 =\u1107|\u1107
1386 =\u1109|\u1109
1387 =\u110C|\u110C
1388
1389 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
1390 &\u1100\u0313=\u1101<<<\u3132  # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
1391 &\u1103\u0313=\u1104<<<\u3138  # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
1392 &\u1107\u0313=\u1108<<<\u3143  # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
1393 &\u1109\u0313=\u110A<<<\u3146  # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
1394 &\u110C\u0313=\u110D<<<\u3149  # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
1395
1396 * compare
1397 <1 \u1100\u1161
1398 =  \uAC00
1399 <2 \u1100\u1162
1400 =  \uAC1C
1401 <2 \u1100\u1100\u1161
1402 =  \u1100\uAC00
1403 =  \u1101\u1161
1404 =  \uAE4C
1405 <3 \u3132\u1161
1406
1407 ** test: Hangul syllables in prefix & in the interior of a contraction
1408 @ rules
1409 &x=\u1100\u1161|a\u1102\u1162z
1410 * compare
1411 <1 \u1100\u1161x
1412 =  \u1100\u1161a\u1102\u1162z
1413 =  \u1100\u1161a\uB0B4z
1414 =  \uAC00a\u1102\u1162z
1415 =  \uAC00a\uB0B4z
1416
1417 ** test: digits are unsafe-backwards when numeric=on
1418 @ root
1419 % numeric=on
1420 * compare
1421 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
1422 # We need to back up before the identical prefix "1" and compare the full numbers.
1423 <1 11b
1424 <1 101a
1425
1426 ** test: simple locale data test
1427 @ locale de
1428 * compare
1429 <1 a
1430 <2 ä
1431 <1 ae
1432 <2 æ
1433
1434 @ locale de-u-co-phonebk
1435 * compare
1436 <1 a
1437 <1 ae
1438 <2 ä
1439 <2 æ
1440
1441 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
1442
1443 ** test: DataDrivenCollationTest/TestMorePinyin
1444 # Testing the primary strength.
1445 @ locale zh
1446 % strength=primary
1447 * compare
1448 < lā
1449 = lĀ
1450 = Lā
1451 = LĀ
1452 < lān
1453 = lĀn
1454 < lē
1455 = lĒ
1456 = Lē
1457 = LĒ
1458 < lēn
1459 = lĒn
1460
1461 ** test: DataDrivenCollationTest/TestLithuanian
1462 # Lithuanian sort order.
1463 @ locale lt
1464 * compare
1465 < cz
1466 < č
1467 < d
1468 < iz
1469 < j
1470 < sz
1471 < š
1472 < t
1473 < zz
1474 < ž
1475
1476 ** test: DataDrivenCollationTest/TestLatvian
1477 # Latvian sort order.
1478 @ locale lv
1479 * compare
1480 < cz
1481 < č
1482 < d
1483 < gz
1484 < ģ
1485 < h
1486 < iz
1487 < j
1488 < kz
1489 < ķ
1490 < l
1491 < lz
1492 < ļ
1493 < m
1494 < nz
1495 < ņ
1496 < o
1497 < rz
1498 < ŗ
1499 < s
1500 < sz
1501 < š
1502 < t
1503 < zz
1504 < ž
1505
1506 ** test: DataDrivenCollationTest/TestEstonian
1507 # Estonian sort order.
1508 @ locale et
1509 * compare
1510 < sy
1511 < š
1512 < šy
1513 < z
1514 < zy
1515 < ž
1516 < v
1517 < va
1518 < w
1519 < õ
1520 < õy
1521 < ä
1522 < äy
1523 < ö
1524 < öy
1525 < ü
1526 < üy
1527 < x
1528
1529 ** test: DataDrivenCollationTest/TestAlbanian
1530 # Albanian sort order.
1531 @ locale sq
1532 * compare
1533 < cz
1534 < ç
1535 < d
1536 < dz
1537 < dh
1538 < e
1539 < ez
1540 < ë
1541 < f
1542 < gz
1543 < gj
1544 < h
1545 < lz
1546 < ll
1547 < m
1548 < nz
1549 < nj
1550 < o
1551 < rz
1552 < rr
1553 < s
1554 < sz
1555 < sh
1556 < t
1557 < tz
1558 < th
1559 < u
1560 < xz
1561 < xh
1562 < y
1563 < zz
1564 < zh
1565
1566 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
1567 # Sorted file has different order.
1568 @ root
1569 # No explicit normalization=on here: normalization is turned on & off automatically.
1570 * compare
1571 < \u5F20
1572 < \u5F20\u4E00\u8E3F
1573
1574 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
1575 # This pretty much crashes.
1576 @ root
1577 * compare
1578 < \u0f71\u0f72\u0f80\u0f71\u0f72
1579 < \u0f80
1580
1581 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
1582 # These are examples of strings that caused trouble in partial sort key testing.
1583 @ locale th-TH
1584 * compare
1585 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
1586 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
1587 * compare
1588 < \u0E01\u0E07\u0E01\u0E32\u0E23
1589 < \u0E01\u0E07\u0E42\u0E01\u0E49
1590 * compare
1591 < \u0E01\u0E23\u0E19\u0E17\u0E32
1592 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
1593 * compare
1594 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
1595 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
1596 * compare
1597 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
1598 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
1599
1600 ** test: DataDrivenCollationTest/TestJavaStyleRule
1601 # java.text allows rules to start as '<<<x<<<y...'
1602 # we emulate this by assuming a &[first tertiary ignorable] in this case.
1603 @ rules
1604 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
1605 * compare
1606 = a
1607 = equal
1608 < z
1609 < x
1610 = b  # x had become the new first primary ignorable
1611 < w
1612
1613 ** test: DataDrivenCollationTest/TestShiftedIgnorable
1614 # The UCA states that primary ignorables should be completely
1615 # ignorable when following a shifted code point.
1616 @ root
1617 % alternate=shifted
1618 % strength=quaternary
1619 * compare
1620 < a\u0020b
1621 = a\u0020\u0300b
1622 = a\u0020\u0301b
1623 < a_b
1624 = a_\u0300b
1625 = a_\u0301b
1626 < A\u0020b
1627 = A\u0020\u0300b
1628 = A\u0020\u0301b
1629 < A_b
1630 = A_\u0300b
1631 = A_\u0301b
1632 < a\u0301b
1633 < A\u0301b
1634 < a\u0300b
1635 < A\u0300b
1636
1637 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
1638 # The UCA states that primary ignorables should be completely ignorable when following
1639 # a shifted code point. With alternate=non-ignorable nothing is shifted, so they are not ignored here.
1640 @ root
1641 % alternate=non-ignorable
1642 % strength=tertiary
1643 * compare
1644 < a\u0020b
1645 < A\u0020b
1646 < a\u0020\u0301b
1647 < A\u0020\u0301b
1648 < a\u0020\u0300b
1649 < A\u0020\u0300b
1650 < a_b
1651 < A_b
1652 < a_\u0301b
1653 < A_\u0301b
1654 < a_\u0300b
1655 < A_\u0300b
1656 < a\u0301b
1657 < A\u0301b
1658 < a\u0300b
1659 < A\u0300b
1660
1661 ** test: DataDrivenCollationTest/TestSafeSurrogates
1662 # It turned out that surrogates were not skipped properly
1663 # when iterating backwards if they were in the middle of a
1664 # contraction. This test ensures that this is fixed.
1665 @ rules
1666 &a < x\ud800\udc00b
1667 * compare
1668 < a
1669 < x\ud800\udc00b
1670
1671 ** test: DataDrivenCollationTest/da_TestPrimary
1672 # This test goes through primary strength cases
1673 @ locale da
1674 % strength=primary
1675 * compare
1676 < Lvi
1677 < Lwi
1678 * compare
1679 < L\u00e4vi
1680 < L\u00f6wi
1681 * compare
1682 < L\u00fcbeck
1683 = Lybeck
1684
1685 ** test: DataDrivenCollationTest/da_TestTertiary
1686 # This test goes through tertiary strength cases
1687 @ locale da
1688 % strength=tertiary
1689 * compare
1690 < Luc
1691 < luck
1692 * compare
1693 < luck
1694 < L\u00fcbeck
1695 * compare
1696 < lybeck
1697 < L\u00fcbeck
1698 * compare
1699 < L\u00e4vi
1700 < L\u00f6we
1701 * compare
1702 < L\u00f6ww
1703 < mast
1704
1705 * compare
1706 < A/S
1707 < ANDRE
1708 < ANDR\u00c9
1709 < ANDREAS
1710 < AS
1711 < CA
1712 < \u00c7A
1713 < CB
1714 < \u00c7C
1715 < D.S.B.
1716 < DA
1717 < \u00d0A
1718 < DB
1719 < \u00d0C
1720 < DSB
1721 < DSC
1722 < EKSTRA_ARBEJDE
1723 < EKSTRABUD0
1724 < H\u00d8ST
1725 < HAAG
1726 < H\u00c5NDBOG
1727 < HAANDV\u00c6RKSBANKEN
1728 < Karl
1729 < karl
1730 < NIELS\u0020J\u00d8RGEN
1731 < NIELS-J\u00d8RGEN
1732 < NIELSEN
1733 < R\u00c9E,\u0020A
1734 < REE,\u0020B
1735 < R\u00c9E,\u0020L
1736 < REE,\u0020V
1737 < SCHYTT,\u0020B
1738 < SCHYTT,\u0020H
1739 < SCH\u00dcTT,\u0020H
1740 < SCHYTT,\u0020L
1741 < SCH\u00dcTT,\u0020M
1742 < SS
1743 < \u00df
1744 < SSA
1745 < STORE\u0020VILDMOSE
1746 < STOREK\u00c6R0
1747 < STORM\u0020PETERSEN
1748 < STORMLY
1749 < THORVALD
1750 < THORVARDUR
1751 < \u00feORVAR\u00d0UR
1752 < THYGESEN
1753 < VESTERG\u00c5RD,\u0020A
1754 < VESTERGAARD,\u0020A
1755 < VESTERG\u00c5RD,\u0020B
1756 < \u00c6BLE
1757 < \u00c4BLE
1758 < \u00d8BERG
1759 < \u00d6BERG
1760
1761 * compare
1762 < andere
1763 < chaque
1764 < chemin
1765 < cote
1766 < cot\u00e9
1767 < c\u00f4te
1768 < c\u00f4t\u00e9
1769 < \u010du\u010d\u0113t
1770 < Czech
1771 < hi\u0161a
1772 < irdisch
1773 < lie
1774 < lire
1775 < llama
1776 < l\u00f5ug
1777 < l\u00f2za
1778 < lu\u010d
1779 < luck
1780 < L\u00fcbeck
1781 < lye
1782 < l\u00e4vi
1783 < L\u00f6wen
1784 < m\u00e0\u0161ta
1785 < m\u00eer
1786 < myndig
1787 < M\u00e4nner
1788 < m\u00f6chten
1789 < pi\u00f1a
1790 < pint
1791 < pylon
1792 < \u0161\u00e0ran
1793 < savoir
1794 < \u0160erb\u016bra
1795 < Sietla
1796 < \u015blub
1797 < subtle
1798 < symbol
1799 < s\u00e4mtlich
1800 < verkehrt
1801 < vox
1802 < v\u00e4ga
1803 < waffle
1804 < wood
1805 < yen
1806 < yuan
1807 < yucca
1808 < \u017eal
1809 < \u017eena
1810 < \u017den\u0113va
1811 < zoo0
1812 < Zviedrija
1813 < Z\u00fcrich
1814 < zysk0
1815 < \u00e4ndere
1816
1817 ** test: DataDrivenCollationTest/hi_TestNewRules
1818 # This test goes through new rules and tests against old rules
1819 @ locale hi
1820 * compare
1821 < कॐ
1822 < कं
1823 < कँ
1824 < कः
1825
1826 ** test: DataDrivenCollationTest/ro_TestNewRules
1827 # This test goes through new rules and tests against old rules
1828 @ locale ro
1829 * compare
1830 < xAx
1831 < xă
1832 < xĂ
1833 < Xă
1834 < XĂ
1835 < xăx
1836 < xĂx
1837 < xâ
1838 < xÂ
1839 < Xâ
1840 < XÂ
1841 < xâx
1842 < xÂx
1843 < xb
1844 < xIx
1845 < xî
1846 < xÎ
1847 < Xî
1848 < XÎ
1849 < xîx
1850 < xÎx
1851 < xj
1852 < xSx
1853 < xș
1854 = xş
1855 < xȘ
1856 = xŞ
1857 < Xș
1858 = Xş
1859 < XȘ
1860 = XŞ
1861 < xșx
1862 = xşx
1863 < xȘx
1864 = xŞx
1865 < xT
1866 < xTx
1867 < xț
1868 = xţ
1869 < xȚ
1870 = xŢ
1871 < Xț
1872 = Xţ
1873 < XȚ
1874 = XŢ
1875 < xțx
1876 = xţx
1877 < xȚx
1878 = xŢx
1879 < xU
1880
1881 ** test: DataDrivenCollationTest/testOffsets
1882 # This tests cases where forwards and backwards iteration get different offsets
1883 @ locale en
1884 % strength=tertiary
1885 * compare
1886 < a\uD800\uDC00\uDC00
1887 < b\uD800\uDC00\uDC00
1888 * compare
1889 < \u0301A\u0301\u0301
1890 < \u0301B\u0301\u0301
1891 * compare
1892 < abcd\r\u0301
1893 < abce\r\u0301
1894 # TODO: test offsets in new CollationTest
1895
1896 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
1897
1898 ** test: was ICU 52 cmsccoll/TestRedundantRules
1899 @ rules
1900 & a < b < c < d& [before 1] c < m
1901 * compare
1902 <1 a
1903 <1 b
1904 <1 m
1905 <1 c
1906 <1 d
1907
1908 @ rules
1909 & a < b <<< c << d <<< e& [before 3] e <<< x
1910 * compare
1911 <1 a
1912 <1 b
1913 <3 c
1914 <2 d
1915 <3 x
1916 <3 e
1917
1918 @ rules
1919 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
1920 * compare
1921 <1 a
1922 <1 b
1923 <3 c
1924 <2 d
1925 <3 e
1926 <3 f
1927 <1 x
1928 <1 g
1929
1930 @ rules
1931 & a <<< b << c < d& a < m
1932 * compare
1933 <1 a
1934 <3 b
1935 <2 c
1936 <1 m
1937 <1 d
1938
1939 @ rules
1940 &a<b<<b\u0301 &z<b
1941 * compare
1942 <1 a
1943 <1 b\u0301
1944 <1 z
1945 <1 b
1946
1947 @ rules
1948 &z<m<<<q<<<m
1949 * compare
1950 <1 z
1951 <1 q
1952 <3 m
1953
1954 @ rules
1955 &z<<<m<q<<<m
1956 * compare
1957 <1 z
1958 <1 q
1959 <3 m
1960
1961 @ rules
1962 & a < b < c < d& r < c
1963 * compare
1964 <1 a
1965 <1 b
1966 <1 d
1967 <1 r
1968 <1 c
1969
1970 @ rules
1971 & a < b < c < d& c < m
1972 * compare
1973 <1 a
1974 <1 b
1975 <1 c
1976 <1 m
1977 <1 d
1978
1979 @ rules
1980 & a < b < c < d& a < m
1981 * compare
1982 <1 a
1983 <1 m
1984 <1 b
1985 <1 c
1986 <1 d
1987
1988 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
1989 # The following two rules should sort the particular list of strings the same.
1990 @ rules
1991 &AE <<< a << b <<< c &d <<< f
1992 * compare
1993 <1 AE
1994 <3 a
1995 <2 b
1996 <3 c
1997 <1 d
1998 <3 f
1999
2000 @ rules
2001 &A <<< a / E << b / E <<< c /E  &d <<< f
2002 * compare
2003 <1 AE
2004 <3 a
2005 <2 b
2006 <3 c
2007 <1 d
2008 <3 f
2009
2010 # The following two rules should sort the particular list of strings the same.
2011 @ rules
2012 &AE <<< a <<< b << c << d < e < f <<< g
2013 * compare
2014 <1 AE
2015 <3 a
2016 <3 b
2017 <2 c
2018 <2 d
2019 <1 e
2020 <1 f
2021 <3 g
2022
2023 @ rules
2024 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
2025 * compare
2026 <1 AE
2027 <3 a
2028 <3 b
2029 <2 c
2030 <2 d
2031 <1 e
2032 <1 f
2033 <3 g
2034
2035 # The following two rules should sort the particular list of strings the same.
2036 @ rules
2037 &AE <<< B <<< C / D <<< F
2038 * compare
2039 <1 AE
2040 <3 B
2041 <3 F
2042 <1 AED
2043 <3 C
2044
2045 @ rules
2046 &A <<< B / E <<< C / ED <<< F / E
2047 * compare
2048 <1 AE
2049 <3 B
2050 <3 F
2051 <1 AED
2052 <3 C
2053
2054 ** test: never reorder trailing primaries
2055 @ root
2056 % reorder Zzzz Grek
2057 * compare
2058 <1 L
2059 <1 字
2060 <1 Ω
2061 <1 \uFFFD
2062 <1 \uFFFF
2063
2064 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
2065 @ rules
2066 &u=ab|cd
2067 &v=b|ce
2068 * compare
2069 <1 abc
2070 <1 abcc
2071 <1 abcf
2072 <1 abcd
2073 =  abu
2074 <1 abce
2075 =  abv
2076
2077 # With the following rules, there is only one prefix per composite ĉ or ç,
2078 # but both prefixes apply to just c in NFD form.
2079 # We would get different results for composed vs. NFD input
2080 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
2081 @ rules
2082 &x=op|ĉ
2083 &y=p|ç
2084 * compare
2085 <1 opc
2086 <2 opć
2087 <1 opcz
2088 <1 opd
2089 <1 opĉ
2090 =  opc\u0302
2091 =  opx
2092 <1 opç
2093 =  opc\u0327
2094 =  opy
2095
2096 # The mapping is used which has the longest matching prefix for which
2097 # there is also a suffix match, with the longest suffix match among several for that prefix.
2098 @ rules
2099 &❶=d
2100 &❷=de
2101 &❸=def
2102 &①=c|d
2103 &②=c|de
2104 &③=c|def
2105 &④=bc|d
2106 &⑤=bc|de
2107 &⑥=bc|def
2108 &⑦=abc|d
2109 &⑧=abc|de
2110 &⑨=abc|def
2111 * compare
2112 <1 9aadzz
2113 =  9aa❶zz
2114 <1 9aadez
2115 =  9aa❷z
2116 <1 9aadef
2117 =  9aa❸
2118 <1 9acdzz
2119 =  9ac①zz
2120 <1 9acdez
2121 =  9ac②z
2122 <1 9acdef
2123 =  9ac③
2124 <1 9bcdzz
2125 =  9bc④zz
2126 <1 9bcdez
2127 =  9bc⑤z
2128 <1 9bcdef
2129 =  9bc⑥
2130 <1 abcdzz
2131 =  abc⑦zz
2132 <1 abcdez
2133 =  abc⑧z
2134 <1 abcdef
2135 =  abc⑨
2136
2137 ** test: prefix + discontiguous contraction with missing prefix contraction
2138 # Unfortunate terminology: The first "prefix" here is the pre-context,
2139 # the second "prefix" refers to the contraction/relation string that is
2140 # one shorter than the one being tested.
2141 @ rules
2142 &x=p|e
2143 &y=p|ê
2144 &z=op|ê
2145 # No mapping for op|e:
2146 # Discontiguous contraction matching should not match op|ê in opệ
2147 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
2148 # but there is no match on op|e.
2149 * compare
2150 <1 oPe
2151 <1 ope
2152 =  opx
2153 <1 opệ
2154 =  opy\u0323  # y not z
2155 <1 opê
2156 =  opz
2157
2158 # We cannot test for fallback by whether the contraction default CE32
2159 # is for another contraction. With the following rules, there is no mapping for op|e,
2160 # and the fallback to prefix p has no contractions.
2161 @ rules
2162 &x=p|e
2163 &z=op|ê
2164 * compare
2165 <1 oPe
2166 <1 ope
2167 =  opx
2168 <2 opệ
2169 =  opx\u0323\u0302  # x not z
2170 <1 opê
2171 =  opz
2172
2173 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
2174 @ rules
2175 &x=e
2176 &z=op|ê
2177 * compare
2178 <1 ope
2179 =  opx
2180 <3 oPe
2181 =  oPx
2182 <2 opệ
2183 =  opx\u0323\u0302  # x not z
2184 <1 opê
2185 =  opz
2186
2187 ** test: maxVariable via rules
2188 @ rules
2189 [maxVariable space][alternate shifted]
2190 * compare
2191 =  \u0020
2192 =  \u000A
2193 <1 .
2194 <1 °  # degree sign
2195 <1 $
2196 <1 0
2197
2198 ** test: maxVariable via setting
2199 @ root
2200 % maxVariable=currency
2201 % alternate=shifted
2202 * compare
2203 =  \u0020
2204 =  \u000A
2205 =  .
2206 =  °  # degree sign
2207 =  $
2208 <1 0
2209
2210 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
2211 # This tests canonical closure, but it also tests that CollationFastLatin
2212 # bails out properly for contractions with combining marks.
2213 # For that we need pairs of strings that remain in the Latin fastpath
2214 # long enough, hence the extra "= b" lines.
2215 @ rules
2216 &b=\u00e4\u00e4
2217 * compare
2218 <1 b
2219 =  \u00e4\u00e4
2220 =  b
2221 =  a\u0308a\u0308
2222 =  b
2223 =  \u00e4a\u0308
2224 =  b
2225 =  a\u0308\u00e4
2226
2227 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
2228 @ rules
2229 &b=\u00C5
2230 * compare
2231 <1 b
2232 =  \u00C5
2233 =  b
2234 =  A\u030A
2235 =  b
2236 =  \u212B
2237
2238 ** test: reset-before on already-tailored characters, ICU ticket 10108
2239 @ rules
2240 &a<w<<x &[before 2]x<<y
2241 * compare
2242 <1 a
2243 <1 w
2244 <2 y
2245 <2 x
2246
2247 @ rules
2248 &a<<w<<<x &[before 2]x<<y
2249 * compare
2250 <1 a
2251 <2 y
2252 <2 w
2253 <3 x
2254
2255 @ rules
2256 &a<w<x &[before 2]x<<y
2257 * compare
2258 <1 a
2259 <1 w
2260 <1 y
2261 <2 x
2262
2263 @ rules
2264 &a<w<<<x &[before 2]x<<y
2265 * compare
2266 <1 a
2267 <1 y
2268 <2 w
2269 <3 x
2270
2271 ** test: numeric collation with other settings, ICU ticket 9092
2272 @ root
2273 % strength=identical
2274 % caseFirst=upper
2275 % numeric=on
2276 * compare
2277 <1 100\u0020a
2278 <1 101
2279
2280 ** test: collation type fallback from unsupported type, ICU ticket 10149
2281 @ locale fr-CA-u-co-phonebk
2282 # Expect the same result as with fr-CA, using backwards-secondary order.
2283 # That is, we should fall back from the unsupported collation type
2284 # to the locale's default collation type.
2285 * compare
2286 <1 cote
2287 <2 côte
2288 <2 coté
2289 <2 côté
2290
2291 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
2292 @ rules
2293 &b<a @ &v<<w
2294 * compare
2295 <1 b
2296 <1 a
2297 <1 cote
2298 <2 côte
2299 <2 coté
2300 <2 côté
2301 <1 v
2302 <2 w
2303 <1 x
2304
2305 ** test: shifted+reordering, ICU ticket 9507
2306 @ root
2307 % reorder Grek punct space
2308 % alternate=shifted
2309 % strength=quaternary
2310 # Which primaries are "variable" should be determined without script reordering,
2311 # and then primaries should be reordered whether they are shifted to quaternary or not.
2312 * compare
2313 <4 (  # punctuation
2314 <4 )
2315 <4 \u0020  # space
2316 <1 `  # symbol
2317 <1 ^
2318 <1 $  # currency symbol
2319 <1 €
2320 <1 0  # numbers
2321 <1 ε  # Greek
2322 <1 e  # Latin
2323 <1 e(e
2324 <4 e)e
2325 <4 e\u0020e
2326 <4 ee
2327 <3 e(E
2328 <4 e)E
2329 <4 e\u0020E
2330 <4 eE
2331
2332 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
2333 @ rules
2334 &\u0001<<<b<<<B
2335 % caseFirst=upper
2336 * compare
2337 <1 aaa
2338 <3 aaaB
2339
2340 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
2341 @ rules
2342 &\u0001<<<b<<<B
2343 % strength=secondary
2344 % caseLevel=on
2345 * compare
2346 <1 a
2347 =  ab
2348 =  aB
2349
2350 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
2351 @ rules
2352 &[before 2] ൌ << ൗ  # U+0D57 << U+0D4C == 0D46+0D57
2353 * compare
2354 <1 ൗx
2355 <2 ൌx
2356 <1 ൗy
2357 <2 ൌy
2358
2359 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
2360 @ rules
2361 &q<<*a''c
2362 * compare
2363 <1 d
2364 <1 p
2365 <1 q
2366 <2 a
2367 <2 \u0027
2368 <2 c
2369 <1 r
2370
2371 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
2372 ** test: locale -u- with collation keywords, ICU ticket 8260
2373 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
2374 * compare
2375 <4 \u0020  # space is shifted, strength=quaternary
2376 <1 !  # punctuation is regular
2377 <1 2
2378 <1 12  # numeric sorting
2379 <1 B
2380 <c b  # uppercase first on case level
2381 <1 x\u0301\u0308
2382 <2 x\u0308\u0301  # normalization off
2383
2384 ** test: locale @ with collation keywords, ICU ticket 8260
2385 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
2386 * compare
2387 <4 $  # currency symbols are shifted, strength=quaternary
2388 <1 àla
2389 <2 alà  # backwards secondary level
2390
2391 ** test: locale -u- with script reordering, ICU ticket 8260
2392 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
2393 * compare
2394 <1 \u0020
2395 <1 あ
2396 <1 ☂
2397 <1 Ω
2398 <1 丂
2399 <1 ж
2400 <1 L
2401 <1 4
2402 <1 Ձ
2403 <1 अ
2404 <1 ሄ
2405 <1 ฉ
2406
2407 ** test: locale @collation=type should be case-insensitive
2408 @ locale de@coLLation=PhoneBook
2409 * compare
2410 <1 ae
2411 <2 ä
2412 <3 Ä
2413
2414 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
2415 @ locale de-u-co-search
2416 * compare
2417 <1 =
2418 <1 ≠
2419 <1 a
2420 <1 ae
2421 <2 ä
2422
2423 # Once more, but with runtime builder.
2424 @ rules
2425 [import und-u-co-search][import de-u-co-phonebk]
2426 * compare
2427 <1 =
2428 <1 ≠
2429 <1 a
2430 <1 ae
2431 <2 ä
2432
2433 # Once again, with import from "root" not "und" (as in a proper language tag).
2434 @ rules
2435 [import root-u-co-search][import de-u-co-phonebk]
2436 * compare
2437 <1 =
2438 <1 ≠
2439 <1 a
2440 <1 ae
2441 <2 ä
2442
2443 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
2444 # Greek should sort Greek first.
2445 @ rules
2446 [import el]
2447 * compare
2448 <1 4
2449 <1 Ω
2450 <1 L
2451
2452 # Import Greek, and then reset the reordering.
2453 @ rules
2454 [import el][reorder Zzzz]
2455 * compare
2456 <1 4
2457 <1 L
2458 <1 Ω
2459
2460 # "others" is a synonym for Zzzz.
2461 @ rules
2462 [import el][reorder others]
2463 * compare
2464 <1 4
2465 <1 L
2466 <1 Ω
2467
2468 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
2469 @ rules
2470 &x<<aa<<<Aa<<<AA
2471 % strength=secondary
2472 * compare
2473 <1 AA
2474 <2 Aẩ
2475 <2 aą
2476 * compare
2477 <1 AA
2478 <2 aą
2479
2480 ** test: tailor tertiary-after a common tertiary where there is a lower one
2481 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
2482 # See ICU ticket 11448 & CLDR ticket 7222.
2483 @ rules
2484 &あ<<<x<<<y<<<z
2485 * compare
2486 <1 ぁ
2487 <3 あ
2488 <3 x
2489 <3 y
2490 <3 z
2491 <3 ァ
2492 <1 い
2493
2494 ** test: tailor tertiary-after a below-common tertiary
2495 @ rules
2496 &ぁ<<<x<<<y<<<z
2497 * compare
2498 <1 ぁ
2499 <3 x
2500 <3 y
2501 <3 z
2502 <3 あ
2503 <3 ァ
2504 <1 い
2505
2506 ** test: tailor tertiary-before a common tertiary where there is a lower one
2507 @ rules
2508 &[before 3]あ<<<x<<<y<<<z
2509 * compare
2510 <1 ぁ
2511 <3 x
2512 <3 y
2513 <3 z
2514 <3 あ
2515 <3 ァ
2516 <1 い
2517
2518 ** test: tailor tertiary-before a below-common tertiary
2519 @ rules
2520 &[before 3]ぁ<<<x<<<y<<<z
2521 * compare
2522 <1 x
2523 <3 y
2524 <3 z
2525 <3 ぁ
2526 <3 あ
2527 <3 ァ
2528 <1 い
2529
2530 ** test: reorder single scripts not groups, ICU ticket 11449
2531 @ root
2532 % reorder Goth Latn
2533 * compare
2534 <1 4
2535 <1 𐌰  # Gothic
2536 <1 L
2537 <1 Ω
2538 # Before ICU 55, the following reordered together with Gothic.
2539 <1 𐌈  # Old Italic
2540 <1 𐑐  # Shavian