icuSources/test/testdata/collationtest.txt

   1 # Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 # License & terms of use: http://www.unicode.org/copyright.html
   3 # Copyright (c) 2012-2015 International Business Machines
   4 # Corporation and others. All Rights Reserved.
   5 #
   6 # This file should be in UTF-8 with a signature byte sequence ("BOM").
   7 #
   8 # collationtest.txt: Collation test data.
   9 #
  10 # created on: 2012apr13
  11 # created by: Markus W. Scherer
  12
  13 # A line with "** test: description" is used for verbose and error output.
  14
  15 # A collator can be set with "@ root" or "@ locale language-tag",
  16 # for example "@ locale de-u-co-phonebk".
  17 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
  18
  19 # A collator can be built with "@ rules".
  20 # An "@ rules" line is followed by zero or more lines with the tailoring rules.
  21
  22 # A collator can be modified with "% attribute=value" or "% reorder codes" (for example "% reorder Hani Zzzz digit").
  23
  24 # "* compare" tests the order (= or <) of the following strings.
  25 # The relation can be "=" or "<" (the level of the difference is not specified)
  26 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference).
  27
  28 # Test sections ("* compare") are terminated by
  29 # definitions of new collators, changing attributes, or new test sections.
  30
  31 ** test: simple CEs & expansions
  32 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
  33 # Here we mostly cover a few unusual mappings.
  34 @ rules
  35 &\x01                           # most control codes are ignorable
  36 <<<\u0300                       # tertiary CE
  37 &9<\x00                         # NUL not ignorable
  38 &\uA00A\uA00B=\uA002            # two long-primary CEs
  39 &\uA00A\uA00B\u00050005=\uA003  # three CEs, require 64 bits
  40
  41 * compare
  42 =  \x01
  43 =  \x02
  44 <3 \u0300
  45 <1 9
  46 <1 \x00
  47 =  \x01\x00\x02
  48 <1 a
  49 <3 a\u0300
  50 <2 a\u0308
  51 =  ä
  52 <1 b
  53 <1 か        # Hiragana Ka (U+304B)
  54 <2 か\u3099  # plus voiced sound mark
  55 =  が        # Hiragana Ga (U+304C)
  56 <1 \uA00A\uA00B
  57 =  \uA002
  58 <1 \uA00A\uA00B\u00050004
  59 <1 \uA00A\uA00B\u00050005
  60 =  \uA003
  61 <1 \uA00A\uA00B\u00050006
  62
  63 ** test: contractions
  64 # Create some interesting mappings, and map some normalization-inert characters
  65 # (which are not subject to canonical reordering)
  66 # to some of the same CEs to check the sequence of CEs.
  67 @ rules
  68
  69 # Contractions starting with 'a' should not continue with any character < U+0300
  70 # so that we can test a shortcut for that.
  71 &a=ⓐ
  72 &b<bz=ⓑ
  73 &d<dz\u0301=ⓓ           # d+z+acute
  74 &z
  75 <a\u0301=Ⓐ              # a+acute sorts after z
  76 <a\u0301\u0301=Ⓑ        # a+acute+acute
  77 <a\u0301\u0301\u0358=Ⓒ  # a+acute+acute+dot above right
  78 <a\u030a=Ⓓ              # a+ring
  79 <a\u0323=Ⓔ              # a+dot below
  80 <a\u0323\u0358=Ⓕ        # a+dot below+dot above right
  81 <a\u0327\u0323\u030a=Ⓖ  # a+cedilla+dot below+ring
  82 <a\u0327\u0323bz=Ⓗ      # a+cedilla+dot below+b+z
  83
  84 &\U0001D158=⁰           # musical notehead black (has a symbol primary)
  85 <\U0001D158\U0001D165=¼ # musical quarter note
  86
  87 # deliberately missing prefix contractions:
  88 # dz
  89 # a\u0327
  90 # a\u0327\u0323
  91 # a\u0327\u0323b
  92
  93 &\x01
  94 <<<\U0001D165=¹         # musical stem (ccc=216)
  95 <<<\U0001D16D=²         # musical augmentation dot (ccc=226)
  96 <<<\U0001D165\U0001D16D=³  # stem+dot (ccc=216 226)
  97 &\u0301=❶               # acute (ccc=230)
  98 &\u030a=❷               # ring (ccc=230)
  99 &\u0308=❸               # diaeresis (ccc=230)
 100 <<\u0308\u0301=❹        # diaeresis+acute (=dialytika tonos) (ccc=230 230)
 101 &\u0327=❺               # cedilla (ccc=202)
 102 &\u0323=❻               # dot below (ccc=220)
 103 &\u0331=❼               # macron below (ccc=220)
 104 <<\u0331\u0358=❽        # macron below+dot above right (ccc=220 232)
 105 &\u0334=❾               # tilde overlay (ccc=1)
 106 &\u0358=❿               # dot above right (ccc=232)
 107
 108 &\u0f71=①               # tibetan vowel sign aa
 109 &\u0f72=②               # tibetan vowel sign i
 110 #  \u0f71\u0f72         # tibetan vowel sign aa + i = ii = U+0F73
 111 &\u0f73=③               # tibetan vowel sign ii (ccc=0 but lccc=129)
 112
 113 ** test: simple contractions
 114
 115 # Some strings are chosen to cause incremental contiguous contraction matching to
 116 # go into partial matches for prefixes of contractions
 117 # (where the prefixes are deliberately not also contractions).
 118 # When there is no complete match, then the matching code must back out of those
 119 # so that discontiguous contractions work as specified.
 120
 121 * compare
 122 # contraction starter with no following text, or mismatch, or blocked
 123 <1 a
 124 =  ⓐ
 125 <1 aa
 126 =  ⓐⓐ
 127 <1 ab
 128 =  ⓐb
 129 <1 az
 130 =  ⓐz
 131
 132 * compare
 133 <1 a
 134 <2 a\u0308\u030a  # ring blocked by diaeresis
 135 =  ⓐ❸❷
 136 <2 a\u0327
 137 =  ⓐ❺
 138
 139 * compare
 140 <2 \u0308
 141 =  ❸
 142 <2 \u0308\u030a\u0301  # acute blocked by ring
 143 =  ❸❷❶
 144
 145 * compare
 146 <1 \U0001D158
 147 =  ⁰
 148 <1 \U0001D158\U0001D165
 149 =  ¼
 150
 151 # no discontiguous contraction because of missing prefix contraction d+z,
 152 # and a starter ('z') after the 'd'
 153 * compare
 154 <1 dz\u0323\u0301
 155 =  dz❻❶
 156
 157 # contiguous contractions
 158 * compare
 159 <1 abz
 160 =  ⓐⓑ
 161 <1 abzz
 162 =  ⓐⓑz
 163
 164 * compare
 165 <1 a
 166 <1 z
 167 <1 a\u0301
 168 =  Ⓐ
 169 <1 a\u0301\u0301
 170 =  Ⓑ
 171 <1 a\u0301\u0301\u0358
 172 =  Ⓒ
 173 <1 a\u030a
 174 =  Ⓓ
 175 <1 a\u0323\u0358
 176 =  Ⓕ
 177 <1 a\u0327\u0323\u030a  # match despite missing prefix
 178 =  Ⓖ
 179 <1 a\u0327\u0323bz
 180 =  Ⓗ
 181
 182 * compare
 183 <2 \u0308\u0308\u0301  # acute blocked from first diaeresis, contracts with second
 184 =  ❸❹
 185
 186 * compare
 187 <1 \U0001D158\U0001D165
 188 =  ¼
 189
 190 * compare
 191 <3 \U0001D165\U0001D16D
 192 =  ³
 193
 194 ** test: discontiguous contractions
 195 * compare
 196 <1 a\u0327\u030a                # a+ring skips cedilla
 197 =  Ⓓ❺
 198 <2 a\u0327\u0327\u030a          # a+ring skips 2 cedillas
 199 =  Ⓓ❺❺
 200 <2 a\u0327\u0327\u0327\u030a    # a+ring skips 3 cedillas
 201 =  Ⓓ❺❺❺
 202 <2 a\u0334\u0327\u0327\u030a    # a+ring skips tilde overlay & 2 cedillas
 203 =  Ⓓ❾❺❺
 204 <1 a\u0327\u0323                # a+dot below skips cedilla
 205 =  Ⓔ❺
 206 <1 a\u0323\u0301\u0358          # a+dot below+dot ab.r.: 2-char match, then skips acute
 207 =  Ⓕ❶
 208 <2 a\u0334\u0323\u0358          # a+dot below skips tilde overlay
 209 =  Ⓕ❾
 210
 211 * compare
 212 <2 \u0331\u0331\u0358           # macron below+dot ab.r. skips the second macron below
 213 =  ❽❼
 214
 215 * compare
 216 <1 a\u0327\u0331\u0323\u030a    # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
 217 =  Ⓓ❺❼❻
 218 <1 a\u0327\u0323\U0001D16D\u030a  # a+dot below skips cedilla
 219 =  Ⓔ❺²❷
 220 <2 a\u0327\u0327\u0323\u030a    # a+dot below skips 2 cedillas
 221 =  Ⓔ❺❺❷
 222 <2 a\u0327\u0323\u0323\u030a    # a+dot below skips cedilla
 223 =  Ⓔ❺❻❷
 224 <2 a\u0334\u0327\u0323\u030a    # a+dot below skips tilde overlay & cedilla
 225 =  Ⓔ❾❺❷
 226
 227 * compare
 228 <1 \U0001D158\u0327\U0001D165   # quarter note skips cedilla
 229 =  ¼❺
 230 <1 a\U0001D165\u0323            # a+dot below skips stem
 231 =  Ⓔ¹
 232
 233 # partial contiguous match, backs up, matches discontiguous contraction
 234 <1 a\u0327\u0323b
 235 =  Ⓔ❺b
 236 <1 a\u0327\u0323ba
 237 =  Ⓔ❺bⓐ
 238
 239 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
 240 * compare
 241 <1 a\u0327\u0301\u0301\u0358
 242 =  Ⓒ❺
 243
 244 # FCD but not NFD
 245 * compare
 246 <1 a\u0f73\u0301                # a+acute skips tibetan ii
 247 =  Ⓐ③
 248
 249 # FCD but the 0f71 inside the 0f73 must be skipped
 250 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
 251 * compare
 252 <1 \u0f71\u0f73                 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
 253 =  ③①
 254
 255 ** test: discontiguous contractions with nested contractions
 256 * compare
 257 <1 a\u0323\u0308\u0301\u0358
 258 =  Ⓕ❹
 259 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
 260 =  Ⓕ❹❹
 261
 262 ** test: discontiguous contractions with interleaved contractions
 263 * compare
 264 # a+ring & cedilla & macron below+dot above right
 265 <1 a\u0327\u0331\u030a\u0358
 266 =  Ⓓ❺❽
 267
 268 # a+ring & 1x..3x macron below+dot above right
 269 <2 a\u0331\u030a\u0358
 270 =  Ⓓ❽
 271 <2 a\u0331\u0331\u030a\u0358\u0358
 272 =  Ⓓ❽❽
 273 # also skips acute
 274 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
 275 =  Ⓓ❽❽❽❶
 276
 277 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
 278 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
 279 =  Ⓔ³ⓓ
 280
 281 ** test: some simple string comparisons
 282 @ root
 283 * compare
 284 # first string compares against ""
 285 = \u0000
 286 < a
 287 <1 b
 288 <3 B
 289 = \u0000B\u0000
 290
 291 ** test: compare with strength=primary
 292 % strength=primary
 293 * compare
 294 <1 a
 295 <1 b
 296 = B
 297
 298 ** test: compare with strength=secondary
 299 % strength=secondary
 300 * compare
 301 <1 a
 302 <1 b
 303 = B
 304
 305 ** test: compare with strength=tertiary
 306 % strength=tertiary
 307 * compare
 308 <1 a
 309 <1 b
 310 <3 B
 311
 312 ** test: compare with strength=quaternary
 313 % strength=quaternary
 314 * compare
 315 <1 a
 316 <1 b
 317 <3 B
 318
 319 ** test: compare with strength=identical
 320 % strength=identical
 321 * compare
 322 <1 a
 323 <1 b
 324 <3 B
 325
 326 ** test: côté with forwards secondary
 327 @ root
 328 * compare
 329 <1 cote
 330 <2 coté
 331 <2 côte
 332 <2 côté
 333
 334 ** test: côté with forwards secondary vs. U+FFFE merge separator
 335 # Merged sort keys: On each level, any difference in the first segment
 336 # must trump any further difference.
 337 * compare
 338 <1 cote\uFFFEcôté
 339 <2 coté\uFFFEcôte
 340 <2 côte\uFFFEcoté
 341 <2 côté\uFFFEcote
 342
 343 ** test: côté with backwards secondary
 344 % backwards=on
 345 * compare
 346 <1 cote
 347 <2 côte
 348 <2 coté
 349 <2 côté
 350
 351 ** test: côté with backwards secondary vs. U+FFFE merge separator
 352 # Merged sort keys: On each level, any difference in the first segment
 353 # must trump any further difference.
 354 * compare
 355 <1 cote\uFFFEcôté
 356 <2 côte\uFFFEcoté
 357 <2 coté\uFFFEcôte
 358 <2 côté\uFFFEcote
 359
 360 ** test: U+FFFE on identical level
 361 @ root
 362 % strength=identical
 363 * compare
 364 # All of these control codes are completely-ignorable, so that
 365 # their low code points are compared with the merge separator.
 366 # The merge separator must compare less than any other character.
 367 <1 \uFFFE\u0001\u0002\u0003
 368 <i \u0001\uFFFE\u0002\u0003
 369 <i \u0001\u0002\uFFFE\u0003
 370 <i \u0001\u0002\u0003\uFFFE
 371
 372 * compare
 373 # The merge separator must even compare less than U+0000.
 374 <1 \uFFFE\u0000\u0000
 375 <i \u0000\uFFFE\u0000
 376 <i \u0000\u0000\uFFFE
 377
 378 ** test: Hani < surrogates < U+FFFD
 379 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
 380 # so with that the strings with surrogates will compare equal to each other
 381 # and equal to the string with U+FFFD.
 382 @ root
 383 % strength=identical
 384 * compare
 385 <1 abz
 386 <1 a\u4e00z
 387 <1 a\U00020000z
 388 <1 a\ud800z
 389 <1 a\udbffz
 390 <1 a\udc00z
 391 <1 a\udfffz
 392 <1 a\ufffdz
 393
 394 ** test: script reordering
 395 @ root
 396 % reorder Hani Zzzz digit
 397 * compare
 398 <1 ?
 399 <1 +
 400 <1 丂
 401 <1 a
 402 <1 α
 403 <1 5
 404
 405 % reorder default
 406 * compare
 407 <1 ?
 408 <1 +
 409 <1 5
 410 <1 a
 411 <1 α
 412 <1 丂
 413
 414 ** test: empty rules
 415 @ rules
 416 * compare
 417 <1 a
 418 <2 ä
 419 <3 Ä
 420 <1 b
 421
 422 ** test: very simple rules
 423 @ rules
 424 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
 425 % strength=quaternary
 426 * compare
 427 <1 a
 428 =  e
 429 <4 q
 430 <4 r
 431 <1 x
 432 <3 X
 433 <2 y
 434 <3 Y
 435 <2 z
 436 <3 Z
 437
 438 ** test: tailoring twice before a root position: primary
 439 @ rules
 440 &[before 1]b<p
 441 &[before 1]b<q
 442 * compare
 443 <1 a
 444 <1 p
 445 <1 q
 446 <1 b
 447
 448 ** test: tailoring twice before a root position: secondary
 449 @ rules
 450 &[before 2]ſ<<p
 451 &[before 2]ſ<<q
 452 * compare
 453 <1 s
 454 <2 p
 455 <2 q
 456 <2 ſ
 457
 458 # secondary-before common weight
 459 @ rules
 460 &[before 2]b<<p
 461 &[before 2]b<<q
 462 * compare
 463 <1 a
 464 <1 p
 465 <2 q
 466 <2 b
 467
 468 ** test: tailoring twice before a root position: tertiary
 469 @ rules
 470 &[before 3]B<<<p
 471 &[before 3]B<<<q
 472 * compare
 473 <1 b
 474 <3 p
 475 <3 q
 476 <3 B
 477
 478 # tertiary-before common weight
 479 @ rules
 480 &[before 3]b<<<p
 481 &[before 3]b<<<q
 482 * compare
 483 <1 a
 484 <1 p
 485 <3 q
 486 <3 b
 487
 488 @ rules
 489 &[before 2]b<<s
 490 &[before 3]s<<<p
 491 &[before 3]s<<<q
 492 * compare
 493 <1 a
 494 <1 p
 495 <3 q
 496 <3 s
 497 <2 b
 498
 499 ** test: tailor after completely ignorable
 500 @ rules
 501 &\x00<<<x<<y
 502 * compare
 503 = \x00
 504 = \x1F
 505 <3 x
 506 <2 y
 507
 508 ** test: secondary tailoring gaps, ICU ticket 9362
 509 @ rules
 510 &[before 2]s<<'_'
 511 &s<<r  # secondary between s and ſ (long s)
 512 &ſ<<*a-q  # more than 15 between ſ and secondary CE boundary
 513 &[before 2][first primary ignorable]<<u<<v  # between secondary CE boundary & lowest secondary CE
 514 &[last primary ignorable]<<y<<z
 515
 516 * compare
 517 <2 u
 518 <2 v
 519 <2 \u0332  # lowest secondary CE
 520 <2 \u0308
 521 <2 y
 522 <2 z
 523 <1 s_
 524 <2 ss
 525 <2 sr
 526 <2 sſ
 527 <2 sa
 528 <2 sb
 529 <2 sp
 530 <2 sq
 531 <2 sus
 532 <2 svs
 533 <2 rs
 534
 535 ** test: tertiary tailoring gaps, ICU ticket 9362
 536 @ rules
 537 &[before 3]t<<<'_'
 538 &t<<<r  # tertiary between t and fullwidth t
 539 &ᵀ<<<*a-q  # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
 540 &[before 3][first secondary ignorable]<<<u<<<v  # between tertiary CE boundary & lowest tertiary CE
 541 &[last secondary ignorable]<<<y<<<z
 542
 543 * compare
 544 <3 u
 545 <3 v
 546 # Note: The root collator currently does not map any characters to tertiary CEs.
 547 <3 y
 548 <3 z
 549 <1 t_
 550 <3 tt
 551 <3 tr
 552 <3 tｔ
 553 <3 tᵀ
 554 <3 ta
 555 <3 tb
 556 <3 tp
 557 <3 tq
 558 <3 tut
 559 <3 tvt
 560 <3 rt
 561
 562 ** test: secondary & tertiary around root character
 563 @ rules
 564 &[before 2]m<<r
 565 &m<<s
 566 &[before 3]m<<<u
 567 &m<<<v
 568 * compare
 569 <1 l
 570 <1 r
 571 <2 u
 572 <3 m
 573 <3 v
 574 <2 s
 575 <1 n
 576
 577 ** test: secondary & tertiary around tailored item
 578 @ rules
 579 &m<x
 580 &[before 2]x<<r
 581 &x<<s
 582 &[before 3]x<<<u
 583 &x<<<v
 584 * compare
 585 <1 m
 586 <1 r
 587 <2 u
 588 <3 x
 589 <3 v
 590 <2 s
 591 <1 n
 592
 593 ** test: more nesting of secondary & tertiary before
 594 @ rules
 595 &[before 3]m<<<u
 596 &[before 2]m<<r
 597 &[before 3]r<<<q
 598 &m<<<w
 599 &m<<t
 600 &[before 3]w<<<v
 601 &w<<<x
 602 &w<<s
 603 * compare
 604 <1 l
 605 <1 q
 606 <3 r
 607 <2 u
 608 <3 m
 609 <3 v
 610 <3 w
 611 <3 x
 612 <2 s
 613 <2 t
 614 <1 n
 615
 616 ** test: case bits
 617 @ rules
 618 &w<x  # tailored CE getting case bits
 619   =uv=uV=Uv=UV  # 2 chars -> 1 CE
 620 &ae=ch=cH=Ch=CH  # 2 chars -> 2 CEs
 621 &rst=yz=yZ=Yz=YZ  # 2 chars -> 3 CEs
 622 % caseFirst=lower
 623 * compare
 624 <1 ae
 625 =  ch
 626 <3 cH
 627 <3 Ch
 628 <3 CH
 629 <1 rst
 630 =  yz
 631 <3 yZ
 632 <3 Yz
 633 <3 YZ
 634 <1 w
 635 <1 x
 636 =  uv
 637 <3 uV
 638 =  Uv  # mixed case on single CE cannot distinguish variations
 639 <3 UV
 640
 641 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
 642 @ rules
 643 &\u0001<<<t<<<T  # tertiary CEs
 644 % caseFirst=lower
 645 * compare
 646 <1 aa
 647 <3 aat
 648 <3 aaT
 649 <3 aA
 650 <3 aAt
 651 <3 ata
 652 <3 aTa
 653
 654 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
 655 % caseFirst=upper
 656 * compare
 657 <1 aA
 658 <3 aAt
 659 <3 aa
 660 <3 aat
 661 <3 aaT
 662 <3 ata
 663 <3 aTa
 664
 665 ** test: reset on expansion, ICU tickets 9415 & 9593
 666 @ rules
 667 &æ<x    # tailor the last primary CE so that x sorts between ae and af
 668 &æb=bæ  # copy all reset CEs to make bæ sort the same
 669 &각<h    # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
 670 &⒀<<y   # copy/tailor 4 CEs to make y sort with only a secondary difference
 671 &l·=z   # handle the pre-context for · when fetching reset CEs
 672    <<u  # copy/tailor 2 CEs
 673
 674 * compare
 675 <1 ae
 676 <2 æ
 677 <1 x
 678 <1 af
 679
 680 * compare
 681 <1 aeb
 682 <2 æb
 683 =  bæ
 684
 685 * compare
 686 <1 각
 687 <1 h
 688 <1 갂
 689 <1 갃
 690
 691 * compare
 692 <1 ·    # by itself: primary CE
 693 <1 l
 694 <2 l·   # l+middle dot has only a secondary difference from l
 695 =  z
 696 <2 u
 697
 698 * compare
 699 <1 (13)
 700 <3 ⒀  # DUCET sets special tertiary weights in all CEs
 701 <2 y
 702 <1 (13[
 703
 704 % alternate=shifted
 705 * compare
 706 <1 (13)
 707 =  13
 708 <3 ⒀
 709 =  y  # alternate=shifted removes the tailoring difference on the last CE
 710 <1 14
 711
 712 ** test: contraction inside extension, ICU ticket 9378
 713 @ rules
 714 &а<<х/й     # all letters are Cyrillic
 715 * compare
 716 <1 ай
 717 <2 х
 718
 719 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
 720 @ rules
 721 &t<x &ᵀ<y           # same primary weights
 722 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
 723 * compare
 724 <1 q
 725 <1 u
 726 <1 v
 727 <1 ꝗ
 728 <1 t
 729 <3 ᵀ
 730 <1 y
 731 <1 x
 732
 733 # Principle: Each rule builds on the state of preceding rules and ignores following rules.
 734
 735 ** test: later rule does not affect earlier reset position, ICU ticket 10105
 736 @ rules
 737 &a < u < v < w  &ov < x  &b < v
 738 * compare
 739 <1 oa
 740 <1 ou
 741 <1 x    # CE(o) followed by CE between u and w
 742 <1 ow
 743 <1 ob
 744 <1 ov
 745
 746 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
 747 @ rules
 748 &a=x/b &v=b
 749 % strength=secondary
 750 * compare
 751 <1 B
 752 <1 c
 753 <1 v
 754 =  b
 755 * compare
 756 <1 AB
 757 =  x
 758 <1 ac
 759 <1 av
 760 =  ab
 761
 762 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
 763 @ rules
 764 &a <<< c / e &g <<< e / l
 765 % strength=secondary
 766 * compare
 767 <1 AE
 768 =  c
 769 <2 æ
 770 <1 agl
 771 =  ae
 772
 773 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
 774 @ rules
 775 &a = b / c  &d = c / e
 776 % strength=secondary
 777 * compare
 778 <1 AC  # C is still only tertiary different from the original c
 779 =  b
 780 <1 ade
 781 =  ac
 782
 783 ** test: extension contains tailored character, ICU ticket 10105
 784 @ rules
 785 &a=e &b=u/e
 786 * compare
 787 <1 a
 788 =  e
 789 <1 ba
 790 =  be
 791 =  u
 792
 793 ** test: add simple mappings for characters with root context
 794 @ rules
 795 &z=·    # middle dot has a prefix mapping in the CLDR root
 796 &n=и    # и (U+0438) has contractions in the root
 797 * compare
 798 <1 l
 799 <2 l·   # root mapping for l|· still works
 800 <1 z
 801 =  ·
 802 * compare
 803 <1 n
 804 =  и
 805 <1 И
 806 <1 и\u0306  # root mapping for й=и\u0306 still works
 807 =  й
 808 <3 Й
 809
 810 ** test: add context mappings around characters with root context
 811 @ rules
 812 &z=·h   # middle dot has a prefix mapping in the CLDR root
 813 &n=ә|и  # и (U+0438) has contractions in the root
 814 * compare
 815 <1 l
 816 <2 l·   # root mapping for l|· still works
 817 <1 z
 818 =  ·h
 819 * compare
 820 <1 и
 821 <3 И
 822 <1 и\u0306  # root mapping for й=и\u0306 still works
 823 =  й
 824 * compare
 825 <1 әn
 826 =  әи
 827 <1 әo
 828
 829 ** test: many secondary CEs at the top of their range
 830 @ rules
 831 &[last primary ignorable]<<*\u2801-\u28ff
 832 * compare
 833 <2 \u0308
 834 <2 \u2801
 835 <2 \u2802
 836 <2 \u2803
 837 <2 \u2804
 838 <2 \u28fd
 839 <2 \u28fe
 840 <2 \u28ff
 841 <1 \x20
 842
 843 ** test: many tertiary CEs at the top of their range
 844 @ rules
 845 &[last secondary ignorable]<<<*a-z
 846 * compare
 847 <3 a
 848 <3 b
 849 <3 c
 850 <3 d
 851 # e..w
 852 <3 x
 853 <3 y
 854 <3 z
 855 <2 \u0308
 856
 857 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
 858 @ rules
 859 &a=p|x &b=px &c=op
 860 * compare
 861 <1 b
 862 =  px
 863 <3 B
 864 <1 c
 865 =  op
 866 <3 C
 867 * compare
 868 <1 ca
 869 =  opx  # first contraction op, then prefix p|x
 870 <3 cA
 871 <3 Ca
 872
 873 ** test: reset position with prefix (pre-context), ICU ticket 10102
 874 @ rules
 875 &a=p|x &px=y
 876 * compare
 877 <1 pa
 878 =  px
 879 =  y
 880 <3 pA
 881 <1 q
 882 <1 x
 883
 884 ** test: prefix+contraction together (1), ICU ticket 10071
 885 @ rules
 886 &x=a|bc
 887 * compare
 888 <1 ab
 889 <1 Abc
 890 <1 abd
 891 <1 ac
 892 <1 aw
 893 <1 ax
 894 =  abc
 895 <3 aX
 896 <3 Ax
 897 <1 b
 898 <1 bb
 899 <1 bc
 900 <3 bC
 901 <3 Bc
 902 <1 bd
 903
 904 ** test: prefix+contraction together (2), ICU ticket 10071
 905 @ rules
 906 &w=bc &x=a|b
 907 * compare
 908 <1 w
 909 =  bc
 910 <3 W
 911 * compare
 912 <1 aw
 913 <1 ax
 914 =  ab
 915 <3 aX
 916 <1 axb
 917 <1 axc
 918 =  abc  # prefix match a|b takes precedence over contraction match bc
 919 <3 abC
 920 <1 abd
 921 <1 ay
 922
 923 ** test: prefix+contraction together (3), ICU ticket 10071
 924 @ rules
 925 &x=a|b &w=bc    # reverse order of rules as previous test, order should not matter here
 926 * compare       # same "compare" sequences as previous test
 927 <1 w
 928 =  bc
 929 <3 W
 930 * compare
 931 <1 aw
 932 <1 ax
 933 =  ab
 934 <3 aX
 935 <1 axb
 936 <1 axc
 937 =  abc  # prefix match a|b takes precedence over contraction match bc
 938 <3 abC
 939 <1 abd
 940 <1 ay
 941
 942 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
 943 @ rules
 944 &d=ch &v=p|ci
 945 * compare
 946 <1 pc
 947 <3 pC
 948 <1 pcH
 949 <1 pcI
 950 <1 pd
 951 =  pch  # no-prefix contraction ch matches
 952 <3 pD
 953 <1 pv
 954 =  pci  # prefix+contraction p|ci matches
 955 <3 pV
 956
 957 ** test: tailor in & around compact ranges of root primaries
 958 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
 959 # which should be reliably encoded as one range in the root elements data.
 960 @ rules
 961 &[before 1]ᚁ<a
 962 &ᚁ<b
 963 &[before 1]ᚂ<c
 964 &ᚂ<d
 965 &[before 1]ᚚ<y
 966 &ᚚ<z
 967 &[before 2]ᚁ<<r
 968 &ᚁ<<s
 969 &[before 3]ᚚ<<<t
 970 &ᚚ<<<u
 971 * compare
 972 <1 ᣵ    # U+18F5 last Canadian Aboriginal
 973 <1 a
 974 <1 r
 975 <2 ᚁ
 976 <2 s
 977 <1 b
 978 <1 c
 979 <1 ᚂ
 980 <1 d
 981 <1 ᚃ
 982 <1 ᚙ
 983 <1 y
 984 <1 t
 985 <3 ᚚ
 986 <3 u
 987 <1 z
 988 <1 ᚠ    # U+16A0 first Runic
 989
 990 ** test: suppressContractions
 991 @ rules
 992 &z<ch<әж [suppressContractions [·cә]]
 993 * compare
 994 <1 ch
 995 <3 cH   # ch was suppressed
 996 <1 l
 997 <1 l·   # primary difference, not secondary, because l|· was suppressed
 998 <1 ә
 999 <2 ә\u0308  # secondary difference, not primary, because contractions for ә were suppressed
1000 <1 әж
1001 <3 әЖ
1002
1003 ** test: Hangul & Jamo
1004 @ rules
1005 &L=\u1100  # first Jamo L
1006 &V=\u1161  # first Jamo V
1007 &T=\u11A8  # first Jamo T
1008 &\uAC01<<*\u4E00-\u4EFF  # first Hangul LVT syllable & lots of secondary diffs
1009 * compare
1010 <1 Lv
1011 <3 LV
1012 =  \u1100\u1161
1013 =  \uAC00
1014 <1 LVt
1015 <3 LVT
1016 =  \u1100\u1161\u11A8
1017 =  \uAC00\u11A8
1018 =  \uAC01
1019 <2 LVT\u0308
1020 <2 \u4E00
1021 <2 \u4E01
1022 <2 \u4E80
1023 <2 \u4EFF
1024 <2 LV\u0308T
1025 <1 \uAC02
1026
1027 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
1028 @ rules
1029 &[last variable]<x
1030 [maxVariable space]  # has effect only after building, no effect on following rules
1031 &[last variable]<y
1032 &[before 1][first regular]<z
1033 * compare
1034 <1 ?  # some punctuation
1035 <1 x
1036 <1 y
1037 <1 z
1038 <1 $  # some symbol
1039
1040 @ rules
1041 &[last primary ignorable]<<x<<<y
1042 &[last primary ignorable]<<z
1043 * compare
1044 <2 \u0358
1045 <2 x
1046 <3 y
1047 <2 z
1048 <1 \x20
1049
1050 @ rules
1051 &[last secondary ignorable]<<<x
1052 &[last secondary ignorable]<<<y
1053 * compare
1054 <3 x
1055 <3 y
1056 <2 \u0358
1057
1058 @ rules
1059 &[before 2][first variable]<<z
1060 &[before 2][first variable]<<y
1061 &[before 3][first variable]<<<x
1062 &[before 3][first variable]<<<w
1063 &[before 1][first variable]<v
1064 &[before 2][first variable]<<u
1065 &[before 3][first variable]<<<t
1066 &[before 2]\uFDD1\xA0<<s  # FractionalUCA.txt: FDD1 00A0, SPACE first primary
1067 * compare
1068 <2 \u0358
1069 <1 s
1070 <2 \uFDD1\xA0
1071 <1 t
1072 <3 u
1073 <2 v
1074 <1 w
1075 <3 x
1076 <3 y
1077 <2 z
1078 <2 \t
1079
1080 @ rules
1081 &[before 2][first regular]<<z
1082 &[before 3][first regular]<<<y
1083 &[before 1][first regular]<x
1084 &[before 3][first regular]<<<w
1085 &[before 2]\uFDD1\u263A<<v  # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
1086 &[before 3][first regular]<<<u
1087 &[before 1][first regular]<p  # primary before the boundary: becomes variable
1088 &[before 3][first regular]<<<t  # not affected by p
1089 &[last variable]<q              # after p!
1090 * compare
1091 <1 ?
1092 <1 p
1093 <1 q
1094 <1 t
1095 <3 u
1096 <3 v
1097 <1 w
1098 <3 x
1099 <1 y
1100 <3 z
1101 <1 $
1102
1103 # check that p & q are indeed variable
1104 % alternate=shifted
1105 * compare
1106 =  ?
1107 =  p
1108 =  q
1109 <1 t
1110 <3 u
1111 <3 v
1112 <1 w
1113 <3 x
1114 <1 y
1115 <3 z
1116 <1 $
1117
1118 @ rules
1119 &[before 2][first trailing]<<z
1120 &[before 1][first trailing]<y
1121 &[before 3][first trailing]<<<x
1122 * compare
1123 <1 \u4E00  # first Han, first implicit
1124 <1 \uFDD1\uFDD0  # FractionalUCA.txt: unassigned first primary
1125 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
1126 <1 x
1127 <3 y
1128 <1 z
1129 <2 \uFFFD  # The root collator currently maps U+FFFD to the first real trailing primary.
1130
1131 @ rules
1132 &[before 2][first primary ignorable]<<z
1133 &[before 2][first primary ignorable]<<y
1134 &[before 3][first primary ignorable]<<<x
1135 &[before 3][first primary ignorable]<<<w
1136 * compare
1137 =  \x01
1138 <2 w
1139 <3 x
1140 <3 y
1141 <2 z
1142 <2 \u0301
1143
1144 @ rules
1145 &[before 3][first secondary ignorable]<<<y
1146 &[before 3][first secondary ignorable]<<<x
1147 * compare
1148 =  \x01
1149 <3 x
1150 <3 y
1151 <2 \u0301
1152
1153 ** test: canonical closure
1154 @ rules
1155 &X=A &U=Â
1156 * compare
1157 <1 U
1158 =  Â
1159 =  A\u0302
1160 <2 Ú  # U with acute
1161 =  U\u0301
1162 =  Ấ  # A with circumflex & acute
1163 =  Â\u0301
1164 =  A\u0302\u0301
1165 <1 X
1166 =  A
1167 <2 X\u030A  # with ring above
1168 =  Å
1169 =  A\u030A
1170 =  \u212B  # Angstrom sign
1171
1172 @ rules
1173 &x=\u5140\u55C0
1174 * compare
1175 <1 x
1176 =  \u5140\u55C0
1177 =  \u5140\uFA0D
1178 =  \uFA0C\u55C0
1179 =  \uFA0C\uFA0D  # CJK compatibility characters
1180 <3 X
1181
1182 # canonical closure on prefix rules, ICU ticket 9444
1183 @ rules
1184 &x=ä|ŝ
1185 * compare
1186 <1 äs  # not tailored
1187 <1 äx
1188 =  äŝ
1189 =  a\u0308s\u0302
1190 =  a\u0308ŝ
1191 =  äs\u0302
1192 <3 äX
1193
1194 ** test: conjoining Jamo map to expansions
1195 @ rules
1196 &gg=\u1101  # Jamo Lead consonant GG
1197 &nj=\u11AC  # Jamo Trail consonant NJ
1198 * compare
1199 <1 gg\u1161nj
1200 =  \u1101\u1161\u11AC
1201 =  \uAE4C\u11AC
1202 =  \uAE51
1203 <3 gg\u1161nJ
1204 <1 \u1100\u1100
1205
1206 ** test: canonical tail closure, ICU ticket 5913
1207 @ rules
1208 &a<â
1209 * compare
1210 <1 a
1211 <1 â              # tailored
1212 =  a\u0302
1213 <2 a\u0323\u0302  # discontiguous contraction
1214 =  ạ\u0302        # equivalent
1215 =  ậ              # equivalent
1216 <1 b
1217
1218 @ rules
1219 &a<ạ
1220 * compare
1221 <1 a
1222 <1 ạ              # tailored
1223 =  a\u0323
1224 <2 a\u0323\u0302  # contiguous contraction plus extra diacritic
1225 =  ạ\u0302        # equivalent
1226 =  ậ              # equivalent
1227 <1 b
1228
1229 # Tail closure should work even if there is a prefix and/or contraction.
1230 @ rules
1231 &a<\u5140|câ
1232 # In order to find discontiguous contractions for \u5140|câ
1233 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
1234 # (This follows from the UCA spec.)
1235 &x=\u5140|ca
1236 * compare
1237 <1 \u5140a
1238 =  \uFA0Ca
1239 <1 \u5140câ              # tailored
1240 =  \uFA0Ccâ
1241 =  \u5140ca\u0302
1242 =  \uFA0Cca\u0302
1243 <2 \u5140ca\u0323\u0302  # discontiguous contraction
1244 =  \uFA0Cca\u0323\u0302
1245 =  \u5140cạ\u0302
1246 =  \uFA0Ccạ\u0302
1247 =  \u5140cậ
1248 =  \uFA0Ccậ
1249 <1 \u5140b
1250 =  \uFA0Cb
1251 <1 \u5140x
1252 =  \u5140ca
1253
1254 # Double-check that without the extra mapping there will be no discontiguous match.
1255 @ rules
1256 &a<\u5140|câ
1257 * compare
1258 <1 \u5140a
1259 =  \uFA0Ca
1260 <1 \u5140câ              # tailored
1261 =  \uFA0Ccâ
1262 =  \u5140ca\u0302
1263 =  \uFA0Cca\u0302
1264 <1 \u5140b
1265 =  \uFA0Cb
1266 <1 \u5140ca\u0323\u0302  # no discontiguous contraction
1267 =  \uFA0Cca\u0323\u0302
1268 =  \u5140cạ\u0302
1269 =  \uFA0Ccạ\u0302
1270 =  \u5140cậ
1271 =  \uFA0Ccậ
1272
1273 @ rules
1274 &a<cạ
1275 * compare
1276 <1 a
1277 <1 cạ              # tailored
1278 =  ca\u0323
1279 <2 ca\u0323\u0302  # contiguous contraction plus extra diacritic
1280 =  cạ\u0302        # equivalent
1281 =  cậ              # equivalent
1282 <1 b
1283
1284 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1285 #   = 03C9 0313 0300 0345
1286 # ccc = 0, 230, 230, 240
1287 @ rules
1288 &δ=αῳ
1289 # In order to find discontiguous contractions for αῳ
1290 # there must exist a mapping for αω, regardless of what it maps to.
1291 # (This follows from the UCA spec.)
1292 &ε=αω
1293 * compare
1294 <1 δ
1295 =  αῳ
1296 =  αω\u0345
1297 <2 αω\u0313\u0300\u0345  # discontiguous contraction
1298 =  αὠ\u0300\u0345
1299 =  αὢ\u0345
1300 =  αᾢ
1301 <2 αω\u0300\u0313\u0345
1302 =  αὼ\u0313\u0345
1303 =  αῲ\u0313  # not FCD
1304 <1 ε
1305 =  αω
1306
1307 # Double-check that without the extra mapping there will be no discontiguous match.
1308 @ rules
1309 &δ=αῳ
1310 * compare
1311 <1 αω\u0313\u0300\u0345  # no discontiguous contraction
1312 =  αὠ\u0300\u0345
1313 =  αὢ\u0345
1314 =  αᾢ
1315 <2 αω\u0300\u0313\u0345
1316 =  αὼ\u0313\u0345
1317 =  αῲ\u0313  # not FCD
1318 <1 δ
1319 =  αῳ
1320 =  αω\u0345
1321
1322 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
1323 # Tests code paths where the tailored string has a combining mark
1324 # that does not occur in any composite's decomposition.
1325 @ rules
1326 &δ=αὼ\u0315
1327 * compare
1328 <1 αω\u0313\u0300\u0315  # Not tailored: The grave accent blocks the comma above.
1329 =  αὠ\u0300\u0315
1330 =  αὢ\u0315
1331 <1 δ
1332 =  αὼ\u0315
1333 =  αω\u0300\u0315
1334 <2 αω\u0300\u0315\u0345
1335 =  αὼ\u0315\u0345
1336 =  αῲ\u0315  # not FCD
1337
1338 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
1339 @ rules
1340 &z<aa
1341 * compare
1342 <1 z
1343 <1 aa
1344 <2 aa\u0308
1345 =  aä
1346
1347 ** test: Jamo L with and in prefix
1348 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
1349 @ rules
1350 # Jamo Lead consonant G after G or GG
1351 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
1352 # Jamo Lead consonant GG sorts like G+G
1353 &\u1100\u1100=\u1101
1354 # Note: Making G|GG and GG|GG sort the same as G|G+G
1355 # would require the ability to reset on G|G+G,
1356 # or we could make G-after-G equal to some secondary-CE character,
1357 # and reset on a pair of those.
1358 # (It does not matter much if there are at most two G in a row in real text.)
1359 * compare
1360 <1 \u1100
1361 <2 \u1100\u1100  # only one primary from a sequence of G lead consonants
1362 =  \u1101
1363 <2 \u1100\u1100\u1100
1364 =  \u1101\u1100
1365 # but not = \u1100\u1101, see above
1366 <1 \u1100\u1161
1367 =  \uAC00
1368 <2 \u1100\u1100\u1161
1369 =  \u1100\uAC00  # prefix match from the L of the LV syllable
1370 =  \u1101\u1161
1371 =  \uAE4C
1372
1373 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
1374 @ rules
1375 # Low secondary CEs for Jamo V & T.
1376 # Note: T should sort before V for proper syllable order.
1377 &\u0332  # COMBINING LOW LINE (first primary ignorable)
1378 <<\u1161<<\u1162
1379
1380 # Korean Jamo lead consonant search rules, part 2:
1381 # Make modern compound L jamo primary equivalent to non-compound forms.
1382
1383 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
1384 &\u0313  # COMBINING COMMA ABOVE (second primary ignorable)
1385 =\u1100|\u1100
1386 =\u1103|\u1103
1387 =\u1107|\u1107
1388 =\u1109|\u1109
1389 =\u110C|\u110C
1390
1391 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
1392 &\u1100\u0313=\u1101<<<\u3132  # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
1393 &\u1103\u0313=\u1104<<<\u3138  # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
1394 &\u1107\u0313=\u1108<<<\u3143  # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
1395 &\u1109\u0313=\u110A<<<\u3146  # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
1396 &\u110C\u0313=\u110D<<<\u3149  # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
1397
1398 * compare
1399 <1 \u1100\u1161
1400 =  \uAC00
1401 <2 \u1100\u1162
1402 =  \uAC1C
1403 <2 \u1100\u1100\u1161
1404 =  \u1100\uAC00
1405 =  \u1101\u1161
1406 =  \uAE4C
1407 <3 \u3132\u1161
1408
1409 ** test: Hangul syllables in prefix & in the interior of a contraction
1410 @ rules
1411 &x=\u1100\u1161|a\u1102\u1162z
1412 * compare
1413 <1 \u1100\u1161x
1414 =  \u1100\u1161a\u1102\u1162z
1415 =  \u1100\u1161a\uB0B4z
1416 =  \uAC00a\u1102\u1162z
1417 =  \uAC00a\uB0B4z
1418
1419 ** test: digits are unsafe-backwards when numeric=on
1420 @ root
1421 % numeric=on
1422 * compare
1423 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
1424 # We need to back up before the identical prefix "1" and compare the full numbers.
1425 <1 11b
1426 <1 101a
1427
1428 ** test: simple locale data test
1429 @ locale de
1430 * compare
1431 <1 a
1432 <2 ä
1433 <1 ae
1434 <2 æ
1435
1436 @ locale de-u-co-phonebk
1437 * compare
1438 <1 a
1439 <1 ae
1440 <2 ä
1441 <2 æ
1442
1443 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
1444
1445 ** test: DataDrivenCollationTest/TestMorePinyin
1446 # Testing the primary strength.
1447 @ locale zh
1448 % strength=primary
1449 * compare
1450 < lā
1451 = lĀ
1452 = Lā
1453 = LĀ
1454 < lān
1455 = lĀn
1456 < lē
1457 = lĒ
1458 = Lē
1459 = LĒ
1460 < lēn
1461 = lĒn
1462
1463 ** test: DataDrivenCollationTest/TestLithuanian
1464 # Lithuanian sort order.
1465 @ locale lt
1466 * compare
1467 < cz
1468 < č
1469 < d
1470 < iz
1471 < j
1472 < sz
1473 < š
1474 < t
1475 < zz
1476 < ž
1477
1478 ** test: DataDrivenCollationTest/TestLatvian
1479 # Latvian sort order.
1480 @ locale lv
1481 * compare
1482 < cz
1483 < č
1484 < d
1485 < gz
1486 < ģ
1487 < h
1488 < iz
1489 < j
1490 < kz
1491 < ķ
1492 < l
1493 < lz
1494 < ļ
1495 < m
1496 < nz
1497 < ņ
1498 < o
1499 < rz
1500 < ŗ
1501 < s
1502 < sz
1503 < š
1504 < t
1505 < zz
1506 < ž
1507
1508 ** test: DataDrivenCollationTest/TestEstonian
1509 # Estonian sort order.
1510 @ locale et
1511 * compare
1512 < sy
1513 < š
1514 < šy
1515 < z
1516 < zy
1517 < ž
1518 < v
1519 < va
1520 < w
1521 < õ
1522 < õy
1523 < ä
1524 < äy
1525 < ö
1526 < öy
1527 < ü
1528 < üy
1529 < x
1530
1531 ** test: DataDrivenCollationTest/TestAlbanian
1532 # Albanian sort order.
1533 @ locale sq
1534 * compare
1535 < cz
1536 < ç
1537 < d
1538 < dz
1539 < dh
1540 < e
1541 < ez
1542 < ë
1543 < f
1544 < gz
1545 < gj
1546 < h
1547 < lz
1548 < ll
1549 < m
1550 < nz
1551 < nj
1552 < o
1553 < rz
1554 < rr
1555 < s
1556 < sz
1557 < sh
1558 < t
1559 < tz
1560 < th
1561 < u
1562 < xz
1563 < xh
1564 < y
1565 < zz
1566 < zh
1567
1568 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
1569 # Sorted file has different order.
1570 @ root
1571 # normalization=on turned on & off automatically.
1572 * compare
1573 < \u5F20
1574 < \u5F20\u4E00\u8E3F
1575
1576 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
1577 # This pretty much crashes.
1578 @ root
1579 * compare
1580 < \u0f71\u0f72\u0f80\u0f71\u0f72
1581 < \u0f80
1582
1583 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
1584 # These are examples of strings that caused trouble in partial sort key testing.
1585 @ locale th-TH
1586 * compare
1587 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
1588 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
1589 * compare
1590 < \u0E01\u0E07\u0E01\u0E32\u0E23
1591 < \u0E01\u0E07\u0E42\u0E01\u0E49
1592 * compare
1593 < \u0E01\u0E23\u0E19\u0E17\u0E32
1594 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
1595 * compare
1596 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
1597 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
1598 * compare
1599 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
1600 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
1601
1602 ** test: DataDrivenCollationTest/TestJavaStyleRule
1603 # java.text allows rules to start as '<<<x<<<y...'
1604 # we emulate this by assuming a &[first tertiary ignorable] in this case.
1605 @ rules
1606 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
1607 * compare
1608 = a
1609 = equal
1610 < z
1611 < x
1612 = b  # x had become the new first primary ignorable
1613 < w
1614
1615 ** test: DataDrivenCollationTest/TestShiftedIgnorable
1616 # The UCA states that primary ignorables should be completely
1617 # ignorable when following a shifted code point.
1618 @ root
1619 % alternate=shifted
1620 % strength=quaternary
1621 * compare
1622 < a\u0020b
1623 = a\u0020\u0300b
1624 = a\u0020\u0301b
1625 < a_b
1626 = a_\u0300b
1627 = a_\u0301b
1628 < A\u0020b
1629 = A\u0020\u0300b
1630 = A\u0020\u0301b
1631 < A_b
1632 = A_\u0300b
1633 = A_\u0301b
1634 < a\u0301b
1635 < A\u0301b
1636 < a\u0300b
1637 < A\u0300b
1638
1639 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
1640 # The UCA states that primary ignorables should be completely ignorable when following
1641 # a shifted code point. Here alternate=non-ignorable, so nothing is shifted and they are not ignored.
1642 @ root
1643 % alternate=non-ignorable
1644 % strength=tertiary
1645 * compare
1646 < a\u0020b
1647 < A\u0020b
1648 < a\u0020\u0301b
1649 < A\u0020\u0301b
1650 < a\u0020\u0300b
1651 < A\u0020\u0300b
1652 < a_b
1653 < A_b
1654 < a_\u0301b
1655 < A_\u0301b
1656 < a_\u0300b
1657 < A_\u0300b
1658 < a\u0301b
1659 < A\u0301b
1660 < a\u0300b
1661 < A\u0300b
1662
1663 ** test: DataDrivenCollationTest/TestSafeSurrogates
1664 # It turned out that surrogates were not skipped properly
1665 # when iterating backwards if they were in the middle of a
1666 # contraction. This test ensures that this is fixed.
1667 @ rules
1668 &a < x\ud800\udc00b
1669 * compare
1670 < a
1671 < x\ud800\udc00b
1672
1673 ** test: DataDrivenCollationTest/da_TestPrimary
1674 # This test goes through primary strength cases
1675 @ locale da
1676 % strength=primary
1677 * compare
1678 < Lvi
1679 < Lwi
1680 * compare
1681 < L\u00e4vi
1682 < L\u00f6wi
1683 * compare
1684 < L\u00fcbeck
1685 = Lybeck
1686
1687 ** test: DataDrivenCollationTest/da_TestTertiary
1688 # This test goes through tertiary strength cases
1689 @ locale da
1690 % strength=tertiary
1691 * compare
1692 < Luc
1693 < luck
1694 * compare
1695 < luck
1696 < L\u00fcbeck
1697 * compare
1698 < lybeck
1699 < L\u00fcbeck
1700 * compare
1701 < L\u00e4vi
1702 < L\u00f6we
1703 * compare
1704 < L\u00f6ww
1705 < mast
1706
1707 * compare
1708 < A/S
1709 < ANDRE
1710 < ANDR\u00c9
1711 < ANDREAS
1712 < AS
1713 < CA
1714 < \u00c7A
1715 < CB
1716 < \u00c7C
1717 < D.S.B.
1718 < DA
1719 < \u00d0A
1720 < DB
1721 < \u00d0C
1722 < DSB
1723 < DSC
1724 < EKSTRA_ARBEJDE
1725 < EKSTRABUD0
1726 < H\u00d8ST
1727 < HAAG
1728 < H\u00c5NDBOG
1729 < HAANDV\u00c6RKSBANKEN
1730 < Karl
1731 < karl
1732 < NIELS\u0020J\u00d8RGEN
1733 < NIELS-J\u00d8RGEN
1734 < NIELSEN
1735 < R\u00c9E,\u0020A
1736 < REE,\u0020B
1737 < R\u00c9E,\u0020L
1738 < REE,\u0020V
1739 < SCHYTT,\u0020B
1740 < SCHYTT,\u0020H
1741 < SCH\u00dcTT,\u0020H
1742 < SCHYTT,\u0020L
1743 < SCH\u00dcTT,\u0020M
1744 < SS
1745 < \u00df
1746 < SSA
1747 < STORE\u0020VILDMOSE
1748 < STOREK\u00c6R0
1749 < STORM\u0020PETERSEN
1750 < STORMLY
1751 < THORVALD
1752 < THORVARDUR
1753 < \u00feORVAR\u00d0UR
1754 < THYGESEN
1755 < VESTERG\u00c5RD,\u0020A
1756 < VESTERGAARD,\u0020A
1757 < VESTERG\u00c5RD,\u0020B
1758 < \u00c6BLE
1759 < \u00c4BLE
1760 < \u00d8BERG
1761 < \u00d6BERG
1762
1763 * compare
1764 < andere
1765 < chaque
1766 < chemin
1767 < cote
1768 < cot\u00e9
1769 < c\u00f4te
1770 < c\u00f4t\u00e9
1771 < \u010du\u010d\u0113t
1772 < Czech
1773 < hi\u0161a
1774 < irdisch
1775 < lie
1776 < lire
1777 < llama
1778 < l\u00f5ug
1779 < l\u00f2za
1780 < lu\u010d
1781 < luck
1782 < L\u00fcbeck
1783 < lye
1784 < l\u00e4vi
1785 < L\u00f6wen
1786 < m\u00e0\u0161ta
1787 < m\u00eer
1788 < myndig
1789 < M\u00e4nner
1790 < m\u00f6chten
1791 < pi\u00f1a
1792 < pint
1793 < pylon
1794 < \u0161\u00e0ran
1795 < savoir
1796 < \u0160erb\u016bra
1797 < Sietla
1798 < \u015blub
1799 < subtle
1800 < symbol
1801 < s\u00e4mtlich
1802 < verkehrt
1803 < vox
1804 < v\u00e4ga
1805 < waffle
1806 < wood
1807 < yen
1808 < yuan
1809 < yucca
1810 < \u017eal
1811 < \u017eena
1812 < \u017den\u0113va
1813 < zoo0
1814 < Zviedrija
1815 < Z\u00fcrich
1816 < zysk0
1817 < \u00e4ndere
1818
1819 ** test: DataDrivenCollationTest/hi_TestNewRules
1820 # This test goes through new rules and tests against old rules
1821 @ locale hi
1822 * compare
1823 < कॐ
1824 < कं
1825 < कँ
1826 < कः
1827
1828 ** test: DataDrivenCollationTest/ro_TestNewRules
1829 # This test goes through new rules and tests against old rules
1830 @ locale ro
1831 * compare
1832 < xAx
1833 < xă
1834 < xĂ
1835 < Xă
1836 < XĂ
1837 < xăx
1838 < xĂx
1839 < xâ
1840 < xÂ
1841 < Xâ
1842 < XÂ
1843 < xâx
1844 < xÂx
1845 < xb
1846 < xIx
1847 < xî
1848 < xÎ
1849 < Xî
1850 < XÎ
1851 < xîx
1852 < xÎx
1853 < xj
1854 < xSx
1855 < xș
1856 = xş
1857 < xȘ
1858 = xŞ
1859 < Xș
1860 = Xş
1861 < XȘ
1862 = XŞ
1863 < xșx
1864 = xşx
1865 < xȘx
1866 = xŞx
1867 < xT
1868 < xTx
1869 < xț
1870 = xţ
1871 < xȚ
1872 = xŢ
1873 < Xț
1874 = Xţ
1875 < XȚ
1876 = XŢ
1877 < xțx
1878 = xţx
1879 < xȚx
1880 = xŢx
1881 < xU
1882
1883 ** test: DataDrivenCollationTest/testOffsets
1884 # This tests cases where forwards and backwards iteration get different offsets
1885 @ locale en
1886 % strength=tertiary
1887 * compare
1888 < a\uD800\uDC00\uDC00
1889 < b\uD800\uDC00\uDC00
1890 * compare
1891 < \u0301A\u0301\u0301
1892 < \u0301B\u0301\u0301
1893 * compare
1894 < abcd\r\u0301
1895 < abce\r\u0301
1896 # TODO: test offsets in new CollationTest
1897
1898 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
1899
1900 ** test: was ICU 52 cmsccoll/TestRedundantRules
1901 @ rules
1902 & a < b < c < d& [before 1] c < m
1903 * compare
1904 <1 a
1905 <1 b
1906 <1 m
1907 <1 c
1908 <1 d
1909
1910 @ rules
1911 & a < b <<< c << d <<< e& [before 3] e <<< x
1912 * compare
1913 <1 a
1914 <1 b
1915 <3 c
1916 <2 d
1917 <3 x
1918 <3 e
1919
1920 @ rules
1921 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
1922 * compare
1923 <1 a
1924 <1 b
1925 <3 c
1926 <2 d
1927 <3 e
1928 <3 f
1929 <1 x
1930 <1 g
1931
1932 @ rules
1933 & a <<< b << c < d& a < m
1934 * compare
1935 <1 a
1936 <3 b
1937 <2 c
1938 <1 m
1939 <1 d
1940
1941 @ rules
1942 &a<b<<b\u0301 &z<b
1943 * compare
1944 <1 a
1945 <1 b\u0301
1946 <1 z
1947 <1 b
1948
1949 @ rules
1950 &z<m<<<q<<<m
1951 * compare
1952 <1 z
1953 <1 q
1954 <3 m
1955
1956 @ rules
1957 &z<<<m<q<<<m
1958 * compare
1959 <1 z
1960 <1 q
1961 <3 m
1962
1963 @ rules
1964 & a < b < c < d& r < c
1965 * compare
1966 <1 a
1967 <1 b
1968 <1 d
1969 <1 r
1970 <1 c
1971
1972 @ rules
1973 & a < b < c < d& c < m
1974 * compare
1975 <1 a
1976 <1 b
1977 <1 c
1978 <1 m
1979 <1 d
1980
1981 @ rules
1982 & a < b < c < d& a < m
1983 * compare
1984 <1 a
1985 <1 m
1986 <1 b
1987 <1 c
1988 <1 d
1989
1990 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
1991 # The following two rules should sort the particular list of strings the same.
1992 @ rules
1993 &AE <<< a << b <<< c &d <<< f
1994 * compare
1995 <1 AE
1996 <3 a
1997 <2 b
1998 <3 c
1999 <1 d
2000 <3 f
2001
2002 @ rules
2003 &A <<< a / E << b / E <<< c /E  &d <<< f
2004 * compare
2005 <1 AE
2006 <3 a
2007 <2 b
2008 <3 c
2009 <1 d
2010 <3 f
2011
2012 # The following two rules should sort the particular list of strings the same.
2013 @ rules
2014 &AE <<< a <<< b << c << d < e < f <<< g
2015 * compare
2016 <1 AE
2017 <3 a
2018 <3 b
2019 <2 c
2020 <2 d
2021 <1 e
2022 <1 f
2023 <3 g
2024
2025 @ rules
2026 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
2027 * compare
2028 <1 AE
2029 <3 a
2030 <3 b
2031 <2 c
2032 <2 d
2033 <1 e
2034 <1 f
2035 <3 g
2036
2037 # The following two rules should sort the particular list of strings the same.
2038 @ rules
2039 &AE <<< B <<< C / D <<< F
2040 * compare
2041 <1 AE
2042 <3 B
2043 <3 F
2044 <1 AED
2045 <3 C
2046
2047 @ rules
2048 &A <<< B / E <<< C / ED <<< F / E
2049 * compare
2050 <1 AE
2051 <3 B
2052 <3 F
2053 <1 AED
2054 <3 C
2055
2056 ** test: never reorder trailing primaries
2057 @ root
2058 % reorder Zzzz Grek
2059 * compare
2060 <1 L
2061 <1 字
2062 <1 Ω
2063 <1 \uFFFD
2064 <1 \uFFFF
2065
2066 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
2067 @ rules
2068 &u=ab|cd
2069 &v=b|ce
2070 * compare
2071 <1 abc
2072 <1 abcc
2073 <1 abcf
2074 <1 abcd
2075 =  abu
2076 <1 abce
2077 =  abv
2078
2079 # With the following rules, there is only one prefix per composite ĉ or ç,
2080 # but both prefixes apply to just c in NFD form.
2081 # We would get different results for composed vs. NFD input
2082 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
2083 @ rules
2084 &x=op|ĉ
2085 &y=p|ç
2086 * compare
2087 <1 opc
2088 <2 opć
2089 <1 opcz
2090 <1 opd
2091 <1 opĉ
2092 =  opc\u0302
2093 =  opx
2094 <1 opç
2095 =  opc\u0327
2096 =  opy
2097
2098 # The mapping that is used is the one with the longest matching prefix for which
2099 # there is also a suffix match, and, among several mappings for that prefix, the one with the longest suffix match.
2100 @ rules
2101 &❶=d
2102 &❷=de
2103 &❸=def
2104 &①=c|d
2105 &②=c|de
2106 &③=c|def
2107 &④=bc|d
2108 &⑤=bc|de
2109 &⑥=bc|def
2110 &⑦=abc|d
2111 &⑧=abc|de
2112 &⑨=abc|def
2113 * compare
2114 <1 9aadzz
2115 =  9aa❶zz
2116 <1 9aadez
2117 =  9aa❷z
2118 <1 9aadef
2119 =  9aa❸
2120 <1 9acdzz
2121 =  9ac①zz
2122 <1 9acdez
2123 =  9ac②z
2124 <1 9acdef
2125 =  9ac③
2126 <1 9bcdzz
2127 =  9bc④zz
2128 <1 9bcdez
2129 =  9bc⑤z
2130 <1 9bcdef
2131 =  9bc⑥
2132 <1 abcdzz
2133 =  abc⑦zz
2134 <1 abcdez
2135 =  abc⑧z
2136 <1 abcdef
2137 =  abc⑨
2138
2139 ** test: prefix + discontiguous contraction with missing prefix contraction
2140 # Unfortunate terminology: The first "prefix" here is the pre-context,
2141 # the second "prefix" refers to the contraction/relation string that is
2142 # one shorter than the one being tested.
2143 @ rules
2144 &x=p|e
2145 &y=p|ê
2146 &z=op|ê
2147 # No mapping for op|e:
2148 # Discontiguous contraction matching should not match op|ê in opệ
2149 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
2150 # but there is no match on op|e.
2151 * compare
2152 <1 oPe
2153 <1 ope
2154 =  opx
2155 <1 opệ
2156 =  opy\u0323  # y not z
2157 <1 opê
2158 =  opz
2159
2160 # We cannot test for fallback by whether the contraction default CE32
2161 # is for another contraction. With the following rules, there is no mapping for op|e,
2162 # and the fallback to prefix p has no contractions.
2163 @ rules
2164 &x=p|e
2165 &z=op|ê
2166 * compare
2167 <1 oPe
2168 <1 ope
2169 =  opx
2170 <2 opệ
2171 =  opx\u0323\u0302  # x not z
2172 <1 opê
2173 =  opz
2174
2175 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
2176 @ rules
2177 &x=e
2178 &z=op|ê
2179 * compare
2180 <1 ope
2181 =  opx
2182 <3 oPe
2183 =  oPx
2184 <2 opệ
2185 =  opx\u0323\u0302  # x not z
2186 <1 opê
2187 =  opz
2188
2189 ** test: maxVariable via rules
2190 @ rules
2191 [maxVariable space][alternate shifted]
2192 * compare
2193 =  \u0020
2194 =  \u000A
2195 <1 .
2196 <1 °  # degree sign
2197 <1 $
2198 <1 0
2199
2200 ** test: maxVariable via setting
2201 @ root
2202 % maxVariable=currency
2203 % alternate=shifted
2204 * compare
2205 =  \u0020
2206 =  \u000A
2207 =  .
2208 =  °  # degree sign
2209 =  $
2210 <1 0
2211
2212 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
2213 # This tests canonical closure, but it also tests that CollationFastLatin
2214 # bails out properly for contractions with combining marks.
2215 # For that we need pairs of strings that remain in the Latin fastpath
2216 # long enough, hence the extra "= b" lines.
2217 @ rules
2218 &b=\u00e4\u00e4
2219 * compare
2220 <1 b
2221 =  \u00e4\u00e4
2222 =  b
2223 =  a\u0308a\u0308
2224 =  b
2225 =  \u00e4a\u0308
2226 =  b
2227 =  a\u0308\u00e4
2228
2229 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
2230 @ rules
2231 &b=\u00C5
2232 * compare
2233 <1 b
2234 =  \u00C5
2235 =  b
2236 =  A\u030A
2237 =  b
2238 =  \u212B
2239
2240 ** test: reset-before on already-tailored characters, ICU ticket 10108
2241 @ rules
2242 &a<w<<x &[before 2]x<<y
2243 * compare
2244 <1 a
2245 <1 w
2246 <2 y
2247 <2 x
2248
2249 @ rules
2250 &a<<w<<<x &[before 2]x<<y
2251 * compare
2252 <1 a
2253 <2 y
2254 <2 w
2255 <3 x
2256
2257 @ rules
2258 &a<w<x &[before 2]x<<y
2259 * compare
2260 <1 a
2261 <1 w
2262 <1 y
2263 <2 x
2264
2265 @ rules
2266 &a<w<<<x &[before 2]x<<y
2267 * compare
2268 <1 a
2269 <1 y
2270 <2 w
2271 <3 x
2272
2273 ** test: numeric collation with other settings, ICU ticket 9092
2274 @ root
2275 % strength=identical
2276 % caseFirst=upper
2277 % numeric=on
2278 * compare
2279 <1 100\u0020a
2280 <1 101
2281
2282 ** test: collation type fallback from unsupported type, ICU ticket 10149
2283 @ locale fr-CA-u-co-phonebk
2284 # Expect the same result as with fr-CA, using backwards-secondary order.
2285 # That is, we should fall back from the unsupported collation type
2286 # to the locale's default collation type.
2287 * compare
2288 <1 cote
2289 <2 côte
2290 <2 coté
2291 <2 côté
2292
2293 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
2294 @ rules
2295 &b<a @ &v<<w
2296 * compare
2297 <1 b
2298 <1 a
2299 <1 cote
2300 <2 côte
2301 <2 coté
2302 <2 côté
2303 <1 v
2304 <2 w
2305 <1 x
2306
2307 ** test: shifted+reordering, ICU ticket 9507
2308 @ root
2309 % reorder Grek punct space
2310 % alternate=shifted
2311 % strength=quaternary
2312 # Which primaries are "variable" should be determined without script reordering,
2313 # and then primaries should be reordered whether they are shifted to quaternary or not.
2314 * compare
2315 <4 (  # punctuation
2316 <4 )
2317 <4 \u0020  # space
2318 <1 `  # symbol
2319 <1 ^
2320 <1 $  # currency symbol
2321 <1 €
2322 <1 0  # numbers
2323 <1 ε  # Greek
2324 <1 e  # Latin
2325 <1 e(e
2326 <4 e)e
2327 <4 e\u0020e
2328 <4 ee
2329 <3 e(E
2330 <4 e)E
2331 <4 e\u0020E
2332 <4 eE
2333
2334 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
2335 @ rules
2336 &\u0001<<<b<<<B
2337 % caseFirst=upper
2338 * compare
2339 <1 aaa
2340 <3 aaaB
2341
2342 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
2343 @ rules
2344 &\u0001<<<b<<<B
2345 % strength=secondary
2346 % caseLevel=on
2347 * compare
2348 <1 a
2349 =  ab
2350 =  aB
2351
2352 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
2353 @ rules
2354 &[before 2] ൌ << ൗ  # U+0D57 << U+0D4C == 0D46+0D57
2355 * compare
2356 <1 ൗx
2357 <2 ൌx
2358 <1 ൗy
2359 <2 ൌy
2360
2361 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
2362 @ rules
2363 &q<<*a''c
2364 * compare
2365 <1 d
2366 <1 p
2367 <1 q
2368 <2 a
2369 <2 \u0027
2370 <2 c
2371 <1 r
2372
2373 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
2374 ** test: locale -u- with collation keywords, ICU ticket 8260
2375 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
2376 * compare
2377 <4 \u0020  # space is shifted, strength=quaternary
2378 <1 !  # punctuation is regular
2379 <1 2
2380 <1 12  # numeric sorting
2381 <1 B
2382 <c b  # uppercase first on case level
2383 <1 x\u0301\u0308
2384 <2 x\u0308\u0301  # normalization off
2385
2386 ** test: locale @ with collation keywords, ICU ticket 8260
2387 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
2388 * compare
2389 <4 $  # currency symbols are shifted, strength=quaternary
2390 <1 àla
2391 <2 alà  # backwards secondary level
2392
2393 ** test: locale -u- with script reordering, ICU ticket 8260
2394 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
2395 * compare
2396 <1 \u0020
2397 <1 あ
2398 <1 ☂
2399 <1 Ω
2400 <1 丂
2401 <1 ж
2402 <1 L
2403 <1 4
2404 <1 Ձ
2405 <1 अ
2406 <1 ሄ
2407 <1 ฉ
2408
2409 ** test: locale @collation=type should be case-insensitive
2410 @ locale de@coLLation=PhoneBook
2411 * compare
2412 <1 ae
2413 <2 ä
2414 <3 Ä
2415
2416 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
2417 @ locale de-u-co-search
2418 * compare
2419 <1 =
2420 <1 ≠
2421 <1 a
2422 <1 ae
2423 <2 ä
2424
2425 # Once more, but with runtime builder.
2426 @ rules
2427 [import und-u-co-search][import de-u-co-phonebk]
2428 * compare
2429 <1 =
2430 <1 ≠
2431 <1 a
2432 <1 ae
2433 <2 ä
2434
2435 # Once again, with import from "root" not "und" (as in a proper language tag).
2436 @ rules
2437 [import root-u-co-search][import de-u-co-phonebk]
2438 * compare
2439 <1 =
2440 <1 ≠
2441 <1 a
2442 <1 ae
2443 <2 ä
2444
2445 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
2446 # Greek should sort Greek first.
2447 @ rules
2448 [import el]
2449 * compare
2450 <1 4
2451 <1 Ω
2452 <1 L
2453
2454 # Import Greek, and then reset the reordering.
2455 @ rules
2456 [import el][reorder Zzzz]
2457 * compare
2458 <1 4
2459 <1 L
2460 <1 Ω
2461
2462 # "others" is a synonym for Zzzz.
2463 @ rules
2464 [import el][reorder others]
2465 * compare
2466 <1 4
2467 <1 L
2468 <1 Ω
2469
2470 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
2471 @ rules
2472 &x<<aa<<<Aa<<<AA
2473 % strength=secondary
2474 * compare
2475 <1 AA
2476 <2 Aẩ
2477 <2 aą
2478 * compare
2479 <1 AA
2480 <2 aą
2481
2482 ** test: tailor tertiary-after a common tertiary where there is a lower one
2483 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
2484 # See ICU ticket 11448 & CLDR ticket 7222.
2485 @ rules
2486 &あ<<<x<<<y<<<z
2487 * compare
2488 <1 ぁ
2489 <3 あ
2490 <3 x
2491 <3 y
2492 <3 z
2493 <3 ァ
2494 <1 い
2495
2496 ** test: tailor tertiary-after a below-common tertiary
2497 @ rules
2498 &ぁ<<<x<<<y<<<z
2499 * compare
2500 <1 ぁ
2501 <3 x
2502 <3 y
2503 <3 z
2504 <3 あ
2505 <3 ァ
2506 <1 い
2507
2508 ** test: tailor tertiary-before a common tertiary where there is a lower one
2509 @ rules
2510 &[before 3]あ<<<x<<<y<<<z
2511 * compare
2512 <1 ぁ
2513 <3 x
2514 <3 y
2515 <3 z
2516 <3 あ
2517 <3 ァ
2518 <1 い
2519
2520 ** test: tailor tertiary-before a below-common tertiary
2521 @ rules
2522 &[before 3]ぁ<<<x<<<y<<<z
2523 * compare
2524 <1 x
2525 <3 y
2526 <3 z
2527 <3 ぁ
2528 <3 あ
2529 <3 ァ
2530 <1 い
2531
2532 ** test: reorder single scripts not groups, ICU ticket 11449
2533 @ root
2534 % reorder Goth Latn
2535 * compare
2536 <1 4
2537 <1 𐌰  # Gothic
2538 <1 L
2539 <1 Ω
2540 # Before ICU 55, the following reordered together with Gothic.
2541 <1 𐌈  # Old Italic
2542 <1 𐑐  # Shavian