]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/testdata/collationtest.txt
ICU-62135.0.1.tar.gz
[apple/icu.git] / icuSources / test / testdata / collationtest.txt
1 # Copyright (C) 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
3 # Copyright (c) 2012-2015 International Business Machines
4 # Corporation and others. All Rights Reserved.
5 #
6 # This file should be in UTF-8 with a signature byte sequence ("BOM").
7 #
8 # collationtest.txt: Collation test data.
9 #
10 # created on: 2012apr13
11 # created by: Markus W. Scherer
12
13 # A line with "** test: description" is used for verbose and error output.
14
15 # A collator can be set with "@ root" or "@ locale language-tag",
16 # for example "@ locale de-u-co-phonebk".
17 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
18
19 # A collator can be built with "@ rules".
20 # An "@ rules" line is followed by one or more lines with the tailoring rules.
21
22 # A collator can be modified with "% attribute=value".
23
24 # "* compare" tests the order (= or <) of the following strings.
25 # The relation can be "=" or "<" (the level of the difference is not specified)
26 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference; "<i" is the identical level).
27
28 # Test sections ("* compare") are terminated by
29 # definitions of new collators, changing attributes, or new test sections.
30
31 ** test: simple CEs & expansions
32 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
33 # Here we mostly cover a few unusual mappings.
34 @ rules
35 &\x01 # most control codes are ignorable
36 <<<\u0300 # tertiary CE
37 &9<\x00 # NUL not ignorable
38 &\uA00A\uA00B=\uA002 # two long-primary CEs
39 &\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits
40
41 * compare
42 = \x01
43 = \x02
44 <3 \u0300
45 <1 9
46 <1 \x00
47 = \x01\x00\x02
48 <1 a
49 <3 a\u0300
50 <2 a\u0308
51 = ä
52 <1 b
53 <1 か # Hiragana Ka (U+304B)
54 <2 か\u3099 # plus voiced sound mark
55 = が # Hiragana Ga (U+304C)
56 <1 \uA00A\uA00B
57 = \uA002
58 <1 \uA00A\uA00B\u00050004
59 <1 \uA00A\uA00B\u00050005
60 = \uA003
61 <1 \uA00A\uA00B\u00050006
62
63 ** test: contractions
64 # Create some interesting mappings, and map some normalization-inert characters
65 # (which are not subject to canonical reordering)
66 # to some of the same CEs to check the sequence of CEs.
67 @ rules
68
69 # Contractions starting with 'a' should not continue with any character < U+0300
70 # so that we can test a shortcut for that.
71 &a=ⓐ
72 &b<bz=ⓑ
73 &d<dz\u0301=ⓓ # d+z+acute
74 &z
75 <a\u0301=Ⓐ # a+acute sorts after z
76 <a\u0301\u0301=Ⓑ # a+acute+acute
77 <a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right
78 <a\u030a=Ⓓ # a+ring
79 <a\u0323=Ⓔ # a+dot below
80 <a\u0323\u0358=Ⓕ # a+dot below+dot above right
81 <a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring
82 <a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z
83
84 &\U0001D158=⁰ # musical notehead black (has a symbol primary)
85 <\U0001D158\U0001D165=¼ # musical quarter note
86
87 # deliberately missing prefix contractions:
88 # dz
89 # a\u0327
90 # a\u0327\u0323
91 # a\u0327\u0323b
92
93 &\x01
94 <<<\U0001D165=¹ # musical stem (ccc=216)
95 <<<\U0001D16D=² # musical augmentation dot (ccc=226)
96 <<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226)
97 &\u0301=❶ # acute (ccc=230)
98 &\u030a=❷ # ring (ccc=230)
99 &\u0308=❸ # diaeresis (ccc=230)
100 <<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230)
101 &\u0327=❺ # cedilla (ccc=202)
102 &\u0323=❻ # dot below (ccc=220)
103 &\u0331=❼ # macron below (ccc=220)
104 <<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232)
105 &\u0334=❾ # tilde overlay (ccc=1)
106 &\u0358=❿ # dot above right (ccc=232)
107
108 &\u0f71=① # tibetan vowel sign aa
109 &\u0f72=② # tibetan vowel sign i
110 # \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73
111 &\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129)
112
113 ** test: simple contractions
114
115 # Some strings are chosen to cause incremental contiguous contraction matching to
116 # go into partial matches for prefixes of contractions
117 # (where the prefixes are deliberately not also contractions).
118 # When there is no complete match, then the matching code must back out of those
119 # so that discontiguous contractions work as specified.
120
121 * compare
122 # contraction starter with no following text, or mismatch, or blocked
123 <1 a
124 = ⓐ
125 <1 aa
126 = ⓐⓐ
127 <1 ab
128 = ⓐb
129 <1 az
130 = ⓐz
131
132 * compare
133 <1 a
134 <2 a\u0308\u030a # ring blocked by diaeresis
135 = ⓐ❸❷
136 <2 a\u0327
137 = ⓐ❺
138
139 * compare
140 <2 \u0308
141 = ❸
142 <2 \u0308\u030a\u0301 # acute blocked by ring
143 = ❸❷❶
144
145 * compare
146 <1 \U0001D158
147 = ⁰
148 <1 \U0001D158\U0001D165
149 = ¼
150
151 # no discontiguous contraction because of missing prefix contraction d+z,
152 # and a starter ('z') after the 'd'
153 * compare
154 <1 dz\u0323\u0301
155 = dz❻❶
156
157 # contiguous contractions
158 * compare
159 <1 abz
160 = ⓐⓑ
161 <1 abzz
162 = ⓐⓑz
163
164 * compare
165 <1 a
166 <1 z
167 <1 a\u0301
168 = Ⓐ
169 <1 a\u0301\u0301
170 = Ⓑ
171 <1 a\u0301\u0301\u0358
172 = Ⓒ
173 <1 a\u030a
174 = Ⓓ
175 <1 a\u0323\u0358
176 = Ⓕ
177 <1 a\u0327\u0323\u030a # match despite missing prefix
178 = Ⓖ
179 <1 a\u0327\u0323bz
180 = Ⓗ
181
182 * compare
183 <2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second
184 = ❸❹
185
186 * compare
187 <1 \U0001D158\U0001D165
188 = ¼
189
190 * compare
191 <3 \U0001D165\U0001D16D
192 = ³
193
194 ** test: discontiguous contractions
195 * compare
196 <1 a\u0327\u030a # a+ring skips cedilla
197 = Ⓓ❺
198 <2 a\u0327\u0327\u030a # a+ring skips 2 cedillas
199 = Ⓓ❺❺
200 <2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas
201 = Ⓓ❺❺❺
202 <2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas
203 = Ⓓ❾❺❺
204 <1 a\u0327\u0323 # a+dot below skips cedilla
205 = Ⓔ❺
206 <1 a\u0323\u0301\u0358 # a+dot below+dot above right: 2-char match, then skips acute
207 = Ⓕ❶
208 <2 a\u0334\u0323\u0358 # a+dot below skips tilde overlay
209 = Ⓕ❾
210
211 * compare
212 <2 \u0331\u0331\u0358 # macron below+dot above right skips the second macron below
213 = ❽❼
214
215 * compare
216 <1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
217 = Ⓓ❺❼❻
218 <1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla
219 = Ⓔ❺²❷
220 <2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas
221 = Ⓔ❺❺❷
222 <2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla
223 = Ⓔ❺❻❷
224 <2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla
225 = Ⓔ❾❺❷
226
227 * compare
228 <1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla
229 = ¼❺
230 <1 a\U0001D165\u0323 # a+dot below skips stem
231 = Ⓔ¹
232
233 # partial contiguous match, backs up, matches discontiguous contraction
234 <1 a\u0327\u0323b
235 = Ⓔ❺b
236 <1 a\u0327\u0323ba
237 = Ⓔ❺bⓐ
238
239 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
240 * compare
241 <1 a\u0327\u0301\u0301\u0358
242 = Ⓒ❺
243
244 # FCD but not NFD
245 * compare
246 <1 a\u0f73\u0301 # a+acute skips tibetan ii
247 = Ⓐ③
248
249 # FCD but the 0f71 inside the 0f73 must be skipped
250 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
251 * compare
252 <1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
253 = ③①
254
255 ** test: discontiguous contractions with nested contractions
256 * compare
257 <1 a\u0323\u0308\u0301\u0358
258 = Ⓕ❹
259 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
260 = Ⓕ❹❹
261
262 ** test: discontiguous contractions with interleaved contractions
263 * compare
264 # a+ring & cedilla & macron below+dot above right
265 <1 a\u0327\u0331\u030a\u0358
266 = Ⓓ❺❽
267
268 # a+ring & 1x..3x macron below+dot above right
269 <2 a\u0331\u030a\u0358
270 = Ⓓ❽
271 <2 a\u0331\u0331\u030a\u0358\u0358
272 = Ⓓ❽❽
273 # also skips acute
274 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
275 = Ⓓ❽❽❽❶
276
277 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
278 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
279 = Ⓔ³ⓓ
280
281 ** test: some simple string comparisons
282 @ root
283 * compare
284 # first string compares against ""
285 = \u0000
286 < a
287 <1 b
288 <3 B
289 = \u0000B\u0000
290
291 ** test: compare with strength=primary
292 % strength=primary
293 * compare
294 <1 a
295 <1 b
296 = B
297
298 ** test: compare with strength=secondary
299 % strength=secondary
300 * compare
301 <1 a
302 <1 b
303 = B
304
305 ** test: compare with strength=tertiary
306 % strength=tertiary
307 * compare
308 <1 a
309 <1 b
310 <3 B
311
312 ** test: compare with strength=quaternary
313 % strength=quaternary
314 * compare
315 <1 a
316 <1 b
317 <3 B
318
319 ** test: compare with strength=identical
320 % strength=identical
321 * compare
322 <1 a
323 <1 b
324 <3 B
325
326 ** test: côté with forwards secondary
327 @ root
328 * compare
329 <1 cote
330 <2 coté
331 <2 côte
332 <2 côté
333
334 ** test: côté with forwards secondary vs. U+FFFE merge separator
335 # Merged sort keys: On each level, any difference in the first segment
336 # must trump any further difference.
337 * compare
338 <1 cote\uFFFEcôté
339 <2 coté\uFFFEcôte
340 <2 côte\uFFFEcoté
341 <2 côté\uFFFEcote
342
343 ** test: côté with backwards secondary
344 % backwards=on
345 * compare
346 <1 cote
347 <2 côte
348 <2 coté
349 <2 côté
350
351 ** test: côté with backwards secondary vs. U+FFFE merge separator
352 # Merged sort keys: On each level, any difference in the first segment
353 # must trump any further difference.
354 * compare
355 <1 cote\uFFFEcôté
356 <2 côte\uFFFEcoté
357 <2 coté\uFFFEcôte
358 <2 côté\uFFFEcote
359
360 ** test: U+FFFE on identical level
361 @ root
362 % strength=identical
363 * compare
364 # All of these control codes are completely-ignorable, so that
365 # their low code points are compared with the merge separator.
366 # The merge separator must compare less than any other character.
367 <1 \uFFFE\u0001\u0002\u0003
368 <i \u0001\uFFFE\u0002\u0003
369 <i \u0001\u0002\uFFFE\u0003
370 <i \u0001\u0002\u0003\uFFFE
371
372 * compare
373 # The merge separator must even compare less than U+0000.
374 <1 \uFFFE\u0000\u0000
375 <i \u0000\uFFFE\u0000
376 <i \u0000\u0000\uFFFE
377
378 ** test: Hani < surrogates < U+FFFD
379 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
380 # so when comparing with compareUTF8() the strings with surrogates compare equal to each other
381 # and equal to the string with U+FFFD.
382 @ root
383 % strength=identical
384 * compare
385 <1 abz
386 <1 a\u4e00z
387 <1 a\U00020000z
388 <1 a\ud800z
389 <1 a\udbffz
390 <1 a\udc00z
391 <1 a\udfffz
392 <1 a\ufffdz
393
394 ** test: script reordering
395 @ root
396 % reorder Hani Zzzz digit
397 * compare
398 <1 ?
399 <1 +
400 <1 丂
401 <1 a
402 <1 α
403 <1 5
404
405 % reorder default
406 * compare
407 <1 ?
408 <1 +
409 <1 5
410 <1 a
411 <1 α
412 <1 丂
413
414 ** test: empty rules
415 @ rules
416 * compare
417 <1 a
418 <2 ä
419 <3 Ä
420 <1 b
421
422 ** test: very simple rules
423 @ rules
424 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
425 % strength=quaternary
426 * compare
427 <1 a
428 = e
429 <4 q
430 <4 r
431 <1 x
432 <3 X
433 <2 y
434 <3 Y
435 <2 z
436 <3 Z
437
438 ** test: tailoring twice before a root position: primary
439 @ rules
440 &[before 1]b<p
441 &[before 1]b<q
442 * compare
443 <1 a
444 <1 p
445 <1 q
446 <1 b
447
448 ** test: tailoring twice before a root position: secondary
449 @ rules
450 &[before 2]ſ<<p
451 &[before 2]ſ<<q
452 * compare
453 <1 s
454 <2 p
455 <2 q
456 <2 ſ
457
458 # secondary-before common weight
459 @ rules
460 &[before 2]b<<p
461 &[before 2]b<<q
462 * compare
463 <1 a
464 <1 p
465 <2 q
466 <2 b
467
468 ** test: tailoring twice before a root position: tertiary
469 @ rules
470 &[before 3]B<<<p
471 &[before 3]B<<<q
472 * compare
473 <1 b
474 <3 p
475 <3 q
476 <3 B
477
478 # tertiary-before common weight
479 @ rules
480 &[before 3]b<<<p
481 &[before 3]b<<<q
482 * compare
483 <1 a
484 <1 p
485 <3 q
486 <3 b
487
488 @ rules
489 &[before 2]b<<s
490 &[before 3]s<<<p
491 &[before 3]s<<<q
492 * compare
493 <1 a
494 <1 p
495 <3 q
496 <3 s
497 <2 b
498
499 ** test: tailor after completely ignorable
500 @ rules
501 &\x00<<<x<<y
502 * compare
503 = \x00
504 = \x1F
505 <3 x
506 <2 y
507
508 ** test: secondary tailoring gaps, ICU ticket 9362
509 @ rules
510 &[before 2]s<<'_'
511 &s<<r # secondary between s and ſ (long s)
512 &ſ<<*a-q # more than 15 between ſ and secondary CE boundary
513 &[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE
514 &[last primary ignorable]<<y<<z
515
516 * compare
517 <2 u
518 <2 v
519 <2 \u0332 # lowest secondary CE
520 <2 \u0308
521 <2 y
522 <2 z
523 <1 s_
524 <2 ss
525 <2 sr
526 <2 sſ
527 <2 sa
528 <2 sb
529 <2 sp
530 <2 sq
531 <2 sus
532 <2 svs
533 <2 rs
534
535 ** test: tertiary tailoring gaps, ICU ticket 9362
536 @ rules
537 &[before 3]t<<<'_'
538 &t<<<r # tertiary between t and fullwidth t
539 &ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
540 &[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE
541 &[last secondary ignorable]<<<y<<<z
542
543 * compare
544 <3 u
545 <3 v
546 # Note: The root collator currently does not map any characters to tertiary CEs.
547 <3 y
548 <3 z
549 <1 t_
550 <3 tt
551 <3 tr
552 <3 t\uFF54 # t + fullwidth t (U+FF54)
553 <3 tᵀ
554 <3 ta
555 <3 tb
556 <3 tp
557 <3 tq
558 <3 tut
559 <3 tvt
560 <3 rt
561
562 ** test: secondary & tertiary around root character
563 @ rules
564 &[before 2]m<<r
565 &m<<s
566 &[before 3]m<<<u
567 &m<<<v
568 * compare
569 <1 l
570 <1 r
571 <2 u
572 <3 m
573 <3 v
574 <2 s
575 <1 n
576
577 ** test: secondary & tertiary around tailored item
578 @ rules
579 &m<x
580 &[before 2]x<<r
581 &x<<s
582 &[before 3]x<<<u
583 &x<<<v
584 * compare
585 <1 m
586 <1 r
587 <2 u
588 <3 x
589 <3 v
590 <2 s
591 <1 n
592
593 ** test: more nesting of secondary & tertiary before
594 @ rules
595 &[before 3]m<<<u
596 &[before 2]m<<r
597 &[before 3]r<<<q
598 &m<<<w
599 &m<<t
600 &[before 3]w<<<v
601 &w<<<x
602 &w<<s
603 * compare
604 <1 l
605 <1 q
606 <3 r
607 <2 u
608 <3 m
609 <3 v
610 <3 w
611 <3 x
612 <2 s
613 <2 t
614 <1 n
615
616 ** test: case bits
617 @ rules
618 &w<x # tailored CE getting case bits
619 =uv=uV=Uv=UV # 2 chars -> 1 CE
620 &ae=ch=cH=Ch=CH # 2 chars -> 2 CEs
621 &rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs
622 % caseFirst=lower
623 * compare
624 <1 ae
625 = ch
626 <3 cH
627 <3 Ch
628 <3 CH
629 <1 rst
630 = yz
631 <3 yZ
632 <3 Yz
633 <3 YZ
634 <1 w
635 <1 x
636 = uv
637 <3 uV
638 = Uv # mixed case on single CE cannot distinguish variations
639 <3 UV
640
641 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
642 @ rules
643 &\u0001<<<t<<<T # tertiary CEs
644 % caseFirst=lower
645 * compare
646 <1 aa
647 <3 aat
648 <3 aaT
649 <3 aA
650 <3 aAt
651 <3 ata
652 <3 aTa
653
654 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
655 % caseFirst=upper
656 * compare
657 <1 aA
658 <3 aAt
659 <3 aa
660 <3 aat
661 <3 aaT
662 <3 ata
663 <3 aTa
664
665 ** test: reset on expansion, ICU tickets 9415 & 9593
666 @ rules
667 &æ<x # tailor the last primary CE so that x sorts between ae and af
668 &æb=bæ # copy all reset CEs to make bæ sort the same
669 &각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
670 &⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference
671 &l·=z # handle the pre-context for · when fetching reset CEs
672 <<u # copy/tailor 2 CEs
673
674 * compare
675 <1 ae
676 <2 æ
677 <1 x
678 <1 af
679
680 * compare
681 <1 aeb
682 <2 æb
683 = bæ
684
685 * compare
686 <1 각
687 <1 h
688 <1 갂
689 <1 갃
690
691 * compare
692 <1 · # by itself: primary CE
693 <1 l
694 <2 l· # l+middle dot has only a secondary difference from l
695 = z
696 <2 u
697
698 * compare
699 <1 (13)
700 <3 ⒀ # DUCET sets special tertiary weights in all CEs
701 <2 y
702 <1 (13[
703
704 % alternate=shifted
705 * compare
706 <1 (13)
707 = 13
708 <3 ⒀
709 = y # alternate=shifted removes the tailoring difference on the last CE
710 <1 14
711
712 ** test: contraction inside extension, ICU ticket 9378
713 @ rules
714 &а<<х/й # all letters are Cyrillic
715 * compare
716 <1 ай
717 <2 х
718
719 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
720 @ rules
721 &t<x &ᵀ<y # same primary weights
722 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
723 * compare
724 <1 q
725 <1 u
726 <1 v
727 <1 ꝗ
728 <1 t
729 <3 ᵀ
730 <1 y
731 <1 x
732
733 # Principle: Each rule builds on the state of preceding rules and ignores following rules.
734
735 ** test: later rule does not affect earlier reset position, ICU ticket 10105
736 @ rules
737 &a < u < v < w &ov < x &b < v
738 * compare
739 <1 oa
740 <1 ou
741 <1 x # CE(o) followed by CE between u and w
742 <1 ow
743 <1 ob
744 <1 ov
745
746 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
747 @ rules
748 &a=x/b &v=b
749 % strength=secondary
750 * compare
751 <1 B
752 <1 c
753 <1 v
754 = b
755 * compare
756 <1 AB
757 = x
758 <1 ac
759 <1 av
760 = ab
761
762 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
763 @ rules
764 &a <<< c / e &g <<< e / l
765 % strength=secondary
766 * compare
767 <1 AE
768 = c
769 <2 æ
770 <1 agl
771 = ae
772
773 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
774 @ rules
775 &a = b / c &d = c / e
776 % strength=secondary
777 * compare
778 <1 AC # C is still only tertiary different from the original c
779 = b
780 <1 ade
781 = ac
782
783 ** test: extension contains tailored character, ICU ticket 10105
784 @ rules
785 &a=e &b=u/e
786 * compare
787 <1 a
788 = e
789 <1 ba
790 = be
791 = u
792
793 ** test: add simple mappings for characters with root context
794 @ rules
795 &z=· # middle dot has a prefix mapping in the CLDR root
796 &n=и # и (U+0438) has contractions in the root
797 * compare
798 <1 l
799 <2 l· # root mapping for l|· still works
800 <1 z
801 = ·
802 * compare
803 <1 n
804 = и
805 <1 И
806 <1 и\u0306 # root mapping for й=и\u0306 still works
807 = й
808 <3 Й
809
810 ** test: add context mappings around characters with root context
811 @ rules
812 &z=·h # middle dot has a prefix mapping in the CLDR root
813 &n=ә|и # и (U+0438) has contractions in the root
814 * compare
815 <1 l
816 <2 l· # root mapping for l|· still works
817 <1 z
818 = ·h
819 * compare
820 <1 и
821 <3 И
822 <1 и\u0306 # root mapping for й=и\u0306 still works
823 = й
824 * compare
825 <1 әn
826 = әи
827 <1 әo
828
829 ** test: many secondary CEs at the top of their range
830 @ rules
831 &[last primary ignorable]<<*\u2801-\u28ff
832 * compare
833 <2 \u0308
834 <2 \u2801
835 <2 \u2802
836 <2 \u2803
837 <2 \u2804
838 <2 \u28fd
839 <2 \u28fe
840 <2 \u28ff
841 <1 \x20
842
843 ** test: many tertiary CEs at the top of their range
844 @ rules
845 &[last secondary ignorable]<<<*a-z
846 * compare
847 <3 a
848 <3 b
849 <3 c
850 <3 d
851 # e..w
852 <3 x
853 <3 y
854 <3 z
855 <2 \u0308
856
857 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
858 @ rules
859 &a=p|x &b=px &c=op
860 * compare
861 <1 b
862 = px
863 <3 B
864 <1 c
865 = op
866 <3 C
867 * compare
868 <1 ca
869 = opx # first contraction op, then prefix p|x
870 <3 cA
871 <3 Ca
872
873 ** test: reset position with prefix (pre-context), ICU ticket 10102
874 @ rules
875 &a=p|x &px=y
876 * compare
877 <1 pa
878 = px
879 = y
880 <3 pA
881 <1 q
882 <1 x
883
884 ** test: prefix+contraction together (1), ICU ticket 10071
885 @ rules
886 &x=a|bc
887 * compare
888 <1 ab
889 <1 Abc
890 <1 abd
891 <1 ac
892 <1 aw
893 <1 ax
894 = abc
895 <3 aX
896 <3 Ax
897 <1 b
898 <1 bb
899 <1 bc
900 <3 bC
901 <3 Bc
902 <1 bd
903
904 ** test: prefix+contraction together (2), ICU ticket 10071
905 @ rules
906 &w=bc &x=a|b
907 * compare
908 <1 w
909 = bc
910 <3 W
911 * compare
912 <1 aw
913 <1 ax
914 = ab
915 <3 aX
916 <1 axb
917 <1 axc
918 = abc # prefix match a|b takes precedence over contraction match bc
919 <3 abC
920 <1 abd
921 <1 ay
922
923 ** test: prefix+contraction together (3), ICU ticket 10071
924 @ rules
924 &x=a|b &w=bc # same rules as the previous test but in reverse order; the order should not matter here
926 * compare # same "compare" sequences as previous test
927 <1 w
928 = bc
929 <3 W
930 * compare
931 <1 aw
932 <1 ax
933 = ab
934 <3 aX
935 <1 axb
936 <1 axc
937 = abc # prefix match a|b takes precedence over contraction match bc
938 <3 abC
939 <1 abd
940 <1 ay
941
942 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
943 @ rules
944 &d=ch &v=p|ci
945 * compare
946 <1 pc
947 <3 pC
948 <1 pcH
949 <1 pcI
950 <1 pd
951 = pch # no-prefix contraction ch matches
952 <3 pD
953 <1 pv
954 = pci # prefix+contraction p|ci matches
955 <3 pV
956
957 ** test: tailor in & around compact ranges of root primaries
958 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
959 # which should be reliably encoded as one range in the root elements data.
960 @ rules
961 &[before 1]ᚁ<a
962 &ᚁ<b
963 &[before 1]ᚂ<c
964 &ᚂ<d
965 &[before 1]ᚚ<y
966 &ᚚ<z
967 &[before 2]ᚁ<<r
968 &ᚁ<<s
969 &[before 3]ᚚ<<<t
970 &ᚚ<<<u
971 * compare
972 <1 ᣵ # U+18F5 last Canadian Aboriginal
973 <1 a
974 <1 r
975 <2 ᚁ
976 <2 s
977 <1 b
978 <1 c
979 <1 ᚂ
980 <1 d
981 <1 ᚃ
982 <1 ᚙ
983 <1 y
984 <1 t
985 <3 ᚚ
986 <3 u
987 <1 z
988 <1 ᚠ # U+16A0 first Runic
989
990 ** test: suppressContractions
991 @ rules
992 &z<ch<әж [suppressContractions [·cә]]
993 * compare
994 <1 ch
995 <3 cH # ch was suppressed
996 <1 l
997 <1 l· # primary difference, not secondary, because l|· was suppressed
998 <1 ә
999 <2 ә\u0308 # secondary difference, not primary, because contractions for ә were suppressed
1000 <1 әж
1001 <3 әЖ
1002
1003 ** test: Hangul & Jamo
1004 @ rules
1005 &L=\u1100 # first Jamo L
1006 &V=\u1161 # first Jamo V
1007 &T=\u11A8 # first Jamo T
1008 &\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs
1009 * compare
1010 <1 Lv
1011 <3 LV
1012 = \u1100\u1161
1013 = \uAC00
1014 <1 LVt
1015 <3 LVT
1016 = \u1100\u1161\u11A8
1017 = \uAC00\u11A8
1018 = \uAC01
1019 <2 LVT\u0308
1020 <2 \u4E00
1021 <2 \u4E01
1022 <2 \u4E80
1023 <2 \u4EFF
1024 <2 LV\u0308T
1025 <1 \uAC02
1026
1027 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
1028 @ rules
1029 &[last variable]<x
1030 [maxVariable space] # has effect only after building, no effect on following rules
1031 &[last variable]<y
1032 &[before 1][first regular]<z
1033 * compare
1034 <1 ? # some punctuation
1035 <1 x
1036 <1 y
1037 <1 z
1038 <1 $ # some symbol
1039
1040 @ rules
1041 &[last primary ignorable]<<x<<<y
1042 &[last primary ignorable]<<z
1043 * compare
1044 <2 \u0358
1045 <2 x
1046 <3 y
1047 <2 z
1048 <1 \x20
1049
1050 @ rules
1051 &[last secondary ignorable]<<<x
1052 &[last secondary ignorable]<<<y
1053 * compare
1054 <3 x
1055 <3 y
1056 <2 \u0358
1057
1058 @ rules
1059 &[before 2][first variable]<<z
1060 &[before 2][first variable]<<y
1061 &[before 3][first variable]<<<x
1062 &[before 3][first variable]<<<w
1063 &[before 1][first variable]<v
1064 &[before 2][first variable]<<u
1065 &[before 3][first variable]<<<t
1066 &[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary
1067 * compare
1068 <2 \u0358
1069 <1 s
1070 <2 \uFDD1\xA0
1071 <1 t
1072 <3 u
1073 <2 v
1074 <1 w
1075 <3 x
1076 <3 y
1077 <2 z
1078 <2 \t
1079
1080 @ rules
1081 &[before 2][first regular]<<z
1082 &[before 3][first regular]<<<y
1083 &[before 1][first regular]<x
1084 &[before 3][first regular]<<<w
1085 &[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
1086 &[before 3][first regular]<<<u
1087 &[before 1][first regular]<p # primary before the boundary: becomes variable
1088 &[before 3][first regular]<<<t # not affected by p
1089 &[last variable]<q # after p!
1090 * compare
1091 <1 ?
1092 <1 p
1093 <1 q
1094 <1 t
1095 <3 u
1096 <3 v
1097 <1 w
1098 <3 x
1099 <1 y
1100 <3 z
1101 <1 $
1102
1103 # check that p & q are indeed variable
1104 % alternate=shifted
1105 * compare
1106 = ?
1107 = p
1108 = q
1109 <1 t
1110 <3 u
1111 <3 v
1112 <1 w
1113 <3 x
1114 <1 y
1115 <3 z
1116 <1 $
1117
1118 @ rules
1119 &[before 2][first trailing]<<z
1120 &[before 1][first trailing]<y
1121 &[before 3][first trailing]<<<x
1122 * compare
1123 <1 \u4E00 # first Han, first implicit
1124 <1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary
1125 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
1126 <1 x
1127 <3 y
1128 <1 z
1129 <2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing primary.
1130
1131 @ rules
1132 &[before 2][first primary ignorable]<<z
1133 &[before 2][first primary ignorable]<<y
1134 &[before 3][first primary ignorable]<<<x
1135 &[before 3][first primary ignorable]<<<w
1136 * compare
1137 = \x01
1138 <2 w
1139 <3 x
1140 <3 y
1141 <2 z
1142 <2 \u0301
1143
1144 @ rules
1145 &[before 3][first secondary ignorable]<<<y
1146 &[before 3][first secondary ignorable]<<<x
1147 * compare
1148 = \x01
1149 <3 x
1150 <3 y
1151 <2 \u0301
1152
1153 ** test: canonical closure
1154 @ rules
1155 &X=A &U=Â
1156 * compare
1157 <1 U
1158 = Â
1159 = A\u0302
1160 <2 Ú # U with acute
1161 = U\u0301
1162 = Ấ # A with circumflex & acute
1163 = Â\u0301
1164 = A\u0302\u0301
1165 <1 X
1166 = A
1167 <2 X\u030A # with ring above
1168 = Å
1169 = A\u030A
1170 = \u212B # Angstrom sign
1171
1172 @ rules
1173 &x=\u5140\u55C0
1174 * compare
1175 <1 x
1176 = \u5140\u55C0
1177 = \u5140\uFA0D
1178 = \uFA0C\u55C0
1179 = \uFA0C\uFA0D # CJK compatibility characters
1180 <3 X
1181
1182 # canonical closure on prefix rules, ICU ticket 9444
1183 @ rules
1184 &x=ä|ŝ
1185 * compare
1186 <1 äs # not tailored
1187 <1 äx
1188 = äŝ
1189 = a\u0308s\u0302
1190 = a\u0308ŝ
1191 = äs\u0302
1192 <3 äX
1193
1194 ** test: conjoining Jamo map to expansions
1195 @ rules
1196 &gg=\u1101 # Jamo Lead consonant GG
1197 &nj=\u11AC # Jamo Trail consonant NJ
1198 * compare
1199 <1 gg\u1161nj
1200 = \u1101\u1161\u11AC
1201 = \uAE4C\u11AC
1202 = \uAE51
1203 <3 gg\u1161nJ
1204 <1 \u1100\u1100
1205
1206 ** test: canonical tail closure, ICU ticket 5913
1207 @ rules
1208 &a<â
1209 * compare
1210 <1 a
1211 <1 â # tailored
1212 = a\u0302
1213 <2 a\u0323\u0302 # discontiguous contraction
1214 = ạ\u0302 # equivalent
1215 = ậ # equivalent
1216 <1 b
1217
1218 @ rules
1219 &a<ạ
1220 * compare
1221 <1 a
1222 <1 ạ # tailored
1223 = a\u0323
1224 <2 a\u0323\u0302 # contiguous contraction plus extra diacritic
1225 = ạ\u0302 # equivalent
1226 = ậ # equivalent
1227 <1 b
1228
1229 # Tail closure should work even if there is a prefix and/or contraction.
1230 @ rules
1231 &a<\u5140|câ
1232 # In order to find discontiguous contractions for \u5140|câ
1233 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
1234 # (This follows from the UCA spec.)
1235 &x=\u5140|ca
1236 * compare
1237 <1 \u5140a
1238 = \uFA0Ca
1239 <1 \u5140câ # tailored
1240 = \uFA0Ccâ
1241 = \u5140ca\u0302
1242 = \uFA0Cca\u0302
1243 <2 \u5140ca\u0323\u0302 # discontiguous contraction
1244 = \uFA0Cca\u0323\u0302
1245 = \u5140cạ\u0302
1246 = \uFA0Ccạ\u0302
1247 = \u5140cậ
1248 = \uFA0Ccậ
1249 <1 \u5140b
1250 = \uFA0Cb
1251 <1 \u5140x
1252 = \u5140ca
1253
1254 # Double-check that without the extra mapping there will be no discontiguous match.
1255 @ rules
1256 &a<\u5140|câ
1257 * compare
1258 <1 \u5140a
1259 = \uFA0Ca
1260 <1 \u5140câ # tailored
1261 = \uFA0Ccâ
1262 = \u5140ca\u0302
1263 = \uFA0Cca\u0302
1264 <1 \u5140b
1265 = \uFA0Cb
1266 <1 \u5140ca\u0323\u0302 # no discontiguous contraction
1267 = \uFA0Cca\u0323\u0302
1268 = \u5140cạ\u0302
1269 = \uFA0Ccạ\u0302
1270 = \u5140cậ
1271 = \uFA0Ccậ
1272
1273 @ rules
1274 &a<cạ
1275 * compare
1276 <1 a
1277 <1 cạ # tailored
1278 = ca\u0323
1279 <2 ca\u0323\u0302 # contiguous contraction plus extra diacritic
1280 = cạ\u0302 # equivalent
1281 = cậ # equivalent
1282 <1 b
1283
1284 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1285 # = 03C9 0313 0300 0345
1286 # ccc = 0, 230, 230, 240
1287 @ rules
1288 &δ=αῳ
1289 # In order to find discontiguous contractions for αῳ
1290 # there must exist a mapping for αω, regardless of what it maps to.
1291 # (This follows from the UCA spec.)
1292 &ε=αω
1293 * compare
1294 <1 δ
1295 = αῳ
1296 = αω\u0345
1297 <2 αω\u0313\u0300\u0345 # discontiguous contraction
1298 = αὠ\u0300\u0345
1299 = αὢ\u0345
1300 = αᾢ
1301 <2 αω\u0300\u0313\u0345
1302 = αὼ\u0313\u0345
1303 = αῲ\u0313 # not FCD
1304 <1 ε
1305 = αω
1306
1307 # Double-check that without the extra mapping there will be no discontiguous match.
1308 @ rules
1309 &δ=αῳ
1310 * compare
1311 <1 αω\u0313\u0300\u0345 # no discontiguous contraction
1312 = αὠ\u0300\u0345
1313 = αὢ\u0345
1314 = αᾢ
1315 <2 αω\u0300\u0313\u0345
1316 = αὼ\u0313\u0345
1317 = αῲ\u0313 # not FCD
1318 <1 δ
1319 = αῳ
1320 = αω\u0345
1321
1322 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
1323 # Tests code paths where the tailored string has a combining mark
1324 # that does not occur in any composite's decomposition.
1325 @ rules
1326 &δ=αὼ\u0315
1327 * compare
1328 <1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above.
1329 = αὠ\u0300\u0315
1330 = αὢ\u0315
1331 <1 δ
1332 = αὼ\u0315
1333 = αω\u0300\u0315
1334 <2 αω\u0300\u0315\u0345
1335 = αὼ\u0315\u0345
1336 = αῲ\u0315 # not FCD
1337
1338 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
1339 @ rules
1340 &z<aa
1341 * compare
1342 <1 z
1343 <1 aa
1344 <2 aa\u0308
1345 = aä
1346
1347 ** test: Jamo L with and in prefix
1348 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
1349 @ rules
1350 # Jamo Lead consonant G after G or GG
1351 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
1352 # Jamo Lead consonant GG sorts like G+G
1353 &\u1100\u1100=\u1101
1354 # Note: Making G|GG and GG|GG sort the same as G|G+G
1355 # would require the ability to reset on G|G+G,
1356 # or we could make G-after-G equal to some secondary-CE character,
1357 # and reset on a pair of those.
1358 # (It does not matter much if there are at most two G in a row in real text.)
1359 * compare
1360 <1 \u1100
1361 <2 \u1100\u1100 # only one primary from a sequence of G lead consonants
1362 = \u1101
1363 <2 \u1100\u1100\u1100
1364 = \u1101\u1100
1365 # but not = \u1100\u1101, see above
1366 <1 \u1100\u1161
1367 = \uAC00
1368 <2 \u1100\u1100\u1161
1369 = \u1100\uAC00 # prefix match from the L of the LV syllable
1370 = \u1101\u1161
1371 = \uAE4C
1372
1373 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
1374 @ rules
1375 # Low secondary CEs for Jamo V & T.
1376 # Note: T should sort before V for proper syllable order.
1377 &\u0332 # COMBINING LOW LINE (first primary ignorable)
1378 <<\u1161<<\u1162
1379
1380 # Korean Jamo lead consonant search rules, part 2:
1381 # Make modern compound L jamo primary equivalent to non-compound forms.
1382
1383 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
1384 &\u0313 # COMBINING COMMA ABOVE (second primary ignorable)
1385 =\u1100|\u1100
1386 =\u1103|\u1103
1387 =\u1107|\u1107
1388 =\u1109|\u1109
1389 =\u110C|\u110C
1390
1391 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
1392 &\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
1393 &\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
1394 &\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
1395 &\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
1396 &\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
1397
1398 * compare
1399 <1 \u1100\u1161
1400 = \uAC00
1401 <2 \u1100\u1162
1402 = \uAC1C
1403 <2 \u1100\u1100\u1161
1404 = \u1100\uAC00
1405 = \u1101\u1161
1406 = \uAE4C
1407 <3 \u3132\u1161
1408
1409 ** test: Hangul syllables in prefix & in the interior of a contraction
1410 @ rules
1411 &x=\u1100\u1161|a\u1102\u1162z
1412 * compare
1413 <1 \u1100\u1161x
1414 = \u1100\u1161a\u1102\u1162z
1415 = \u1100\u1161a\uB0B4z
1416 = \uAC00a\u1102\u1162z
1417 = \uAC00a\uB0B4z
1418
1419 ** test: digits are unsafe-backwards when numeric=on
1420 @ root
1421 % numeric=on
1422 * compare
1423 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
1424 # We need to back up before the identical prefix "1" and compare the full numbers.
1425 <1 11b
1426 <1 101a
1427
1428 ** test: simple locale data test
1429 @ locale de
1430 * compare
1431 <1 a
1432 <2 ä
1433 <1 ae
1434 <2 æ
1435
1436 @ locale de-u-co-phonebk
1437 * compare
1438 <1 a
1439 <1 ae
1440 <2 ä
1441 <2 æ
1442
1443 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
1444
1445 ** test: DataDrivenCollationTest/TestMorePinyin
1446 # Testing the primary strength.
1447 @ locale zh
1448 % strength=primary
1449 * compare
1450 < lā
1451 = lĀ
1452 = Lā
1453 = LĀ
1454 < lān
1455 = lĀn
1456 < lē
1457 = lĒ
1458 = Lē
1459 = LĒ
1460 < lēn
1461 = lĒn
1462
1463 ** test: DataDrivenCollationTest/TestLithuanian
1464 # Lithuanian sort order.
1465 @ locale lt
1466 * compare
1467 < cz
1468 < č
1469 < d
1470 < iz
1471 < j
1472 < sz
1473 < š
1474 < t
1475 < zz
1476 < ž
1477
1478 ** test: DataDrivenCollationTest/TestLatvian
1479 # Latvian sort order.
1480 @ locale lv
1481 * compare
1482 < cz
1483 < č
1484 < d
1485 < gz
1486 < ģ
1487 < h
1488 < iz
1489 < j
1490 < kz
1491 < ķ
1492 < l
1493 < lz
1494 < ļ
1495 < m
1496 < nz
1497 < ņ
1498 < o
1499 < rz
1500 < ŗ
1501 < s
1502 < sz
1503 < š
1504 < t
1505 < zz
1506 < ž
1507
1508 ** test: DataDrivenCollationTest/TestEstonian
1509 # Estonian sort order.
1510 @ locale et
1511 * compare
1512 < sy
1513 < š
1514 < šy
1515 < z
1516 < zy
1517 < ž
1518 < v
1519 < va
1520 < w
1521 < õ
1522 < õy
1523 < ä
1524 < äy
1525 < ö
1526 < öy
1527 < ü
1528 < üy
1529 < x
1530
1531 ** test: DataDrivenCollationTest/TestAlbanian
1532 # Albanian sort order.
1533 @ locale sq
1534 * compare
1535 < cz
1536 < ç
1537 < d
1538 < dz
1539 < dh
1540 < e
1541 < ez
1542 < ë
1543 < f
1544 < gz
1545 < gj
1546 < h
1547 < lz
1548 < ll
1549 < m
1550 < nz
1551 < nj
1552 < o
1553 < rz
1554 < rr
1555 < s
1556 < sz
1557 < sh
1558 < t
1559 < tz
1560 < th
1561 < u
1562 < xz
1563 < xh
1564 < y
1565 < zz
1566 < zh
1567
1568 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
1569 # Sorted file has different order.
1570 @ root
1571 # normalization=on turned on & off automatically.
1572 * compare
1573 < \u5F20
1574 < \u5F20\u4E00\u8E3F
1575
1576 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
1577 # Regression test: this input used to pretty much crash.
1578 @ root
1579 * compare
1580 < \u0f71\u0f72\u0f80\u0f71\u0f72
1581 < \u0f80
1582
1583 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
1584 # These are examples of strings that caused trouble in partial sort key testing.
1585 @ locale th-TH
1586 * compare
1587 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
1588 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
1589 * compare
1590 < \u0E01\u0E07\u0E01\u0E32\u0E23
1591 < \u0E01\u0E07\u0E42\u0E01\u0E49
1592 * compare
1593 < \u0E01\u0E23\u0E19\u0E17\u0E32
1594 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
1595 * compare
1596 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
1597 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
1598 * compare
1599 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
1600 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
1601
1602 ** test: DataDrivenCollationTest/TestJavaStyleRule
1603 # java.text allows rules to start as '<<<x<<<y...'
1604 # we emulate this by assuming a &[first tertiary ignorable] in this case.
1605 @ rules
1606 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
1607 * compare
1608 = a
1609 = equal
1610 < z
1611 < x
1612 = b # x had become the new first primary ignorable
1613 < w
1614
1615 ** test: DataDrivenCollationTest/TestShiftedIgnorable
1616 # The UCA states that primary ignorables should be completely
1617 # ignorable when following a shifted code point.
1618 @ root
1619 % alternate=shifted
1620 % strength=quaternary
1621 * compare
1622 < a\u0020b
1623 = a\u0020\u0300b
1624 = a\u0020\u0301b
1625 < a_b
1626 = a_\u0300b
1627 = a_\u0301b
1628 < A\u0020b
1629 = A\u0020\u0300b
1630 = A\u0020\u0301b
1631 < A_b
1632 = A_\u0300b
1633 = A_\u0301b
1634 < a\u0301b
1635 < A\u0301b
1636 < a\u0300b
1637 < A\u0300b
1638
1639 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
1640 # The UCA states that primary ignorables should be completely
1641 # ignorable when following a shifted code point; with alternate=non-ignorable nothing is shifted, so here they are not ignored.
1642 @ root
1643 % alternate=non-ignorable
1644 % strength=tertiary
1645 * compare
1646 < a\u0020b
1647 < A\u0020b
1648 < a\u0020\u0301b
1649 < A\u0020\u0301b
1650 < a\u0020\u0300b
1651 < A\u0020\u0300b
1652 < a_b
1653 < A_b
1654 < a_\u0301b
1655 < A_\u0301b
1656 < a_\u0300b
1657 < A_\u0300b
1658 < a\u0301b
1659 < A\u0301b
1660 < a\u0300b
1661 < A\u0300b
1662
1663 ** test: DataDrivenCollationTest/TestSafeSurrogates
1664 # It turned out that surrogates were not skipped properly
1665 # when iterating backwards if they were in the middle of a
1666 # contraction. This test ensures that this is fixed.
1667 @ rules
1668 &a < x\ud800\udc00b
1669 * compare
1670 < a
1671 < x\ud800\udc00b
1672
1673 ** test: DataDrivenCollationTest/da_TestPrimary
1674 # This test goes through primary strength cases
1675 @ locale da
1676 % strength=primary
1677 * compare
1678 < Lvi
1679 < Lwi
1680 * compare
1681 < L\u00e4vi
1682 < L\u00f6wi
1683 * compare
1684 < L\u00fcbeck
1685 = Lybeck
1686
1687 ** test: DataDrivenCollationTest/da_TestTertiary
1688 # This test goes through tertiary strength cases
1689 @ locale da
1690 % strength=tertiary
1691 * compare
1692 < Luc
1693 < luck
1694 * compare
1695 < luck
1696 < L\u00fcbeck
1697 * compare
1698 < lybeck
1699 < L\u00fcbeck
1700 * compare
1701 < L\u00e4vi
1702 < L\u00f6we
1703 * compare
1704 < L\u00f6ww
1705 < mast
1706
1707 * compare
1708 < A/S
1709 < ANDRE
1710 < ANDR\u00c9
1711 < ANDREAS
1712 < AS
1713 < CA
1714 < \u00c7A
1715 < CB
1716 < \u00c7C
1717 < D.S.B.
1718 < DA
1719 < \u00d0A
1720 < DB
1721 < \u00d0C
1722 < DSB
1723 < DSC
1724 < EKSTRA_ARBEJDE
1725 < EKSTRABUD0
1726 < H\u00d8ST
1727 < HAAG
1728 < H\u00c5NDBOG
1729 < HAANDV\u00c6RKSBANKEN
1730 < Karl
1731 < karl
1732 < NIELS\u0020J\u00d8RGEN
1733 < NIELS-J\u00d8RGEN
1734 < NIELSEN
1735 < R\u00c9E,\u0020A
1736 < REE,\u0020B
1737 < R\u00c9E,\u0020L
1738 < REE,\u0020V
1739 < SCHYTT,\u0020B
1740 < SCHYTT,\u0020H
1741 < SCH\u00dcTT,\u0020H
1742 < SCHYTT,\u0020L
1743 < SCH\u00dcTT,\u0020M
1744 < SS
1745 < \u00df
1746 < SSA
1747 < STORE\u0020VILDMOSE
1748 < STOREK\u00c6R0
1749 < STORM\u0020PETERSEN
1750 < STORMLY
1751 < THORVALD
1752 < THORVARDUR
1753 < \u00feORVAR\u00d0UR
1754 < THYGESEN
1755 < VESTERG\u00c5RD,\u0020A
1756 < VESTERGAARD,\u0020A
1757 < VESTERG\u00c5RD,\u0020B
1758 < \u00c6BLE
1759 < \u00c4BLE
1760 < \u00d8BERG
1761 < \u00d6BERG
1762
1763 * compare
1764 < andere
1765 < chaque
1766 < chemin
1767 < cote
1768 < cot\u00e9
1769 < c\u00f4te
1770 < c\u00f4t\u00e9
1771 < \u010du\u010d\u0113t
1772 < Czech
1773 < hi\u0161a
1774 < irdisch
1775 < lie
1776 < lire
1777 < llama
1778 < l\u00f5ug
1779 < l\u00f2za
1780 < lu\u010d
1781 < luck
1782 < L\u00fcbeck
1783 < lye
1784 < l\u00e4vi
1785 < L\u00f6wen
1786 < m\u00e0\u0161ta
1787 < m\u00eer
1788 < myndig
1789 < M\u00e4nner
1790 < m\u00f6chten
1791 < pi\u00f1a
1792 < pint
1793 < pylon
1794 < \u0161\u00e0ran
1795 < savoir
1796 < \u0160erb\u016bra
1797 < Sietla
1798 < \u015blub
1799 < subtle
1800 < symbol
1801 < s\u00e4mtlich
1802 < verkehrt
1803 < vox
1804 < v\u00e4ga
1805 < waffle
1806 < wood
1807 < yen
1808 < yuan
1809 < yucca
1810 < \u017eal
1811 < \u017eena
1812 < \u017den\u0113va
1813 < zoo0
1814 < Zviedrija
1815 < Z\u00fcrich
1816 < zysk0
1817 < \u00e4ndere
1818
1819 ** test: DataDrivenCollationTest/hi_TestNewRules
1820 # This test goes through new rules and tests against old rules
1821 @ locale hi
1822 * compare
1823 < कॐ
1824 < कं
1825 < कँ
1826 < कः
1827
1828 ** test: DataDrivenCollationTest/ro_TestNewRules
1829 # This test goes through new rules and tests against old rules
1830 @ locale ro
1831 * compare
1832 < xAx
1833 < xă
1834 < xĂ
1835 < Xă
1836 < XĂ
1837 < xăx
1838 < xĂx
1839 < xâ
1840 < xÂ
1841 < Xâ
1842 < XÂ
1843 < xâx
1844 < xÂx
1845 < xb
1846 < xIx
1847 < xî
1848 < xÎ
1849 < Xî
1850 < XÎ
1851 < xîx
1852 < xÎx
1853 < xj
1854 < xSx
1855 < xș
1856 = xş
1857 < xȘ
1858 = xŞ
1859 < Xș
1860 = Xş
1861 < XȘ
1862 = XŞ
1863 < xșx
1864 = xşx
1865 < xȘx
1866 = xŞx
1867 < xT
1868 < xTx
1869 < xț
1870 = xţ
1871 < xȚ
1872 = xŢ
1873 < Xț
1874 = Xţ
1875 < XȚ
1876 = XŢ
1877 < xțx
1878 = xţx
1879 < xȚx
1880 = xŢx
1881 < xU
1882
1883 ** test: DataDrivenCollationTest/testOffsets
1884 # This tests cases where forwards and backwards iteration get different offsets
1885 @ locale en
1886 % strength=tertiary
1887 * compare
1888 < a\uD800\uDC00\uDC00
1889 < b\uD800\uDC00\uDC00
1890 * compare
1891 < \u0301A\u0301\u0301
1892 < \u0301B\u0301\u0301
1893 * compare
1894 < abcd\r\u0301
1895 < abce\r\u0301
1896 # TODO: test offsets in new CollationTest
1897
1898 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
1899
1900 ** test: was ICU 52 cmsccoll/TestRedundantRules
1901 @ rules
1902 & a < b < c < d& [before 1] c < m
1903 * compare
1904 <1 a
1905 <1 b
1906 <1 m
1907 <1 c
1908 <1 d
1909
1910 @ rules
1911 & a < b <<< c << d <<< e& [before 3] e <<< x
1912 * compare
1913 <1 a
1914 <1 b
1915 <3 c
1916 <2 d
1917 <3 x
1918 <3 e
1919
1920 @ rules
1921 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
1922 * compare
1923 <1 a
1924 <1 b
1925 <3 c
1926 <2 d
1927 <3 e
1928 <3 f
1929 <1 x
1930 <1 g
1931
1932 @ rules
1933 & a <<< b << c < d& a < m
1934 * compare
1935 <1 a
1936 <3 b
1937 <2 c
1938 <1 m
1939 <1 d
1940
1941 @ rules
1942 &a<b<<b\u0301 &z<b
1943 * compare
1944 <1 a
1945 <1 b\u0301
1946 <1 z
1947 <1 b
1948
1949 @ rules
1950 &z<m<<<q<<<m
1951 * compare
1952 <1 z
1953 <1 q
1954 <3 m
1955
1956 @ rules
1957 &z<<<m<q<<<m
1958 * compare
1959 <1 z
1960 <1 q
1961 <3 m
1962
1963 @ rules
1964 & a < b < c < d& r < c
1965 * compare
1966 <1 a
1967 <1 b
1968 <1 d
1969 <1 r
1970 <1 c
1971
1972 @ rules
1973 & a < b < c < d& c < m
1974 * compare
1975 <1 a
1976 <1 b
1977 <1 c
1978 <1 m
1979 <1 d
1980
1981 @ rules
1982 & a < b < c < d& a < m
1983 * compare
1984 <1 a
1985 <1 m
1986 <1 b
1987 <1 c
1988 <1 d
1989
1990 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
1991 # The following two rules should sort the particular list of strings the same.
1992 @ rules
1993 &AE <<< a << b <<< c &d <<< f
1994 * compare
1995 <1 AE
1996 <3 a
1997 <2 b
1998 <3 c
1999 <1 d
2000 <3 f
2001
2002 @ rules
2003 &A <<< a / E << b / E <<< c /E &d <<< f
2004 * compare
2005 <1 AE
2006 <3 a
2007 <2 b
2008 <3 c
2009 <1 d
2010 <3 f
2011
2012 # The following two rules should sort the particular list of strings the same.
2013 @ rules
2014 &AE <<< a <<< b << c << d < e < f <<< g
2015 * compare
2016 <1 AE
2017 <3 a
2018 <3 b
2019 <2 c
2020 <2 d
2021 <1 e
2022 <1 f
2023 <3 g
2024
2025 @ rules
2026 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
2027 * compare
2028 <1 AE
2029 <3 a
2030 <3 b
2031 <2 c
2032 <2 d
2033 <1 e
2034 <1 f
2035 <3 g
2036
2037 # The following two rules should sort the particular list of strings the same.
2038 @ rules
2039 &AE <<< B <<< C / D <<< F
2040 * compare
2041 <1 AE
2042 <3 B
2043 <3 F
2044 <1 AED
2045 <3 C
2046
2047 @ rules
2048 &A <<< B / E <<< C / ED <<< F / E
2049 * compare
2050 <1 AE
2051 <3 B
2052 <3 F
2053 <1 AED
2054 <3 C
2055
2056 ** test: never reorder trailing primaries
2057 @ root
2058 % reorder Zzzz Grek
2059 * compare
2060 <1 L
2061 <1 字
2062 <1 Ω
2063 <1 \uFFFD
2064 <1 \uFFFF
2065
2066 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
2067 @ rules
2068 &u=ab|cd
2069 &v=b|ce
2070 * compare
2071 <1 abc
2072 <1 abcc
2073 <1 abcf
2074 <1 abcd
2075 = abu
2076 <1 abce
2077 = abv
2078
2079 # With the following rules, there is only one prefix per composite ĉ or ç,
2080 # but both prefixes apply to just c in NFD form.
2081 # We would get different results for composed vs. NFD input
2082 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
2083 @ rules
2084 &x=op|ĉ
2085 &y=p|ç
2086 * compare
2087 <1 opc
2088 <2 opć
2089 <1 opcz
2090 <1 opd
2091 <1 opĉ
2092 = opc\u0302
2093 = opx
2094 <1 opç
2095 = opc\u0327
2096 = opy
2097
2098 # The mapping that is used is the one with the longest matching prefix for which
2099 # there is also a suffix match; among several mappings for that prefix, the longest suffix match wins.
2100 @ rules
2101 &❶=d
2102 &❷=de
2103 &❸=def
2104 &①=c|d
2105 &②=c|de
2106 &③=c|def
2107 &④=bc|d
2108 &⑤=bc|de
2109 &⑥=bc|def
2110 &⑦=abc|d
2111 &⑧=abc|de
2112 &⑨=abc|def
2113 * compare
2114 <1 9aadzz
2115 = 9aa❶zz
2116 <1 9aadez
2117 = 9aa❷z
2118 <1 9aadef
2119 = 9aa❸
2120 <1 9acdzz
2121 = 9ac①zz
2122 <1 9acdez
2123 = 9ac②z
2124 <1 9acdef
2125 = 9ac③
2126 <1 9bcdzz
2127 = 9bc④zz
2128 <1 9bcdez
2129 = 9bc⑤z
2130 <1 9bcdef
2131 = 9bc⑥
2132 <1 abcdzz
2133 = abc⑦zz
2134 <1 abcdez
2135 = abc⑧z
2136 <1 abcdef
2137 = abc⑨
2138
2139 ** test: prefix + discontiguous contraction with missing prefix contraction
2140 # Unfortunate terminology: The first "prefix" here is the pre-context,
2141 # the second "prefix" refers to the contraction/relation string that is
2142 # one shorter than the one being tested.
2143 @ rules
2144 &x=p|e
2145 &y=p|ê
2146 &z=op|ê
2147 # No mapping for op|e:
2148 # Discontiguous contraction matching should not match op|ê in opệ
2149 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
2150 # but there is no match on op|e.
2151 * compare
2152 <1 oPe
2153 <1 ope
2154 = opx
2155 <1 opệ
2156 = opy\u0323 # y not z
2157 <1 opê
2158 = opz
2159
2160 # We cannot test for fallback by whether the contraction default CE32
2161 # is for another contraction. With the following rules, there is no mapping for op|e,
2162 # and the fallback to prefix p has no contractions.
2163 @ rules
2164 &x=p|e
2165 &z=op|ê
2166 * compare
2167 <1 oPe
2168 <1 ope
2169 = opx
2170 <2 opệ
2171 = opx\u0323\u0302 # x not z
2172 <1 opê
2173 = opz
2174
2175 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
2176 @ rules
2177 &x=e
2178 &z=op|ê
2179 * compare
2180 <1 ope
2181 = opx
2182 <3 oPe
2183 = oPx
2184 <2 opệ
2185 = opx\u0323\u0302 # x not z
2186 <1 opê
2187 = opz
2188
2189 ** test: maxVariable via rules
2190 @ rules
2191 [maxVariable space][alternate shifted]
2192 * compare
2193 = \u0020
2194 = \u000A
2195 <1 .
2196 <1 ° # degree sign
2197 <1 $
2198 <1 0
2199
2200 ** test: maxVariable via setting
2201 @ root
2202 % maxVariable=currency
2203 % alternate=shifted
2204 * compare
2205 = \u0020
2206 = \u000A
2207 = .
2208 = ° # degree sign
2209 = $
2210 <1 0
2211
2212 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
2213 # This tests canonical closure, but it also tests that CollationFastLatin
2214 # bails out properly for contractions with combining marks.
2215 # For that we need pairs of strings that remain in the Latin fastpath
2216 # long enough, hence the extra "= b" lines.
2217 @ rules
2218 &b=\u00e4\u00e4
2219 * compare
2220 <1 b
2221 = \u00e4\u00e4
2222 = b
2223 = a\u0308a\u0308
2224 = b
2225 = \u00e4a\u0308
2226 = b
2227 = a\u0308\u00e4
2228
2229 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
2230 @ rules
2231 &b=\u00C5
2232 * compare
2233 <1 b
2234 = \u00C5
2235 = b
2236 = A\u030A
2237 = b
2238 = \u212B
2239
2240 ** test: reset-before on already-tailored characters, ICU ticket 10108
2241 @ rules
2242 &a<w<<x &[before 2]x<<y
2243 * compare
2244 <1 a
2245 <1 w
2246 <2 y
2247 <2 x
2248
2249 @ rules
2250 &a<<w<<<x &[before 2]x<<y
2251 * compare
2252 <1 a
2253 <2 y
2254 <2 w
2255 <3 x
2256
2257 @ rules
2258 &a<w<x &[before 2]x<<y
2259 * compare
2260 <1 a
2261 <1 w
2262 <1 y
2263 <2 x
2264
2265 @ rules
2266 &a<w<<<x &[before 2]x<<y
2267 * compare
2268 <1 a
2269 <1 y
2270 <2 w
2271 <3 x
2272
2273 ** test: numeric collation with other settings, ICU ticket 9092
2274 @ root
2275 % strength=identical
2276 % caseFirst=upper
2277 % numeric=on
2278 * compare
2279 <1 100\u0020a
2280 <1 101
2281
2282 ** test: collation type fallback from unsupported type, ICU ticket 10149
2283 @ locale fr-CA-u-co-phonebk
2284 # Expect the same result as with fr-CA, using backwards-secondary order.
2285 # That is, we should fall back from the unsupported collation type
2286 # to the locale's default collation type.
2287 * compare
2288 <1 cote
2289 <2 côte
2290 <2 coté
2291 <2 côté
2292
2293 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
2294 @ rules
2295 &b<a @ &v<<w
2296 * compare
2297 <1 b
2298 <1 a
2299 <1 cote
2300 <2 côte
2301 <2 coté
2302 <2 côté
2303 <1 v
2304 <2 w
2305 <1 x
2306
2307 ** test: shifted+reordering, ICU ticket 9507
2308 @ root
2309 % reorder Grek punct space
2310 % alternate=shifted
2311 % strength=quaternary
2312 # Which primaries are "variable" should be determined without script reordering,
2313 # and then primaries should be reordered whether they are shifted to quaternary or not.
2314 * compare
2315 <4 ( # punctuation
2316 <4 )
2317 <4 \u0020 # space
2318 <1 ` # symbol
2319 <1 ^
2320 <1 $ # currency symbol
2321 <1 €
2322 <1 0 # numbers
2323 <1 ε # Greek
2324 <1 e # Latin
2325 <1 e(e
2326 <4 e)e
2327 <4 e\u0020e
2328 <4 ee
2329 <3 e(E
2330 <4 e)E
2331 <4 e\u0020E
2332 <4 eE
2333
2334 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
2335 @ rules
2336 &\u0001<<<b<<<B
2337 % caseFirst=upper
2338 * compare
2339 <1 aaa
2340 <3 aaaB
2341
2342 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
2343 @ rules
2344 &\u0001<<<b<<<B
2345 % strength=secondary
2346 % caseLevel=on
2347 * compare
2348 <1 a
2349 = ab
2350 = aB
2351
2352 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
2353 @ rules
2354 &[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57
2355 * compare
2356 <1 ൗx
2357 <2 ൌx
2358 <1 ൗy
2359 <2 ൌy
2360
2361 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
2362 @ rules
2363 &q<<*a''c
2364 * compare
2365 <1 d
2366 <1 p
2367 <1 q
2368 <2 a
2369 <2 \u0027
2370 <2 c
2371 <1 r
2372
2373 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
2374 ** test: locale -u- with collation keywords, ICU ticket 8260
2375 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
2376 * compare
2377 <4 \u0020 # space is shifted, strength=quaternary
2378 <1 ! # punctuation is regular
2379 <1 2
2380 <1 12 # numeric sorting
2381 <1 B
2382 <c b # uppercase first on case level
2383 <1 x\u0301\u0308
2384 <2 x\u0308\u0301 # normalization off
2385
2386 ** test: locale @ with collation keywords, ICU ticket 8260
2387 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
2388 * compare
2389 <4 $ # currency symbols are shifted, strength=quaternary
2390 <1 àla
2391 <2 alà # backwards secondary level
2392
2393 ** test: locale -u- with script reordering, ICU ticket 8260
2394 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
2395 * compare
2396 <1 \u0020
2397 <1 あ
2398 <1 ☂
2399 <1 Ω
2400 <1 丂
2401 <1 ж
2402 <1 L
2403 <1 4
2404 <1 Ձ
2405 <1 अ
2406 <1 ሄ
2407 <1 ฉ
2408
2409 ** test: locale @collation=type should be case-insensitive
2410 @ locale de@coLLation=PhoneBook
2411 * compare
2412 <1 ae
2413 <2 ä
2414 <3 Ä
2415
2416 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
2417 @ locale de-u-co-search
2418 * compare
2419 <1 =
2420 <1 ≠
2421 <1 a
2422 <1 ae
2423 <2 ä
2424
2425 # Once more, but with runtime builder.
2426 @ rules
2427 [import und-u-co-search][import de-u-co-phonebk]
2428 * compare
2429 <1 =
2430 <1 ≠
2431 <1 a
2432 <1 ae
2433 <2 ä
2434
2435 # Once again, with import from "root" not "und" (as in a proper language tag).
2436 @ rules
2437 [import root-u-co-search][import de-u-co-phonebk]
2438 * compare
2439 <1 =
2440 <1 ≠
2441 <1 a
2442 <1 ae
2443 <2 ä
2444
2445 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
2446 # Greek should sort Greek first.
2447 @ rules
2448 [import el]
2449 * compare
2450 <1 4
2451 <1 Ω
2452 <1 L
2453
2454 # Import Greek, and then reset the reordering.
2455 @ rules
2456 [import el][reorder Zzzz]
2457 * compare
2458 <1 4
2459 <1 L
2460 <1 Ω
2461
2462 # "others" is a synonym for Zzzz.
2463 @ rules
2464 [import el][reorder others]
2465 * compare
2466 <1 4
2467 <1 L
2468 <1 Ω
2469
2470 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
2471 @ rules
2472 &x<<aa<<<Aa<<<AA
2473 % strength=secondary
2474 * compare
2475 <1 AA
2476 <2 Aẩ
2477 <2 aą
2478 * compare
2479 <1 AA
2480 <2 aą
2481
2482 ** test: tailor tertiary-after a common tertiary where there is a lower one
2483 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
2484 # See ICU ticket 11448 & CLDR ticket 7222.
2485 @ rules
2486 &あ<<<x<<<y<<<z
2487 * compare
2488 <1 ぁ
2489 <3 あ
2490 <3 x
2491 <3 y
2492 <3 z
2493 <3 ァ
2494 <1 い
2495
2496 ** test: tailor tertiary-after a below-common tertiary
2497 @ rules
2498 &ぁ<<<x<<<y<<<z
2499 * compare
2500 <1 ぁ
2501 <3 x
2502 <3 y
2503 <3 z
2504 <3 あ
2505 <3 ァ
2506 <1 い
2507
2508 ** test: tailor tertiary-before a common tertiary where there is a lower one
2509 @ rules
2510 &[before 3]あ<<<x<<<y<<<z
2511 * compare
2512 <1 ぁ
2513 <3 x
2514 <3 y
2515 <3 z
2516 <3 あ
2517 <3 ァ
2518 <1 い
2519
2520 ** test: tailor tertiary-before a below-common tertiary
2521 @ rules
2522 &[before 3]ぁ<<<x<<<y<<<z
2523 * compare
2524 <1 x
2525 <3 y
2526 <3 z
2527 <3 ぁ
2528 <3 あ
2529 <3 ァ
2530 <1 い
2531
2532 ** test: reorder single scripts not groups, ICU ticket 11449
2533 @ root
2534 % reorder Goth Latn
2535 * compare
2536 <1 4
2537 <1 𐌰 # Gothic
2538 <1 L
2539 <1 Ω
2540 # Before ICU 55, the following reordered together with Gothic.
2541 <1 𐌈 # Old Italic
2542 <1 𐑐 # Shavian