]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/testdata/collationtest.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / testdata / collationtest.txt
1 # Copyright (c) 2012-2015 International Business Machines
2 # Corporation and others. All Rights Reserved.
3 #
4 # This file should be in UTF-8 with a signature byte sequence ("BOM").
5 #
6 # collationtest.txt: Collation test data.
7 #
8 # created on: 2012apr13
9 # created by: Markus W. Scherer
10
11 # A line with "** test: description" is used for verbose and error output.
12
13 # A collator can be set with "@ root" or "@ locale language-tag",
14 # for example "@ locale de-u-co-phonebk".
15 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
16
17 # A collator can be built with "@ rules".
18 # An "@ rules" line is followed by one or more lines with the tailoring rules.
19
20 # A collator can be modified with "% attribute=value", or with "% reorder" followed by reorder codes (or "default").
21
22 # "* compare" tests the order (= or <) of the following strings.
23 # The relation can be "=" or "<" (the level of the difference is not specified)
24 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference).
25
26 # Test sections ("* compare") are terminated by
27 # definitions of new collators, changing attributes, or new test sections.
28
29 ** test: simple CEs & expansions
30 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
31 # Here we mostly cover a few unusual mappings.
32 @ rules
33 &\x01 # most control codes are ignorable
34 <<<\u0300 # tertiary CE
35 &9<\x00 # NUL not ignorable
36 &\uA00A\uA00B=\uA002 # two long-primary CEs
37 &\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits
38
39 * compare
40 = \x01
41 = \x02
42 <3 \u0300
43 <1 9
44 <1 \x00
45 = \x01\x00\x02
46 <1 a
47 <3 a\u0300
48 <2 a\u0308
49 = ä
50 <1 b
51 <1 か # Hiragana Ka (U+304B)
52 <2 か\u3099 # plus voiced sound mark
53 = が # Hiragana Ga (U+304C)
54 <1 \uA00A\uA00B
55 = \uA002
56 <1 \uA00A\uA00B\u00050004
57 <1 \uA00A\uA00B\u00050005
58 = \uA003
59 <1 \uA00A\uA00B\u00050006
60
61 ** test: contractions
62 # Create some interesting mappings, and map some normalization-inert characters
63 # (which are not subject to canonical reordering)
64 # to some of the same CEs to check the sequence of CEs.
65 @ rules
66
67 # Contractions starting with 'a' should not continue with any character < U+0300
68 # so that we can test a shortcut for that.
69 &a=ⓐ
70 &b<bz=ⓑ
71 &d<dz\u0301=ⓓ # d+z+acute
72 &z
73 <a\u0301=Ⓐ # a+acute sorts after z
74 <a\u0301\u0301=Ⓑ # a+acute+acute
75 <a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right
76 <a\u030a=Ⓓ # a+ring
77 <a\u0323=Ⓔ # a+dot below
78 <a\u0323\u0358=Ⓕ # a+dot below+dot above right
79 <a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring
80 <a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z
81
82 &\U0001D158=⁰ # musical notehead black (has a symbol primary)
83 <\U0001D158\U0001D165=¼ # musical quarter note
84
85 # deliberately missing prefix contractions:
86 # dz
87 # a\u0327
88 # a\u0327\u0323
89 # a\u0327\u0323b
90
91 &\x01
92 <<<\U0001D165=¹ # musical stem (ccc=216)
93 <<<\U0001D16D=² # musical augmentation dot (ccc=226)
94 <<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226)
95 &\u0301=❶ # acute (ccc=230)
96 &\u030a=❷ # ring (ccc=230)
97 &\u0308=❸ # diaeresis (ccc=230)
98 <<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230)
99 &\u0327=❺ # cedilla (ccc=202)
100 &\u0323=❻ # dot below (ccc=220)
101 &\u0331=❼ # macron below (ccc=220)
102 <<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232)
103 &\u0334=❾ # tilde overlay (ccc=1)
104 &\u0358=❿ # dot above right (ccc=232)
105
106 &\u0f71=① # tibetan vowel sign aa
107 &\u0f72=② # tibetan vowel sign i
108 # \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73
109 &\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129)
110
111 ** test: simple contractions
112
113 # Some strings are chosen to cause incremental contiguous contraction matching to
114 # go into partial matches for prefixes of contractions
115 # (where the prefixes are deliberately not also contractions).
116 # When there is no complete match, then the matching code must back out of those
117 # so that discontiguous contractions work as specified.
118
119 * compare
120 # contraction starter with no following text, or mismatch, or blocked
121 <1 a
122 = ⓐ
123 <1 aa
124 = ⓐⓐ
125 <1 ab
126 = ⓐb
127 <1 az
128 = ⓐz
129
130 * compare
131 <1 a
132 <2 a\u0308\u030a # ring blocked by diaeresis
133 = ⓐ❸❷
134 <2 a\u0327
135 = ⓐ❺
136
137 * compare
138 <2 \u0308
139 = ❸
140 <2 \u0308\u030a\u0301 # acute blocked by ring
141 = ❸❷❶
142
143 * compare
144 <1 \U0001D158
145 = ⁰
146 <1 \U0001D158\U0001D165
147 = ¼
148
149 # no discontiguous contraction because of missing prefix contraction d+z,
150 # and a starter ('z') after the 'd'
151 * compare
152 <1 dz\u0323\u0301
153 = dz❻❶
154
155 # contiguous contractions
156 * compare
157 <1 abz
158 = ⓐⓑ
159 <1 abzz
160 = ⓐⓑz
161
162 * compare
163 <1 a
164 <1 z
165 <1 a\u0301
166 = Ⓐ
167 <1 a\u0301\u0301
168 = Ⓑ
169 <1 a\u0301\u0301\u0358
170 = Ⓒ
171 <1 a\u030a
172 = Ⓓ
173 <1 a\u0323\u0358
174 = Ⓕ
175 <1 a\u0327\u0323\u030a # match despite missing prefix
176 = Ⓖ
177 <1 a\u0327\u0323bz
178 = Ⓗ
179
180 * compare
181 <2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second
182 = ❸❹
183
184 * compare
185 <1 \U0001D158\U0001D165
186 = ¼
187
188 * compare
189 <3 \U0001D165\U0001D16D
190 = ³
191
192 ** test: discontiguous contractions
193 * compare
194 <1 a\u0327\u030a # a+ring skips cedilla
195 = Ⓓ❺
196 <2 a\u0327\u0327\u030a # a+ring skips 2 cedillas
197 = Ⓓ❺❺
198 <2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas
199 = Ⓓ❺❺❺
200 <2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas
201 = Ⓓ❾❺❺
202 <1 a\u0327\u0323 # a+dot below skips cedilla
203 = Ⓔ❺
204 <1 a\u0323\u0301\u0358 # a+dot below+dot ab.r.: 2-char match, then skips acute
205 = Ⓕ❶
206 <2 a\u0334\u0323\u0358 # a+dot below+dot above right skips tilde overlay
207 = Ⓕ❾
208
209 * compare
210 <2 \u0331\u0331\u0358 # macron below+dot ab.r. skips the second macron below
211 = ❽❼
212
213 * compare
214 <1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
215 = Ⓓ❺❼❻
216 <1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla
217 = Ⓔ❺²❷
218 <2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas
219 = Ⓔ❺❺❷
220 <2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla
221 = Ⓔ❺❻❷
222 <2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla
223 = Ⓔ❾❺❷
224
225 * compare
226 <1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla
227 = ¼❺
228 <1 a\U0001D165\u0323 # a+dot below skips stem
229 = Ⓔ¹
230
231 # partial contiguous match, backs up, matches discontiguous contraction
232 <1 a\u0327\u0323b
233 = Ⓔ❺b
234 <1 a\u0327\u0323ba
235 = Ⓔ❺bⓐ
236
237 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
238 * compare
239 <1 a\u0327\u0301\u0301\u0358
240 = Ⓒ❺
241
242 # FCD but not NFD
243 * compare
244 <1 a\u0f73\u0301 # a+acute skips tibetan ii
245 = Ⓐ③
246
247 # FCD but the 0f71 inside the 0f73 must be skipped
248 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
249 * compare
250 <1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
251 = ③①
252
253 ** test: discontiguous contractions with nested contractions
254 * compare
255 <1 a\u0323\u0308\u0301\u0358
256 = Ⓕ❹
257 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
258 = Ⓕ❹❹
259
260 ** test: discontiguous contractions with interleaved contractions
261 * compare
262 # a+ring & cedilla & macron below+dot above right
263 <1 a\u0327\u0331\u030a\u0358
264 = Ⓓ❺❽
265
266 # a+ring & 1x..3x macron below+dot above right
267 <2 a\u0331\u030a\u0358
268 = Ⓓ❽
269 <2 a\u0331\u0331\u030a\u0358\u0358
270 = Ⓓ❽❽
271 # also skips acute
272 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
273 = Ⓓ❽❽❽❶
274
275 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
276 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
277 = Ⓔ³ⓓ
278
279 ** test: some simple string comparisons
280 @ root
281 * compare
282 # first string compares against ""
283 = \u0000
284 < a
285 <1 b
286 <3 B
287 = \u0000B\u0000
288
289 ** test: compare with strength=primary
290 % strength=primary
291 * compare
292 <1 a
293 <1 b
294 = B
295
296 ** test: compare with strength=secondary
297 % strength=secondary
298 * compare
299 <1 a
300 <1 b
301 = B
302
303 ** test: compare with strength=tertiary
304 % strength=tertiary
305 * compare
306 <1 a
307 <1 b
308 <3 B
309
310 ** test: compare with strength=quaternary
311 % strength=quaternary
312 * compare
313 <1 a
314 <1 b
315 <3 B
316
317 ** test: compare with strength=identical
318 % strength=identical
319 * compare
320 <1 a
321 <1 b
322 <3 B
323
324 ** test: côté with forwards secondary
325 @ root
326 * compare
327 <1 cote
328 <2 coté
329 <2 côte
330 <2 côté
331
332 ** test: côté with forwards secondary vs. U+FFFE merge separator
333 # Merged sort keys: On each level, any difference in the first segment
334 # must trump any further difference.
335 * compare
336 <1 cote\uFFFEcôté
337 <2 coté\uFFFEcôte
338 <2 côte\uFFFEcoté
339 <2 côté\uFFFEcote
340
341 ** test: côté with backwards secondary
342 % backwards=on
343 * compare
344 <1 cote
345 <2 côte
346 <2 coté
347 <2 côté
348
349 ** test: côté with backwards secondary vs. U+FFFE merge separator
350 # Merged sort keys: On each level, any difference in the first segment
351 # must trump any further difference.
352 * compare
353 <1 cote\uFFFEcôté
354 <2 côte\uFFFEcoté
355 <2 coté\uFFFEcôte
356 <2 côté\uFFFEcote
357
358 ** test: U+FFFE on identical level
359 @ root
360 % strength=identical
361 * compare
362 # All of these control codes are completely-ignorable, so that
363 # their low code points are compared with the merge separator.
364 # The merge separator must compare less than any other character.
365 <1 \uFFFE\u0001\u0002\u0003
366 <i \u0001\uFFFE\u0002\u0003
367 <i \u0001\u0002\uFFFE\u0003
368 <i \u0001\u0002\u0003\uFFFE
369
370 * compare
371 # The merge separator must even compare less than U+0000.
372 <1 \uFFFE\u0000\u0000
373 <i \u0000\uFFFE\u0000
374 <i \u0000\u0000\uFFFE
375
376 ** test: Hani < surrogates < U+FFFD
377 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
378 # so with that the strings with surrogates will compare equal to each other
379 # and equal to the string with U+FFFD.
380 @ root
381 % strength=identical
382 * compare
383 <1 abz
384 <1 a\u4e00z
385 <1 a\U00020000z
386 <1 a\ud800z
387 <1 a\udbffz
388 <1 a\udc00z
389 <1 a\udfffz
390 <1 a\ufffdz
391
392 ** test: script reordering
393 @ root
394 % reorder Hani Zzzz digit
395 * compare
396 <1 ?
397 <1 +
398 <1 丂
399 <1 a
400 <1 α
401 <1 5
402
403 % reorder default
404 * compare
405 <1 ?
406 <1 +
407 <1 5
408 <1 a
409 <1 α
410 <1 丂
411
412 ** test: empty rules
413 @ rules
414 * compare
415 <1 a
416 <2 ä
417 <3 Ä
418 <1 b
419
420 ** test: very simple rules
421 @ rules
422 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
423 % strength=quaternary
424 * compare
425 <1 a
426 = e
427 <4 q
428 <4 r
429 <1 x
430 <3 X
431 <2 y
432 <3 Y
433 <2 z
434 <3 Z
435
436 ** test: tailoring twice before a root position: primary
437 @ rules
438 &[before 1]b<p
439 &[before 1]b<q
440 * compare
441 <1 a
442 <1 p
443 <1 q
444 <1 b
445
446 ** test: tailoring twice before a root position: secondary
447 @ rules
448 &[before 2]ſ<<p
449 &[before 2]ſ<<q
450 * compare
451 <1 s
452 <2 p
453 <2 q
454 <2 ſ
455
456 # secondary-before common weight
457 @ rules
458 &[before 2]b<<p
459 &[before 2]b<<q
460 * compare
461 <1 a
462 <1 p
463 <2 q
464 <2 b
465
466 ** test: tailoring twice before a root position: tertiary
467 @ rules
468 &[before 3]B<<<p
469 &[before 3]B<<<q
470 * compare
471 <1 b
472 <3 p
473 <3 q
474 <3 B
475
476 # tertiary-before common weight
477 @ rules
478 &[before 3]b<<<p
479 &[before 3]b<<<q
480 * compare
481 <1 a
482 <1 p
483 <3 q
484 <3 b
485
486 @ rules
487 &[before 2]b<<s
488 &[before 3]s<<<p
489 &[before 3]s<<<q
490 * compare
491 <1 a
492 <1 p
493 <3 q
494 <3 s
495 <2 b
496
497 ** test: tailor after completely ignorable
498 @ rules
499 &\x00<<<x<<y
500 * compare
501 = \x00
502 = \x1F
503 <3 x
504 <2 y
505
506 ** test: secondary tailoring gaps, ICU ticket 9362
507 @ rules
508 &[before 2]s<<'_'
509 &s<<r # secondary between s and ſ (long s)
510 &ſ<<*a-q # more than 15 between ſ and secondary CE boundary
511 &[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE
512 &[last primary ignorable]<<y<<z
513
514 * compare
515 <2 u
516 <2 v
517 <2 \u0332 # lowest secondary CE
518 <2 \u0308
519 <2 y
520 <2 z
521 <1 s_
522 <2 ss
523 <2 sr
524 <2 sſ
525 <2 sa
526 <2 sb
527 <2 sp
528 <2 sq
529 <2 sus
530 <2 svs
531 <2 rs
532
533 ** test: tertiary tailoring gaps, ICU ticket 9362
534 @ rules
535 &[before 3]t<<<'_'
536 &t<<<r # tertiary between t and fullwidth t
537 &ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
538 &[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE
539 &[last secondary ignorable]<<<y<<<z
540
541 * compare
542 <3 u
543 <3 v
544 # Note: The root collator currently does not map any characters to tertiary CEs.
545 <3 y
546 <3 z
547 <1 t_
548 <3 tt
549 <3 tr
550 <3 t\uFF54 # fullwidth t (U+FF54)
551 <3 tᵀ
552 <3 ta
553 <3 tb
554 <3 tp
555 <3 tq
556 <3 tut
557 <3 tvt
558 <3 rt
559
560 ** test: secondary & tertiary around root character
561 @ rules
562 &[before 2]m<<r
563 &m<<s
564 &[before 3]m<<<u
565 &m<<<v
566 * compare
567 <1 l
568 <1 r
569 <2 u
570 <3 m
571 <3 v
572 <2 s
573 <1 n
574
575 ** test: secondary & tertiary around tailored item
576 @ rules
577 &m<x
578 &[before 2]x<<r
579 &x<<s
580 &[before 3]x<<<u
581 &x<<<v
582 * compare
583 <1 m
584 <1 r
585 <2 u
586 <3 x
587 <3 v
588 <2 s
589 <1 n
590
591 ** test: more nesting of secondary & tertiary before
592 @ rules
593 &[before 3]m<<<u
594 &[before 2]m<<r
595 &[before 3]r<<<q
596 &m<<<w
597 &m<<t
598 &[before 3]w<<<v
599 &w<<<x
600 &w<<s
601 * compare
602 <1 l
603 <1 q
604 <3 r
605 <2 u
606 <3 m
607 <3 v
608 <3 w
609 <3 x
610 <2 s
611 <2 t
612 <1 n
613
614 ** test: case bits
615 @ rules
616 &w<x # tailored CE getting case bits
617 =uv=uV=Uv=UV # 2 chars -> 1 CE
618 &ae=ch=cH=Ch=CH # 2 chars -> 2 CEs
619 &rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs
620 % caseFirst=lower
621 * compare
622 <1 ae
623 = ch
624 <3 cH
625 <3 Ch
626 <3 CH
627 <1 rst
628 = yz
629 <3 yZ
630 <3 Yz
631 <3 YZ
632 <1 w
633 <1 x
634 = uv
635 <3 uV
636 = Uv # mixed case on single CE cannot distinguish variations
637 <3 UV
638
639 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
640 @ rules
641 &\u0001<<<t<<<T # tertiary CEs
642 % caseFirst=lower
643 * compare
644 <1 aa
645 <3 aat
646 <3 aaT
647 <3 aA
648 <3 aAt
649 <3 ata
650 <3 aTa
651
652 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
653 % caseFirst=upper
654 * compare
655 <1 aA
656 <3 aAt
657 <3 aa
658 <3 aat
659 <3 aaT
660 <3 ata
661 <3 aTa
662
663 ** test: reset on expansion, ICU tickets 9415 & 9593
664 @ rules
665 &æ<x # tailor the last primary CE so that x sorts between ae and af
666 &æb=bæ # copy all reset CEs to make bæ sort the same
667 &각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
668 &⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference
669 &l·=z # handle the pre-context for · when fetching reset CEs
670 <<u # copy/tailor 2 CEs
671
672 * compare
673 <1 ae
674 <2 æ
675 <1 x
676 <1 af
677
678 * compare
679 <1 aeb
680 <2 æb
681 = bæ
682
683 * compare
684 <1 각
685 <1 h
686 <1 갂
687 <1 갃
688
689 * compare
690 <1 · # by itself: primary CE
691 <1 l
692 <2 l· # l+middle dot has only a secondary difference from l
693 = z
694 <2 u
695
696 * compare
697 <1 (13)
698 <3 ⒀ # DUCET sets special tertiary weights in all CEs
699 <2 y
700 <1 (13[
701
702 % alternate=shifted
703 * compare
704 <1 (13)
705 = 13
706 <3 ⒀
707 = y # alternate=shifted removes the tailoring difference on the last CE
708 <1 14
709
710 ** test: contraction inside extension, ICU ticket 9378
711 @ rules
712 &а<<х/й # all letters are Cyrillic
713 * compare
714 <1 ай
715 <2 х
716
717 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
718 @ rules
719 &t<x &ᵀ<y # same primary weights
720 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
721 * compare
722 <1 q
723 <1 u
724 <1 v
725 <1 ꝗ
726 <1 t
727 <3 ᵀ
728 <1 y
729 <1 x
730
731 # Principle: Each rule builds on the state of preceding rules and ignores following rules.
732
733 ** test: later rule does not affect earlier reset position, ICU ticket 10105
734 @ rules
735 &a < u < v < w &ov < x &b < v
736 * compare
737 <1 oa
738 <1 ou
739 <1 x # CE(o) followed by CE between u and w
740 <1 ow
741 <1 ob
742 <1 ov
743
744 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
745 @ rules
746 &a=x/b &v=b
747 % strength=secondary
748 * compare
749 <1 B
750 <1 c
751 <1 v
752 = b
753 * compare
754 <1 AB
755 = x
756 <1 ac
757 <1 av
758 = ab
759
760 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
761 @ rules
762 &a <<< c / e &g <<< e / l
763 % strength=secondary
764 * compare
765 <1 AE
766 = c
767 <2 æ
768 <1 agl
769 = ae
770
771 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
772 @ rules
773 &a = b / c &d = c / e
774 % strength=secondary
775 * compare
776 <1 AC # C is still only tertiary different from the original c
777 = b
778 <1 ade
779 = ac
780
781 ** test: extension contains tailored character, ICU ticket 10105
782 @ rules
783 &a=e &b=u/e
784 * compare
785 <1 a
786 = e
787 <1 ba
788 = be
789 = u
790
791 ** test: add simple mappings for characters with root context
792 @ rules
793 &z=· # middle dot has a prefix mapping in the CLDR root
794 &n=и # и (U+0438) has contractions in the root
795 * compare
796 <1 l
797 <2 l· # root mapping for l|· still works
798 <1 z
799 = ·
800 * compare
801 <1 n
802 = и
803 <1 И
804 <1 и\u0306 # root mapping for й=и\u0306 still works
805 = й
806 <3 Й
807
808 ** test: add context mappings around characters with root context
809 @ rules
810 &z=·h # middle dot has a prefix mapping in the CLDR root
811 &n=ә|и # и (U+0438) has contractions in the root
812 * compare
813 <1 l
814 <2 l· # root mapping for l|· still works
815 <1 z
816 = ·h
817 * compare
818 <1 и
819 <3 И
820 <1 и\u0306 # root mapping for й=и\u0306 still works
821 = й
822 * compare
823 <1 әn
824 = әи
825 <1 әo
826
827 ** test: many secondary CEs at the top of their range
828 @ rules
829 &[last primary ignorable]<<*\u2801-\u28ff
830 * compare
831 <2 \u0308
832 <2 \u2801
833 <2 \u2802
834 <2 \u2803
835 <2 \u2804
836 <2 \u28fd
837 <2 \u28fe
838 <2 \u28ff
839 <1 \x20
840
841 ** test: many tertiary CEs at the top of their range
842 @ rules
843 &[last secondary ignorable]<<<*a-z
844 * compare
845 <3 a
846 <3 b
847 <3 c
848 <3 d
849 # e..w
850 <3 x
851 <3 y
852 <3 z
853 <2 \u0308
854
855 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
856 @ rules
857 &a=p|x &b=px &c=op
858 * compare
859 <1 b
860 = px
861 <3 B
862 <1 c
863 = op
864 <3 C
865 * compare
866 <1 ca
867 = opx # first contraction op, then prefix p|x
868 <3 cA
869 <3 Ca
870
871 ** test: reset position with prefix (pre-context), ICU ticket 10102
872 @ rules
873 &a=p|x &px=y
874 * compare
875 <1 pa
876 = px
877 = y
878 <3 pA
879 <1 q
880 <1 x
881
882 ** test: prefix+contraction together (1), ICU ticket 10071
883 @ rules
884 &x=a|bc
885 * compare
886 <1 ab
887 <1 Abc
888 <1 abd
889 <1 ac
890 <1 aw
891 <1 ax
892 = abc
893 <3 aX
894 <3 Ax
895 <1 b
896 <1 bb
897 <1 bc
898 <3 bC
899 <3 Bc
900 <1 bd
901
902 ** test: prefix+contraction together (2), ICU ticket 10071
903 @ rules
904 &w=bc &x=a|b
905 * compare
906 <1 w
907 = bc
908 <3 W
909 * compare
910 <1 aw
911 <1 ax
912 = ab
913 <3 aX
914 <1 axb
915 <1 axc
916 = abc # prefix match a|b takes precedence over contraction match bc
917 <3 abC
918 <1 abd
919 <1 ay
920
921 ** test: prefix+contraction together (3), ICU ticket 10071
922 @ rules
923 &x=a|b &w=bc # reverse order of rules as previous test, order should not matter here
924 * compare # same "compare" sequences as previous test
925 <1 w
926 = bc
927 <3 W
928 * compare
929 <1 aw
930 <1 ax
931 = ab
932 <3 aX
933 <1 axb
934 <1 axc
935 = abc # prefix match a|b takes precedence over contraction match bc
936 <3 abC
937 <1 abd
938 <1 ay
939
940 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
941 @ rules
942 &d=ch &v=p|ci
943 * compare
944 <1 pc
945 <3 pC
946 <1 pcH
947 <1 pcI
948 <1 pd
949 = pch # no-prefix contraction ch matches
950 <3 pD
951 <1 pv
952 = pci # prefix+contraction p|ci matches
953 <3 pV
954
955 ** test: tailor in & around compact ranges of root primaries
956 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
957 # which should be reliably encoded as one range in the root elements data.
958 @ rules
959 &[before 1]ᚁ<a
960 &ᚁ<b
961 &[before 1]ᚂ<c
962 &ᚂ<d
963 &[before 1]ᚚ<y
964 &ᚚ<z
965 &[before 2]ᚁ<<r
966 &ᚁ<<s
967 &[before 3]ᚚ<<<t
968 &ᚚ<<<u
969 * compare
970 <1 ᣵ # U+18F5 last Canadian Aboriginal
971 <1 a
972 <1 r
973 <2 ᚁ
974 <2 s
975 <1 b
976 <1 c
977 <1 ᚂ
978 <1 d
979 <1 ᚃ
980 <1 ᚙ
981 <1 y
982 <1 t
983 <3 ᚚ
984 <3 u
985 <1 z
986 <1 ᚠ # U+16A0 first Runic
987
988 ** test: suppressContractions
989 @ rules
990 &z<ch<әж [suppressContractions [·cә]]
991 * compare
992 <1 ch
993 <3 cH # ch was suppressed
994 <1 l
995 <1 l· # primary difference, not secondary, because l|· was suppressed
996 <1 ә
997 <2 ә\u0308 # secondary difference, not primary, because contractions for ә were suppressed
998 <1 әж
999 <3 әЖ
1000
1001 ** test: Hangul & Jamo
1002 @ rules
1003 &L=\u1100 # first Jamo L
1004 &V=\u1161 # first Jamo V
1005 &T=\u11A8 # first Jamo T
1006 &\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs
1007 * compare
1008 <1 Lv
1009 <3 LV
1010 = \u1100\u1161
1011 = \uAC00
1012 <1 LVt
1013 <3 LVT
1014 = \u1100\u1161\u11A8
1015 = \uAC00\u11A8
1016 = \uAC01
1017 <2 LVT\u0308
1018 <2 \u4E00
1019 <2 \u4E01
1020 <2 \u4E80
1021 <2 \u4EFF
1022 <2 LV\u0308T
1023 <1 \uAC02
1024
1025 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
1026 @ rules
1027 &[last variable]<x
1028 [maxVariable space] # has effect only after building, no effect on following rules
1029 &[last variable]<y
1030 &[before 1][first regular]<z
1031 * compare
1032 <1 ? # some punctuation
1033 <1 x
1034 <1 y
1035 <1 z
1036 <1 $ # some symbol
1037
1038 @ rules
1039 &[last primary ignorable]<<x<<<y
1040 &[last primary ignorable]<<z
1041 * compare
1042 <2 \u0358
1043 <2 x
1044 <3 y
1045 <2 z
1046 <1 \x20
1047
1048 @ rules
1049 &[last secondary ignorable]<<<x
1050 &[last secondary ignorable]<<<y
1051 * compare
1052 <3 x
1053 <3 y
1054 <2 \u0358
1055
1056 @ rules
1057 &[before 2][first variable]<<z
1058 &[before 2][first variable]<<y
1059 &[before 3][first variable]<<<x
1060 &[before 3][first variable]<<<w
1061 &[before 1][first variable]<v
1062 &[before 2][first variable]<<u
1063 &[before 3][first variable]<<<t
1064 &[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary
1065 * compare
1066 <2 \u0358
1067 <1 s
1068 <2 \uFDD1\xA0
1069 <1 t
1070 <3 u
1071 <2 v
1072 <1 w
1073 <3 x
1074 <3 y
1075 <2 z
1076 <2 \t
1077
1078 @ rules
1079 &[before 2][first regular]<<z
1080 &[before 3][first regular]<<<y
1081 &[before 1][first regular]<x
1082 &[before 3][first regular]<<<w
1083 &[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
1084 &[before 3][first regular]<<<u
1085 &[before 1][first regular]<p # primary before the boundary: becomes variable
1086 &[before 3][first regular]<<<t # not affected by p
1087 &[last variable]<q # after p!
1088 * compare
1089 <1 ?
1090 <1 p
1091 <1 q
1092 <1 t
1093 <3 u
1094 <3 v
1095 <1 w
1096 <3 x
1097 <1 y
1098 <3 z
1099 <1 $
1100
1101 # check that p & q are indeed variable
1102 % alternate=shifted
1103 * compare
1104 = ?
1105 = p
1106 = q
1107 <1 t
1108 <3 u
1109 <3 v
1110 <1 w
1111 <3 x
1112 <1 y
1113 <3 z
1114 <1 $
1115
1116 @ rules
1117 &[before 2][first trailing]<<z
1118 &[before 1][first trailing]<y
1119 &[before 3][first trailing]<<<x
1120 * compare
1121 <1 \u4E00 # first Han, first implicit
1122 <1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary
1123 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
1124 <1 x
1125 <3 y
1126 <1 z
1127 <2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing primary.
1128
1129 @ rules
1130 &[before 2][first primary ignorable]<<z
1131 &[before 2][first primary ignorable]<<y
1132 &[before 3][first primary ignorable]<<<x
1133 &[before 3][first primary ignorable]<<<w
1134 * compare
1135 = \x01
1136 <2 w
1137 <3 x
1138 <3 y
1139 <2 z
1140 <2 \u0301
1141
1142 @ rules
1143 &[before 3][first secondary ignorable]<<<y
1144 &[before 3][first secondary ignorable]<<<x
1145 * compare
1146 = \x01
1147 <3 x
1148 <3 y
1149 <2 \u0301
1150
1151 ** test: canonical closure
1152 @ rules
1153 &X=A &U=Â
1154 * compare
1155 <1 U
1156 = Â
1157 = A\u0302
1158 <2 Ú # U with acute
1159 = U\u0301
1160 = Ấ # A with circumflex & acute
1161 = Â\u0301
1162 = A\u0302\u0301
1163 <1 X
1164 = A
1165 <2 X\u030A # with ring above
1166 = Å
1167 = A\u030A
1168 = \u212B # Angstrom sign
1169
1170 @ rules
1171 &x=\u5140\u55C0
1172 * compare
1173 <1 x
1174 = \u5140\u55C0
1175 = \u5140\uFA0D
1176 = \uFA0C\u55C0
1177 = \uFA0C\uFA0D # CJK compatibility characters
1178 <3 X
1179
1180 # canonical closure on prefix rules, ICU ticket 9444
1181 @ rules
1182 &x=ä|ŝ
1183 * compare
1184 <1 äs # not tailored
1185 <1 äx
1186 = äŝ
1187 = a\u0308s\u0302
1188 = a\u0308ŝ
1189 = äs\u0302
1190 <3 äX
1191
1192 ** test: conjoining Jamo map to expansions
1193 @ rules
1194 &gg=\u1101 # Jamo Lead consonant GG
1195 &nj=\u11AC # Jamo Trail consonant NJ
1196 * compare
1197 <1 gg\u1161nj
1198 = \u1101\u1161\u11AC
1199 = \uAE4C\u11AC
1200 = \uAE51
1201 <3 gg\u1161nJ
1202 <1 \u1100\u1100
1203
1204 ** test: canonical tail closure, ICU ticket 5913
1205 @ rules
1206 &a<â
1207 * compare
1208 <1 a
1209 <1 â # tailored
1210 = a\u0302
1211 <2 a\u0323\u0302 # discontiguous contraction
1212 = ạ\u0302 # equivalent
1213 = ậ # equivalent
1214 <1 b
1215
1216 @ rules
1217 &a<ạ
1218 * compare
1219 <1 a
1220 <1 ạ # tailored
1221 = a\u0323
1222 <2 a\u0323\u0302 # contiguous contraction plus extra diacritic
1223 = ạ\u0302 # equivalent
1224 = ậ # equivalent
1225 <1 b
1226
1227 # Tail closure should work even if there is a prefix and/or contraction.
1228 @ rules
1229 &a<\u5140|câ
1230 # In order to find discontiguous contractions for \u5140|câ
1231 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
1232 # (This follows from the UCA spec.)
1233 &x=\u5140|ca
1234 * compare
1235 <1 \u5140a
1236 = \uFA0Ca
1237 <1 \u5140câ # tailored
1238 = \uFA0Ccâ
1239 = \u5140ca\u0302
1240 = \uFA0Cca\u0302
1241 <2 \u5140ca\u0323\u0302 # discontiguous contraction
1242 = \uFA0Cca\u0323\u0302
1243 = \u5140cạ\u0302
1244 = \uFA0Ccạ\u0302
1245 = \u5140cậ
1246 = \uFA0Ccậ
1247 <1 \u5140b
1248 = \uFA0Cb
1249 <1 \u5140x
1250 = \u5140ca
1251
1252 # Double-check that without the extra mapping there will be no discontiguous match.
1253 @ rules
1254 &a<\u5140|câ
1255 * compare
1256 <1 \u5140a
1257 = \uFA0Ca
1258 <1 \u5140câ # tailored
1259 = \uFA0Ccâ
1260 = \u5140ca\u0302
1261 = \uFA0Cca\u0302
1262 <1 \u5140b
1263 = \uFA0Cb
1264 <1 \u5140ca\u0323\u0302 # no discontiguous contraction
1265 = \uFA0Cca\u0323\u0302
1266 = \u5140cạ\u0302
1267 = \uFA0Ccạ\u0302
1268 = \u5140cậ
1269 = \uFA0Ccậ
1270
1271 @ rules
1272 &a<cạ
1273 * compare
1274 <1 a
1275 <1 cạ # tailored
1276 = ca\u0323
1277 <2 ca\u0323\u0302 # contiguous contraction plus extra diacritic
1278 = cạ\u0302 # equivalent
1279 = cậ # equivalent
1280 <1 b
1281
1282 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1283 # = 03C9 0313 0300 0345
1284 # ccc = 0, 230, 230, 240
1285 @ rules
1286 &δ=αῳ
1287 # In order to find discontiguous contractions for αῳ
1288 # there must exist a mapping for αω, regardless of what it maps to.
1289 # (This follows from the UCA spec.)
1290 &ε=αω
1291 * compare
1292 <1 δ
1293 = αῳ
1294 = αω\u0345
1295 <2 αω\u0313\u0300\u0345 # discontiguous contraction
1296 = αὠ\u0300\u0345
1297 = αὢ\u0345
1298 = αᾢ
1299 <2 αω\u0300\u0313\u0345
1300 = αὼ\u0313\u0345
1301 = αῲ\u0313 # not FCD
1302 <1 ε
1303 = αω
1304
1305 # Double-check that without the extra mapping there will be no discontiguous match.
1306 @ rules
1307 &δ=αῳ
1308 * compare
1309 <1 αω\u0313\u0300\u0345 # no discontiguous contraction
1310 = αὠ\u0300\u0345
1311 = αὢ\u0345
1312 = αᾢ
1313 <2 αω\u0300\u0313\u0345
1314 = αὼ\u0313\u0345
1315 = αῲ\u0313 # not FCD
1316 <1 δ
1317 = αῳ
1318 = αω\u0345
1319
1320 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
1321 # Tests code paths where the tailored string has a combining mark
1322 # that does not occur in any composite's decomposition.
1323 @ rules
1324 &δ=αὼ\u0315
1325 * compare
1326 <1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above.
1327 = αὠ\u0300\u0315
1328 = αὢ\u0315
1329 <1 δ
1330 = αὼ\u0315
1331 = αω\u0300\u0315
1332 <2 αω\u0300\u0315\u0345
1333 = αὼ\u0315\u0345
1334 = αῲ\u0315 # not FCD
1335
1336 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
1337 @ rules
1338 &z<aa
1339 * compare
1340 <1 z
1341 <1 aa
1342 <2 aa\u0308
1343 = aä
1344
1345 ** test: Jamo L with and in prefix
1346 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
1347 @ rules
1348 # Jamo Lead consonant G after G or GG
1349 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
1350 # Jamo Lead consonant GG sorts like G+G
1351 &\u1100\u1100=\u1101
1352 # Note: Making G|GG and GG|GG sort the same as G|G+G
1353 # would require the ability to reset on G|G+G,
1354 # or we could make G-after-G equal to some secondary-CE character,
1355 # and reset on a pair of those.
1356 # (It does not matter much if there are at most two G in a row in real text.)
1357 * compare
1358 <1 \u1100
1359 <2 \u1100\u1100 # only one primary from a sequence of G lead consonants
1360 = \u1101
1361 <2 \u1100\u1100\u1100
1362 = \u1101\u1100
1363 # but not = \u1100\u1101, see above
1364 <1 \u1100\u1161
1365 = \uAC00
1366 <2 \u1100\u1100\u1161
1367 = \u1100\uAC00 # prefix match from the L of the LV syllable
1368 = \u1101\u1161
1369 = \uAE4C
1370
1371 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
1372 @ rules
1373 # Low secondary CEs for Jamo V & T.
1374 # Note: T should sort before V for proper syllable order.
1375 &\u0332 # COMBINING LOW LINE (first primary ignorable)
1376 <<\u1161<<\u1162
1377
1378 # Korean Jamo lead consonant search rules, part 2:
1379 # Make modern compound L jamo primary equivalent to non-compound forms.
1380
1381 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
1382 &\u0313 # COMBINING COMMA ABOVE (second primary ignorable)
1383 =\u1100|\u1100
1384 =\u1103|\u1103
1385 =\u1107|\u1107
1386 =\u1109|\u1109
1387 =\u110C|\u110C
1388
1389 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
1390 &\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
1391 &\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
1392 &\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
1393 &\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
1394 &\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
1395
1396 * compare
1397 <1 \u1100\u1161
1398 = \uAC00
1399 <2 \u1100\u1162
1400 = \uAC1C
1401 <2 \u1100\u1100\u1161
1402 = \u1100\uAC00
1403 = \u1101\u1161
1404 = \uAE4C
1405 <3 \u3132\u1161
1406
1407 ** test: Hangul syllables in prefix & in the interior of a contraction
1408 @ rules
1409 &x=\u1100\u1161|a\u1102\u1162z
1410 * compare
1411 <1 \u1100\u1161x
1412 = \u1100\u1161a\u1102\u1162z
1413 = \u1100\u1161a\uB0B4z
1414 = \uAC00a\u1102\u1162z
1415 = \uAC00a\uB0B4z
1416
1417 ** test: digits are unsafe-backwards when numeric=on
1418 @ root
1419 % numeric=on
1420 * compare
1421 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
1422 # We need to back up before the identical prefix "1" and compare the full numbers.
1423 <1 11b
1424 <1 101a
1425
1426 ** test: simple locale data test
1427 @ locale de
1428 * compare
1429 <1 a
1430 <2 ä
1431 <1 ae
1432 <2 æ
1433
1434 @ locale de-u-co-phonebk
1435 * compare
1436 <1 a
1437 <1 ae
1438 <2 ä
1439 <2 æ
1440
1441 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
1442
1443 ** test: DataDrivenCollationTest/TestMorePinyin
1444 # Testing the primary strength.
1445 @ locale zh
1446 % strength=primary
1447 * compare
1448 < lā
1449 = lĀ
1450 = Lā
1451 = LĀ
1452 < lān
1453 = lĀn
1454 < lē
1455 = lĒ
1456 = Lē
1457 = LĒ
1458 < lēn
1459 = lĒn
1460
1461 ** test: DataDrivenCollationTest/TestLithuanian
1462 # Lithuanian sort order.
1463 @ locale lt
1464 * compare
1465 < cz
1466 < č
1467 < d
1468 < iz
1469 < j
1470 < sz
1471 < š
1472 < t
1473 < zz
1474 < ž
1475
1476 ** test: DataDrivenCollationTest/TestLatvian
1477 # Latvian sort order.
1478 @ locale lv
1479 * compare
1480 < cz
1481 < č
1482 < d
1483 < gz
1484 < ģ
1485 < h
1486 < iz
1487 < j
1488 < kz
1489 < ķ
1490 < l
1491 < lz
1492 < ļ
1493 < m
1494 < nz
1495 < ņ
1496 < o
1497 < rz
1498 < ŗ
1499 < s
1500 < sz
1501 < š
1502 < t
1503 < zz
1504 < ž
1505
1506 ** test: DataDrivenCollationTest/TestEstonian
1507 # Estonian sort order.
1508 @ locale et
1509 * compare
1510 < sy
1511 < š
1512 < šy
1513 < z
1514 < zy
1515 < ž
1516 < v
1517 < va
1518 < w
1519 < õ
1520 < õy
1521 < ä
1522 < äy
1523 < ö
1524 < öy
1525 < ü
1526 < üy
1527 < x
1528
1529 ** test: DataDrivenCollationTest/TestAlbanian
1530 # Albanian sort order.
1531 @ locale sq
1532 * compare
1533 < cz
1534 < ç
1535 < d
1536 < dz
1537 < dh
1538 < e
1539 < ez
1540 < ë
1541 < f
1542 < gz
1543 < gj
1544 < h
1545 < lz
1546 < ll
1547 < m
1548 < nz
1549 < nj
1550 < o
1551 < rz
1552 < rr
1553 < s
1554 < sz
1555 < sh
1556 < t
1557 < tz
1558 < th
1559 < u
1560 < xz
1561 < xh
1562 < y
1563 < zz
1564 < zh
1565
1566 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
1567 # Sorted file has different order.
1568 @ root
1569 # normalization=on turned on & off automatically.
1570 * compare
1571 < \u5F20
1572 < \u5F20\u4E00\u8E3F
1573
1574 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
1575 # This pretty much crashes.
1576 @ root
1577 * compare
1578 < \u0f71\u0f72\u0f80\u0f71\u0f72
1579 < \u0f80
1580
1581 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
1582 # These are examples of strings that caused trouble in partial sort key testing.
1583 @ locale th-TH
1584 * compare
1585 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
1586 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
1587 * compare
1588 < \u0E01\u0E07\u0E01\u0E32\u0E23
1589 < \u0E01\u0E07\u0E42\u0E01\u0E49
1590 * compare
1591 < \u0E01\u0E23\u0E19\u0E17\u0E32
1592 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
1593 * compare
1594 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
1595 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
1596 * compare
1597 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
1598 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
1599
1600 ** test: DataDrivenCollationTest/TestJavaStyleRule
1601 # java.text allows rules to start as '<<<x<<<y...'
1602 # we emulate this by assuming a &[first tertiary ignorable] in this case.
1603 @ rules
1604 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
1605 * compare
1606 = a
1607 = equal
1608 < z
1609 < x
1610 = b # x had become the new first primary ignorable
1611 < w
1612
1613 ** test: DataDrivenCollationTest/TestShiftedIgnorable
1614 # The UCA states that primary ignorables should be completely
1615 # ignorable when following a shifted code point.
1616 @ root
1617 % alternate=shifted
1618 % strength=quaternary
1619 * compare
1620 < a\u0020b
1621 = a\u0020\u0300b
1622 = a\u0020\u0301b
1623 < a_b
1624 = a_\u0300b
1625 = a_\u0301b
1626 < A\u0020b
1627 = A\u0020\u0300b
1628 = A\u0020\u0301b
1629 < A_b
1630 = A_\u0300b
1631 = A_\u0301b
1632 < a\u0301b
1633 < A\u0301b
1634 < a\u0300b
1635 < A\u0300b
1636
1637 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
1638 # The UCA states that primary ignorables should be completely
1639 # ignorable when following a shifted code point. With alternate=non-ignorable nothing is shifted, so here they still make a difference.
1640 @ root
1641 % alternate=non-ignorable
1642 % strength=tertiary
1643 * compare
1644 < a\u0020b
1645 < A\u0020b
1646 < a\u0020\u0301b
1647 < A\u0020\u0301b
1648 < a\u0020\u0300b
1649 < A\u0020\u0300b
1650 < a_b
1651 < A_b
1652 < a_\u0301b
1653 < A_\u0301b
1654 < a_\u0300b
1655 < A_\u0300b
1656 < a\u0301b
1657 < A\u0301b
1658 < a\u0300b
1659 < A\u0300b
1660
1661 ** test: DataDrivenCollationTest/TestSafeSurrogates
1662 # It turned out that surrogates were not skipped properly
1663 # when iterating backwards if they were in the middle of a
1664 # contraction. This test ensures that this is fixed.
1665 @ rules
1666 &a < x\ud800\udc00b
1667 * compare
1668 < a
1669 < x\ud800\udc00b
1670
1671 ** test: DataDrivenCollationTest/da_TestPrimary
1672 # This test goes through primary strength cases
1673 @ locale da
1674 % strength=primary
1675 * compare
1676 < Lvi
1677 < Lwi
1678 * compare
1679 < L\u00e4vi
1680 < L\u00f6wi
1681 * compare
1682 < L\u00fcbeck
1683 = Lybeck
1684
1685 ** test: DataDrivenCollationTest/da_TestTertiary
1686 # This test goes through tertiary strength cases
1687 @ locale da
1688 % strength=tertiary
1689 * compare
1690 < Luc
1691 < luck
1692 * compare
1693 < luck
1694 < L\u00fcbeck
1695 * compare
1696 < lybeck
1697 < L\u00fcbeck
1698 * compare
1699 < L\u00e4vi
1700 < L\u00f6we
1701 * compare
1702 < L\u00f6ww
1703 < mast
1704
1705 * compare
1706 < A/S
1707 < ANDRE
1708 < ANDR\u00c9
1709 < ANDREAS
1710 < AS
1711 < CA
1712 < \u00c7A
1713 < CB
1714 < \u00c7C
1715 < D.S.B.
1716 < DA
1717 < \u00d0A
1718 < DB
1719 < \u00d0C
1720 < DSB
1721 < DSC
1722 < EKSTRA_ARBEJDE
1723 < EKSTRABUD0
1724 < H\u00d8ST
1725 < HAAG
1726 < H\u00c5NDBOG
1727 < HAANDV\u00c6RKSBANKEN
1728 < Karl
1729 < karl
1730 < NIELS\u0020J\u00d8RGEN
1731 < NIELS-J\u00d8RGEN
1732 < NIELSEN
1733 < R\u00c9E,\u0020A
1734 < REE,\u0020B
1735 < R\u00c9E,\u0020L
1736 < REE,\u0020V
1737 < SCHYTT,\u0020B
1738 < SCHYTT,\u0020H
1739 < SCH\u00dcTT,\u0020H
1740 < SCHYTT,\u0020L
1741 < SCH\u00dcTT,\u0020M
1742 < SS
1743 < \u00df
1744 < SSA
1745 < STORE\u0020VILDMOSE
1746 < STOREK\u00c6R0
1747 < STORM\u0020PETERSEN
1748 < STORMLY
1749 < THORVALD
1750 < THORVARDUR
1751 < \u00feORVAR\u00d0UR
1752 < THYGESEN
1753 < VESTERG\u00c5RD,\u0020A
1754 < VESTERGAARD,\u0020A
1755 < VESTERG\u00c5RD,\u0020B
1756 < \u00c6BLE
1757 < \u00c4BLE
1758 < \u00d8BERG
1759 < \u00d6BERG
1760
1761 * compare
1762 < andere
1763 < chaque
1764 < chemin
1765 < cote
1766 < cot\u00e9
1767 < c\u00f4te
1768 < c\u00f4t\u00e9
1769 < \u010du\u010d\u0113t
1770 < Czech
1771 < hi\u0161a
1772 < irdisch
1773 < lie
1774 < lire
1775 < llama
1776 < l\u00f5ug
1777 < l\u00f2za
1778 < lu\u010d
1779 < luck
1780 < L\u00fcbeck
1781 < lye
1782 < l\u00e4vi
1783 < L\u00f6wen
1784 < m\u00e0\u0161ta
1785 < m\u00eer
1786 < myndig
1787 < M\u00e4nner
1788 < m\u00f6chten
1789 < pi\u00f1a
1790 < pint
1791 < pylon
1792 < \u0161\u00e0ran
1793 < savoir
1794 < \u0160erb\u016bra
1795 < Sietla
1796 < \u015blub
1797 < subtle
1798 < symbol
1799 < s\u00e4mtlich
1800 < verkehrt
1801 < vox
1802 < v\u00e4ga
1803 < waffle
1804 < wood
1805 < yen
1806 < yuan
1807 < yucca
1808 < \u017eal
1809 < \u017eena
1810 < \u017den\u0113va
1811 < zoo0
1812 < Zviedrija
1813 < Z\u00fcrich
1814 < zysk0
1815 < \u00e4ndere
1816
1817 ** test: DataDrivenCollationTest/hi_TestNewRules
1818 # This test goes through new rules and tests against old rules
1819 @ locale hi
1820 * compare
1821 < कॐ
1822 < कं
1823 < कँ
1824 < कः
1825
1826 ** test: DataDrivenCollationTest/ro_TestNewRules
1827 # This test goes through new rules and tests against old rules
1828 @ locale ro
1829 * compare
1830 < xAx
1831 < xă
1832 < xĂ
1833 < Xă
1834 < XĂ
1835 < xăx
1836 < xĂx
1837 < xâ
1838 < xÂ
1839 < Xâ
1840 < XÂ
1841 < xâx
1842 < xÂx
1843 < xb
1844 < xIx
1845 < xî
1846 < xÎ
1847 < Xî
1848 < XÎ
1849 < xîx
1850 < xÎx
1851 < xj
1852 < xSx
1853 < xș
1854 = xş
1855 < xȘ
1856 = xŞ
1857 < Xș
1858 = Xş
1859 < XȘ
1860 = XŞ
1861 < xșx
1862 = xşx
1863 < xȘx
1864 = xŞx
1865 < xT
1866 < xTx
1867 < xț
1868 = xţ
1869 < xȚ
1870 = xŢ
1871 < Xț
1872 = Xţ
1873 < XȚ
1874 = XŢ
1875 < xțx
1876 = xţx
1877 < xȚx
1878 = xŢx
1879 < xU
1880
1881 ** test: DataDrivenCollationTest/testOffsets
1882 # This tests cases where forwards and backwards iteration get different offsets
1883 @ locale en
1884 % strength=tertiary
1885 * compare
1886 < a\uD800\uDC00\uDC00
1887 < b\uD800\uDC00\uDC00
1888 * compare
1889 < \u0301A\u0301\u0301
1890 < \u0301B\u0301\u0301
1891 * compare
1892 < abcd\r\u0301
1893 < abce\r\u0301
1894 # TODO: test offsets in new CollationTest
1895
1896 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
1897
1898 ** test: was ICU 52 cmsccoll/TestRedundantRules
1899 @ rules
1900 & a < b < c < d& [before 1] c < m
1901 * compare
1902 <1 a
1903 <1 b
1904 <1 m
1905 <1 c
1906 <1 d
1907
1908 @ rules
1909 & a < b <<< c << d <<< e& [before 3] e <<< x
1910 * compare
1911 <1 a
1912 <1 b
1913 <3 c
1914 <2 d
1915 <3 x
1916 <3 e
1917
1918 @ rules
1919 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
1920 * compare
1921 <1 a
1922 <1 b
1923 <3 c
1924 <2 d
1925 <3 e
1926 <3 f
1927 <1 x
1928 <1 g
1929
1930 @ rules
1931 & a <<< b << c < d& a < m
1932 * compare
1933 <1 a
1934 <3 b
1935 <2 c
1936 <1 m
1937 <1 d
1938
1939 @ rules
1940 &a<b<<b\u0301 &z<b
1941 * compare
1942 <1 a
1943 <1 b\u0301
1944 <1 z
1945 <1 b
1946
1947 @ rules
1948 &z<m<<<q<<<m
1949 * compare
1950 <1 z
1951 <1 q
1952 <3 m
1953
1954 @ rules
1955 &z<<<m<q<<<m
1956 * compare
1957 <1 z
1958 <1 q
1959 <3 m
1960
1961 @ rules
1962 & a < b < c < d& r < c
1963 * compare
1964 <1 a
1965 <1 b
1966 <1 d
1967 <1 r
1968 <1 c
1969
1970 @ rules
1971 & a < b < c < d& c < m
1972 * compare
1973 <1 a
1974 <1 b
1975 <1 c
1976 <1 m
1977 <1 d
1978
1979 @ rules
1980 & a < b < c < d& a < m
1981 * compare
1982 <1 a
1983 <1 m
1984 <1 b
1985 <1 c
1986 <1 d
1987
1988 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
1989 # The following two rules should sort the particular list of strings the same.
1990 @ rules
1991 &AE <<< a << b <<< c &d <<< f
1992 * compare
1993 <1 AE
1994 <3 a
1995 <2 b
1996 <3 c
1997 <1 d
1998 <3 f
1999
2000 @ rules
2001 &A <<< a / E << b / E <<< c /E &d <<< f
2002 * compare
2003 <1 AE
2004 <3 a
2005 <2 b
2006 <3 c
2007 <1 d
2008 <3 f
2009
2010 # The following two rules should sort the particular list of strings the same.
2011 @ rules
2012 &AE <<< a <<< b << c << d < e < f <<< g
2013 * compare
2014 <1 AE
2015 <3 a
2016 <3 b
2017 <2 c
2018 <2 d
2019 <1 e
2020 <1 f
2021 <3 g
2022
2023 @ rules
2024 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
2025 * compare
2026 <1 AE
2027 <3 a
2028 <3 b
2029 <2 c
2030 <2 d
2031 <1 e
2032 <1 f
2033 <3 g
2034
2035 # The following two rules should sort the particular list of strings the same.
2036 @ rules
2037 &AE <<< B <<< C / D <<< F
2038 * compare
2039 <1 AE
2040 <3 B
2041 <3 F
2042 <1 AED
2043 <3 C
2044
2045 @ rules
2046 &A <<< B / E <<< C / ED <<< F / E
2047 * compare
2048 <1 AE
2049 <3 B
2050 <3 F
2051 <1 AED
2052 <3 C
2053
2054 ** test: never reorder trailing primaries
2055 @ root
2056 % reorder Zzzz Grek
2057 * compare
2058 <1 L
2059 <1 字
2060 <1 Ω
2061 <1 \uFFFD
2062 <1 \uFFFF
2063
2064 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
2065 @ rules
2066 &u=ab|cd
2067 &v=b|ce
2068 * compare
2069 <1 abc
2070 <1 abcc
2071 <1 abcf
2072 <1 abcd
2073 = abu
2074 <1 abce
2075 = abv
2076
2077 # With the following rules, there is only one prefix per composite ĉ or ç,
2078 # but both prefixes apply to just c in NFD form.
2079 # We would get different results for composed vs. NFD input
2080 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
2081 @ rules
2082 &x=op|ĉ
2083 &y=p|ç
2084 * compare
2085 <1 opc
2086 <2 opć
2087 <1 opcz
2088 <1 opd
2089 <1 opĉ
2090 = opc\u0302
2091 = opx
2092 <1 opç
2093 = opc\u0327
2094 = opy
2095
2096 # The mapping that is used is the one with the longest matching prefix for which
2097 # there is also a suffix match; among several mappings for that prefix, the one with the longest suffix match wins.
2098 @ rules
2099 &❶=d
2100 &❷=de
2101 &❸=def
2102 &①=c|d
2103 &②=c|de
2104 &③=c|def
2105 &④=bc|d
2106 &⑤=bc|de
2107 &⑥=bc|def
2108 &⑦=abc|d
2109 &⑧=abc|de
2110 &⑨=abc|def
2111 * compare
2112 <1 9aadzz
2113 = 9aa❶zz
2114 <1 9aadez
2115 = 9aa❷z
2116 <1 9aadef
2117 = 9aa❸
2118 <1 9acdzz
2119 = 9ac①zz
2120 <1 9acdez
2121 = 9ac②z
2122 <1 9acdef
2123 = 9ac③
2124 <1 9bcdzz
2125 = 9bc④zz
2126 <1 9bcdez
2127 = 9bc⑤z
2128 <1 9bcdef
2129 = 9bc⑥
2130 <1 abcdzz
2131 = abc⑦zz
2132 <1 abcdez
2133 = abc⑧z
2134 <1 abcdef
2135 = abc⑨
2136
2137 ** test: prefix + discontiguous contraction with missing prefix contraction
2138 # Unfortunate terminology: The first "prefix" here is the pre-context,
2139 # the second "prefix" refers to the contraction/relation string that is
2140 # one shorter than the one being tested.
2141 @ rules
2142 &x=p|e
2143 &y=p|ê
2144 &z=op|ê
2145 # No mapping for op|e:
2146 # Discontiguous contraction matching should not match op|ê in opệ
2147 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
2148 # but there is no match on op|e.
2149 * compare
2150 <1 oPe
2151 <1 ope
2152 = opx
2153 <1 opệ
2154 = opy\u0323 # y not z
2155 <1 opê
2156 = opz
2157
2158 # We cannot test for fallback by whether the contraction default CE32
2159 # is for another contraction. With the following rules, there is no mapping for op|e,
2160 # and the fallback to prefix p has no contractions.
2161 @ rules
2162 &x=p|e
2163 &z=op|ê
2164 * compare
2165 <1 oPe
2166 <1 ope
2167 = opx
2168 <2 opệ
2169 = opx\u0323\u0302 # x not z
2170 <1 opê
2171 = opz
2172
2173 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
2174 @ rules
2175 &x=e
2176 &z=op|ê
2177 * compare
2178 <1 ope
2179 = opx
2180 <3 oPe
2181 = oPx
2182 <2 opệ
2183 = opx\u0323\u0302 # x not z
2184 <1 opê
2185 = opz
2186
2187 ** test: maxVariable via rules
2188 @ rules
2189 [maxVariable space][alternate shifted]
2190 * compare
2191 = \u0020
2192 = \u000A
2193 <1 .
2194 <1 ° # degree sign
2195 <1 $
2196 <1 0
2197
2198 ** test: maxVariable via setting
2199 @ root
2200 % maxVariable=currency
2201 % alternate=shifted
2202 * compare
2203 = \u0020
2204 = \u000A
2205 = .
2206 = ° # degree sign
2207 = $
2208 <1 0
2209
2210 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
2211 # This tests canonical closure, but it also tests that CollationFastLatin
2212 # bails out properly for contractions with combining marks.
2213 # For that we need pairs of strings that remain in the Latin fastpath
2214 # long enough, hence the extra "= b" lines.
2215 @ rules
2216 &b=\u00e4\u00e4
2217 * compare
2218 <1 b
2219 = \u00e4\u00e4
2220 = b
2221 = a\u0308a\u0308
2222 = b
2223 = \u00e4a\u0308
2224 = b
2225 = a\u0308\u00e4
2226
2227 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
2228 @ rules
2229 &b=\u00C5
2230 * compare
2231 <1 b
2232 = \u00C5
2233 = b
2234 = A\u030A
2235 = b
2236 = \u212B
2237
2238 ** test: reset-before on already-tailored characters, ICU ticket 10108
2239 @ rules
2240 &a<w<<x &[before 2]x<<y
2241 * compare
2242 <1 a
2243 <1 w
2244 <2 y
2245 <2 x
2246
2247 @ rules
2248 &a<<w<<<x &[before 2]x<<y
2249 * compare
2250 <1 a
2251 <2 y
2252 <2 w
2253 <3 x
2254
2255 @ rules
2256 &a<w<x &[before 2]x<<y
2257 * compare
2258 <1 a
2259 <1 w
2260 <1 y
2261 <2 x
2262
2263 @ rules
2264 &a<w<<<x &[before 2]x<<y
2265 * compare
2266 <1 a
2267 <1 y
2268 <2 w
2269 <3 x
2270
2271 ** test: numeric collation with other settings, ICU ticket 9092
2272 @ root
2273 % strength=identical
2274 % caseFirst=upper
2275 % numeric=on
2276 * compare
2277 <1 100\u0020a
2278 <1 101
2279
2280 ** test: collation type fallback from unsupported type, ICU ticket 10149
2281 @ locale fr-CA-u-co-phonebk
2282 # Expect the same result as with fr-CA, using backwards-secondary order.
2283 # That is, we should fall back from the unsupported collation type
2284 # to the locale's default collation type.
2285 * compare
2286 <1 cote
2287 <2 côte
2288 <2 coté
2289 <2 côté
2290
2291 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
2292 @ rules
2293 &b<a @ &v<<w
2294 * compare
2295 <1 b
2296 <1 a
2297 <1 cote
2298 <2 côte
2299 <2 coté
2300 <2 côté
2301 <1 v
2302 <2 w
2303 <1 x
2304
2305 ** test: shifted+reordering, ICU ticket 9507
2306 @ root
2307 % reorder Grek punct space
2308 % alternate=shifted
2309 % strength=quaternary
2310 # Which primaries are "variable" should be determined without script reordering,
2311 # and then primaries should be reordered whether they are shifted to quaternary or not.
2312 * compare
2313 <4 ( # punctuation
2314 <4 )
2315 <4 \u0020 # space
2316 <1 ` # symbol
2317 <1 ^
2318 <1 $ # currency symbol
2319 <1 €
2320 <1 0 # numbers
2321 <1 ε # Greek
2322 <1 e # Latin
2323 <1 e(e
2324 <4 e)e
2325 <4 e\u0020e
2326 <4 ee
2327 <3 e(E
2328 <4 e)E
2329 <4 e\u0020E
2330 <4 eE
2331
2332 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
2333 @ rules
2334 &\u0001<<<b<<<B
2335 % caseFirst=upper
2336 * compare
2337 <1 aaa
2338 <3 aaaB
2339
2340 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
2341 @ rules
2342 &\u0001<<<b<<<B
2343 % strength=secondary
2344 % caseLevel=on
2345 * compare
2346 <1 a
2347 = ab
2348 = aB
2349
2350 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
2351 @ rules
2352 &[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57
2353 * compare
2354 <1 ൗx
2355 <2 ൌx
2356 <1 ൗy
2357 <2 ൌy
2358
2359 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
2360 @ rules
2361 &q<<*a''c
2362 * compare
2363 <1 d
2364 <1 p
2365 <1 q
2366 <2 a
2367 <2 \u0027
2368 <2 c
2369 <1 r
2370
2371 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
2372 ** test: locale -u- with collation keywords, ICU ticket 8260
2373 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
2374 * compare
2375 <4 \u0020 # space is shifted, strength=quaternary
2376 <1 ! # punctuation is regular
2377 <1 2
2378 <1 12 # numeric sorting
2379 <1 B
2380 <c b # uppercase first on case level
2381 <1 x\u0301\u0308
2382 <2 x\u0308\u0301 # normalization off
2383
2384 ** test: locale @ with collation keywords, ICU ticket 8260
2385 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
2386 * compare
2387 <4 $ # currency symbols are shifted, strength=quaternary
2388 <1 àla
2389 <2 alà # backwards secondary level
2390
2391 ** test: locale -u- with script reordering, ICU ticket 8260
2392 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
2393 * compare
2394 <1 \u0020
2395 <1 あ
2396 <1 ☂
2397 <1 Ω
2398 <1 丂
2399 <1 ж
2400 <1 L
2401 <1 4
2402 <1 Ձ
2403 <1 अ
2404 <1 ሄ
2405 <1 ฉ
2406
2407 ** test: locale @collation=type should be case-insensitive
2408 @ locale de@coLLation=PhoneBook
2409 * compare
2410 <1 ae
2411 <2 ä
2412 <3 Ä
2413
2414 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
2415 @ locale de-u-co-search
2416 * compare
2417 <1 =
2418 <1 ≠
2419 <1 a
2420 <1 ae
2421 <2 ä
2422
2423 # Once more, but with runtime builder.
2424 @ rules
2425 [import und-u-co-search][import de-u-co-phonebk]
2426 * compare
2427 <1 =
2428 <1 ≠
2429 <1 a
2430 <1 ae
2431 <2 ä
2432
2433 # Once again, with import from "root" not "und" (as in a proper language tag).
2434 @ rules
2435 [import root-u-co-search][import de-u-co-phonebk]
2436 * compare
2437 <1 =
2438 <1 ≠
2439 <1 a
2440 <1 ae
2441 <2 ä
2442
2443 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
2444 # Greek should sort Greek first.
2445 @ rules
2446 [import el]
2447 * compare
2448 <1 4
2449 <1 Ω
2450 <1 L
2451
2452 # Import Greek, and then reset the reordering.
2453 @ rules
2454 [import el][reorder Zzzz]
2455 * compare
2456 <1 4
2457 <1 L
2458 <1 Ω
2459
2460 # "others" is a synonym for Zzzz.
2461 @ rules
2462 [import el][reorder others]
2463 * compare
2464 <1 4
2465 <1 L
2466 <1 Ω
2467
2468 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
2469 @ rules
2470 &x<<aa<<<Aa<<<AA
2471 % strength=secondary
2472 * compare
2473 <1 AA
2474 <2 Aẩ
2475 <2 aą
2476 * compare
2477 <1 AA
2478 <2 aą
2479
2480 ** test: tailor tertiary-after a common tertiary where there is a lower one
2481 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
2482 # See ICU ticket 11448 & CLDR ticket 7222.
2483 @ rules
2484 &あ<<<x<<<y<<<z
2485 * compare
2486 <1 ぁ
2487 <3 あ
2488 <3 x
2489 <3 y
2490 <3 z
2491 <3 ァ
2492 <1 い
2493
2494 ** test: tailor tertiary-after a below-common tertiary
2495 @ rules
2496 &ぁ<<<x<<<y<<<z
2497 * compare
2498 <1 ぁ
2499 <3 x
2500 <3 y
2501 <3 z
2502 <3 あ
2503 <3 ァ
2504 <1 い
2505
2506 ** test: tailor tertiary-before a common tertiary where there is a lower one
2507 @ rules
2508 &[before 3]あ<<<x<<<y<<<z
2509 * compare
2510 <1 ぁ
2511 <3 x
2512 <3 y
2513 <3 z
2514 <3 あ
2515 <3 ァ
2516 <1 い
2517
2518 ** test: tailor tertiary-before a below-common tertiary
2519 @ rules
2520 &[before 3]ぁ<<<x<<<y<<<z
2521 * compare
2522 <1 x
2523 <3 y
2524 <3 z
2525 <3 ぁ
2526 <3 あ
2527 <3 ァ
2528 <1 い
2529
2530 ** test: reorder single scripts not groups, ICU ticket 11449
2531 @ root
2532 % reorder Goth Latn
2533 * compare
2534 <1 4
2535 <1 𐌰 # Gothic
2536 <1 L
2537 <1 Ω
2538 # Before ICU 55, the following reordered together with Gothic.
2539 <1 𐌈 # Old Italic
2540 <1 𐑐 # Shavian