]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/testdata/break_rules/grapheme.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / testdata / break_rules / grapheme.txt
1 #
2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
3 # License & terms of use: http://www.unicode.org/copyright.html
4 # Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
5
6 # file: grapheme.txt
7 #
8 # Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
9 #
10 #
11 # Note: Rule syntax and the monkey test itself are still a work in progress.
12 # They are expected to change with review and the addition of support for rule tailoring.
13
14 type = grapheme; # one of grapheme | word | line | sentence
15 locale = en; # NOTE(review): presumably the locale of the break iterator these rules are checked against - confirm
16
17 CR = [\p{Grapheme_Cluster_Break = CR}]; # characters whose Grapheme_Cluster_Break property value is CR
18 LF = [\p{Grapheme_Cluster_Break = LF}]; # characters whose Grapheme_Cluster_Break property value is LF; CR and LF are used by GB3-GB5
19
20 Control = [[\p{Grapheme_Cluster_Break = Control}]]; # NOTE(review): outer [ ] wraps a single inner set and looks redundant (compare ZWJ below)
21 Extend = [[\p{Grapheme_Cluster_Break = Extend}]]; # same doubled-bracket form as Control; also referenced by ExtCccZwj, GB11 and GB9
22 ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}]; # referenced by ExtCccZwj, GB11 and GB9
23 Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}]; # referenced by GB12 and GB13
24 Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; # referenced by GB9b
25 SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; # referenced by GB9a
26
27 #
28 # Korean Syllable Definitions
29 #
30 L = [\p{Grapheme_Cluster_Break = L}]; # Hangul leading consonant (per UAX #29 property value naming)
31 V = [\p{Grapheme_Cluster_Break = V}]; # Hangul vowel
32 T = [\p{Grapheme_Cluster_Break = T}]; # Hangul trailing consonant
33 LV = [\p{Grapheme_Cluster_Break = LV}]; # precomposed Hangul syllable of form leading consonant + vowel
34 LVT = [\p{Grapheme_Cluster_Break = LVT}]; # precomposed Hangul syllable of form leading consonant + vowel + trailing consonant
35
36 # Emoji definitions
37
38 Extended_Pict = [:ExtPict:]; # POSIX-style [:...:] property syntax; ExtPict is the short alias of Extended_Pictographic. Used by GB11.
39
40 # Indic Sequences
41 Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]]; # viramas restricted (via &, set intersection) to six scripts. NOTE(review): \p{Gujr} lacks the sc= prefix used for the other scripts - presumably equivalent, confirm
42
43 LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]]; # consonants restricted to the same six scripts as Virama_
44
45 ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ]; # Extend characters minus those with canonical combining class 0, together with ZWJ; used by GB9c
46
47 GB3: CR LF; # no break mark: CR followed by LF is kept together. (÷ follows UAX #29 notation for a break position - confirm against the monkey test parser)
48 GB4: (Control | CR | LF) ÷; # break after a Control, CR or LF
49 GB5: . ÷ (Control | CR | LF); # break before a Control, CR or LF; '.' presumably matches any character - confirm
50
51 GB6: L (L | V | LV | LVT); # Hangul: no break between L and a following L, V, LV or LVT
52 GB7: (LV | V) (V | T); # Hangul: no break between LV or V and a following V or T
53 GB8: (LVT | T) T; # Hangul: no break between LVT or T and a following T
54
55 GB11: Extended_Pict Extend* ZWJ Extended_Pict; # emoji ZWJ sequence: pictograph, optional Extend chars, ZWJ, pictograph. NOTE(review): placed ahead of GB9, presumably because rule order matters - confirm
56 GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant; # Indic conjunct: consonant, virama, consonant, with optional ExtCccZwj characters on either side of the virama
57 GB9: . (Extend | ZWJ); # no break before Extend or ZWJ
58
59 GB9a: . SpacingMark; # no break before SpacingMark
60 GB9b: Prepend .; # no break after Prepend
61
62 # Regional Indicators, split into pairs.
63 # Note that a pair of RIs that is not followed by a third RI will fall into
64 # the normal rules for Extend, etc.
65 #
66 GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator; # after a pair of RIs, break before a third RI
67 GB13: Regional_Indicator Regional_Indicator; # a pair of RIs with no third RI following: no break between them
68
69 GB999: . ÷; # last rule in the file: break after a character; presumably the fallback when no earlier rule applies - confirm