2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
3 # License & terms of use: http://www.unicode.org/copyright.html
4 # Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
8 # Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
11 # Note: Rule syntax and the monkey test itself are still a work in progress.
12 # They are expected to change with review and the addition of support for rule tailoring.
# Declares which kind of boundary this rule file describes (grapheme clusters here).
14 type = grapheme; # one of grapheme | word | line | sentence
# Character classes keyed on the Grapheme_Cluster_Break property, one class per
# property value. These names are what later set definitions and GBn rules refer to.
17 CR = [\p{Grapheme_Cluster_Break = CR}];
18 LF = [\p{Grapheme_Cluster_Break = LF}];
# NOTE(review): Control and Extend carry an extra pair of outer brackets; this
# looks like redundant nesting with no effect on the set - confirm before simplifying.
20 Control = [[\p{Grapheme_Cluster_Break = Control}]];
21 Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
22 ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
23 Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
24 Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
25 SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
28 # Korean Syllable Definitions
# Hangul classes consumed by rules GB6 and GB7, one per Grapheme_Cluster_Break
# value. Per UAX #29 the values stand for: L = leading consonant, V = vowel,
# T = trailing consonant, LV / LVT = precomposed syllables (see the standard
# for exact membership).
30 L = [\p{Grapheme_Cluster_Break = L}];
31 V = [\p{Grapheme_Cluster_Break = V}];
32 T = [\p{Grapheme_Cluster_Break = T}];
33 LV = [\p{Grapheme_Cluster_Break = LV}];
34 LVT = [\p{Grapheme_Cluster_Break = LVT}];
# Set selected by the ExtPict property (presumably the short alias of
# Extended_Pictographic - confirm); referenced by rule GB11.
38 Extended_Pict = [:ExtPict:];
# Sets used by the Indic conjunct rule GB9c.
# Virama_ and LinkingConsonant both start from the same six scripts (Gujarati,
# Telugu, Malayalam, Oriya, Bengali, Devanagari) and intersect (&) that union
# with one Indic_Syllabic_Category value: Virama or Consonant respectively.
# NOTE(review): \p{Gujr} omits the "sc=" prefix that the other five scripts use;
# presumably an equivalent shorthand for the script property - confirm.
41 Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
43 LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
# ExtCccZwj: the Extend class with every ccc=0 character removed (i.e. only
# Extend characters whose canonical combining class is non-zero), plus ZWJ.
45 ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
# Rule notation (UAX #29 style; confirm details against the monkey test's rule
# parser): "÷" marks a boundary position, "." stands for any character, and
# adjacent classes with no "÷" between them are kept together.
# GB4: boundary after any Control, CR or LF.
# GB5: boundary before any Control, CR or LF.
48 GB4: (Control | CR | LF) ÷;
49 GB5: . ÷ (Control | CR | LF);
# Hangul syllable sequences. Neither rule contains "÷", so presumably each
# listed pair is kept inside one cluster - confirm against the rule parser.
# GB6: L followed by L, V, LV or LVT.
# GB7: LV or V followed by V or T.
51 GB6: L (L | V | LV | LVT);
52 GB7: (LV | V) (V | T);
# GB11: Extended_Pict, an optional run of Extend, ZWJ, then Extended_Pict.
# GB9c: LinkingConsonant, Virama_ (with optional ExtCccZwj runs on either
#       side of it), then LinkingConsonant.
# GB9:  any character followed by Extend or ZWJ.
# NOTE(review): the two longer-context rules are listed ahead of GB9, out of
# numeric order; presumably rule order matters to the test engine - confirm
# before reordering.
55 GB11: Extended_Pict Extend* ZWJ Extended_Pict;
56 GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
57 GB9: . (Extend | ZWJ);
62 # Regional Indicators, split into pairs.
63 # Note that a pair of RIs that is not followed by a third RI will fall into
64 # the normal rules for Extend, etc.
# GB12: two Regional_Indicators followed by a third - boundary ("÷") goes after
#       the second, so the third starts a new pair.
# GB13: two adjacent Regional_Indicators with no boundary between them.
66 GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
67 GB13: Regional_Indicator Regional_Indicator;