git.saurik.com Git - apple/icu.git/blame - icuSources/test/testdata/break_rules/grapheme.txt
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / test / testdata / break_rules / grapheme.txt
CommitLineData
2ca993e8 1#
f3c0d7a5
A
2# Copyright (C) 2016 and later: Unicode, Inc. and others.
3# License & terms of use: http://www.unicode.org/copyright.html
2ca993e8
A
4# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
5
6# file: grapheme.txt
7#
8# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
9#
10#
11# Note: Rule syntax and the monkey test itself are still a work in progress.
12# They are expected to change with review and the addition of support for rule tailoring.
13
14type = grapheme; # one of grapheme | word | line | sentence
15locale = en;
16
f3c0d7a5
A
17CR = [\p{Grapheme_Cluster_Break = CR}];
18LF = [\p{Grapheme_Cluster_Break = LF}];
2ca993e8 19
f3c0d7a5
A
20Control = [[\p{Grapheme_Cluster_Break = Control}]];
21Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
22ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
2ca993e8 23Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
f3c0d7a5 24Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
2ca993e8
A
25SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
26
27#
28# Korean Syllable Definitions
29#
f3c0d7a5
A
30L = [\p{Grapheme_Cluster_Break = L}];
31V = [\p{Grapheme_Cluster_Break = V}];
32T = [\p{Grapheme_Cluster_Break = T}];
33LV = [\p{Grapheme_Cluster_Break = LV}];
34LVT = [\p{Grapheme_Cluster_Break = LVT}];
2ca993e8
A
35
36# Emoji definitions
37
0f5d89e8 38Extended_Pict = [:ExtPict:];
2ca993e8
A
39
40GB3: CR LF;
41GB4: (Control | CR | LF) ÷;
42GB5: . ÷ (Control | CR | LF);
43
44GB6: L (L | V | LV | LVT);
45GB7: (LV | V) (V | T);
46GB8: (LVT | T) T;
47
0f5d89e8 48GB11: Extended_Pict Extend* ZWJ Extended_Pict;
2ca993e8
A
49GB9: . (Extend | ZWJ);
50
51GB9a: . SpacingMark;
52GB9b: Prepend .;
2ca993e8
A
53
54# Regional Indicators, split into pairs.
55# Note that a pair of RIs that is not followed by a third RI will fall into
56# the normal rules for Extend, etc.
57#
58GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
59GB13: Regional_Indicator Regional_Indicator;
60
61GB999: . ÷;