]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/brkitr/char.txt
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / data / brkitr / char.txt
... / ...
CommitLineData
1#
2# Copyright (C) 2002-2011, International Business Machines Corporation and others.
3# All Rights Reserved.
4#
5# file: char.txt
6#
7# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8# See Unicode Standard Annex #29.
9# These rules are based on UAX #29 Revision 19 for Unicode Version 6.1
10#
11
12#
13# Character Class Definitions.
14#
# Each variable below names the set of code points whose Unicode
# Grapheme_Cluster_Break property has the value written on that line.
# The rule sections further down refer to these sets by variable name.
15$CR = [\p{Grapheme_Cluster_Break = CR}];
16$LF = [\p{Grapheme_Cluster_Break = LF}];
17$Control = [\p{Grapheme_Cluster_Break = Control}];
# $Prepend is deliberately left undefined; the TODO below keeps its
# definition on hand in case the Prepend set becomes non-empty again.
18# TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
19$Extend = [\p{Grapheme_Cluster_Break = Extend}];
20$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
21
22#
23# Korean Syllable Definitions
24#
# Korean syllable classes, taken from the L, V, T, LV and LVT values of
# the Grapheme_Cluster_Break property. They are combined by the syllable
# rules in the !!forward and !!reverse sections.
25$L = [\p{Grapheme_Cluster_Break = L}];
26$V = [\p{Grapheme_Cluster_Break = V}];
27$T = [\p{Grapheme_Cluster_Break = T}];
28
29$LV = [\p{Grapheme_Cluster_Break = LV}];
30$LVT = [\p{Grapheme_Cluster_Break = LVT}];
31
32#
33# Emoji regional indicators
34#
# Unlike the other classes in this file, $RI is an explicit code point
# range (U+1F1E6 through U+1F1FF) rather than a \p{...} property lookup.
# NOTE(review): presumably because the Unicode 6.1 data named in the file
# header has no property value for regional indicators - confirm before
# replacing this with a property expression.
35$RI = [\U0001F1E6-\U0001F1FF];
36
37## -------------------------------------------------
# Rule option. Per the ICU break-rule syntax, !!chain enables rule chaining:
# the last character matched by one rule may also serve as the first
# character of the next, so the two-class rules below can extend a match
# over longer runs. (Semantics come from the ICU User Guide, not this file.)
38!!chain;
39
# Forward rules. Each rule lists adjacent character classes that are matched
# as one unit when scanning forward, i.e. no boundary is placed inside the
# matched sequence.
40!!forward;
41
# CR immediately followed by LF is kept together.
42$CR $LF;
43
# Korean syllable sequences: L before L/V/LV/LVT, LV or V before V/T,
# and LVT or T before T.
44$L ($L | $V | $LV | $LVT);
45($LV | $V) ($V | $T);
46($LVT | $T) $T;
47
# Any character that is not Control, CR or LF keeps a following Extend
# character attached to it.
48[^$Control $CR $LF] $Extend;
49
# Same as above, for a following SpacingMark character.
50[^$Control $CR $LF] $SpacingMark;
51# TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];
52
# Regional indicators. In ICU rule syntax the "/" marks the boundary position
# and the text after it is look-ahead context only: the first rule matches a
# pair of $RI that is followed by another $RI, the second a plain pair.
53$RI $RI / $RI;
54$RI $RI;
55
56## -------------------------------------------------
57
# Reverse rules. These are the forward rules with the operand order swapped,
# for scanning backward through the text.
58!!reverse;
# LF then CR (the forward CR LF pair seen from the end).
59$LF $CR;
# Korean syllable sequences, mirrored from the forward section.
60($L | $V | $LV | $LVT) $L;
61($V | $T) ($LV | $V);
62$T ($LVT | $T);
63
# Extend or SpacingMark, then the non-Control/CR/LF character it attaches to.
64$Extend [^$Control $CR $LF];
65$SpacingMark [^$Control $CR $LF];
66# TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;
67
# Regional indicator pairs. Here the look-ahead context after "/" is two $RI,
# whereas the forward section uses one.
# NOTE(review): confirm the asymmetry with the forward rule is intentional.
68$RI $RI / $RI $RI;
69$RI $RI;
70
71## -------------------------------------------------
72
# Safe-reverse section: no rules are listed under this header.
73!!safe_reverse;
74
75
76## -------------------------------------------------
77
# Safe-forward section: no rules follow this header in the visible text.
78!!safe_forward;
79