git.saurik.com Git - apple/icu.git/blame - icuSources/data/brkitr/char.txt
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / data / brkitr / char.txt
Commit Line Data
b75a7d8f 1#
46f4442e 2# Copyright (C) 2002-2008, International Business Machines Corporation and others.
b75a7d8f
A
3# All Rights Reserved.
4#
5# file: char.txt
6#
7# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8# See Unicode Standard Annex #29.
46f4442e 9# These rules are based on TR29 Revision 12, for Unicode Version 5.1
b75a7d8f
A
10#
11
12#
13# Character Class Definitions.
b75a7d8f 14#
46f4442e
A
# Each variable below is the set of code points whose Grapheme_Cluster_Break
# property has the value named on the right-hand side.
15$CR = [\p{Grapheme_Cluster_Break = CR}];
16$LF = [\p{Grapheme_Cluster_Break = LF}];
17$Control = [\p{Grapheme_Cluster_Break = Control}];
18$Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
19$Extend = [\p{Grapheme_Cluster_Break = Extend}];
20$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
b75a7d8f
A
21
22#
23# Korean Syllable Definitions
24#
73c04bcf
A
# One set per Korean-syllable Grapheme_Cluster_Break value, selected by property value.
# NOTE(review): in UAX #29 these values are L/V/T (leading, vowel and trailing
# jamo) and LV/LVT (precomposed syllables) -- confirm against the annex.
25$L = [\p{Grapheme_Cluster_Break = L}];
26$V = [\p{Grapheme_Cluster_Break = V}];
27$T = [\p{Grapheme_Cluster_Break = T}];
b75a7d8f 28
73c04bcf
A
29$LV = [\p{Grapheme_Cluster_Break = LV}];
30$LVT = [\p{Grapheme_Cluster_Break = LVT}];
b75a7d8f 31
b75a7d8f 32
374ca955 33## -------------------------------------------------
# Rule-set options and the forward rules.
# Each forward rule below names two adjacent character classes.
# NOTE(review): presumably, per ICU break-rule semantics, no grapheme cluster
# boundary is placed inside a matched pair, and !!chain lets matches link
# into longer runs -- confirm against the ICU User Guide.
46f4442e 34!!chain;
374ca955
A
35
36!!forward;
37
# CR immediately followed by LF.
b75a7d8f 38$CR $LF;
46f4442e
A
39
# Korean syllable sequences: L before L, V, LV or LVT; LV or V before V or T;
# LVT or T before T.
40$L ($L | $V | $LV | $LVT);
41($LV | $V) ($V | $T);
42($LVT | $T) $T;
43
# Any character other than Control, CR or LF, followed by an Extend character.
44[^$Control $CR $LF] $Extend;
45
# Same left-hand class followed by a SpacingMark, and a Prepend character
# followed by any character other than Control, CR or LF.
46[^$Control $CR $LF] $SpacingMark;
47$Prepend [^$Control $CR $LF];
48
b75a7d8f 49
374ca955 50## -------------------------------------------------
b75a7d8f 51
# Reverse rules: each rule is the corresponding pair from the !!forward
# section with its left and right sides swapped (e.g. "$LF $CR" here
# mirrors "$CR $LF" there).
374ca955 52!!reverse;
46f4442e
A
53$LF $CR;
# Mirrored Korean syllable pairs.
54($L | $V | $LV | $LVT) $L;
55($V | $T) ($LV | $V);
56$T ($LVT | $T);
57
# Mirrored Extend, SpacingMark and Prepend pairs.
58$Extend [^$Control $CR $LF];
59$SpacingMark [^$Control $CR $LF];
60[^$Control $CR $LF] $Prepend;
374ca955 61
374ca955
A
62
63## -------------------------------------------------
64
# Section header only: no rules follow !!safe_reverse before the next section.
65!!safe_reverse;
66
374ca955
A
67
68## -------------------------------------------------
69
# Section header only: no rules follow !!safe_forward in the text visible here.
70!!safe_forward;
71