2 # Copyright (C) 2002-2013, International Business Machines Corporation and others.
7 # ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8 # See Unicode Standard Annex #29.
9 # These rules are based on UAX #29 Revision 20 for Unicode Version 6.2
13 # Character Class Definitions.
# Basic character classes. Each variable is the set of code points whose
# Unicode Grapheme_Cluster_Break property has the value named on the right.
15 $CR = [\p{Grapheme_Cluster_Break = CR}];
16 $LF = [\p{Grapheme_Cluster_Break = LF}];
17 $Control = [\p{Grapheme_Cluster_Break = Control}];
# $Prepend is deliberately left undefined; the rules that would reference it
# are commented out in the same way further down.
18 # TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
19 $Extend = [\p{Grapheme_Cluster_Break = Extend}];
20 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
# Regional-indicator lead symbols. Each $RI_x is a single code point in the
# range U+1F1E6..U+1F1FB (Unicode REGIONAL INDICATOR SYMBOL LETTER A..V).
# The "Trail" comment on each line lists the letters that make up the
# matching $RI_x_End set.
# Only the letters A-N, P and R-V have a lead definition here; there is no
# $RI_O, $RI_Q or $RI_W..$RI_Z.
# NOTE(review): presumably each lead/trail pair spells a two-letter region
# code; the rules that pair them are not visible in this excerpt - confirm.
22 $RI_A = \U0001F1E6; # Trail ERTU
23 $RI_B = \U0001F1E7; # Trail EGR
24 $RI_C = \U0001F1E8; # Trail AHLNZ
25 $RI_D = \U0001F1E9; # Trail EK
26 $RI_E = \U0001F1EA; # Trail GS
27 $RI_F = \U0001F1EB; # Trail IR
28 $RI_G = \U0001F1EC; # Trail BR
29 $RI_H = \U0001F1ED; # Trail KU
30 $RI_I = \U0001F1EE; # Trail DLNT
31 $RI_J = \U0001F1EF; # Trail OP
32 $RI_K = \U0001F1F0; # Trail R
33 $RI_L = \U0001F1F1; # Trail B
34 $RI_M = \U0001F1F2; # Trail OXY
35 $RI_N = \U0001F1F3; # Trail LO
36 $RI_P = \U0001F1F5; # Trail LT
37 $RI_R = \U0001F1F7; # Trail OU
38 $RI_S = \U0001F1F8; # Trail AEGK
39 $RI_T = \U0001F1F9; # Trail HRW
40 $RI_U = \U0001F1FA; # Trail AS
41 $RI_V = \U0001F1FB; # Trail N
# Trailing regional-indicator sets, one per lead letter: $RI_x_End holds the
# symbols listed as "Trail" for $RI_x. The letters in each end-of-line comment
# name the members of the set (U+1F1E6 = A ... U+1F1FF = Z).
# $RI_K_End, $RI_L_End and $RI_V_End each hold a single code point and are
# written without [ ] set brackets.
# NOTE(review): the rules that consume these sets are not visible in this
# excerpt - confirm every set is still referenced before editing or removing one.
43 $RI_A_End = [\U0001F1EA \U0001F1F7 \U0001F1F9 \U0001F1FA]; # ERTU
44 $RI_B_End = [\U0001F1EA \U0001F1EC \U0001F1F7]; # EGR
45 $RI_C_End = [\U0001F1E6 \U0001F1ED \U0001F1F1 \U0001F1F3 \U0001F1FF]; # AHLNZ
46 $RI_D_End = [\U0001F1EA \U0001F1F0]; # EK
47 $RI_E_End = [\U0001F1EC \U0001F1F8]; # GS
48 $RI_F_End = [\U0001F1EE \U0001F1F7]; # IR
49 $RI_G_End = [\U0001F1E7 \U0001F1F7]; # BR
50 $RI_H_End = [\U0001F1F0 \U0001F1FA]; # KU
51 $RI_I_End = [\U0001F1E9 \U0001F1F1 \U0001F1F3 \U0001F1F9]; # DLNT
52 $RI_J_End = [\U0001F1F4 \U0001F1F5]; # OP
53 $RI_K_End = \U0001F1F7; # R
54 $RI_L_End = \U0001F1E7; # B
55 $RI_M_End = [\U0001F1F4 \U0001F1FD \U0001F1FE]; # OXY
56 $RI_N_End = [\U0001F1F1 \U0001F1F4]; # LO
57 $RI_P_End = [\U0001F1F1 \U0001F1F9]; # LT
58 $RI_R_End = [\U0001F1F4 \U0001F1FA]; # OU
59 $RI_S_End = [\U0001F1E6 \U0001F1EA \U0001F1EC \U0001F1F0]; # AEGK
60 $RI_T_End = [\U0001F1ED \U0001F1F7 \U0001F1FC]; # HRW
61 $RI_U_End = [\U0001F1E6 \U0001F1F8]; # AS
62 $RI_V_End = \U0001F1F3; # N
65 # Korean Syllable Definitions
# Hangul syllable-type classes, again taken from Grapheme_Cluster_Break.
# Per UAX #29: L = leading consonant jamo, V = vowel jamo, T = trailing
# consonant jamo, LV / LVT = precomposed syllables of those shapes.
67 $L = [\p{Grapheme_Cluster_Break = L}];
68 $V = [\p{Grapheme_Cluster_Break = V}];
69 $T = [\p{Grapheme_Cluster_Break = T}];
71 $LV = [\p{Grapheme_Cluster_Break = LV}];
72 $LVT = [\p{Grapheme_Cluster_Break = LVT}];
75 ## -------------------------------------------------
# NOTE(review): the section directive that governs these rules is not visible
# in this excerpt; presumably they are forward rules whose matches keep the
# listed characters inside one cluster - confirm against the full file.
# Hangul: an L followed by another L, a V, an LV or an LVT.
82 $L ($L | $V | $LV | $LVT);
# Any character other than Control, CR or LF, followed by an Extend character.
107 [^$Control $CR $LF] $Extend;
# Any character other than Control, CR or LF, followed by a SpacingMark.
109 [^$Control $CR $LF] $SpacingMark;
# Prepend counterpart of the two rules above, disabled while $Prepend is undefined.
110 # TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];
113 ## -------------------------------------------------
# NOTE(review): these rules list their operands in the opposite order from the
# rules earlier in the file, so they are presumably the reverse-direction set;
# the governing section directive is not visible in this excerpt - confirm.
# Hangul: any of L, V, LV, LVT followed by an L.
117 ($L | $V | $LV | $LVT) $L;
# Hangul: a V or T followed by an LV or V.
118 ($V | $T) ($LV | $V);
# Extend or SpacingMark followed by any character other than Control, CR or LF.
142 $Extend [^$Control $CR $LF];
143 $SpacingMark [^$Control $CR $LF];
# Prepend counterpart, disabled while $Prepend is undefined.
144 # TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;
147 ## -------------------------------------------------
148 # We don't logically need safe char break rules, but if we don't provide any at all,
149 # the engine for preceding() and following() will fall back to the
150 # old-style, inefficient algorithm.
155 ## -------------------------------------------------