]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | # |
4388f060 | 2 | # Copyright (C) 2002-2011, International Business Machines Corporation and others. |
b75a7d8f A |
3 | # All Rights Reserved. |
4 | # | |
5 | # file: char.txt | |
6 | # | |
7 | # ICU Character Break Rules, also known as Grapheme Cluster Boundaries | |
8 | # See Unicode Standard Annex #29. | |
4388f060 | 9 | # These rules are based on UAX #29 Revision 19 for Unicode Version 6.1 |
b75a7d8f A |
10 | # |
11 | ||
12 | # | |
13 | # Character Class Definitions. | |
b75a7d8f | 14 | # |
46f4442e A |
15 | $CR = [\p{Grapheme_Cluster_Break = CR}]; |
16 | $LF = [\p{Grapheme_Cluster_Break = LF}]; | |
17 | $Control = [\p{Grapheme_Cluster_Break = Control}]; | |
4388f060 | 18 | # TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; |
46f4442e A |
19 | $Extend = [\p{Grapheme_Cluster_Break = Extend}]; |
20 | $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; | |
b75a7d8f A |
21 | |
22 | # | |
23 | # Korean (Hangul) syllable type definitions: L, V, T jamo and precomposed LV, LVT syllables | |
24 | # | |
73c04bcf A |
25 | $L = [\p{Grapheme_Cluster_Break = L}]; |
26 | $V = [\p{Grapheme_Cluster_Break = V}]; | |
27 | $T = [\p{Grapheme_Cluster_Break = T}]; | |
b75a7d8f | 28 | |
73c04bcf A |
29 | $LV = [\p{Grapheme_Cluster_Break = LV}]; |
30 | $LVT = [\p{Grapheme_Cluster_Break = LVT}]; | |
b75a7d8f | 31 | |
4388f060 A |
32 | # |
33 | # Regional indicator symbols (U+1F1E6..U+1F1FF); a pair of them forms an emoji flag | |
34 | # | |
35 | $RI = [\U0001F1E6-\U0001F1FF]; | |
b75a7d8f | 36 | |
374ca955 | 37 | ## ------------------------------------------------- |
46f4442e | 38 | !!chain; |
374ca955 A |
39 | |
40 | !!forward; | |
41 | ||
b75a7d8f | 42 | $CR $LF; |
46f4442e A |
43 | |
44 | $L ($L | $V | $LV | $LVT); | |
45 | ($LV | $V) ($V | $T); | |
46 | ($LVT | $T) $T; | |
47 | ||
48 | [^$Control $CR $LF] $Extend; | |
49 | ||
50 | [^$Control $CR $LF] $SpacingMark; | |
4388f060 | 51 | # TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF]; |
46f4442e | 52 | |
4388f060 A |
53 | $RI $RI / $RI; |
54 | $RI $RI; | |
b75a7d8f | 55 | |
374ca955 | 56 | ## ------------------------------------------------- |
b75a7d8f | 57 | |
374ca955 | 58 | !!reverse; |
46f4442e A |
59 | $LF $CR; |
60 | ($L | $V | $LV | $LVT) $L; | |
61 | ($V | $T) ($LV | $V); | |
62 | $T ($LVT | $T); | |
63 | ||
64 | $Extend [^$Control $CR $LF]; | |
65 | $SpacingMark [^$Control $CR $LF]; | |
4388f060 | 66 | # TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend; |
374ca955 | 67 | |
4388f060 A |
68 | $RI $RI / $RI $RI; |
69 | $RI $RI; | |
374ca955 A |
70 | |
71 | ## ------------------------------------------------- | |
72 | ||
73 | !!safe_reverse; | |
74 | ||
374ca955 A |
75 | |
76 | ## ------------------------------------------------- | |
77 | ||
78 | !!safe_forward; | |
79 |