]>
Commit | Line | Data |
---|---|---|
1 | # | |
2 | # Copyright (C) 2002-2011, International Business Machines Corporation and others. | |
3 | # All Rights Reserved. | |
4 | # | |
5 | # file: char.txt | |
6 | # | |
7 | # ICU Character Break Rules, also known as Grapheme Cluster Boundaries | |
8 | # See Unicode Standard Annex #29. | |
9 | # These rules are based on UAX #29 Revision 19 for Unicode Version 6.1 | |
10 | # | |
11 | ||
12 | # | |
13 | # Character Class Definitions. | |
14 | # Each $name below is a character set selected by a single Grapheme_Cluster_Break property value. | |
15 | $CR = [\p{Grapheme_Cluster_Break = CR}]; | |
16 | $LF = [\p{Grapheme_Cluster_Break = LF}]; | |
17 | $Control = [\p{Grapheme_Cluster_Break = Control}]; | |
18 | # TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; | |
19 | $Extend = [\p{Grapheme_Cluster_Break = Extend}]; | |
20 | $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; | |
21 | ||
22 | # | |
23 | # Korean Syllable Definitions | |
24 | # These sets are likewise selected by Grapheme_Cluster_Break value: L, V, T, LV and LVT. | |
25 | $L = [\p{Grapheme_Cluster_Break = L}]; | |
26 | $V = [\p{Grapheme_Cluster_Break = V}]; | |
27 | $T = [\p{Grapheme_Cluster_Break = T}]; | |
28 | ||
29 | $LV = [\p{Grapheme_Cluster_Break = LV}]; | |
30 | $LVT = [\p{Grapheme_Cluster_Break = LVT}]; | |
31 | ||
32 | # | |
33 | # Emoji regional indicators | |
34 | # Unlike the sets above, $RI is a literal code point range (U+1F1E6 to U+1F1FF), not a property lookup. | |
35 | $RI = [\U0001F1E6-\U0001F1FF]; | |
36 | ||
37 | ## ---- Options and forward rules: !!chain, then under !!forward the CR LF pair, Korean syllable sequences, a trailing Extend or SpacingMark after any non-Control, non-CR, non-LF character, and regional-indicator pairs ---- | |
38 | !!chain; | |
39 | ||
40 | !!forward; | |
41 | ||
42 | $CR $LF; | |
43 | ||
44 | $L ($L | $V | $LV | $LVT); | |
45 | ($LV | $V) ($V | $T); | |
46 | ($LVT | $T) $T; | |
47 | ||
48 | [^$Control $CR $LF] $Extend; | |
49 | ||
50 | [^$Control $CR $LF] $SpacingMark; | |
51 | # TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF]; | |
52 | ||
53 | $RI $RI / $RI; | |
54 | $RI $RI; | |
55 | ||
56 | ## ---- Reverse rules: the same pairs written with their operands in the opposite order (LF CR, Korean syllable sequences, Extend or SpacingMark followed by a non-Control, non-CR, non-LF character). NOTE(review): the $RI rule containing a slash has two $RI after the slash here, so it is not an exact mirror of its !!forward counterpart; confirm this is intended. ---- | |
57 | ||
58 | !!reverse; | |
59 | $LF $CR; | |
60 | ($L | $V | $LV | $LVT) $L; | |
61 | ($V | $T) ($LV | $V); | |
62 | $T ($LVT | $T); | |
63 | ||
64 | $Extend [^$Control $CR $LF]; | |
65 | $SpacingMark [^$Control $CR $LF]; | |
66 | # TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend; | |
67 | ||
68 | $RI $RI / $RI $RI; | |
69 | $RI $RI; | |
70 | ||
71 | ## ---- Safe-reverse section: no rules are listed under !!safe_reverse ---- | |
72 | ||
73 | !!safe_reverse; | |
74 | ||
75 | ||
76 | ## ---- Safe-forward section. NOTE(review): no rules appear after !!safe_forward; confirm the section is meant to be empty ---- | |
77 | ||
78 | !!safe_forward; | |
79 |