Commit | Line | Data |
---|---|---|
46f4442e A |
1 | # |
2 | # Copyright (C) 2002-2009, International Business Machines Corporation and others. | |
3 | # All Rights Reserved. | |
4 | # | |
5 | # file: char_th.txt | |
6 | # | |
7 | # ICU Character Break Rules, also known as Grapheme Cluster Boundaries | |
8 | # See Unicode Standard Annex #29. | |
9 | # These rules are based on TR29 Revision 12, for Unicode Version 5.1 | |
10 | # | |
11 | ||
12 | # | |
13 | # Character Class Definitions. | |
14 | # Each variable is the set of code points carrying the named Grapheme_Cluster_Break property value. $Extend is the one exception: it subtracts U+0E30, U+0E32, U+0E45, U+0EB0 and U+0EB2 (code points in the Thai and Lao blocks) from the property set, so the $Extend rules further down never treat those five code points as extending the preceding character. | |
15 | $CR = [\p{Grapheme_Cluster_Break = CR}]; | |
16 | $LF = [\p{Grapheme_Cluster_Break = LF}]; | |
17 | $Control = [\p{Grapheme_Cluster_Break = Control}]; | |
18 | $Extend = [\p{Grapheme_Cluster_Break = Extend} - [\u0E30 \u0E32 \u0E45 \u0EB0 \u0EB2]]; | |
19 | $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; | |
20 | ||
21 | # | |
22 | # Korean Syllable Definitions | |
23 | # The five sets follow the Hangul classes of UAX #29: $L, $V and $T are the leading, vowel and trailing jamo classes, and $LV and $LVT are the precomposed syllable classes. In this file they are referenced only by the three syllable rules in each of the !!forward and !!reverse sections. | |
24 | $L = [\p{Grapheme_Cluster_Break = L}]; | |
25 | $V = [\p{Grapheme_Cluster_Break = V}]; | |
26 | $T = [\p{Grapheme_Cluster_Break = T}]; | |
27 | ||
28 | $LV = [\p{Grapheme_Cluster_Break = LV}]; | |
29 | $LVT = [\p{Grapheme_Cluster_Break = LVT}]; | |
30 | ||
31 | ||
32 | ## ------------------------------------------------- Forward rules. !!chain turns on rule chaining for the rule set (see the ICU break-rules documentation). Each rule under !!forward names a pair of adjacent classes that are kept in one cluster, in this order: CR followed by LF; the three Korean syllable sequences (L before L, V, LV or LVT; LV or V before V or T; LVT or T before T); then any character other than Control, CR or LF followed by $Extend; and the same followed by $SpacingMark. | |
33 | !!chain; | |
34 | ||
35 | !!forward; | |
36 | ||
37 | $CR $LF; | |
38 | ||
39 | $L ($L | $V | $LV | $LVT); | |
40 | ($LV | $V) ($V | $T); | |
41 | ($LVT | $T) $T; | |
42 | ||
43 | [^$Control $CR $LF] $Extend; | |
44 | ||
45 | [^$Control $CR $LF] $SpacingMark; | |
46 | ||
47 | ||
48 | ## ------------------------------------------------- Reverse rules. Every rule under !!reverse is the matching !!forward rule with its two operands swapped, i.e. the same six pairs written in backward reading order (LF then CR, the three Korean syllable pairs, then $Extend or $SpacingMark ahead of a character that is not Control, CR or LF). Keep the two sections in step when editing either one. | |
49 | ||
50 | !!reverse; | |
51 | $LF $CR; | |
52 | ($L | $V | $LV | $LVT) $L; | |
53 | ($V | $T) ($LV | $V); | |
54 | $T ($LVT | $T); | |
55 | ||
56 | $Extend [^$Control $CR $LF]; | |
57 | $SpacingMark [^$Control $CR $LF]; | |
58 | ||
59 | ||
60 | ## ------------------------------------------------- Safe-reverse section: the header is declared but no rules are listed under it in this file. NOTE(review): what the rule compiler substitutes for an empty section is not visible here - confirm against the ICU version in use before adding rules. | |
61 | ||
62 | !!safe_reverse; | |
63 | ||
64 | ||
65 | ## ------------------------------------------------- Safe-forward section: likewise declared with no rules listed under it in this file. | |
66 | ||
67 | !!safe_forward; | |
68 |