]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | # |
374ca955 | 2 | # Copyright (C) 2002-2004, International Business Machines Corporation and others. |
b75a7d8f A |
3 | # All Rights Reserved. |
4 | # | |
5 | # file: char.txt | |
6 | # | |
7 | # ICU Character Break Rules, also known as Grapheme Cluster Boundaries | |
8 | # See Unicode Standard Annex #29. | |
9 | # These rules are based on TR29 Version 4.0.0 | |
10 | # | |
11 | ||
12 | # | |
13 | # Character Class Definitions. | |
14 | # The names are those from TR29 (Unicode Standard Annex #29). | |
15 | # $Control is the Zl, Zp, Cc and Cf characters minus any that are Grapheme_Extend; $Extend is the Grapheme_Extend characters. | |
16 | $CR = \r; | |
17 | $LF = \n; | |
374ca955 | 18 | $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:] - [:Grapheme_Extend = TRUE:]]; |
b75a7d8f | 19 | |
374ca955 | 20 | $Extend = [[:Grapheme_Extend = TRUE:]]; |
b75a7d8f A |
21 | |
22 | # | |
23 | # Korean Syllable Definitions | |
24 | # Each class below is selected by its Hangul_Syllable_Type value; $HangulSyllable is the forward-order syllable pattern built from them. | |
25 | $L = [:Hangul_Syllable_Type = L:]; | |
26 | $V = [:Hangul_Syllable_Type = V:]; | |
27 | $T = [:Hangul_Syllable_Type = T:]; | |
28 | ||
29 | $LV = [:Hangul_Syllable_Type = LV:]; | |
30 | $LVT = [:Hangul_Syllable_Type = LVT:]; | |
31 | ||
32 | $HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+; | |
33 | ||
374ca955 A |
34 | ## ------------------------------------------------- |
35 | ||
36 | !!forward; | |
37 | # Two forward rules: $CR followed by $LF as one unit; otherwise one non-$Control character or one $HangulSyllable, plus any following $Extend characters. | |
b75a7d8f A |
38 | $CR $LF; |
39 | ([^$Control] | $HangulSyllable) $Extend*; | |
b75a7d8f | 40 | |
374ca955 | 41 | ## ------------------------------------------------- |
b75a7d8f | 42 | |
374ca955 A |
43 | !!reverse; |
44 | # Same clusters as the forward rules, with the elements written in the opposite order: $LF then $CR, and $Extend characters before the base character or Hangul syllable. | |
45 | $BackHangulSyllable = $L+ | ($T* ($V+$LV? | $LV | $LVT) $L*) | $T+; | |
46 | $BackOneCluster = ($LF $CR) | ($Extend* ([^$Control] | $BackHangulSyllable)); | |
47 | $BackOneCluster; | |
48 | ||
49 | ## ------------------------------------------------- | |
50 | ||
51 | !!safe_reverse; | |
52 | # The only rule in this section: one or more $V followed, in rule order, by one $L. | |
53 | # TR29 rules 6, 7 and 8 (the Hangul syllable sequence rules) - NOTE(review): confirm the numbering against TR29 version 4.0.0. | |
54 | $V+ $L; | |
55 | ||
56 | ## ------------------------------------------------- | |
57 | ||
58 | !!safe_forward; | |
59 | # The only rule in this section: one or more $V followed, in rule order, by one $T. | |
60 | # TR29 rules 6, 7 and 8 (the Hangul syllable sequence rules) - NOTE(review): confirm the numbering against TR29 version 4.0.0. | |
61 | $V+ $T;