2 # Copyright (C) 2002-2004, International Business Machines Corporation
3 # and others. All Rights Reserved.
8 # See Unicode Standard Annex #29.
9 # These rules are based on Version 4.1 draft, dated 2004-11-11
12 ##############################################################################
14 # Character class definitions from TR 29
16 ##############################################################################
# $Katakana: every character whose Script property is KATAKANA, plus the
# individually named kana repeat marks, voiced / semi-voiced sound marks,
# double hyphen and prolonged sound marks (full- and half-width) listed below.
20 $Katakana = [[:Script = KATAKANA:]
21 [:name = VERTICAL KANA REPEAT MARK:]
22 [:name = VERTICAL KANA REPEAT WITH VOICED SOUND MARK:]
23 [:name = VERTICAL KANA REPEAT MARK UPPER HALF:]
24 [:name = VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF:]
25 [:name = VERTICAL KANA REPEAT MARK LOWER HALF:]
26 [:name = KATAKANA-HIRAGANA VOICED SOUND MARK:]
27 [:name = KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK:]
28 [:name = KATAKANA-HIRAGANA DOUBLE HYPHEN:]
29 [:name = KATAKANA-HIRAGANA PROLONGED SOUND MARK:]
30 [:name = HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK:]
31 [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:]
32 [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]];
# $ALetter: all Alphabetic characters plus HEBREW PUNCTUATION GERESH, with
# Hiragana-script characters and Grapheme_Extend characters removed
# (each "- [...]" term is a set difference).
# NOTE(review): as written here nothing subtracts $Katakana or ideographic
# characters, so this set may overlap $Katakana and $Ideographic -- confirm
# against the complete rule file that this is intended.
35 $ALetter = [[:Alphabetic:] [:name= HEBREW PUNCTUATION GERESH:]
38 - [:Script = Hiragana:]
40 - [:Grapheme_Extend = TRUE:]];
# $MidLetter: the five named punctuation characters listed here. In this file
# they appear in rules only between (or directly before) $ALetter characters.
42 $MidLetter = [[:name = APOSTROPHE:] [:name = MIDDLE DOT:]
43 [:name = HEBREW PUNCTUATION GERSHAYIM:]
44 [:name = RIGHT SINGLE QUOTATION MARK:]
45 [:name = HYPHENATION POINT:]];
# $MidNum: characters whose Line_Break property is Infix_Numeric. In this file
# they appear in rules only between (or directly before) $Numeric characters.
48 $MidNum = [[:LineBreak = Infix_Numeric:]];
# $Numeric: decimal digits (general category Nd) plus the Arabic decimal and
# thousands separators.
49 $Numeric = [[:Nd:][:name = ARABIC DECIMAL SEPARATOR:][:name = ARABIC THOUSANDS SEPARATOR:]];
# $ExtendNumLet: Connector_Punctuation characters, excluding the full-width
# and half-width Katakana middle dots.
50 $ExtendNumLet = [[:Connector_Punctuation:]
51 - [:name = KATAKANA MIDDLE DOT:]
52 - [:name = HALFWIDTH KATAKANA MIDDLE DOT:]];
57 # Character Class Definitions.
58 # The names are those from TR29.
# $Extend: characters with the Grapheme_Extend property.
63 $Extend = [[:Grapheme_Extend = TRUE:]];
# $Control: line separators (Zl), paragraph separators (Zp), control (Cc) and
# format (Cf) characters, minus anything already in $Extend.
64 $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:] - $Extend];
# $Format: format (Cf) characters only, minus $Extend. It is a subset of
# $Control and is the class the rules allow between word characters.
65 $Format = [[:Cf:] - $Extend];
# $Hiragana / $Ideographic: plain property sets; each is matched by its own
# single-character rule rather than being paired with other classes.
66 $Hiragana = [:Hiragana:];
67 $Ideographic = [:IDEOGRAPHIC:];
# "Ex" variants: each base class followed by zero or more $Extend characters,
# so a base character and its trailing extenders are handled as one unit by
# the rules that use these variables.
69 $ALetterEx = $ALetter $Extend*;
70 $NumericEx = $Numeric $Extend*;
71 $MidNumEx = $MidNum $Extend*;
72 $MidLetterEx = $MidLetter $Extend*;
73 $KatakanaEx = $Katakana $Extend*;
74 $ExtendNumLetEx = $ExtendNumLet $Extend*;
76 ## -------------------------------------------------
81 # Rule 3 - don't break grapheme clusters.
82 # see the character (grapheme cluster) break rules
85 #[^$Control] $Extend*;
86 #$NumericEx $Extend* {100};
87 #$ALetterEx $Extend* {200};
# Word rules. Each rule names two (or three) adjacent character classes, with
# any number of $Format characters permitted between them. The trailing {nnn}
# is the rule's status tag.
# NOTE(review): the tag values presumably correspond to ICU's word-status
# ranges (100 number, 200 letter, 300 kana, 400 ideograph) -- confirm against
# ubrk.h. The section directive for these rules is not visible in this excerpt.
#
# Letter followed by letter.
95 $ALetterEx $Format* $ALetterEx {200};
# Letter, one $MidLetter character, letter.
98 $ALetterEx $Format* $MidLetterEx $Format* $ALetterEx {200};
# Digit followed by digit.
102 $NumericEx $Format* $NumericEx {100};
# Letter followed by digit.
106 $ALetterEx $Format* $NumericEx {200};
# Digit followed by letter.
110 $NumericEx $Format* $ALetterEx {200};
# Digit, one $MidNum character, digit.
114 $NumericEx $Format* $MidNumEx $Format* $NumericEx {100};
# Katakana followed by Katakana.
118 $KatakanaEx $Format* $KatakanaEx {300};
# A single Hiragana or Ideographic character with its trailing extenders;
# these classes are not paired with a following character of the same class.
119 $Hiragana $Extend* {300};
120 $Ideographic $Extend* {400};
# Letter, digit, Katakana or $ExtendNumLet followed by an $ExtendNumLet
# (connector punctuation) character.
124 $ALetterEx $Format* $ExtendNumLetEx {200}; # (13a)
125 $NumericEx $Format* $ExtendNumLetEx {100}; # (13a)
126 $KatakanaEx $Format* $ExtendNumLetEx {300}; # (13a)
127 $ExtendNumLetEx $Format* $ExtendNumLetEx{200}; # (13a)
# $ExtendNumLet followed by letter, digit or Katakana.
129 $ExtendNumLetEx $Format* $ALetterEx {200}; # (13b)
130 $ExtendNumLetEx $Format* $NumericEx {100}; # (13b)
131 $ExtendNumLetEx $Format* $KatakanaEx {300}; # (13b)
135 ## -------------------------------------------------
# "Back...Ex" variants: the same base classes as the "...Ex" variables, but
# with the $Extend* run placed before the base character instead of after it.
# NOTE(review): presumably this is because these are matched while scanning
# the text backwards -- confirm; the section directive is not visible here.
139 $BackALetterEx = $Extend* $ALetter;
140 $BackNumericEx = $Extend* $Numeric;
141 $BackMidNumEx = $Extend* $MidNum;
142 $BackMidLetterEx = $Extend* $MidLetter;
143 $BackKatakanaEx = $Extend* $Katakana;
144 $BackExtendNumLetEx= $Extend* $ExtendNumLet;
148 # see the character (grapheme cluster) break rules
# Rules written with the "Back...Ex" variables. They cover the same class
# pairings as the tagged word rules, with the operands of each pair swapped,
# and carry no status tags.
# NOTE(review): presumably this is the reverse-iteration rule section --
# confirm; the section directive is not visible in this excerpt.
#
# Any run of $Extend followed by one character that is not in $Control.
150 $Extend* [^$Control];
# Letter / letter.
154 $BackALetterEx $Format* $BackALetterEx;
# Letter, $MidLetter, letter.
158 $BackALetterEx $Format* $BackMidLetterEx $Format* $BackALetterEx;
# Digit / digit.
163 $BackNumericEx $Format* $BackNumericEx;
# Digit / letter, and letter / digit.
167 $BackNumericEx $Format* $BackALetterEx;
171 $BackALetterEx $Format* $BackNumericEx;
# Digit, $MidNum, digit.
175 $BackNumericEx $Format* $BackMidNumEx $Format* $BackNumericEx;
# Katakana / Katakana.
179 $BackKatakanaEx $Format* $BackKatakanaEx;
# Pairings with $ExtendNumLet on either side, written here as alternations
# instead of one rule per class.
183 ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx) $Format* $BackExtendNumLetEx;
184 $BackExtendNumLetEx $Format* ($BackALetterEx | $BackNumericEx | $BackKatakanaEx);
186 ## -------------------------------------------------
# NOTE(review): presumably the safe-reverse rule section (rules used to back
# up to a position from which matching can safely restart) -- confirm; the
# section directive is not visible in this excerpt.
192 $Extend+; # comes into play when buffer _begins_ with an $Extend+.
# One or more $Format characters followed by any of the "Back" word classes.
195 $Format+ $BackALetterEx;
196 $Format+ $BackNumericEx;
197 $Format+ $BackMidLetterEx;
198 $Format+ $BackMidNumEx;
199 $Format+ $BackKatakanaEx;
200 $Format+ $BackExtendNumLetEx;
# A $MidLetter followed by a letter, and a $MidNum followed by a digit: the
# two-class remainders of the three-class letter and number rules.
204 $MidLetter $Format* $BackALetterEx;
207 $MidNum $Format* $BackNumericEx;
209 ## -------------------------------------------------
# NOTE(review): presumably the safe-forward rule section -- confirm; the
# section directive is not visible in this excerpt.
#
# Optional $Extend run, at least one $Format, then any word class.
217 $Extend* $Format+ $ALetterEx;
218 $Extend* $Format+ $NumericEx;
219 $Extend* $Format+ $MidLetterEx;
220 $Extend* $Format+ $MidNumEx;
221 $Extend* $Format+ $KatakanaEx;
222 $Extend* $Format+ $ExtendNumLetEx;
# At least one $Extend, optional $Format run, then any word class.
224 $Extend+ $Format* $ALetterEx;
225 $Extend+ $Format* $NumericEx;
226 $Extend+ $Format* $MidLetterEx;
227 $Extend+ $Format* $MidNumEx;
228 $Extend+ $Format* $KatakanaEx;
229 $Extend+ $Format* $ExtendNumLetEx;
# A $MidLetter followed by a letter, and a $MidNum followed by a digit: the
# two-class remainders of the three-class letter and number rules.
232 $MidLetterEx $Format* $ALetterEx;
235 $MidNumEx $Format* $NumericEx;