]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | // -*- Coding: utf-8; -*- |
2 | //-------------------------------------------------------------------- | |
3 | // Copyright (c) 1999-2002, International Business Machines | |
4 | // Corporation and others. All Rights Reserved. | |
5 | //-------------------------------------------------------------------- | |
6 | // THIS IS A MACHINE-GENERATED FILE | |
7 | // Tool: dumpicurules.bat | |
8 | // Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt | |
9 | // Date: Sat Jul 27 10:31:07 2002 | |
10 | //-------------------------------------------------------------------- | |
11 | ||
12 | // Hiragana_Katakana | |
13 | ||
14 | t_Hira_Kana { | |
15 | Rule { | |
16 | //-------------------------------------------------------------------- | |
17 | //-------------------------------------------------------------------- | |
18 | //-------------------------------------------------------------------- | |
19 | ||
20 | // note: a global filter is more efficient, but MUST include all source chars | |
21 | ":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;" | |
22 | ":: NFKC ();" | |
23 | ||
24 | // Hiragana-Katakana | |
25 | ||
26 | // This is largely a one-to-one mapping, but it has a | |
27 | // few kinks: | |
28 | ||
29 | // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no | |
30 | // Hiragana equivalents. We use Hiragana wa/wi/we/wo | |
31 | // (308F-3092) with a voicing mark (3099), which is | |
32 | // semantically equivalent. However, this is a non- | |
33 | // roundtripping transformation. | |
34 | ||
35 | // 2. The Katakana small ka/ke (30F5,30F6) have no | |
36 | // Hiragana equivalents. We convert them to normal | |
37 | // Hiragana ka/ke (304B,3051). This is a one-way | |
38 | // information-losing transformation and precludes | |
39 | // round-tripping of 30F5 and 30F6. | |
40 | ||
41 | // 3. The combining marks 3099-309C are in the Hiragana | |
42 | // block, but they apply to Katakana as well, so we | |
43 | // leave them untouched. | |
44 | ||
45 | // 4. The Katakana prolonged sound mark 30FC doubles the | |
46 | // preceding vowel. This is a one-way information- | |
47 | // losing transformation from Katakana to Hiragana. | |
48 | ||
49 | // 5. The Katakana middle dot separates words in foreign | |
50 | // expressions; we leave this unmodified. | |
51 | ||
52 | // The above points preclude successful round-trip | |
53 | // transformations of arbitrary input text. However, | |
54 | // they provide naturalistic results that should conform | |
55 | // to user expectations. | |
56 | ||
57 | ||
58 | // Combining equivalents va/vi/ve/vo | |
59 | "わ゙ <> ヷ;" | |
60 | "ゐ゙ <> ヸ;" | |
61 | "ゑ゙ <> ヹ;" | |
62 | "を゙ <> ヺ;" | |
63 | ||
64 | // One-to-one mappings, main block | |
65 | // 3041:3094 <> 30A1:30F4 | |
66 | // 309D,E <> 30FD,E | |
67 | "ぁ <> ァ;" | |
68 | "あ <> ア;" | |
69 | "ぃ <> ィ;" | |
70 | "い <> イ;" | |
71 | "ぅ <> ゥ;" | |
72 | "う <> ウ;" | |
73 | "ぇ <> ェ;" | |
74 | "え <> エ;" | |
75 | "ぉ <> ォ;" | |
76 | "お <> オ;" | |
77 | "か <> カ;" | |
78 | "が <> ガ;" | |
79 | "き <> キ;" | |
80 | "ぎ <> ギ;" | |
81 | "く <> ク;" | |
82 | "ぐ <> グ;" | |
83 | "け <> ケ;" | |
84 | "げ <> ゲ;" | |
85 | "こ <> コ;" | |
86 | "ご <> ゴ;" | |
87 | "さ <> サ;" | |
88 | "ざ <> ザ;" | |
89 | "し <> シ;" | |
90 | "じ <> ジ;" | |
91 | "す <> ス;" | |
92 | "ず <> ズ;" | |
93 | "せ <> セ;" | |
94 | "ぜ <> ゼ;" | |
95 | "そ <> ソ;" | |
96 | "ぞ <> ゾ;" | |
97 | "た <> タ;" | |
98 | "だ <> ダ;" | |
99 | "ち <> チ;" | |
100 | "ぢ <> ヂ;" | |
101 | "っ <> ッ;" | |
102 | "つ <> ツ;" | |
103 | "づ <> ヅ;" | |
104 | "て <> テ;" | |
105 | "で <> デ;" | |
106 | "と <> ト;" | |
107 | "ど <> ド;" | |
108 | "な <> ナ;" | |
109 | "に <> ニ;" | |
110 | "ぬ <> ヌ;" | |
111 | "ね <> ネ;" | |
112 | "の <> ノ;" | |
113 | "は <> ハ;" | |
114 | "ば <> バ;" | |
115 | "ぱ <> パ;" | |
116 | "ひ <> ヒ;" | |
117 | "び <> ビ;" | |
118 | "ぴ <> ピ;" | |
119 | "ふ <> フ;" | |
120 | "ぶ <> ブ;" | |
121 | "ぷ <> プ;" | |
122 | "へ <> ヘ;" | |
123 | "べ <> ベ;" | |
124 | "ぺ <> ペ;" | |
125 | "ほ <> ホ;" | |
126 | "ぼ <> ボ;" | |
127 | "ぽ <> ポ;" | |
128 | "ま <> マ;" | |
129 | "み <> ミ;" | |
130 | "む <> ム;" | |
131 | "め <> メ;" | |
132 | "も <> モ;" | |
133 | "ゃ <> ャ;" | |
134 | "や <> ヤ;" | |
135 | "ゅ <> ュ;" | |
136 | "ゆ <> ユ;" | |
137 | "ょ <> ョ;" | |
138 | "よ <> ヨ;" | |
139 | "ら <> ラ;" | |
140 | "り <> リ;" | |
141 | "る <> ル;" | |
142 | "れ <> レ;" | |
143 | "ろ <> ロ;" | |
144 | "ゎ <> ヮ;" | |
145 | "わ <> ワ;" | |
146 | "ゐ <> ヰ;" | |
147 | "ゑ <> ヱ;" | |
148 | "を <> ヲ;" | |
149 | "ん <> ン;" | |
150 | "ゔ <> ヴ;" | |
151 | "ゝ <> ヽ;" | |
152 | "ゞ <> ヾ;" | |
153 | ||
154 | // One-way Katakana-Hiragana xform of small K ka/ke to | |
155 | // normal H ka/ke. | |
156 | "か < ヵ;" | |
157 | "け < ヶ;" | |
158 | ||
159 | // Katakana followed by a prolonged sound mark 30FC has | |
160 | // its final vowel doubled. This is a Katakana-Hiragana | |
161 | // one-way information-losing transformation. We | |
162 | // include the small Katakana (e.g., small A 30A1) and | |
163 | // do not distinguish them from their large | |
164 | // counterparts. It doesn't make sense to double a | |
165 | // small counterpart vowel as a small Hiragana vowel, so | |
166 | // we don't do so. In natural text this should never | |
167 | // occur anyway. If a 30FC is seen without a preceding | |
168 | // vowel sound (e.g., after n 30F3) we do not change it. | |
169 | ||
170 | //## $long = ー; | |
171 | ||
172 | // The following categories are Hiragana, not Katakana | |
173 | // as might be expected, since by the time we get to the | |
174 | // 30FC, the preceding character will have already been | |
175 | // transformed to Hiragana. | |
176 | ||
177 | // {The following was mechanically generated from the | |
178 | // Unicode 3.0 data:} | |
179 | ||
180 | "$xa = [" | |
181 | "ぁ あ か が さ ざ" | |
182 | "た だ な は ば ぱ" | |
183 | "ま ゃ や ら ゎ わ" | |
184 | "];" | |
185 | ||
186 | "$xi = [" | |
187 | "ぃ い き ぎ し じ" | |
188 | "ち ぢ に ひ び ぴ" | |
189 | "み り ゐ" | |
190 | "];" | |
191 | ||
192 | "$xu = [" | |
193 | "ぅ う く ぐ す ず" | |
194 | "っ つ づ ぬ ふ ぶ" | |
195 | "ぷ む ゅ ゆ る ゔ" | |
196 | "];" | |
197 | ||
198 | "$xe = [" | |
199 | "ぇ え け げ せ ぜ" | |
200 | "て で ね へ べ ぺ" | |
201 | "め れ ゑ" | |
202 | "];" | |
203 | ||
204 | "$xo = [" | |
205 | "ぉ お こ ご そ ぞ" | |
206 | "と ど の ほ ぼ ぽ" | |
207 | "も ょ よ ろ を" | |
208 | "];" | |
209 | ||
210 | "あ < $xa {ー};" | |
211 | "い < $xi {ー};" | |
212 | "う < $xu {ー};" | |
213 | "え < $xe {ー};" | |
214 | "お < $xo {ー};" | |
215 | ||
216 | ":: (NFKC) ;" | |
217 | ||
218 | // note: a global filter is more efficient, but MUST include all source chars!! | |
219 | ":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);" | |
220 | ||
221 | // eof | |
222 | } | |
223 | } |