]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/translit/t_Hira_Kana.txt
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / data / translit / t_Hira_Kana.txt
... / ...
CommitLineData
1 // -*- Coding: utf-8; -*-
2//--------------------------------------------------------------------
3// Copyright (c) 1999-2002, International Business Machines
4// Corporation and others. All Rights Reserved.
5//--------------------------------------------------------------------
6// THIS IS A MACHINE-GENERATED FILE
7// Tool: dumpicurules.bat
8// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
9// Date: Sat Jul 27 10:31:07 2002
10//--------------------------------------------------------------------
11
12// Hiragana_Katakana
13
14t_Hira_Kana {
15 Rule {
16//--------------------------------------------------------------------
17//--------------------------------------------------------------------
18//--------------------------------------------------------------------
19
20// note: a global filter is more efficient, but MUST include all source chars
21":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
22":: NFKC ();"
23
24// Hiragana-Katakana
25
26// This is largely a one-to-one mapping, but it has a
27// few kinks:
28
29// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
30// Hiragana equivalents. We use Hiragana wa/wi/we/wo
31// (308F-3092) with a voicing mark (3099), which is
32// semantically equivalent. However, this is a non-
33// roundtripping transformation.
34
35// 2. The Katakana small ka/ke (30F5,30F6) have no
36// Hiragana equivalents. We convert them to normal
37// Hiragana ka/ke (304B,3051). This is a one-way
38// information-losing transformation and precludes
39// round-tripping of 30F5 and 30F6.
40
41// 3. The combining marks 3099-309C are in the Hiragana
42// block, but they apply to Katakana as well, so we
43// leave them untouched.
44
45// 4. The Katakana prolonged sound mark 30FC doubles the
46// preceding vowel. This is a one-way information-
47// losing transformation from Katakana to Hiragana.
48
49// 5. The Katakana middle dot separates words in foreign
50// expressions; we leave this unmodified.
51
52// The above points preclude successful round-trip
53// transformations of arbitrary input text. However,
54// they provide naturalistic results that should conform
55// to user expectations.
56
57
58// Combining equivalents va/vi/ve/vo
59"わ゙ <> ヷ;"
60"ゐ゙ <> ヸ;"
61"ゑ゙ <> ヹ;"
62"を゙ <> ヺ;"
63
64// One-to-one mappings, main block
65// 3041:3094 <> 30A1:30F4
66// 309D,E <> 30FD,E
67"ぁ <> ァ;"
68"あ <> ア;"
69"ぃ <> ィ;"
70"い <> イ;"
71"ぅ <> ゥ;"
72"う <> ウ;"
73"ぇ <> ェ;"
74"え <> エ;"
75"ぉ <> ォ;"
76"お <> オ;"
77"か <> カ;"
78"が <> ガ;"
79"き <> キ;"
80"ぎ <> ギ;"
81"く <> ク;"
82"ぐ <> グ;"
83"け <> ケ;"
84"げ <> ゲ;"
85"こ <> コ;"
86"ご <> ゴ;"
87"さ <> サ;"
88"ざ <> ザ;"
89"し <> シ;"
90"じ <> ジ;"
91"す <> ス;"
92"ず <> ズ;"
93"せ <> セ;"
94"ぜ <> ゼ;"
95"そ <> ソ;"
96"ぞ <> ゾ;"
97"た <> タ;"
98"だ <> ダ;"
99"ち <> チ;"
100"ぢ <> ヂ;"
101"っ <> ッ;"
102"つ <> ツ;"
103"づ <> ヅ;"
104"て <> テ;"
105"で <> デ;"
106"と <> ト;"
107"ど <> ド;"
108"な <> ナ;"
109"に <> ニ;"
110"ぬ <> ヌ;"
111"ね <> ネ;"
112"の <> ノ;"
113"は <> ハ;"
114"ば <> バ;"
115"ぱ <> パ;"
116"ひ <> ヒ;"
117"び <> ビ;"
118"ぴ <> ピ;"
119"ふ <> フ;"
120"ぶ <> ブ;"
121"ぷ <> プ;"
122"へ <> ヘ;"
123"べ <> ベ;"
124"ぺ <> ペ;"
125"ほ <> ホ;"
126"ぼ <> ボ;"
127"ぽ <> ポ;"
128"ま <> マ;"
129"み <> ミ;"
130"む <> ム;"
131"め <> メ;"
132"も <> モ;"
133"ゃ <> ャ;"
134"や <> ヤ;"
135"ゅ <> ュ;"
136"ゆ <> ユ;"
137"ょ <> ョ;"
138"よ <> ヨ;"
139"ら <> ラ;"
140"り <> リ;"
141"る <> ル;"
142"れ <> レ;"
143"ろ <> ロ;"
144"ゎ <> ヮ;"
145"わ <> ワ;"
146"ゐ <> ヰ;"
147"ゑ <> ヱ;"
148"を <> ヲ;"
149"ん <> ン;"
150"ゔ <> ヴ;"
151"ゝ <> ヽ;"
152"ゞ <> ヾ;"
153
154// One-way Katakana-Hiragana xform of small K ka/ke to
155// normal H ka/ke.
156"か < ヵ;"
157"け < ヶ;"
158
159// Katakana followed by a prolonged sound mark 30FC has
160// its final vowel doubled. This is a Katakana-Hiragana
161// one-way information-losing transformation. We
162// include the small kana (e.g., Hiragana small A 3041) and
163// do not distinguish them from their large
164// counterparts. It doesn't make sense to double a
165// small counterpart vowel as a small Hiragana vowel, so
166// we don't do so. In natural text this should never
167// occur anyway. If a 30FC is seen without a preceding
168// vowel sound (e.g., after n 30F3) we do not change it.
169
170//## $long = ー;
171
172// The following categories are Hiragana, not Katakana
173// as might be expected, since by the time we get to the
174// 30FC, the preceding character will have already been
175// transformed to Hiragana.
176
177// {The following mechanically generated from the
178// Unicode 3.0 data:}
179
180"$xa = ["
181"ぁ あ か が さ ざ"
182"た だ な は ば ぱ"
183"ま ゃ や ら ゎ わ"
184"];"
185
186"$xi = ["
187"ぃ い き ぎ し じ"
188"ち ぢ に ひ び ぴ"
189"み り ゐ"
190"];"
191
192"$xu = ["
193"ぅ う く ぐ す ず"
194"っ つ づ ぬ ふ ぶ"
195"ぷ む ゅ ゆ る ゔ"
196"];"
197
198"$xe = ["
199"ぇ え け げ せ ぜ"
200"て で ね へ べ ぺ"
201"め れ ゑ"
202"];"
203
204"$xo = ["
205"ぉ お こ ご そ ぞ"
206"と ど の ほ ぼ ぽ"
207"も ょ よ ろ を"
208"];"
209
210"あ < $xa {ー};"
211"い < $xi {ー};"
212"う < $xu {ー};"
213"え < $xe {ー};"
214"お < $xo {ー};"
215
216":: (NFKC) ;"
217
218// note: a global filter is more efficient, but MUST include all source chars!!
219":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
220
221// eof
222 }
223}