[apple/icu.git] / icuSources / data / translit / t_Hira_Kana.txt

 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
// Date: Sat Jul 27 10:31:07 2002
//--------------------------------------------------------------------

// Hiragana_Katakana

t_Hira_Kana {
  Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------

// Rule text for the Hiragana <-> Katakana transliterator.  As the
// section comments below describe, "<>" lines are one-to-one mappings
// used in both directions, while "<" lines are one-way
// Katakana-to-Hiragana transformations.

// note: a global filter is more efficient, but MUST include all source chars
// NOTE(review): the first escape is written \\u0000 while the others use
// a single backslash; presumably this keeps a literal NUL out of the
// compiled string and leaves the escape for the rule parser -- confirm.
":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
// NOTE(review): in ICU rule syntax the parenthesized part of a "::" line
// is the reverse-direction half, so NFKC here should apply only in the
// forward (Hiragana-to-Katakana) direction -- confirm against the ICU
// transform rule documentation.
":: NFKC ();"

// Hiragana-Katakana

// This is largely a one-to-one mapping, but it has a
// few kinks:

// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
// Hiragana equivalents.  We use Hiragana wa/wi/we/wo
// (308F-3092) with a voicing mark (3099), which is
// semantically equivalent.  However, this is a non-
// roundtripping transformation.

// 2. The Katakana small ka/ke (30F5,30F6) have no
// Hiragana equivalents.  We convert them to normal
// Hiragana ka/ke (304B,3051).  This is a one-way
// information-losing transformation and precludes
// round-tripping of 30F5 and 30F6.

// 3. The combining marks 3099-309C are in the Hiragana
// block, but they apply to Katakana as well, so we
// leave them untouched.

// 4. The Katakana prolonged sound mark 30FC doubles the
// preceding vowel.  This is a one-way information-
// losing transformation from Katakana to Hiragana.

// 5. The Katakana middle dot separates words in foreign
// expressions; we leave this unmodified.

// The above points preclude successful round-trip
// transformations of arbitrary input text.  However,
// they provide naturalistic results that should conform
// to user expectations.


// Combining equivalents va/vi/ve/vo
// (Hiragana wa/wi/we/wo followed by the combining voicing mark 3099;
// see kink 1 above.)
"わ゙ <> ヷ;"
"ゐ゙ <> ヸ;"
"ゑ゙ <> ヹ;"
"を゙ <> ヺ;"

// One-to-one mappings, main block
// 3041:3094 <> 30A1:30F4
// 309D,E <> 30FD,E
"ぁ <> ァ;"
"あ <> ア;"
"ぃ <> ィ;"
"い <> イ;"
"ぅ <> ゥ;"
"う <> ウ;"
"ぇ <> ェ;"
"え <> エ;"
"ぉ <> ォ;"
"お <> オ;"
"か <> カ;"
"が <> ガ;"
"き <> キ;"
"ぎ <> ギ;"
"く <> ク;"
"ぐ <> グ;"
"け <> ケ;"
"げ <> ゲ;"
"こ <> コ;"
"ご <> ゴ;"
"さ <> サ;"
"ざ <> ザ;"
"し <> シ;"
"じ <> ジ;"
"す <> ス;"
"ず <> ズ;"
"せ <> セ;"
"ぜ <> ゼ;"
"そ <> ソ;"
"ぞ <> ゾ;"
"た <> タ;"
"だ <> ダ;"
"ち <> チ;"
"ぢ <> ヂ;"
"っ <> ッ;"
"つ <> ツ;"
"づ <> ヅ;"
"て <> テ;"
"で <> デ;"
"と <> ト;"
"ど <> ド;"
"な <> ナ;"
"に <> ニ;"
"ぬ <> ヌ;"
"ね <> ネ;"
"の <> ノ;"
"は <> ハ;"
"ば <> バ;"
"ぱ <> パ;"
"ひ <> ヒ;"
"び <> ビ;"
"ぴ <> ピ;"
"ふ <> フ;"
"ぶ <> ブ;"
"ぷ <> プ;"
"へ <> ヘ;"
"べ <> ベ;"
"ぺ <> ペ;"
"ほ <> ホ;"
"ぼ <> ボ;"
"ぽ <> ポ;"
"ま <> マ;"
"み <> ミ;"
"む <> ム;"
"め <> メ;"
"も <> モ;"
"ゃ <> ャ;"
"や <> ヤ;"
"ゅ <> ュ;"
"ゆ <> ユ;"
"ょ <> ョ;"
"よ <> ヨ;"
"ら <> ラ;"
"り <> リ;"
"る <> ル;"
"れ <> レ;"
"ろ <> ロ;"
"ゎ <> ヮ;"
"わ <> ワ;"
"ゐ <> ヰ;"
"ゑ <> ヱ;"
"を <> ヲ;"
"ん <> ン;"
"ゔ <> ヴ;"
"ゝ <> ヽ;"
"ゞ <> ヾ;"

// One-way Katakana-Hiragana xform of small K ka/ke to
// normal H ka/ke.
"か < ヵ;"
"け < ヶ;"

// Katakana followed by a prolonged sound mark 30FC has
// its final vowel doubled.  This is a Katakana-Hiragana
// one-way information-losing transformation.  We
// include the small kana (e.g., small A, which is 30A1
// in Katakana and 3041 once mapped to Hiragana) and
// do not distinguish them from their large
// counterparts.  It doesn't make sense to double a
// small counterpart vowel as a small Hiragana vowel, so
// we don't do so.  In natural text this should never
// occur anyway.  If a 30FC is seen without a preceding
// vowel sound (e.g., after n 30F3) we do not change it.

// Disabled variable definition; the rules at the end of this
// section write the prolonged sound mark ー literally instead.
//## $long = ー;

// The following categories are Hiragana, not Katakana
// as might be expected, since by the time we get to the
// 30FC, the preceding character will have already been
// transformed to Hiragana.

// {The following sets were mechanically generated from
// the Unicode 3.0 data:}

// $xa: Hiragana after which ー is rewritten as あ.
"$xa = [" 
"ぁ あ か が さ ざ" 
"た だ な は ば ぱ" 
"ま ゃ や ら ゎ わ" 
"];"

// $xi: Hiragana after which ー is rewritten as い.
"$xi = [" 
"ぃ い き ぎ し じ" 
"ち ぢ に ひ び ぴ" 
"み り ゐ" 
"];"

// $xu: Hiragana after which ー is rewritten as う.
// NOTE(review): this set includes small っ, which does not obviously
// carry a vowel sound of its own; presumably an artifact of the
// mechanical generation -- confirm whether that is intended.
"$xu = [" 
"ぅ う く ぐ す ず" 
"っ つ づ ぬ ふ ぶ" 
"ぷ む ゅ ゆ る ゔ" 
"];"

// $xe: Hiragana after which ー is rewritten as え.
"$xe = [" 
"ぇ え け げ せ ぜ" 
"て で ね へ べ ぺ" 
"め れ ゑ" 
"];"

// $xo: Hiragana after which ー is rewritten as お.
"$xo = [" 
"ぉ お こ ご そ ぞ" 
"と ど の ほ ぼ ぽ" 
"も ょ よ ろ を" 
"];"

// One-way (Katakana-to-Hiragana) rules: the set before the braces is
// the required preceding character, and the ー inside the braces is
// the text that gets replaced by the plain vowel on the left.
"あ < $xa {ー};"
"い < $xi {ー};"
"う < $xu {ー};"
"え < $xe {ー};"
"お < $xo {ー};"

// NOTE(review): counterpart of the ":: NFKC ();" line near the top;
// with NFKC inside the parentheses it should apply only in the reverse
// (Katakana-to-Hiragana) direction -- confirm.
":: (NFKC) ;"

// note: a global filter is more efficient, but MUST include all source chars!!
// Same character set as the filter at the top of the rules, here
// wrapped in parentheses.
":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"

// eof
  }
}
Commit	Line	Data
b75a7d8f A	1	// -- Coding: utf-8; --
	2	//--------------------------------------------------------------------
	3	// Copyright (c) 1999-2002, International Business Machines
	4	// Corporation and others. All Rights Reserved.
	5	//--------------------------------------------------------------------
	6	// THIS IS A MACHINE-GENERATED FILE
	7	// Tool: dumpicurules.bat
	8	// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
	9	// Date: Sat Jul 27 10:31:07 2002
	10	//--------------------------------------------------------------------
	11
	12	// Hiragana_Katakana
	13
	14	t_Hira_Kana {
	15	Rule {
	16	//--------------------------------------------------------------------
	17	//--------------------------------------------------------------------
	18	//--------------------------------------------------------------------
	19
	20	// note: a global filter is more efficient, but MUST include all source chars
	21	":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
	22	":: NFKC ();"
	23
	24	// Hiragana-Katakana
	25
	26	// This is largely a one-to-one mapping, but it has a
	27	// few kinks:
	28
	29	// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
	30	// Hiragana equivalents. We use Hiragana wa/wi/we/wo
	31	// (308F-3092) with a voicing mark (3099), which is
	32	// semantically equivalent. However, this is a non-
	33	// roundtripping transformation.
	34
	35	// 2. The Katakana small ka/ke (30F5,30F6) have no
	36	// Hiragana equivalents. We convert them to normal
	37	// Hiragana ka/ke (304B,3051). This is a one-way
	38	// information-losing transformation and precludes
	39	// round-tripping of 30F5 and 30F6.
	40
	41	// 3. The combining marks 3099-309C are in the Hiragana
	42	// block, but they apply to Katakana as well, so we
	43	// leave them untouched.
	44
	45	// 4. The Katakana prolonged sound mark 30FC doubles the
	46	// preceding vowel. This is a one-way information-
	47	// losing transformation from Katakana to Hiragana.
	48
	49	// 5. The Katakana middle dot separates words in foreign
	50	// expressions; we leave this unmodified.
	51
	52	// The above points preclude successful round-trip
	53	// transformations of arbitrary input text. However,
	54	// they provide naturalistic results that should conform
	55	// to user expectations.
	56
	57
	58	// Combining equivalents va/vi/ve/vo
	59	"わ゙ <> ヷ;"
	60	"ゐ゙ <> ヸ;"
	61	"ゑ゙ <> ヹ;"
	62	"を゙ <> ヺ;"
	63
	64	// One-to-one mappings, main block
65	// 3041:3094 <> 30A1:30F4
66	// 309D,E <> 30FD,E
67	"ぁ <> ァ;"
68	"あ <> ア;"
69	"ぃ <> ィ;"
70	"い <> イ;"
71	"ぅ <> ゥ;"
72	"う <> ウ;"
73	"ぇ <> ェ;"
74	"え <> エ;"
75	"ぉ <> ォ;"
76	"お <> オ;"
77	"か <> カ;"
78	"が <> ガ;"
79	"き <> キ;"
80	"ぎ <> ギ;"
81	"く <> ク;"
82	"ぐ <> グ;"
83	"け <> ケ;"
84	"げ <> ゲ;"
85	"こ <> コ;"
86	"ご <> ゴ;"
87	"さ <> サ;"
88	"ざ <> ザ;"
89	"し <> シ;"
90	"じ <> ジ;"
91	"す <> ス;"
92	"ず <> ズ;"
93	"せ <> セ;"
94	"ぜ <> ゼ;"
95	"そ <> ソ;"
96	"ぞ <> ゾ;"
97	"た <> タ;"
98	"だ <> ダ;"
99	"ち <> チ;"
100	"ぢ <> ヂ;"
101	"っ <> ッ;"
102	"つ <> ツ;"
103	"づ <> ヅ;"
104	"て <> テ;"
105	"で <> デ;"
106	"と <> ト;"
107	"ど <> ド;"
108	"な <> ナ;"
109	"に <> ニ;"
110	"ぬ <> ヌ;"
111	"ね <> ネ;"
112	"の <> ノ;"
113	"は <> ハ;"
114	"ば <> バ;"
115	"ぱ <> パ;"
116	"ひ <> ヒ;"
117	"び <> ビ;"
118	"ぴ <> ピ;"
119	"ふ <> フ;"
120	"ぶ <> ブ;"
121	"ぷ <> プ;"
122	"へ <> ヘ;"
123	"べ <> ベ;"
124	"ぺ <> ペ;"
125	"ほ <> ホ;"
126	"ぼ <> ボ;"
127	"ぽ <> ポ;"
128	"ま <> マ;"
129	"み <> ミ;"
130	"む <> ム;"
131	"め <> メ;"
132	"も <> モ;"
133	"ゃ <> ャ;"
134	"や <> ヤ;"
135	"ゅ <> ュ;"
136	"ゆ <> ユ;"
137	"ょ <> ョ;"
138	"よ <> ヨ;"
139	"ら <> ラ;"
140	"り <> リ;"
141	"る <> ル;"
142	"れ <> レ;"
143	"ろ <> ロ;"
144	"ゎ <> ヮ;"
145	"わ <> ワ;"
146	"ゐ <> ヰ;"
147	"ゑ <> ヱ;"
148	"を <> ヲ;"
149	"ん <> ン;"
150	"ゔ <> ヴ;"
151	"ゝ <> ヽ;"
152	"ゞ <> ヾ;"
153
154	// One-way Katakana-Hiragana xform of small K ka/ke to
155	// normal H ka/ke.
156	"か < ヵ;"
157	"け < ヶ;"
158
159	// Katakana followed by a prolonged sound mark 30FC has
160	// its final vowel doubled. This is a Katakana-Hiragana
161	// one-way information-losing transformation. We
162	// include the small Katakana (e.g., small A 3041) and
163	// do not distinguish them from their large
164	// counterparts. It doesn't make sense to double a
165	// small counterpart vowel as a small Hiragana vowel, so
166	// we don't do so. In natural text this should never
167	// occur anyway. If a 30FC is seen without a preceding
168	// vowel sound (e.g., after n 30F3) we do not change it.
169
170	//## $long = ー;
171
172	// The following categories are Hiragana, not Katakana
173	// as might be expected, since by the time we get to the
174	// 30FC, the preceding character will have already been
175	// transformed to Hiragana.
176
177	// {The following mechanically generated from the
178	// Unicode 3.0 data:}
179
180	"$xa = ["
181	"ぁあかがさざ"
182	"ただなはばぱ"
183	"まゃやらゎわ"
184	"];"
185
186	"$xi = ["
187	"ぃいきぎしじ"
188	"ちぢにひびぴ"
189	"みりゐ"
190	"];"
191
192	"$xu = ["
193	"ぅうくぐすず"
194	"っつづぬふぶ"
195	"ぷむゅゆるゔ"
196	"];"
197
198	"$xe = ["
199	"ぇえけげせぜ"
200	"てでねへべぺ"
201	"めれゑ"
202	"];"
203
204	"$xo = ["
205	"ぉおこごそぞ"
206	"とどのほぼぽ"
207	"もょよろを"
208	"];"
209
210	"あ < $xa {ー};"
211	"い < $xi {ー};"
212	"う < $xu {ー};"
213	"え < $xe {ー};"
214	"お < $xo {ー};"
215
216	":: (NFKC) ;"
217
218	// note: a global filter is more efficient, but MUST include all source chars!!
219	":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
220
221	// eof
222	}
223	}