[apple/icu.git] / icuSources / data / translit / Hira_Kana.txt

# ***************************************************************************
# *
# *  Copyright (C) 2004-2016, International Business Machines
# *  Corporation; Unicode, Inc.; and others.  All Rights Reserved.
# *
# ***************************************************************************
# File: Hira_Kana.txt
# Generated from CLDR 
#

# Forward-direction (Hiragana → Katakana) preamble.
# The first `::` line is the global filter: only characters in this set are
# considered by the rules below; anything else passes through untouched.
# note: a global filter is more efficient, but MUST include all source chars
:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
# Apply NFKC in the forward direction.  The empty parentheses are the
# reverse-direction slot of this entry: nothing is applied there.
:: NFKC ();
# Hiragana-Katakana
# This is largely a one-to-one mapping, but it has a
# few kinks:
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
# Hiragana equivalents.  We use Hiragana wa/wi/we/wo
# (308F-3092) with a voicing mark (3099), which is
# semantically equivalent.  However, this is a non-
# roundtripping transformation.
# 2. The Katakana small ka/ke (30F5,30F6) have no
# Hiragana equivalents.  We convert them to normal
# Hiragana ka/ke (304B,3051).  This is a one-way
# information-losing transformation and precludes
# round-tripping of 30F5 and 30F6.
# 3. The combining marks 3099-309C are in the Hiragana
# block, but they apply to Katakana as well, so we
# leave them untouched.
# 4. The Katakana prolonged sound mark 30FC doubles the
# preceding vowel.  This is a one-way information-
# losing transformation from Katakana to Hiragana.
# 5. The Katakana middle dot separates words in foreign
# expressions; we leave this unmodified.
# The above points preclude successful round-trip
# transformations of arbitrary input text.  However,
# they provide naturalistic results that should conform
# to user expectations.
# va/vi/ve/vo: Katakana 30F7-30FA pair with Hiragana wa/wi/we/wo (308F-3092)
# followed by the combining voicing mark 3099.  Spelled with escapes so the
# combining mark cannot be lost or merged by an editor.
\u308F\u3099 ↔ \u30F7;  # wa + 3099 ↔ va
\u3090\u3099 ↔ \u30F8;  # wi + 3099 ↔ vi
\u3091\u3099 ↔ \u30F9;  # we + 3099 ↔ ve
\u3092\u3099 ↔ \u30FA;  # wo + 3099 ↔ vo
# One-to-one mappings, main block.
# Every rule here uses `↔`, so it applies in both directions: the Hiragana
# on the left becomes the Katakana on the right in the forward direction,
# and the Katakana becomes the Hiragana in the reverse direction.
# 3041:3094 ↔ 30A1:30F4
# 309D,E ↔ 30FD,E
# Vowels a/i/u/e/o (small form first, then normal form)
ぁ ↔ ァ;
あ ↔ ア;
ぃ ↔ ィ;
い ↔ イ;
ぅ ↔ ゥ;
う ↔ ウ;
ぇ ↔ ェ;
え ↔ エ;
ぉ ↔ ォ;
お ↔ オ;
# K row and its voiced (G) forms
か ↔ カ;
が ↔ ガ;
き ↔ キ;
ぎ ↔ ギ;
く ↔ ク;
ぐ ↔ グ;
け ↔ ケ;
げ ↔ ゲ;
こ ↔ コ;
ご ↔ ゴ;
# S row and its voiced (Z) forms
さ ↔ サ;
ざ ↔ ザ;
し ↔ シ;
じ ↔ ジ;
す ↔ ス;
ず ↔ ズ;
せ ↔ セ;
ぜ ↔ ゼ;
そ ↔ ソ;
ぞ ↔ ゾ;
# T row and its voiced (D) forms, including small tsu
た ↔ タ;
だ ↔ ダ;
ち ↔ チ;
ぢ ↔ ヂ;
っ ↔ ッ;
つ ↔ ツ;
づ ↔ ヅ;
て ↔ テ;
で ↔ デ;
と ↔ ト;
ど ↔ ド;
# N row
な ↔ ナ;
に ↔ ニ;
ぬ ↔ ヌ;
ね ↔ ネ;
の ↔ ノ;
# H row with its voiced (B) and semi-voiced (P) forms
は ↔ ハ;
ば ↔ バ;
ぱ ↔ パ;
ひ ↔ ヒ;
び ↔ ビ;
ぴ ↔ ピ;
ふ ↔ フ;
ぶ ↔ ブ;
ぷ ↔ プ;
へ ↔ ヘ;
べ ↔ ベ;
ぺ ↔ ペ;
ほ ↔ ホ;
ぼ ↔ ボ;
ぽ ↔ ポ;
# M row
ま ↔ マ;
み ↔ ミ;
む ↔ ム;
め ↔ メ;
も ↔ モ;
# Y row (small form first, then normal form)
ゃ ↔ ャ;
や ↔ ヤ;
ゅ ↔ ュ;
ゆ ↔ ユ;
ょ ↔ ョ;
よ ↔ ヨ;
# R row
ら ↔ ラ;
り ↔ リ;
る ↔ ル;
れ ↔ レ;
ろ ↔ ロ;
# W row (small wa, wa, wi, we, wo)
ゎ ↔ ヮ;
わ ↔ ワ;
ゐ ↔ ヰ;
ゑ ↔ ヱ;
を ↔ ヲ;
# Syllabic n and vu
ん ↔ ン;
ゔ ↔ ヴ;
# Iteration marks (309D,E ↔ 30FD,E)
ゝ ↔ ヽ;
ゞ ↔ ヾ;
# Small Katakana ka/ke (30F5, 30F6) fold into normal Hiragana ka/ke
# (304B, 3051).  The `←` operator makes these rules apply in the reverse
# (Katakana-Hiragana) direction only.
\u304B ← \u30F5;  # ka ← small ka
\u3051 ← \u30F6;  # ke ← small ke
# Katakana followed by a prolonged sound mark 30FC has
# its final vowel doubled.  This is a Katakana-Hiragana
# one-way information-losing transformation.  We
# include the small Katakana (e.g., small A 30A1, matched
# below as Hiragana 3041) and do not distinguish them from their large
# counterparts.  It doesn't make sense to double a
# small counterpart vowel as a small Hiragana vowel, so
# we don't do so.  In natural text this should never
# occur anyway.  If a 30FC is seen without a preceding
# vowel sound (e.g., after n 30F3) we do not change it.
### $long = ー;
# Vowel classes for the prolonged-sound-mark rules.  The members are
# Hiragana rather than Katakana: by the time a 30FC is examined, the
# character in front of it has already been transformed to Hiragana.
# Each class is named after the vowel that its members end in.
# (Set contents were mechanically generated from the Unicode 3.0 data.)
$xa = [ぁ あ か が さ ざ た だ な は ば ぱ ま ゃ や ら ゎ わ];
$xi = [ぃ い き ぎ し じ ち ぢ に ひ び ぴ み り ゐ];
$xu = [ぅ う く ぐ す ず っ つ づ ぬ ふ ぶ ぷ む ゅ ゆ る ゔ];
$xe = [ぇ え け げ せ ぜ て で ね へ べ ぺ め れ ゑ];
$xo = [ぉ お こ ご そ ぞ と ど の ほ ぼ ぽ も ょ よ ろ を];
# Reverse direction only (`←`): a 30FC preceded by a member of a vowel class
# is replaced by the matching normal-size Hiragana vowel.  Only the text
# inside the braces is replaced; the class member before it is context.
あ ← $xa {ー};
い ← $xi {ー};
う ← $xu {ー};
え ← $xe {ー};
お ← $xo {ー};
# Reverse-direction (Katakana → Hiragana) counterparts of the entries at the
# top of the file.  A `::` entry written entirely inside parentheses applies
# only when the transform runs in reverse.
# NFKC normalization for the reverse direction.
:: (NFKC) ;
# Global filter for the reverse direction.
# note: a global filter is more efficient, but MUST include all source chars!!
:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
# eof
Commit	Line	Data
2ca993e8 A	1	# ***************************************************************************
	2	# *
	3	# * Copyright (C) 2004-2016, International Business Machines
	4	# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
	5	# *
	6	# ***************************************************************************
	7	# File: Hira_Kana.txt
	8	# Generated from CLDR
	9	#
	10
	11	# note: a global filter is more efficient, but MUST include all source chars
	12	:: [\u0000-\u007E 、。 \u3099-゜ァ-ー｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
	13	:: NFKC ();
	14	# Hiragana-Katakana
	15	# This is largely a one-to-one mapping, but it has a
	16	# few kinks:
	17	# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
	18	# Hiragana equivalents. We use Hiragana wa/wi/we/wo
	19	# (308F-3092) with a voicing mark (3099), which is
	20	# semantically equivalent. However, this is a non-
	21	# roundtripping transformation.
	22	# 2. The Katakana small ka/ke (30F5,30F6) have no
	23	# Hiragana equiavlents. We convert them to normal
	24	# Hiragana ka/ke (304B,3051). This is a one-way
	25	# information-losing transformation and precludes
	26	# round-tripping of 30F5 and 30F6.
	27	# 3. The combining marks 3099-309C are in the Hiragana
	28	# block, but they apply to Katakana as well, so we
	29	# leave them untouched.
	30	# 4. The Katakana prolonged sound mark 30FC doubles the
	31	# preceding vowel. This is a one-way information-
	32	# losing transformation from Katakana to Hiragana.
	33	# 5. The Katakana middle dot separates words in foreign
	34	# expressions; we leave this unmodified.
	35	# The above points preclude successful round-trip
	36	# transformations of arbitrary input text. However,
	37	# they provide naturalistic results that should conform
	38	# to user expectations.
	39	# Combining equivalents va/vi/ve/vo
	40	わ\u3099 ↔ ヷ;
	41	ゐ\u3099 ↔ ヸ;
	42	ゑ\u3099 ↔ ヹ;
	43	を\u3099 ↔ ヺ;
	44	# One-to-one mappings, main block
	45	# 3041:3094 ↔ 30A1:30F4
	46	# 309D,E ↔ 30FD,E
	47	ぁ ↔ ァ;
	48	あ ↔ ア;
	49	ぃ ↔ ィ;
	50	い ↔ イ;
	51	ぅ ↔ ゥ;
	52	う ↔ ウ;
	53	ぇ ↔ ェ;
	54	え ↔ エ;
	55	ぉ ↔ ォ;
	56	お ↔ オ;
	57	か ↔ カ;
	58	が ↔ ガ;
	59	き ↔ キ;
	60	ぎ ↔ ギ;
	61	く ↔ ク;
	62	ぐ ↔ グ;
	63	け ↔ ケ;
	64	げ ↔ ゲ;
65	こ ↔ コ;
66	ご ↔ ゴ;
67	さ ↔ サ;
68	ざ ↔ ザ;
69	し ↔ シ;
70	じ ↔ ジ;
71	す ↔ ス;
72	ず ↔ ズ;
73	せ ↔ セ;
74	ぜ ↔ ゼ;
75	そ ↔ ソ;
76	ぞ ↔ ゾ;
77	た ↔ タ;
78	だ ↔ ダ;
79	ち ↔ チ;
80	ぢ ↔ ヂ;
81	っ ↔ ッ;
82	つ ↔ ツ;
83	づ ↔ ヅ;
84	て ↔ テ;
85	で ↔ デ;
86	と ↔ ト;
87	ど ↔ ド;
88	な ↔ ナ;
89	に ↔ ニ;
90	ぬ ↔ ヌ;
91	ね ↔ ネ;
92	の ↔ ノ;
93	は ↔ ハ;
94	ば ↔ バ;
95	ぱ ↔ パ;
96	ひ ↔ ヒ;
97	び ↔ ビ;
98	ぴ ↔ ピ;
99	ふ ↔ フ;
100	ぶ ↔ ブ;
101	ぷ ↔ プ;
102	へ ↔ ヘ;
103	べ ↔ ベ;
104	ぺ ↔ ペ;
105	ほ ↔ ホ;
106	ぼ ↔ ボ;
107	ぽ ↔ ポ;
108	ま ↔ マ;
109	み ↔ ミ;
110	む ↔ ム;
111	め ↔ メ;
112	も ↔ モ;
113	ゃ ↔ ャ;
114	や ↔ ヤ;
115	ゅ ↔ ュ;
116	ゆ ↔ ユ;
117	ょ ↔ ョ;
118	よ ↔ ヨ;
119	ら ↔ ラ;
120	り ↔ リ;
121	る ↔ ル;
122	れ ↔ レ;
123	ろ ↔ ロ;
124	ゎ ↔ ヮ;
125	わ ↔ ワ;
126	ゐ ↔ ヰ;
127	ゑ ↔ ヱ;
128	を ↔ ヲ;
129	ん ↔ ン;
130	ゔ ↔ ヴ;
131	ゝ ↔ ヽ;
132	ゞ ↔ ヾ;
133	# One-way Katakana-Hiragana xform of small K ka/ke to
134	# normal H ka/ke.
135	か ← ヵ;
136	け ← ヶ;
137	# Katakana followed by a prolonged sound mark 30FC has
138	# its final vowel doubled. This is a Katakana-Hiragana
139	# one-way information-losing transformation. We
140	# include the small Katakana (e.g., small A 3041) and
141	# do not distinguish them from their large
142	# counterparts. It doesn't make sense to double a
143	# small counterpart vowel as a small Hiragana vowel, so
144	# we don't do so. In natural text this should never
145	# occur anyway. If a 30FC is seen without a preceding
146	# vowel sound (e.g., after n 30F3) we do not change it.
147	### $long = ー;
148	# The following categories are Hiragana, not Katakana
149	# as might be expected, since by the time we get to the
150	# 30FC, the preceding character will have already been
151	# transformed to Hiragana.
152	# {The following mechanically generated from the
153	# Unicode 3.0 data:}
154	$xa = [ \
155	ぁあかがさざ \
156	ただなはばぱ \
157	まゃやらゎわ \
158	];
159	$xi = [ \
160	ぃいきぎしじ \
161	ちぢにひびぴ \
162	みりゐ \
163	];
164	$xu = [ \
165	ぅうくぐすず \
166	っつづぬふぶ \
167	ぷむゅゆるゔ \
168	];
169	$xe = [ \
170	ぇえけげせぜ \
171	てでねへべぺ \
172	めれゑ \
173	];
174	$xo = [ \
175	ぉおこごそぞ \
176	とどのほぼぽ \
177	もょよろを \
178	];
179	あ ← $xa {ー};
180	い ← $xi {ー};
181	う ← $xu {ー};
182	え ← $xe {ー};
183	お ← $xo {ー};
184	:: (NFKC) ;
185	# note: a global filter is more efficient, but MUST include all source chars!!
186	:: ([\u0000-\u007E 、。 \u3099-゜ァ-ー｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
187	# eof
188