git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/translit/Hira

... / ...

Commit	Line	Data
	1	# © 2016 and later: Unicode, Inc. and others.
	2	# License & terms of use: http://www.unicode.org/copyright.html#License
	3	#
	4	# File: Hira_Kana.txt
	5	# Generated from CLDR
	6	#
	7
	8	# note: a global filter is more efficient, but MUST include all source chars
	9	:: [\u0000-\u007E 、。 \u3099-゜ァ-ー｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
	10	:: NFKC ();
	11	# Hiragana-Katakana
	12	# This is largely a one-to-one mapping, but it has a
	13	# few kinks:
	14	# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
	15	# Hiragana equivalents. We use Hiragana wa/wi/we/wo
	16	# (308F-3092) with a voicing mark (3099), which is
	17	# semantically equivalent. However, this is a non-
	18	# roundtripping transformation.
	19	# 2. The Katakana small ka/ke (30F5,30F6) have no
	20	# Hiragana equivalents. We convert them to normal
	21	# Hiragana ka/ke (304B,3051). This is a one-way
	22	# information-losing transformation and precludes
	23	# round-tripping of 30F5 and 30F6.
	24	# 3. The combining marks 3099-309C are in the Hiragana
	25	# block, but they apply to Katakana as well, so we
	26	# leave them untouched.
	27	# 4. The Katakana prolonged sound mark 30FC doubles the
	28	# preceding vowel. This is a one-way information-
	29	# losing transformation from Katakana to Hiragana.
	30	# 5. The Katakana middle dot separates words in foreign
	31	# expressions; we leave this unmodified.
	32	# The above points preclude successful round-trip
	33	# transformations of arbitrary input text. However,
	34	# they provide naturalistic results that should conform
	35	# to user expectations.
	36	# Combining equivalents va/vi/ve/vo
	37	わ\u3099 ↔ ヷ;
	38	ゐ\u3099 ↔ ヸ;
	39	ゑ\u3099 ↔ ヹ;
	40	を\u3099 ↔ ヺ;
	41	# One-to-one mappings, main block
	42	# 3041:3094 ↔ 30A1:30F4
	43	# 309D,E ↔ 30FD,E
	44	ぁ ↔ ァ;
	45	あ ↔ ア;
	46	ぃ ↔ ィ;
	47	い ↔ イ;
	48	ぅ ↔ ゥ;
	49	う ↔ ウ;
	50	ぇ ↔ ェ;
	51	え ↔ エ;
	52	ぉ ↔ ォ;
	53	お ↔ オ;
	54	か ↔ カ;
	55	が ↔ ガ;
	56	き ↔ キ;
	57	ぎ ↔ ギ;
	58	く ↔ ク;
	59	ぐ ↔ グ;
	60	け ↔ ケ;
	61	げ ↔ ゲ;
	62	こ ↔ コ;
	63	ご ↔ ゴ;
	64	さ ↔ サ;
	65	ざ ↔ ザ;
	66	し ↔ シ;
	67	じ ↔ ジ;
	68	す ↔ ス;
	69	ず ↔ ズ;
	70	せ ↔ セ;
	71	ぜ ↔ ゼ;
	72	そ ↔ ソ;
	73	ぞ ↔ ゾ;
	74	た ↔ タ;
	75	だ ↔ ダ;
	76	ち ↔ チ;
	77	ぢ ↔ ヂ;
	78	っ ↔ ッ;
	79	つ ↔ ツ;
	80	づ ↔ ヅ;
	81	て ↔ テ;
	82	で ↔ デ;
	83	と ↔ ト;
	84	ど ↔ ド;
	85	な ↔ ナ;
	86	に ↔ ニ;
	87	ぬ ↔ ヌ;
	88	ね ↔ ネ;
	89	の ↔ ノ;
	90	は ↔ ハ;
	91	ば ↔ バ;
	92	ぱ ↔ パ;
	93	ひ ↔ ヒ;
	94	び ↔ ビ;
	95	ぴ ↔ ピ;
	96	ふ ↔ フ;
	97	ぶ ↔ ブ;
	98	ぷ ↔ プ;
	99	へ ↔ ヘ;
	100	べ ↔ ベ;
	101	ぺ ↔ ペ;
	102	ほ ↔ ホ;
	103	ぼ ↔ ボ;
	104	ぽ ↔ ポ;
	105	ま ↔ マ;
	106	み ↔ ミ;
	107	む ↔ ム;
	108	め ↔ メ;
	109	も ↔ モ;
	110	ゃ ↔ ャ;
	111	や ↔ ヤ;
	112	ゅ ↔ ュ;
	113	ゆ ↔ ユ;
	114	ょ ↔ ョ;
	115	よ ↔ ヨ;
	116	ら ↔ ラ;
	117	り ↔ リ;
	118	る ↔ ル;
	119	れ ↔ レ;
	120	ろ ↔ ロ;
	121	ゎ ↔ ヮ;
	122	わ ↔ ワ;
	123	ゐ ↔ ヰ;
	124	ゑ ↔ ヱ;
	125	を ↔ ヲ;
	126	ん ↔ ン;
	127	ゔ ↔ ヴ;
	128	ゝ ↔ ヽ;
	129	ゞ ↔ ヾ;
	130	# One-way Katakana-to-Hiragana transform of small Katakana
	131	# ka/ke (30F5,30F6) to normal Hiragana ka/ke (304B,3051).
	132	か ← ヵ;
	133	け ← ヶ;
	134	# Katakana followed by a prolonged sound mark 30FC has
	135	# its final vowel doubled. This is a Katakana-Hiragana
	136	# one-way information-losing transformation. We
	137	# include the small Katakana (e.g., small A 30A1, seen here as Hiragana 3041) and
	138	# do not distinguish them from their large
	139	# counterparts. It doesn't make sense to double a
	140	# small counterpart vowel as a small Hiragana vowel, so
	141	# we don't do so. In natural text this should never
	142	# occur anyway. If a 30FC is seen without a preceding
	143	# vowel sound (e.g., after n 30F3) we do not change it.
	144	### $long = ー;
	145	# The following categories are Hiragana, not Katakana
	146	# as might be expected, since by the time we get to the
	147	# 30FC, the preceding character will have already been
	148	# transformed to Hiragana.
	149	# {The following was mechanically generated from the
	150	# Unicode 3.0 data:}
	151	$xa = [ \
	152	ぁあかがさざ \
	153	ただなはばぱ \
	154	まゃやらゎわ \
	155	];
	156	$xi = [ \
	157	ぃいきぎしじ \
	158	ちぢにひびぴ \
	159	みりゐ \
	160	];
	161	$xu = [ \
	162	ぅうくぐすず \
	163	っつづぬふぶ \
	164	ぷむゅゆるゔ \
	165	];
	166	$xe = [ \
	167	ぇえけげせぜ \
	168	てでねへべぺ \
	169	めれゑ \
	170	];
	171	$xo = [ \
	172	ぉおこごそぞ \
	173	とどのほぼぽ \
	174	もょよろを \
	175	];
	176	あ ← $xa {ー};
	177	い ← $xi {ー};
	178	う ← $xu {ー};
	179	え ← $xe {ー};
	180	お ← $xo {ー};
	181	:: (NFKC) ;
	182	# note: a global filter is more efficient, but MUST include all source chars!!
	183	:: ([\u0000-\u007E 、。 \u3099-゜ァ-ー｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
	184	# eof
	185