]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Latn_Kana.txt
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Latn_Kana.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: Latn_Kana.txt
5 # Generated from CLDR
6 #
7
8 # note: a global filter is more efficient, but MUST include all source chars
9 #:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
10 # MINIMAL FILTER GENERATED FOR: Latin-Katakana (forward direction; the set on the next rule line is the global filter and must include all source chars)
11 ### WARNING -- must add width filter, both here and below!!! ###
12 :: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
13 :: [:Latin:] fullwidth-halfwidth (); # forward only: narrow fullwidth Latin letters; the empty () means no reverse step
14 :: NFD (NFC); # forward: decompose before the rules run; reverse: compose (NFC) after them
15 :: Lower (); # whenever transliterating from cased to uncased script, include this
16 # :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
17 # Uses modified Hepburn. Small changes to make unambiguous.
18 # | Kunrei-shiki: Hepburn/MHepburn
19 # | ------------------------------
20 # | si: shi
21 # | si ~ya: sha
22 # | si ~yu: shu
23 # | si ~yo: sho
24 # | zi: ji
25 # | zi ~ya: ja
26 # | zi ~yu: ju
27 # | zi ~yo: jo
28 # | ti: chi
29 # | ti ~ya: cha
30 # | ti ~yu: chu
31 # | ti ~yo: cho
32 # | tu: tsu
33 # | di: ji/dji
34 # | du: zu/dzu
35 # | hu: fu
36 # | For foreign words:
37 # | -----------------
38 # | se ~i si
39 # | si ~e she
40 # |
41 # | ze ~i zi
42 # | zi ~e je
43 # |
44 # | te ~i ti
45 # | ti ~e che
46 # | te ~u tu
47 # |
48 # | de ~i di
49 # | de ~u du
50 # | de ~i di
51 # |
52 # | he ~u: hu
53 # | hu ~a fa
54 # | hu ~i fi
55 # | hu ~e fe
56 # | hu ~o fo
57 # Most small forms are generated, but if necessary
58 # explicit small forms are given with ~a, ~ya, etc.
59 #------------------------------------------------------
60 # Variables: character sets and literals referenced by the rules below
61 $vowel = [aeiou] ; # Latin vowels (input is already lowercased by ":: Lower ()")
62 $consonant = [bcdfghjklmnpqrstvwxyz] ; # every Latin letter that is not in $vowel
63 $macron = \u0304 ; # combining macron; paired with the prolonged sound mark ー
64 # Variables used for doubled-consonants with tsu: each $X_start set lists the kana whose romaji starts with X, used as the right context that lets ッ be read back as a doubled X
65 $kana = [ぁ-ゔ] ; # hiragana range; in the visible rules only the commented-out iteration TODO mentions it
66 $voice = [\u3099゛]; # voicing mark, combining (U+3099) or spacing form
67 $semivoice = [\u309A゜]; # semi-voicing mark, combining (U+309A) or spacing form
68 $k_start = [カキクケコかきくけこ] ;
69 $s_start = [サシスセソさしすせそ] ;
70 $j_start = [シし] $voice ; # shi followed by a voicing mark, i.e. the kana romanized as "ji"
71 $t_start = [タチツテトたちつてと] ;
72 $n_start = [ナニヌネノンなにぬねの] ; # also contains the syllabic nasal ン
73 $h_start = [ハヒヘホはひへほ] ; # fu (フ/ふ) is kept separately in $f_start
74 $f_start = [フふ] ;
75 $m_start = [マミムメモまみむめも] ;
76 $y_start = [ヤユヨやゆよ] ;
77 $r_start = [ラリルレロらりるれろ] ;
78 $w_start = [ワヰヱヲわゐゑを] ;
79 $v_start = [ワヰヱヲ]\u3099 ; # w-row katakana plus combining voicing mark (the va/vi/ve/vo kana)
80 $voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ; # base katakana used as left context of the h-lengthening rule
81 # if ン is followed by $n_quoter, then it needs an
82 # apostrophe after its romaji form to disambiguate it.
83 # e.g., ン ア != ナ, so represent as "n'a", not "na".
84 $n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
85 $small_y = [ャィュェョ] ; # small kana used as right context by the "X | '~' ← kana } $small_y" rules
86 $iteration = ゝ ; # NOTE(review): hiragana iteration mark; no active rule in the visible file references it (the iteration rules use ヽ)
87 #------------------------------------------------------
88 # katakana rules
89 # Punctuation: two-way mappings between ASCII and ideographic punctuation
90 '.' ↔ 。; # full stop
91 ',' ↔ 、; # comma
92 # ' ' } [a-z] → ; # delete spaces before latin
93 # ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; #insert spaces before hiragana
94 # Iteration Mark
95 # Copy previous letter § marks
96 # TODO
97 # | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) $iteration
98 # Specials for katakana -- not shared with hiragana
99 va ↔ ワ\u3099 ; # v-syllables are written as a w-row kana plus combining voicing mark
100 vi ↔ ヰ\u3099 ;
101 ve ↔ ヱ\u3099 ;
102 vo ↔ ヲ\u3099 ;
103 '~ka' ↔ ヵ ; # small ka; the quoted "~" prefix is this file's marker for explicit small forms
104 '~ke' ↔ ヶ ; # small ke
105 # ~~~ begin shared rules ~~~
106 #special: reverse-only; a literal "~" followed by a small kana is read as a y-glide (the "~" is inserted by the "X | '~' ← kana } $small_y" rules)
107 ya ← '~'ャ;
108 yi ← '~'ィ ;
109 yu ← '~'ュ;
110 ye ← '~'ェ;
111 yo ← '~'ョ;
112 #normal: syllable mappings; "↔" is two-way, "→" Latin-to-kana only, "←" kana-to-Latin only
113 a ↔ ア ;
114 b | '~' ← ヒ \u3099} $small_y ; # reverse: bi-kana before a small y-kana emits "b" and leaves "~" (after the cursor) to be re-read with the small kana
115 by } $vowel → ヒ\u3099 | '~y' ; # forward: "by" before a vowel emits the bi-kana and leaves "~y" to combine with the vowel into a small kana
116 ba ↔ ハ\u3099 ;
117 bi ↔ ヒ\u3099 ;
118 bu ↔ フ\u3099 ;
119 be ↔ ヘ\u3099 ;
120 bo ↔ ホ\u3099 ;
121 c } i → | s ; # soft c: rewrite to "s" before i and reprocess from the cursor
122 c } e → | s ; # soft c before e, same treatment
123 da ↔ タ\u3099 ;
124 di ↔ テ\u3099ィ ; # de-kana + small i
125 du ↔ テ\u3099ゥ ; # de-kana + small u
126 de ↔ テ\u3099 ;
127 do ↔ ト\u3099 ;
128 dzu ↔ ツ\u3099 ; # voiced tsu
129 dja ← チ\u3099ャ ; # reverse-only readings of voiced chi + small kana
130 dji'~i' ← チ\u3099ィ ; # liu
131 dju ← チ\u3099ュ ;
132 dje ← チ\u3099ェ ;
133 djo ← チ\u3099ョ ;
134 dji ↔ チ\u3099 ; # voiced chi
135 dj } $vowel → チ\u3099 | '~y' ; # forward: "dj" before a vowel emits voiced chi and leaves "~y" to form the small kana
136 # TODO: QUESTION: use ĵĴżŻ instead of dj, dz
137 cha ← チャ ; # reverse-only readings of chi + small kana
138 chi'~i' ← チィ ; # liu
139 chu ← チュ ;
140 che ← チェ ;
141 cho ← チョ ;
142 chi ↔ チ ;
143 ch } $vowel → チ | '~y' ; # forward: "ch" before a vowel emits chi and leaves "~y" to form the small kana
144 e ↔ エ ;
145 g | '~' ← キ\u3099} $small_y ; # reverse: gi-kana before a small y-kana emits "g" and leaves "~" to be re-read with the small kana
146 gy } $vowel → キ\u3099 | '~y' ; # forward: "gy" before a vowel emits the gi-kana and leaves "~y" to form the small kana
147 ga ↔ カ\u3099 ;
148 gi ↔ キ\u3099 ;
149 gu ↔ ク\u3099 ;
150 ge ↔ ケ\u3099 ;
151 go ↔ コ\u3099 ;
152 i ↔ イ ;
153 # j } $vowel → シ\u3099 | '~y' ;
154 ja ↔ シ\u3099ャ ; # j-syllables are spelled out explicitly as voiced shi + small kana (the generic rule above is disabled)
155 ji'~i' ← シ\u3099ィ ; # liu
156 ju ↔ シ\u3099ュ ;
157 je ↔ シ\u3099ェ ;
158 jo ↔ シ\u3099ョ ;
159 ji ↔ シ\u3099 ;
160 k | '~' ← キ} $small_y ; # reverse: ki before a small y-kana emits "k" and leaves "~" to be re-read with the small kana
161 ky } $vowel → キ | '~y' ; # forward: "ky" before a vowel emits ki and leaves "~y" to form the small kana
162 ka ↔ カ ;
163 ki ↔ キ ;
164 ku ↔ ク ;
165 ke ↔ ケ ;
166 ko ↔ コ ;
167 m | '~' ← ミ} $small_y ; # reverse: mi before a small y-kana emits "m" and leaves "~" to be re-read with the small kana
168 my } $vowel → ミ | '~y' ; # forward: "my" before a vowel emits mi and leaves "~y" to form the small kana
169 ma ↔ マ ;
170 mi ↔ ミ ;
171 mu ↔ ム ;
172 me ↔ メ ;
173 mo ↔ モ ;
174 m } [pbfv] → ン ; # forward only: m before p/b/f/v is written as the syllabic nasal
175 n | '~' ← ニ } $small_y ; # reverse: ni before a small y-kana emits "n" and leaves "~" to be re-read with the small kana
176 ny } $vowel → ニ | '~y' ; # forward: "ny" before a vowel emits ni and leaves "~y" to form the small kana
177 na ↔ ナ ;
178 ni ↔ ニ ;
179 nu ↔ ヌ ;
180 ne ↔ ネ ;
181 no ↔ ノ ;
182 o ↔ オ ;
183 p | '~' ← ヒ\u309A } $small_y ; # reverse: pi-kana before a small y-kana emits "p" and leaves "~" to be re-read with the small kana
184 py } $vowel → ヒ\u309A | '~y' ; # forward: "py" before a vowel emits the pi-kana and leaves "~y" to form the small kana
185 pa ↔ ハ\u309A ;
186 pi ↔ ヒ\u309A ;
187 pu ↔ フ\u309A ;
188 pe ↔ ヘ\u309A ;
189 po ↔ ホ\u309A ;
190 h | '~' ← ヒ } $small_y ; # reverse: hi before a small y-kana emits "h" and leaves "~" to be re-read with the small kana
191 hy } $vowel → ヒ | '~y' ; # forward: "hy" before a vowel emits hi and leaves "~y" to form the small kana
192 ha ↔ ハ ;
193 hi ↔ ヒ ;
194 hu ↔ ヘゥ ; # he + small u (the "he ~u: hu" row of the header table); plain フ is "fu"
195 he ↔ ヘ ;
196 ho ↔ ホ ;
197 # f | '~' ← フ } $small_y ;
198 # f } $vowel → フ | '~' ;
199 fa ↔ ファ ; # f-syllables are spelled out explicitly as fu + small vowel (the generic rules above are disabled)
200 fi ↔ フィ ;
201 fe ↔ フェ ;
202 fo ↔ フォ ;
203 fu ↔ フ ;
204 r | '~' ← リ } $small_y ; # reverse: ri before a small y-kana emits "r" and leaves "~" to be re-read with the small kana
205 ry } $vowel → リ | '~y' ; # forward: "ry" before a vowel emits ri and leaves "~y" to form the small kana
206 ra ↔ ラ ;
207 ri ↔ リ ;
208 ru ↔ ル ;
209 re ↔ レ ;
210 ro ↔ ロ ;
211 za ↔ サ\u3099 ;
212 zi ↔ セ\u3099ィ ; # ze-kana + small i; voiced shi is "ji"
213 zu ↔ ス\u3099 ;
214 ze ↔ セ\u3099 ;
215 zo ↔ ソ\u3099 ;
216 sa ↔ サ ;
217 si ↔ セィ ; # se + small i; plain シ is "shi"
218 su ↔ ス ;
219 se ↔ セ ;
220 so ↔ ソ ;
221 sha ← シャ ; # reverse-only readings of shi + small kana
222 shi'~i' ← シィ ; # liu
223 shu ← シュ ;
224 she ← シェ ;
225 sho ← ショ ;
226 shi ↔ シ ;
227 sh } $vowel → シ | '~y' ; # forward: "sh" before a vowel emits shi and leaves "~y" to form the small kana
228 ta ↔ タ ;
229 ti ↔ ティ ; # te + small i; plain チ is "chi"
230 tu ↔ テゥ ; # te + small u; plain ツ is "tsu"
231 te ↔ テ ;
232 to ↔ ト ;
233 tsu ↔ ツ ;
234 # v } $vowel → ウ\u3099 | '~' ;
235 #'v~a' ← ウ\u3099ァ ; # liu
236 #'v~i' ← ウ\u3099ィ ; # liu
237 #'v~e' ← ウ\u3099ェ ; # liu
238 #'v~o' ← ウ\u3099ォ ; # liu
239 vu ↔ ウ\u3099 ; # u-kana plus combining voicing mark
240 u ↔ ウ ;
241 # w } $vowel → ウ | '~' ;
242 wa ↔ ワ ;
243 wi ↔ ヰ ;
244 wu → ウ ; # forward only: ウ reads back as "u"
245 we ↔ ヱ ;
246 wo ↔ ヲ ;
247 ya ↔ ヤ ;
248 yi → イ ; # forward only: イ reads back as "i"
249 yu ↔ ユ ;
250 ye → エ ; # forward only: エ reads back as "e"
251 yo ↔ ヨ ;
252 # double consonants: the first letter of a doubled consonant is written as small tsu (ッ)
253 #specials: forward only, for the digraphs sh and ch
254 s } sh → ッ ;
255 t } ch → ッ ;
256 #voiced: these precede the plain rules below, so ッ before a base kana plus (semi)voicing mark is read as the voiced consonant
257 j } j ↔ ッ } $j_start ;
258 b } b ↔ ッ } [$h_start$f_start] $voice;
259 d } d ↔ ッ } $t_start $voice;
260 g } g ↔ ッ } $k_start $voice;
261 p } p ↔ ッ } [$h_start$f_start] $semivoice;
262 # v } v ↔ ッ } [ワヰウヱヲう] $voice ;
263 z } z ↔ ッ } $s_start $voice;
264 v } v ↔ ッ } $v_start;
265 # normal: forward, X before X becomes ッ; reverse, ッ before a kana from $X_start becomes X
266 k } k ↔ ッ } $k_start ;
267 m } m ↔ ッ } $m_start ;
268 n } n ↔ ッ } $n_start ;
269 h } h ↔ ッ } $h_start ;
270 f } f ↔ ッ } $f_start ;
271 r } r ↔ ッ } $r_start ;
272 t } t ↔ ッ } $t_start ;
273 s } s ↔ ッ } $s_start ;
274 w } w ↔ ッ } $w_start;
275 y } y ↔ ッ } $y_start;
276 # completeness: forward-only doubling for letters that have no kana row of their own
277 x } x → ッ ;
278 c } k → ッ ;
279 c } c → ッ ;
280 c } q → ッ ;
281 l } l → ッ ;
282 q } q → ッ ;
283 # y } y → ッ ;
284 # w } w → ッ ;
285 # prolonged vowel mark. this indicates a doubling of
286 # the preceding vowel sound
287 #a ← a { ー ; # liu
288 #e ← e { ー ; # liu
289 #i ← i { ー ; # liu
290 #o ← o { ー ; # liu
291 #u ← u { ー ; # liu
292 $macron ↔ ー ; # active rule: the combining macron and the prolonged sound mark map to each other
293 # small forms: explicit small kana are written in romaji with a quoted "~" prefix
294 '~a' ↔ ァ ;
295 '~i' ↔ ィ ;
296 '~u' ↔ ゥ ;
297 '~e' ↔ ェ ;
298 '~o' ↔ ォ ;
299 '~tsu' ↔ ッ ;
300 '~wa' ↔ ヮ ;
301 '~ya' ↔ ャ ;
302 '~yi' → ィ ; # forward only: ィ reads back as "~i"
303 '~yu' ↔ ュ ;
304 '~ye' → ェ ; # forward only: ェ reads back as "~e"
305 '~yo' ↔ ョ ;
306 # iteration marks (reverse direction only): the text before "{" is the already-romanized left context, $1 is its captured (y* vowel) tail, and the mark after "{" is what gets replaced
307 # TODO: make more accurate
308 j $1 ← sh (y* $vowel) {ヽ$voice ; # voiced mark after an sh-syllable repeats it voiced: sh → j
309 dj $1 ← ch (y* $vowel) {ヽ$voice ; # ch → dj
310 dz $1 ← ts (y* $vowel) {ヽ$voice ; # ts → dz
311 g $1 ← k (y* $vowel) {ヽ$voice ; # k → g
312 z $1 ← s (y* $vowel) {ヽ$voice ; # s → z
313 d $1 ← t (y* $vowel) {ヽ$voice ; # t → d
314 b $1 ← h (y* $vowel) {ヽ$voice ; # h → b (was written "h $1 ← b", which un-voiced b-syllables and never voiced h-syllables, unlike every sibling rule)
315 v $1 ← w (y* $vowel) {ヽ$voice ; # w → v
316 sh $1 ← sh (y* $vowel) {ヽ$voice ; # NOTE(review): same match pattern as the sh → j rule above, so this looks unreachable -- confirm intent
317 j $1 ← j (y* $vowel) {ヽ$voice ; # an already-voiced syllable is repeated unchanged
318 ch $1 ← ch (y* $vowel) {ヽ$voice ; # NOTE(review): same match pattern as the ch → dj rule above -- confirm intent
319 dj $1 ← dj(y* $vowel) {ヽ$voice ; # an already-voiced syllable is repeated unchanged
320 ts $1 ← ts (y* $vowel) {ヽ$voice ; # NOTE(review): same match pattern as the ts → dz rule above -- confirm intent
321 dz $1 ← dz (y* $vowel) {ヽ$voice ; # an already-voiced syllable is repeated unchanged
322 $1 ← ($consonant y* $vowel) {ヽ$voice? ; # default: repeat the preceding single-consonant syllable as is (b-syllables now land here)
323 $1 ← (.) {ヽ $voice? ; # otherwise repeat last character
324 ← ヽ $voice? ; # delete if no characters found
325 # h- rule: lengthens vowel if not followed by a vowel.
326 # At the point this is applied, latin [cons]?vowel sequences
327 # have been converted to katakana in NFD form.
328 $voweled_basekana [\u3099 \u309A]? { h → ー ; # forward only; "h" + vowel never gets here because the earlier ha/hi/hu/he/ho rules match first
329 # one-way latin → kana rules. these do not occur in
330 # well-formed romaji representing actual japanese text.
331 # their purpose is to make all romaji map to kana of
332 # some sort.
333 # the following are not really necessary, but produce
334 # slightly more natural results.
335 cy → セィ ; # consonant + y with no following vowel: written like the matching "-i" syllable
336 dy → テ\u3099ィ ;
337 hy → ヒ ;
338 sy → セィ ;
339 ty → ティ ;
340 zy → セ\u3099ィ ;
341 h → ヘ ;
342 # isolated consonants listed here so as not to mask
343 # longer rules above.
344 ch → チ;
345 sh → シ ;
346 dz → ツ\u3099 ;
347 dj → チ\u3099;
348 b → フ\u3099 ;
349 d → テ\u3099 ;
350 g → ク\u3099 ;
351 k → ク ;
352 m → ム ;
353 n'' ← ン } $n_quoter ; # reverse only: ン before a kana in $n_quoter is written "n" plus an apostrophe ('' is an escaped quote)
354 n ↔ ン ; # syllabic nasal
355 p → フ\u309A ;
356 r → ル ;
357 s → ス ;
358 t → テ ;
359 y → イ ;
360 z → ス\u3099 ;
361 v → ウ\u3099 ;
362 f → フ;
363 j → シ\u3099;
364 w → ウ;
365 ß → | ss ; # the cursor "|" before the replacement means the new text is reprocessed by the rules
366 æ → | e ;
367 ð → | d ;
368 ø → | u ;
369 þ → | th ;
370 # simple substitutions using backup (the replacement is re-read by the rules above)
371 c → | k ;
372 l → | r ;
373 q → | k ;
374 x → | ks ;
375 # ~~~ END shared rules ~~~
376 #------------------------------------------------------
377 # Final cleanup: forward-only rules that remove helper characters left behind by the rules above
378 '~' → ; # delete stray tildes between letters
379 [:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
380 # [ʾ[:Nonspacing Mark:]-[\u3099-゜]] → ; # delete any non-spacing marks that we didn't use
381 :: NFC (NFD) ; # forward: recompose after the rules; reverse: decompose (NFD) before them
382 :: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth); # reverse only (entire rule is parenthesized): widen halfwidth katakana and sound marks
383 # note: a global filter is more efficient, but MUST include all source chars!!
384 #:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
385 # MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD (the parenthesized set on the next rule line is the global filter for the reverse direction)
386 :: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
387 # eof
388