1 # ***************************************************************************
3 # * Copyright (C) 2004-2008, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
6 # ***************************************************************************
7 # File: Latin_Katakana.txt
# Global filter for the forward direction: a leading ":: [set] ;" restricts which characters
# the rules in this file are allowed to touch.
# NOTE(review): the ranges "<-↓" and "!-~¢-₩" start in ASCII/Latin-1 and end far away in other
# blocks; this looks like halfwidth/fullwidth code points that were compatibility-normalized
# somewhere along the way -- confirm the exact set against the upstream data.
10 :: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ<-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
# Pre-pass transforms. The empty "()" after each ID declares an empty inverse, so each one
# only runs in the Latin-to-Katakana direction. The first is filtered to Latin-script characters.
11 :: [:Latin:] fullwidth-halfwidth ();
13 :: Lower (); # whenever transliterating from cased to uncased script, include this
# Character-class variables used as rule contexts further down.
# $voice (used by $j_start) is defined outside this excerpt; the voiced rows below spell the
# mark out as \u3099, so it is presumably the voiced sound mark -- confirm at its definition.
15 $consonant = [bcdfghjklmnpqrstvwxyz] ; # Latin consonant letters (lower-cased input)
19 $semivoice = [\u309A゜]; # semi-voiced sound marks (combining U+309A plus a spacing form)
# "<consonant>_start" sets: the kana (katakana and hiragana) of one consonant row.
# They are used as the after-context of the small-tsu (ッ) doubled-consonant rules.
20 $k_start = [カキクケコかきくけこ] ;
21 $s_start = [サシスセソさしすせそ] ;
22 $j_start = [シし] $voice ; # shi followed by $voice, i.e. the "j" syllables
23 $t_start = [タチツテトたちつてと] ;
24 $n_start = [ナニヌネノンなにぬねの] ;
# No フ/ふ here; presumably those belong to $f_start (defined outside this excerpt).
25 $h_start = [ハヒヘホはひへほ] ;
27 $m_start = [マミムメモまみむめも] ;
29 $r_start = [ラリルレロらりるれろ] ;
30 $w_start = [ワヰヱヲわゐゑを] ;
31 $v_start = [ワヰヱヲ]\u3099 ; # w-row katakana followed by the combining voiced mark U+3099
# Kana before which a syllabic ン is written with a trailing apostrophe (see the "n''" rule).
# White space inside a set is not significant.
32 $n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
# Kana that carry a vowel; used as the before-context of the "h > ー" long-vowel rule.
35 $voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ;
# Palatalized syllables (consonant + y + vowel), written as an i-row kana plus a small y-kana.
# The rules come in pairs:
#   reverse  "C | '~' < KANA } $small_y" : KANA directly before a small y-kana becomes the
#            consonant C followed by '~'; the "|" puts the cursor before the '~' so that it is
#            processed again together with the following small kana.
#   forward  "Cy } $vowel > KANA | '~y'"  : "Cy" before a vowel becomes KANA, and '~y' is
#            emitted after the cursor so later rules consume it (presumably producing the
#            small y-kana; those rules are outside this excerpt).
# $small_y and $vowel are defined outside this excerpt.
# \u3099 is the combining voiced mark, \u309A the combining semi-voiced mark.
50 b | '~' < ヒ \u3099} $small_y ;
51 by } $vowel > ヒ\u3099 | '~y' ;
# Reverse-only: voiced chi + small i.
66 dji'~i' < チ\u3099ィ ; # liu
71 dj } $vowel > チ\u3099 | '~y' ;
78 ch } $vowel > チ | '~y' ;
80 g | '~' < キ\u3099} $small_y ;
81 gy } $vowel > キ\u3099 | '~y' ;
# Reverse-only: voiced shi + small i.
89 ji'~i' < シ\u3099ィ ; # liu
94 k | '~' < キ} $small_y ;
95 ky } $vowel > キ | '~y' ;
101 m | '~' < ミ} $small_y ;
102 my } $vowel > ミ | '~y' ;
109 n | '~' < ニ } $small_y ;
110 ny } $vowel > ニ | '~y' ;
117 p | '~' < ヒ\u309A } $small_y ;
118 py } $vowel > ヒ\u309A | '~y' ;
124 h | '~' < ヒ } $small_y ;
125 hy } $vowel > ヒ | '~y' ;
136 r | '~' < リ } $small_y ;
137 ry } $vowel > リ | '~y' ;
159 sh } $vowel > シ | '~y' ;
# Doubled consonants <-> small tsu (ッ). Each rule is bidirectional ("<>"):
#   forward: a consonant directly before the same consonant becomes ッ;
#   reverse: ッ directly before a kana of the matching row becomes that consonant.
# Order matters in the reverse direction: the voiced / semi-voiced variants (j, b, d, g, p, z, v)
# are listed first because their context starts with the same kana as the plain rows
# (k, h, f, t, s, w) and additionally requires a following sound mark.
# $voice, $f_start and $y_start are defined outside this excerpt.
180 j } j <> ッ } $j_start ;
181 b } b <> ッ } [$h_start$f_start] $voice;
182 d } d <> ッ } $t_start $voice;
183 g } g <> ッ } $k_start $voice;
184 p } p <> ッ } [$h_start$f_start] $semivoice;
185 z } z <> ッ } $s_start $voice;
186 v } v <> ッ } $v_start;
# Plain (unmarked) rows.
187 k } k <> ッ } $k_start ;
188 m } m <> ッ } $m_start ;
189 n } n <> ッ } $n_start ;
190 h } h <> ッ } $h_start ;
191 f } f <> ッ } $f_start ;
192 r } r <> ッ } $r_start ;
193 t } t <> ッ } $t_start ;
194 s } s <> ッ } $s_start ;
195 w } w <> ッ } $w_start;
196 y } y <> ッ } $y_start;
# Iteration mark ヽ (reverse direction only): repeat the previous syllable.
# Each rule looks back at the Latin text already produced. The part before "{" is context:
# a consonant followed by segment $1 (optional y's plus a vowel). The key is ヽ, here followed
# by $voice (defined outside this excerpt; presumably the voiced sound mark). The output is a
# consonant followed by the captured $1.
# Rule order matters: the multi-letter contexts (sh, ch, ts) must come before the single-letter
# contexts (h, s) that they also end with.
#
# Voiced repetition: unvoiced consonant in the context -> voiced counterpart in the output.
216 j $1 < sh (y* $vowel) {ヽ$voice ;
217 dj $1 < ch (y* $vowel) {ヽ$voice ;
218 dz $1 < ts (y* $vowel) {ヽ$voice ;
219 g $1 < k (y* $vowel) {ヽ$voice ;
220 z $1 < s (y* $vowel) {ヽ$voice ;
221 d $1 < t (y* $vowel) {ヽ$voice ;
# Fixed: this rule previously read "h $1 < b ...", which is the inverse of every sibling rule.
# It turned a voiced repeat after a "b" syllable into "h" and left a voiced repeat after an
# "h" syllable unvoiced. A voiced repeat of an "h" syllable is a "b" syllable.
222 b $1 < h (y* $vowel) {ヽ$voice ;
223 v $1 < w (y* $vowel) {ヽ$voice ;
# Identity repetition for the multi-letter consonants.
# NOTE(review): the sh / ch / ts rules below have the same context and key as the voicing
# rules for sh / ch / ts above, so they appear never to be reached -- confirm the intent.
224 sh $1 < sh (y* $vowel) {ヽ$voice ;
225 j $1 < j (y* $vowel) {ヽ$voice ;
226 ch $1 < ch (y* $vowel) {ヽ$voice ;
227 dj $1 < dj(y* $vowel) {ヽ$voice ;
228 ts $1 < ts (y* $vowel) {ヽ$voice ;
229 dz $1 < dz (y* $vowel) {ヽ$voice ;
# Fallbacks, most specific first. "b" after a voiced mark also ends up here, which repeats
# the already-voiced syllable unchanged.
230 $1 < ($consonant y* $vowel) {ヽ$voice? ;
231 $1 < (.) {ヽ $voice? ; # otherwise repeat last character
232 < ヽ $voice? ; # delete if no characters found
# Forward: an "h" that directly follows a vowel-bearing kana (optionally with a combining
# voiced / semi-voiced mark) becomes the prolonged sound mark ー.
233 $voweled_basekana [\u3099 \u309A]? { h > ー ;
# Reverse: syllabic ン before a kana in $n_quoter is written as n followed by an apostrophe
# ('' is the escaped form of a literal single quote).
250 n'' < ン } $n_quoter ;
# Forward clean-up of helper characters left over by earlier rules.
# This rule has no context, so it removes every remaining tilde.
271 '~' > ; # delete stray tildes between letters
# Removes an apostrophe ('' = one literal quote) that sits between Katakana and a Latin letter.
272 [:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
# Reverse-direction-only entries: everything is inside "( ... )", so nothing here runs in the
# Latin-to-Katakana direction.
# Width conversion restricted to Katakana plus the listed sound marks / prolonged sound marks.
274 :: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
# Global filter for the reverse direction.
# NOTE(review): the sequence "。-하-ᅦ" is a range immediately followed by another "-", and
# "\ -~" / "¢-£¥-¦¬" are plain ASCII/Latin-1; this looks like halfwidth/fullwidth code points
# that were compatibility-normalized -- confirm the exact set against the upstream data.
275 :: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;