]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
73c04bcf | 4 | # File: Latin_ConjoiningJamo.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
73c04bcf | 6 | # |
2ca993e8 A |
7 | |
8 | # Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303 | |
9 | # http://www.unicode.org/cldr/transliteration_guidelines.html#Korean | |
10 | #- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in | |
11 | #- the INDEX file. This transliterator is, by itself, not | |
12 | #- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or | |
13 | #- inverses thereof. | |
14 | # Transliteration from Latin characters to Korean script is done in | |
15 | # two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul | |
16 | # transliteration is done algorithmically following Unicode 3.0 | |
17 | # section 3.11. This file implements the Latin to Jamo | |
18 | # transliteration using rules. | |
19 | # Jamo occupy the block 1100-11FF. Within this block there are three | |
20 | # groups of characters: initial consonants or choseong (I), medial | |
21 | # vowels or jungseong (M), and trailing consonants or jongseong (F). | |
22 | # Standard Korean syllables are of the form I+M+F*. | |
23 | # Section 3.11 describes the use of 'filler' jamo to convert | |
24 | # nonstandard syllables to standard form: the choseong filler 115F and | |
25 | # the jungseong filler 1160. In this transliterator, we will not use | |
26 | # 115F or 1160. | |
27 | # We will, however, insert two 'null' jamo to make foreign words | |
28 | # conform to Korean syllable structure. These are the null initial | |
29 | # consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text, | |
30 | # we will use the separator in order to disambiguate strings, | |
31 | # e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G). | |
32 | # We will not use all of the characters in the jamo block. We will | |
33 | # only use the 19 initials, 21 medials, and 27 finals possessing a | |
34 | # jamo short name as defined in section 4.4 of the Unicode book. | |
35 | # Rules of thumb. These guidelines provide the basic framework | |
36 | # for the rules. They are phrased in terms of Latin-Jamo transliteration. | |
37 | # The Jamo-Latin rules derive from these, since the Jamo-Latin rules are | |
38 | # just context-free transliteration of jamo to corresponding short names, | |
39 | # with the addition of separators to maintain round-trip integrity | |
40 | # in the context of the Latin-Jamo rules. | |
41 | # A sequence of vowels: | |
42 | # - Take the longest sequence you can. If there are too many, or you don't | |
43 | # have a starting consonant, introduce a 110B as necessary. | |
44 | # A sequence of consonants. | |
45 | # - First join the double consonants: G + G → GG | |
46 | # - In the remaining list, | |
47 | # -- If there is no preceding vowel, take the first consonant, and insert EU | |
48 | # after it. Continue with the rest of the consonants. | |
49 | # -- If there is one consonant, attach to the following vowel | |
50 | # -- If there are two consonants and a following vowel, attach one to the | |
51 | # preceding vowel, and one to the following vowel. | |
52 | # -- If there are more than two consonants, join the first two together if you | |
53 | # can: L + G → LG | |
54 | # -- If you still end up with more than 2 consonants, insert EU after the | |
55 | # first one, and continue with the rest of the consonants. | |
56 | #---------------------------------------------------------------------- | |
57 | # Variables: one per jamo, followed by the character classes and the separator. Suffix 'i' marks an initial (choseong) and 'f' a final (jongseong); names without a suffix are medials, final-only consonants, or the null initial $IEUNG. | |
58 | # Some latin consonants or consonant pairs only occur as initials, and | |
59 | # some only as finals, but some occur as both. This makes some jamo | |
60 | # consonants ambiguous when transliterated into latin. | |
61 | # Initial only: IEUNG BB DD JJ R | |
62 | # Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ | |
63 | # Initial and Final: B C D G GG H J K M N P S SS T | |
73c04bcf | 64 | $Gi = ᄀ; |
46f4442e | 65 | $KKi = ᄁ; |
73c04bcf A |
66 | $Ni = ᄂ; |
67 | $Di = ᄃ; | |
46f4442e A |
68 | $TTi = ᄄ; |
69 | $Li = ᄅ; | |
73c04bcf A |
70 | $Mi = ᄆ; |
71 | $Bi = ᄇ; | |
46f4442e | 72 | $PPi = ᄈ; |
73c04bcf A |
73 | $Si = ᄉ; |
74 | $SSi = ᄊ; | |
75 | $IEUNG = ᄋ; # null initial, inserted during Latin-Jamo | |
76 | $Ji = ᄌ; | |
46f4442e A |
77 | $JJi = ᄍ; |
78 | $CHi = ᄎ; | |
73c04bcf A |
79 | $Ki = ᄏ; |
80 | $Ti = ᄐ; | |
81 | $Pi = ᄑ; | |
82 | $Hi = ᄒ; | |
83 | $A = ᅡ; | |
84 | $AE = ᅢ; | |
85 | $YA = ᅣ; | |
86 | $YAE = ᅤ; | |
87 | $EO = ᅥ; | |
88 | $E = ᅦ; | |
89 | $YEO = ᅧ; | |
90 | $YE = ᅨ; | |
91 | $O = ᅩ; | |
92 | $WA = ᅪ; | |
93 | $WAE = ᅫ; | |
94 | $OE = ᅬ; | |
95 | $YO = ᅭ; | |
96 | $U = ᅮ; | |
46f4442e | 97 | $WO = ᅯ; |
73c04bcf A |
98 | $WE = ᅰ; |
99 | $WI = ᅱ; | |
100 | $YU = ᅲ; | |
101 | $EU = ᅳ; # null medial, inserted during Latin-Jamo | |
46f4442e | 102 | $UI = ᅴ; |
73c04bcf A |
103 | $I = ᅵ; |
104 | $Gf = ᆨ; | |
105 | $GGf = ᆩ; | |
106 | $GS = ᆪ; | |
107 | $Nf = ᆫ; | |
108 | $NJ = ᆬ; | |
109 | $NH = ᆭ; | |
110 | $Df = ᆮ; | |
111 | $L = ᆯ; | |
112 | $LG = ᆰ; | |
113 | $LM = ᆱ; | |
114 | $LB = ᆲ; | |
115 | $LS = ᆳ; | |
116 | $LT = ᆴ; | |
117 | $LP = ᆵ; | |
118 | $LH = ᆶ; | |
119 | $Mf = ᆷ; | |
120 | $Bf = ᆸ; | |
121 | $BS = ᆹ; | |
122 | $Sf = ᆺ; | |
123 | $SSf = ᆻ; | |
124 | $NG = ᆼ; | |
125 | $Jf = ᆽ; | |
126 | $Cf = ᆾ; | |
127 | $Kf = ᆿ; | |
128 | $Tf = ᇀ; | |
129 | $Pf = ᇁ; | |
130 | $Hf = ᇂ; | |
131 | $jamoInitial = [ᄀ-ᄒ]; | |
132 | $jamoMedial = [ᅡ-ᅵ]; | |
46f4442e | 133 | $latinInitial = [bcdghjklmnprst]; |
2ca993e8 | 134 | # Any character in the latin transliteration of a medial |
73c04bcf | 135 | $latinMedial = [aeiouwy]; |
2ca993e8 | 136 | # The last character of the latin transliteration of a medial |
73c04bcf | 137 | $latinMedialEnd = [aeiou]; |
2ca993e8 | 138 | # Disambiguation separator |
46f4442e | 139 | $sep = \-; |
2ca993e8 A |
140 | #---------------------------------------------------------------------- |
141 | # Jamo-Latin | |
142 | # | |
143 | # Jamo to latin is relatively simple, since it is the latin that is | |
144 | # ambiguous. Most rules are straightforward, and we encode them below | |
145 | # as simple add-on back rules, e.g.: | |
146 | # $jamoMedial {bs} → $BS; | |
147 | # becomes | |
148 | # $jamoMedial {bs} ↔ $BS; | |
149 | # | |
150 | # Furthermore, we don't care about the ordering for Jamo-Latin because | |
151 | # we are going from single characters, so we can very easily piggyback | |
152 | # on the Latin-Jamo. | |
153 | # | |
154 | # The main issue with Jamo-Latin is when to insert separators. | |
155 | # Separators are inserted to obtain correct round trip behavior. For | |
156 | # example, the sequence Ki A Gf Gi E, if transliterated to "kagge", | |
157 | # would then round trip to Ki A GGi E. To prevent this, we insert a | |
158 | # separator: "kag-ge". IMPORTANT: The need for separators depends | |
159 | # very specifically on the behavior of the Latin-Jamo rules. A change | |
160 | # in the Latin-Jamo behavior can completely change the way the | |
161 | # separator insertion must be done. | |
162 | # First try to preserve actual separators in the jamo text by doubling | |
163 | # them. This fixes problems like: | |
164 | # (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) → dajung-yeongyeol | |
165 | # → (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional | |
166 | # -- if we don't care about losing separators in the jamo, we can delete | |
167 | # this rule. Read right-to-left (Jamo-Latin) it doubles a separator; read left-to-right (Latin-Jamo) it collapses a doubled separator to one. | |
729e4ab9 | 168 | $sep $sep ↔ $sep; |
2ca993e8 A |
169 | # Triple consonants. For three consonants "axxx" we insert a |
170 | # separator between the first and second "x" if XXf, Xf, and Xi all | |
171 | # exist, and we have A Xf XXi. This prevents the reverse | |
172 | # transliteration to A XXf Xi. The rule below covers the "s" case: an already-emitted "s" after a Latin vowel, followed by SSi. The empty key "{}" consumes nothing; $sep is inserted between the Latin left context and the unconverted jamo on the right. | |
729e4ab9 | 173 | $sep ← $latinMedialEnd s {} $SSi; |
2ca993e8 A |
174 | # For vowels the rule is similar. If there is a vowel "ae" such that |
175 | # "a" by itself and "e" by itself are vowels, then we want to map A E | |
176 | # to "a-e" so as not to round trip to AE. However, in the text Ki EO | |
177 | # IEUNG E we don't need to map to "keo-e". "keoe" suffices. For | |
178 | # vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be | |
179 | # tested. NOTE: These rules used to have a left context of | |
180 | # $latinInitial instead of [^$latinMedial]. The problem with this is | |
181 | # sequences where an initial IEUNG is transliterated away: | |
182 | # (IEUNG)(A)(IEUNG)(EO) → aeo → (IEUNG)(AE)(IEUNG)(O) | |
183 | # Also problems in cases like gayeo, which needs to be gaye-o | |
184 | # The hard case is a chain, like aeoeu. Normally interpreted as ae oe u. So for a-eoeu, we have to insert $sep | |
185 | # But, we don't insert between the o and the e. | |
186 | # | |
187 | # a ae | |
188 | # e eo eu | |
189 | # i | |
190 | # o oe | |
191 | # u | |
192 | # ui | |
193 | # wa wae we wi wo | |
194 | # yae ya yeo ye yo yu | |
195 | # These are simple, since they can't chain. Note that we don't handle extreme cases like [ga][eo][e][o] | |
729e4ab9 A |
196 | $sep ← a {} [$E $EO $EU]; |
197 | $sep ← [^aow] e {} [$O $OE]; | |
198 | $sep ← [^aowy] e {} [$U $UI]; | |
199 | $sep ← [^ey] o {} [$E $EO $EU]; | |
200 | $sep ← [^y] u {} [$I]; | |
2ca993e8 | 201 | # Similar to the above, but with an intervening $IEUNG, which Jamo-Latin deletes, so the two vowels end up adjacent in the Latin output. |
729e4ab9 A |
202 | $sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE]; |
203 | $sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U]; | |
204 | $sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU]; | |
205 | $sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU]; | |
2ca993e8 A |
206 | # Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E, |
207 | # where Xi also exists, must be transliterated as "ax-e" to prevent | |
208 | # the round trip conversion to A Xi E. There is one rule per single-letter final whose Latin spelling is also the spelling of an initial. | |
729e4ab9 A |
209 | $sep ← $latinMedialEnd b {} $IEUNG $jamoMedial; |
210 | $sep ← $latinMedialEnd d {} $IEUNG $jamoMedial; | |
211 | $sep ← $latinMedialEnd g {} $IEUNG $jamoMedial; | |
212 | $sep ← $latinMedialEnd h {} $IEUNG $jamoMedial; | |
213 | $sep ← $latinMedialEnd j {} $IEUNG $jamoMedial; | |
214 | $sep ← $latinMedialEnd k {} $IEUNG $jamoMedial; | |
215 | $sep ← $latinMedialEnd m {} $IEUNG $jamoMedial; | |
216 | $sep ← $latinMedialEnd n {} $IEUNG $jamoMedial; | |
217 | $sep ← $latinMedialEnd p {} $IEUNG $jamoMedial; | |
218 | $sep ← $latinMedialEnd s {} $IEUNG $jamoMedial; | |
219 | $sep ← $latinMedialEnd t {} $IEUNG $jamoMedial; | |
220 | $sep ← $latinMedialEnd l {} $IEUNG $jamoMedial; | |
2ca993e8 A |
221 | # Double finals followed by IEUNG. Similar to the single finals |
222 | # followed by IEUNG. Any latin consonant pair X Y, between medials, | |
223 | # that we would split by Latin-Jamo, we must handle when it occurs as | |
224 | # part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E. The "kk", "ss" and "ch" rules guard against Latin-Jamo reading those spellings as the initials KKi, SSi and CHi instead. | |
729e4ab9 A |
225 | $sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial; |
226 | $sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial; | |
227 | $sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial; | |
228 | $sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial; | |
229 | $sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial; | |
230 | $sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial; | |
231 | $sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial; | |
232 | $sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial; | |
233 | $sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial; | |
234 | $sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial; | |
235 | $sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial; | |
236 | $sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial; | |
237 | $sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial; | |
238 | $sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial; | |
239 | $sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial; | |
2ca993e8 A |
240 | # Split doubles. Text of the form A Xf Xi E, where XXi also occurs, |
241 | # we transliterate as "ax-xe" to prevent round trip transliteration as | |
242 | # A XXi E. | |
729e4ab9 A |
243 | $sep ← $latinMedialEnd j {} $Ji $jamoMedial; |
244 | $sep ← $latinMedialEnd k {} $Ki $jamoMedial; | |
245 | $sep ← $latinMedialEnd s {} $Si $jamoMedial; | |
2ca993e8 A |
246 | # XYY. This corresponds to the XYY rule in Latin-Jamo. By default |
247 | # Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result, | |
248 | # "xyy" forms that correspond to XYf Yi must be transliterated as | |
249 | # "xy-y". The single-letter rules at the end (k, p, t) keep a final from fusing with the following initial into "kk", "pp" or "tt". NOTE(review): the 'c' + Hi rule looks meant to guard "ch" -- confirm it can still fire. | |
729e4ab9 A |
250 | $sep ← $latinMedialEnd b s {} [$Si $SSi]; |
251 | $sep ← $latinMedialEnd g s {} [$Si $SSi]; | |
252 | $sep ← $latinMedialEnd l b {} [$Bi]; | |
253 | $sep ← $latinMedialEnd l g {} [$Gi]; | |
254 | $sep ← $latinMedialEnd l s {} [$Si $SSi]; | |
255 | $sep ← $latinMedialEnd n g {} [$Gi]; | |
256 | $sep ← $latinMedialEnd n j {} [$Ji $JJi]; | |
2ca993e8 A |
257 | # $sep ← $latinMedialEnd l {} [$PPi]; |
258 | # $sep ← $latinMedialEnd l {} [$TTi]; | |
729e4ab9 A |
259 | $sep ← $latinMedialEnd l p {} [$Pi]; |
260 | $sep ← $latinMedialEnd l t {} [$Ti]; | |
261 | $sep ← $latinMedialEnd k {} [$KKi $Ki]; | |
262 | $sep ← $latinMedialEnd p {} $Pi; | |
263 | $sep ← $latinMedialEnd t {} $Ti; | |
264 | $sep ← $latinMedialEnd c {} [$Hi]; | |
2ca993e8 A |
265 | # Deletion of IEUNG is handled below. |
266 | #---------------------------------------------------------------------- | |
267 | # Latin-Jamo | |
268 | # [Basic, context-free Jamo-Latin rules are embedded here too. See | |
269 | # above.] | |
270 | # Split digraphs: Text of the form 'axye', where 'xy' is a final | |
271 | # digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and | |
272 | # 'e' are medials, we want to transliterate this as A Xf Yi E rather | |
273 | # than A XYf IEUNG E. We do NOT include text of the form "axxe", | |
274 | # since that is handled differently below. These rules are generated | |
275 | # programmatically from the jamo data. In each rule the left context is an already-converted jamo medial and the right context is a still-unconverted Latin vowel letter. | |
729e4ab9 A |
276 | $jamoMedial {b s} $latinMedial → $Bf $Si; |
277 | $jamoMedial {g s} $latinMedial → $Gf $Si; | |
278 | $jamoMedial {l b} $latinMedial → $L $Bi; | |
279 | $jamoMedial {l g} $latinMedial → $L $Gi; | |
280 | $jamoMedial {l h} $latinMedial → $L $Hi; | |
281 | $jamoMedial {l m} $latinMedial → $L $Mi; | |
282 | $jamoMedial {l p} $latinMedial → $L $Pi; | |
283 | $jamoMedial {l s} $latinMedial → $L $Si; | |
284 | $jamoMedial {l t} $latinMedial → $L $Ti; | |
285 | $jamoMedial {n g} $latinMedial → $Nf $Gi; | |
286 | $jamoMedial {n h} $latinMedial → $Nf $Hi; | |
287 | $jamoMedial {n j} $latinMedial → $Nf $Ji; | |
2ca993e8 A |
288 | # Single consonants are initials: Text of the form 'axe', where 'x' |
289 | # can be an initial or a final, and 'a' and 'e' are medials, we want | |
290 | # to transliterate as A Xi E rather than A Xf IEUNG E. "ch" is listed here because it maps to the single initial CHi. | |
729e4ab9 A |
291 | $jamoMedial {b} $latinMedial → $Bi; |
292 | $jamoMedial {ch} $latinMedial → $CHi; | |
293 | $jamoMedial {d} $latinMedial → $Di; | |
294 | $jamoMedial {g} $latinMedial → $Gi; | |
295 | $jamoMedial {h} $latinMedial → $Hi; | |
296 | $jamoMedial {j} $latinMedial → $Ji; | |
297 | $jamoMedial {k} $latinMedial → $Ki; | |
298 | $jamoMedial {m} $latinMedial → $Mi; | |
299 | $jamoMedial {n} $latinMedial → $Ni; | |
300 | $jamoMedial {p} $latinMedial → $Pi; | |
301 | $jamoMedial {s} $latinMedial → $Si; | |
302 | $jamoMedial {t} $latinMedial → $Ti; | |
303 | $jamoMedial {l} $latinMedial → $Li; | |
2ca993e8 A |
304 | # Doubled initials. The sequence "axxe", where XX exists as an initial |
305 | # (XXi), and also Xi and Xf exist (true of all digraphs XX), we want | |
306 | # to transliterate as A XXi E, rather than split to A Xf Xi E. The spaces inside the keys (e.g. {p p}) are not significant; each key matches the two letters written together. | |
729e4ab9 A |
307 | $jamoMedial {p p} $latinMedial → $PPi; |
308 | $jamoMedial {t t} $latinMedial → $TTi; | |
309 | $jamoMedial {j j} $latinMedial → $JJi; | |
310 | $jamoMedial {k k} $latinMedial → $KKi; | |
311 | $jamoMedial {s s} $latinMedial → $SSi; | |
2ca993e8 A |
312 | # XYY. Because doubled consonants bind more strongly than XY |
313 | # consonants, we must handle the sequence "axyy" specially. Here XYf | |
314 | # and YYi must exist. In these cases, we map to Xf YYi rather than | |
315 | # XYf. | |
316 | # However, there are two special cases: in "lppp" and "lttt" the first two letters become the cluster final (LP, LT) and the doubled pair that follows is left for later rules. | |
729e4ab9 A |
317 | $jamoMedial {lp} p p → $LP; |
318 | $jamoMedial {lt} t t → $LT; | |
2ca993e8 | 319 | # End special cases |
729e4ab9 A |
320 | $jamoMedial {b} s s → $Bf; |
321 | $jamoMedial {g} s s → $Gf; | |
322 | $jamoMedial {l} b b → $L; | |
323 | $jamoMedial {l} g g → $L; | |
324 | $jamoMedial {l} s s → $L; | |
325 | $jamoMedial {l} t t → $L; | |
326 | $jamoMedial {l} p p → $L; | |
327 | $jamoMedial {n} g g → $Nf; | |
328 | $jamoMedial {n} j j → $Nf; | |
2ca993e8 A |
329 | # Finals: Attach consonant with preceding medial to preceding medial. |
330 | # Do this BEFORE mapping consonants to initials. Longer keys must | |
331 | # precede shorter keys that they start with, e.g., the rule for 'bs' | |
332 | # must precede 'b'. | |
333 | # [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this | |
334 | # block for Jamo-Latin.] The bare 'c' rule is one-way (→): "c" is accepted as input for Cf, but Jamo-Latin always writes "ch". | |
729e4ab9 A |
335 | $jamoMedial {bs} ↔ $BS; |
336 | $jamoMedial {b} ↔ $Bf; | |
337 | $jamoMedial {ch} ↔ $Cf; | |
338 | $jamoMedial {c} → $Cf; | |
339 | $jamoMedial {d} ↔ $Df; | |
340 | $jamoMedial {kk} ↔ $GGf; | |
341 | $jamoMedial {gs} ↔ $GS; | |
342 | $jamoMedial {g} ↔ $Gf; | |
343 | $jamoMedial {h} ↔ $Hf; | |
344 | $jamoMedial {j} ↔ $Jf; | |
345 | $jamoMedial {k} ↔ $Kf; | |
51004dcb | 346 | $jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG; |
729e4ab9 A |
347 | $jamoMedial {lh} ↔ $LH; |
348 | $jamoMedial {lm} ↔ $LM; | |
349 | $jamoMedial {lp} ↔ $LP; | |
350 | $jamoMedial {ls} ↔ $LS; | |
351 | $jamoMedial {lt} ↔ $LT; | |
352 | $jamoMedial {l} ↔ $L; | |
353 | $jamoMedial {m} ↔ $Mf; | |
354 | $jamoMedial {ng} ↔ $NG; | |
355 | $jamoMedial {nh} ↔ $NH; | |
356 | $jamoMedial {nj} ↔ $NJ; | |
357 | $jamoMedial {n} ↔ $Nf; | |
358 | $jamoMedial {p} ↔ $Pf; | |
359 | $jamoMedial {ss} ↔ $SSf; | |
360 | $jamoMedial {s} ↔ $Sf; | |
361 | $jamoMedial {t} ↔ $Tf; | |
2ca993e8 A |
362 | # Initials: Attach single consonant to following medial. Do this |
363 | # AFTER mapping finals. Longer keys must precede shorter keys that | |
364 | # they start with, e.g., the rule for 'kk' must precede 'k'. | |
365 | # [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within | |
366 | # this block for Jamo-Latin.] The bare 'c' rule is one-way (→): "c" is accepted as input for CHi, but Jamo-Latin always writes "ch". | |
729e4ab9 A |
367 | {kk} $latinMedial ↔ $KKi; |
368 | {g} $latinMedial ↔ $Gi; | |
369 | {n} $latinMedial ↔ $Ni; | |
370 | {tt} $latinMedial ↔ $TTi; | |
371 | {d} $latinMedial ↔ $Di; | |
372 | {l} $latinMedial ↔ $Li; | |
373 | {m} $latinMedial ↔ $Mi; | |
374 | {pp} $latinMedial ↔ $PPi; | |
375 | {b} $latinMedial ↔ $Bi; | |
376 | {ss} $latinMedial ↔ $SSi; | |
377 | {s} $latinMedial ↔ $Si; | |
378 | {jj} $latinMedial ↔ $JJi; | |
379 | {j} $latinMedial ↔ $Ji; | |
380 | {ch} $latinMedial ↔ $CHi; | |
381 | {c} $latinMedial → $CHi; | |
382 | {k} $latinMedial ↔ $Ki; | |
383 | {t} $latinMedial ↔ $Ti; | |
384 | {p} $latinMedial ↔ $Pi; | |
385 | {h} $latinMedial ↔ $Hi; | |
2ca993e8 A |
386 | # 'r' in final position. Because of the equivalency of the 'l' and |
387 | # 'r' jamo (the glyphs are the same), we try to provide the same | |
388 | # equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled | |
389 | # below. If we see an 'r' in an apparent final position, treat it | |
390 | # like 'l'. For example, "karka" → Ki A R EU Ki A without this rule. | |
391 | # Instead, we want Ki A L Ki A. | |
392 | # Initial + Final: If we match the next rule, we have initial then | |
393 | # final consonant with no intervening medial. We insert the null | |
394 | # vowel BEFORE it to create a well-formed syllable. (In the next rule | |
395 | # we insert a null vowel AFTER an anomalous initial.) NOTE(review): no rule for the "'r' in final position" or "Initial + Final" paragraphs appears in this listing before the Initial + X block -- confirm whether those rules were removed. | |
396 | # Initial + X: This block matches an initial consonant not followed by | |
397 | # a medial. We insert the null vowel after it. We handle double | |
398 | # initials explicitly here; for single initial consonants we insert EU | |
399 | # (as Latin) after them and let standard rules do the rest. | |
400 | # BREAKS ROUND TRIP INTEGRITY | |
729e4ab9 A |
401 | kk → $KKi $EU; |
402 | tt → $TTi $EU; | |
403 | pp → $PPi $EU; | |
404 | ss → $SSi $EU; | |
405 | jj → $JJi $EU; | |
406 | ch → $CHi $EU; | |
407 | ([lbdghjkmnpst]) → | $1 eu; | |
2ca993e8 A |
408 | # X + Final: Finally we have to deal with a consonant that can only be |
409 | # interpreted as a final (not an initial) and which is preceded | |
410 | # neither by an initial nor a medial. It is the start of the | |
411 | # syllable, but cannot be. Most of these will already be handled by | |
412 | # the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng' | |
413 | # 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'. | |
414 | # For this isolated case, we could add a null initial and medial, | |
415 | # which would give "la" → IEUNG EU L IEUNG A, for example. A more | |
416 | # economical solution is to transliterate isolated "l" (that is, | |
417 | # initial "l") to "r". (Other similar conversions of consonants that | |
418 | # occur neither as initials nor as finals are handled below.) NOTE(review): the earlier catch-all rule ([lbdghjkmnpst]) also lists 'l' and comes first -- confirm this rule is still reachable. | |
729e4ab9 | 419 | l → | r; |
2ca993e8 A |
420 | # Medials. If a medial is preceded by an initial, then we proceed |
421 | # normally. As usual, longer keys must precede shorter ones. | |
422 | # [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within | |
423 | # this block for Jamo-Latin.] | |
424 | # | |
425 | # a e i o u | |
426 | # ae | |
427 | # eo eu | |
428 | # oe | |
429 | # ui | |
430 | # wa we wi wo | |
431 | # wae | |
432 | # yae ya yeo ye yo yu | |
729e4ab9 A |
433 | $jamoInitial {ae} ↔ $AE; |
434 | $jamoInitial {a} ↔ $A; | |
435 | $jamoInitial {eo} ↔ $EO; | |
436 | $jamoInitial {eu} ↔ $EU; | |
437 | $jamoInitial {e} ↔ $E; | |
438 | $jamoInitial {i} ↔ $I; | |
439 | $jamoInitial {oe} ↔ $OE; | |
440 | $jamoInitial {o} ↔ $O; | |
441 | $jamoInitial {ui} ↔ $UI; | |
442 | $jamoInitial {u} ↔ $U; | |
443 | $jamoInitial {wae} ↔ $WAE; | |
444 | $jamoInitial {wa} ↔ $WA; | |
445 | $jamoInitial {wo} ↔ $WO; | |
446 | $jamoInitial {we} ↔ $WE; | |
447 | $jamoInitial {wi} ↔ $WI; | |
448 | $jamoInitial {yae} ↔ $YAE; | |
449 | $jamoInitial {ya} ↔ $YA; | |
450 | $jamoInitial {yeo} ↔ $YEO; | |
451 | $jamoInitial {ye} ↔ $YE; | |
452 | $jamoInitial {yo} ↔ $YO; | |
453 | $jamoInitial {yu} ↔ $YU; | |
2ca993e8 A |
454 | # We may see an anomalous isolated 'w' or 'y'. In that case, we |
455 | # interpret it as 'wi' and 'yu', respectively. The cursor marker in the output sits before the replacement letters, so the medial rules re-process them. | |
456 | # BREAKS ROUND TRIP INTEGRITY | |
729e4ab9 A |
457 | $jamoInitial {w} → | wi; |
458 | $jamoInitial {y} → | yu; | |
2ca993e8 A |
459 | # Otherwise, insert a null consonant IEUNG before the medial (which is |
460 | # still an untransliterated latin vowel). The cursor marker is placed after $IEUNG, so the captured vowel ($1) is re-processed with IEUNG as its preceding initial. | |
729e4ab9 | 461 | ($latinMedial) → $IEUNG | $1; |
2ca993e8 A |
462 | # Convert non-jamo latin consonants to equivalents. These occur as |
463 | # neither initials nor finals in jamo. 'l' occurs as a final, but not | |
464 | # an initial; it is handled above. The following letters (left hand | |
465 | # side) will never be output by Jamo-Latin. Each rule is one-way (→) and leaves the cursor before the replacement, so the substituted letters are then transliterated in turn. | |
729e4ab9 A |
466 | f → | p; |
467 | q → | k; | |
468 | v → | b; | |
469 | x → | ks; | |
470 | z → | s; | |
471 | r → | l; | |
472 | c → | k; | |
2ca993e8 | 473 | # Delete separators (Latin-Jamo). One-way rule with an empty output: any separator not consumed by an earlier rule is dropped. |
729e4ab9 | 474 | $sep → ; |
2ca993e8 A |
475 | # Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels, |
476 | # since these may also occur in text. One-way rule (←) with an empty Latin side. | |
729e4ab9 | 477 | ← $IEUNG; |
2ca993e8 A |
478 | #- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in |
479 | #- the INDEX file. This transliterator is, by itself, not | |
480 | #- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or | |
481 | #- inverses thereof. | |
482 | # eof | |
483 |