git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Latin

Commit	Line	Data
f3c0d7a5 A	1	# © 2016 and later: Unicode, Inc. and others.
	2	# License & terms of use: http://www.unicode.org/copyright.html#License
	3	#
73c04bcf	4	# File: Latin_ConjoiningJamo.txt
f3c0d7a5	5	# Generated from CLDR
73c04bcf	6	#
2ca993e8 A	7
	8	# Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303
	9	# http://www.unicode.org/cldr/transliteration_guidelines.html#Korean
	10	#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
	11	#- the INDEX file. This transliterator is, by itself, not
	12	#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
	13	#- inverses thereof.
	14	# Transliteration from Latin characters to Korean script is done in
	15	# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
	16	# transliteration is done algorithmically following Unicode 3.0
	17	# section 3.11. This file implements the Latin to Jamo
	18	# transliteration using rules.
	19	# Jamo occupy the block 1100-11FF. Within this block there are three
	20	# groups of characters: initial consonants or choseong (I), medial
	21	# vowels or jungseong (M), and trailing consonants or jongseong (F).
	22	# Standard Korean syllables are of the form I+M+F*.
	23	# Section 3.11 describes the use of 'filler' jamo to convert
	24	# nonstandard syllables to standard form: the choseong filler 115F and
	25	# the jungseong filler 1160. In this transliterator, we will not use
	26	# 115F or 1160.
	27	# We will, however, insert two 'null' jamo to make foreign words
	28	# conform to Korean syllable structure. These are the null initial
	29	# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
	30	# we will use the separator in order to disambiguate strings,
	31	# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
	32	# We will not use all of the characters in the jamo block. We will
	33	# only use the 19 initials, 21 medials, and 27 finals possessing a
	34	# jamo short name as defined in section 4.4 of the Unicode book.
	35	# Rules of thumb. These guidelines provide the basic framework
	36	# for the rules. They are phrased in terms of Latin-Jamo transliteration.
	37	# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
	38	# just context-free transliteration of jamo to corresponding short names,
	39	# with the addition of separators to maintain round-trip integrity
	40	# in the context of the Latin-Jamo rules.
	41	# A sequence of vowels:
	42	# - Take the longest sequence you can. If there are too many, or you don't
	43	# have a starting consonant, introduce a 110B as necessary.
	44	# A sequence of consonants.
	45	# - First join the double consonants: G + G -→ GG
	46	# - In the remaining list,
	47	# -- If there is no preceding vowel, take the first consonant, and insert EU
	48	# after it. Continue with the rest of the consonants.
	49	# -- If there is one consonant, attach to the following vowel
	50	# -- If there are two consonants and a following vowel, attach one to the
	51	# preceding vowel, and one to the following vowel.
	52	# -- If there are more than two consonants, join the first two together if you
	53	# can: L + G =→ LG
	54	# -- If you still end up with more than 2 consonants, insert EU after the
	55	# first one, and continue with the rest of the consonants.
	56	#----------------------------------------------------------------------
	57	# Variables
	58	# Some latin consonants or consonant pairs only occur as initials, and
	59	# some only as finals, but some occur as both. This makes some jamo
	60	# consonants ambiguous when transliterated into latin.
	61	# Initial only: IEUNG BB DD JJ R
	62	# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
	63	# Initial and Final: B C D G GG H J K M N P S SS T
73c04bcf	64	$Gi = ᄀ;
46f4442e	65	$KKi = ᄁ;
73c04bcf A	66	$Ni = ᄂ;
73c04bcf A	67	$Di = ᄃ;
46f4442e A	68	$TTi = ᄄ;
46f4442e A	69	$Li = ᄅ;
73c04bcf A	70	$Mi = ᄆ;
73c04bcf A	71	$Bi = ᄇ;
46f4442e	72	$PPi = ᄈ;
73c04bcf A	73	$Si = ᄉ;
	74	$SSi = ᄊ;
	75	$IEUNG = ᄋ; # null initial, inserted during Latin-Jamo
	76	$Ji = ᄌ;
46f4442e A	77	$JJi = ᄍ;
46f4442e A	78	$CHi = ᄎ;
73c04bcf A	79	$Ki = ᄏ;
	80	$Ti = ᄐ;
	81	$Pi = ᄑ;
	82	$Hi = ᄒ;
	83	$A = ᅡ;
	84	$AE = ᅢ;
	85	$YA = ᅣ;
	86	$YAE = ᅤ;
	87	$EO = ᅥ;
	88	$E = ᅦ;
	89	$YEO = ᅧ;
	90	$YE = ᅨ;
	91	$O = ᅩ;
	92	$WA = ᅪ;
	93	$WAE = ᅫ;
	94	$OE = ᅬ;
	95	$YO = ᅭ;
	96	$U = ᅮ;
46f4442e	97	$WO = ᅯ;
73c04bcf A	98	$WE = ᅰ;
	99	$WI = ᅱ;
	100	$YU = ᅲ;
	101	$EU = ᅳ; # null medial, inserted during Latin-Jamo
46f4442e	102	$UI = ᅴ;
73c04bcf A	103	$I = ᅵ;
	104	$Gf = ᆨ;
	105	$GGf = ᆩ;
	106	$GS = ᆪ;
	107	$Nf = ᆫ;
	108	$NJ = ᆬ;
	109	$NH = ᆭ;
	110	$Df = ᆮ;
	111	$L = ᆯ;
	112	$LG = ᆰ;
	113	$LM = ᆱ;
	114	$LB = ᆲ;
	115	$LS = ᆳ;
	116	$LT = ᆴ;
	117	$LP = ᆵ;
	118	$LH = ᆶ;
	119	$Mf = ᆷ;
	120	$Bf = ᆸ;
	121	$BS = ᆹ;
	122	$Sf = ᆺ;
	123	$SSf = ᆻ;
	124	$NG = ᆼ;
	125	$Jf = ᆽ;
	126	$Cf = ᆾ;
	127	$Kf = ᆿ;
	128	$Tf = ᇀ;
	129	$Pf = ᇁ;
	130	$Hf = ᇂ;
	131	$jamoInitial = [ᄀ-ᄒ];
	132	$jamoMedial = [ᅡ-ᅵ];
46f4442e	133	$latinInitial = [bcdghjklmnprst];
2ca993e8	134	# Any character in the latin transliteration of a medial
73c04bcf	135	$latinMedial = [aeiouwy];
2ca993e8	136	# The last character of the latin transliteration of a medial
73c04bcf	137	$latinMedialEnd = [aeiou];
2ca993e8	138	# Disambiguation separator
46f4442e	139	$sep = \-;
2ca993e8 A	140	#----------------------------------------------------------------------
	141	# Jamo-Latin
	142	#
	143	# Jamo to latin is relatively simple, since it is the latin that is
	144	# ambiguous. Most rules are straightforward, and we encode them below
	145	# as a simple add-on back rule, e.g.:
	146	# $jamoMedial {bs} → $BS;
	147	# becomes
	148	# $jamoMedial {bs} ↔ $BS;
	149	#
	150	# Furthermore, we don't care about the ordering for Jamo-Latin because
	151	# we are going from single characters, so we can very easily piggyback
	152	# on the Latin-Jamo.
	153	#
	154	# The main issue with Jamo-Latin is when to insert separators.
	155	# Separators are inserted to obtain correct round trip behavior. For
	156	# example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
	157	# would then round trip to Ki A GGi E. To prevent this, we insert a
	158	# separator: "kag-ge". IMPORTANT: The need for separators depends
	159	# very specifically on the behavior of the Latin-Jamo rules. A change
	160	# in the Latin-Jamo behavior can completely change the way the
	161	# separator insertion must be done.
	162	# First try to preserve actual separators in the jamo text by doubling
	163	# them. This fixes problems like:
	164	# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) =→ dajung-yeongyeol
	165	# =→ (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
	166	# -- if we don't care about losing separators in the jamo, we can delete
	167	# this rule.
729e4ab9	168	$sep $sep ↔ $sep;
2ca993e8 A	169	# Triple consonants. For three consonants "axxx" we insert a
	170	# separator between the first and second "x" if XXf, Xf, and Xi all
	171	# exist, and we have A Xf XXi. This prevents the reverse
	172	# transliteration to A XXf Xi.
729e4ab9	173	$sep ← $latinMedialEnd s {} $SSi;
2ca993e8 A	174	# For vowels the rule is similar. If there is a vowel "ae" such that
	175	# "a" by itself and "e" by itself are vowels, then we want to map A E
	176	# to "a-e" so as not to round trip to AE. However, in the text Ki EO
	177	# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
	178	# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
	179	# tested. NOTE: These rules used to have a left context of
	180	# $latinInitial instead of [^$latinMedial]. The problem with this is
	181	# sequences where an initial IEUNG is transliterated away:
	182	# (IEUNG)(A)(IEUNG)(EO) =→ aeo =→ (IEUNG)(AE)(IEUNG)(O)
	183	# Also problems in cases like gayeo, which needs to be gaye-o
	184	# The hard case is a chain, like aeoeu. Normally interpreted as ae oe u. So for a-eoeu, we have to insert $sep
	185	# But, we don't insert between the o and the e.
	186	#
	187	# a ae
	188	# e eo eu
	189	# i
	190	# o oe
	191	# u
	192	# ui
	193	# wa wae we wi
	194	# yae ya yeo ye yo yu
	195	# These are simple, since they can't chain. Note that we don't handle extreme cases like [ga][eo][e][o]
729e4ab9 A	196	$sep ← a {} [$E $EO $EU];
	197	$sep ← [^aow] e {} [$O $OE];
	198	$sep ← [^aowy] e {} [$U $UI];
	199	$sep ← [^ey] o {} [$E $EO $EU];
	200	$sep ← [^y] u {} [$I];
2ca993e8	201	# Similar to the above, but with an intervening $IEUNG.
729e4ab9 A	202	$sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE];
	203	$sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U];
	204	$sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];
	205	$sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];
2ca993e8 A	206	# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
	207	# where Xi also exists, must be transliterated as "ax-e" to prevent
	208	# the round trip conversion to A Xi E.
729e4ab9 A	209	$sep ← $latinMedialEnd b {} $IEUNG $jamoMedial;
	210	$sep ← $latinMedialEnd d {} $IEUNG $jamoMedial;
	211	$sep ← $latinMedialEnd g {} $IEUNG $jamoMedial;
	212	$sep ← $latinMedialEnd h {} $IEUNG $jamoMedial;
	213	$sep ← $latinMedialEnd j {} $IEUNG $jamoMedial;
	214	$sep ← $latinMedialEnd k {} $IEUNG $jamoMedial;
	215	$sep ← $latinMedialEnd m {} $IEUNG $jamoMedial;
	216	$sep ← $latinMedialEnd n {} $IEUNG $jamoMedial;
	217	$sep ← $latinMedialEnd p {} $IEUNG $jamoMedial;
	218	$sep ← $latinMedialEnd s {} $IEUNG $jamoMedial;
	219	$sep ← $latinMedialEnd t {} $IEUNG $jamoMedial;
	220	$sep ← $latinMedialEnd l {} $IEUNG $jamoMedial;
2ca993e8 A	221	# Double finals followed by IEUNG. Similar to the single finals
	222	# followed by IEUNG. Any latin consonant pair X Y, between medials,
	223	# that we would split by Latin-Jamo, we must handle when it occurs as
	224	# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E
729e4ab9 A	225	$sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial;
	226	$sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial;
	227	$sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial;
	228	$sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial;
	229	$sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial;
	230	$sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial;
	231	$sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial;
	232	$sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial;
	233	$sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial;
	234	$sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial;
	235	$sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial;
	236	$sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial;
	237	$sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial;
	238	$sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial;
	239	$sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial;
2ca993e8 A	240	# Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
	241	# we transliterate as "ax-xe" to prevent round trip transliteration as
	242	# A XXi E.
729e4ab9 A	243	$sep ← $latinMedialEnd j {} $Ji $jamoMedial;
	244	$sep ← $latinMedialEnd k {} $Ki $jamoMedial;
	245	$sep ← $latinMedialEnd s {} $Si $jamoMedial;
2ca993e8 A	246	# XYY. This corresponds to the XYY rule in Latin-Jamo. By default
	247	# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
	248	# "xyy" forms that correspond to XYf Yi must be transliterated as
	249	# "xy-y".
729e4ab9 A	250	$sep ← $latinMedialEnd b s {} [$Si $SSi];
	251	$sep ← $latinMedialEnd g s {} [$Si $SSi];
	252	$sep ← $latinMedialEnd l b {} [$Bi];
	253	$sep ← $latinMedialEnd l g {} [$Gi];
	254	$sep ← $latinMedialEnd l s {} [$Si $SSi];
	255	$sep ← $latinMedialEnd n g {} [$Gi];
	256	$sep ← $latinMedialEnd n j {} [$Ji $JJi];
2ca993e8 A	257	# $sep ← $latinMedialEnd l {} [$PPi];
2ca993e8 A	258	# $sep ← $latinMedialEnd l {} [$TTi];
729e4ab9 A	259	$sep ← $latinMedialEnd l p {} [$Pi];
	260	$sep ← $latinMedialEnd l t {} [$Ti];
	261	$sep ← $latinMedialEnd k {} [$KKi $Ki];
	262	$sep ← $latinMedialEnd p {} $Pi;
	263	$sep ← $latinMedialEnd t {} $Ti;
	264	$sep ← $latinMedialEnd c {} [$Hi];
2ca993e8 A	265	# Deletion of IEUNG is handled below.
	266	#----------------------------------------------------------------------
	267	# Latin-Jamo
	268	# [Basic, context-free Jamo-Latin rules are embedded here too. See
	269	# above.]
	270	# Split digraphs: Text of the form 'axye', where 'xy' is a final
	271	# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
	272	# 'e' are medials, we want to transliterate this as A Xf Yi E rather
	273	# than A XYf IEUNG E. We do NOT include text of the form "axxe",
	274	# since that is handled differently below. These rules are generated
	275	# programmatically from the jamo data.
729e4ab9 A	276	$jamoMedial {b s} $latinMedial → $Bf $Si;
	277	$jamoMedial {g s} $latinMedial → $Gf $Si;
	278	$jamoMedial {l b} $latinMedial → $L $Bi;
	279	$jamoMedial {l g} $latinMedial → $L $Gi;
	280	$jamoMedial {l h} $latinMedial → $L $Hi;
	281	$jamoMedial {l m} $latinMedial → $L $Mi;
	282	$jamoMedial {l p} $latinMedial → $L $Pi;
	283	$jamoMedial {l s} $latinMedial → $L $Si;
	284	$jamoMedial {l t} $latinMedial → $L $Ti;
	285	$jamoMedial {n g} $latinMedial → $Nf $Gi;
	286	$jamoMedial {n h} $latinMedial → $Nf $Hi;
	287	$jamoMedial {n j} $latinMedial → $Nf $Ji;
2ca993e8 A	288	# Single consonants are initials: Text of the form 'axe', where 'x'
	289	# can be an initial or a final, and 'a' and 'e' are medials, we want
	290	# to transliterate as A Xi E rather than A Xf IEUNG E.
729e4ab9 A	291	$jamoMedial {b} $latinMedial → $Bi;
	292	$jamoMedial {ch} $latinMedial → $CHi;
	293	$jamoMedial {d} $latinMedial → $Di;
	294	$jamoMedial {g} $latinMedial → $Gi;
	295	$jamoMedial {h} $latinMedial → $Hi;
	296	$jamoMedial {j} $latinMedial → $Ji;
	297	$jamoMedial {k} $latinMedial → $Ki;
	298	$jamoMedial {m} $latinMedial → $Mi;
	299	$jamoMedial {n} $latinMedial → $Ni;
	300	$jamoMedial {p} $latinMedial → $Pi;
	301	$jamoMedial {s} $latinMedial → $Si;
	302	$jamoMedial {t} $latinMedial → $Ti;
	303	$jamoMedial {l} $latinMedial → $Li;
2ca993e8 A	304	# Doubled initials. The sequence "axxe", where XX exists as an initial
	305	# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
	306	# to transliterate as A XXi E, rather than split to A Xf Xi E.
729e4ab9 A	307	$jamoMedial {p p} $latinMedial → $PPi;
	308	$jamoMedial {t t} $latinMedial → $TTi;
	309	$jamoMedial {j j} $latinMedial → $JJi;
	310	$jamoMedial {k k} $latinMedial → $KKi;
	311	$jamoMedial {s s} $latinMedial → $SSi;
2ca993e8 A	312	# XYY. Because doubled consonants bind more strongly than XY
	313	# consonants, we must handle the sequence "axyy" specially. Here XYf
	314	# and YYi must exist. In these cases, we map to Xf YYi rather than
	315	# XYf.
	316	# However, there are two special cases.
729e4ab9 A	317	$jamoMedial {lp} p p → $LP;
729e4ab9 A	318	$jamoMedial {lt} t t → $LT;
2ca993e8	319	# End special cases
729e4ab9 A	320	$jamoMedial {b} s s → $Bf;
	321	$jamoMedial {g} s s → $Gf;
	322	$jamoMedial {l} b b → $L;
	323	$jamoMedial {l} g g → $L;
	324	$jamoMedial {l} s s → $L;
	325	$jamoMedial {l} t t → $L;
	326	$jamoMedial {l} p p → $L;
	327	$jamoMedial {n} g g → $Nf;
	328	$jamoMedial {n} j j → $Nf;
2ca993e8 A	329	# Finals: Attach consonant with preceding medial to preceding medial.
	330	# Do this BEFORE mapping consonants to initials. Longer keys must
	331	# precede shorter keys that they start with, e.g., the rule for 'bs'
	332	# must precede 'b'.
	333	# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
	334	# block for Jamo-Latin.]
729e4ab9 A	335	$jamoMedial {bs} ↔ $BS;
	336	$jamoMedial {b} ↔ $Bf;
	337	$jamoMedial {ch} ↔ $Cf;
	338	$jamoMedial {c} → $Cf;
	339	$jamoMedial {d} ↔ $Df;
	340	$jamoMedial {kk} ↔ $GGf;
	341	$jamoMedial {gs} ↔ $GS;
	342	$jamoMedial {g} ↔ $Gf;
	343	$jamoMedial {h} ↔ $Hf;
	344	$jamoMedial {j} ↔ $Jf;
	345	$jamoMedial {k} ↔ $Kf;
51004dcb	346	$jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG;
729e4ab9 A	347	$jamoMedial {lh} ↔ $LH;
	348	$jamoMedial {lm} ↔ $LM;
	349	$jamoMedial {lp} ↔ $LP;
	350	$jamoMedial {ls} ↔ $LS;
	351	$jamoMedial {lt} ↔ $LT;
	352	$jamoMedial {l} ↔ $L;
	353	$jamoMedial {m} ↔ $Mf;
	354	$jamoMedial {ng} ↔ $NG;
	355	$jamoMedial {nh} ↔ $NH;
	356	$jamoMedial {nj} ↔ $NJ;
	357	$jamoMedial {n} ↔ $Nf;
	358	$jamoMedial {p} ↔ $Pf;
	359	$jamoMedial {ss} ↔ $SSf;
	360	$jamoMedial {s} ↔ $Sf;
	361	$jamoMedial {t} ↔ $Tf;
2ca993e8 A	362	# Initials: Attach single consonant to following medial. Do this
	363	# AFTER mapping finals. Longer keys must precede shorter keys that
	364	# they start with, e.g., the rule for 'ss' must precede 's'.
	365	# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
	366	# this block for Jamo-Latin.]
729e4ab9 A	367	{kk} $latinMedial ↔ $KKi;
	368	{g} $latinMedial ↔ $Gi;
	369	{n} $latinMedial ↔ $Ni;
	370	{tt} $latinMedial ↔ $TTi;
	371	{d} $latinMedial ↔ $Di;
	372	{l} $latinMedial ↔ $Li;
	373	{m} $latinMedial ↔ $Mi;
	374	{pp} $latinMedial ↔ $PPi;
	375	{b} $latinMedial ↔ $Bi;
	376	{ss} $latinMedial ↔ $SSi;
	377	{s} $latinMedial ↔ $Si;
	378	{jj} $latinMedial ↔ $JJi;
	379	{j} $latinMedial ↔ $Ji;
	380	{ch} $latinMedial ↔ $CHi;
	381	{c} $latinMedial → $CHi;
	382	{k} $latinMedial ↔ $Ki;
	383	{t} $latinMedial ↔ $Ti;
	384	{p} $latinMedial ↔ $Pi;
	385	{h} $latinMedial ↔ $Hi;
2ca993e8 A	386	# 'r' in final position. Because of the equivalency of the 'l' and
	387	# 'r' jamo (the glyphs are the same), we try to provide the same
	388	# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
	389	# below. If we see an 'r' in an apparent final position, treat it
	390	# like 'l'. For example, "karka" =→ Ki A R EU Ki A without this rule.
	391	# Instead, we want Ki A L Ki A.
	392	# Initial + Final: If we match the next rule, we have initial then
	393	# final consonant with no intervening medial. We insert the null
	394	# vowel BEFORE it to create a well-formed syllable. (In the next rule
	395	# we insert a null vowel AFTER an anomalous initial.)
	396	# Initial + X: This block matches an initial consonant not followed by
	397	# a medial. We insert the null vowel after it. We handle double
	398	# initials explicitly here; for single initial consonants we insert EU
	399	# (as Latin) after them and let standard rules do the rest.
	400	# BREAKS ROUND TRIP INTEGRITY
729e4ab9 A	401	kk → $KKi $EU;
	402	tt → $TTi $EU;
	403	pp → $PPi $EU;
	404	ss → $SSi $EU;
	405	jj → $JJi $EU;
	406	ch → $CHi $EU;
	407	([lbdghjkmnpst]) → \| $1 eu;
2ca993e8 A	408	# X + Final: Finally we have to deal with a consonant that can only be
	409	# interpreted as a final (not an initial) and which is preceded
	410	# neither by an initial nor a medial. It is the start of the
	411	# syllable, but cannot be. Most of these will already be handled by
	412	# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
	413	# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
	414	# For this isolated case, we could add a null initial and medial,
	415	# which would give "la" =→ IEUNG EU L IEUNG A, for example. A more
	416	# economical solution is to transliterate isolated "l" (that is,
	417	# initial "l") to "r". (Other similar conversions of consonants that
	418	# occur neither as initials nor as finals are handled below.)
729e4ab9	419	l → \| r;
2ca993e8 A	420	# Medials. If a medial is preceded by an initial, then we proceed
	421	# normally. As usual, longer keys must precede shorter ones.
	422	# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
	423	# this block for Jamo-Latin.]
	424	#
	425	# a e i o u
	426	# ae
	427	# eo eu
	428	# oe
	429	# ui
	430	# wa we wi
	431	# wae
	432	# yae ya yeo ye yo yu
729e4ab9 A	433	$jamoInitial {ae} ↔ $AE;
	434	$jamoInitial {a} ↔ $A;
	435	$jamoInitial {eo} ↔ $EO;
	436	$jamoInitial {eu} ↔ $EU;
	437	$jamoInitial {e} ↔ $E;
	438	$jamoInitial {i} ↔ $I;
	439	$jamoInitial {oe} ↔ $OE;
	440	$jamoInitial {o} ↔ $O;
	441	$jamoInitial {ui} ↔ $UI;
	442	$jamoInitial {u} ↔ $U;
	443	$jamoInitial {wae} ↔ $WAE;
	444	$jamoInitial {wa} ↔ $WA;
	445	$jamoInitial {wo} ↔ $WO;
	446	$jamoInitial {we} ↔ $WE;
	447	$jamoInitial {wi} ↔ $WI;
	448	$jamoInitial {yae} ↔ $YAE;
	449	$jamoInitial {ya} ↔ $YA;
	450	$jamoInitial {yeo} ↔ $YEO;
	451	$jamoInitial {ye} ↔ $YE;
	452	$jamoInitial {yo} ↔ $YO;
	453	$jamoInitial {yu} ↔ $YU;
2ca993e8 A	454	# We may see an anomalous isolated 'w' or 'y'. In that case, we
	455	# interpret it as 'wi' and 'yu', respectively.
	456	# BREAKS ROUND TRIP INTEGRITY
729e4ab9 A	457	$jamoInitial {w} → \| wi;
729e4ab9 A	458	$jamoInitial {y} → \| yu;
2ca993e8 A	459	# Otherwise, insert a null consonant IEUNG before the medial (which is
2ca993e8 A	460	# still an untransliterated latin vowel).
729e4ab9	461	($latinMedial) → $IEUNG \| $1;
2ca993e8 A	462	# Convert non-jamo latin consonants to equivalents. These occur as
	463	# neither initials nor finals in jamo. 'l' occurs as a final, but not
	464	# an initial; it is handled above. The following letters (left hand
	465	# side) will never be output by Jamo-Latin.
729e4ab9 A	466	f → \| p;
	467	q → \| k;
	468	v → \| b;
	469	x → \| ks;
	470	z → \| s;
	471	r → \| l;
	472	c → \| k;
2ca993e8	473	# Delete separators (Latin-Jamo).
729e4ab9	474	$sep → ;
2ca993e8 A	475	# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
2ca993e8 A	476	# since these may also occur in text.
729e4ab9	477	← $IEUNG;
2ca993e8 A	478	#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
	479	#- the INDEX file. This transliterator is, by itself, not
	480	#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
	481	#- inverses thereof.
	482	# eof
	483