git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/blt_blt

Commit	Line	Data
0f5d89e8 A	1	# © 2016 and later: Unicode, Inc. and others.
	2	# License & terms of use: http://www.unicode.org/copyright.html#License
	3	#
	4	# File: blt_blt_FONIPA.txt
	5	# Generated from CLDR
	6	#
	7
	8	# Output phonemes
	9	# ---------------
	10	# Nasals: m mʷ n nʷ ɲ ɲʷ ŋ ŋʷ
	11	# Plosives: p pʰ pʰʷ pʷ b t tʷ tʰ d dʷ k kʰ kʰʷ kʷ ɡ ɡʷ ʔ
	12	# Fricatives: f fʷ v s sʷ h hʷ x xʷ
	13	# Other consonants: w j l r
	14	# Affricates: t\u0361ɕ t\u0361ɕʷ t\u0361ɕʰ t\u0361ɕʰʷ
	15	# Vowels: i ɨ u ɛ e ə ɔ o a aː
	16	# Diphthongs: iə\u032F ɨə\u032F uə\u032F ai\u032F
	17	# Tones: ˨ ˧˥ ˨˩ ˥ ˦ ˧˩
	18	#
	19	#
	20	# Limitations
	21	# -----------
	22	# Currently, these rules only support tone marks, but not tone letters.
	23	#
	24	#
	25	# References
	26	# ----------
	27	# [1] Jim Brase, SIL International: Proposal to encode the Tai Viet script
	28	# in the UCS. ISO/IEC JTC1/SC2/WG2 N3220. March 20, 2007.
	29	# http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3220.pdf
	30	#
	31	# [2] ScriptSource: Tai Viet Vowels.
	32	# http://scriptsource.org/entry/eusd5ehysa
	33	#
	34	# [3] ScriptSource: Tai Viet Consonants.
	35	# http://scriptsource.org/entry/lbwpkrqk7b
	36	#
	37	# [4] http://www.seasite.niu.edu/tai/TaiDam/tones.htm
	38	$LO = [ꪀ ꪂ ꪄ ꪆ ꪈ ꪊ ꪌ ꪎ ꪐ ꪒ ꪔ ꪖ ꪘ ꪚ ꪜ ꪞ ꪠ ꪢ ꪤ ꪦ ꪨ ꪪ ꪬ ꪮ]; # onsets that select tone classes 1–3 in the tone rules
	39	$HI = [ꪁ ꪃ ꪅ ꪇ ꪉ ꪋ ꪍ ꪏ ꪑ ꪓ ꪕ ꪗ ꪙ ꪛ ꪝ ꪟ ꪡ ꪣ ꪥ ꪧ ꪩ ꪫ ꪭ ꪯ]; # onsets that select tone classes 4–6 in the tone rules
	40	$C = [$LO $HI]; # any consonant letter
	41	$V1 = [ꪵ ꪶ ꪹ ꪻ ꪼ]; # vowels written before consonant
	42	$V2 = [ \uAAB0 \uAAB2 \uAAB3 \uAAB4 \uAAB7 \uAAB8 \uAABE]; # vowels written above or below consonant
	43	$V3 = [ꪱ ꪮ ꪺ ꪽ]; # vowels written after consonant (ꪮ is also a member of $LO)
	44	$DIGRAPHS = [{ꪹ \uAAB8} {ꪹ \uAAB7} {ꪹ ꪱ}]; # vowels spelled with ꪹ plus a second vowel sign, matched as units
	45	$V12 = [$V1 $V2 $DIGRAPHS]; # V1 and V2 vowels plus the digraphs
	46	$V123 = [$V12 $V3]; # every vowel spelling used by these rules
	47	$W = [ꪫ]; # labialization marker
	48	$F = [$C]; # syllable-final consonant
	49	$IPA_TONE = [˥ ˦ ˧ ˨ ˩]; # IPA tone letters; the emitted tones are built from these
	50	$NOT_IPA_TONE = [^$IPA_TONE]; # any character that is not one of the IPA tone letters
	51	$BOUNDARY = [^[:L:][:M:][:N:]]; # any character that is not a letter, mark or number
	52	# Consonants at the end of “checked” syllables.
	53	$CHK = [ꪜ ꪝ ꪞ ꪟ ꪔ ꪕ ꪖ ꪗ ꪀ ꪁ ꪂ ꪃ ꪮ ꪯ]; # the letters mapped to p, pʰ, t, tʰ, k, kʰ and ʔ further down
	54	# The Tai Viet script uses visual ordering. Convert to logical order.
	55	($V1) ($C $W?) → $2 $1; # a V1 vowel is moved behind the consonant (and labialization marker, if present) that follows it
	56	::null; # end of this pass; the rules below operate on the reordered text
	57	# Special handling for stand-alone ꪽ ‘that’.
	58	$BOUNDARY {ꪽ} $BOUNDARY → nan˧˩; # only when both neighbours match $BOUNDARY; the output already carries its tone
	59	# Convert tones for checked syllables (those ending in /p/, /t/, /k/, /ʔ/)
	60	$LO $W? $V12 {($CHK)} → $1 ˧˥; # Tone class 2: High-rising tone
	61	$LO $W? {($V3 $CHK)} → $1 ˧˥; # Tone class 2: High-rising tone
	62	$HI $W? $V12 {($CHK)} → $1 ˦; # Tone class 5: High-mid tone
	63	$HI $W? {($V3 $CHK)} → $1 ˦; # Tone class 5: High-mid tone
	64	# Convert tones for unchecked syllables with vowels that are written
65	# after the consonant (V3). The tone mark (U+AABF MAI EK or U+AAC1 MAI THO) is matched between the onset and the vowel and is dropped from the output.
66	# TODO: Also support tone letters, not just tone marks.
67	$LO $W? { \uAABF ($V3 $F?)} → $1 ˧˥; # Tone class 2: High-rising tone
68	$LO $W? { \uAAC1 ($V3 $F?)} → $1 ˨˩; # Tone class 3: Low-falling tone
69	$HI $W? { \uAABF ($V3 $F?)} → $1 ˦; # Tone class 5: High-mid tone
70	$HI $W? { \uAAC1 ($V3 $F?)} → $1 ˧˩; # Tone class 6: Mid-falling tone
71	# Convert tones for unchecked syllables with vowels that are either written
72	# before the consonant (V1) or vowels that are written above or below it (V2). Here the tone mark is matched after the vowel; it is dropped from the output as well.
73	# TODO: Also support tone letters, not just tone marks.
74	$LO $W? $V12 { \uAABF ($F?)} → $1 ˧˥; # Tone class 2: High-rising tone
75	$LO $W? $V12 { \uAAC1 ($F?)} → $1 ˨˩; # Tone class 3: Low-falling tone
76	$HI $W? $V12 { \uAABF ($F?)} → $1 ˦; # Tone class 5: High-mid tone
77	$HI $W? $V12 { \uAAC1 ($F?)} → $1 ˧˩; # Tone class 6: Mid-falling tone
78	::null; # end of the pass that handles explicitly marked and checked syllables
	79	{($LO $W? $V123 $F?)} $NOT_IPA_TONE → $1 ˨; # Tone class 1: Low-mid tone. Applied only when the syllable is not already followed by a tone letter.
	80	{($HI $W? $V123 $F?)} $NOT_IPA_TONE → $1 ˥; # Tone class 4: High tone. Applied only when the syllable is not already followed by a tone letter.
	81	::null; # end of the default-tone pass
	82	# Harden syllable-final consonants.
	83	$C $W? $V123 {ꪒ} → ꪔ; # /d/ → /t/; the letter is swapped here and ꪔ is spelled out as t by the consonant rules further down
	84	::null;
	85	# Convert labialization marker.
	86	$C {$W} $V123 → ʷ; # ꪫ between a consonant and a vowel is written ʷ; any other ꪫ is left for the consonant rules
	87	::null;
88	[ꪀ ꪁ] → k;
89	[ꪂ ꪃ] → kʰ; # Tai Dón; not used in Tai Dam according to [3]
90	[ꪄ ꪅ] → x;
91	[ꪆ ꪇ] → ɡ; # only in loanwords, according to [3]
92	[ꪈ ꪉ] → ŋ;
93	[ꪊ ꪋ] → t\u0361ɕ; # Tai Dón; not used in Tai Dam according to [3]
94	[ꪌ ꪍ] → t\u0361ɕʰ; # Tai Dón; not used in Tai Dam according to [3]
95	[ꪎ ꪏ] → s;
96	[ꪐ ꪑ] → ɲ;
97	[ꪒ ꪓ] → d;
98	[ꪔ ꪕ] → t;
99	[ꪖ ꪗ] → tʰ;
100	[ꪘ ꪙ] → n;
101	[ꪚ ꪛ] → b;
102	[ꪜ ꪝ] → p;
103	[ꪞ ꪟ] → pʰ; # Tai Dón; not used in Tai Dam according to [3]
104	[ꪠ ꪡ] → f;
105	[ꪢ ꪣ] → m;
106	[ꪤ ꪥ] → j;
107	[ꪦ ꪧ] → r; # only in loanwords, according to [3]
108	[ꪨ ꪩ] → l;
109	{[ꪪ ꪫ]} $IPA_TONE → w; # at the end of a syllable (before tone letters)
110	[ꪪ ꪫ] → v; # not at the end of a syllable
111	[ꪬ ꪭ] → h;
112	ʔ {[ꪮ ꪯ]} → ɔ; # eg. ꪮꪮꪀ
113	[ꪮ ꪯ] → ʔ;
114	# Digraphs.
115	ꪹ \uAAB8 → e;
116	ꪹ \uAAB7 → ə;
117	ꪹ ꪱ → aːw;
118	# Vowels.
119	\uAAB0 → a;
120	ꪱ → aː;
121	\uAAB2 → i;
122	\uAAB3 → ɨ;
123	\uAAB4 → u;
124	ꪵ → ɛ;
125	ꪶ → o;
126	\uAAB7 → ɔ;
127	ꪮ → ɔ;
128	ꪺ → uə\u032F;
129	ꪽ → an;
130	ꪹ → ɨə\u032F;
131	\uAAB8 → iə\u032F;
132	ꪻ → əw;
133	ꪼ → ai\u032F;
134	\uAABE → am;
135	# Word ligature symbols.
136	ꫛ → kon˥;
137	ꫜ → nɨŋ˦;
138