]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/my_my_FONIPA.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / my_my_FONIPA.txt
CommitLineData
2ca993e8
A
# ***************************************************************************
# *
# * Copyright (C) 2004-2016, International Business Machines
# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
# *
# ***************************************************************************
# File: my_my_FONIPA.txt
# Generated from CLDR
#

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Definitions
#
# Variables referenced by the rules in this file. The leading line counters
# that a web "blame" view had fused onto every line were removed so that each
# line again starts with "#", "::" or a rule.
#
# Dependent vowel signs
$vs_AA = \u102B;  # U+102B TALL AA
$vs_aa = \u102C;  # U+102C AA
$vs_i = \u102D;   # U+102D I
$vs_ii = \u102E;  # U+102E II
$vs_u = \u102F;   # U+102F U
$vs_uu = \u1030;  # U+1030 UU
$vs_e = \u1031;   # U+1031 E
$vs_ai = \u1032;  # U+1032 AI
# Various signs
$anusvara = \u1036;
$visarga = \u1038;
$virama = \u1039;  # stacking sign; rewritten to $asat during preprocessing
$asat = \u103A;
# Dependent (medial) consonant signs
$med_y = \u103B;  # MEDIAL YA
$med_r = \u103C;  # MEDIAL RA
$med_w = \u103D;  # MEDIAL WA
$med_h = \u103E;  # MEDIAL HA
# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
# Tone diacritics used in the IPA output.
$creaky = \u0330;  # COMBINING TILDE BELOW
$high = \u0301;    # COMBINING ACUTE ACCENT
$low = \u0300;     # COMBINING GRAVE ACCENT
# NOTE(review): no rule visible in this file references $coda.
$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
#
# Preprocessing
#
# Normalizes the input, undoes consonant stacking, marks syllable boundaries
# and makes the inherent vowel explicit. Each "::Null;" ends a pass, so the
# rules that follow it run over the complete output of the rules before it.
# (The line counters that a web "blame" view had fused onto every line were
# removed; with them in place none of these lines parsed as a rule.)
::NFC;
# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
$vs_AA → $vs_aa;
# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# Open question: what should happen if the syllable ending in kinzi has a
# non-low tone?
င\u103A $virama → င\u103A;
# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;
# Unstack U+103F GREAT SA.
ဿ → သ\u103Aသ;
# Insert a syllable boundary marker /./ before every independent letter.
# The marker goes in at the empty { } position when the preceding character
# is not already "." and the letter (plus any U+1037 or medial signs) is not
# followed by U+103A ASAT, i.e. the letter is not written as a final.
# NOTE(review): the "$" inside [^.$] is presumably the text-boundary anchor,
# which keeps a marker from being inserted at the very start of the text;
# confirm against the ICU rule syntax.
::Null;
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
# A consonant (plus medials) followed only by the end of the text gets
# "a" + $creaky; one directly followed by a "." boundary gets "ə".
::Null;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
#
# The rule deletes an ASAT when the letter carrying it directly follows
# another ASAT (optionally with a visarga in between), so of two consecutive
# ASAT-marked letters only the first keeps its ASAT.
::Null;
$asat ($visarga)? [\u1000-\u102A] { $asat → ;
# Deal with ၎င\u103Aး early.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
#
# Rhymes
#
# Each rule rewrites a rhyme spelling (optional vowel sign(s), optional final
# consonant carrying U+103A ASAT, optional tone mark) as an IPA vowel, a tone
# diacritic and, where the rule has one, a coda /ɴ/ or /ʔ/. In the patterns
# U+1037 yields $creaky and a trailing း yields $high; the tone of the
# unmarked spelling is fixed per rule.
#
# Order matters: within each group the longer, more specific patterns come
# first and the bare vowel-sign rule at the end of the group is the fallback.
# Do not reorder rules inside this pass.
#
# (The line counters that a web "blame" view had fused onto every line were
# removed; with them in place none of these lines parsed as a rule.)
::Null;
# Final consonant after the inherent vowel (no vowel sign).
က\u103A → ɛʔ;
ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
င\u1037\u103A → ɪ $creaky ɴ;
င\u103Aး → ɪ $high ɴ;
င\u103A → ɪ $low ɴ;
စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
ဉ\u1037\u103A → ɪ $creaky ɴ;
ဉ\u103Aး → ɪ $high ɴ;
ဉ\u103A → ɪ $low ɴ;
ည\u1037\u103A → ɛ $creaky;
ည\u103Aး → ɛ $high;
ည\u103A → ɛ $low;
ဏ\u1037\u103A → a $creaky ɴ;
ဏ\u103Aး → a $high ɴ;
ဏ\u103A → a $low ɴ;
တ\u103A → aʔ;
န\u1037\u103A → a $creaky ɴ;
န\u103Aး → a $high ɴ;
န\u103A → a $low ɴ;
ပ\u103A → aʔ;
မ\u1037\u103A → a $creaky ɴ;
မ\u103Aး → a $high ɴ;
မ\u103A → a $low ɴ;
ယ\u1037\u103A → ɛ $creaky;
ယ\u103Aး → ɛ $high;
ယ\u103A → ɛ $low;
သ\u103A → aʔ;
# Vowel sign AA.
$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
$vs_aa ဉ\u103Aး → ɪ $high ɴ;
$vs_aa ဉ\u103A → ɪ $low ɴ;
$vs_aa တ\u103A → aʔ;
$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
$vs_aa ဏ\u103Aး → a $high ɴ;
$vs_aa ဏ\u103A → a $low ɴ;
$vs_aa န\u1037\u103A → a $creaky ɴ;
$vs_aa န\u103Aး → a $high ɴ;
$vs_aa န\u103A → a $low ɴ;
$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa ယ\u1037\u103A → ɛ $creaky;
$vs_aa ယ\u103Aး → ɛ $high;
$vs_aa ယ\u103A → ɛ $low;
$vs_aa \u1037 → a $creaky; # redundant creaky tone
$vs_aa း → a $high;
$vs_aa → a $low;
# Vowel sign I.
$vs_i က\u103A → eɪ\u032Fʔ;
$vs_i စ\u103A → eɪ\u032Fʔ;
$vs_i တ\u103A → eɪ\u032Fʔ;
$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
$vs_i န\u103A → e $low ɪ\u032Fɴ;
$vs_i ပ\u103A → eɪ\u032Fʔ;
$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
$vs_i မ\u103A → e $low ɪ\u032Fɴ;
# Vowel signs I + U.
$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ယ\u1037\u103A → o $creaky;
$vs_i $vs_u ယ\u103Aး → o $high;
$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
$vs_i $vs_u \u1037 → o $creaky;
$vs_i $vs_u း → o $high;
$vs_i $vs_u → o $low;
# Vowel sign I + anusvara, then the bare vowel sign I fallback.
$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
$vs_i $anusvara း → e $high ɪ\u032Fɴ;
$vs_i $anusvara → e $low ɪ\u032Fɴ;
$vs_i → i $creaky;
# Vowel sign II.
$vs_ii \u1037 → i $creaky; # this does not usually occur
$vs_ii း → i $high;
$vs_ii → i $low;
# Vowel sign U.
$vs_u က\u103A → oʊ\u032Fʔ;
$vs_u ဂ\u103A → oʊ\u032Fʔ;
$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
$vs_u တ\u103A → oʊ\u032Fʔ;
$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
$vs_u န\u103A → o $low ʊ\u032Fɴ;
$vs_u ပ\u103A → oʊ\u032Fʔ;
$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u မ\u103A → o $low ʊ\u032Fɴ;
$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
$vs_u $anusvara း → o $high ʊ\u032Fɴ;
$vs_u $anusvara → o $low ʊ\u032Fɴ;
$vs_u → u $creaky;
# Vowel sign UU.
$vs_uu \u1037 → u $creaky; # this does not usually occur
$vs_uu း → u $high;
$vs_uu → u $low;
# Vowel sign E, alone and combined with AA.
$vs_e တ\u103A → ɪʔ;
$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
$vs_e $vs_aa \u1037 → ɔ $creaky;
$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
$vs_e $vs_aa \u103A → ɔ $low;
$vs_e $vs_aa → ɔ $high;
$vs_e \u1037 → e $creaky;
$vs_e း → e $high;
$vs_e → e $low;
# Vowel sign AI.
$vs_ai \u1037 → ɛ $creaky;
$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
$vs_ai → ɛ $high;
# Anusvara after the inherent vowel.
$anusvara \u1037 → a $creaky ɴ;
$anusvara း → a $high ɴ;
$anusvara → a $low ɴ;
# MEDIAL WA directly followed by a final consonant.
$med_w တ\u103A → ʊʔ;
$med_w န\u1037\u103A → ʊ $creaky ɴ;
$med_w န\u103Aး → ʊ $high ɴ;
$med_w န\u103A → ʊ $low ɴ;
$med_w ပ\u103A → ʊʔ;
$med_w မ\u1037\u103A → ʊ $creaky ɴ;
$med_w မ\u103Aး → ʊ $high ɴ;
$med_w မ\u103A → ʊ $low ɴ;
#
# Medials
#
# Rewrites consonant + medial-sign clusters. U+103E MEDIAL HA is moved to the
# front of the medial sequence one neighbour at a time, with a "::Null;" pass
# break after each swap so the next swap sees the already reordered text.
# (The line counters that a web "blame" view had fused onto every line were
# removed; with them in place none of these lines parsed as a rule.)
::Null;
# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
# Rules with U+103B MEDIAL YA:
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
# The same mappings with U+103C MEDIAL RA:
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;
# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
ယ { [$med_y $med_r] → ;
# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials.
# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA. These rules must run here, while
# MEDIAL YA still precedes MEDIAL HA; the later pass moves HA in front of YA.
သျ\u103E → ʃ;
လျ\u103E → ʃ;
# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;
# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;
# Consume MEDIAL HA and apply devoicing.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;
# Drop any remaining U+103E MEDIAL HA.
\u103E → ;
# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;
# Any medial that is still present maps to its glide.
\u103B → j;
\u103C → j;
\u103D → w;
#
# Initials
#
# Maps the remaining base letters, independent vowels and symbols to IPA.
# No "::Null;" separates this section from the last medial rules, so these
# rules run in the same pass as them.
# (The line counters that a web "blame" view had fused onto every line were
# removed; with them in place none of these lines parsed as a rule.)
#
# Velars
က → k;
ခ → kʰ;
ဂ → ɡ;
ဃ → ɡ;
င → ŋ;
# Historic palatals
စ → s;
ဆ → sʰ;
ဇ → z;
ဈ → z;
ဉ → ɲ;
ည → ɲ;
# Alveolars
ဋ → t;
ဌ → tʰ;
ဍ → d;
ဎ → d;
ဏ → n;
# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
ဒ → d;
ဓ → d;
န → n;
# Labials
ပ → p;
ဖ → pʰ;
ဗ → b;
ဘ → b;
မ → m;
# Other letters
ယ → j;
ရ → j; # historic /r/
# The longer pattern with ASAT must stay ahead of the plain letter rule.
လ\u103A → ; # final, typically not pronounced in native words
လ → l;
ဝ → w;
သ → θ; # historic /s/ ==> modern dental
ဟ → h;
ဠ → l;
အ → ʔ;
# Independent vowels
# For each letter the tone-marked spellings (U+1037, း) are listed before the
# bare letter so that they are matched first.
ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
ဣး → ʔí; # this does not usually occur
ဣ → ʔḭ;
ဤ\u1037 → ʔḭ; # this does not usually occur
ဤး → ʔí; # this does not usually occur
ဤ → ʔì;
ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
ဥး → ʔú; # this does not usually occur
ဥ → ʔṵ;
ဦ\u1037 → ʔṵ; # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;
ဧ\u1037 → ʔḛ; # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;
ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
ဩ → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
ဪး → ʔɔ\u0301; # this does not usually occur
ဪ → ʔɔ\u0300;
# Various signs
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
၏ → ʔḭ;
#
# Postprocessing
#
# Removes characters that no earlier rule consumed and renormalizes the
# output. (The line counters that a web "blame" view had fused onto every
# line, and the trailing bare counter line, were removed.)
#
# Delete any remaining U+103A ASAT.
$asat → ;
# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;
# Recompose the output, e.g. base letters with the combining tone marks
# emitted by the rules above.
::NFC;