1 # ***************************************************************************
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
6 # ***************************************************************************
7 # File: my_my_FONIPA.txt
11 # Pronunciation rules for Burmese.
13 # The following rules are lexical and heuristic: lexical in the sense
14 # that they generate phoneme strings which may further undergo
15 # post-lexical phonological processes, in particular voicing, to
16 # result in actual surface forms; heuristic in the sense that they try
17 # to resolve ambiguities, especially around reduced vowels, in a
18 # systematic way that may be incorrect in many situations. Vowel
19 # reduction depends on many factors, such as morphemic structure,
20 # which are not available here.
24 # Dependent vowel signs
38 # Dependent (medial) consonant signs
43 # Independent letters and letter-like punctuation symbols
# Covers U+1000-U+102A, U+103F, U+104C-U+104F and U+1050-U+1055.
# Referenced by the rule that inserts the syllable boundary marker /./.
44 $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
# Set of the three tone variables plus the symbols ɴ, ʔ and ə.
# $creaky, $high and $low are defined outside this excerpt.
# NOTE(review): no rule visible in this excerpt references $coda.
48 $coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
53 # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
55 # Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
56 # TODO: decide what should happen if the syllable ending in kinzi has a
# non-low tone; this rule only drops the virama and does not look at tone.
# $virama is defined outside this excerpt (per the comment above, U+1039).
57 င\u103A $virama → င\u103A;
58 # Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
60 # Unstack U+103F GREAT SA.
62 # Insert a syllable boundary marker /./ before every independent letter.
# The matched text is empty ({ } encloses nothing), so this is a pure insertion.
# Left context [^.$]: the preceding character must not be "." and, in ICU set
# syntax, the trailing $ stands for the text boundary, so nothing is inserted
# at the very start of the text or right after an existing marker.
# Right context: an independent letter, any number of U+1037 / U+103B-U+103E
# signs, then a character other than U+103A ASAT; a letter directly followed
# by ASAT therefore does not receive a boundary before it.
64 [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
65 # Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
# Both rules capture a base letter (U+1000-U+1021 or U+103F) together with any
# following U+103B-U+103E medial signs as $1, and re-emit $1 followed by the
# vowel. Right context [$] is the text boundary (first rule); right context
# "\." is a syllable boundary marker (second rule). The context itself is
# not consumed.
67 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
68 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
69 # Allow for additional coda consonants.
71 # This only covers a few of the cases in which full coda consonants
72 # can appear in loanwords. The general situation is somewhat rare and
73 # is better handled by a formalism that can impose structural
74 # constraints on syllables.
# Delete an $asat when the letter it follows (U+1000-U+102A) is itself
# preceded by $asat and an optional $visarga. Only the second $asat is
# matched and removed; everything left of "{" is context and is kept.
# $asat and $visarga are defined outside this excerpt.
76 $asat ($visarga)? [\u1000-\u102A] { $asat → ;
77 # Deal with ၎င\u103Aး early.
# The whole sequence is rewritten to a fixed phoneme string here, ahead of the
# general rime rules that would otherwise match its င\u103Aး part.
# The "\." in the output is a literal syllable boundary marker.
78 ၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
# Rimes whose pattern has no vowel sign: a final consonant plus U+103A ASAT.
# In the rules visible here, U+1037 before ASAT yields $creaky and း after
# ASAT yields $high. Final stops become ʔ and final nasals become ɴ; the ည and
# ယ finals produce no coda symbol at all.
# NOTE(review): the remaining variants of each group are on lines outside
# this excerpt.
84 ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
85 င\u1037\u103A → ɪ $creaky ɴ;
88 စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
89 ဉ\u1037\u103A → ɪ $creaky ɴ;
92 ည\u1037\u103A → ɛ $creaky;
95 ဏ\u1037\u103A → a $creaky ɴ;
99 န\u1037\u103A → a $creaky ɴ;
100 န\u103Aး → a $high ɴ;
103 မ\u1037\u103A → a $creaky ɴ;
104 မ\u103Aး → a $high ɴ;
106 ယ\u1037\u103A → ɛ $creaky;
# Rimes after $vs_aa (defined outside this excerpt; presumably the AA vowel
# sign - confirm). Each final consonant has up to three variants:
# U+1037 + ASAT gives $creaky, ASAT + း gives $high, plain ASAT gives $low.
# Rules are tried in file order, so the ASAT + း form must stay ahead of the
# plain ASAT form, which is a prefix of it.
110 $vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
111 $vs_aa ဉ\u103Aး → ɪ $high ɴ;
112 $vs_aa ဉ\u103A → ɪ $low ɴ;
114 $vs_aa ဏ\u1037\u103A → a $creaky ɴ;
115 $vs_aa ဏ\u103Aး → a $high ɴ;
116 $vs_aa ဏ\u103A → a $low ɴ;
117 $vs_aa န\u1037\u103A → a $creaky ɴ;
118 $vs_aa န\u103Aး → a $high ɴ;
119 $vs_aa န\u103A → a $low ɴ;
120 $vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
121 $vs_aa ယ\u1037\u103A → ɛ $creaky;
122 $vs_aa ယ\u103Aး → ɛ $high;
123 $vs_aa ယ\u103A → ɛ $low;
# No final consonant: $vs_aa followed directly by U+1037.
124 $vs_aa \u1037 → a $creaky; # redundant creaky tone
# Rimes after $vs_i (defined outside this excerpt; presumably the short I
# vowel sign - confirm). Stop finals give /eɪ\u032Fʔ/; nasal finals give
# /e/ + tone + /ɪ\u032Fɴ/, i.e. the tone variable is emitted after the first
# vowel symbol and before the non-syllabic glide.
127 $vs_i က\u103A → eɪ\u032Fʔ;
128 $vs_i စ\u103A → eɪ\u032Fʔ;
129 $vs_i တ\u103A → eɪ\u032Fʔ;
130 $vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
131 $vs_i န\u103Aး → e $high ɪ\u032Fɴ;
132 $vs_i န\u103A → e $low ɪ\u032Fɴ;
133 $vs_i ပ\u103A → eɪ\u032Fʔ;
134 $vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
135 $vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
136 $vs_i မ\u103A → e $low ɪ\u032Fɴ;
# Rimes after the two-sign sequence $vs_i $vs_u. With a က, င or ဏ final the
# nucleus is /aɪ\u032F/; with a ယ final or no final it is /o/.
# These must precede the bare "$vs_i $vs_u" rule, which is a prefix of them.
137 $vs_i $vs_u က\u103A → aɪ\u032Fʔ;
138 $vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
139 $vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
140 $vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
141 $vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
142 $vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
143 $vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
144 $vs_i $vs_u ယ\u1037\u103A → o $creaky;
145 $vs_i $vs_u ယ\u103Aး → o $high;
146 $vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
147 $vs_i $vs_u \u1037 → o $creaky;
148 $vs_i $vs_u း → o $high;
# Fallback: no tone mark and no final, so low tone.
149 $vs_i $vs_u → o $low;
# $vs_i followed by $anusvara yields the same output as the nasal finals above.
150 $vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
151 $vs_i $anusvara း → e $high ɪ\u032Fɴ;
152 $vs_i $anusvara → e $low ɪ\u032Fɴ;
154 $vs_ii \u1037 → i $creaky; # this does not usually occur
# Rimes after $vs_u (defined outside this excerpt; presumably the short U
# vowel sign - confirm). Stop finals give /oʊ\u032Fʔ/; nasal finals and
# $anusvara give /o/ + tone + /ʊ\u032Fɴ/. Within each group the ASAT + း
# (high) form precedes the plain ASAT (low) form, which is a prefix of it.
157 $vs_u က\u103A → oʊ\u032Fʔ;
158 $vs_u ဂ\u103A → oʊ\u032Fʔ;
159 $vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
160 $vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
161 $vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
162 $vs_u တ\u103A → oʊ\u032Fʔ;
163 $vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
164 $vs_u န\u103Aး → o $high ʊ\u032Fɴ;
165 $vs_u န\u103A → o $low ʊ\u032Fɴ;
166 $vs_u ပ\u103A → oʊ\u032Fʔ;
167 $vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
168 $vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
169 $vs_u မ\u103A → o $low ʊ\u032Fɴ;
170 $vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
171 $vs_u $anusvara း → o $high ʊ\u032Fɴ;
172 $vs_u $anusvara → o $low ʊ\u032Fɴ;
174 $vs_uu \u1037 → u $creaky; # this does not usually occur
# Rimes after the two-sign sequence $vs_e $vs_aa (both defined outside this
# excerpt). With a က or င final the nucleus is /aʊ\u032F/; otherwise it is /ɔ/.
178 $vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
179 $vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
180 $vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
181 $vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
182 $vs_e $vs_aa \u1037 → ɔ $creaky;
183 $vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
# Unlike the other vowels, the unmarked form of this vowel is high tone:
# a bare U+103A ASAT selects low tone, and the fallback rule emits $high.
184 $vs_e $vs_aa \u103A → ɔ $low;
185 $vs_e $vs_aa → ɔ $high;
186 $vs_e \u1037 → e $creaky;
189 $vs_ai \u1037 → ɛ $creaky;
190 $vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
# $anusvara with no vowel sign in the pattern: /a/ + tone + /ɴ/. The bare
# $anusvara rule is a prefix of the two above it and so must come last.
192 $anusvara \u1037 → a $creaky ɴ;
193 $anusvara း → a $high ɴ;
194 $anusvara → a $low ɴ;
# $med_w (defined outside this excerpt) followed by a န or မ final:
# the medial and the final together are replaced by /ʊ/ + tone + /ɴ/.
196 $med_w န\u1037\u103A → ʊ $creaky ɴ;
197 $med_w န\u103Aး → ʊ $high ɴ;
198 $med_w န\u103A → ʊ $low ɴ;
200 $med_w မ\u1037\u103A → ʊ $creaky ɴ;
201 $med_w မ\u103Aး → ʊ $high ɴ;
202 $med_w မ\u103A → ʊ $low ɴ;
207 # Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
208 # velar + /j/ ==> modern palatals.
217 # Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
# ယ is left context only (it is kept); the single $med_y or $med_r that
# follows it is matched and deleted. Both variables are defined outside
# this excerpt.
218 ယ { [$med_y $med_r] → ;
219 # Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
221 # First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
# Each of the three swap rules in this group exchanges one adjacent pair, so
# that U+103E moves one position to the left per rule.
222 \u103D \u103E → \u103E \u103D;
224 # Now MEDIAL WA comes last.
225 # Produce the palatal ʃ from (SA|LA)+YA+HA.
228 # Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
229 \u103C \u103E → \u103E \u103C;
231 # Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
232 \u103B \u103E → \u103E \u103B;
234 # Consume MEDIAL HA and apply devoicing.
246 # Drop any remaining U+103E MEDIAL HA.
248 # Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
249 # U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
277 # Historic dentals ==> alveolars
# Context-free consonant mappings. The second rule has an empty right-hand
# side: it deletes လ together with its U+103A ASAT.
291 ရ → j; # historic /r/
292 လ\u103A → ; # final, typically not pronounced in native words
295 သ → θ; # historic /s/ ==> modern dental
# Independent vowel letters followed by an explicit tone mark (U+1037 or း).
# Each maps to ʔ + vowel with the tone written directly as a diacritic in the
# output (literal or as \u0330 / \u0301) rather than through the
# $creaky / $high variables used by the rime rules.
# NOTE(review): presumably these diacritics equal the values of $creaky and
# $high; the variable definitions are outside this excerpt - confirm.
300 ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
301 ဣး → ʔí; # this does not usually occur
303 ဤ\u1037 → ʔḭ; # this does not usually occur
304 ဤး → ʔí; # this does not usually occur
306 ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
307 ဥး → ʔú; # this does not usually occur
309 ဦ\u1037 → ʔṵ; # this does not usually occur
312 ဧ\u1037 → ʔḛ; # this does not usually occur
315 ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
316 ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
318 ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
319 ဪး → ʔɔ\u0301; # this does not usually occur
# The single symbol ၌ (U+104C) is rewritten to a fixed phoneme string:
# n + U+0325 (ring below), then a, ɪ + U+032F (non-syllabic), and a final ʔ.
322 ၌ → n\u0325aɪ\u032Fʔ;
324 # ၎င\u103Aး was handled earlier.
329 # Delete any remaining U+103A ASAT.
331 # Delete zero-width space, non-joiner, joiner.