]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A | 1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: my_my_FONIPA.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
2ca993e8 A | 6 | # |
7 | ||
8 | # Pronunciation rules for Burmese. | |
9 | # | |
10 | # The following rules are lexical and heuristic: lexical in the sense | |
11 | # that they generate phoneme strings which may further undergo | |
12 | # post-lexical phonological processes, in particular voicing, to | |
13 | # result in actual surface forms; heuristic in the sense that they try | |
14 | # to resolve ambiguities, especially around reduced vowels, in a | |
15 | # systematic way that may be incorrect in many situations. Vowel | |
16 | # reduction depends on many factors, such as morphemic structure, | |
17 | # which are not available here. | |
18 | # | |
19 | # Definitions | |
20 | # | |
21 | # Dependent vowel signs (U+102B..U+1032); $vs_AA is TALL AA, $vs_aa is plain AA. | |
22 | $vs_AA = \u102B; | |
23 | $vs_aa = \u102C; | |
24 | $vs_i = \u102D; | |
25 | $vs_ii = \u102E; | |
26 | $vs_u = \u102F; | |
27 | $vs_uu = \u1030; | |
28 | $vs_e = \u1031; | |
29 | $vs_ai = \u1032; | |
30 | # Various signs. U+1037 has no variable here; the rules below spell it as \u1037. | |
31 | $anusvara = \u1036; | |
32 | $visarga = \u1038; | |
33 | $virama = \u1039; | |
34 | $asat = \u103A; | |
35 | # Dependent (medial) consonant signs: MEDIAL YA, RA, WA and HA (U+103B..U+103E). | |
36 | $med_y = \u103B; | |
37 | $med_r = \u103C; | |
38 | $med_w = \u103D; | |
39 | $med_h = \u103E; | |
40 | # Independent letters and letter-like punctuation symbols; preprocessing inserts a syllable boundary before these. | |
41 | $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055]; | |
42 | $creaky = \u0330; | |
43 | $high = \u0301; | |
44 | $low = \u0300; | |
45 | $coda = [$creaky $high $low ɴ ʔ ə]; # set of the three tone diacritics plus ɴ, ʔ and ə; TODO: remove if unused (no rule visible in this file references $coda) | |
46 | # | |
47 | # Preprocessing: normalize, unstack, mark syllable boundaries, insert inherent vowels. | |
48 | # | |
49 | ::NFC; | |
50 | # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical, so the rhyme rules only need to handle $vs_aa. | |
51 | $vs_AA → $vs_aa; | |
52 | # Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A. | |
53 | # TODO: it is unclear what should happen if the syllable ending in kinzi had non-low tone. | |
54 | င\u103A $virama → င\u103A; | |
55 | # Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT. | |
56 | $virama → $asat; | |
57 | # Unstack U+103F GREAT SA. | |
58 | ဿ → သ\u103Aသ; | |
59 | # Insert a syllable boundary marker /./ before every independent letter that is not a coda, i.e. whose next character after any U+1037 or medials is not U+103A ASAT. Nothing is inserted directly after an existing /./. | |
60 | ::Null; | |
61 | [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; | |
62 | # Insert default inherent vowel after a bare consonant (with optional medials): /a\u0330/ at the end, /ə/ before a syllable boundary. | |
63 | ::Null; | |
64 | ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky; | |
65 | ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə; | |
66 | # Allow for additional coda consonants: a letter that directly follows an ASAT (plus optional visarga) loses its own ASAT. | |
67 | # | |
68 | # This only covers a few of the cases in which full coda consonants | |
69 | # can appear in loanwords. The general situation is somewhat rare and | |
70 | # is better dealt with in a formalism that can impose structural | |
71 | # constraints on syllables more easily. | |
72 | ::Null; | |
73 | $asat ($visarga)? [\u1000-\u102A] { $asat → ; | |
74 | # Deal with ၎င\u103Aး early, before the rhyme rules below can consume its င\u103Aး. | |
75 | ၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ; | |
76 | # | |
77 | # Rhymes: vowel sign(s) plus final consonant, ASAT and tone marks ==> vowel, tone diacritic and coda. | |
78 | # Within each vowel-sign group the longer patterns precede the bare vowel-sign fallback; keep that order. | |
79 | ::Null; | |
80 | က\u103A → ɛʔ; | |
81 | ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/ | |
82 | င\u1037\u103A → ɪ $creaky ɴ; | |
83 | င\u103Aး → ɪ $high ɴ; | |
84 | င\u103A → ɪ $low ɴ; | |
85 | စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/ | |
86 | ဉ\u1037\u103A → ɪ $creaky ɴ; | |
87 | ဉ\u103Aး → ɪ $high ɴ; | |
88 | ဉ\u103A → ɪ $low ɴ; | |
89 | ည\u1037\u103A → ɛ $creaky; | |
90 | ည\u103Aး → ɛ $high; | |
91 | ည\u103A → ɛ $low; | |
92 | ဏ\u1037\u103A → a $creaky ɴ; | |
93 | ဏ\u103Aး → a $high ɴ; | |
94 | ဏ\u103A → a $low ɴ; | |
95 | တ\u103A → aʔ; | |
96 | န\u1037\u103A → a $creaky ɴ; | |
97 | န\u103Aး → a $high ɴ; | |
98 | န\u103A → a $low ɴ; | |
99 | ပ\u103A → aʔ; | |
100 | မ\u1037\u103A → a $creaky ɴ; | |
101 | မ\u103Aး → a $high ɴ; | |
102 | မ\u103A → a $low ɴ; | |
103 | ယ\u1037\u103A → ɛ $creaky; | |
104 | ယ\u103Aး → ɛ $high; | |
105 | ယ\u103A → ɛ $low; | |
106 | သ\u103A → aʔ; | |
107 | $vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ; | |
108 | $vs_aa ဉ\u103Aး → ɪ $high ɴ; | |
109 | $vs_aa ဉ\u103A → ɪ $low ɴ; | |
110 | $vs_aa တ\u103A → aʔ; | |
111 | $vs_aa ဏ\u1037\u103A → a $creaky ɴ; | |
112 | $vs_aa ဏ\u103Aး → a $high ɴ; | |
113 | $vs_aa ဏ\u103A → a $low ɴ; | |
114 | $vs_aa န\u1037\u103A → a $creaky ɴ; | |
115 | $vs_aa န\u103Aး → a $high ɴ; | |
116 | $vs_aa န\u103A → a $low ɴ; | |
117 | $vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell) | |
118 | $vs_aa ယ\u1037\u103A → ɛ $creaky; | |
119 | $vs_aa ယ\u103Aး → ɛ $high; | |
120 | $vs_aa ယ\u103A → ɛ $low; | |
121 | $vs_aa \u1037 → a $creaky; # redundant creaky tone | |
122 | $vs_aa း → a $high; | |
123 | $vs_aa → a $low; | |
124 | $vs_i က\u103A → eɪ\u032Fʔ; | |
125 | $vs_i စ\u103A → eɪ\u032Fʔ; | |
126 | $vs_i တ\u103A → eɪ\u032Fʔ; | |
127 | $vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ; | |
128 | $vs_i န\u103Aး → e $high ɪ\u032Fɴ; | |
129 | $vs_i န\u103A → e $low ɪ\u032Fɴ; | |
130 | $vs_i ပ\u103A → eɪ\u032Fʔ; | |
131 | $vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ; | |
132 | $vs_i မ\u103Aး → e $high ɪ\u032Fɴ; | |
133 | $vs_i မ\u103A → e $low ɪ\u032Fɴ; | |
134 | $vs_i $vs_u က\u103A → aɪ\u032Fʔ; | |
135 | $vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ; | |
136 | $vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ; | |
137 | $vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ; | |
138 | $vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ; | |
139 | $vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ; | |
140 | $vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ; | |
141 | $vs_i $vs_u ယ\u1037\u103A → o $creaky; | |
142 | $vs_i $vs_u ယ\u103Aး → o $high; | |
143 | $vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/ | |
144 | $vs_i $vs_u \u1037 → o $creaky; | |
145 | $vs_i $vs_u း → o $high; | |
146 | $vs_i $vs_u → o $low; | |
147 | $vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ; | |
148 | $vs_i $anusvara း → e $high ɪ\u032Fɴ; | |
149 | $vs_i $anusvara → e $low ɪ\u032Fɴ; | |
150 | $vs_i → i $creaky; | |
151 | $vs_ii \u1037 → i $creaky; # this does not usually occur | |
152 | $vs_ii း → i $high; | |
153 | $vs_ii → i $low; | |
154 | $vs_u က\u103A → oʊ\u032Fʔ; | |
155 | $vs_u ဂ\u103A → oʊ\u032Fʔ; | |
156 | $vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ; | |
157 | $vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ; | |
158 | $vs_u ဏ\u103A → o $low ʊ\u032Fɴ; | |
159 | $vs_u တ\u103A → oʊ\u032Fʔ; | |
160 | $vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ; | |
161 | $vs_u န\u103Aး → o $high ʊ\u032Fɴ; | |
162 | $vs_u န\u103A → o $low ʊ\u032Fɴ; | |
163 | $vs_u ပ\u103A → oʊ\u032Fʔ; | |
164 | $vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ; | |
165 | $vs_u မ\u103Aး → o $high ʊ\u032Fɴ; | |
166 | $vs_u မ\u103A → o $low ʊ\u032Fɴ; | |
167 | $vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ; | |
168 | $vs_u $anusvara း → o $high ʊ\u032Fɴ; | |
169 | $vs_u $anusvara → o $low ʊ\u032Fɴ; | |
170 | $vs_u → u $creaky; | |
171 | $vs_uu \u1037 → u $creaky; # this does not usually occur | |
172 | $vs_uu း → u $high; | |
173 | $vs_uu → u $low; | |
174 | $vs_e တ\u103A → ɪʔ; | |
175 | $vs_e $vs_aa က\u103A → aʊ\u032Fʔ; | |
176 | $vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ; | |
177 | $vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ; | |
178 | $vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ; | |
179 | $vs_e $vs_aa \u1037 → ɔ $creaky; | |
180 | $vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur | |
181 | $vs_e $vs_aa \u103A → ɔ $low; | |
182 | $vs_e $vs_aa → ɔ $high; | |
183 | $vs_e \u1037 → e $creaky; | |
184 | $vs_e း → e $high; | |
185 | $vs_e → e $low; | |
186 | $vs_ai \u1037 → ɛ $creaky; | |
187 | $vs_ai း → ɛ $high; # redundant high tone; this does not usually occur | |
188 | $vs_ai → ɛ $high; | |
189 | $anusvara \u1037 → a $creaky ɴ; | |
190 | $anusvara း → a $high ɴ; | |
191 | $anusvara → a $low ɴ; | |
192 | $med_w တ\u103A → ʊʔ; | |
193 | $med_w န\u1037\u103A → ʊ $creaky ɴ; | |
194 | $med_w န\u103Aး → ʊ $high ɴ; | |
195 | $med_w န\u103A → ʊ $low ɴ; | |
196 | $med_w ပ\u103A → ʊʔ; | |
197 | $med_w မ\u1037\u103A → ʊ $creaky ɴ; | |
198 | $med_w မ\u103Aး → ʊ $high ɴ; | |
199 | $med_w မ\u103A → ʊ $low ɴ; | |
200 | # | |
201 | # Medials: resolve MEDIAL YA, RA, WA and HA together with the consonant they follow. | |
202 | # | |
203 | ::Null; | |
204 | # Palatalization of the velar stops before MEDIAL YA and MEDIAL RA: | |
205 | # velar + /j/ ==> modern palatals (aspiration is kept; ဂ and ဃ both give /d\u0361ʑ/). | |
206 | ကျ → t\u0361ɕ; | |
207 | ချ → t\u0361ɕʰ; | |
208 | ဂျ → d\u0361ʑ; | |
209 | ဃျ → d\u0361ʑ; | |
210 | ကြ → t\u0361ɕ; | |
211 | ခြ → t\u0361ɕʰ; | |
212 | ဂြ → d\u0361ʑ; | |
213 | ဃြ → d\u0361ʑ; | |
214 | # Remove redundant MEDIAL YA and MEDIAL RA after initial YA. | |
215 | ယ { [$med_y $med_r] → ; | |
216 | # Reorder the medials so that U+103E MEDIAL HA comes before any | |
217 | # other medials. This is done in three steps, each followed by a new pass. | |
218 | # First, push U+103E MEDIAL HA before U+103D MEDIAL WA. | |
219 | \u103D \u103E → \u103E \u103D; | |
220 | ::Null; | |
221 | # Now MEDIAL HA precedes any MEDIAL WA, so the two rules below also match a cluster that contains WA. | |
222 | # Produce /ʃ/ from SA or LA followed by MEDIAL YA + MEDIAL HA. | |
223 | သျ\u103E → ʃ; | |
224 | လျ\u103E → ʃ; | |
225 | # Second, push U+103E MEDIAL HA before U+103C MEDIAL RA. | |
226 | \u103C \u103E → \u103E \u103C; | |
227 | ::Null; | |
228 | # Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA. | |
229 | \u103B \u103E → \u103E \u103B; | |
230 | ::Null; | |
231 | # Consume MEDIAL HA and apply devoicing to nasals, LA, LLA and WA; YA and RA with MEDIAL HA become /ʃ/. | |
232 | င\u103E → ŋ\u030A; | |
233 | ဉ\u103E → ɲ\u0325; | |
234 | ည\u103E → ɲ\u0325; | |
235 | ဏ\u103E → n\u0325; | |
236 | န\u103E → n\u0325; | |
237 | မ\u103E → m\u0325; | |
238 | ယ\u103E → ʃ; | |
239 | ရ\u103E → ʃ; | |
240 | လ\u103E → l\u0325; | |
241 | ဝ\u103E → w\u0325; | |
242 | ဠ\u103E → l\u0325; | |
243 | # Drop any remaining U+103E MEDIAL HA, i.e. one after a consonant not listed above. | |
244 | \u103E → ; | |
245 | # Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and | |
246 | # U+103C MEDIAL RA before U+103D MEDIAL WA; remaining medials YA, RA, WA then become /j/, /j/, /w/. # TODO: revisit this | |
247 | \u103B } \u103D → ; | |
248 | \u103C } \u103D → ; | |
249 | \u103B → j; | |
250 | \u103C → j; | |
251 | \u103D → w; | |
252 | # | |
253 | # Initials: map each remaining consonant letter to its IPA onset. | |
254 | # | |
255 | # Velars | |
256 | က → k; | |
257 | ခ → kʰ; | |
258 | ဂ → ɡ; | |
259 | ဃ → ɡ; | |
260 | င → ŋ; | |
261 | # Historic palatals | |
262 | စ → s; | |
263 | ဆ → sʰ; | |
264 | ဇ → z; | |
265 | ဈ → z; | |
266 | ဉ → ɲ; | |
267 | ည → ɲ; | |
268 | # Alveolars | |
269 | ဋ → t; | |
270 | ဌ → tʰ; | |
271 | ဍ → d; | |
272 | ဎ → d; | |
273 | ဏ → n; | |
274 | # Historic dentals ==> alveolars | |
275 | တ → t; | |
276 | ထ → tʰ; | |
277 | ဒ → d; | |
278 | ဓ → d; | |
279 | န → n; | |
280 | # Labials | |
281 | ပ → p; | |
282 | ဖ → pʰ; | |
283 | ဗ → b; | |
284 | ဘ → b; | |
285 | မ → m; | |
286 | # Other letters | |
287 | ယ → j; | |
288 | ရ → j; # historic /r/ | |
289 | လ\u103A → ; # final, typically not pronounced in native words | |
290 | လ → l; | |
291 | ဝ → w; | |
292 | သ → θ; # historic /s/ ==> modern dental | |
293 | ဟ → h; | |
294 | ဠ → l; | |
295 | အ → ʔ; | |
296 | # Independent vowels: glottal-stop onset plus vowel and tone; a following U+1037 or း selects creaky or high tone. | |
297 | ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur | |
298 | ဣး → ʔí; # this does not usually occur | |
299 | ဣ → ʔḭ; | |
300 | ဤ\u1037 → ʔḭ; # this does not usually occur | |
301 | ဤး → ʔí; # this does not usually occur | |
302 | ဤ → ʔì; | |
303 | ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur | |
304 | ဥး → ʔú; # this does not usually occur | |
305 | ဥ → ʔṵ; | |
306 | ဦ\u1037 → ʔṵ; # this does not usually occur | |
307 | ဦး → ʔú; | |
308 | ဦ → ʔù; | |
309 | ဧ\u1037 → ʔḛ; # this does not usually occur | |
310 | ဧး → ʔé; | |
311 | ဧ → ʔè; | |
312 | ဩ\u1037 → ʔɔ\u0330; # this does not usually occur | |
313 | ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur | |
314 | ဩ → ʔɔ\u0301; | |
315 | ဪ\u1037 → ʔɔ\u0330; # this does not usually occur | |
316 | ဪး → ʔɔ\u0301; # this does not usually occur | |
317 | ဪ → ʔɔ\u0300; | |
318 | # Various signs: letter-like symbols, each mapped to a fixed reading. | |
319 | ၌ → n\u0325aɪ\u032Fʔ; | |
320 | ၍ → jwḛ; | |
321 | # ၎င\u103Aး was handled earlier, in the preprocessing section. | |
322 | ၏ → ʔḭ; | |
323 | # | |
324 | # Postprocessing: remove leftover marks and invisible characters, then renormalize with NFC. | |
325 | # | |
326 | # Delete any remaining U+103A ASAT, i.e. one that no earlier rule consumed. | |
327 | $asat → ; | |
328 | # Delete zero-width space, non-joiner, joiner (U+200B..U+200D). | |
329 | [\u200B-\u200D] → ; | |
330 | ::NFC; | |
331 |