]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/my_my_FONIPA.txt
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / my_my_FONIPA.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: my_my_FONIPA.txt
5 # Generated from CLDR
6 #
7
8 # Pronunciation rules for Burmese.
9 #
10 # The following rules are lexical and heuristic: lexical in the sense
11 # that they generate phoneme strings which may further undergo
12 # post-lexical phonological processes, in particular voicing, to
13 # result in actual surface forms; heuristic in the sense that they try
14 # to resolve ambiguities, especially around reduced vowels, in a
15 # systematic way that may be incorrect in many situations. Vowel
16 # reduction depends on many factors, such as morphemic structure,
17 # which are not available here.
18 #
19 # Definitions
20 #
21 # Dependent vowel signs (one variable per code point, so the rules below can refer to them by name)
22 $vs_AA = \u102B; # TALL AA; folded into $vs_aa during preprocessing
23 $vs_aa = \u102C; # AA
24 $vs_i = \u102D;
25 $vs_ii = \u102E;
26 $vs_u = \u102F;
27 $vs_uu = \u1030;
28 $vs_e = \u1031;
29 $vs_ai = \u1032;
30 # Various signs
31 $anusvara = \u1036;
32 $visarga = \u1038;
33 $virama = \u1039; # VIRAMA; rewritten to ASAT (or dropped after kinzi) during preprocessing
34 $asat = \u103A; # ASAT
35 # Dependent (medial) consonant signs
36 $med_y = \u103B; # MEDIAL YA
37 $med_r = \u103C; # MEDIAL RA
38 $med_w = \u103D; # MEDIAL WA
39 $med_h = \u103E; # MEDIAL HA; not referenced by name in the rules shown here, which use the literal \u103E
40 # Independent letters and letter-like punctuation symbols
41 $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
42 $creaky = \u0330; # combining mark emitted after a vowel for creaky tone
43 $high = \u0301; # combining mark emitted after a vowel for high tone
44 $low = \u0300; # combining mark emitted after a vowel for low tone
45 $coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused (no rule shown in this file references $coda)
46 #
47 # Preprocessing
48 #
49 ::NFC; # normalize the input to NFC before any rule below runs
50 # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
51 $vs_AA → $vs_aa;
52 # Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
53 # Open question: what should happen if the syllable ending in kinzi has a non-low tone?
54 င\u103A $virama → င\u103A;
55 # Unstack everything else, i.e. replace every remaining U+1039 VIRAMA with U+103A ASAT.
56 $virama → $asat;
57 # Unstack U+103F GREAT SA into SA + ASAT + SA.
58 ဿ → သ\u103Aသ;
59 # Insert a syllable boundary marker /./ before every independent letter that is not itself closed by ASAT.
60 ::Null;
61 [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; # empty { } = zero-width insertion; NOTE(review): [^.$] presumably excludes an existing "." and the start of text - confirm
62 # Insert default inherent vowel: /a\u0330/ at the end of the text, /ə/ before a syllable boundary /./.
63 ::Null;
64 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky; # $1 re-emits the captured letter plus medials; [$] is context only
65 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə; # the following "." is context only and is kept
66 # Allow for additional coda consonants.
67 #
68 # This only covers a few of the cases in which full coda consonants
69 # can appear in loanwords. The general situation is somewhat rare and
70 # is more easily dealt with in a formalism that can impose structural
71 # constraints on syllables more easily.
72 ::Null;
73 $asat ($visarga)? [\u1000-\u102A] { $asat → ; # after ASAT (+ optional visarga) and one more letter, delete the ASAT following that letter
74 # Deal with ၎င\u103Aး early, before the generic rhyme and initial rules can rewrite its parts.
75 ၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
76 #
77 # Rhymes
78 #
79 ::Null; # pass boundary: the rhyme rules run after the rewrites above have completed
80 က\u103A → ɛʔ;
81 ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
82 င\u1037\u103A → ɪ $creaky ɴ; # tone triples throughout this section: with \u1037 => creaky
83 င\u103Aး → ɪ $high ɴ; # with း => high
84 င\u103A → ɪ $low ɴ; # with neither => low
85 စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
86 ဉ\u1037\u103A → ɪ $creaky ɴ;
87 ဉ\u103Aး → ɪ $high ɴ;
88 ဉ\u103A → ɪ $low ɴ;
89 ည\u1037\u103A → ɛ $creaky;
90 ည\u103Aး → ɛ $high;
91 ည\u103A → ɛ $low;
92 ဏ\u1037\u103A → a $creaky ɴ;
93 ဏ\u103Aး → a $high ɴ;
94 ဏ\u103A → a $low ɴ;
95 တ\u103A → aʔ;
96 န\u1037\u103A → a $creaky ɴ;
97 န\u103Aး → a $high ɴ;
98 န\u103A → a $low ɴ;
99 ပ\u103A → aʔ;
100 မ\u1037\u103A → a $creaky ɴ;
101 မ\u103Aး → a $high ɴ;
102 မ\u103A → a $low ɴ;
103 ယ\u1037\u103A → ɛ $creaky;
104 ယ\u103Aး → ɛ $high;
105 ယ\u103A → ɛ $low;
106 သ\u103A → aʔ;
107 $vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
108 $vs_aa ဉ\u103Aး → ɪ $high ɴ;
109 $vs_aa ဉ\u103A → ɪ $low ɴ;
110 $vs_aa တ\u103A → aʔ;
111 $vs_aa ဏ\u1037\u103A → a $creaky ɴ;
112 $vs_aa ဏ\u103Aး → a $high ɴ;
113 $vs_aa ဏ\u103A → a $low ɴ;
114 $vs_aa န\u1037\u103A → a $creaky ɴ;
115 $vs_aa န\u103Aး → a $high ɴ;
116 $vs_aa န\u103A → a $low ɴ;
117 $vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
118 $vs_aa ယ\u1037\u103A → ɛ $creaky;
119 $vs_aa ယ\u103Aး → ɛ $high;
120 $vs_aa ယ\u103A → ɛ $low;
121 $vs_aa \u1037 → a $creaky; # redundant creaky tone
122 $vs_aa း → a $high;
123 $vs_aa → a $low; # bare sign: listed after every longer $vs_aa pattern so those are tried first
124 $vs_i က\u103A → eɪ\u032Fʔ;
125 $vs_i စ\u103A → eɪ\u032Fʔ;
126 $vs_i တ\u103A → eɪ\u032Fʔ;
127 $vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
128 $vs_i န\u103Aး → e $high ɪ\u032Fɴ;
129 $vs_i န\u103A → e $low ɪ\u032Fɴ;
130 $vs_i ပ\u103A → eɪ\u032Fʔ;
131 $vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
132 $vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
133 $vs_i မ\u103A → e $low ɪ\u032Fɴ;
134 $vs_i $vs_u က\u103A → aɪ\u032Fʔ;
135 $vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
136 $vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
137 $vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
138 $vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
139 $vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
140 $vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
141 $vs_i $vs_u ယ\u1037\u103A → o $creaky;
142 $vs_i $vs_u ယ\u103Aး → o $high;
143 $vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
144 $vs_i $vs_u \u1037 → o $creaky;
145 $vs_i $vs_u း → o $high;
146 $vs_i $vs_u → o $low;
147 $vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
148 $vs_i $anusvara း → e $high ɪ\u032Fɴ;
149 $vs_i $anusvara → e $low ɪ\u032Fɴ;
150 $vs_i → i $creaky; # bare sign: listed after every longer $vs_i pattern so those are tried first
151 $vs_ii \u1037 → i $creaky; # this does not usually occur
152 $vs_ii း → i $high;
153 $vs_ii → i $low;
154 $vs_u က\u103A → oʊ\u032Fʔ;
155 $vs_u ဂ\u103A → oʊ\u032Fʔ;
156 $vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
157 $vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
158 $vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
159 $vs_u တ\u103A → oʊ\u032Fʔ;
160 $vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
161 $vs_u န\u103Aး → o $high ʊ\u032Fɴ;
162 $vs_u န\u103A → o $low ʊ\u032Fɴ;
163 $vs_u ပ\u103A → oʊ\u032Fʔ;
164 $vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
165 $vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
166 $vs_u မ\u103A → o $low ʊ\u032Fɴ;
167 $vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
168 $vs_u $anusvara း → o $high ʊ\u032Fɴ;
169 $vs_u $anusvara → o $low ʊ\u032Fɴ;
170 $vs_u → u $creaky; # bare sign: listed after every longer $vs_u pattern so those are tried first
171 $vs_uu \u1037 → u $creaky; # this does not usually occur
172 $vs_uu း → u $high;
173 $vs_uu → u $low;
174 $vs_e တ\u103A → ɪʔ;
175 $vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
176 $vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
177 $vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
178 $vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
179 $vs_e $vs_aa \u1037 → ɔ $creaky;
180 $vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
181 $vs_e $vs_aa \u103A → ɔ $low; # here ASAT selects low tone
182 $vs_e $vs_aa → ɔ $high; # unmarked form defaults to high tone, unlike most rhymes above
183 $vs_e \u1037 → e $creaky;
184 $vs_e း → e $high;
185 $vs_e → e $low;
186 $vs_ai \u1037 → ɛ $creaky;
187 $vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
188 $vs_ai → ɛ $high; # unmarked form defaults to high tone
189 $anusvara \u1037 → a $creaky ɴ;
190 $anusvara း → a $high ɴ;
191 $anusvara → a $low ɴ;
192 $med_w တ\u103A → ʊʔ;
193 $med_w န\u1037\u103A → ʊ $creaky ɴ;
194 $med_w န\u103Aး → ʊ $high ɴ;
195 $med_w န\u103A → ʊ $low ɴ;
196 $med_w ပ\u103A → ʊʔ;
197 $med_w မ\u1037\u103A → ʊ $creaky ɴ;
198 $med_w မ\u103Aး → ʊ $high ɴ;
199 $med_w မ\u103A → ʊ $low ɴ;
200 #
201 # Medials
202 #
203 ::Null; # pass boundary: medial handling starts only after all rhyme rules have been applied
204 # Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
205 # velar + /j/ ==> modern palatals.
206 ကျ → t\u0361ɕ;
207 ချ → t\u0361ɕʰ;
208 ဂျ → d\u0361ʑ;
209 ဃျ → d\u0361ʑ;
210 ကြ → t\u0361ɕ;
211 ခြ → t\u0361ɕʰ;
212 ဂြ → d\u0361ʑ;
213 ဃြ → d\u0361ʑ;
214 # Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
215 ယ { [$med_y $med_r] → ; # ယ is context only; just the medial sign is deleted
216 # Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
217 # other medials.
218 # First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
219 \u103D \u103E → \u103E \u103D;
220 ::Null; # pass boundary: finish the WA/HA swap before the rules below run
221 # Now MEDIAL WA comes last.
222 # Produce the palatal ʃ from (SA|LA)+YA+HA.
223 သျ\u103E → ʃ;
224 လျ\u103E → ʃ;
225 # Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
226 \u103C \u103E → \u103E \u103C;
227 ::Null;
228 # Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
229 \u103B \u103E → \u103E \u103B;
230 ::Null;
231 # Consume MEDIAL HA and apply devoicing.
232 င\u103E → ŋ\u030A;
233 ဉ\u103E → ɲ\u0325;
234 ည\u103E → ɲ\u0325;
235 ဏ\u103E → n\u0325;
236 န\u103E → n\u0325;
237 မ\u103E → m\u0325;
238 ယ\u103E → ʃ;
239 ရ\u103E → ʃ;
240 လ\u103E → l\u0325;
241 ဝ\u103E → w\u0325;
242 ဠ\u103E → l\u0325;
243 # Drop any remaining U+103E MEDIAL HA, i.e. one not consumed by the devoicing rules above.
244 \u103E → ;
245 # Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
246 # U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
247 \u103B } \u103D → ; # \u103D is lookahead context only and is kept
248 \u103C } \u103D → ; # \u103D is lookahead context only and is kept
249 \u103B → j;
250 \u103C → j;
251 \u103D → w;
252 #
253 # Initials
254 #
255 # Velars
256 က → k;
257 ခ → kʰ;
258 ဂ → ɡ;
259 ဃ → ɡ;
260 င → ŋ;
261 # Historic palatals
262 စ → s;
263 ဆ → sʰ;
264 ဇ → z;
265 ဈ → z;
266 ဉ → ɲ;
267 ည → ɲ;
268 # Alveolars
269 ဋ → t;
270 ဌ → tʰ;
271 ဍ → d;
272 ဎ → d;
273 ဏ → n;
274 # Historic dentals ==> alveolars
275 တ → t;
276 ထ → tʰ;
277 ဒ → d;
278 ဓ → d;
279 န → n;
280 # Labials
281 ပ → p;
282 ဖ → pʰ;
283 ဗ → b;
284 ဘ → b;
285 မ → m;
286 # Other letters
287 ယ → j;
288 ရ → j; # historic /r/
289 လ\u103A → ; # final, typically not pronounced in native words; listed before bare လ so it is tried first
290 လ → l;
291 ဝ → w;
292 သ → θ; # historic /s/ ==> modern dental
293 ဟ → h;
294 ဠ → l;
295 အ → ʔ;
296 # Independent vowels (each is emitted with a leading ʔ; forms with \u1037 or း are listed before the bare letter)
297 ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
298 ဣး → ʔí; # this does not usually occur
299 ဣ → ʔḭ;
300 ဤ\u1037 → ʔḭ; # this does not usually occur
301 ဤး → ʔí; # this does not usually occur
302 ဤ → ʔì;
303 ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
304 ဥး → ʔú; # this does not usually occur
305 ဥ → ʔṵ;
306 ဦ\u1037 → ʔṵ; # this does not usually occur
307 ဦး → ʔú;
308 ဦ → ʔù;
309 ဧ\u1037 → ʔḛ; # this does not usually occur
310 ဧး → ʔé;
311 ဧ → ʔè;
312 ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
313 ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
314 ဩ → ʔɔ\u0301;
315 ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
316 ဪး → ʔɔ\u0301; # this does not usually occur
317 ဪ → ʔɔ\u0300;
318 # Various signs (each symbol maps to a fixed pronunciation)
319 ၌ → n\u0325aɪ\u032Fʔ;
320 ၍ → jwḛ;
321 # ၎င\u103Aး was handled earlier, so ၎ has no rule of its own here.
322 ၏ → ʔḭ;
323 #
324 # Postprocessing
325 #
326 # Delete any remaining U+103A ASAT, i.e. one not consumed by the rhyme rules or the လ\u103A rule.
327 $asat → ;
328 # Delete zero-width space, non-joiner, joiner (U+200B..U+200D).
329 [\u200B-\u200D] → ;
330 ::NFC; # normalize the final output to NFC
331