]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: si_si_FONIPA.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
2ca993e8 A |
6 | # |
7 | ||
8 | # Sinhala pronunciation rules | |
9 | # | |
10 | # Output | |
11 | # k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f | |
12 | # ə əː a aː æ æː i iː u uː e eː o oː | |
13 | # | |
14 | # References | |
15 | # [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage: | |
16 | # Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis. | |
17 | # Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions, | |
18 | # pages 890–897. http://www.aclweb.org/anthology/P06-2114 | |
19 | # After a consonant + virama (optionally followed by ZWJ), simplify ya + yansaya (the sequence ya, virama, ya) to plain ya. | |
20 | [\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය; | |
21 | # Delete ZWNJ (U+200C) and ZWJ (U+200D) so that the later rules do not have to account for them. | |
22 | \u200C → ; | |
23 | \u200D → ; | |
24 | # Insert a schwa (ə) after every consonant (U+0D9A..U+0DC6) that is not followed by a dependent vowel | |
25 | # or virama (i.e. by anything in U+0DCA..U+0DDF, U+0DF2 or U+0DF3). The captured consonant itself is kept as $1. | |
26 | ::Null; | |
27 | ([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; | |
28 | # Pronunciation rules proper: rewrite Sinhala letters and signs as IPA. Special spellings come first so they win over the single-letter rules. | |
29 | ::Null; | |
30 | # Latin f (or F) followed by ප is an alternative spelling of ෆ. | |
31 | # This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield) | |
32 | # [see http://bradshawofthefuture.blogspot.com/2013/02/f.html]. | |
33 | [Ff]ප → f; | |
34 | # Latin z (or Z) followed by ස is seemingly the only way to unambiguously indicate a voiced /z/ sound. | |
35 | # This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease) | |
36 | # [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය] | |
37 | # or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා]. | |
38 | [Zz]ස → z; | |
39 | ං → ŋ; | |
40 | o → ŋ; # Latin letter o is a common substitution for anusvaraya (ං) | |
41 | ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # before a consonant, rewrite as consonant + virama + consonant (gemination) and re-process from the start. TODO: check which consonants geminate | |
42 | ඃ → h; | |
43 | අ → a; | |
44 | ආ → aː; | |
45 | ඇ → æ; | |
46 | ඈ → æː; | |
47 | ඉ → i; | |
48 | ඊ → iː; | |
49 | උ → u; | |
50 | ඌ → uː; | |
51 | ඍ → ri; | |
52 | ඎ → ruː; | |
53 | ඏ → ilu; | |
54 | ඐ → iluː; | |
55 | එ → e; | |
56 | ඒ → eː; | |
57 | ඓ → aj; | |
58 | ඔ → o; | |
59 | ඕ → oː; | |
60 | ඖ → aw; # TODO: check if /aw/ is the correct value for this independent vowel | |
61 | ක → k; | |
62 | ඛ → k; | |
63 | ග → ɡ; | |
64 | ඝ → ɡ; | |
65 | ඞ → ŋ; | |
66 | ඟ → ᵑɡ; | |
67 | ච → c; | |
68 | ඡ → c; | |
69 | ජ → ɟ; | |
70 | ඣ → ɟ; | |
71 | ඤ → ɲ; | |
72 | ඥ → kɲ; # TODO: double-check the /kɲ/ value | |
73 | ඦ → ɟ; | |
74 | ට → ʈ; | |
75 | ඨ → ʈ; | |
76 | ඩ → ɖ; | |
77 | ඪ → ɖ; | |
78 | ණ → n; | |
79 | ඬ → ⁿɖ; | |
80 | ත → t; | |
81 | ථ → t; | |
82 | ද → d; | |
83 | ධ → d; | |
84 | න → n; | |
85 | ඳ → ⁿd; | |
86 | ප → p; | |
87 | ඵ → p; | |
88 | බ → b; | |
89 | භ → b; | |
90 | ම → m; | |
91 | ඹ → ᵐb; | |
92 | ය → j; | |
93 | ර → r; | |
94 | ල → l; | |
95 | ව → w; | |
96 | ශ → ʃ; | |
97 | ෂ → ʃ; | |
98 | ස → s; | |
99 | හ → h; | |
100 | ළ → l; | |
101 | ෆ → f; | |
102 | \u0DCA → ; # delete virama: it contributes no phoneme of its own | |
103 | ා → aː; | |
104 | ැ → æ; | |
105 | ෑ → æː; | |
106 | \u0DD2 → i; | |
107 | \u0DD3 → iː; | |
108 | \u0DD4 → u; | |
109 | \u0DD6 → uː; | |
110 | ෘ → ru; | |
111 | ෙ → e; | |
112 | ේ → eː; | |
113 | ෛ → aj; | |
114 | ො → o; | |
115 | ෝ → oː; | |
116 | ෞ → aw; # TODO: check if /aw/ is the correct value for this vowel sign | |
117 | ෟ → lu; | |
118 | ෲ → ruː; | |
119 | ෳ → luː; | |
120 | # Heuristics for turning /ə/ into /a/. Based on [1]. $c is the set of consonant phonemes emitted by the pronunciation rules; $s is any non-letter and serves as a word-boundary context. | |
121 | $c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; | |
122 | $s=[:^L:]; | |
123 | # Rule #1: /ə/ after a word-initial consonant or consonant pair becomes /a/, except in cases (a)-(c), which keep /ə/. The exceptions must precede the general rules. | |
124 | ::Null; | |
125 | $s sw { ə → ə; # exception (a): the paper's /sv/; ව is transcribed as w here, so the cluster to match is "sw" | |
126 | $s k { ə } r → ə; # exception (b): word-initial /kər/ | |
127 | $s $c { ə } $s → ə; # exception (c): the word is a single consonant + /ə/ | |
128 | $s $c $c { ə → a; | |
129 | $s $c { ə → a; | |
130 | # Rule #2: the vowel between consonant + /r/ and a following consonant. | |
131 | ::Null; | |
132 | $c r { ə } $c → a; # clause (a) and (b): /ə/ becomes /a/ | |
133 | $c r { a } h → a; # clause (d), exception: /a/ is kept before /h/ | |
134 | $c r { a } $c → ə; # clause (c): before any other consonant /a/ becomes /ə/ | |
135 | # Rule #3: /ə/ becomes /a/ when it comes after one of /a e æ o ə/ followed by /h/. | |
136 | # The paper [1] is unclear about what this rule means. The interpretation here | |
137 | # assumes that "preceded" in the paper is a typo and should be read "followed". | |
138 | ::Null; | |
139 | [a e æ o ə] h { ə → a; | |
140 | # Rules #4 through #7, applied together in one pass; earlier rules take precedence at a given position. | |
141 | ::Null; | |
142 | ə } $c $c → a; # Rule #4: /ə/ before two consonants becomes /a/ | |
143 | ə } [rbɖʈ] $s → ə; # Rule #5 exception: /ə/ is kept before word-final /r b ɖ ʈ/ | |
144 | ə } $c $s → a; # Rule #5: /ə/ before any other word-final consonant becomes /a/ | |
145 | ə } ji $s → a; # Rule #6: /ə/ before word-final /ji/ becomes /a/ | |
146 | k { ə } [rl] u → a; # Rule #7: /ə/ between /k/ and /ru/ or /lu/ becomes /a/ | |
147 | # Rule #8: word-initial /ka/ becomes /kə/ before the /l/-initial sequences listed below. | |
148 | # Note that the paper [1] doesn't say explicitly that this rule should be | |
149 | # anchored at the beginning of a word, but the remarks before the rules | |
150 | # seem to imply this; hence the leading $s context. | |
151 | ::Null; | |
152 | $s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was written /y/. | |
153 | $s k { a } le[mh][ui] → ə; | |
154 | $s k { alə } h[ui] → əle; | |
155 | $s k { a } lə → ə; | |
156 | # Diphthongs: after a vowel, reduce /wu/ to /w/ and /ji/ to /j/; also merge /əji/ and /iji/ and collapse runs of /w/. | |
157 | ::Null; | |
158 | www+ → ww; # three or more /w/ collapse to two, e.g. in යෞව\u0DCAවන | |
159 | [i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; | |
160 | əji → aj; | |
161 | iji → iː; # perhaps this should be: ij | |
162 | [u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; | |
163 |