]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/si_si_FONIPA.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / si_si_FONIPA.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: si_si_FONIPA.txt
5 # Generated from CLDR
6 #
7
8 # Sinhala pronunciation rules
9 #
10 # Output
11 # k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s h f
12 # ə əː a aː æ æː i iː u uː e eː o oː
13 #
14 # References
15 # [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
16 # Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
17 # Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
18 # pages 890–897. http://www.aclweb.org/anthology/P06-2114
19 # Simplify ya + yansaya to plain ya after a consonant; the consonant, virama and optional ZWJ in front of the brace are context only and are left unchanged.
20 [\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCA‍ය → ය;
21 # Delete ZWNJ and ZWJ to simplify further processing (none of the later rules mention a joiner).
22 \u200C → ; # ZWNJ
23 \u200D → ; # ZWJ
24 # Insert a schwa after every consonant that is not followed by a dependent vowel
25 # or virama. The character after the brace is only inspected, it is not consumed.
26 ::Null; # pass boundary: the rule below runs on the output of the rules above
27 ([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; # $1 re-emits the captured consonant, then the inherent vowel is made explicit as ə
28 # Pronunciation rules proper: a single pass that maps letters and signs to IPA; the first rule that matches at the cursor wins.
29 ::Null;
30 # fප is an alternative spelling of ෆ.
31 # This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
32 # [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
33 [Ff]ප → f; # must come before the plain ප rule so the pair is consumed as one unit
34 # zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
35 # This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
36 # [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
37 # or in zස\u0DD3බ\u0DCA‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zස\u0DD3බ\u0DCA‍රා].
38 [Zz]ස → z; # must come before the plain ස rule so the pair is consumed as one unit
39 ං → ŋ; # anusvaraya
40 o → ŋ; # common substitution for anusvaraya
41 ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate. ඃ before a consonant is rewritten as that consonant doubled (consonant, virama, consonant); the leading | leaves the cursor in front of the output so the rules below still convert it.
42 ඃ → h; # any ඃ that is not followed by a consonant
43 අ → a; # independent vowel letters; the dependent vowel signs are mapped separately further down
44 ආ → aː;
45 ඇ → æ;
46 ඈ → æː;
47 ඉ → i;
48 ඊ → iː;
49 උ → u;
50 ඌ → uː;
51 ඍ → ri; # NOTE(review): the vowel sign ෘ is mapped to ru in this file; confirm the ri/ru difference is intended
52 ඎ → ruː;
53 ඏ → ilu; # NOTE(review): the vowel sign ෟ is mapped to lu (no leading i) in this file; presumably intentional, confirm
54 ඐ → iluː;
55 එ → e;
56 ඒ → eː;
57 ඓ → aj; # same output as the vowel sign ෛ
58 ඔ → o;
59 ඕ → oː;
60 ඖ → aw; # TODO: check if this is correct
61 ක → k; # consonant letters: each rule emits only the consonant; the following ə or vowel sign is handled by its own rule
62 ඛ → k; # same output as ක
63 ග → ɡ;
64 ඝ → ɡ; # same output as ග
65 ඞ → ŋ;
66 ඟ → ᵑɡ;
67 ච → c;
68 ඡ → c; # same output as ච
69 ජ → ɟ;
70 ඣ → ɟ; # same output as ජ
71 ඤ → ɲ;
72 ඥ → kɲ; # TODO: double-check
73 ඦ → ɟ; # NOTE(review): ඟ ඬ ඳ ඹ get prenasalised outputs (ᵑɡ ⁿɖ ⁿd ᵐb) but this letter gets plain ɟ; confirm that is intended
74 ට → ʈ;
75 ඨ → ʈ; # same output as ට
76 ඩ → ɖ;
77 ඪ → ɖ; # same output as ඩ
78 ණ → n; # same output as න
79 ඬ → ⁿɖ;
80 ත → t;
81 ථ → t; # same output as ත
82 ද → d;
83 ධ → d; # same output as ද
84 න → n;
85 ඳ → ⁿd;
86 ප → p;
87 ඵ → p; # same output as ප
88 බ → b;
89 භ → b; # same output as බ
90 ම → m;
91 ඹ → ᵐb;
92 ය → j;
93 ර → r;
94 ල → l;
95 ව → w;
96 ශ → ʃ;
97 ෂ → ʃ; # same output as ශ
98 ස → s;
99 හ → h;
100 ළ → l; # same output as ල
101 ෆ → f;
102 \u0DCA → ; # delete virama (a consonant followed by virama received no schwa in the earlier pass, so it stays vowelless)
103 ා → aː; # dependent vowel signs from here on
104 ැ → æ;
105 ෑ → æː;
106 \u0DD2 → i;
107 \u0DD3 → iː;
108 \u0DD4 → u;
109 \u0DD6 → uː;
110 ෘ → ru; # NOTE(review): the independent letter ඍ is mapped to ri in this file; confirm the ru/ri difference is intended
111 ෙ → e;
112 ේ → eː;
113 ෛ → aj; # same output as the independent letter ඓ
114 ො → o;
115 ෝ → oː;
116 ෞ → aw; # TODO: check if this is correct
117 ෟ → lu; # NOTE(review): the independent letter ඏ is mapped to ilu in this file; presumably intentional, confirm
118 ෲ → ruː;
119 ෳ → luː;
120 # Heuristics for turning /ə/ into /a/. Based on [1]. The two variables below are used as context in all the remaining rules.
121 $c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; # consonant phonemes; braces make a multi-character string such as ᵑɡ a single set element
122 $s=[:^L:]; # any character that is not a letter; used below to mark a word boundary
123 # Rule #1: a schwa in the first syllable of a word becomes /a/ unless one of the exceptions matches first (rules are tried in order; an exception rewrites ə to itself to shield it). Exception (a) accepts w as well as v because the pronunciation rules transcribe ව as w, so a context of "sv" alone never matches their output.
124 ::Null;
125 $s s[vw] { ə → ə; # exception (a): word starts with s followed by v/w
126 $s k { ə } r → ə; # exception (b): word starts with kər
127 $s $c { ə } $s → ə; # exception (c): the whole word is one consonant plus ə
128 $s $c $c { ə → a; # word starts with two consonants
129 $s $c { ə → a; # word starts with one consonant
130 # Rule #2: vowel after consonant + r. Order matters: the h exception must precede clause (c), because h is itself a member of $c.
131 ::Null;
132 $c r { ə } $c → a; # clause (a) and (b)
133 $c r { a } h → a; # clause (d), exception: a before h is rewritten to itself so the next rule cannot touch it
134 $c r { a } $c → ə; # clause (c)
135 # Rule #3
136 # The paper is unclear about what this rule means. The interpretation here
137 # assumes that "preceded" in the paper is a typo and should be read "followed".
138 ::Null;
139 [a e æ o ə] h { ə → a; # a schwa after h becomes a when that h follows one of a, e, æ, o or ə
140 # Rules #4 through #7 share one pass; the first rule that matches a given ə decides its fate, so the Rule #5 exception has to stay above Rule #5.
141 ::Null;
142 ə } $c $c → a; # Rule #4: schwa before two consonants
143 ə } [rbɖʈ] $s → ə; # Rule #5 exception: schwa stays before a word-final r, b, ɖ or ʈ
144 ə } $c $s → a; # Rule #5: schwa before any other word-final consonant
145 ə } ji $s → a; # Rule #6: schwa before word-final ji
146 k { ə } [rl] u → a; # Rule #7: schwa between k and ru or lu
147 # Rule #8: words beginning with kal...; each rule below requires $s before the k.
148 # Note that the paper doesn't say explicitly that this rule should be
149 # anchored at the beginning of a word, but the remarks before the rules
150 # seem to imply this.
151 ::Null;
152 $s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/.
153 $s k { a } le[mh][ui] → ə; # a becomes ə before le + m/h + u/i
154 $s k { alə } h[ui] → əle; # rewrites three characters at once: alə becomes əle before h + u/i
155 $s k { a } lə → ə; # remaining case: a becomes ə before lə
156 # Diphthongs: final clean-up pass that shortens vowel + glide + vowel sequences.
157 ::Null;
158 www+ → ww; # යෞව\u0DCAවන (three or more w collapse to two)
159 [i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; # after any listed vowel (u, uː and ə are not listed) wu loses its u
160 əji → aj;
161 iji → iː; # perhaps: ij
162 [u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; # after any listed vowel (i, iː and ə are not listed) ji loses its i
163