]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/si_si_FONIPA.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / si_si_FONIPA.txt
CommitLineData
2ca993e8
A
1# ***************************************************************************
2# *
3# * Copyright (C) 2004-2016, International Business Machines
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
7# File: si_si_FONIPA.txt
8# Generated from CLDR
9#
10
11# Sinhala pronunciation rules
12#
13# Output
14# k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
15# ə əː a aː æ æː i iː u uː e eː o oː
16#
17# References
18# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
19# Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
20# Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
21# pages 890–897. http://www.aclweb.org/anthology/P06-2114
22# Simplify ya + yansaya to plain ya after a consonant.
23[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCA‍ය → ය; # text before "{" (consonant, virama, optional ZWJ) is context only and is left untouched
24# Delete ZWNJ and ZWJ to simplify further processing.
25\u200C → ; # ZWNJ
26\u200D → ; # ZWJ
27# Insert a schwa after every consonant that is not followed by a dependent vowel
28# or virama.
29::Null; # pass boundary: the rules above are applied to the whole text before the rules below
30([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; # text after "}" is lookahead only; the consonant ($1) is kept and ə is appended
31# Pronunciation rules proper.
32::Null; # pass boundary: schwa insertion above is finished before these rules run
33# fප is an alternative spelling of ෆ.
34# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
35# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
36[Ff]ප → f; # must precede the plain ප → p rule further down so the digraph wins
37# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
38# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
39# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
40# or in zස\u0DD3බ\u0DCA‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zස\u0DD3බ\u0DCA‍රා].
41[Zz]ස → z; # must precede the plain ස → s rule further down so the digraph wins
42ං → ŋ; # anusvaraya
43o → ŋ; # common substitution for anusvaraya
44ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate. Rewrites ඃ + consonant as consonant + virama + consonant; "|" puts the cursor before the output so later rules re-process it
45ඃ → h; # fallback for ඃ not followed by a consonant (the rule above did not match)
46අ → a; # From here to ඖ: each vowel letter is replaced by a fixed IPA string; long vowels carry ː
47ආ → aː;
48ඇ → æ;
49ඈ → æː;
50ඉ → i;
51ඊ → iː;
52උ → u;
53ඌ → uː;
54ඍ → ri; # NOTE(review): the vowel sign ෘ further down is mapped to "ru", not "ri" — confirm the difference is intended
55ඎ → ruː;
56ඏ → ilu;
57ඐ → iluː;
58එ → e;
59ඒ → eː;
60ඓ → aj;
61ඔ → o;
62ඕ → oː;
63ඖ → aw; # TODO: check if this is correct
64ක → k; # From here to ෆ: each consonant letter is replaced by a fixed IPA string; several letters share one output
65ඛ → k; # same output as ක
66ග → ɡ;
67ඝ → ɡ; # same output as ග
68ඞ → ŋ;
69ඟ → ᵑɡ; # two-character output; appears as {ᵑɡ} in $c below
70ච → c;
71ඡ → c; # same output as ච
72ජ → ɟ;
73ඣ → ɟ; # same output as ජ
74ඤ → ɲ;
75ඥ → kɲ; # TODO: double-check
76ඦ → ɟ; # same output as ජ
77ට → ʈ;
78ඨ → ʈ; # same output as ට
79ඩ → ɖ;
80ඪ → ɖ; # same output as ඩ
81ණ → n; # same output as න
82ඬ → ⁿɖ; # two-character output; appears as {ⁿɖ} in $c below
83ත → t;
84ථ → t; # same output as ත
85ද → d;
86ධ → d; # same output as ද
87න → n;
88ඳ → ⁿd; # two-character output; appears as {ⁿd} in $c below
89ප → p;
90ඵ → p; # same output as ප
91බ → b;
92භ → b; # same output as බ
93ම → m;
94ඹ → ᵐb; # two-character output; appears as {ᵐb} in $c below
95ය → j;
96ර → r;
97ල → l;
98ව → w;
99ශ → ʃ;
100ෂ → ʃ; # same output as ශ
101ස → s;
102හ → h;
103ළ → l; # same output as ල
104ෆ → f;
105\u0DCA → ; # delete virama (no schwa was inserted before it by the earlier schwa-insertion rule)
106ා → aː; # From here to ෳ: each dependent vowel sign is replaced by a fixed IPA string
107ැ → æ;
108ෑ → æː;
109\u0DD2 → i;
110\u0DD3 → iː;
111\u0DD4 → u;
112\u0DD6 → uː;
113ෘ → ru; # NOTE(review): the vowel letter ඍ above is mapped to "ri", not "ru" — confirm the difference is intended
114ෙ → e;
115ේ → eː;
116ෛ → aj;
117ො → o;
118ෝ → oː;
119ෞ → aw; # TODO: check if this is correct
120ෟ → lu;
121ෲ → ruː;
122ෳ → luː;
123# Heuristics for turning /ə/ into /a/. Based on [1].
124$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; # consonant outputs of the rules above; {…} entries are multi-character strings
125$s=[:^L:]; # any non-letter character; used below as context at word edges
126# Rule #1
127::Null; # pass boundary: all letter-to-IPA rules above are finished before the heuristics run
128$s sv { ə → ə; # exception (a): ə stays ə after "sv" following a non-letter
129$s k { ə } r → ə; # exception (b): ə stays ə between "k" (following a non-letter) and "r"
130$s $c { ə } $s → ə; # exception (c): ə stays ə when one consonant plus ə stands between two non-letters
131$s $c $c { ə → a; # general case: ə after two consonants following a non-letter becomes a
132$s $c { ə → a; # general case: ə after one consonant following a non-letter becomes a (exceptions above are listed first so they win)
133# Rule #2
134::Null; # pass boundary: Rule #1 is applied to the whole text first
135$c r { ə } $c → a; # clause (a) and (b): ə between consonant + r and a following consonant becomes a
136$c r { a } h → a; # clause (d), exception: a before h is kept; listed before the next rule so that rule does not fire
137$c r { a } $c → ə; # clause (c): a between consonant + r and a following consonant becomes ə
138# Rule #3
139# The paper is unclear about what this rule means. The interpretation here
140# assumes that "preceded" in the paper is a typo and should be read "followed".
141::Null; # pass boundary: Rule #2 is applied to the whole text first
142[a e æ o ə] h { ə → a; # ə after one of the listed vowels plus h becomes a
143# Rules #4 through #7
144::Null; # pass boundary: Rule #3 is applied to the whole text first
145ə } $c $c → a; # Rule #4: ə before two consonants becomes a
146ə } [rbɖʈ] $s → ə; # Rule #5 exception: ə stays ə before r, b, ɖ or ʈ followed by a non-letter; listed first so it wins over Rule #5
147ə } $c $s → a; # Rule #5: ə before a consonant followed by a non-letter becomes a
148ə } ji $s → a; # Rule #6: ə before "ji" followed by a non-letter becomes a
149k { ə } [rl] u → a; # Rule #7: ə between k and "ru" or "lu" becomes a
150# Rule #8
151# Note that the paper doesn't say explicitly that this rule should be
152# anchored at the beginning of a word, but the remarks before the rules
153# seem to imply this.
154::Null; # pass boundary: Rules #4 through #7 are applied to the whole text first
155$s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/.
156$s k { a } le[mh][ui] → ə; # a after "k" following a non-letter becomes ə before "le" + m/h + u/i
157$s k { alə } h[ui] → əle; # "alə" after "k" following a non-letter becomes "əle" before h + u/i
158$s k { a } lə → ə; # a after "k" following a non-letter becomes ə before "lə"; listed last so the more specific rules above win
159# Diphthongs
160::Null; # pass boundary: all schwa heuristics above are finished before these rules run
161www+ → ww; # යෞව\u0DCAවන — three or more consecutive w are reduced to ww
162[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; # "wu" after one of the listed vowels loses its u
163əji → aj; # no context: applies wherever "əji" occurs
164iji → iː; # perhaps: ij
165[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; # "ji" after one of the listed vowels loses its i
166