]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: sat_Olck_sat_FONIPA.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
2ca993e8 A |
6 | # |
7 | ||
8 | # Santali (Ol Chiki) → Santali (International Phonetic Alphabet) | |
9 | # Output | |
10 | # ------ | |
11 | # m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː | |
12 | # p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ | |
13 | # s sː h | |
14 | # d\u0361ʒ | |
15 | # ɽ r | |
16 | # l lː | |
17 | # w wː w\u0303 w\u0303ː | |
18 | # | |
19 | # i iː ĩ ĩː u uː ũ ũː | |
20 | # e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː | |
21 | # ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː | |
22 | # a aː ã ãː | |
23 | # References | |
24 | # ---------- | |
25 | # [1] Michael Everson: Final proposal to encode the Ol Chiki script | |
26 | # in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R, | |
27 | # September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf | |
28 | # | |
29 | # [2] George L. Campbell: Compendium of the World's Languages. | |
30 | # Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000. | |
31 | # Pages 1454 to 1458. | |
32 | # Notes | |
33 | # ----- | |
34 | # According to [1] (page 3), ᱽ can only follow the four ejective | |
35 | # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become | |
36 | # ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however, | |
37 | # we have occasionally encountered ᱽ following non-ejective plosives, | |
38 | # for example after ᱯ /p/. These might possibly be typos. Our rules | |
39 | # try to be resilient and handle ᱯᱽ as /b/. | |
40 | # | |
41 | # According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal” | |
42 | # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually | |
43 | # ejective, not glottal). In online texts, however, we have frequently | |
44 | # encountered ᱼ following non-ejective consonants. | |
45 | $inword = [[:L:][:M:]]; | |
46 | # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG (ᱺ), written as ᱹ and ᱸ in either order; compose it first so that later rules only have to handle ᱺ. | |
47 | ᱹᱸ → ᱺ ; | |
48 | ᱸᱹ → ᱺ ; | |
49 | ::null(); | |
50 | # To simplify the rules below, enforce a uniform ordering of marks: ᱻ (relaa) and ᱼ (phaarkaa) are moved behind ᱹ, ᱸ and ᱺ. | |
51 | ᱻᱹ → ᱹᱻ ; | |
52 | ᱻᱸ → ᱸᱻ ; | |
53 | ᱻᱺ → ᱺᱻ ; | |
54 | ᱼᱹ → ᱹᱼ ; | |
55 | ᱼᱸ → ᱸᱼ ; | |
56 | ᱼᱺ → ᱺᱼ ; | |
57 | ::null(); | |
58 | # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating | |
59 | # long phonemes, presumably because the graphemes look similar in some fonts. | |
60 | # Since phaarkaa is used for voicing ejectives and plosives (which cannot | |
61 | # be lengthened), we rewrite phaarkaa to relaa when it follows one of the letters listed in the rule below, optionally with ᱹ, ᱸ or ᱺ in between. | |
62 | [ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ; | |
63 | ::null(); | |
64 | ᱚᱹᱻ → ɔː ; | |
65 | ᱚᱹ → ɔ ; | |
66 | ᱚᱸᱻ → ɔ\u0303ː ; | |
67 | ᱚᱸ → ɔ\u0303 ; | |
68 | ᱚᱺᱻ → ɔ\u0303ː ; | |
69 | ᱚᱺ → ɔ\u0303 ; | |
70 | ᱚᱻ → ɔː ; | |
71 | ᱚ → ɔ ; | |
72 | ᱛᱼ → t ; | |
73 | ᱛᱷ → tʰ ; | |
74 | ᱛᱽ → d ; | |
75 | # ᱛ is a plain (non-ejective) /t/: unlike the ejectives ᱵ ᱡ ᱫ ᱜ it is not voiced after a letter or mark, so it gets no $inword rule (same as ᱠ, ᱪ, ᱯ, ᱴ). | |
76 | ᱛ → t ; | |
77 | ᱜᱼ → kʼ ; | |
78 | ᱜᱷ → kʰ ; | |
79 | ᱜᱽ → ɡ ; | |
80 | $inword {ᱜ} → ɡ ; | |
81 | ᱜ → kʼ ; | |
82 | ᱝᱻ → ŋː ; | |
83 | ᱝ → ŋ ; | |
84 | ᱞᱻ → lː ; | |
85 | ᱞ → l ; | |
86 | ᱟᱹᱻ → əː ; | |
87 | ᱟᱹ → ə ; | |
88 | ᱟᱸᱻ → ãː ; | |
89 | ᱟᱸ → ã ; | |
90 | ᱟᱺᱻ → ə\u0303ː ; | |
91 | ᱟᱺ → ə\u0303 ; | |
92 | ᱟᱻ → aː ; | |
93 | ᱟ → a ; | |
94 | ᱠᱼ → k ; | |
95 | ᱠᱷ → kʰ ; | |
96 | ᱠᱽ → ɡ ; | |
97 | ᱠ → k ; | |
98 | ᱡᱼ → cʼ ; | |
99 | ᱡᱷ → cʰ ; | |
100 | ᱡᱽ → d\u0361ʒ ; | |
101 | $inword {ᱡ} → d\u0361ʒ ; | |
102 | ᱡ → cʼ ; | |
103 | ᱢᱻ → mː ; | |
104 | ᱢ → m ; | |
105 | # According to [1], ᱣ is sometimes /v/ and sometimes /w/; the rules below always emit /w/ (or /w\u0303/ when ᱣ is followed by ᱸ) and never /v/. | |
106 | # TODO: Find out if there is a rule for this. | |
107 | ᱣᱸ → w\u0303 ; | |
108 | ᱣ → w ; | |
109 | ᱤᱹᱻ → iː ; | |
110 | ᱤᱹ → i ; | |
111 | ᱤᱸᱻ → ĩː ; | |
112 | ᱤᱸ → ĩ ; | |
113 | ᱤᱺᱻ → ĩː ; | |
114 | ᱤᱺ → ĩ ; | |
115 | ᱤᱻ → iː ; | |
116 | ᱤ → i ; | |
117 | ᱥᱻ → sː ; | |
118 | ᱥ → s ; | |
119 | # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/; the rule below always emits /h/. | |
120 | # TODO: Find out if there is a rule for this. | |
121 | ᱦ → h ; | |
122 | ᱧᱻ → ɲː ; | |
123 | ᱧ → ɲ ; | |
124 | ᱨᱻ → r ; | |
125 | ᱨ → r ; | |
126 | ᱩᱹᱻ → uː ; | |
127 | ᱩᱹ → u ; | |
128 | ᱩᱸᱻ → ũː ; | |
129 | ᱩᱸ → ũ ; | |
130 | ᱩᱺᱻ → ũː ; | |
131 | ᱩᱺ → ũ ; | |
132 | ᱩᱻ → uː ; | |
133 | ᱩ → u ; | |
134 | ᱪᱼ → c ; | |
135 | ᱪᱷ → cʰ ; | |
136 | ᱪᱽ → d\u0361ʒ ; | |
137 | ᱪ → c ; | |
138 | ᱫᱼ → tʼ ; | |
 | # ᱫᱷ is the voiced aspirate /dʰ/ listed in the Output inventory at the top of this file (parallel to ᱵᱷ → bʰ); /tʰ/ is already produced by ᱛᱷ. | |
139 | ᱫᱷ → dʰ ; | |
140 | ᱫᱽ → d ; | |
141 | $inword {ᱫ} → d ; | |
142 | ᱫ → tʼ ; | |
143 | ᱬᱻ → ɳː ; | |
144 | ᱬ → ɳ ; | |
145 | # TODO: ᱭ is currently mapped to /h/, which gives ᱵᱷᱭᱨᱚᱵ → bʰhrɔb; that seems unlikely and would be good to verify. | |
146 | ᱭ → h ; | |
147 | ᱮᱹᱻ → ɛː ; | |
148 | ᱮᱹ → ɛ ; | |
149 | ᱮᱺᱻ → ɛ\u0303ː ; | |
150 | ᱮᱺ → ɛ\u0303 ; | |
151 | ᱮᱸᱻ → ẽː ; | |
152 | ᱮᱸ → ẽ ; | |
153 | ᱮᱻ → eː ; | |
154 | ᱮ → e ; | |
155 | ᱯᱼ → p ; | |
156 | ᱯᱷ → pʰ ; | |
157 | ᱯᱽ → b ; | |
158 | ᱯ → p ; | |
159 | ᱰᱷ → ɖʰ ; | |
160 | ᱰ → ɖ ; | |
161 | ᱱᱻ → nː ; | |
162 | ᱱ → n ; | |
163 | ᱲᱻ → ɽ ; | |
164 | ᱲ → ɽ ; | |
165 | ᱳᱸᱻ → õː ; | |
166 | ᱳᱸ → õ ; | |
167 | ᱳᱻ → oː ; | |
168 | ᱳ → o ; | |
169 | ᱴᱼ → ʈ ; | |
170 | ᱴᱷ → ʈʰ ; | |
171 | ᱴᱽ → ɖ ; | |
172 | ᱴ → ʈ ; | |
173 | ᱵᱼ → pʼ ; | |
174 | ᱵᱷ → bʰ ; | |
175 | ᱵᱽ → b ; | |
176 | $inword {ᱵ} → b ; | |
177 | ᱵ → pʼ ; | |
178 | ᱶᱻ → w\u0303ː ; | |
179 | ᱶ → w\u0303 ; | |
180 |