]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/sat_Olck_sat_FONIPA.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / sat_Olck_sat_FONIPA.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: sat_Olck_sat_FONIPA.txt
5 # Generated from CLDR
6 #
7
8 # Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
9 # Output
10 # ------
11 # m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
12 # p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
13 # s sː h
14 # d\u0361ʒ
15 # ɽ r
16 # l lː
17 # w wː w\u0303 w\u0303ː
18 #
19 # i iː ĩ ĩː u uː ũ ũː
20 # e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
21 # ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
22 # a aː ã ãː
23 # References
24 # ----------
25 # [1] Michael Everson: Final proposal to encode the Ol Chiki script
26 # in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
27 # September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
28 #
29 # [2] George L. Campbell: Compendium of the World's Languages.
30 # Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
31 # Pages 1454 to 1458.
32 # Notes
33 # -----
34 # According to [1] (page 3), ᱽ can only follow the four ejective
35 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
36 # ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
37 # we have occasionally encountered ᱽ following non-ejective plosives,
38 # for example after ᱯ /p/. These might possibly be typos. Our rules
39 # try to be resilient and handle ᱯᱽ as /b/.
40 #
41 # According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
42 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
43 # ejective, not glottal). In online texts, however, we have frequently
44 # encountered ᱼ following non-ejective consonants.
# $inword matches any letter or combining mark. The rules further down use it
# as a left context ("$inword {X} → …") to give a letter a different reading
# when it is preceded by another letter or mark.
45 $inword = [[:L:][:M:]];
46 # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
# Either ordering of ᱹ and ᱸ is recomposed into the single mark ᱺ.
47 ᱹᱸ → ᱺ ;
48 ᱸᱹ → ᱺ ;
# Presumably a no-op transform used only as a pass boundary, so that the rules
# below see the recomposed text — TODO confirm against the ICU rule syntax.
49 ::null();
50 # To simplify the rules below, enforce a uniform ordering of marks.
# Each rule moves ᱹ, ᱸ or ᱺ in front of an immediately preceding ᱻ or ᱼ.
51 ᱻᱹ → ᱹᱻ ;
52 ᱻᱸ → ᱸᱻ ;
53 ᱻᱺ → ᱺᱻ ;
54 ᱼᱹ → ᱹᱼ ;
55 ᱼᱸ → ᱸᱼ ;
56 ᱼᱺ → ᱺᱼ ;
57 ::null();
58 # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
59 # long phonemes, presumably because the graphemes look similar in some fonts.
60 # Since phaarkaa is used for voicing ejectives and plosives (which cannot
61 # be lengthened), we rewrite phaarkaa to relaa.
# Only the ᱼ inside the braces is replaced; the listed base letter and any
# ᱹ/ᱸ/ᱺ marks before it are left context and are kept unchanged.
62 [ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
63 ::null();
# ᱚ → ɔ. A following ᱹ leaves the output unchanged, ᱸ or ᱺ adds the nasal
# tilde (\u0303), and a trailing ᱻ adds the length mark ː.
# Longer mark sequences are listed before the shorter rules they begin with.
64 ᱚᱹᱻ → ɔː ;
65 ᱚᱹ → ɔ ;
66 ᱚᱸᱻ → ɔ\u0303ː ;
67 ᱚᱸ → ɔ\u0303 ;
68 ᱚᱺᱻ → ɔ\u0303ː ;
69 ᱚᱺ → ɔ\u0303 ;
70 ᱚᱻ → ɔː ;
71 ᱚ → ɔ ;
# ᱛ is the plain (non-ejective) /t/: t by default and before ᱼ, tʰ before ᱷ,
# and d before ᱽ (accepted for resilience, as for the other non-ejective
# plosives ᱠ ᱪ ᱯ ᱴ).
72 ᱛᱼ → t ;
73 ᱛᱷ → tʰ ;
74 ᱛᱽ → d ;
75 # Unlike the ejectives ᱜ ᱡ ᱫ ᱵ, ᱛ has no "$inword" voicing rule: it stays t inside words.
76 ᱛ → t ;
# ᱜ: kʼ by default and before ᱼ, kʰ before ᱷ, ɡ before ᱽ. The "$inword {ᱜ}"
# rule also yields ɡ when the preceding character is a letter or mark; only the
# ᱜ inside the braces is replaced.
77 ᱜᱼ → kʼ ;
78 ᱜᱷ → kʰ ;
79 ᱜᱽ → ɡ ;
80 $inword {ᱜ} → ɡ ;
81 ᱜ → kʼ ;
# ᱝ and ᱞ: a following ᱻ adds the length mark ː.
82 ᱝᱻ → ŋː ;
83 ᱝ → ŋ ;
84 ᱞᱻ → lː ;
85 ᱞ → l ;
# ᱟ: a by default; ᱹ changes it to ə, ᱸ nasalises it to ã, and ᱺ gives
# nasalised ə (ə\u0303). A trailing ᱻ adds ː.
86 ᱟᱹᱻ → əː ;
87 ᱟᱹ → ə ;
88 ᱟᱸᱻ → ãː ;
89 ᱟᱸ → ã ;
90 ᱟᱺᱻ → ə\u0303ː ;
91 ᱟᱺ → ə\u0303 ;
92 ᱟᱻ → aː ;
93 ᱟ → a ;
# ᱠ: plain k; ᱼ is ignored, ᱷ aspirates, ᱽ voices. No $inword rule.
94 ᱠᱼ → k ;
95 ᱠᱷ → kʰ ;
96 ᱠᱽ → ɡ ;
97 ᱠ → k ;
# ᱡ: same pattern as ᱜ, with cʼ / cʰ / d\u0361ʒ.
98 ᱡᱼ → cʼ ;
99 ᱡᱷ → cʰ ;
100 ᱡᱽ → d\u0361ʒ ;
101 $inword {ᱡ} → d\u0361ʒ ;
102 ᱡ → cʼ ;
# ᱢ: a following ᱻ adds the length mark ː.
103 ᱢᱻ → mː ;
104 ᱢ → m ;
105 # According to [1], ᱣ is sometimes /v/ and sometimes /w/.
106 # TODO: Find out if there is a rule for this.
# NOTE(review): the header's output list includes wː, but no rule here handles
# ᱣ followed by ᱻ — confirm whether a long form is intended.
107 ᱣᱸ → w\u0303 ;
108 ᱣ → w ;
# ᱤ → i. ᱹ leaves the output unchanged, ᱸ or ᱺ nasalises it to ĩ, and a
# trailing ᱻ adds ː.
109 ᱤᱹᱻ → iː ;
110 ᱤᱹ → i ;
111 ᱤᱸᱻ → ĩː ;
112 ᱤᱸ → ĩ ;
113 ᱤᱺᱻ → ĩː ;
114 ᱤᱺ → ĩ ;
115 ᱤᱻ → iː ;
116 ᱤ → i ;
117 ᱥᱻ → sː ;
118 ᱥ → s ;
119 # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
120 # TODO: Find out if there is a rule for this.
121 ᱦ → h ;
122 ᱧᱻ → ɲː ;
123 ᱧ → ɲ ;
# ᱨ followed by ᱻ consumes the ᱻ but produces the same output as plain ᱨ
# (no length mark).
124 ᱨᱻ → r ;
125 ᱨ → r ;
# ᱩ → u. ᱹ leaves the output unchanged, ᱸ or ᱺ nasalises it to ũ, and a
# trailing ᱻ adds ː.
126 ᱩᱹᱻ → uː ;
127 ᱩᱹ → u ;
128 ᱩᱸᱻ → ũː ;
129 ᱩᱸ → ũ ;
130 ᱩᱺᱻ → ũː ;
131 ᱩᱺ → ũ ;
132 ᱩᱻ → uː ;
133 ᱩ → u ;
# ᱪ: plain c; ᱼ is ignored, ᱷ aspirates, ᱽ voices. No $inword rule.
134 ᱪᱼ → c ;
135 ᱪᱷ → cʰ ;
136 ᱪᱽ → d\u0361ʒ ;
137 ᱪ → c ;
# ᱫ: tʼ by default and before ᱼ, tʰ before ᱷ, d before ᱽ or when preceded by
# a letter or mark ($inword).
# NOTE(review): the header's output list includes dʰ, but no rule produces it;
# ᱫᱷ maps to tʰ here whereas ᱵᱷ maps to bʰ — confirm which is intended.
138 ᱫᱼ → tʼ ;
139 ᱫᱷ → tʰ ;
140 ᱫᱽ → d ;
141 $inword {ᱫ} → d ;
142 ᱫ → tʼ ;
143 ᱬᱻ → ɳː ;
144 ᱬ → ɳ ;
145 # TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
146 ᱭ → h ;
# ᱮ: e by default; ᱹ changes it to ɛ, ᱺ gives nasalised ɛ (ɛ\u0303), and ᱸ
# nasalises it to ẽ. A trailing ᱻ adds ː.
147 ᱮᱹᱻ → ɛː ;
148 ᱮᱹ → ɛ ;
149 ᱮᱺᱻ → ɛ\u0303ː ;
150 ᱮᱺ → ɛ\u0303 ;
151 ᱮᱸᱻ → ẽː ;
152 ᱮᱸ → ẽ ;
153 ᱮᱻ → eː ;
154 ᱮ → e ;
# ᱯ: plain p; ᱼ is ignored, ᱷ aspirates, ᱽ voices. No $inword rule.
155 ᱯᱼ → p ;
156 ᱯᱷ → pʰ ;
157 ᱯᱽ → b ;
158 ᱯ → p ;
159 ᱰᱷ → ɖʰ ;
160 ᱰ → ɖ ;
161 ᱱᱻ → nː ;
162 ᱱ → n ;
# ᱲ followed by ᱻ consumes the ᱻ but produces the same output as plain ᱲ
# (no length mark).
163 ᱲᱻ → ɽ ;
164 ᱲ → ɽ ;
# ᱳ → o; ᱸ nasalises it to õ and a trailing ᱻ adds ː.
# NOTE(review): unlike the other vowels, ᱳ has no rules for a following ᱹ or
# ᱺ — confirm those combinations do not occur.
165 ᱳᱸᱻ → õː ;
166 ᱳᱸ → õ ;
167 ᱳᱻ → oː ;
168 ᱳ → o ;
# ᱴ: plain ʈ; ᱼ is ignored, ᱷ aspirates, ᱽ voices. No $inword rule.
169 ᱴᱼ → ʈ ;
170 ᱴᱷ → ʈʰ ;
171 ᱴᱽ → ɖ ;
172 ᱴ → ʈ ;
# ᱵ: pʼ by default and before ᱼ, bʰ before ᱷ, b before ᱽ or when preceded by
# a letter or mark ($inword).
173 ᱵᱼ → pʼ ;
174 ᱵᱷ → bʰ ;
175 ᱵᱽ → b ;
176 $inword {ᱵ} → b ;
177 ᱵ → pʼ ;
# ᱶ → nasalised w (w\u0303); a following ᱻ adds ː.
178 ᱶᱻ → w\u0303ː ;
179 ᱶ → w\u0303 ;
180