]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/sat_Olck_sat_FONIPA.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / sat_Olck_sat_FONIPA.txt
CommitLineData
2ca993e8
A
1# ***************************************************************************
2# *
3# * Copyright (C) 2004-2016, International Business Machines
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
7# File: sat_Olck_sat_FONIPA.txt
8# Generated from CLDR
9#
10
11# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
12# Output
13# ------
14# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
15# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
16# s sː h
17# d\u0361ʒ
18# ɽ r
19# l lː
20# w wː w\u0303 w\u0303ː
21#
22# i iː ĩ ĩː u uː ũ ũː
23# e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
24# ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
25# a aː ã ãː
26# References
27# ----------
28# [1] Michael Everson: Final proposal to encode the Ol Chiki script
29# in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
30# September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
31#
32# [2] George L. Campbell: Compendium of the World's Languages.
33# Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
34# Pages 1454 to 1458.
35# Notes
36# -----
37# According to [1] (page 3), ᱽ can only follow the four ejective
38# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
39# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
40# we have occasionally encountered ᱽ following non-ejective plosives,
41# for example after ᱯ /p/. These might possibly be typos. Our rules
42# try to be resilient and handle ᱯᱽ as /b/.
43#
44# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
45# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
46# ejective, not glottal). In online texts, however, we have frequently
47# encountered ᱼ following non-ejective consonants.
# $inword is the set of all letters (General_Category L) and marks (M).
# It is referenced later in this file as a before-context: `$inword {X}`
# matches X only when the preceding character is in this set.
48$inword = [[:L:][:M:]];
49# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
# Both orders of the two separate marks are folded into the single ᱺ.
50ᱹᱸ → ᱺ ;
51ᱸᱹ → ᱺ ;
# Pass boundary: rules after this line see the output of the rules above
# (presumably ICU's `::Null` pass separator; confirm for this spelling).
52::null();
53# To simplify the rules below, enforce a uniform ordering of marks.
# Each rule swaps one adjacent pair so that ᱹ / ᱸ / ᱺ comes first and
# ᱻ / ᱼ comes second. The mapping rules further down only list sequences
# in that order (e.g. ᱚᱹᱻ, never ᱚᱻᱹ).
54ᱻᱹ → ᱹᱻ ;
55ᱻᱸ → ᱸᱻ ;
56ᱻᱺ → ᱺᱻ ;
57ᱼᱹ → ᱹᱼ ;
58ᱼᱸ → ᱸᱼ ;
59ᱼᱺ → ᱺᱼ ;
# Pass boundary before the next group of rules.
60::null();
61# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
62# long phonemes, presumably because the graphemes look similar in some fonts.
63# Since phaarkaa is used for voicing ejectives and plosives (which cannot
64# be lengthened), we rewrite phaarkaa to relaa.
# Rewrites ᱼ to ᱻ when it follows one of the bracketed letters, optionally
# with any number of ᱹ / ᱸ / ᱺ marks in between. The braces delimit the text
# that is actually replaced; the letter and marks before it are context only.
# NOTE(review): the letter set omits ᱥ, ᱧ and ᱬ, which also have ᱻ (long)
# rules further down; confirm whether that omission is intentional.
65[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
# Pass boundary: the letter-to-IPA rules below see ᱻ instead of ᱼ here.
66::null();
# Letter-to-IPA mapping rules. Within each letter group the longer
# letter+mark sequences are listed before the bare letter (presumably so
# the most specific rule matches first; confirm against ICU rule precedence).
#
# ᱚ: with ᱹ the vowel stays ɔ; ᱸ and ᱺ both give nasalized ɔ\u0303;
# a trailing ᱻ adds the length mark ː.
67ᱚᱹᱻ → ɔː ;
68ᱚᱹ → ɔ ;
69ᱚᱸᱻ → ɔ\u0303ː ;
70ᱚᱸ → ɔ\u0303 ;
71ᱚᱺᱻ → ɔ\u0303ː ;
72ᱚᱺ → ɔ\u0303 ;
73ᱚᱻ → ɔː ;
74ᱚ → ɔ ;
# ᱛ: +ᱼ gives t, +ᱷ gives tʰ, +ᱽ gives d. A bare ᱛ directly preceded by
# an $inword character (letter or mark) becomes d; any other bare ᱛ is t.
75ᱛᱼ → t ;
76ᱛᱷ → tʰ ;
77ᱛᱽ → d ;
78$inword {ᱛ} → d ;
79ᱛ → t ;
# ᱜ: +ᱼ gives kʼ, +ᱷ gives kʰ, +ᱽ gives ɡ. A bare ᱜ directly preceded by
# an $inword character becomes ɡ; any other bare ᱜ is ejective kʼ.
80ᱜᱼ → kʼ ;
81ᱜᱷ → kʰ ;
82ᱜᱽ → ɡ ;
83$inword {ᱜ} → ɡ ;
84ᱜ → kʼ ;
# ᱝ and ᱞ: a trailing ᱻ adds the length mark ː.
85ᱝᱻ → ŋː ;
86ᱝ → ŋ ;
87ᱞᱻ → lː ;
88ᱞ → l ;
# ᱟ: bare is a; with ᱹ it becomes ə; with ᱸ it is nasalized a (ã);
# with ᱺ it is nasalized ə (ə\u0303). A trailing ᱻ adds ː in each case.
89ᱟᱹᱻ → əː ;
90ᱟᱹ → ə ;
91ᱟᱸᱻ → ãː ;
92ᱟᱸ → ã ;
93ᱟᱺᱻ → ə\u0303ː ;
94ᱟᱺ → ə\u0303 ;
95ᱟᱻ → aː ;
96ᱟ → a ;
# ᱠ: +ᱼ gives k, +ᱷ gives kʰ, +ᱽ gives ɡ, bare gives k.
# There is no $inword rule for this letter, unlike ᱜ.
97ᱠᱼ → k ;
98ᱠᱷ → kʰ ;
99ᱠᱽ → ɡ ;
100ᱠ → k ;
# ᱡ: +ᱼ gives cʼ, +ᱷ gives cʰ, +ᱽ gives d\u0361ʒ. A bare ᱡ directly preceded
# by an $inword character (letter or mark) becomes d\u0361ʒ; otherwise cʼ.
101ᱡᱼ → cʼ ;
102ᱡᱷ → cʰ ;
103ᱡᱽ → d\u0361ʒ ;
104$inword {ᱡ} → d\u0361ʒ ;
105ᱡ → cʼ ;
# ᱢ: a trailing ᱻ adds the length mark ː.
106ᱢᱻ → mː ;
107ᱢ → m ;
108# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
109# TODO: Find out if there is a rule for this.
# NOTE(review): the header's Output list includes wː, but there is no
# ᱣᱻ rule here, so no visible rule emits wː; confirm.
110ᱣᱸ → w\u0303 ;
111ᱣ → w ;
# ᱤ: ᱹ leaves the vowel as i; ᱸ and ᱺ both give nasalized ĩ;
# a trailing ᱻ adds ː.
112ᱤᱹᱻ → iː ;
113ᱤᱹ → i ;
114ᱤᱸᱻ → ĩː ;
115ᱤᱸ → ĩ ;
116ᱤᱺᱻ → ĩː ;
117ᱤᱺ → ĩ ;
118ᱤᱻ → iː ;
119ᱤ → i ;
# ᱥ: a trailing ᱻ adds ː.
120ᱥᱻ → sː ;
121ᱥ → s ;
122# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
123# TODO: Find out if there is a rule for this.
124ᱦ → h ;
# ᱧ: a trailing ᱻ adds ː.
125ᱧᱻ → ɲː ;
126ᱧ → ɲ ;
# ᱨ: a trailing ᱻ is consumed but no length mark is emitted (output is
# plain r either way). The header's Output list has no long r.
127ᱨᱻ → r ;
128ᱨ → r ;
# ᱩ: ᱹ leaves the vowel as u; ᱸ and ᱺ both give nasalized ũ;
# a trailing ᱻ adds the length mark ː.
129ᱩᱹᱻ → uː ;
130ᱩᱹ → u ;
131ᱩᱸᱻ → ũː ;
132ᱩᱸ → ũ ;
133ᱩᱺᱻ → ũː ;
134ᱩᱺ → ũ ;
135ᱩᱻ → uː ;
136ᱩ → u ;
# ᱪ: +ᱼ gives c, +ᱷ gives cʰ, +ᱽ gives d\u0361ʒ, bare gives c.
137ᱪᱼ → c ;
138ᱪᱷ → cʰ ;
139ᱪᱽ → d\u0361ʒ ;
140ᱪ → c ;
# ᱫ: +ᱼ gives tʼ, +ᱷ gives tʰ, +ᱽ gives d. A bare ᱫ directly preceded by
# an $inword character (letter or mark) becomes d; otherwise ejective tʼ.
# NOTE(review): the header's Output list includes dʰ, but ᱫᱷ maps to tʰ
# and no visible rule emits dʰ; confirm.
141ᱫᱼ → tʼ ;
142ᱫᱷ → tʰ ;
143ᱫᱽ → d ;
144$inword {ᱫ} → d ;
145ᱫ → tʼ ;
# ᱬ: a trailing ᱻ adds ː.
146ᱬᱻ → ɳː ;
147ᱬ → ɳ ;
148# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
149ᱭ → h ;
# ᱮ: bare is e; with ᱹ it becomes ɛ; with ᱺ it is nasalized ɛ (ɛ\u0303);
# with ᱸ it is nasalized e (ẽ). A trailing ᱻ adds ː in each case.
150ᱮᱹᱻ → ɛː ;
151ᱮᱹ → ɛ ;
152ᱮᱺᱻ → ɛ\u0303ː ;
153ᱮᱺ → ɛ\u0303 ;
154ᱮᱸᱻ → ẽː ;
155ᱮᱸ → ẽ ;
156ᱮᱻ → eː ;
157ᱮ → e ;
# ᱯ: +ᱼ gives p, +ᱷ gives pʰ, +ᱽ gives b (the resilience case described in
# the Notes at the top of the file), bare gives p.
158ᱯᱼ → p ;
159ᱯᱷ → pʰ ;
160ᱯᱽ → b ;
161ᱯ → p ;
# ᱰ: +ᱷ gives ɖʰ, bare gives ɖ.
162ᱰᱷ → ɖʰ ;
163ᱰ → ɖ ;
# ᱱ: a trailing ᱻ adds the length mark ː.
164ᱱᱻ → nː ;
165ᱱ → n ;
# ᱲ: a trailing ᱻ is consumed but no length mark is emitted (plain ɽ).
166ᱲᱻ → ɽ ;
167ᱲ → ɽ ;
# ᱳ: only the ᱸ (nasalized õ) and ᱻ (long) combinations are listed;
# there are no ᱹ or ᱺ rules for this vowel.
168ᱳᱸᱻ → õː ;
169ᱳᱸ → õ ;
170ᱳᱻ → oː ;
171ᱳ → o ;
# ᱴ: +ᱼ gives ʈ, +ᱷ gives ʈʰ, +ᱽ gives ɖ, bare gives ʈ.
172ᱴᱼ → ʈ ;
173ᱴᱷ → ʈʰ ;
174ᱴᱽ → ɖ ;
175ᱴ → ʈ ;
# ᱵ: +ᱼ gives pʼ, +ᱷ gives bʰ, +ᱽ gives b. A bare ᱵ directly preceded by
# an $inword character (letter or mark) becomes b; otherwise ejective pʼ.
# NOTE(review): ᱵᱷ yields voiced bʰ, whereas the other ejective letters
# ᱜᱷ / ᱡᱷ / ᱫᱷ yield voiceless kʰ / cʰ / tʰ; confirm this is intended.
176ᱵᱼ → pʼ ;
177ᱵᱷ → bʰ ;
178ᱵᱽ → b ;
179$inword {ᱵ} → b ;
180ᱵ → pʼ ;
# ᱶ: nasalized w (w\u0303); a trailing ᱻ adds ː.
181ᱶᱻ → w\u0303ː ;
182ᱶ → w\u0303 ;
183