]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Latin_InterIndic.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / Latin_InterIndic.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: Latin_InterIndic.txt
5 # Generated from CLDR
6 #
7
8 # Latin-InterIndic
9 #:: NFD;
10 #\uE000 reserved (the first InterIndic code point assigned below is \uE001)
11 # various signs; the consonant letters proper are $ka through $ha
12 $chandrabindu=\uE001;
13 $anusvara=\uE002;
14 $visarga=\uE003;
15 #\uE004 reserved
16 # w←vowel→ represents the stand-alone form
17 $wa=\uE005; # LETTER A
18 $waa=\uE006; # LETTER AA
19 $wi=\uE007; # LETTER I
20 $wii=\uE008; # LETTER II
21 $wu=\uE009; # LETTER U
22 $wuu=\uE00A; # LETTER UU
23 $wr=\uE00B; # LETTER VOCALIC R
24 $wl=\uE00C; # LETTER VOCALIC L
25 $wce=\uE00D; # LETTER CANDRA E
26 $wse=\uE00E; # LETTER SHORT E
27 $we=\uE00F; # ए LETTER E
28 $wai=\uE010; # LETTER AI
29 $wco=\uE011; # LETTER CANDRA O
30 $wso=\uE012; # LETTER SHORT O
31 $wo=\uE013; # ओ LETTER O
32 $wau=\uE014; # LETTER AU
33 $ka=\uE015; # consonant letters start here; the rules emit each one followed by $virama
34 $kha=\uE016;
35 $ga=\uE017;
36 $gha=\uE018;
37 $nga=\uE019;
38 $ca=\uE01A;
39 $cha=\uE01B;
40 $ja=\uE01C;
41 $jha=\uE01D;
42 $nya=\uE01E;
43 $tta=\uE01F;
44 $ttha=\uE020;
45 $dda=\uE021;
46 $ddha=\uE022;
47 $nna=\uE023;
48 $ta=\uE024;
49 $tha=\uE025;
50 $da=\uE026;
51 $dha=\uE027;
52 $na=\uE028;
53 $ena=\uE029; #compatibility
54 $pa=\uE02A;
55 $pha=\uE02B;
56 $ba=\uE02C;
57 $bha=\uE02D;
58 $ma=\uE02E;
59 $ya=\uE02F;
60 $ra=\uE030;
61 $rra=\uE031;
62 $la=\uE032;
63 $lla=\uE033;
64 $ela=\uE034; #compatibility
65 $va=\uE035;
66 $vva=\uE081; # out of sequence: sits at \uE081, so it falls outside the [$ka-$ha] range
67 $sha=\uE036;
68 $ssa=\uE037;
69 $sa=\uE038;
70 $ha=\uE039; # last letter of the [$ka-$ha] range used by $consonants
71 #\u093A Reserved (presumably the Devanagari counterpart of \uE03A; confirm)
72 #\u093B Reserved (presumably the Devanagari counterpart of \uE03B; confirm)
73 $nukta=\uE03C;
74 $avagraha=\uE03D; # SIGN AVAGRAHA
75 # ←vowel→ represents the dependent form (vowel sign attached to a consonant)
76 $aa=\uE03E; # VOWEL SIGN AA
77 $i=\uE03F; # VOWEL SIGN I
78 $ii=\uE040; # VOWEL SIGN II
79 $u=\uE041; # VOWEL SIGN U
80 $uu=\uE042; # VOWEL SIGN UU
81 $rh=\uE043; # VOWEL SIGN VOCALIC R
82 $rrh=\uE044; # VOWEL SIGN VOCALIC RR
83 $ce=\uE045; #VOWEL SIGN CANDRA E
84 $se=\uE046; #VOWEL SIGN SHORT E
85 $e=\uE047; # VOWEL SIGN E
86 $ai=\uE048; # VOWEL SIGN AI
87 $co=\uE049; # VOWEL SIGN CANDRA O
88 $so=\uE04A; # VOWEL SIGN SHORT O
89 $o=\uE04B; # ो
90 $au=\uE04C; # VOWEL SIGN AU
91 $virama=\uE04D; # emitted after every consonant, then removed or replaced by the vowel rules
92 # \u094E Reserved
93 # \u094F Reserved
94 $om = \uE050; # OM
95 # \u0951→; # UNMAPPED STRESS SIGN UDATTA
96 # \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
97 # \u0953→; # UNMAPPED GRAVE ACCENT
98 # \u0954→; # UNMAPPED ACUTE ACCENT
99 $lm = \uE055;# Telugu Length Mark
100 $ailm=\uE056;# AI Length Mark
101 $aulm=\uE057;# AU Length Mark
102 # Urdu compatibility forms
103 $uka=\uE058; # produced from Latin q
104 $ukha=\uE059; # produced from k\u0331h\u0331
105 $ugha=\uE05A; # produced from g\u0307
106 $ujha=\uE05B; # produced from Latin z
107 $uddha=\uE05C; # produced from r\u0323
108 $udha=\uE05D; # produced from r\u0323h
109 $ufa=\uE05E; # produced from Latin f
110 $uya=\uE05F; # produced from y\u0307
111 $wrr=\uE060; # stand-alone long vocalic r (r\u0325\u0304)
112 $wll=\uE061; # stand-alone long vocalic l (l\u0325\u0304)
113 $lh=\uE062; # dependent vocalic l
114 $llh=\uE063; # dependent long vocalic l
115 $danda=\uE064;
116 $doubleDanda=\uE065;
117 $zero=\uE066; # DIGIT ZERO
118 $one=\uE067; # DIGIT ONE
119 $two=\uE068; # DIGIT TWO
120 $three=\uE069; # DIGIT THREE
121 $four=\uE06A; # DIGIT FOUR
122 $five=\uE06B; # DIGIT FIVE
123 $six=\uE06C; # DIGIT SIX
124 $seven=\uE06D; # DIGIT SEVEN
125 $eight=\uE06E; # DIGIT EIGHT
126 $nine=\uE06F; # DIGIT NINE
127 $dgs=\uE082; # glottal stop; produced from ʔ
128 # For all other scripts ($ecp0-$ecpF are not referenced by any rule visible in this file)
129 $ecp0=\uE070;
130 $ecp1=\uE071;
131 $ecp2=\uE072;
132 $ecp3=\uE073;
133 $ecp4=\uE074;
134 $ecp5=\uE075;
135 $ecp6=\uE076;
136 $ecp7=\uE077;
137 $ecp8=\uE078;
138 $ecp9=\uE079;
139 $ecpA=\uE07A;
140 $ecpB=\uE07B;
141 $ecpC=\uE07C;
142 $ecpD=\uE07D;
143 $ecpE=\uE07E;
144 $ecpF=\uE07F;
145 # Khanda-ta (produced from t\u0331)
146 $kta=\uE083;
147 # ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
148 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # $aa $i $ii and $ce through $au; a following '~' becomes $anusvara
149 $depVowelBelow=[\uE041-\uE044]; # $u $uu $rh $rrh; a following '~' becomes $chandrabindu
150 $endThing=[$danda$doubleDanda]; # a $virama directly before one of these is deleted
151 # $x was originally called '§'; $z was '%' ($x is not referenced by any rule visible in this file)
152 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
153 $z=[bcdfghjklmnpqrstvwxyz]; # the Latin consonant letters
154 $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; # InterIndic, Latin and native-script consonants; used as left context for dependent vowels
155 \u0315 → $avagraha; # combining comma above right
156 \u0303→$chandrabindu$anusvara; # a bare combining tilde yields both signs
157 m\u0310→$chandrabindu; # m + combining candrabindu
158 h\u0323→$visarga; # h + dot below; listed ahead of the plain h rule further down
159 x→$ka$virama$sa; # x expands to the conjunct k + s, with no trailing $virama
160 # convert to independent forms at start of word or syllable:
161 # dependent forms for roundtrip: a vowel prefixed with \u0314 (combining reversed comma above)
162 \u0314a\u0304→$aa;
163 \u0314ai→$ai;
164 \u0314au→$au;
165 \u0314ii→$ii;
166 \u0314i\u0304→$ii;
167 \u0314i→$i;
168 \u0314u\u0304→$uu;
169 \u0314u→$u;
170 \u0314r\u0325\u0304→$rrh;
171 \u0314r\u0325→$rh;
172 \u0314l\u0325\u0304→$llh;
173 \u0314lh→$lh;
174 \u0314l\u0325→$lh;
175 \u0314e\u0304→$e;
176 \u0314o\u0304→$o;
177 \u0314a→; # plain a has no dependent sign, so the marked form is deleted
178 \u0314e\u0306→$ce;
179 \u0314o\u0306→$co;
180 \u0314e→$se;
181 \u0314o→$so;
182 # preceded by consonants: $consonants{ is left context only, so just the vowel is replaced by its dependent form
183 $consonants{ a\u0304→$aa;
184 $consonants{ ai→$ai;
185 $consonants{ au→$au;
186 $consonants{ ii→$ii;
187 $consonants{ i\u0304→$ii;
188 $consonants{ i→$i;
189 $consonants{ u\u0304→$uu;
190 $consonants{ u→$u;
191 $consonants{ r\u0325\u0304→$rrh;
192 $consonants{ r\u0325a→$rh; # a trailing a after vocalic r is consumed as well
193 $consonants{ r\u0325→$rh;
194 $consonants{ l\u0325\u0304→$llh;
195 $consonants{ lh→$lh;
196 $consonants{ l\u0325→$lh;
197 $consonants{ e\u0304→$e;
198 $consonants{ o\u0304→$o;
199 $consonants{ e\u0306→$ce;
200 $consonants{ o\u0306→$co;
201 $consonants{ e→$se;
202 $consonants{ o→$so;
203 # e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
204 a\u0304→$waa; # vowels reaching these rules get the stand-alone (w-prefixed) form
205 ai→$wai;
206 au→$wau;
207 i\u0304→$wii;
208 i→$wi;
209 u\u0304→$wuu;
210 u→$wu;
211 r\u0325\u0304→$wrr;
212 r\u0325→$wr;
213 l\u0325\u0304→$wll;
214 lh→$wl;
215 l\u0325→$wl;
216 e\u0304→$we;
217 o\u0304→$wo;
218 a→$wa;
219 e\u0306→$wce;
220 o\u0306→$wco;
221 e→$wse;
222 ''om→$om; # apostrophe + om gives the OM sign; listed before the apostrophe + o rule further down
223 o→$wso;
224 # rules for anusvara: text after } is right context (checked, not consumed); | sets the cursor position in the output
225 n}r\u0325 → $na|$virama; # before vocalic r/l or na, n stays a real $na
226 n}l\u0325 → $na|$virama;
227 n}na → $na|$virama;
228 n\u0307}[kg] → $anusvara; # nasal before a consonant of its own class becomes $anusvara
229 n\u0307}n\u0307 → $anusvara;
230 n\u0304}[cj] → $anusvara;
231 n\u0304}n\u0303 → $anusvara;
232 n\u0323}[tdn]\u0323 → $anusvara;
233 n}[tdn] → $anusvara;
234 m}[pbm] → $anusvara;
235 n}[ylvshr] → $anusvara;
236 m\u0307 → $anusvara; # m + dot above is always $anusvara
237 # Urdu compatibility: each letter is emitted with $virama; | leaves the cursor before $virama so the vowel rules can act on it
238 q→$uka|$virama;
239 k\u0331h\u0331→$ukha |$virama;
240 g\u0307→ $ugha | $virama;
241 z → $ujha |$virama;
242 f → $ufa|$virama;
243 t\u0331→$kta; # khanda-ta: no $virama is appended
244 # dev (presumably Devanagari; confirm): Latin consonant → InterIndic consonant + $virama, cursor left before $virama
245 y\u0307→$uya|$virama;
246 l\u0331→$ela|$virama;
247 n\u0331→$ena|$virama;
248 n\u0307→$nga|$virama;
249 n\u0303→$nya|$virama;
250 n\u0323→$nna|$virama;
251 t\u0323h→$ttha|$virama; # longer keys come before their prefixes (t\u0323h before t\u0323, kh before k, ...)
252 t\u0323→$tta|$virama;
253 r\u0323h→$udha|$virama;
254 r\u0323→$uddha|$virama;
255 d\u0323h→$ddha|$virama;
256 d\u0323→$dda|$virama;
257 kh→$kha|$virama;
258 k→$ka|$virama;
259 gh→$gha|$virama;
260 g→$ga|$virama;
261 ch→$cha|$virama;
262 c→$ca|$virama;
263 jh→$jha|$virama;
264 j→$ja|$virama;
265 ny→$nya|$virama; # ASCII alternative to n\u0303
266 tth→$ttha|$virama; # ASCII alternative to t\u0323h
267 ddh→$ddha|$virama; # ASCII alternative to d\u0323h
268 th→$tha|$virama;
269 t→$ta|$virama;
270 dh→$dha|$virama;
271 d→$da|$virama;
272 n→$na|$virama;
273 ph→$pha|$virama;
274 p→$pa|$virama;
275 bh→$bha|$virama;
276 b→$ba|$virama;
277 m→$ma|$virama;
278 y→$ya|$virama;
279 r\u0331→$rra|$virama;
280 r→$ra|$virama;
281 l\u0323→$lla|$virama;
282 l→$la|$virama;
283 v→$va|$virama;
284 w\u0307→$vva|$virama;
285 w→$va|$virama; # plain w maps to the same letter as v
286 sh→$sha|$virama;
287 ss→$ssa|$virama;
288 s\u0323→$ssa|$virama;
289 s\u0301→$sha|$virama;
290 s→$sa|$virama;
291 h→$ha|$virama;
292 '.'→$danda; # a full stop becomes a danda
293 $danda'.'→$doubleDanda; # NOTE(review): the rule above leaves the cursor after $danda, so this may only match a $danda already in the input; confirm
294 $depVowelAbove{'~'→$anusvara; # '~' after one of the $depVowelAbove signs
295 $depVowelBelow{'~'→$chandrabindu; # '~' after one of the $depVowelBelow signs
296 # convert to dependent forms after consonant with no vowel:
297 # e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
298 #$virama aa→$aa;
299 $virama a\u0304→$aa; # the $virama and the vowel are both replaced by the single dependent sign
300 $virama ai→$ai;
301 $virama au→$au;
302 $virama ii→$ii;
303 $virama i\u0304→$ii;
304 $virama i→$i;
305 #$virama uu→$uu;
306 $virama u\u0304→$uu;
307 $virama u→$u;
308 #$virama rrh→$rrh;
309 $virama r\u0325\u0304→$rrh;
310 #$virama rh→$rh;
311 $virama r\u0325a→$rh; # a trailing a after vocalic r is consumed as well
312 $virama r\u0325→$rh;
313 $virama l\u0325\u0304→$llh;
314 $virama lh→$lh;
315 $virama l\u0325→$lh;
316 $virama e\u0304→$e;
317 $virama o\u0304→$o;
318 $virama a→; # plain a just removes the $virama, leaving the bare consonant
319 $virama e\u0306→$ce;
320 $virama o\u0306→$co;
321 $virama e→$se;
322 $virama o→$so;
323 # otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
324 #$virama''aa→$waa;
325 $virama''a\u0304→$waa; # '' is an escaped literal apostrophe; NOTE(review): $virama is not in the output here, unlike the example above; confirm
326 $virama''ai→$wai;
327 $virama''au→$wau;
328 #$virama''ii→$wii;
329 $virama''i\u0304→$wii;
330 $virama''i→$wi;
331 #$virama''uu→$wuu;
332 $virama''u\u0304→$wuu;
333 $virama''u→$wu;
334 #$virama''rrh→$wrr;
335 $virama''r\u0325\u0304→$wrr;
336 #$virama''rh→$wr;
337 $virama''r\u0325→$wr;
338 $virama''l\u0325\u0304→$wll;
339 #$virama''lh→$wl;
340 $virama''l\u0325→$wl;
341 $virama''e\u0304→$we;
342 $virama''o\u0304→$wo;
343 $virama''a→$wa;
344 $virama''e\u0306→$wce;
345 $virama''o\u0306→$wco;
346 $virama''e→$wse;
347 $virama''o→$wso;
348 # no virama: apostrophe + vowel with no pending $virama still yields the stand-alone form
349 ''a\u0304→$waa;
350 ''ai→$wai;
351 ''au→$wau;
352 ''i\u0304→$wii;
353 ''i→$wi;
354 ''u\u0304→$wuu;
355 ''u→$wu;
356 ''r\u0325\u0304→$wrr;
357 ''r\u0325→$wr;
358 ''l\u0325\u0304→$wll;
359 ''l\u0325→$wl;
360 ''e\u0304→$we;
361 ''o\u0304→$wo;
362 ''a→$wa;
363 ''e\u0306→$wce;
364 ''o\u0306→$wco;
365 ''e→$wse;
366 ''o→$wso;
367 $virama } [$z] → $virama; # $virama before another Latin consonant is kept (conjunct); the cursor moves past it
368 $virama } ' ' → $virama ; # $virama before a space is kept as well
369 $virama}$endThing→; # $virama directly before a danda or double danda is dropped
370 ʔ→$dgs; # Glottal Stop
371 0→$zero; # ASCII digits map to the InterIndic digits
372 1→$one;
373 2→$two;
374 3→$three;
375 4→$four;
376 5→$five;
377 6→$six;
378 7→$seven;
379 8→$eight;
380 9→$nine;
381 ''→; # any apostrophe not consumed by an earlier rule is removed
382 #:: NFC (NFD) ; (commented out, so no normalization step runs after the rules)
383