]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Latin_InterIndic.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Latin_InterIndic.txt
1 # ***************************************************************************
2 # *
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5 # *
6 # ***************************************************************************
7 # File: Latin_InterIndic.txt
8 # Generated from CLDR
9 #
10
11 # Latin-InterIndic
12 #:: NFD;
13 #\uE000 reserved
14 # signs, then stand-alone vowels, then consonants
15 $chandrabindu=\uE001;
16 $anusvara=\uE002;
17 $visarga=\uE003;
18 #\uE004 reserved
19 # w<vowel> represents the stand-alone form
20 $wa=\uE005;
21 $waa=\uE006;
22 $wi=\uE007;
23 $wii=\uE008;
24 $wu=\uE009;
25 $wuu=\uE00A;
26 $wr=\uE00B;
27 $wl=\uE00C;
28 $wce=\uE00D; # LETTER CANDRA E
29 $wse=\uE00E; # LETTER SHORT E
30 $we=\uE00F; # ए LETTER E
31 $wai=\uE010;
32 $wco=\uE011; # LETTER CANDRA O
33 $wso=\uE012; # LETTER SHORT O
34 $wo=\uE013; # ओ LETTER O
35 $wau=\uE014;
36 $ka=\uE015;
37 $kha=\uE016;
38 $ga=\uE017;
39 $gha=\uE018;
40 $nga=\uE019;
41 $ca=\uE01A;
42 $cha=\uE01B;
43 $ja=\uE01C;
44 $jha=\uE01D;
45 $nya=\uE01E;
46 $tta=\uE01F;
47 $ttha=\uE020;
48 $dda=\uE021;
49 $ddha=\uE022;
50 $nna=\uE023;
51 $ta=\uE024;
52 $tha=\uE025;
53 $da=\uE026;
54 $dha=\uE027;
55 $na=\uE028;
56 $ena=\uE029; #compatibility
57 $pa=\uE02A;
58 $pha=\uE02B;
59 $ba=\uE02C;
60 $bha=\uE02D;
61 $ma=\uE02E;
62 $ya=\uE02F;
63 $ra=\uE030;
64 $rra=\uE031;
65 $la=\uE032;
66 $lla=\uE033;
67 $ela=\uE034; #compatibility
68 $va=\uE035;
69 $vva=\uE081; # assigned out of sequence, so it lies outside the [$ka-$ha] range used by $consonants
70 $sha=\uE036;
71 $ssa=\uE037;
72 $sa=\uE038;
73 $ha=\uE039;
74 #\u093A Reserved (the matching slot \uE03A is not assigned below)
75 #\u093B Reserved (the matching slot \uE03B is not assigned below)
76 $nukta=\uE03C;
77 $avagraha=\uE03D; # SIGN AVAGRAHA
78 # <vowel> represents the dependent form
79 $aa=\uE03E;
80 $i=\uE03F;
81 $ii=\uE040;
82 $u=\uE041;
83 $uu=\uE042;
84 $rh=\uE043;
85 $rrh=\uE044;
86 $ce=\uE045; #VOWEL SIGN CANDRA E
87 $se=\uE046; #VOWEL SIGN SHORT E
88 $e=\uE047;
89 $ai=\uE048;
90 $co=\uE049; # VOWEL SIGN CANDRA O
91 $so=\uE04A; # VOWEL SIGN SHORT O
92 $o=\uE04B; # ो
93 $au=\uE04C;
94 $virama=\uE04D;
95 # \u094E Reserved (the matching slot \uE04E is not assigned below)
96 # \u094F Reserved (the matching slot \uE04F is not assigned below)
97 $om = \uE050; # OM
98 # \u0951→; # UNMAPPED STRESS SIGN UDATTA
99 # \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
100 # \u0953→; # UNMAPPED GRAVE ACCENT
101 # \u0954→; # UNMAPPED ACUTE ACCENT
102 $lm = \uE055;# Telugu Length Mark
103 $ailm=\uE056;# AI Length Mark
104 $aulm=\uE057;# AU Length Mark
105 # Urdu compatibility forms
106 $uka=\uE058;
107 $ukha=\uE059;
108 $ugha=\uE05A;
109 $ujha=\uE05B;
110 $uddha=\uE05C;
111 $udha=\uE05D;
112 $ufa=\uE05E;
113 $uya=\uE05F;
114 $wrr=\uE060;
115 $wll=\uE061;
116 $lh=\uE062;
117 $llh=\uE063;
118 $danda=\uE064;
119 $doubleDanda=\uE065;
120 $zero=\uE066; # DIGIT ZERO
121 $one=\uE067; # DIGIT ONE
122 $two=\uE068; # DIGIT TWO
123 $three=\uE069; # DIGIT THREE
124 $four=\uE06A; # DIGIT FOUR
125 $five=\uE06B; # DIGIT FIVE
126 $six=\uE06C; # DIGIT SIX
127 $seven=\uE06D; # DIGIT SEVEN
128 $eight=\uE06E; # DIGIT EIGHT
129 $nine=\uE06F; # DIGIT NINE
130 $dgs=\uE082; # produced by the glottal-stop rule near the end of this file
131 # For all other scripts
132 $ecp0=\uE070;
133 $ecp1=\uE071;
134 $ecp2=\uE072;
135 $ecp3=\uE073;
136 $ecp4=\uE074;
137 $ecp5=\uE075;
138 $ecp6=\uE076;
139 $ecp7=\uE077;
140 $ecp8=\uE078;
141 $ecp9=\uE079;
142 $ecpA=\uE07A;
143 $ecpB=\uE07B;
144 $ecpC=\uE07C;
145 $ecpD=\uE07D;
146 $ecpE=\uE07E;
147 $ecpF=\uE07F;
148 # Khanda-ta
149 $kta=\uE083;
150 # ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
151 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # covers $aa-$ii and $ce-$au
152 $depVowelBelow=[\uE041-\uE044]; # covers $u-$rrh
153 $endThing=[$danda$doubleDanda];
154 # $x was originally called '§'; $z was '%'
155 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; # NOTE(review): no rule visible in this file references $x - confirm before relying on it
156 $z=[bcdfghjklmnpqrstvwxyz]; # the lowercase Latin letters other than a, e, i, o, u
157 $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; # InterIndic $ka-$ha, the Latin letters in $z, and literal ka-to-ha ranges in nine Indic scripts
158 \u0315 → $avagraha; # U+0315 COMBINING COMMA ABOVE RIGHT
159 \u0303→$chandrabindu$anusvara; # a bare U+0303 COMBINING TILDE emits both signs
160 m\u0310→$chandrabindu; # m + U+0310 COMBINING CANDRABINDU
161 h\u0323→$visarga; # h + U+0323 COMBINING DOT BELOW
162 x→$ka$virama$sa; # Latin x expands to the cluster ka + virama + sa
163 # convert to independent forms at start of word or syllable:
164 # dependent forms for round-trip: U+0314 (COMBINING REVERSED COMMA ABOVE) followed by a vowel yields the dependent vowel sign
165 \u0314a\u0304→$aa; # longer vowel spellings are listed before their one-letter prefixes
166 \u0314ai→$ai;
167 \u0314au→$au;
168 \u0314ii→$ii;
169 \u0314i\u0304→$ii;
170 \u0314i→$i;
171 \u0314u\u0304→$uu;
172 \u0314u→$u;
173 \u0314r\u0325\u0304→$rrh;
174 \u0314r\u0325→$rh;
175 \u0314l\u0325\u0304→$llh;
176 \u0314lh→$lh;
177 \u0314l\u0325→$lh;
178 \u0314e\u0304→$e;
179 \u0314o\u0304→$o;
180 \u0314a→; # U+0314 followed by plain a is deleted; nothing is emitted
181 \u0314e\u0306→$ce;
182 \u0314o\u0306→$co;
183 \u0314e→$se;
184 \u0314o→$so;
185 # preceded by consonants: the text before { is context that must match but is not replaced
186 $consonants{ a\u0304→$aa;
187 $consonants{ ai→$ai;
188 $consonants{ au→$au;
189 $consonants{ ii→$ii;
190 $consonants{ i\u0304→$ii;
191 $consonants{ i→$i;
192 $consonants{ u\u0304→$uu;
193 $consonants{ u→$u;
194 $consonants{ r\u0325\u0304→$rrh;
195 $consonants{ r\u0325a→$rh; # also consumes the a that follows the vocalic r
196 $consonants{ r\u0325→$rh;
197 $consonants{ l\u0325\u0304→$llh;
198 $consonants{ lh→$lh;
199 $consonants{ l\u0325→$lh;
200 $consonants{ e\u0304→$e;
201 $consonants{ o\u0304→$o;
202 $consonants{ e\u0306→$ce;
203 $consonants{ o\u0306→$co;
204 $consonants{ e→$se;
205 $consonants{ o→$so;
206 # vowels with no preceding context map to the stand-alone forms; e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
207 a\u0304→$waa;
208 ai→$wai;
209 au→$wau;
210 i\u0304→$wii;
211 i→$wi;
212 u\u0304→$wuu;
213 u→$wu;
214 r\u0325\u0304→$wrr;
215 r\u0325→$wr;
216 l\u0325\u0304→$wll;
217 lh→$wl;
218 l\u0325→$wl;
219 e\u0304→$we;
220 o\u0304→$wo;
221 a→$wa;
222 e\u0306→$wce;
223 o\u0306→$wco;
224 e→$wse;
225 ''om→$om; # the doubled quote is a literal apostrophe, so this matches apostrophe + om
226 o→$wso;
227 # rules for anusvara: the text after } is trailing context that must match but is not consumed
228 n}r\u0325 → $na|$virama; # before vocalic r, vocalic l or na the n stays a consonant instead of becoming anusvara
229 n}l\u0325 → $na|$virama;
230 n}na → $na|$virama;
231 n\u0307}[kg] → $anusvara; # n + U+0307 COMBINING DOT ABOVE before k or g
232 n\u0307}n\u0307 → $anusvara;
233 n\u0304}[cj] → $anusvara; # n + U+0304 COMBINING MACRON before c or j
234 n\u0304}n\u0303 → $anusvara;
235 n\u0323}[tdn]\u0323 → $anusvara; # n + U+0323 COMBINING DOT BELOW before a dotted t, d or n
236 n}[tdn] → $anusvara;
237 m}[pbm] → $anusvara;
238 n}[ylvshr] → $anusvara;
239 m\u0307 → $anusvara; # m + U+0307 is anusvara regardless of what follows
240 # Urdu compatibility
241 q→$uka|$virama; # | leaves the cursor before $virama so that the later rules starting with $virama can still apply
242 k\u0331h\u0331→$ukha |$virama;
243 g\u0307→ $ugha | $virama;
244 z → $ujha |$virama;
245 f → $ufa|$virama;
246 t\u0331→$kta; # emitted without a virama, unlike the other consonants in this section
247 # dev (presumably Devanagari - confirm)
248 y\u0307→$uya|$virama;
249 l\u0331→$ela|$virama;
250 n\u0331→$ena|$virama;
251 n\u0307→$nga|$virama;
252 n\u0303→$nya|$virama;
253 n\u0323→$nna|$virama;
254 t\u0323h→$ttha|$virama;
255 t\u0323→$tta|$virama;
256 r\u0323h→$udha|$virama;
257 r\u0323→$uddha|$virama;
258 d\u0323h→$ddha|$virama;
259 d\u0323→$dda|$virama;
260 kh→$kha|$virama; # aspirated digraphs are listed before the plain letter they start with
261 k→$ka|$virama;
262 gh→$gha|$virama;
263 g→$ga|$virama;
264 ch→$cha|$virama;
265 c→$ca|$virama;
266 jh→$jha|$virama;
267 j→$ja|$virama;
268 ny→$nya|$virama; # NOTE(review): the earlier rule n}[ylvshr] appears to match an n before y first - confirm whether this rule is reachable
269 tth→$ttha|$virama;
270 ddh→$ddha|$virama;
271 th→$tha|$virama;
272 t→$ta|$virama;
273 dh→$dha|$virama;
274 d→$da|$virama;
275 n→$na|$virama;
276 ph→$pha|$virama;
277 p→$pa|$virama;
278 bh→$bha|$virama;
279 b→$ba|$virama;
280 m→$ma|$virama;
281 y→$ya|$virama;
282 r\u0331→$rra|$virama;
283 r→$ra|$virama;
284 l\u0323→$lla|$virama;
285 l→$la|$virama;
286 v→$va|$virama;
287 w\u0307→$vva|$virama;
288 w→$va|$virama; # plain w maps to the same letter as v
289 sh→$sha|$virama;
290 ss→$ssa|$virama;
291 s\u0323→$ssa|$virama;
292 s\u0301→$sha|$virama;
293 s→$sa|$virama;
294 h→$ha|$virama;
295 '.'→$danda; # a full stop becomes a danda
296 $danda'.'→$doubleDanda; # NOTE(review): the rule above already consumes every full stop and leaves the cursor after $danda, so this looks reachable only when the input itself contains \uE064 - confirm
297 $depVowelAbove{'~'→$anusvara; # tilde after a sign in $depVowelAbove
298 $depVowelBelow{'~'→$chandrabindu; # tilde after a sign in $depVowelBelow
299 # convert to dependent forms after consonant with no vowel:
300 # e.g. kai -> {ka}{virama}ai -> {ka}{ai}
301 #$virama aa→$aa;
302 $virama a\u0304→$aa; # each rule replaces the virama together with the vowel letters
303 $virama ai→$ai;
304 $virama au→$au;
305 $virama ii→$ii;
306 $virama i\u0304→$ii;
307 $virama i→$i;
308 #$virama uu→$uu;
309 $virama u\u0304→$uu;
310 $virama u→$u;
311 #$virama rrh→$rrh;
312 $virama r\u0325\u0304→$rrh;
313 #$virama rh→$rh;
314 $virama r\u0325a→$rh;
315 $virama r\u0325→$rh;
316 $virama l\u0325\u0304→$llh;
317 $virama lh→$lh;
318 $virama l\u0325→$lh;
319 $virama e\u0304→$e;
320 $virama o\u0304→$o;
321 $virama a→; # virama + plain a: both are removed, leaving the bare consonant
322 $virama e\u0306→$ce;
323 $virama o\u0306→$co;
324 $virama e→$se;
325 $virama o→$so;
326 # otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
327 #$virama''aa→$waa;
328 $virama''a\u0304→$waa; # the doubled quote is a literal apostrophe; the rule replaces virama + apostrophe + vowel
329 $virama''ai→$wai;
330 $virama''au→$wau;
331 #$virama''ii→$wii;
332 $virama''i\u0304→$wii;
333 $virama''i→$wi;
334 #$virama''uu→$wuu;
335 $virama''u\u0304→$wuu;
336 $virama''u→$wu;
337 #$virama''rrh→$wrr;
338 $virama''r\u0325\u0304→$wrr;
339 #$virama''rh→$wr;
340 $virama''r\u0325→$wr;
341 $virama''l\u0325\u0304→$wll;
342 #$virama''lh→$wl;
343 $virama''l\u0325→$wl;
344 $virama''e\u0304→$we;
345 $virama''o\u0304→$wo;
346 $virama''a→$wa;
347 $virama''e\u0306→$wce;
348 $virama''o\u0306→$wco;
349 $virama''e→$wse;
350 $virama''o→$wso;
351 # no virama: a literal apostrophe followed by a vowel gives the stand-alone form
352 ''a\u0304→$waa;
353 ''ai→$wai;
354 ''au→$wau;
355 ''i\u0304→$wii;
356 ''i→$wi;
357 ''u\u0304→$wuu;
358 ''u→$wu;
359 ''r\u0325\u0304→$wrr;
360 ''r\u0325→$wr;
361 ''l\u0325\u0304→$wll;
362 ''l\u0325→$wl;
363 ''e\u0304→$we;
364 ''o\u0304→$wo;
365 ''a→$wa;
366 ''e\u0306→$wce;
367 ''o\u0306→$wco;
368 ''e→$wse;
369 ''o→$wso;
370 $virama } [$z] → $virama; # virama before a Latin consonant letter is kept as is
371 $virama } ' ' → $virama ; # virama before a space is kept as is
372 $virama}$endThing→; # virama directly before a danda or double danda is removed
373 ʔ→$dgs; # Glottal Stop
374 0→$zero; # ASCII digits map to the InterIndic digit code points
375 1→$one;
376 2→$two;
377 3→$three;
378 4→$four;
379 5→$five;
380 6→$six;
381 7→$seven;
382 8→$eight;
383 9→$nine;
384 ''→; # any literal apostrophe not consumed by an earlier rule is deleted
385 #:: NFC (NFD) ;
386