]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/InterIndic_Latin.txt
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / InterIndic_Latin.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: InterIndic_Latin.txt
5 # Generated from CLDR
6 #
7
8 # InterIndic-Latin
9 #\uE000 reserved
10 # signs and letters (InterIndic private-use code points, \uE001 and up)
11 $chandrabindu=\uE001;
12 $anusvara=\uE002;
13 $visarga=\uE003;
14 #\uE004 reserved (no variable is defined for it)
15 # w←vowel→ represents the stand-alone (independent) form of a vowel
16 $wa=\uE005;
17 $waa=\uE006;
18 $wi=\uE007;
19 $wii=\uE008;
20 $wu=\uE009;
21 $wuu=\uE00A;
22 $wr=\uE00B; # dependent counterpart is $rh
23 $wl=\uE00C; # dependent counterpart is $lh
24 $wce=\uE00D; # LETTER CANDRA E
25 $wse=\uE00E; # LETTER SHORT E
26 $we=\uE00F; # ए LETTER E
27 $wai=\uE010;
28 $wco=\uE011; # LETTER CANDRA O
29 $wso=\uE012; # LETTER SHORT O
30 $wo=\uE013; # ओ LETTER O
31 $wau=\uE014;
32 $ka=\uE015; # consonants begin here; each name includes the inherent 'a'
33 $kha=\uE016;
34 $ga=\uE017;
35 $gha=\uE018;
36 $nga=\uE019;
37 $ca=\uE01A;
38 $cha=\uE01B;
39 $ja=\uE01C;
40 $jha=\uE01D;
41 $nya=\uE01E;
42 $tta=\uE01F;
43 $ttha=\uE020;
44 $dda=\uE021;
45 $ddha=\uE022;
46 $nna=\uE023;
47 $ta=\uE024;
48 $tha=\uE025;
49 $da=\uE026;
50 $dha=\uE027;
51 $na=\uE028;
52 $ena=\uE029; # compatibility; transliterated like $na$nukta
53 $pa=\uE02A;
54 $pha=\uE02B;
55 $ba=\uE02C;
56 $bha=\uE02D;
57 $ma=\uE02E;
58 $ya=\uE02F;
59 $ra=\uE030;
60 $vva=\uE081; # code point lies outside the \uE02F-\uE039 run of its neighbours
61 $rra=\uE031;
62 $la=\uE032;
63 $lla=\uE033;
64 $ela=\uE034; # compatibility; transliterated like $lla$nukta
65 $va=\uE035;
66 $sha=\uE036;
67 $ssa=\uE037;
68 $sa=\uE038;
69 $ha=\uE039;
70 #\u093A Reserved (presumably \uE03A in this private-use numbering; confirm)
71 #\u093B Reserved (presumably \uE03B in this private-use numbering; confirm)
72 $nukta=\uE03C;
73 $avagraha=\uE03D; # SIGN AVAGRAHA
74 # ←vowel→ represents the dependent form (vowel sign attached to a consonant)
75 $aa=\uE03E;
76 $i=\uE03F;
77 $ii=\uE040;
78 $u=\uE041;
79 $uu=\uE042;
80 $rh=\uE043; # stand-alone counterpart is $wr
81 $rrh=\uE044; # stand-alone counterpart is $wrr
82 $ce=\uE045; #VOWEL SIGN CANDRA E
83 $se=\uE046; #VOWEL SIGN SHORT E
84 $e=\uE047;
85 $ai=\uE048;
86 $co=\uE049; # VOWEL SIGN CANDRA O
87 $so=\uE04A; # VOWEL SIGN SHORT O
88 $o=\uE04B; # ो
89 $au=\uE04C;
90 $virama=\uE04D; # removes the consonant's inherent 'a' in the rules below
91 # \u094E Reserved
92 # \u094F Reserved
93 $om=\uE050; # OM
94 \uE051→; # UNMAPPED STRESS SIGN UDATTA (deleted from the output)
95 \uE052→; # UNMAPPED STRESS SIGN ANUDATTA (deleted from the output)
96 \uE053→; # UNMAPPED GRAVE ACCENT (deleted from the output)
97 \uE054→; # UNMAPPED ACUTE ACCENT (deleted from the output)
98 $lm = \uE055;# Telugu Length Mark
99 $ailm=\uE056;# AI Length Mark
100 $aulm=\uE057;# AU Length Mark
101 # Urdu compatibility forms
102 $uka=\uE058;
103 $ukha=\uE059;
104 $ugha=\uE05A;
105 $ujha=\uE05B;
106 $uddha=\uE05C;
107 $udha=\uE05D;
108 $ufa=\uE05E;
109 $uya=\uE05F;
110 $wrr=\uE060; # stand-alone form; dependent counterpart is $rrh
111 $wll=\uE061; # stand-alone form; dependent counterpart is $llh
112 $lh=\uE062; # dependent form; stand-alone counterpart is $wl
113 $llh=\uE063; # dependent form; stand-alone counterpart is $wll
114 $danda=\uE064;
115 $doubleDanda=\uE065;
116 $zero=\uE066; # DIGIT ZERO
117 $one=\uE067; # DIGIT ONE
118 $two=\uE068; # DIGIT TWO
119 $three=\uE069; # DIGIT THREE
120 $four=\uE06A; # DIGIT FOUR
121 $five=\uE06B; # DIGIT FIVE
122 $six=\uE06C; # DIGIT SIX
123 $seven=\uE06D; # DIGIT SEVEN
124 $eight=\uE06E; # DIGIT EIGHT
125 $nine=\uE06F; # DIGIT NINE
126 # Glottal stop
127 $dgs=\uE082;
128 # Khanda-ta
129 $kta=\uE083;
130 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # $aa-$ii and $ce-$au; not referenced by the rules visible in this file
131 $depVowelBelow=[\uE041-\uE044]; # $u-$rrh; not referenced by the rules visible in this file
132 # $x was originally called '§'; $z was '%'
133 $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; # every dependent vowel sign; used as look-ahead to drop the inherent 'a'
134 $z=[bcdfghjklmnpqrstvwxyz]; # Latin consonant letters; not referenced by the rules visible in this file
135 $vowels=[aeiour\u0304\u0325\u0306]; # Latin output that can end a transliterated vowel (letters plus combining macron, ring below, breve)
136 $forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; # anything that is neither a letter nor a combining mark in \u0300-\u034C
137 ######################################################################
138 # convert from Native letters to Latin letters
139 ######################################################################
140 # Transliterations for anusvara: before a consonant of a given row it is written as that row's nasal (look-ahead only, the consonant is not consumed); otherwise m\u0307.
141 $anusvara} [$ka$kha$ga$gha$nga] → n\u0307; # same output as $nga
142 $anusvara} [$ca$cha$ja$jha$nya] → n\u0303; # same output as $nya (tilde, not macron \u0304)
143 $anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; # same output as $nna
144 $anusvara} [$ta$tha$da$dha$na] → n;
145 $anusvara} [$pa$pha$ba$bha$ma] → m;
146 $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n;
147 $anusvara→ m\u0307; # fallback when no listed consonant follows
148 # Urdu compatibility: nukta-modified consonants and their precomposed compatibility forms. Pattern per letter: before a dependent vowel ($x, look-ahead) → bare consonant; with virama → bare consonant; otherwise → consonant + inherent 'a'. These must stay ahead of the plain consonant rules.
149 $ya$nukta}$x → y\u0307;
150 $ya$nukta$virama → y\u0307;
151 $ya$nukta → y\u0307a;
152 $la$nukta }$x → l\u0331;
153 $la$nukta$virama → l\u0331;
154 $la$nukta → l\u0331a;
155 $na$nukta }$x → n\u0331;
156 $na$nukta$virama → n\u0331;
157 $na$nukta → n\u0331a;
158 $ena }$x → n\u0331;
159 $ena$virama → n\u0331;
160 $ena → n\u0331a;
161 $uka → qa; # NOTE(review): no }$x / $virama variants here, unlike $ka$nukta below; confirm intended
162 $ka$nukta }$x → q;
163 $ka$nukta$virama → q;
164 $ka$nukta → qa;
165 $kha$nukta }$x → k\u0331h\u0331;
166 $kha$nukta$virama → k\u0331h\u0331;
167 $kha$nukta → k\u0331h\u0331a;
168 $ukha$virama → k\u0331h\u0331; # NOTE(review): no }$x variant for $ukha; confirm intended
169 $ukha → k\u0331h\u0331a;
170 $ugha → g\u0307a; # NOTE(review): no }$x / $virama variants, unlike $ga$nukta below; confirm intended
171 $ga$nukta }$x → g\u0307;
172 $ga$nukta$virama → g\u0307;
173 $ga$nukta → g\u0307a;
174 $ujha → za; # NOTE(review): no }$x / $virama variants, unlike $ja$nukta below; confirm intended
175 $ja$nukta }$x → z;
176 $ja$nukta$virama → z;
177 $ja$nukta → za;
178 $ddha$nukta}$x → r\u0323h;
179 $ddha$nukta$virama → r\u0323h;
180 $ddha$nukta → r\u0323ha;
181 $uddha}$x → r\u0323; # NOTE(review): same output as $dda$nukta (no 'h'), unlike $ddha$nukta above; confirm intended
182 $uddha$virama → r\u0323;
183 $uddha → r\u0323a;
184 $udha → r\u0323a;
185 $dda$nukta}$x → r\u0323;
186 $dda$nukta$virama → r\u0323;
187 $dda$nukta → r\u0323a;
188 $pha$nukta }$x → f;
189 $pha$nukta$virama → f;
190 $pha$nukta → fa;
191 $ufa }$x → f;
192 $ufa$virama → f;
193 $ufa → fa;
194 $ra$nukta}$x → r\u0331;
195 $ra$nukta$virama → r\u0331;
196 $ra$nukta → r\u0331a;
197 $lla$nukta}$x → l\u0331;
198 $lla$nukta$virama → l\u0331;
199 $lla$nukta → l\u0331a;
200 $ela}$x → l\u0331;
201 $ela$virama → l\u0331;
202 $ela → l\u0331a;
203 $uya}$x → y\u0307;
204 $uya$virama → y\u0307;
205 $uya → y\u0307a;
206 # normal consonants. Rules per consonant, tried in order: consonant+virama before $ha → consonant plus a literal apostrophe ('' is an escaped '), presumably so that e.g. k+h is not read back as kh; consonant before a dependent vowel ($x, look-ahead) → bare consonant; consonant+virama → bare consonant; otherwise → consonant plus inherent 'a'.
207 $ka$virama}$ha→k'';
208 $ka}$x→k;
209 $ka$virama→k;
210 $ka→ka;
211 $kha}$x→kh;
212 $kha$virama→kh;
213 $kha→kha;
214 $ga$virama}$ha→g'';
215 $ga}$x→g;
216 $ga$virama→g;
217 $ga→ga;
218 $gha}$x→gh;
219 $gha$virama→gh;
220 $gha→gha;
221 $nga}$x→n\u0307;
222 $nga$virama→n\u0307;
223 $nga→n\u0307a;
224 $ca$virama}$ha→c'';
225 $ca}$x→c;
226 $ca$virama→c;
227 $ca→ca;
228 $cha}$x→ch;
229 $cha$virama→ch;
230 $cha→cha;
231 $ja$virama}$ha→j'';
232 $ja}$x→j;
233 $ja$virama→j;
234 $ja→ja;
235 $jha}$x→jh;
236 $jha$virama→jh;
237 $jha→jha;
238 $nya }$x→n\u0303;
239 $nya$virama→n\u0303;
240 $nya → n\u0303a;
241 $tta$virama}$ha→t\u0323''; # tta..nna: consonant+virama before $ha gets a literal apostrophe ('' is an escaped ') so it is not read as the aspirated digraph
242 $tta}$x→t\u0323; # before a dependent vowel (look-ahead): no inherent 'a'
243 $tta$virama→t\u0323;
244 $tta→t\u0323a;
245 $ttha}$x→t\u0323h;
246 $ttha$virama→t\u0323h;
247 $ttha→t\u0323ha;
248 $dda$virama}$ha→d\u0323''; # consume the virama and look ahead to $ha, as for $tta above (was "$dda}$x$ha", which matched dda + vowel sign + ha instead)
249 $dda}$x→d\u0323;
250 $dda$virama→d\u0323;
251 $dda→d\u0323a;
252 $ddha}$x→d\u0323h;
253 $ddha$virama→d\u0323h;
254 $ddha→d\u0323ha;
255 $nna}$x→n\u0323;
256 $nna$virama→n\u0323;
257 $nna→n\u0323a;
258 $ta$virama}$ha→t''; # ta..ma: consonant+virama before certain following consonants gets a literal apostrophe ('' is an escaped ')
259 $ta$virama}$ttha→t'';
260 $ta$virama}$tta→t'';
261 $ta$virama}$tha→t'';
262 $ta}$x→t; # before a dependent vowel (look-ahead): no inherent 'a'
263 $ta$virama→t;
264 $ta→ta;
265 $tha}$x→th;
266 $tha$virama→th;
267 $tha→tha;
268 $da$virama}$ha→d'';
269 $da$virama}$ddha→d'';
270 $da$virama}$dda→d'';
271 $da$virama}$dha→d'';
272 $da}$x→d;
273 $da$virama→d;
274 $da→da;
275 $dha}$x→dh;
276 $dha$virama→dh;
277 $dha→dha;
278 $na$virama}$ga→n''; # presumably keeps n+g distinct from a digraph on the reverse pass; confirm
279 $na$virama}$ya→n''; # presumably keeps n+y distinct from a digraph on the reverse pass; confirm
280 $na}$x→n;
281 $na$virama→n;
282 $na→na;
283 $pa$virama}$ha→p'';
284 $pa}$x→p;
285 $pa$virama→p;
286 $pa→pa;
287 $pha}$x→ph;
288 $pha$virama→ph;
289 $pha→pha;
290 $ba$virama}$ha→b'';
291 $ba}$x→b;
292 $ba$virama→b;
293 $ba→ba;
294 $bha}$x→bh;
295 $bha$virama→bh;
296 $bha→bha;
297 $ma$virama}$ma→m''; # NOTE(review): looks ahead to $ma, where most sibling rules look ahead to $ha; confirm intended
298 $ma}$x→m;
299 $ma$virama→m;
300 $ma→ma;
301 $ya}$x→y; # ya..ha: before a dependent vowel (look-ahead) the inherent 'a' is dropped
302 $ya$virama→y;
303 $ya→ya;
304 $ra$virama}$ha→r''; # '' is an escaped literal apostrophe
305 $ra}$x→r;
306 $ra$virama→r;
307 $ra→ra;
308 $vva$virama}$ha→w\u0307'';
309 $vva}$x→w\u0307;
310 $vva$virama→w\u0307;
311 $vva→w\u0307a;
312 $rra$virama}$ha→r\u0331'';
313 $rra}$x→r\u0331;
314 $rra$virama→r\u0331;
315 $rra→r\u0331a;
316 $la$virama}$ha→l'';
317 $la}$x→l;
318 $la$virama→l;
319 $la→la;
320 $lla$virama}$ha→l\u0323'';
321 $lla}$x→l\u0323;
322 $lla$virama→l\u0323;
323 $lla→l\u0323a;
324 $va}$x→v;
325 $va$virama→v;
326 $va→va;
327 $sa$virama}$ha→s'';
328 $sa$virama}$sha→s'';
329 $sa$virama}$ssa→s'';
330 $sa$virama}$sa→s'';
331 $sa}$x→s;
332 $sa$virama→s;
333 # for Gurmukhi: sa + nukta. These must stay ahead of the bare "$sa→sa" rule, which would otherwise consume the $sa first.
334 $sa$nukta}$x→s\u0301;
335 $sa$nukta$virama→s\u0301;
336 $sa$nukta→s\u0301a;
337 $sa→sa;
338 $sha}$x→s\u0301; # same Latin output as $sa$nukta
339 $sha$virama→s\u0301;
340 $sha→s\u0301a;
341 $ssa}$x→s\u0323;
342 $ssa$virama→s\u0323;
343 $ssa→s\u0323a;
344 $ha}$x→h;
345 $ha$virama→h;
346 $ha→ha;
347 # dependent vowels (should never occur except following consonants). When the preceding character is in $forceIndependentMatra (before-context, not consumed), the vowel is prefixed with \u0314 to mark it as a stray vowel sign.
348 $forceIndependentMatra{$aa → \u0314a\u0304;
349 $forceIndependentMatra{$ai → \u0314ai;
350 $forceIndependentMatra{$au → \u0314au;
351 $forceIndependentMatra{$ii → \u0314i\u0304;
352 $forceIndependentMatra{$i → \u0314i;
353 $forceIndependentMatra{$uu → \u0314u\u0304;
354 $forceIndependentMatra{$u → \u0314u;
355 $forceIndependentMatra{$rrh → \u0314r\u0325\u0304;
356 $forceIndependentMatra{$rh → \u0314r\u0325;
357 $forceIndependentMatra{$llh → \u0314l\u0325\u0304;
358 $forceIndependentMatra{$lh → \u0314l\u0325;
359 $forceIndependentMatra{$e → \u0314e\u0304;
360 $forceIndependentMatra{$o → \u0314o\u0304;
361 # extra vowels (candra and short e/o)
362 $forceIndependentMatra{$ce → \u0314e\u0306;
363 $forceIndependentMatra{$co → \u0314o\u0306;
364 $forceIndependentMatra{$se → \u0314e;
365 $forceIndependentMatra{$so → \u0314o;
366 $forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
367 $forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
368 $aa → a\u0304; # ordinary case: the vowel sign follows a consonant already emitted without its inherent 'a'
369 $ai → ai;
370 $au → au;
371 $ii → i\u0304;
372 $i → i;
373 $uu → u\u0304;
374 $u → u;
375 $rrh → r\u0325\u0304;
376 $rh → r\u0325;
377 $llh → l\u0325\u0304;
378 $lh → l\u0325;
379 $e → e\u0304;
380 $o → o\u0304;
381 # extra vowels (candra and short e/o)
382 $ce → e\u0306;
383 $co → o\u0306;
384 $se → e;
385 $so → o;
386 # independent vowel immediately followed by a dependent vowel ($x, look-ahead only). Generally illegal; handled only for round-tripping: \u0314 is appended after the independent vowel.
387 $waa} $x → a\u0304\u0314;
388 $wai} $x → ai\u0314;
389 $wau} $x → au\u0314;
390 $wii} $x → i\u0304\u0314;
391 $wi } $x → i\u0314;
392 $wuu} $x → u\u0304\u0314;
393 $wu } $x → u\u0314;
394 $wrr} $x → r\u0325\u0304\u0314;
395 $wr } $x → r\u0325\u0314;
396 $wll} $x → l\u0325\u0304\u0314;
397 $wl } $x → l\u0325\u0314;
398 $we } $x → e\u0304\u0314;
399 $wo } $x → o\u0304\u0314;
400 $wa } $x → a\u0314;
401 # extra vowels (candra and short e/o)
402 $wce} $x → e\u0306\u0314;
403 $wco} $x → o\u0306\u0314;
404 $wse} $x → e\u0314;
405 $wso} $x → o\u0314;
406 $om} $x → ''om\u0314; # '' is an escaped literal apostrophe
407 # independent vowels when preceded by vowels: if the previous output character is in $vowels (before-context, not consumed), a literal apostrophe ('' is an escaped ') is emitted first to separate the two vowels
408 $vowels{$waa → ''a\u0304;
409 $vowels{$wai → ''ai;
410 $vowels{$wau → ''au;
411 $vowels{$wii → ''i\u0304;
412 $vowels{$wi → ''i;
413 $vowels{$wuu → ''u\u0304;
414 $vowels{$wu → ''u;
415 $vowels{$wrr → ''r\u0325\u0304;
416 $vowels{$wr → ''r\u0325;
417 $vowels{$wll → ''l\u0325\u0304;
418 $vowels{$wl → ''l\u0325;
419 $vowels{$we → ''e\u0304;
420 $vowels{$wo → ''o\u0304;
421 $vowels{$wa → ''a;
422 # extra vowels (candra and short e/o)
423 $vowels{$wce → ''e\u0306;
424 $vowels{$wco → ''o\u0306;
425 $vowels{$wse → ''e;
426 $vowels{$wso → ''o;
427 # independent vowels (otherwise): same Latin output as the matching dependent vowel, with no separator
428 $waa → a\u0304;
429 $wai → ai;
430 $wau → au;
431 $wii → i\u0304;
432 $wi → i;
433 $wuu → u\u0304;
434 $wu → u;
435 $wrr → r\u0325\u0304;
436 $wr → r\u0325;
437 $wll → l\u0325\u0304;
438 $wl → l\u0325;
439 $we → e\u0304;
440 $wo → o\u0304;
441 $wa → a;
442 # extra vowels (candra and short e/o)
443 $wce → e\u0306;
444 $wco → o\u0306;
445 $wse → e;
446 $wso → o;
447 $om → ''om; # '' is an escaped literal apostrophe, so the output is 'om
448 # signs: avagraha, chandrabindu, visarga
449 $avagraha → \u0315;
450 $chandrabindu$anusvara→\u0303; # must precede the single $chandrabindu rule so the pair is matched first
451 $chandrabindu → m\u0310;
452 $visarga→h\u0323;
453 # numbers: InterIndic digits to ASCII digits
454 $zero → 0;
455 $one → 1;
456 $two → 2;
457 $three → 3;
458 $four → 4;
459 $five → 5;
460 $six → 6;
461 $seven → 7;
462 $eight → 8;
463 $nine → 9;
464 $lm →; # length marks are deleted from the output
465 $ailm →;
466 $aulm →;
467 $dgs→ʔ;
468 $kta→t\u0331;
469 $danda→'.';
470 $doubleDanda→'.'; # same output as $danda
471 \uE070→; # ABBREVIATION SIGN (deleted from the output, like every "→;" rule below)
472 # LETTER RA WITH MIDDLE DIAGONAL
473 \uE071}$x→ra; # NOTE(review): emits the inherent 'a' even before a dependent vowel, unlike "$ra}$x→r"; confirm intended
474 \uE071$virama→r;
475 \uE071→ra;
476 # LETTER RA WITH LOWER DIAGONAL
477 \uE072}$x→ra; # NOTE(review): emits the inherent 'a' even before a dependent vowel, unlike "$ra}$x→r"; confirm intended
478 \uE072$virama→r;
479 \uE072→ra;
480 \uE073→; # RUPEE MARK
481 \uE074→; # RUPEE SIGN
482 \uE075→; # CURRENCY NUMERATOR ONE
483 \uE076→; # CURRENCY NUMERATOR TWO
484 \uE077→; # CURRENCY NUMERATOR THREE
485 \uE078→; # CURRENCY NUMERATOR FOUR
486 \uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
487 \uE07A→; # CURRENCY DENOMINATOR SIXTEEN
488 \uE07B→; # ISSHAR
489 \uE07C→; # TIPPI
490 \uE07D→; # ADDAK
491 \uE07E→; # IRI
492 \uE07F→; # URA
493 \uE080→; # EK ONKAR
494 \uE004→; # DEVANAGARI VOWEL SIGN SHORT A
495