]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/InterIndic_Latin.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / InterIndic_Latin.txt
1 # ***************************************************************************
2 # *
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5 # *
6 # ***************************************************************************
7 # File: InterIndic_Latin.txt
8 # Generated from CLDR
9 #
10
11 # InterIndic-Latin: names for the InterIndic private-use code points
12 # \uE000 reserved
13 # signs
14 $chandrabindu = \uE001;
15 $anusvara = \uE002;
16 $visarga = \uE003;
17 # \uE004 reserved (no variable is assigned to it)
18 # $w<vowel> names the stand-alone (independent) form of a vowel
19 $wa = \uE005;
20 $waa = \uE006;
21 $wi = \uE007;
22 $wii = \uE008;
23 $wu = \uE009;
24 $wuu = \uE00A;
25 $wr = \uE00B;
26 $wl = \uE00C;
27 $wce = \uE00D; # LETTER CANDRA E
28 $wse = \uE00E; # LETTER SHORT E
29 $we = \uE00F; # ए LETTER E
30 $wai = \uE010;
31 $wco = \uE011; # LETTER CANDRA O
32 $wso = \uE012; # LETTER SHORT O
33 $wo = \uE013; # ओ LETTER O
34 $wau = \uE014;
35 $ka = \uE015; # consonants start here
36 $kha = \uE016;
37 $ga = \uE017;
38 $gha = \uE018;
39 $nga = \uE019;
40 $ca = \uE01A;
41 $cha = \uE01B;
42 $ja = \uE01C;
43 $jha = \uE01D;
44 $nya = \uE01E;
45 $tta = \uE01F;
46 $ttha = \uE020;
47 $dda = \uE021;
48 $ddha = \uE022;
49 $nna = \uE023;
50 $ta = \uE024;
51 $tha = \uE025;
52 $da = \uE026;
53 $dha = \uE027;
54 $na = \uE028;
55 $ena = \uE029; # compatibility
56 $pa = \uE02A;
57 $pha = \uE02B;
58 $ba = \uE02C;
59 $bha = \uE02D;
60 $ma = \uE02E;
61 $ya = \uE02F;
62 $ra = \uE030;
63 $vva = \uE081; # code point lies outside the contiguous \uE015-\uE039 run
64 $rra = \uE031;
65 $la = \uE032;
66 $lla = \uE033;
67 $ela = \uE034; # compatibility
68 $va = \uE035;
69 $sha = \uE036;
70 $ssa = \uE037;
71 $sa = \uE038;
72 $ha = \uE039;
73 # \uE03A reserved (cf. \u093A)
74 # \uE03B reserved (cf. \u093B)
75 $nukta = \uE03C;
76 $avagraha = \uE03D; # SIGN AVAGRAHA
77 # a bare $<vowel> name is the dependent (vowel-sign) form
78 $aa = \uE03E;
79 $i = \uE03F;
80 $ii = \uE040;
81 $u = \uE041;
82 $uu = \uE042;
83 $rh = \uE043;
84 $rrh = \uE044;
85 $ce = \uE045; # VOWEL SIGN CANDRA E
86 $se = \uE046; # VOWEL SIGN SHORT E
87 $e = \uE047;
88 $ai = \uE048;
89 $co = \uE049; # VOWEL SIGN CANDRA O
90 $so = \uE04A; # VOWEL SIGN SHORT O
91 $o = \uE04B; # ो
92 $au = \uE04C;
93 $virama = \uE04D;
94 # \uE04E reserved (cf. \u094E)
95 # \uE04F reserved (cf. \u094F)
96 $om = \uE050; # OM
97 \uE051 → ; # UNMAPPED STRESS SIGN UDATTA (mapped to nothing)
98 \uE052 → ; # UNMAPPED STRESS SIGN ANUDATTA (mapped to nothing)
99 \uE053 → ; # UNMAPPED GRAVE ACCENT (mapped to nothing)
100 \uE054 → ; # UNMAPPED ACUTE ACCENT (mapped to nothing)
101 $lm = \uE055; # Telugu Length Mark
102 $ailm = \uE056; # AI Length Mark
103 $aulm = \uE057; # AU Length Mark
104 # Urdu compatibility forms
105 $uka = \uE058;
106 $ukha = \uE059;
107 $ugha = \uE05A;
108 $ujha = \uE05B;
109 $uddha = \uE05C;
110 $udha = \uE05D;
111 $ufa = \uE05E;
112 $uya = \uE05F;
113 $wrr = \uE060; # stand-alone form; its dependent counterpart is $rrh
114 $wll = \uE061; # stand-alone form; its dependent counterpart is $llh
115 $lh = \uE062;
116 $llh = \uE063;
117 $danda = \uE064;
118 $doubleDanda = \uE065;
119 $zero = \uE066; # DIGIT ZERO
120 $one = \uE067; # DIGIT ONE
121 $two = \uE068; # DIGIT TWO
122 $three = \uE069; # DIGIT THREE
123 $four = \uE06A; # DIGIT FOUR
124 $five = \uE06B; # DIGIT FIVE
125 $six = \uE06C; # DIGIT SIX
126 $seven = \uE06D; # DIGIT SEVEN
127 $eight = \uE06E; # DIGIT EIGHT
128 $nine = \uE06F; # DIGIT NINE
129 # glottal stop
130 $dgs = \uE082;
131 # khanda-ta
132 $kta = \uE083;
133 $depVowelAbove = [\uE03E-\uE040 \uE045-\uE04C]; # not referenced by the rules visible in this file
134 $depVowelBelow = [\uE041-\uE044]; # not referenced by the rules visible in this file
135 # $x was originally called '§'; $z was '%'
136 $x = [$aa $ai $au $ii $i $uu $u $rrh $rh $lh $llh $e $o $se $ce $so $co]; # every dependent vowel; used as look-ahead context
137 $z = [bcdfghjklmnpqrstvwxyz]; # not referenced by the rules visible in this file
138 $vowels = [aeiour\u0304\u0325\u0306]; # preceding-context set for the independent-vowel rules
139 $forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; # anything that is neither a letter nor in \u0300-\u034C
140 ######################################################################
141 # Convert from native (InterIndic) letters to Latin letters
142 ######################################################################
143 # anusvara: the output depends on the class of the consonant that follows
144 $anusvara } [$ka $kha $ga $gha $nga] → n\u0307;
145 $anusvara } [$ca $cha $ja $jha $nya] → n\u0304; # NOTE(review): $nya itself maps to n\u0303 in this file; confirm \u0304 is intended
146 $anusvara } [$tta $ttha $dda $ddha $nna] → n\u0323;
147 $anusvara } [$ta $tha $da $dha $na] → n;
148 $anusvara } [$pa $pha $ba $bha $ma] → m;
149 $anusvara } [$ya $ra $lla $la $va $ssa $sha $sa $ha] → n;
150 $anusvara → m\u0307; # fallback when none of the contexts above matches
151 # Urdu compatibility: nukta sequences and their single-code-point equivalents
152 $ya $nukta } $x → y\u0307;
153 $ya $nukta $virama → y\u0307;
154 $ya $nukta → y\u0307a;
155 $la $nukta } $x → l\u0331;
156 $la $nukta $virama → l\u0331;
157 $la $nukta → l\u0331a;
158 $na $nukta } $x → n\u0331;
159 $na $nukta $virama → n\u0331;
160 $na $nukta → n\u0331a;
161 $ena } $x → n\u0331;
162 $ena $virama → n\u0331;
163 $ena → n\u0331a;
164 $uka → qa; # NOTE(review): no "} $x" / "$virama" variants, unlike $ufa and $uya; confirm
165 $ka $nukta } $x → q;
166 $ka $nukta $virama → q;
167 $ka $nukta → qa;
168 $kha $nukta } $x → k\u0331h\u0331;
169 $kha $nukta $virama → k\u0331h\u0331;
170 $kha $nukta → k\u0331h\u0331a;
171 $ukha $virama → k\u0331h\u0331; # NOTE(review): no "} $x" variant for $ukha; confirm
172 $ukha → k\u0331h\u0331a;
173 $ugha → g\u0307a; # NOTE(review): no "} $x" / "$virama" variants; confirm
174 $ga $nukta } $x → g\u0307;
175 $ga $nukta $virama → g\u0307;
176 $ga $nukta → g\u0307a;
177 $ujha → za; # NOTE(review): no "} $x" / "$virama" variants; confirm
178 $ja $nukta } $x → z;
179 $ja $nukta $virama → z;
180 $ja $nukta → za;
181 $ddha $nukta } $x → r\u0323h;
182 $ddha $nukta $virama → r\u0323h;
183 $ddha $nukta → r\u0323ha;
184 $uddha } $x → r\u0323;
185 $uddha $virama → r\u0323;
186 $uddha → r\u0323a;
187 $udha → r\u0323a; # NOTE(review): same output as $uddha and no context variants; confirm
188 $dda $nukta } $x → r\u0323;
189 $dda $nukta $virama → r\u0323;
190 $dda $nukta → r\u0323a;
191 $pha $nukta } $x → f;
192 $pha $nukta $virama → f;
193 $pha $nukta → fa;
194 $ufa } $x → f;
195 $ufa $virama → f;
196 $ufa → fa;
197 $ra $nukta } $x → r\u0331;
198 $ra $nukta $virama → r\u0331;
199 $ra $nukta → r\u0331a;
200 $lla $nukta } $x → l\u0331;
201 $lla $nukta $virama → l\u0331;
202 $lla $nukta → l\u0331a;
203 $ela } $x → l\u0331;
204 $ela $virama → l\u0331;
205 $ela → l\u0331a;
206 $uya } $x → y\u0307;
207 $uya $virama → y\u0307;
208 $uya → y\u0307a;
209 # ordinary consonants: for each letter the context-specific rules precede the bare fallback
210 $ka $virama } $ha → k''; # '' is an escaped apostrophe; it keeps k + h apart from $kha (kh)
211 $ka } $x → k; # a dependent vowel follows: no inherent a
212 $ka $virama → k; # the virama is consumed
213 $ka → ka; # otherwise emit the inherent a
214 $kha } $x → kh;
215 $kha $virama → kh;
216 $kha → kha;
217 $ga $virama } $ha → g'';
218 $ga } $x → g;
219 $ga $virama → g;
220 $ga → ga;
221 $gha } $x → gh;
222 $gha $virama → gh;
223 $gha → gha;
224 $nga } $x → n\u0307;
225 $nga $virama → n\u0307;
226 $nga → n\u0307a;
227 $ca $virama } $ha → c'';
228 $ca } $x → c;
229 $ca $virama → c;
230 $ca → ca;
231 $cha } $x → ch;
232 $cha $virama → ch;
233 $cha → cha;
234 $ja $virama } $ha → j'';
235 $ja } $x → j;
236 $ja $virama → j;
237 $ja → ja;
238 $jha } $x → jh;
239 $jha $virama → jh;
240 $jha → jha;
241 $nya } $x → n\u0303;
242 $nya $virama → n\u0303;
243 $nya → n\u0303a;
244 $tta $virama } $ha → t\u0323'';
245 $tta } $x → t\u0323;
246 $tta $virama → t\u0323;
247 $tta → t\u0323a;
248 $ttha } $x → t\u0323h;
249 $ttha $virama → t\u0323h;
250 $ttha → t\u0323ha;
251 $dda$virama}$ha→d\u0323''; # explicit virama before ha: the apostrophe keeps d\u0323 + h distinct from $ddha (d\u0323h)
252 $dda}$x→d\u0323; # a dependent vowel follows: emit the bare consonant
253 $dda$virama→d\u0323; # the virama is consumed, no inherent vowel
254 $dda→d\u0323a; # otherwise emit the inherent a
255 $ddha } $x → d\u0323h;
256 $ddha $virama → d\u0323h;
257 $ddha → d\u0323ha;
258 $nna } $x → n\u0323;
259 $nna $virama → n\u0323;
260 $nna → n\u0323a;
261 $ta $virama } $ha → t''; # the apostrophe separates t from a following h- or t-initial letter
262 $ta $virama } $ttha → t'';
263 $ta $virama } $tta → t'';
264 $ta $virama } $tha → t'';
265 $ta } $x → t;
266 $ta $virama → t;
267 $ta → ta;
268 $tha } $x → th;
269 $tha $virama → th;
270 $tha → tha;
271 $da $virama } $ha → d'';
272 $da $virama } $ddha → d'';
273 $da $virama } $dda → d'';
274 $da $virama } $dha → d'';
275 $da } $x → d;
276 $da $virama → d;
277 $da → da;
278 $dha } $x → dh;
279 $dha $virama → dh;
280 $dha → dha;
281 $na $virama } $ga → n'';
282 $na $virama } $ya → n'';
283 $na } $x → n;
284 $na $virama → n;
285 $na → na;
286 $pa $virama } $ha → p'';
287 $pa } $x → p;
288 $pa $virama → p;
289 $pa → pa;
290 $pha } $x → ph;
291 $pha $virama → ph;
292 $pha → pha;
293 $ba $virama } $ha → b'';
294 $ba } $x → b;
295 $ba $virama → b;
296 $ba → ba;
297 $bha } $x → bh;
298 $bha $virama → bh;
299 $bha → bha;
300 $ma $virama } $ma → m''; # the look-ahead here is $ma, not $ha as in most sibling rules
301 $ma } $x → m;
302 $ma $virama → m;
303 $ma → ma;
304 $ya } $x → y;
305 $ya $virama → y;
306 $ya → ya;
307 $ra $virama } $ha → r'';
308 $ra } $x → r;
309 $ra $virama → r;
310 $ra → ra;
311 $vva $virama } $ha → w\u0307'';
312 $vva } $x → w\u0307;
313 $vva $virama → w\u0307;
314 $vva → w\u0307a;
315 $rra $virama } $ha → r\u0331'';
316 $rra } $x → r\u0331;
317 $rra $virama → r\u0331;
318 $rra → r\u0331a;
319 $la $virama } $ha → l'';
320 $la } $x → l;
321 $la $virama → l;
322 $la → la;
323 $lla $virama } $ha → l\u0323'';
324 $lla } $x → l\u0323;
325 $lla $virama → l\u0323;
326 $lla → l\u0323a;
327 $va } $x → v;
328 $va $virama → v;
329 $va → va;
330 $sa $virama } $ha → s'';
331 $sa $virama } $sha → s'';
332 $sa $virama } $ssa → s'';
333 $sa $virama } $sa → s'';
334 $sa } $x → s;
335 $sa $virama → s;
336 # for Gurmukhi: sa + nukta yields the same Latin output as $sha
337 $sa $nukta } $x → s\u0301;
338 $sa $nukta $virama → s\u0301;
339 $sa $nukta → s\u0301a;
340 $sa → sa; # bare fallback, listed after the nukta forms
341 $sha } $x → s\u0301;
342 $sha $virama → s\u0301;
343 $sha → s\u0301a;
344 $ssa } $x → s\u0323;
345 $ssa $virama → s\u0323;
346 $ssa → s\u0323a;
347 $ha } $x → h;
348 $ha $virama → h;
349 $ha → ha;
350 # dependent vowels (should never occur except after consonants); \u0314 is prefixed when the preceding context is in $forceIndependentMatra
351 $forceIndependentMatra { $aa → \u0314a\u0304;
352 $forceIndependentMatra { $ai → \u0314ai;
353 $forceIndependentMatra { $au → \u0314au;
354 $forceIndependentMatra { $ii → \u0314i\u0304;
355 $forceIndependentMatra { $i → \u0314i;
356 $forceIndependentMatra { $uu → \u0314u\u0304;
357 $forceIndependentMatra { $u → \u0314u;
358 $forceIndependentMatra { $rrh → \u0314r\u0325\u0304;
359 $forceIndependentMatra { $rh → \u0314r\u0325;
360 $forceIndependentMatra { $llh → \u0314l\u0325\u0304;
361 $forceIndependentMatra { $lh → \u0314l\u0325;
362 $forceIndependentMatra { $e → \u0314e\u0304;
363 $forceIndependentMatra { $o → \u0314o\u0304;
364 # extra vowels
365 $forceIndependentMatra { $ce → \u0314e\u0306;
366 $forceIndependentMatra { $co → \u0314o\u0306;
367 $forceIndependentMatra { $se → \u0314e;
368 $forceIndependentMatra { $so → \u0314o;
369 $forceIndependentMatra { $nukta → ; # Nukta cannot appear independently or as first character
370 $forceIndependentMatra { $virama → ; # Virama cannot appear independently or as first character
371 $aa → a\u0304; # from here on: dependent vowels without the forcing context
372 $ai → ai;
373 $au → au;
374 $ii → i\u0304;
375 $i → i;
376 $uu → u\u0304;
377 $u → u;
378 $rrh → r\u0325\u0304;
379 $rh → r\u0325;
380 $llh → l\u0325\u0304;
381 $lh → l\u0325;
382 $e → e\u0304;
383 $o → o\u0304;
384 # extra vowels
385 $ce → e\u0306;
386 $co → o\u0306;
387 $se → e;
388 $so → o;
389 # independent vowels that are followed by a dependent vowel; generally illegal, kept only for round-tripping
390 $waa } $x → a\u0304\u0314; # \u0314 is appended after the independent vowel
391 $wai } $x → ai\u0314;
392 $wau } $x → au\u0314;
393 $wii } $x → i\u0304\u0314;
394 $wi } $x → i\u0314;
395 $wuu } $x → u\u0304\u0314;
396 $wu } $x → u\u0314;
397 $wrr } $x → r\u0325\u0304\u0314;
398 $wr } $x → r\u0325\u0314;
399 $wll } $x → l\u0325\u0304\u0314;
400 $wl } $x → l\u0325\u0314;
401 $we } $x → e\u0304\u0314;
402 $wo } $x → o\u0304\u0314;
403 $wa } $x → a\u0314;
404 # extra vowels
405 $wce } $x → e\u0306\u0314;
406 $wco } $x → o\u0306\u0314;
407 $wse } $x → e\u0314;
408 $wso } $x → o\u0314;
409 $om } $x → ''om\u0314;
410 # independent vowels when preceded by a member of $vowels: an apostrophe is emitted first
411 $vowels { $waa → ''a\u0304;
412 $vowels { $wai → ''ai;
413 $vowels { $wau → ''au;
414 $vowels { $wii → ''i\u0304;
415 $vowels { $wi → ''i;
416 $vowels { $wuu → ''u\u0304;
417 $vowels { $wu → ''u;
418 $vowels { $wrr → ''r\u0325\u0304;
419 $vowels { $wr → ''r\u0325;
420 $vowels { $wll → ''l\u0325\u0304;
421 $vowels { $wl → ''l\u0325;
422 $vowels { $we → ''e\u0304;
423 $vowels { $wo → ''o\u0304;
424 $vowels { $wa → ''a;
425 # extra vowels
426 $vowels { $wce → ''e\u0306;
427 $vowels { $wco → ''o\u0306;
428 $vowels { $wse → ''e;
429 $vowels { $wso → ''o;
430 # independent vowels (all remaining contexts)
431 $waa → a\u0304;
432 $wai → ai;
433 $wau → au;
434 $wii → i\u0304;
435 $wi → i;
436 $wuu → u\u0304;
437 $wu → u;
438 $wrr → r\u0325\u0304;
439 $wr → r\u0325;
440 $wll → l\u0325\u0304;
441 $wl → l\u0325;
442 $we → e\u0304;
443 $wo → o\u0304;
444 $wa → a;
445 # extra vowels
446 $wce → e\u0306;
447 $wco → o\u0306;
448 $wse → e;
449 $wso → o;
450 $om → ''om; # always emitted with a leading apostrophe
451 # avagraha, chandrabindu and visarga
452 $avagraha → \u0315;
453 $chandrabindu $anusvara → \u0303; # the two-sign sequence is listed before the single-sign rule
454 $chandrabindu → m\u0310;
455 $visarga → h\u0323;
456 # digits
457 $zero → 0;
458 $one → 1;
459 $two → 2;
460 $three → 3;
461 $four → 4;
462 $five → 5;
463 $six → 6;
464 $seven → 7;
465 $eight → 8;
466 $nine → 9;
467 $lm → ; # length marks are mapped to nothing
468 $ailm → ;
469 $aulm → ;
470 $dgs → ʔ;
471 $kta → t\u0331;
472 $danda → '.';
473 $doubleDanda → '.'; # same output as $danda
474 \uE070 → ; # ABBREVIATION SIGN
475 # LETTER RA WITH MIDDLE DIAGONAL
476 \uE071}$x→r; # a dependent vowel follows, so the inherent a is not emitted
477 \uE071$virama→r; # the virama is consumed, no inherent vowel
478 \uE071→ra; # otherwise emit the inherent a
479 # LETTER RA WITH LOWER DIAGONAL
480 \uE072}$x→r; # a dependent vowel follows, so the inherent a is not emitted
481 \uE072$virama→r; # the virama is consumed, no inherent vowel
482 \uE072→ra; # otherwise emit the inherent a
483 \uE073 → ; # RUPEE MARK (this and the rules below map to nothing)
484 \uE074 → ; # RUPEE SIGN
485 \uE075 → ; # CURRENCY NUMERATOR ONE
486 \uE076 → ; # CURRENCY NUMERATOR TWO
487 \uE077 → ; # CURRENCY NUMERATOR THREE
488 \uE078 → ; # CURRENCY NUMERATOR FOUR
489 \uE079 → ; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
490 \uE07A → ; # CURRENCY DENOMINATOR SIXTEEN
491 \uE07B → ; # ISSHAR
492 \uE07C → ; # TIPPI
493 \uE07D → ; # ADDAK
494 \uE07E → ; # IRI
495 \uE07F → ; # URA
496 \uE080 → ; # EK ONKAR
497 \uE004 → ; # DEVANAGARI VOWEL SIGN SHORT A (NOTE(review): presumably U+0904, which Unicode names LETTER SHORT A; confirm)
498