]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/InterIndic_Latin.txt
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / data / translit / InterIndic_Latin.txt
1 #--------------------------------------------------------------------
2 # Copyright (c) 1999-2004, International Business Machines
3 # Corporation and others. All Rights Reserved.
4 #--------------------------------------------------------------------
5
6 # InterIndic-Latin: rules that turn the InterIndic private-use code points defined below into Latin letters
7 #\ue000 reserved
8 #signs (chandrabindu, anusvara, visarga)
9 $chandrabindu=\ue001;
10 $anusvara=\ue002;
11 $visarga=\ue003;
12 #\ue004 reserved (no variable is defined for it)
13 # w<vowel> represents the stand-alone form
14 $wa=\ue005;
15 $waa=\ue006;
16 $wi=\ue007;
17 $wii=\ue008;
18 $wu=\ue009;
19 $wuu=\ue00a;
20 $wr=\ue00b; # stand-alone form matching the dependent sign $rh
21 $wl=\ue00c; # stand-alone form matching the dependent sign $lh
22 $wce=\ue00d; # LETTER CANDRA E
23 $wse=\ue00e; # LETTER SHORT E
24 $we=\ue00f; # \u090f LETTER E
25 $wai=\ue010;
26 $wco=\ue011; # LETTER CANDRA O
27 $wso=\ue012; # LETTER SHORT O
28 $wo=\ue013; # \u0913 LETTER O
29 $wau=\ue014;
30 $ka=\ue015; # consonant variables start here
31 $kha=\ue016;
32 $ga=\ue017;
33 $gha=\ue018;
34 $nga=\ue019;
35 $ca=\ue01a;
36 $cha=\ue01b;
37 $ja=\ue01c;
38 $jha=\ue01d;
39 $nya=\ue01e;
40 $tta=\ue01f;
41 $ttha=\ue020;
42 $dda=\ue021;
43 $ddha=\ue022;
44 $nna=\ue023;
45 $ta=\ue024;
46 $tha=\ue025;
47 $da=\ue026;
48 $dha=\ue027;
49 $na=\ue028;
50 $ena=\ue029; #compatibility; transliterated the same way as $na$nukta
51 $pa=\ue02a;
52 $pha=\ue02b;
53 $ba=\ue02c;
54 $bha=\ue02d;
55 $ma=\ue02e;
56 $ya=\ue02f;
57 $ra=\ue030;
58 $vva=\ue081; # code point lies outside the otherwise ascending sequence
59 $rra=\ue031;
60 $la=\ue032;
61 $lla=\ue033;
62 $ela=\ue034; #compatibility; transliterated the same way as $la$nukta
63 $va=\ue035;
64 $sha=\ue036;
65 $ssa=\ue037;
66 $sa=\ue038;
67 $ha=\ue039;
68 #\ue03a (\u093a) Reserved
69 #\ue03b (\u093b) Reserved
70 $nukta=\ue03c;
71 $avagraha=\ue03d; # SIGN AVAGRAHA
72 # <vowel> represents the dependent form (there is no dependent counterpart of $wa)
73 $aa=\ue03e;
74 $i=\ue03f;
75 $ii=\ue040;
76 $u=\ue041;
77 $uu=\ue042;
78 $rh=\ue043; # dependent sign, written r + U+0325 in Latin
79 $lh=\ue044; # dependent sign, written l + U+0325 in Latin
80 $ce=\ue045; #VOWEL SIGN CANDRA E
81 $se=\ue046; #VOWEL SIGN SHORT E
82 $e=\ue047;
83 $ai=\ue048;
84 $co=\ue049; # VOWEL SIGN CANDRA O
85 $so=\ue04a; # VOWEL SIGN SHORT O
86 $o=\ue04b; # \u094b
87 $au=\ue04c;
88 $virama=\ue04d;
89 # \ue04e (\u094e) Reserved
90 # \ue04f (\u094f) Reserved
91 $om=\ue050; # OM
92 \ue051>; # UNMAPPED STRESS SIGN UDATTA (empty replacement: the character is deleted)
93 \ue052>; # UNMAPPED STRESS SIGN ANUDATTA (deleted)
94 \ue053>; # UNMAPPED GRAVE ACCENT (deleted)
95 \ue054>; # UNMAPPED ACUTE ACCENT (deleted)
96 $lm = \ue055;# Telugu Length Mark
97 $ailm=\ue056;# AI Length Mark
98 $aulm=\ue057;# AU Length Mark
99 #urdu compatibility forms
100 $uka=\ue058;
101 $ukha=\ue059;
102 $ugha=\ue05a;
103 $ujha=\ue05b;
104 $uddha=\ue05c;
105 $udha=\ue05d;
106 $ufa=\ue05e;
107 $uya=\ue05f;
108 $wrr=\ue060; # stand-alone form, written r + U+0325 + U+0304 in Latin
109 $wll=\ue061; # stand-alone form, written l + U+0325 + U+0304 in Latin
110 $rrh=\ue062; # dependent sign with the same Latin spelling as $wrr
111 $llh=\ue063; # dependent sign with the same Latin spelling as $wll
112 $danda=\ue064;
113 $doubleDanda=\ue065;
114 $zero=\ue066; # DIGIT ZERO
115 $one=\ue067; # DIGIT ONE
116 $two=\ue068; # DIGIT TWO
117 $three=\ue069; # DIGIT THREE
118 $four=\ue06a; # DIGIT FOUR
119 $five=\ue06b; # DIGIT FIVE
120 $six=\ue06c; # DIGIT SIX
121 $seven=\ue06d; # DIGIT SEVEN
122 $eight=\ue06e; # DIGIT EIGHT
123 $nine=\ue06f; # DIGIT NINE
124
125 # \u0970>; # UNMAPPED ABBREVIATION SIGN (rule is commented out)
126 $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c]; # not referenced by any rule visible here
127 $depVowelBelow=[\ue041-\ue044]; # not referenced by any rule visible here
128 # $x was originally called '&'; $z was '%'
129 $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; # every dependent vowel sign; used as look-ahead context after a consonant
130 $z=[bcdfghjklmnpqrstvwxyz]; # Latin consonant letters; not referenced by any rule visible here
131 $vowels=[aeiour\u0304\u0325\u0306]; # Latin vowel letters plus r and combining macron, ring below, breve; before-context for stand-alone vowels
132 $forceIndependentMatra = [^[[:L:][\u0300-\u034c]]]; # any character that is neither a letter nor in U+0300..U+034C; before-context for stray vowel signs
133 ######################################################################
134 # convert from Native letters to Latin letters
135 ######################################################################
136 #transliterations for anusvara: the Latin nasal is chosen from the consonant that follows (look-ahead only, not consumed)
137 $anusvara} [$ka$kha$ga$gha$nga] > n\u0307; # n + U+0307, same spelling as $nga
138 $anusvara} [$ca$cha$ja$jha$nya] > n\u0304; # n + U+0304; NOTE(review): $nya itself is written n + U+0303, confirm the difference is intended
139 $anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323; # n + U+0323, same spelling as $nna
140 $anusvara} [$ta$tha$da$dha$na] > n ; # plain n, same spelling as $na
141 $anusvara} [$pa$pha$ba$bha$ma] > m ; # plain m, same spelling as $ma
142 $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
143 $anusvara> m\u0307; # fallback when none of the contexts above follows: m + U+0307
144
145 # Urdu compatibility: nukta sequences and the precomposed compatibility letters share one Latin spelling
146 $ya$nukta}$x > y\u0307 ; # before a dependent vowel sign: bare consonant, the sign supplies the vowel
147 $ya$nukta$virama > y\u0307 ; # with virama: bare consonant
148 $ya$nukta > y\u0307a ; # otherwise: consonant plus inherent a
149
150 $la$nukta }$x > l\u0331 ;
151 $la$nukta$virama > l\u0331 ;
152 $la$nukta > l\u0331a ;
153
154 $na$nukta }$x > n\u0331 ;
155 $na$nukta$virama > n\u0331 ;
156 $na$nukta > n\u0331a ;
157
158 $ena }$x > n\u0331 ;
159 $ena$virama > n\u0331 ;
160 $ena > n\u0331a ;
161 $uka > qa ; # NOTE(review): no vowel-sign or virama variant, unlike $ufa and $uya; confirm this is intended
162 $ka$nukta }$x > q ;
163 $ka$nukta$virama > q ;
164 $ka$nukta > qa ;
165 $kha$nukta }$x > k\u0331h\u0331 ;
166 $kha$nukta$virama > k\u0331h\u0331 ;
167 $kha$nukta > k\u0331h\u0331a ;
168 $ukha$virama > k\u0331h\u0331; # NOTE(review): $ukha has no vowel-sign variant
169 $ukha > k\u0331h\u0331a;
170 $ugha > g\u0307a ; # NOTE(review): no vowel-sign or virama variant
171 $ga$nukta }$x > g\u0307 ;
172 $ga$nukta$virama > g\u0307 ;
173 $ga$nukta > g\u0307a ;
174
175 $ujha > za ; # NOTE(review): no vowel-sign or virama variant
176 $ja$nukta }$x > z ;
177 $ja$nukta$virama > z ;
178 $ja$nukta > za ;
179 $ddha$nukta}$x > r\u0323h ;
180 $ddha$nukta$virama > r\u0323h ;
181 $ddha$nukta > r\u0323ha;
182
183 $uddha}$x > r\u0323 ;
184 $uddha$virama > r\u0323 ;
185 $uddha > r\u0323a;
186
187 $udha > r\u0323a ; # NOTE(review): no vowel-sign or virama variant; same output as $uddha, while $ddha$nukta gives r + U+0323 + h
188 $dda$nukta}$x > r\u0323 ;
189 $dda$nukta$virama > r\u0323 ;
190 $dda$nukta > r\u0323a ;
191 $pha$nukta }$x > f ;
192 $pha$nukta$virama > f ;
193 $pha$nukta > fa ;
194 $ufa }$x > f ;
195 $ufa$virama > f ;
196 $ufa > fa ;
197
198 $ra$nukta}$x > r\u0331;
199 $ra$nukta$virama > r\u0331;
200 $ra$nukta > r\u0331a;
201 $lla$nukta}$x > l\u0331; # same Latin spelling as $la$nukta
202 $lla$nukta$virama > l\u0331;
203 $lla$nukta > l\u0331a;
204
205 $ela}$x > l\u0331;
206 $ela$virama > l\u0331;
207 $ela > l\u0331a;
208
209 $uya}$x > y\u0307;
210 $uya$virama > y\u0307;
211 $uya > y\u0307a;
212
213
214 # normal consonants: each letter has up to four rules, tried in the order written
215 $ka$virama}$ha>k''; # ka + virama before ha: k plus a literal apostrophe, so that k followed by h is not spelled like kha (kh)
216 $ka}$x>k; # before a dependent vowel sign: bare consonant, the sign is converted by its own rule
217 $ka$virama>k; # with virama: bare consonant, the virama is consumed
218 $ka>ka; # otherwise: consonant plus inherent a
219 $kha}$x>kh;
220 $kha$virama>kh;
221 $kha>kha;
222 $ga$virama}$ha>g''; # keeps g + h distinct from gha (gh)
223 $ga}$x>g;
224 $ga$virama>g;
225 $ga>ga;
226
227 $gha}$x>gh;
228 $gha$virama>gh;
229 $gha>gha;
230
231 $nga}$x>n\u0307;
232 $nga$virama>n\u0307;
233 $nga>n\u0307a ;
234 $ca$virama}$ha>c''; # keeps c + h distinct from cha (ch)
235 $ca}$x>c;
236 $ca$virama>c;
237 $ca>ca;
238
239 $cha}$x>ch;
240 $cha$virama>ch;
241 $cha>cha;
242 $ja$virama}$ha>j''; # keeps j + h distinct from jha (jh)
243 $ja}$x>j;
244 $ja$virama>j;
245 $ja>ja;
246
247 $jha}$x>jh;
248 $jha$virama>jh;
249 $jha>jha;
250
251 $nya }$x>n\u0303 ;
252 $nya$virama>n\u0303;
253 $nya > n\u0303a ;
254
255
256 $tta$virama}$ha>t\u0323''; # keeps t + U+0323 followed by h distinct from ttha
257 $tta}$x>t\u0323;
258 $tta$virama>t\u0323;
259 $tta>t\u0323a;
260
261 $ttha}$x>t\u0323h;
262 $ttha$virama>t\u0323h;
263 $ttha>t\u0323ha;
264 $dda$virama}$ha>d\u0323''; # dda + virama before ha: add a literal apostrophe so d + U+0323 followed by h is not spelled like ddha; was $dda}$x$ha, which never matched the virama case
265 $dda}$x>d\u0323; # before a dependent vowel sign: bare consonant, the sign is converted by its own rule
266 $dda$virama>d\u0323; # with virama: bare consonant, the virama is consumed
267 $dda>d\u0323a; # otherwise: consonant plus inherent a
268
269 $ddha}$x>d\u0323h; # before a dependent vowel sign: bare consonant
270 $ddha$virama>d\u0323h; # with virama: bare consonant
271 $ddha>d\u0323ha; # otherwise: consonant plus inherent a
272
273 $nna}$x>n\u0323 ;
274 $nna$virama>n\u0323;
275 $nna>n\u0323a ;
276
277
278 $ta$virama}$ha>t''; # literal apostrophe keeps t + h distinct from tha (th)
279 $ta$virama}$ttha>t''; # NOTE(review): apostrophe also inserted before ttha, tta and tha; presumably for round-tripping, confirm against the Latin-InterIndic rules
280 $ta$virama}$tta>t'';
281 $ta$virama}$tha>t'';
282 $ta}$x>t;
283 $ta$virama>t;
284 $ta>ta;
285 $tha}$x>th;
286 $tha$virama>th;
287 $tha>tha;
288
289 $da$virama}$ha>d''; # literal apostrophe keeps d + h distinct from dha (dh)
290 $da$virama}$ddha>d''; # NOTE(review): same pattern as the ta group above; confirm against the Latin-InterIndic rules
291 $da$virama}$dda>d'';
292 $da$virama}$dha>d'';
293 $da}$x>d;
294 $da$virama>d;
295 $da>da;
296 $dha}$x>dh;
297 $dha$virama>dh;
298 $dha>dha;
299 $na$virama}$ga>n''; # na + virama before ga: n plus a literal apostrophe
300 $na$virama}$ya>n''; # na + virama before ya: n plus a literal apostrophe
301 $na}$x>n;
302 $na$virama>n;
303 $na>na;
304
305
306 $pa$virama}$ha>p''; # literal apostrophe keeps p + h distinct from pha (ph)
307 $pa}$x>p; # before a dependent vowel sign: bare consonant
308 $pa$virama>p; # with virama: bare consonant
309 $pa>pa; # otherwise: consonant plus inherent a
310 $pha}$x>ph;
311 $pha$virama>ph;
312 $pha>pha;
313 $ba$virama}$ha>b''; # literal apostrophe keeps b + h distinct from bha (bh)
314 $ba}$x>b;
315 $ba$virama>b;
316 $ba>ba;
317
318 $bha}$x>bh;
319 $bha$virama>bh;
320 $bha>bha;
321
322 $ma$virama}$ma>m''; # NOTE(review): look-ahead is $ma here, while most sibling rules look ahead for $ha; confirm this is intended
323 $ma}$x>m;
324 $ma$virama>m;
325 $ma>ma;
326
327 $ya}$x>y; # before a dependent vowel sign: bare consonant
328 $ya$virama>y; # with virama: bare consonant
329 $ya>ya; # otherwise: consonant plus inherent a
330 $ra$virama}$ha>r''; # ra + virama before ha: r plus a literal apostrophe
331 $ra}$x>r;
332 $ra$virama>r;
333 $ra>ra;
334 $vva$virama}$ha>w\u0307'';
335 $vva}$x>w\u0307;
336 $vva$virama>w\u0307;
337 $vva>w\u0307a;
338 $rra$virama}$ha>r\u0331'';
339 $rra}$x>r\u0331; # same Latin spelling as the $ra$nukta rules
340 $rra$virama>r\u0331;
341 $rra>r\u0331a;
342 $la$virama}$ha>l'';
343 $la}$x>l;
344 $la$virama>l;
345 $la>la;
346 $lla$virama}$ha>l\u0323'';
347 $lla}$x>l\u0323;
348 $lla$virama>l\u0323;
349 $lla>l\u0323a;
350 $va}$x>v;
351 $va$virama>v;
352 $va>va;
353 $sa$virama}$ha>s''; # sa + virama before ha, sha, ssa or sa: s plus a literal apostrophe
354 $sa$virama}$sha>s'';
355 $sa$virama}$ssa>s'';
356 $sa$virama}$sa>s'';
357 $sa}$x>s; # before a dependent vowel sign: bare consonant
358 $sa$virama>s; # with virama: bare consonant
359
360 #for gurmukhi: sa + nukta is spelled like $sha (s + U+0301)
361 $sa$nukta}$x>s\u0301;
362 $sa$nukta$virama>s\u0301;
363 $sa$nukta>s\u0301a;
364 $sa>sa; # plain sa fallback; listed after the nukta rules so that $sa$nukta is matched first
365
366 $sha}$x>s\u0301;
367 $sha$virama>s\u0301;
368 $sha>s\u0301a;
369
370 $ssa}$x>s\u0323;
371 $ssa$virama>s\u0323;
372 $ssa>s\u0323a;
373 $ha}$x>h;
374 $ha$virama>h;
375 $ha>ha;
376
377 # dependent vowels (should never occur except following consonants)
378 $forceIndependentMatra{$aa > \u0314a\u0304 ; # sign preceded by a non-letter, non-mark character: U+0314 is written in front of the vowel
379 $forceIndependentMatra{$ai > \u0314ai ;
380 $forceIndependentMatra{$au > \u0314au ;
381 $forceIndependentMatra{$ii > \u0314i\u0304 ;
382 $forceIndependentMatra{$i > \u0314i ;
383 $forceIndependentMatra{$uu > \u0314u\u0304 ;
384 $forceIndependentMatra{$u > \u0314u ;
385 $forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
386 $forceIndependentMatra{$rh > \u0314r\u0325 ;
387 $forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
388 $forceIndependentMatra{$lh > \u0314l\u0325 ;
389 $forceIndependentMatra{$e > \u0314e\u0304 ;
390 $forceIndependentMatra{$o > \u0314o\u0304 ;
391 #extra vowels
392 $forceIndependentMatra{$ce > \u0314e\u0306 ;
393 $forceIndependentMatra{$co > \u0314o\u0306 ;
394 $forceIndependentMatra{$se > \u0314e ;
395 $forceIndependentMatra{$so > \u0314o ;
396 $forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character (deleted)
397 $forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character (deleted)
398 $aa > a\u0304 ; # ordinary case: the sign becomes the Latin vowel, long vowels carry U+0304
399 $ai > ai ;
400 $au > au ;
401 $ii > i\u0304 ;
402 $i > i ;
403 $uu > u\u0304 ;
404 $u > u ;
405 $rrh > r\u0325\u0304 ;
406 $rh > r\u0325 ;
407 $llh > l\u0325\u0304 ;
408 $lh > l\u0325 ;
409 $e > e\u0304 ;
410 $o > o\u0304 ;
411 #extra vowels
412 $ce > e\u0306 ; # candra e: e + U+0306
413 $co > o\u0306 ; # candra o: o + U+0306
414 $se > e ; # short e: plain e
415 $so > o ; # short o: plain o
416 #stand-alone vowels followed by a dependent vowel sign. Such sequences are generally illegal; they are handled only for round-tripping
417 $waa} $x > a\u0304\u0314 ; # U+0314 is appended after the vowel; the following sign is look-ahead only
418 $wai} $x > ai\u0314 ;
419 $wau} $x > au\u0314 ;
420 $wii} $x > i\u0304\u0314 ;
421 $wi } $x > i\u0314 ;
422 $wuu} $x > u\u0304\u0314 ;
423 $wu } $x > u\u0314 ;
424 $wrr} $x > r\u0325\u0304\u0314 ;
425 $wr } $x > r\u0325\u0314 ;
426 $wll} $x > l\u0325\u0304\u0314 ;
427 $wl } $x > l\u0325\u0314 ;
428 $we } $x > e\u0304\u0314 ;
429 $wo } $x > o\u0304\u0314 ;
430 $wa } $x > a\u0314 ;
431 #extra vowels
432 $wce} $x > e\u0306\u0314 ;
433 $wco} $x > o\u0306\u0314 ;
434 $wse} $x > e\u0314 ;
435 $wso} $x > o\u0314 ;
436 $om} $x > ''om\u0314 ; # om is written with a leading literal apostrophe
437
438 # independent vowels when preceded by vowels: a literal apostrophe is written in front of the vowel
439 $vowels{$waa > ''a\u0304 ; # $vowels is before-context only; it matches Latin text already produced
440 $vowels{$wai > ''ai ;
441 $vowels{$wau > ''au ;
442 $vowels{$wii > ''i\u0304 ;
443 $vowels{$wi > ''i ;
444 $vowels{$wuu > ''u\u0304 ;
445 $vowels{$wu > ''u ;
446 $vowels{$wrr > ''r\u0325\u0304 ;
447 $vowels{$wr > ''r\u0325 ;
448 $vowels{$wll > ''l\u0325\u0304 ;
449 $vowels{$wl > ''l\u0325 ;
450 $vowels{$we > ''e\u0304 ;
451 $vowels{$wo > ''o\u0304 ;
452 $vowels{$wa > ''a ;
453 #extra vowels
454 $vowels{$wce > ''e\u0306 ;
455 $vowels{$wco > ''o\u0306 ;
456 $vowels{$wse > ''e ;
457 $vowels{$wso > ''o ;
458
459 # independent vowels (otherwise): same Latin spelling as the matching dependent sign, no separator
460 $waa > a\u0304 ;
461 $wai > ai ;
462 $wau > au ;
463 $wii > i\u0304 ;
464 $wi > i ;
465 $wuu > u\u0304 ;
466 $wu > u ;
467 $wrr > r\u0325\u0304 ;
468 $wr > r\u0325 ;
469 $wll > l\u0325\u0304 ;
470 $wl > l\u0325 ;
471 $we > e\u0304 ;
472 $wo > o\u0304 ;
473 $wa > a ;
474 #extra vowels
475 $wce > e\u0306 ;
476 $wco > o\u0306 ;
477 $wse > e ;
478 $wso > o ;
479 $om > ''om ; # literal apostrophe followed by om
480
481 #signs: avagraha, chandrabindu, visarga
482 $avagraha > \u0315;
483 $chandrabindu$anusvara>\u0303; # the pair is replaced by a single U+0303; listed before the lone chandrabindu rule so it is matched first
484 $chandrabindu > m\u0310;
485 $visarga>h\u0323;
486 #numbers: each InterIndic digit becomes the ASCII digit
487 $zero > 0;
488 $one > 1;
489 $two > 2;
490 $three > 3;
491 $four > 4;
492 $five > 5;
493 $six > 6;
494 $seven > 7;
495 $eight > 8;
496 $nine > 9;
497 $lm >; # length marks have no Latin form and are deleted
498 $ailm >;
499 $aulm >;
500
501 $danda>'.';
502 $doubleDanda>'.'; # same output as $danda, so the two are not distinguished in the Latin text
503
504 \ue070>; # ABBREVIATION SIGN (deleted)
505 # LETTER RA WITH MIDDLE DIAGONAL: spelled like plain ra
506 \ue071}$x>r; # before a dependent vowel sign: bare consonant, as in $ra}$x>r (was ra, which kept the inherent a in front of the vowel)
507 \ue071$virama>r; # with virama: bare consonant
508 \ue071>ra; # otherwise: consonant plus inherent a
509 # LETTER RA WITH LOWER DIAGONAL: spelled like plain ra
510 \ue072}$x>r; # before a dependent vowel sign: bare consonant (was ra)
511 \ue072$virama>r; # with virama: bare consonant
512 \ue072>ra; # otherwise: consonant plus inherent a
513
514 \ue073>; # RUPEE MARK (this and every rule below has an empty replacement: the character is deleted)
515 \ue074>; # RUPEE SIGN
516 \ue075>; # CURRENCY NUMERATOR ONE
517 \ue076>; # CURRENCY NUMERATOR TWO
518 \ue077>; # CURRENCY NUMERATOR THREE
519 \ue078>; # CURRENCY NUMERATOR FOUR
520 \ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
521 \ue07A>; # CURRENCY DENOMINATOR SIXTEEN
522 \ue07B>; # ISSHAR
523 \uE07C>; # TIPPI
524 \uE07D>; # ADDAK
525 \uE07E>; # IRI
526 \uE07F>; # URA
527 \uE080>; # EK ONKAR
528 \uE004>; # DEVANAGARI VOWEL SIGN SHORT A (the code point has no variable in the definitions)
529