1 # ***************************************************************************
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
6 # ***************************************************************************
7 # File: InterIndic_Latin.txt
18 # $w<vowel> (the vowel name prefixed with 'w') represents the stand-alone form
# Each variable below names the single private-use code point (U+E0xx) that this
# rule set uses for one independent letter or sign.
27 $wce=\uE00D; # LETTER CANDRA E
28 $wse=\uE00E; # LETTER SHORT E
29 $we=\uE00F; # ए LETTER E
31 $wco=\uE011; # LETTER CANDRA O
32 $wso=\uE012; # LETTER SHORT O
33 $wo=\uE013; # ओ LETTER O
55 $ena=\uE029; # compatibility letter; with virama it is romanized like $na$nukta (n + U+0331)
67 $ela=\uE034; # compatibility letter; with virama it is romanized like $la$nukta (l + U+0331)
76 $avagraha=\uE03D; # SIGN AVAGRAHA
77 # $<vowel> (no 'w' prefix) represents the dependent form
# Each variable below names the private-use code point used for a dependent
# vowel sign; these four are also members of the $x set defined later.
85 $ce=\uE045; # VOWEL SIGN CANDRA E
86 $se=\uE046; # VOWEL SIGN SHORT E
89 $co=\uE049; # VOWEL SIGN CANDRA O
90 $so=\uE04A; # VOWEL SIGN SHORT O
# The next four rules have an empty right-hand side: the matched code point is
# consumed and nothing is written for it.
97 \uE051→; # UNMAPPED STRESS SIGN UDATTA
98 \uE052→; # UNMAPPED STRESS SIGN ANUDATTA
99 \uE053→; # UNMAPPED GRAVE ACCENT
100 \uE054→; # UNMAPPED ACUTE ACCENT
# Length marks occupy the three consecutive code points U+E055-U+E057.
101 $lm = \uE055;# Telugu Length Mark
102 $ailm=\uE056;# AI Length Mark
103 $aulm=\uE057;# AU Length Mark
104 # Urdu compatibility forms
# Digits zero through nine are assigned the ten consecutive code points
# U+E066-U+E06F, in ascending order.
119 $zero=\uE066; # DIGIT ZERO
120 $one=\uE067; # DIGIT ONE
121 $two=\uE068; # DIGIT TWO
122 $three=\uE069; # DIGIT THREE
123 $four=\uE06A; # DIGIT FOUR
124 $five=\uE06B; # DIGIT FIVE
125 $six=\uE06C; # DIGIT SIX
126 $seven=\uE06D; # DIGIT SEVEN
127 $eight=\uE06E; # DIGIT EIGHT
128 $nine=\uE06F; # DIGIT NINE
# Character classes. $x, $vowels and $forceIndependentMatra are used as rule
# contexts further down in this file.
# $depVowelAbove covers U+E03E-U+E040 and U+E045-U+E04C; the latter range
# includes $ce, $se, $co and $so.
133 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
134 $depVowelBelow=[\uE041-\uE044];
135 # $x was originally called '§'; $z was '%'
# $x: every dependent vowel sign. Rules use it as trailing context ("... }$x")
# to detect that a vowel sign follows.
136 $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
# $z: the lowercase ASCII letters other than a, e, i, o, u.
137 $z=[bcdfghjklmnpqrstvwxyz];
# $vowels: a, e, i, o, u, r plus combining macron (U+0304), ring below (U+0325)
# and breve (U+0306). Used as leading context ("$vowels{ ...") by the
# independent-vowel rules.
138 $vowels=[aeiour\u0304\u0325\u0306];
# $forceIndependentMatra: any character that is neither a letter ([:L:]) nor in
# the combining-mark range U+0300-U+034C. Used as leading context by the
# dependent-vowel rules.
139 $forceIndependentMatra = [^[[:L:][\u0300-\u034C]]];
140 ######################################################################
141 # convert from Native letters to Latin letters
142 ######################################################################
143 # transliterations for anusvara
# In each rule the set after '}' is trailing context: it must follow $anusvara
# for the rule to match, but only $anusvara itself is replaced. The nasal that
# is written therefore depends on which consonant comes next.
# ($anusvara and the consonant variables are defined outside this excerpt.)
144 $anusvara} [$ka$kha$ga$gha$nga] → n\u0307; # n + combining dot above
145 $anusvara} [$ca$cha$ja$jha$nya] → n\u0304; # n + combining macron
146 $anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; # n + combining dot below
147 $anusvara} [$ta$tha$da$dha$na] → n; # plain n
148 $anusvara} [$pa$pha$ba$bha$ma] → m; # plain m
149 $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n; # plain n
# Consonant + nukta, and the single-code-point compatibility letters.
# Where all three rules for a consonant are visible, the pattern is:
#   consonant+nukta } $x      -> bare Latin consonant (a vowel sign follows and
#                                is handled by its own rule)
#   consonant+nukta+virama    -> bare Latin consonant (the virama is consumed)
#   consonant+nukta           -> Latin consonant followed by "a"
# The more specific rules come first; do not reorder.
# Diacritics used: U+0307 dot above, U+0331 macron below, U+0323 dot below.
152 $ya$nukta}$x → y\u0307;
153 $ya$nukta$virama → y\u0307;
154 $ya$nukta → y\u0307a;
155 $la$nukta }$x → l\u0331;
156 $la$nukta$virama → l\u0331;
157 $la$nukta → l\u0331a;
158 $na$nukta }$x → n\u0331;
159 $na$nukta$virama → n\u0331;
160 $na$nukta → n\u0331a;
# $ena: compatibility letter, same output as $na$nukta.
162 $ena$virama → n\u0331;
166 $ka$nukta$virama → q;
168 $kha$nukta }$x → k\u0331h\u0331;
169 $kha$nukta$virama → k\u0331h\u0331;
170 $kha$nukta → k\u0331h\u0331a;
# $ukha: same output as $kha$nukta.
171 $ukha$virama → k\u0331h\u0331;
172 $ukha → k\u0331h\u0331a;
174 $ga$nukta }$x → g\u0307;
175 $ga$nukta$virama → g\u0307;
176 $ga$nukta → g\u0307a;
179 $ja$nukta$virama → z;
181 $ddha$nukta}$x → r\u0323h;
182 $ddha$nukta$virama → r\u0323h;
183 $ddha$nukta → r\u0323ha;
# $uddha: output matches $dda$nukta$virama below (no trailing "h"), not
# $ddha$nukta$virama above.
185 $uddha$virama → r\u0323;
188 $dda$nukta}$x → r\u0323;
189 $dda$nukta$virama → r\u0323;
190 $dda$nukta → r\u0323a;
192 $pha$nukta$virama → f;
197 $ra$nukta}$x → r\u0331;
198 $ra$nukta$virama → r\u0331;
199 $ra$nukta → r\u0331a;
# $lla$nukta is written the same way as $la$nukta (l + macron below).
200 $lla$nukta}$x → l\u0331;
201 $lla$nukta$virama → l\u0331;
202 $lla$nukta → l\u0331a;
# $ela: compatibility letter, same output as $la$nukta / $lla$nukta.
204 $ela$virama → l\u0331;
# $uya: same output as $ya$nukta.
207 $uya$virama → y\u0307;
# Plain consonant rules (excerpt).
# '' is ICU rule syntax for one literal apostrophe. Rules of the form
# "C $virama } $ha -> c''" write the consonant plus an apostrophe when the next
# letter is $ha, presumably so that the result stays distinct from the
# aspirated letter, which is written with a plain "h" (e.g. $ttha$virama below).
244 $tta$virama}$ha→t\u0323'';
249 $ttha$virama→t\u0323h;
# NOTE(review): unlike the sibling rules, this one has no $virama and requires
# "$x $ha" as trailing context, so it emits the apostrophe when $dda is followed
# by a vowel sign and then $ha. Confirm whether "$dda$virama}$ha" was intended.
251 $dda}$x$ha→d\u0323'';
256 $ddha$virama→d\u0323h;
# Same apostrophe separator, here when the following letter is $ttha / $ddha.
262 $ta$virama}$ttha→t'';
272 $da$virama}$ddha→d'';
311 $vva$virama}$ha→w\u0307'';
315 $rra$virama}$ha→r\u0331'';
323 $lla$virama}$ha→l\u0323'';
# $sa + nukta is written s + combining acute accent (U+0301).
337 $sa$nukta}$x→s\u0301;
338 $sa$nukta$virama→s\u0301;
350 # dependent vowels (should never occur except following consonants)
# The set before '{' is leading context: these rules match a dependent vowel
# sign only when the preceding character is in $forceIndependentMatra (not a
# letter and not a combining mark in U+0300-U+034C). The vowel is then written
# with a leading U+0314 (combining reversed comma above) as a marker.
351 $forceIndependentMatra{$aa → \u0314a\u0304;
352 $forceIndependentMatra{$ai → \u0314ai;
353 $forceIndependentMatra{$au → \u0314au;
354 $forceIndependentMatra{$ii → \u0314i\u0304;
355 $forceIndependentMatra{$i → \u0314i;
356 $forceIndependentMatra{$uu → \u0314u\u0304;
357 $forceIndependentMatra{$u → \u0314u;
358 $forceIndependentMatra{$rrh → \u0314r\u0325\u0304;
359 $forceIndependentMatra{$rh → \u0314r\u0325;
360 $forceIndependentMatra{$llh → \u0314l\u0325\u0304;
361 $forceIndependentMatra{$lh → \u0314l\u0325;
362 $forceIndependentMatra{$e → \u0314e\u0304;
363 $forceIndependentMatra{$o → \u0314o\u0304;
# Candra vowels take a breve (U+0306); short vowels take no length diacritic.
365 $forceIndependentMatra{$ce → \u0314e\u0306;
366 $forceIndependentMatra{$co → \u0314o\u0306;
367 $forceIndependentMatra{$se → \u0314e;
368 $forceIndependentMatra{$so → \u0314o;
# Empty right-hand side: a stray nukta or virama in this position is dropped.
369 $forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
370 $forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
# Context-free forms of the same vowel signs: no U+0314 marker is written.
# They apply only where no earlier, context-restricted rule matched.
378 $rrh → r\u0325\u0304;
380 $llh → l\u0325\u0304;
389 # dependent vowels when following independent vowels. Generally illegal; kept only for roundtripping
# Each rule matches an independent vowel only when a dependent vowel sign ($x)
# follows as trailing context. The vowel is written with a trailing U+0314
# (combining reversed comma above) as a marker.
390 $waa} $x → a\u0304\u0314;
393 $wii} $x → i\u0304\u0314;
395 $wuu} $x → u\u0304\u0314;
397 $wrr} $x → r\u0325\u0304\u0314;
398 $wr } $x → r\u0325\u0314;
399 $wll} $x → l\u0325\u0304\u0314;
400 $wl } $x → l\u0325\u0314;
401 $we } $x → e\u0304\u0314;
402 $wo } $x → o\u0304\u0314;
405 $wce} $x → e\u0306\u0314;
406 $wco} $x → o\u0306\u0314;
# '' is a literal apostrophe in ICU rule syntax: the output is apostrophe,
# "om", then U+0314.
409 $om} $x → ''om\u0314;
410 # independent vowels when preceded by vowels
# Leading context "$vowels{": these rules match an independent vowel only when
# the character before it is in $vowels (a Latin vowel letter, r, or one of the
# combining marks U+0304/U+0325/U+0306). '' is ICU rule syntax for a literal
# apostrophe, so the vowel is written with an apostrophe in front of it.
411 $vowels{$waa → ''a\u0304;
414 $vowels{$wii → ''i\u0304;
416 $vowels{$wuu → ''u\u0304;
418 $vowels{$wrr → ''r\u0325\u0304;
419 $vowels{$wr → ''r\u0325;
420 $vowels{$wll → ''l\u0325\u0304;
421 $vowels{$wl → ''l\u0325;
422 $vowels{$we → ''e\u0304;
423 $vowels{$wo → ''o\u0304;
426 $vowels{$wce → ''e\u0306;
427 $vowels{$wco → ''o\u0306;
430 # independent vowels (otherwise)
# Context-free fallbacks: same letters as above, without the apostrophe.
438 $wrr → r\u0325\u0304;
440 $wll → l\u0325\u0304;
# Chandrabindu. The two-character sequence rule is listed first so that it is
# tried before the single-character rule.
453 $chandrabindu$anusvara→\u0303; # sequence is written as a combining tilde only
454 $chandrabindu → m\u0310; # m + combining candrabindu (U+0310)
# Signs without a Latin equivalent: each rule below has an empty right-hand
# side, so the matched code point is consumed and nothing is written for it.
474 \uE070→; # ABBREVIATION SIGN
475 # LETTER RA WITH MIDDLE DIAGONAL
479 # LETTER RA WITH LOWER DIAGONAL
483 \uE073→; # RUPEE MARK
484 \uE074→; # RUPEE SIGN
485 \uE075→; # CURRENCY NUMERATOR ONE
486 \uE076→; # CURRENCY NUMERATOR TWO
487 \uE077→; # CURRENCY NUMERATOR THREE
488 \uE078→; # CURRENCY NUMERATOR FOUR
489 \uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
490 \uE07A→; # CURRENCY DENOMINATOR SIXTEEN
497 \uE004→; # DEVANAGARI VOWEL SIGN SHORT A