1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
4 # File: InterIndic_Latin.txt
15 # w←vowel→ represents the stand-alone form
24 $wce=\uE00D; # LETTER CANDRA E
25 $wse=\uE00E; # LETTER SHORT E
26 $we=\uE00F; # ए LETTER E
28 $wco=\uE011; # LETTER CANDRA O
29 $wso=\uE012; # LETTER SHORT O
30 $wo=\uE013; # ओ LETTER O
52 $ena=\uE029; # compatibility
64 $ela=\uE034; # compatibility
73 $avagraha=\uE03D; # SIGN AVAGRAHA
74 # ←vowel→ represents the dependent form
82 $ce=\uE045; # VOWEL SIGN CANDRA E
83 $se=\uE046; # VOWEL SIGN SHORT E
86 $co=\uE049; # VOWEL SIGN CANDRA O
87 $so=\uE04A; # VOWEL SIGN SHORT O
# The next four rules have an empty right-hand side: the matched code point
# is consumed and nothing is emitted for it.
94 \uE051→; # UNMAPPED STRESS SIGN UDATTA
95 \uE052→; # UNMAPPED STRESS SIGN ANUDATTA
96 \uE053→; # UNMAPPED GRAVE ACCENT
97 \uE054→; # UNMAPPED ACUTE ACCENT
98 $lm = \uE055;# Telugu Length Mark
99 $ailm=\uE056;# AI Length Mark
100 $aulm=\uE057;# AU Length Mark
101 # Urdu compatibility forms
# Digits occupy the contiguous range U+E066..U+E06F.
116 $zero=\uE066; # DIGIT ZERO
117 $one=\uE067; # DIGIT ONE
118 $two=\uE068; # DIGIT TWO
119 $three=\uE069; # DIGIT THREE
120 $four=\uE06A; # DIGIT FOUR
121 $five=\uE06B; # DIGIT FIVE
122 $six=\uE06C; # DIGIT SIX
123 $seven=\uE06D; # DIGIT SEVEN
124 $eight=\uE06E; # DIGIT EIGHT
125 $nine=\uE06F; # DIGIT NINE
# Helper sets used as rule contexts in the rules that follow.
130 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # code points U+E03E..U+E040 and U+E045..U+E04C
131 $depVowelBelow=[\uE041-\uE044]; # code points U+E041..U+E044
132 # $x was originally called '§'; $z was '%'
133 $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; # the dependent (un-prefixed) vowel signs
134 $z=[bcdfghjklmnpqrstvwxyz]; # lowercase ASCII letters other than a, e, i, o, u
135 $vowels=[aeiour\u0304\u0325\u0306]; # Latin a e i o u r plus combining macron, ring below and breve
136 $forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; # any character that is neither a letter nor in U+0300..U+034C
137 ######################################################################
138 # convert from Native letters to Latin letters
139 ######################################################################
140 #transliterations for anusvara
# In each rule the set after '}' is trailing context: it must follow the
# anusvara for the rule to match, but it is not consumed or replaced.
141 $anusvara} [$ka$kha$ga$gha$nga] → n\u0307; # before ka/kha/ga/gha/nga: n + combining dot above
142 $anusvara} [$ca$cha$ja$jha$nya] → n\u0304; # before ca/cha/ja/jha/nya: n + combining macron
143 $anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; # before tta/ttha/dda/ddha/nna: n + combining dot below
144 $anusvara} [$ta$tha$da$dha$na] → n; # before ta/tha/da/dha/na: plain n
145 $anusvara} [$pa$pha$ba$bha$ma] → m; # before pa/pha/ba/bha/ma: m
146 $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n; # before ya/ra/lla/la/va/ssa/sha/sa/ha: plain n
# Consonant + nukta rules. Where all three forms are present they follow
# one pattern, and their order matters (the more specific rules come first):
#   consonant nukta } $x      -> Latin consonant only; the following
#                                dependent vowel sign is context and is
#                                left for a later rule
#   consonant nukta virama    -> Latin consonant only; the virama is consumed
#   consonant nukta           -> Latin consonant followed by "a"
# Combining marks used in the output: U+0307 dot above, U+0331 macron below,
# U+0323 dot below.
# NOTE(review): $ena/$ela are declared as compatibility letters; $ukha,
# $uddha and $uya are presumably the Urdu compatibility forms - confirm
# against their definitions, which are outside this excerpt.
149 $ya$nukta}$x → y\u0307;
150 $ya$nukta$virama → y\u0307;
151 $ya$nukta → y\u0307a;
152 $la$nukta }$x → l\u0331;
153 $la$nukta$virama → l\u0331;
154 $la$nukta → l\u0331a;
155 $na$nukta }$x → n\u0331;
156 $na$nukta$virama → n\u0331;
157 $na$nukta → n\u0331a;
159 $ena$virama → n\u0331;
163 $ka$nukta$virama → q;
165 $kha$nukta }$x → k\u0331h\u0331;
166 $kha$nukta$virama → k\u0331h\u0331;
167 $kha$nukta → k\u0331h\u0331a;
168 $ukha$virama → k\u0331h\u0331;
169 $ukha → k\u0331h\u0331a;
171 $ga$nukta }$x → g\u0307;
172 $ga$nukta$virama → g\u0307;
173 $ga$nukta → g\u0307a;
176 $ja$nukta$virama → z;
178 $ddha$nukta}$x → r\u0323h;
179 $ddha$nukta$virama → r\u0323h;
180 $ddha$nukta → r\u0323ha;
182 $uddha$virama → r\u0323;
185 $dda$nukta}$x → r\u0323;
186 $dda$nukta$virama → r\u0323;
187 $dda$nukta → r\u0323a;
189 $pha$nukta$virama → f;
194 $ra$nukta}$x → r\u0331;
195 $ra$nukta$virama → r\u0331;
196 $ra$nukta → r\u0331a;
197 $lla$nukta}$x → l\u0331;
198 $lla$nukta$virama → l\u0331;
199 $lla$nukta → l\u0331a;
201 $ela$virama → l\u0331;
204 $uya$virama → y\u0307;
# Consonant + virama rules. In ICU rule syntax two consecutive single quotes
# stand for one literal apostrophe, so the rules ending in that pair emit
# the Latin consonant followed by an apostrophe when the consonant is
# followed (as context, after '}') by the letter shown. Presumably the
# apostrophe keeps the following letter from being read as part of a
# digraph such as the "h" emitted by the ttha/ddha rules - confirm against
# the reverse (Latin to InterIndic) rules.
241 $tta$virama}$ha→t\u0323'';
246 $ttha$virama→t\u0323h;
# NOTE(review): the context here is $x$ha with no virama, unlike the sibling
# rules of the form "consonant virama } ha" - confirm this is intended.
248 $dda}$x$ha→d\u0323'';
253 $ddha$virama→d\u0323h;
259 $ta$virama}$ttha→t'';
269 $da$virama}$ddha→d'';
308 $vva$virama}$ha→w\u0307'';
312 $rra$virama}$ha→r\u0331'';
320 $lla$virama}$ha→l\u0323'';
# sa + nukta maps to s + combining acute accent (U+0301).
334 $sa$nukta}$x→s\u0301;
335 $sa$nukta$virama→s\u0301;
347 # dependent vowels (should never occur except following consonants)
# The set before '{' is leading context: each rule applies only when the
# vowel sign is preceded by a character in $forceIndependentMatra. The
# output is the Latin vowel prefixed with U+0314 (combining reversed comma
# above).
348 $forceIndependentMatra{$aa → \u0314a\u0304;
349 $forceIndependentMatra{$ai → \u0314ai;
350 $forceIndependentMatra{$au → \u0314au;
351 $forceIndependentMatra{$ii → \u0314i\u0304;
352 $forceIndependentMatra{$i → \u0314i;
353 $forceIndependentMatra{$uu → \u0314u\u0304;
354 $forceIndependentMatra{$u → \u0314u;
355 $forceIndependentMatra{$rrh → \u0314r\u0325\u0304;
356 $forceIndependentMatra{$rh → \u0314r\u0325;
357 $forceIndependentMatra{$llh → \u0314l\u0325\u0304;
358 $forceIndependentMatra{$lh → \u0314l\u0325;
359 $forceIndependentMatra{$e → \u0314e\u0304;
360 $forceIndependentMatra{$o → \u0314o\u0304;
362 $forceIndependentMatra{$ce → \u0314e\u0306;
363 $forceIndependentMatra{$co → \u0314o\u0306;
364 $forceIndependentMatra{$se → \u0314e;
365 $forceIndependentMatra{$so → \u0314o;
# Empty right-hand side: a nukta or virama in this position is dropped.
366 $forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
367 $forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
# Dependent vowel signs with no special context: r / l + ring below (U+0325)
# + macron (U+0304).
375 $rrh → r\u0325\u0304;
377 $llh → l\u0325\u0304;
386 # Independent vowels followed by a dependent vowel sign. Such sequences are
# generally illegal; these rules exist only for round-tripping.
# The dependent sign ($x) is trailing context only; the independent vowel is
# emitted with a trailing U+0314 (combining reversed comma above).
387 $waa} $x → a\u0304\u0314;
390 $wii} $x → i\u0304\u0314;
392 $wuu} $x → u\u0304\u0314;
394 $wrr} $x → r\u0325\u0304\u0314;
395 $wr } $x → r\u0325\u0314;
396 $wll} $x → l\u0325\u0304\u0314;
397 $wl } $x → l\u0325\u0314;
398 $we } $x → e\u0304\u0314;
399 $wo } $x → o\u0304\u0314;
402 $wce} $x → e\u0306\u0314;
403 $wco} $x → o\u0306\u0314;
# The doubled single quote is the rule-syntax escape for one literal
# apostrophe, emitted here before "om".
406 $om} $x → ''om\u0314;
407 # independent vowels when preceded by vowels
# $vowels is a set of Latin characters, so the leading context matches text
# that is already in Latin form. The output starts with a literal apostrophe
# (written as a doubled single quote in rule syntax) ahead of the vowel.
408 $vowels{$waa → ''a\u0304;
411 $vowels{$wii → ''i\u0304;
413 $vowels{$wuu → ''u\u0304;
415 $vowels{$wrr → ''r\u0325\u0304;
416 $vowels{$wr → ''r\u0325;
417 $vowels{$wll → ''l\u0325\u0304;
418 $vowels{$wl → ''l\u0325;
419 $vowels{$we → ''e\u0304;
420 $vowels{$wo → ''o\u0304;
423 $vowels{$wce → ''e\u0306;
424 $vowels{$wco → ''o\u0306;
427 # independent vowels (otherwise)
# No context and no apostrophe: these apply when the rules above did not match.
435 $wrr → r\u0325\u0304;
437 $wll → l\u0325\u0304;
# The two-character rule must come first so that it wins over the
# single-character rule below it.
450 $chandrabindu$anusvara→\u0303; # combining tilde
451 $chandrabindu → m\u0310; # m + combining candrabindu
# Rules with an empty right-hand side: the code point is consumed and
# nothing is emitted for it.
471 \uE070→; # ABBREVIATION SIGN
472 # LETTER RA WITH MIDDLE DIAGONAL
476 # LETTER RA WITH LOWER DIAGONAL
480 \uE073→; # RUPEE MARK
481 \uE074→; # RUPEE SIGN
482 \uE075→; # CURRENCY NUMERATOR ONE
483 \uE076→; # CURRENCY NUMERATOR TWO
484 \uE077→; # CURRENCY NUMERATOR THREE
485 \uE078→; # CURRENCY NUMERATOR FOUR
486 \uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
487 \uE07A→; # CURRENCY DENOMINATOR SIXTEEN
494 \uE004→; # DEVANAGARI VOWEL SIGN SHORT A