1 # ***************************************************************************
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
6 # ***************************************************************************
7 # File: Latin_InterIndic.txt
19 # w←vowel→ represents the stand-alone form
# Each variable below names a single private-use code point (U+E0xx); the
# trailing comment on each line gives the character that code point stands for.
# NOTE(review): these are presumably the InterIndic pivot code points implied
# by the file name in the header -- confirm against the InterIndic tables.
28 $wce=\uE00D; # LETTER CANDRA E
29 $wse=\uE00E; # LETTER SHORT E
30 $we=\uE00F; # ए LETTER E
32 $wco=\uE011; # LETTER CANDRA O
33 $wso=\uE012; # LETTER SHORT O
34 $wo=\uE013; # ओ LETTER O
56 $ena=\uE029; # compatibility
67 $ela=\uE034; # compatibility
77 $avagraha=\uE03D; # SIGN AVAGRAHA
78 # ←vowel→ represents the dependent form
# (the same vowel names as above, without the leading "w")
86 $ce=\uE045; # VOWEL SIGN CANDRA E
87 $se=\uE046; # VOWEL SIGN SHORT E
90 $co=\uE049; # VOWEL SIGN CANDRA O
91 $so=\uE04A; # VOWEL SIGN SHORT O
# The four lines below are commented-out rules: these stress/accent signs are
# deliberately left unmapped.
98 # \u0951→; # UNMAPPED STRESS SIGN UDATTA
99 # \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
100 # \u0953→; # UNMAPPED GRAVE ACCENT
101 # \u0954→; # UNMAPPED ACUTE ACCENT
# Length marks occupy three consecutive code points, U+E055..U+E057.
102 $lm = \uE055;# Telugu Length Mark
103 $ailm=\uE056;# AI Length Mark
104 $aulm=\uE057;# AU Length Mark
105 # Urdu compatibility forms
# Digit variables: $zero..$nine are bound to the ten consecutive private-use
# code points U+E066..U+E06F, in numeric order.
120 $zero=\uE066; # DIGIT ZERO
121 $one=\uE067; # DIGIT ONE
122 $two=\uE068; # DIGIT TWO
123 $three=\uE069; # DIGIT THREE
124 $four=\uE06A; # DIGIT FOUR
125 $five=\uE06B; # DIGIT FIVE
126 $six=\uE06C; # DIGIT SIX
127 $seven=\uE06D; # DIGIT SEVEN
128 $eight=\uE06E; # DIGIT EIGHT
129 $nine=\uE06F; # DIGIT NINE
131 # For all other scripts
150 # ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
# Character sets used as context by the rules that follow.
# $depVowelAbove: code points U+E03E..U+E040 and U+E045..U+E04C. It is used as
# the before-context that turns '~' into $anusvara.
151 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
# $depVowelBelow: code points U+E041..U+E044. It is used as the before-context
# that turns '~' into $chandrabindu.
152 $depVowelBelow=[\uE041-\uE044];
# $endThing: the two danda punctuation marks.
153 $endThing=[$danda$doubleDanda];
154 # $x was originally called '§'; $z was '%'
# $x: the virama together with the dependent vowel signs listed here.
155 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
# $z: the lowercase ASCII letters other than the vowels a, e, i, o, u.
156 $z=[bcdfghjklmnpqrstvwxyz];
# $consonants: union of the range $ka..$ha, the Latin consonants in $z, and the
# KA..HA ranges of Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil,
# Telugu, Kannada and Malayalam (in that order).
157 $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
# A bare U+0303 (COMBINING TILDE) becomes chandrabindu followed by anusvara.
159 \u0303→$chandrabindu$anusvara;
# "m" + U+0310 (COMBINING CANDRABINDU) becomes chandrabindu alone.
160 m\u0310→$chandrabindu;
163 # convert to independent forms at start of word or syllable:
164 # dependent forms for roundtrip
# U+0314 (COMBINING REVERSED COMMA ABOVE) followed by r or l carrying
# U+0325 (ring below) and U+0304 (macron) yields the dependent long
# vocalic sign; the U+0314 is part of the match and is consumed.
173 \u0314r\u0325\u0304→$rrh;
175 \u0314l\u0325\u0304→$llh;
185 # preceded by consonants
# In each rule below, "$consonants{" is a before-context: the preceding
# character must be in $consonants, but it is not itself replaced. Only the
# Latin vowel text after the "{" is rewritten, to a dependent vowel sign.
# Combining marks used: U+0304 macron, U+0325 ring below, U+0306 breve.
186 $consonants{ a\u0304→$aa;
190 $consonants{ i\u0304→$ii;
192 $consonants{ u\u0304→$uu;
# Longer patterns come before their prefixes so they are tried first.
194 $consonants{ r\u0325\u0304→$rrh;
# An "a" directly after r + ring below is consumed along with it.
195 $consonants{ r\u0325a→$rh;
196 $consonants{ r\u0325→$rh;
197 $consonants{ l\u0325\u0304→$llh;
199 $consonants{ l\u0325→$lh;
200 $consonants{ e\u0304→$e;
201 $consonants{ o\u0304→$o;
202 $consonants{ e\u0306→$ce;
203 $consonants{ o\u0306→$co;
206 # e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
# Rules for "n" (plain or with a diacritic) conditioned on what follows.
# Text after "}" is an after-context: it must match but is not consumed.
#
# Plain n before r/l + ring below (U+0325) becomes NA + virama, with the
# cursor ("|") left before the virama so later rules can re-examine it.
# These precede the generic "n}[ylvshr]" rule below, which would otherwise
# match first (presumably to keep a full NA before a vocalic r/l).
228 n}r\u0325 → $na|$virama;
229 n}l\u0325 → $na|$virama;
# n + dot above (U+0307) before k/g, or before another n + dot above.
231 n\u0307}[kg] → $anusvara;
232 n\u0307}n\u0307 → $anusvara;
# n + macron (U+0304) before c/j, or before n + tilde (U+0303).
233 n\u0304}[cj] → $anusvara;
234 n\u0304}n\u0303 → $anusvara;
# n + dot below (U+0323) before t, d or n that also carries a dot below.
235 n\u0323}[tdn]\u0323 → $anusvara;
# Plain n before y, l, v, s, h or r.
238 n}[ylvshr] → $anusvara;
# Latin consonants carrying a combining mark. Each rule emits the target
# consonant followed by $virama, with the cursor ("|") placed before the
# virama so that subsequent rules beginning with $virama can still match it.
# Where an aspirated form ("...h") exists, it is listed before the shorter
# unaspirated pattern so the longer match is tried first.
# Marks used: U+0331 macron below, U+0307 dot above, U+0303 tilde,
# U+0323 dot below, U+0301 acute.
242 k\u0331h\u0331→$ukha |$virama;
243 g\u0307→ $ugha | $virama;
248 y\u0307→$uya|$virama;
249 l\u0331→$ela|$virama;
250 n\u0331→$ena|$virama;
251 n\u0307→$nga|$virama;
252 n\u0303→$nya|$virama;
253 n\u0323→$nna|$virama;
254 t\u0323h→$ttha|$virama;
255 t\u0323→$tta|$virama;
256 r\u0323h→$udha|$virama;
257 r\u0323→$uddha|$virama;
258 d\u0323h→$ddha|$virama;
259 d\u0323→$dda|$virama;
282 r\u0331→$rra|$virama;
284 l\u0323→$lla|$virama;
287 w\u0307→$vva|$virama;
291 s\u0323→$ssa|$virama;
292 s\u0301→$sha|$virama;
# A danda immediately followed by a literal "." becomes a double danda
# (both characters are consumed).
296 $danda'.'→$doubleDanda;
# A literal "~" is rewritten according to the dependent vowel before it
# (before-context, not consumed): anusvara after a vowel in $depVowelAbove,
# chandrabindu after a vowel in $depVowelBelow.
297 $depVowelAbove{'~'→$anusvara;
298 $depVowelBelow{'~'→$chandrabindu;
299 # convert to dependent forms after consonant with no vowel:
300 # e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
# Here $virama is part of the matched text (no context brace), so it is
# replaced together with the Latin vowel by the dependent sign.
# Marks used: U+0325 ring below, U+0304 macron.
312 $virama r\u0325\u0304→$rrh;
# A trailing "a" after r + ring below is consumed as part of the match.
314 $virama r\u0325a→$rh;
316 $virama l\u0325\u0304→$llh;
326 # otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
# In rule syntax '' (two single quotes) denotes one literal apostrophe.
# Virama + apostrophe + vowel: all three are consumed and replaced by the
# stand-alone ("w"-prefixed) vowel variable alone.
# NOTE(review): the example above shows the virama surviving in the result,
# but these rules do not emit one -- confirm which is intended.
328 $virama''a\u0304→$waa;
332 $virama''i\u0304→$wii;
335 $virama''u\u0304→$wuu;
338 $virama''r\u0325\u0304→$wrr;
340 $virama''r\u0325→$wr;
341 $virama''l\u0325\u0304→$wll;
343 $virama''l\u0325→$wl;
344 $virama''e\u0304→$we;
345 $virama''o\u0304→$wo;
347 $virama''e\u0306→$wce;
348 $virama''o\u0306→$wco;
# Same mapping when the apostrophe is not preceded by a virama.
359 ''r\u0325\u0304→$wrr;
361 ''l\u0325\u0304→$wll;
# A virama followed by a Latin consonant from $z, or by a space, is mapped to
# itself; the following character is after-context only and is not consumed.
# NOTE(review): presumably this shields the virama from a later, more general
# virama rule that is outside this view -- confirm.
370 $virama } [$z] → $virama;
371 $virama } ' ' → $virama ;
373 ʔ→$dgs; # Glottal Stop