1 #--------------------------------------------------------------------
2 # Copyright (c) 1999-2004, International Business Machines
3 # Corporation and others. All Rights Reserved.
4 #--------------------------------------------------------------------
# Variable definitions for stand-alone letters and signs.
# Each name is bound to exactly one code point in the \ue0xx range; rules
# further down refer to several of these names (e.g. $we, $wo, $ena, $ela).
# NOTE(review): the \ue0xx values fall in the Unicode Private Use Area and
# look like placeholders for an intermediate encoding; the "\u090f" and
# "\u0913" remarks presumably name the Devanagari letter each placeholder
# stands for -- confirm against the companion tables.
14 # w<vowel> represents the stand-alone form
23 $wce=\ue00d; # LETTER CANDRA E
24 $wse=\ue00e; # LETTER SHORT E
25 $we=\ue00f; # \u090f LETTER E
27 $wco=\ue011; # LETTER CANDRA O
28 $wso=\ue012; # LETTER SHORT O
29 $wo=\ue013; # \u0913 LETTER O
51 $ena=\ue029; #compatibility
62 $ela=\ue034; #compatibility
72 $avagraha=\ue03d; # SIGN AVAGRAHA
# Variable definitions for dependent vowel signs and length marks.
# The un-prefixed names ($ce, $se, $co, $so) are the dependent counterparts
# of the w-prefixed stand-alone names; each is bound to one \ue0xx code point.
73 # <vowel> represents the dependent form
81 $ce=\ue045; #VOWEL SIGN CANDRA E
82 $se=\ue046; #VOWEL SIGN SHORT E
85 $co=\ue049; # VOWEL SIGN CANDRA O
86 $so=\ue04a; # VOWEL SIGN SHORT O
# The four entries below are comments only: \u0951-\u0954 are recorded as
# UNMAPPED and no variable or rule is defined for them here.
93 # \u0951>; # UNMAPPED STRESS SIGN UDATTA
94 # \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
95 # \u0953>; # UNMAPPED GRAVE ACCENT
96 # \u0954>; # UNMAPPED ACUTE ACCENT
# Length marks occupy the three consecutive code points \ue055-\ue057.
97 $lm = \ue055;# Telugu Length Mark
98 $ailm=\ue056;# AI Length Mark
99 $aulm=\ue057;# AU Length Mark
100 #urdu compatibility forms
# Digit variables: $zero through $nine are bound, in ascending order, to the
# ten consecutive code points \ue066-\ue06f.
115 $zero=\ue066; # DIGIT ZERO
116 $one=\ue067; # DIGIT ONE
117 $two=\ue068; # DIGIT TWO
118 $three=\ue069; # DIGIT THREE
119 $four=\ue06a; # DIGIT FOUR
120 $five=\ue06b; # DIGIT FIVE
121 $six=\ue06c; # DIGIT SIX
122 $seven=\ue06d; # DIGIT SEVEN
123 $eight=\ue06e; # DIGIT EIGHT
124 $nine=\ue06f; # DIGIT NINE
125 # For all other scripts
142 # \u0970>; # UNMAPPED ABBREVIATION SIGN
# Character sets shared by the rules below.
#   $depVowelAbove, $depVowelBelow
#       Two non-overlapping groups of code points inside \ue03e-\ue04c; they
#       serve as the preceding context of the '~' rules.
#   $endThing
#       The set { $danda, $doubleDanda }.
#   $x  $virama together with a list of dependent vowel-sign variables.
#   $z  The 21 lowercase ASCII letters other than a, e, i, o, u.
#   $consonants
#       Union of the $ka-$ha range, $z, and nine literal ranges of the form
#       \u..15-\u..39 / \u..95-\u..b9 running from \u0915 up to \u0d39. It is
#       the "preceded by a consonant" context of the vowel rules.
# NOTE(review): the nine literal ranges presumably cover the consonant
# letters of the Devanagari through Malayalam blocks -- confirm.
143 $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
144 $depVowelBelow=[\ue041-\ue044];
145 $endThing=[$danda$doubleDanda];
146 # $x was originally called '&'; $z was '%'
147 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
148 $z=[bcdfghjklmnpqrstvwxyz];
149 $consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
# Conversion rules of the form  source > target;
# NOTE(review): syntax is read here as ICU transform rules -- confirm.
# Combining marks appearing in the sources:
#   \u0303 COMBINING TILDE              \u0310 COMBINING CANDRABINDU
#   \u0314 COMBINING REVERSED COMMA ABOVE
#   \u0325 COMBINING RING BELOW         \u0304 COMBINING MACRON
# A bare combining tilde yields $chandrabindu followed by $anusvara; "m" plus
# combining candrabindu yields $chandrabindu alone.
151 \u0303>$chandrabindu$anusvara;
152 m\u0310>$chandrabindu;
155 # convert to independent forms at start of word or syllable:
156 # dependent forms for roundtrip
# The \u0314 prefix followed by long vocalic r / l (ring below + macron) maps
# directly to the dependent signs $rrh / $llh.
165 \u0314r\u0325\u0304>$rrh;
167 \u0314l\u0325\u0304>$llh;
178 # preceded by consonants
# Latin vowel + diacritic -> dependent vowel sign, applied only when the
# vowel directly follows a member of $consonants.
# NOTE(review): "context { source > target" is read as ICU transform rule
# syntax, where the part before "{" must match but is not replaced -- confirm.
# Marks used: \u0304 COMBINING MACRON, \u0306 COMBINING BREVE,
#             \u0325 COMBINING RING BELOW.
# Keep the longer sources ahead of their prefixes (r\u0325\u0304 and r\u0325a
# before r\u0325; l\u0325\u0304 before l\u0325): the ordering looks
# deliberate so the shorter rule does not win first.
179 $consonants{ a\u0304>$aa;
183 $consonants{ i\u0304>$ii;
185 $consonants{ u\u0304>$uu;
187 $consonants{ r\u0325\u0304>$rrh;
188 $consonants{ r\u0325a>$rh;
189 $consonants{ r\u0325>$rh;
190 $consonants{ l\u0325\u0304>$llh;
192 $consonants{ l\u0325>$lh;
# Macron forms of e/o map to $e/$o; breve forms map to the candra signs.
193 $consonants{ e\u0304>$e;
194 $consonants{ o\u0304>$o;
195 $consonants{ e\u0306>$ce;
196 $consonants{ o\u0306>$co;
200 # e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
# Rules for "n" (plain or with a diacritic) that depend on what follows.
# NOTE(review): read as ICU transform rule syntax, where text after "}" is
# look-ahead that must match but is left in place, and "|" in the target
# marks where processing resumes -- confirm.
# Before vocalic r / l (r or l + \u0325 ring below), plain n becomes $na and
# processing resumes in front of the emitted $virama.
223 n}r\u0325 > $na|$virama;
224 n}l\u0325 > $na|$virama;
# In each case below the nasal is replaced by $anusvara:
#   n + \u0307 (dot above)  before k, g or another n + \u0307
#   n + \u0304 (macron)     before c, j or n + \u0303 (tilde)
#   n + \u0323 (dot below)  before t, d or n carrying \u0323
#   plain n                 before y, l, v, s, h or r
226 n\u0307}[kg] > $anusvara;
227 n\u0307}n\u0307 > $anusvara;
228 n\u0304}[cj] > $anusvara;
229 n\u0304}n\u0303 > $anusvara;
230 n\u0323}[tdn]\u0323 > $anusvara;
233 n}[ylvshr] > $anusvara;
# Latin consonant + diacritic -> consonant variable followed by $virama.
# Every target has the shape  <consonant> | $virama.
# NOTE(review): "|" is read as the ICU cursor marker, i.e. processing resumes
# in front of the emitted $virama so that a later "$virama <vowel>" rule can
# replace it -- confirm.
# Marks used: \u0331 MACRON BELOW, \u0307 DOT ABOVE, \u0303 TILDE,
#             \u0323 DOT BELOW, \u0301 ACUTE ACCENT.
# Each aspirated source (ending in "h") is listed before its unaspirated
# prefix; keep that order.
238 k\u0331h\u0331>$ukha |$virama;
239 g\u0307> $ugha | $virama;
244 y\u0307>$uya|$virama;
245 l\u0331>$ela|$virama;
246 n\u0331>$ena|$virama;
247 n\u0307>$nga|$virama;
248 n\u0303>$nya|$virama;
249 n\u0323>$nna|$virama;
250 t\u0323h>$ttha|$virama;
251 t\u0323>$tta|$virama;
# NOTE(review): r\u0323h maps to $udha and r\u0323 to $uddha, which is not
# parallel to the $ddha/$dda naming just below; the definitions of $udha and
# $uddha are outside this view -- verify the pairing is intended.
252 r\u0323h>$udha|$virama;
253 r\u0323>$uddha|$virama;
254 d\u0323h>$ddha|$virama;
255 d\u0323>$dda|$virama;
278 r\u0331>$rra|$virama;
280 l\u0323>$lla|$virama;
283 w\u0307>$vva|$virama;
287 s\u0323>$ssa|$virama;
288 s\u0301>$sha|$virama;
# Punctuation and '~' handling.
# A $danda followed by a literal "." is replaced by $doubleDanda.
# A literal "~" becomes $anusvara when it follows a $depVowelAbove member and
# $chandrabindu when it follows a $depVowelBelow member.
# NOTE(review): assumes ICU transform rule syntax, in which the set before
# "{" is context only and is not replaced -- confirm.
292 $danda'.'>$doubleDanda;
293 $depVowelAbove{'~'>$anusvara;
294 $depVowelBelow{'~'>$chandrabindu;
295 # convert to dependent forms after consonant with no vowel:
296 # e.g. kai -> {ka}{virama}ai -> {ka}{ai}
# Here $virama is part of the source, so it is replaced together with the
# Latin vowel sequence by the single dependent sign on the right.
# The long forms (ring below \u0325 + macron \u0304) and "r\u0325a" are
# listed explicitly.
308 $virama r\u0325\u0304>$rrh;
310 $virama r\u0325a>$rh;
312 $virama l\u0325\u0304>$llh;
324 # otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
# The doubled quote '' in each source stands for one literal apostrophe
# (NOTE(review): ICU quoting convention assumed -- confirm).
# Each rule replaces $virama + apostrophe + Latin vowel with the stand-alone
# (w-prefixed) vowel variable.
# NOTE(review): the targets below contain only the w-variable, so $virama is
# not re-emitted, whereas the example above shows {virama} retained. Confirm
# which of the two is intended.
# Longer sources (with macron \u0304) precede their shorter prefixes.
326 $virama''a\u0304>$waa;
330 $virama''i\u0304>$wii;
333 $virama''u\u0304>$wuu;
336 $virama''r\u0325\u0304>$wrr;
338 $virama''r\u0325>$wr;
339 $virama''l\u0325\u0304>$wll;
341 $virama''l\u0325>$wl;
342 $virama''e\u0304>$we;
343 $virama''o\u0304>$wo;
345 $virama''e\u0306>$wce;
346 $virama''o\u0306>$wco;
# Same mapping when the apostrophe is not preceded by $virama.
357 ''r\u0325\u0304>$wrr;
359 ''l\u0325\u0304>$wll;
# A $virama that is followed by a Latin consonant from $z, or by a space, is
# mapped to itself, i.e. it stays in the output unchanged.
# NOTE(review): presumably this moves processing past a virama that no vowel
# rule consumed; "}" is read as ICU look-ahead context -- confirm.
369 $virama } [$z] > $virama;
370 $virama } ' ' > $virama ;