1 // -*- Coding: utf-8; -*-
2 //--------------------------------------------------------------------
3 // Copyright (c) 1999-2002, International Business Machines
4 // Corporation and others. All Rights Reserved.
5 //--------------------------------------------------------------------
6 // THIS IS A MACHINE-GENERATED FILE
7 // Tool: dumpicurules.bat
8 // Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt
9 // Date: Sat Jul 27 10:31:07 2002
10 //--------------------------------------------------------------------
16 //--------------------------------------------------------------------
17 //--------------------------------------------------------------------
18 //--------------------------------------------------------------------
// Variable definitions: each $name is bound to a single code point in the
// Unicode Private Use Area (U+E0xx), used as the InterIndic encoding.
// Where an inline comment cites a Devanagari code point, the low byte of the
// two values is the same (e.g. $we = U+E00F <-> U+090F).
24 "$chandrabindu=\ue001;"
28 // w<vowel> represents the stand-alone (independent) form
37 "$wce=\ue00d;" // LETTER CANDRA E
38 "$wse=\ue00e;" // LETTER SHORT E
39 "$we=\ue00f;" // \u090f LETTER E
41 "$wco=\ue011;" // LETTER CANDRA O
42 "$wso=\ue012;" // LETTER SHORT O
43 "$wo=\ue013;" // \u0913 LETTER O
65 "$ena=\ue029;" // compatibility
76 "$ela=\ue034;" // compatibility
85 "$avagraha=\ue03d;" // SIGN AVAGRAHA
86 // <vowel> (no leading w) represents the dependent form
94 "$ce=\ue045;" // VOWEL SIGN CANDRA E
95 "$se=\ue046;" // VOWEL SIGN SHORT E
98 "$co=\ue049;" // VOWEL SIGN CANDRA O
99 "$so=\ue04a;" // VOWEL SIGN SHORT O
100 "$o=\ue04b;" // \u094b VOWEL SIGN O
105 "$om = \ue050;" // OM
// The four signs below are listed as UNMAPPED and are present only as
// commented-out rules; nothing is defined for them.
106 // \u0951>; # UNMAPPED STRESS SIGN UDATTA
107 // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
108 // \u0953>; # UNMAPPED GRAVE ACCENT
109 // \u0954>; # UNMAPPED ACUTE ACCENT
110 "$lm = \ue055;"// Telugu Length Mark
111 "$ailm=\ue056;"// AI Length Mark
112 "$aulm=\ue057;"// AU Length Mark
113 // Urdu compatibility forms
127 "$doubleDanda=\ue065;" // referenced by $endThing and by the danda + '.' rule
128 "$zero=\ue066;" // DIGIT ZERO
129 "$one=\ue067;" // DIGIT ONE
130 "$two=\ue068;" // DIGIT TWO
131 "$three=\ue069;" // DIGIT THREE
132 "$four=\ue06a;" // DIGIT FOUR
133 "$five=\ue06b;" // DIGIT FIVE
134 "$six=\ue06c;" // DIGIT SIX
135 "$seven=\ue06d;" // DIGIT SEVEN
136 "$eight=\ue06e;" // DIGIT EIGHT
137 "$nine=\ue06f;" // DIGIT NINE
138 // For all other scripts
155 // \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character sets (UnicodeSet syntax) used as match context by the rules below.
// $depVowelAbove / $depVowelBelow: two groups of InterIndic code points.
// $ce, $se, $co, $so and $o fall inside $depVowelAbove.
// NOTE(review): the names suggest grouping by where the sign is drawn
// relative to the consonant -- confirm against the InterIndic table.
156 "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
157 "$depVowelBelow=[\ue041-\ue044];"
158 "$endThing=[$danda$doubleDanda];" // either danda; context for the final virama-removal rule
159 // $x was originally called '&'; $z was '%'
160 "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];" // virama plus the dependent (<vowel>) forms
161 "$z=[bcdfghjklmnpqrstvwxyz];" // every lowercase ASCII letter except a, e, i, o, u
// $consonants is the union of: the InterIndic range $ka-$ha, the Latin letters
// in $z, and the code point range U+xx15-U+xx39 of the Devanagari, Bengali,
// Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada and Malayalam blocks.
162 "$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
// Combining marks that map directly to InterIndic signs.
163 "\u0315 > $avagraha;" // U+0315 COMBINING COMMA ABOVE RIGHT
164 "\u0303>$chandrabindu$anusvara;" // U+0303 COMBINING TILDE -> candrabindu followed by anusvara
165 "m\u0310>$chandrabindu;" // m + U+0310 COMBINING CANDRABINDU
168 // convert to independent forms at start of word or syllable:
169 // dependent forms for roundtrip
// U+0314 COMBINING REVERSED COMMA ABOVE, then r or l carrying U+0325 (ring
// below) and U+0304 (macron), is replaced as a whole by the dependent form.
178 "\u0314r\u0325\u0304>$rrh;"
180 "\u0314l\u0325\u0304>$llh;"
191 // preceded by consonants
// Latin vowel spellings that directly follow a member of $consonants are
// turned into the dependent (<vowel>) form.  The text before '{' is context:
// it must be present but is not part of what gets replaced.
// Rules are tried in the order listed, so a longer spelling (with macron
// U+0304, breve U+0306, ring below U+0325, or an extra letter) is listed
// before the shorter spelling it begins with.
192 "$consonants{ a\u0304>$aa;"
193 "$consonants{ ai>$ai;"
194 "$consonants{ au>$au;"
195 "$consonants{ ii>$ii;" // doubled letter, same result as i + macron below
196 "$consonants{ i\u0304>$ii;"
198 "$consonants{ u\u0304>$uu;"
200 "$consonants{ r\u0325\u0304>$rrh;"
201 "$consonants{ r\u0325a>$rh;" // the trailing 'a' is consumed together with r + ring below
202 "$consonants{ r\u0325>$rh;"
203 "$consonants{ l\u0325\u0304>$llh;"
204 "$consonants{ lh>$lh;"
205 "$consonants{ l\u0325>$lh;"
206 "$consonants{ e\u0304>$e;" // macron -> $e
207 "$consonants{ o\u0304>$o;" // macron -> $o
208 "$consonants{ e\u0306>$ce;" // breve -> CANDRA E sign
209 "$consonants{ o\u0306>$co;" // breve -> CANDRA O sign
210 "$consonants{ e>$se;" // bare e -> SHORT E sign (only reached if no marked form matched)
211 "$consonants{ o>$so;" // bare o -> SHORT O sign (only reached if no marked form matched)
213 // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
// No context is required here: r or l with ring below (U+0325) and macron
// (U+0304) becomes the stand-alone (w<vowel>) form wherever an earlier,
// more specific rule did not already match it.
221 "r\u0325\u0304>$wrr;"
223 "l\u0325\u0304>$wll;"
235 // rules for anusvara
// In these rules the text after '}' is look-ahead context: it must follow the
// matched text but is left in place for later rules.
// The first three rules are exceptions and must stay ahead of the generic
// "n}[tdn]" rule below: n before r/l + ring below (U+0325), or before "na",
// becomes $na plus $virama instead of anusvara.  The '|' leaves the cursor in
// front of $virama so the virama can still be matched by later rules.
236 "n}r\u0325 > $na|$virama;"
237 "n}l\u0325 > $na|$virama;"
238 "n}na > $na|$virama;"
// A nasal spelling followed by the listed letters becomes anusvara.
239 "n\u0307}[kg] > $anusvara;" // n + dot above (U+0307) before k or g
240 "n\u0307}n\u0307 > $anusvara;" // n + dot above before another n + dot above
241 "n\u0304}[cj] > $anusvara;" // n + macron (U+0304) before c or j
242 "n\u0304}n\u0303 > $anusvara;" // n + macron before n + tilde (U+0303)
243 "n\u0323}[tdn]\u0323 > $anusvara;" // n + dot below (U+0323) before t/d/n + dot below
244 "n}[tdn] > $anusvara;" // plain n before t, d or n
245 "m}[pbm] > $anusvara;" // m before p, b or m
246 "n}[ylvshr] > $anusvara;" // plain n before y, l, v, s, h or r
247 "m\u0307 > $anusvara;" // m + dot above is anusvara regardless of context
// Latin consonant spellings -> InterIndic consonant followed by $virama.
// The '|' on the right-hand side leaves the cursor in front of $virama, so
// the virama is treated as input by the later "$virama <vowel>" rules.
// Spellings ending in 'h' are listed before the same spelling without it,
// because rules are tried in order and the longer match must win.
// Diacritics used: U+0331 macron below, U+0307 dot above, U+0303 tilde,
// U+0323 dot below, U+0301 acute accent.
251 "k\u0331h\u0331>$ukha |$virama;"
252 "g\u0307> $ugha | $virama;"
253 "z > $ujha |$virama;"
257 "y\u0307>$uya|$virama;"
258 "l\u0331>$ela|$virama;" // $ela is one of the compatibility code points
259 "n\u0331>$ena|$virama;" // $ena is one of the compatibility code points
260 "n\u0307>$nga|$virama;"
261 "n\u0303>$nya|$virama;"
262 "n\u0323>$nna|$virama;"
263 "t\u0323h>$ttha|$virama;"
264 "t\u0323>$tta|$virama;"
265 "r\u0323h>$udha|$virama;"
266 "r\u0323>$uddha|$virama;"
267 "d\u0323h>$ddha|$virama;"
268 "d\u0323>$dda|$virama;"
291 "r\u0331>$rra|$virama;"
293 "l\u0323>$lla|$virama;"
299 "s\u0323>$ssa|$virama;"
300 "s\u0301>$sha|$virama;"
// Punctuation-like input that depends on what has already been produced.
304 "$danda'.'>$doubleDanda;" // a danda followed by a literal '.' is replaced by a double danda
// A literal '~' is converted according to the sign that precedes it; the
// preceding sign (before '{') is context only and is not replaced.
305 "$depVowelAbove{'~'>$anusvara;"
306 "$depVowelBelow{'~'>$chandrabindu;"
307 // convert to dependent forms after consonant with no vowel:
308 // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
// $virama is part of the matched text here (there is no '{' context marker),
// so the virama and the Latin vowel spelling are replaced together by the
// dependent sign.  Longer spellings are listed before their prefixes.
310 "$virama a\u0304>$aa;"
314 "$virama i\u0304>$ii;"
317 "$virama u\u0304>$uu;"
320 "$virama r\u0325\u0304>$rrh;"
322 "$virama r\u0325a>$rh;" // the trailing 'a' is consumed as well
323 "$virama r\u0325>$rh;"
324 "$virama l\u0325\u0304>$llh;"
326 "$virama l\u0325>$lh;"
327 "$virama e\u0304>$e;"
328 "$virama o\u0304>$o;"
330 "$virama e\u0306>$ce;"
331 "$virama o\u0306>$co;"
336 // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
// Inside a rule, '' (two single quotes) stands for one literal apostrophe.
// A vowel spelling written after an apostrophe yields the stand-alone
// (w<vowel>) form instead of the dependent sign.
// NOTE(review): as written, $virama is part of the matched text (there is no
// '{' context marker), so it is replaced together with the apostrophe and the
// vowel.  The example above shows {virama} being kept -- confirm which
// behavior is intended.
338 "$virama''a\u0304>$waa;"
342 "$virama''i\u0304>$wii;"
345 "$virama''u\u0304>$wuu;"
348 "$virama''r\u0325\u0304>$wrr;"
350 "$virama''r\u0325>$wr;"
351 "$virama''l\u0325\u0304>$wll;"
353 "$virama''l\u0325>$wl;"
354 "$virama''e\u0304>$we;"
355 "$virama''o\u0304>$wo;"
357 "$virama''e\u0306>$wce;"
358 "$virama''o\u0306>$wco;"
// Same conversion when the apostrophe is not preceded by a virama.
369 "''r\u0325\u0304>$wrr;"
371 "''l\u0325\u0304>$wll;"
// Handling of a virama that no vowel rule consumed.  The text after '}' is
// look-ahead context and is not replaced.
// Before a Latin consonant in $z, or before a space, the virama is replaced
// by itself, i.e. it is kept and the cursor moves past it.
// NOTE(review): presumably this keeps the virama out of reach of any later
// rule in these two contexts -- confirm.
381 "$virama } [$z] > $virama;"
382 "$virama } ' ' > $virama ;"
// Before a danda or double danda the virama is deleted (empty right-hand side).
383 "$virama}$endThing>;"