]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Latin_InterIndic.txt
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / data / translit / Latin_InterIndic.txt
1 #--------------------------------------------------------------------
2 # Copyright (c) 1999-2004, International Business Machines
3 # Corporation and others. All Rights Reserved.
4 #--------------------------------------------------------------------
5
6 # Latin-InterIndic
7 #:: NFD;
8 #\ue000 reserved (InterIndic values are private-use code points starting at \ue000)
9 # signs (the consonant letters start at $ka further down)
10 $chandrabindu=\ue001;
11 $anusvara=\ue002;
12 $visarga=\ue003;
13 #\ue004 reserved
14 # w<vowel> represents the stand-alone form
15 $wa=\ue005; # Latin a
16 $waa=\ue006; # Latin a + \u0304 (combining macron)
17 $wi=\ue007; # Latin i
18 $wii=\ue008; # Latin i + \u0304
19 $wu=\ue009; # Latin u
20 $wuu=\ue00a; # Latin u + \u0304
21 $wr=\ue00b; # Latin r + \u0325 (combining ring below)
22 $wl=\ue00c; # Latin l + \u0325, or lh
23 $wce=\ue00d; # LETTER CANDRA E (Latin e + \u0306, combining breve)
24 $wse=\ue00e; # LETTER SHORT E (plain Latin e)
25 $we=\ue00f; # \u090f LETTER E (Latin e + \u0304)
26 $wai=\ue010; # Latin ai
27 $wco=\ue011; # LETTER CANDRA O (Latin o + \u0306)
28 $wso=\ue012; # LETTER SHORT O (plain Latin o)
29 $wo=\ue013; # \u0913 LETTER O (Latin o + \u0304)
30 $wau=\ue014; # Latin au
31 $ka=\ue015; # first consonant; $consonants below relies on the range [$ka-$ha]
32 $kha=\ue016;
33 $ga=\ue017;
34 $gha=\ue018;
35 $nga=\ue019; # Latin n + \u0307 (combining dot above)
36 $ca=\ue01a;
37 $cha=\ue01b;
38 $ja=\ue01c;
39 $jha=\ue01d;
40 $nya=\ue01e; # Latin ny, or n + \u0303 (combining tilde)
41 $tta=\ue01f; # Latin t + \u0323 (combining dot below)
42 $ttha=\ue020; # Latin tth, or t + \u0323 + h
43 $dda=\ue021; # Latin d + \u0323
44 $ddha=\ue022; # Latin ddh, or d + \u0323 + h
45 $nna=\ue023; # Latin n + \u0323
46 $ta=\ue024;
47 $tha=\ue025;
48 $da=\ue026;
49 $dha=\ue027;
50 $na=\ue028;
51 $ena=\ue029; #compatibility (Latin n + \u0331, combining macron below)
52 $pa=\ue02a;
53 $pha=\ue02b;
54 $ba=\ue02c;
55 $bha=\ue02d;
56 $ma=\ue02e;
57 $ya=\ue02f;
58 $ra=\ue030;
59 $rra=\ue031; # Latin r + \u0331
60 $la=\ue032;
61 $lla=\ue033; # Latin l + \u0323
62 $ela=\ue034; #compatibility (Latin l + \u0331)
63 $va=\ue035; # Latin v or w
64 $vva=\ue081; # Latin w + \u0307; NOTE: value is outside \ue015-\ue039, so [$ka-$ha] does not include it
65 $sha=\ue036; # Latin sh, or s + \u0301 (combining acute)
66 $ssa=\ue037; # Latin ss, or s + \u0323
67 $sa=\ue038;
68 $ha=\ue039; # last consonant of the [$ka-$ha] range
69 #\u093a Reserved
70 #\u093b Reserved
71 $nukta=\ue03c; # not produced by any rule in this file
72 $avagraha=\ue03d; # SIGN AVAGRAHA (from \u0315)
73 # <vowel> represents the dependent form
74 $aa=\ue03e; # Latin a + \u0304 after a consonant
75 $i=\ue03f;
76 $ii=\ue040; # Latin ii, or i + \u0304
77 $u=\ue041;
78 $uu=\ue042; # Latin u + \u0304
79 $rh=\ue043; # Latin r + \u0325 (combining ring below)
80 $lh=\ue044; # Latin l + \u0325, or lh
81 $ce=\ue045; #VOWEL SIGN CANDRA E (Latin e + \u0306)
82 $se=\ue046; #VOWEL SIGN SHORT E (plain Latin e)
83 $e=\ue047; # Latin e + \u0304
84 $ai=\ue048;
85 $co=\ue049; # VOWEL SIGN CANDRA O (Latin o + \u0306)
86 $so=\ue04a; # VOWEL SIGN SHORT O (plain Latin o)
87 $o=\ue04b; # \u094b (Latin o + \u0304)
88 $au=\ue04c;
89 $virama=\ue04d; # emitted after every Latin consonant; the vowel rules later consume it
90 # \u094e Reserved
91 # \u094f Reserved
92 $om = \ue050; # OM (from a literal apostrophe followed by om)
93 # \u0951>; # UNMAPPED STRESS SIGN UDATTA
94 # \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
95 # \u0953>; # UNMAPPED GRAVE ACCENT
96 # \u0954>; # UNMAPPED ACUTE ACCENT
97 $lm = \ue055;# Telugu Length Mark (not produced by any rule in this file)
98 $ailm=\ue056;# AI Length Mark (not produced by any rule in this file)
99 $aulm=\ue057;# AU Length Mark (not produced by any rule in this file)
100 #urdu compatibility forms
101 $uka=\ue058; # Latin q
102 $ukha=\ue059; # Latin k + \u0331 + h + \u0331 (combining macron below)
103 $ugha=\ue05a; # Latin g + \u0307 (combining dot above)
104 $ujha=\ue05b; # Latin z
105 $uddha=\ue05c; # Latin r + \u0323 (combining dot below)
106 $udha=\ue05d; # Latin r + \u0323 + h
107 $ufa=\ue05e; # Latin f
108 $uya=\ue05f; # Latin y + \u0307
109 $wrr=\ue060; # stand-alone form: Latin r + \u0325 + \u0304
110 $wll=\ue061; # stand-alone form: Latin l + \u0325 + \u0304
111 $rrh=\ue062; # dependent counterpart of $wrr
112 $llh=\ue063; # dependent counterpart of $wll
113 $danda=\ue064; # Latin period
114 $doubleDanda=\ue065; # $danda followed by a Latin period
115 $zero=\ue066; # DIGIT ZERO
116 $one=\ue067; # DIGIT ONE
117 $two=\ue068; # DIGIT TWO
118 $three=\ue069; # DIGIT THREE
119 $four=\ue06a; # DIGIT FOUR
120 $five=\ue06b; # DIGIT FIVE
121 $six=\ue06c; # DIGIT SIX
122 $seven=\ue06d; # DIGIT SEVEN
123 $eight=\ue06e; # DIGIT EIGHT
124 $nine=\ue06f; # DIGIT NINE
125 # For all other scripts ($ecp0-$ecpF are defined here but not used by any rule in this file)
126 $ecp0=\ue070;
127 $ecp1=\ue071;
128 $ecp2=\ue072;
129 $ecp3=\ue073;
130 $ecp4=\ue074;
131 $ecp5=\ue075;
132 $ecp6=\ue076;
133 $ecp7=\ue077;
134 $ecp8=\ue078;
135 $ecp9=\ue079;
136 $ecpA=\ue07a;
137 $ecpB=\ue07b;
138 $ecpC=\ue07c;
139 $ecpD=\ue07d;
140 $ecpE=\ue07e;
141 $ecpF=\ue07f;
142 # \u0970>; # UNMAPPED ABBREVIATION SIGN
143 $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c]; # $aa-$ii and $ce-$au; left context for the tilde -> anusvara rule
144 $depVowelBelow=[\ue041-\ue044]; # $u, $uu, $rh, $lh; left context for the tilde -> chandrabindu rule
145 $endThing=[$danda$doubleDanda]; # a virama directly before one of these is deleted
146 # $x was originally called '&'; $z was '%'
147 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; # virama plus dependent vowels; not referenced by any rule in this file
148 $z=[bcdfghjklmnpqrstvwxyz]; # the Latin consonant letters
149 $consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]]; # InterIndic consonants, Latin consonants, and the xx15-xx39 ranges of nine Indic blocks (Devanagari through Malayalam)
150 \u0315 > $avagraha; # sign rules: COMBINING COMMA ABOVE RIGHT becomes avagraha
151 \u0303>$chandrabindu$anusvara; # a COMBINING TILDE at the cursor becomes chandrabindu followed by anusvara
152 m\u0310>$chandrabindu; # m + COMBINING CANDRABINDU
153 h\u0323>$visarga; # h + COMBINING DOT BELOW; listed before the plain h consonant rule so it wins
154 x>$ka$virama$sa; # Latin x expands to ka + virama + sa
155 # \u0314 (COMBINING REVERSED COMMA ABOVE) + vowel -> dependent vowel sign; the unprefixed vowel rules further down give the independent forms
156 # dependent forms for roundtrip
157 \u0314a\u0304>$aa; # rules are tried in order, so longer keys are listed before their prefixes
158 \u0314ai>$ai;
159 \u0314au>$au;
160 \u0314ii>$ii;
161 \u0314i\u0304>$ii;
162 \u0314i>$i;
163 \u0314u\u0304>$uu;
164 \u0314u>$u;
165 \u0314r\u0325\u0304>$rrh;
166 \u0314r\u0325>$rh;
167 \u0314l\u0325\u0304>$llh;
168 \u0314lh>$lh;
169 \u0314l\u0325>$lh;
170 \u0314e\u0304>$e;
171 \u0314o\u0304>$o;
172 \u0314a>; # \u0314 + plain a produces no output
173 \u0314e\u0306>$ce;
174 \u0314o\u0306>$co;
175 \u0314e>$se;
176 \u0314o>$so;
177
178 # preceded by consonants: $consonants{ is left context only (matched, not replaced); the vowel becomes its dependent sign
179 $consonants{ a\u0304>$aa;
180 $consonants{ ai>$ai;
181 $consonants{ au>$au;
182 $consonants{ ii>$ii;
183 $consonants{ i\u0304>$ii;
184 $consonants{ i>$i;
185 $consonants{ u\u0304>$uu;
186 $consonants{ u>$u;
187 $consonants{ r\u0325\u0304>$rrh;
188 $consonants{ r\u0325a>$rh; # a trailing a after r + \u0325 is absorbed into the same sign
189 $consonants{ r\u0325>$rh;
190 $consonants{ l\u0325\u0304>$llh;
191 $consonants{ lh>$lh;
192 $consonants{ l\u0325>$lh;
193 $consonants{ e\u0304>$e;
194 $consonants{ o\u0304>$o;
195 $consonants{ e\u0306>$ce;
196 $consonants{ o\u0306>$co;
197 $consonants{ e>$se;
198 $consonants{ o>$so;
199
200 # e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
201 a\u0304>$waa; # a vowel not caught by the context rules above becomes the stand-alone (w) form
202 ai>$wai;
203 au>$wau;
204 i\u0304>$wii;
205 i>$wi;
206 u\u0304>$wuu;
207 u>$wu;
208 r\u0325\u0304>$wrr; # must precede the plain r consonant rule further down
209 r\u0325>$wr;
210 l\u0325\u0304>$wll;
211 lh>$wl;
212 l\u0325>$wl;
213 e\u0304>$we;
214 o\u0304>$wo;
215 a>$wa;
216 e\u0306>$wce;
217 o\u0306>$wco;
218 e>$wse;
219 ''om>$om; # two quote marks denote one literal apostrophe; listed ahead of the later apostrophe + o rule
220 o>$wso;
221
222 # rules for anusvara: text after } is right context (looked at, not consumed); | in the output sets the cursor
223 n}r\u0325 > $na|$virama; # these three exceptions keep a real na and must precede the general n rules below
224 n}l\u0325 > $na|$virama;
225 n}na > $na|$virama;
226 n\u0307}[kg] > $anusvara; # n + dot above before k or g
227 n\u0307}n\u0307 > $anusvara;
228 n\u0304}[cj] > $anusvara; # n + macron before c or j
229 n\u0304}n\u0303 > $anusvara;
230 n\u0323}[tdn]\u0323 > $anusvara; # n + dot below before t, d or n that itself carries a dot below
231 n}[tdn] > $anusvara;
232 m}[pbm] > $anusvara;
233 n}[ylvshr] > $anusvara;
234 m\u0307 > $anusvara; # m + dot above, in any context
235
236 #urdu compatibility: each letter becomes consonant + virama with the cursor (|) left before the virama
237 q>$uka|$virama;
238 k\u0331h\u0331>$ukha |$virama; # listed before the plain kh and k rules so the marked form wins
239 g\u0307> $ugha | $virama;
240 z > $ujha |$virama;
241 f > $ufa|$virama;
242
243 # dev: Latin consonant -> InterIndic consonant + virama; the cursor (|) stays before the virama so a following vowel rule can consume it
244 y\u0307>$uya|$virama;
245 l\u0331>$ela|$virama;
246 n\u0331>$ena|$virama;
247 n\u0307>$nga|$virama;
248 n\u0303>$nya|$virama;
249 n\u0323>$nna|$virama;
250 t\u0323h>$ttha|$virama; # aspirated forms come before their unaspirated prefixes
251 t\u0323>$tta|$virama;
252 r\u0323h>$udha|$virama;
253 r\u0323>$uddha|$virama;
254 d\u0323h>$ddha|$virama;
255 d\u0323>$dda|$virama;
256 kh>$kha|$virama;
257 k>$ka|$virama;
258 gh>$gha|$virama;
259 g>$ga|$virama;
260 ch>$cha|$virama;
261 c>$ca|$virama;
262 jh>$jha|$virama;
263 j>$ja|$virama;
264 ny>$nya|$virama; # ASCII spelling of the n + \u0303 rule above
265 tth>$ttha|$virama; # ASCII spelling of t + \u0323 + h
266 ddh>$ddha|$virama; # ASCII spelling of d + \u0323 + h
267 th>$tha|$virama;
268 t>$ta|$virama;
269 dh>$dha|$virama;
270 d>$da|$virama;
271 n>$na|$virama;
272 ph>$pha|$virama;
273 p>$pa|$virama;
274 bh>$bha|$virama;
275 b>$ba|$virama;
276 m>$ma|$virama;
277 y>$ya|$virama;
278 r\u0331>$rra|$virama;
279 r>$ra|$virama;
280 l\u0323>$lla|$virama;
281 l>$la|$virama;
282 v>$va|$virama;
283 w\u0307>$vva|$virama;
284 w>$va|$virama; # w and v share the same target
285 sh>$sha|$virama;
286 ss>$ssa|$virama;
287 s\u0323>$ssa|$virama;
288 s\u0301>$sha|$virama;
289 s>$sa|$virama;
290 h>$ha|$virama;
291 '.'>$danda; # quoted period is a literal character
292 $danda'.'>$doubleDanda; # NOTE(review): the key starts with $danda, so this presumably fires only when a danda is already at the cursor - confirm
293 $depVowelAbove{'~'>$anusvara; # tilde right after a member of $depVowelAbove
294 $depVowelBelow{'~'>$chandrabindu; # tilde right after a member of $depVowelBelow
295 # convert to dependent forms after consonant with no vowel:
296 # e.g. kai -> {ka}{virama}ai -> {ka}{ai}
297 #$virama aa>$aa;
298 $virama a\u0304>$aa; # the virama is part of the key here, so it is replaced together with the vowel
299 $virama ai>$ai;
300 $virama au>$au;
301 $virama ii>$ii;
302 $virama i\u0304>$ii;
303 $virama i>$i;
304 #$virama uu>$uu;
305 $virama u\u0304>$uu;
306 $virama u>$u;
307 #$virama rrh>$rrh;
308 $virama r\u0325\u0304>$rrh;
309 #$virama rh>$rh;
310 $virama r\u0325a>$rh; # a trailing a after r + \u0325 is absorbed into the same sign
311 $virama r\u0325>$rh;
312 $virama l\u0325\u0304>$llh;
313 $virama lh>$lh;
314 $virama l\u0325>$lh;
315 $virama e\u0304>$e;
316 $virama o\u0304>$o;
317 $virama a>; # virama + plain a are both removed, leaving the bare consonant
318 $virama e\u0306>$ce;
319 $virama o\u0306>$co;
320 $virama e>$se;
321 $virama o>$so;
322
323
324 # otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}'ai -> {ka}{wai}
325 #$virama''aa>$waa;
326 $virama''a\u0304>$waa; # key is virama + literal apostrophe + vowel; all of it is replaced by the stand-alone vowel
327 $virama''ai>$wai;
328 $virama''au>$wau;
329 #$virama''ii>$wii;
330 $virama''i\u0304>$wii;
331 $virama''i>$wi;
332 #$virama''uu>$wuu;
333 $virama''u\u0304>$wuu;
334 $virama''u>$wu;
335 #$virama''rrh>$wrr;
336 $virama''r\u0325\u0304>$wrr;
337 #$virama''rh>$wr;
338 $virama''r\u0325>$wr;
339 $virama''l\u0325\u0304>$wll;
340 #$virama''lh>$wl;
341 $virama''l\u0325>$wl;
342 $virama''e\u0304>$we;
343 $virama''o\u0304>$wo;
344 $virama''a>$wa;
345 $virama''e\u0306>$wce;
346 $virama''o\u0306>$wco;
347 $virama''e>$wse;
348 $virama''o>$wso;
349 # no virama: a literal apostrophe followed by a vowel, with no virama at the cursor, also gives the stand-alone form
350 ''a\u0304>$waa;
351 ''ai>$wai;
352 ''au>$wau;
353 ''i\u0304>$wii;
354 ''i>$wi;
355 ''u\u0304>$wuu;
356 ''u>$wu;
357 ''r\u0325\u0304>$wrr;
358 ''r\u0325>$wr;
359 ''l\u0325\u0304>$wll;
360 ''l\u0325>$wl;
361 ''e\u0304>$we;
362 ''o\u0304>$wo;
363 ''a>$wa;
364 ''e\u0306>$wce;
365 ''o\u0306>$wco;
366 ''e>$wse;
367 ''o>$wso;
368
369 $virama } [$z] > $virama; # a virama followed by a Latin consonant is kept; the rule only moves the cursor past it
370 $virama } ' ' > $virama ; # likewise when followed by a space
371 $virama}$endThing>; # a virama directly before a danda or double danda is deleted
372 0>$zero; # ASCII digits map to the InterIndic digits
373 1>$one;
374 2>$two;
375 3>$three;
376 4>$four;
377 5>$five;
378 6>$six;
379 7>$seven;
380 8>$eight;
381 9>$nine;
382 ''>; # any apostrophe not consumed by an earlier rule is dropped
383 #:: NFC (NFD) ;