]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
73c04bcf | 4 | # File: InterIndic_Latin.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
73c04bcf | 6 | # |
2ca993e8 A |
7 | |
8 | # InterIndic-Latin | |
9 | #\uE000 reserved | |
10 | #consonants | |
73c04bcf A |
11 | $chandrabindu=\uE001; |
12 | $anusvara=\uE002; | |
13 | $visarga=\uE003; | |
2ca993e8 A |
14 | #\uE004 reserved |
15 | # w←vowel→ represents the stand-alone form | |
73c04bcf A |
16 | $wa=\uE005; |
17 | $waa=\uE006; | |
18 | $wi=\uE007; | |
19 | $wii=\uE008; | |
20 | $wu=\uE009; | |
21 | $wuu=\uE00A; | |
22 | $wr=\uE00B; | |
23 | $wl=\uE00C; | |
24 | $wce=\uE00D; # LETTER CANDRA E | |
25 | $wse=\uE00E; # LETTER SHORT E | |
51004dcb | 26 | $we=\uE00F; # ए LETTER E |
73c04bcf A |
27 | $wai=\uE010; |
28 | $wco=\uE011; # LETTER CANDRA O | |
29 | $wso=\uE012; # LETTER SHORT O | |
51004dcb | 30 | $wo=\uE013; # ओ LETTER O |
73c04bcf A |
31 | $wau=\uE014; |
32 | $ka=\uE015; | |
33 | $kha=\uE016; | |
34 | $ga=\uE017; | |
35 | $gha=\uE018; | |
36 | $nga=\uE019; | |
37 | $ca=\uE01A; | |
38 | $cha=\uE01B; | |
39 | $ja=\uE01C; | |
40 | $jha=\uE01D; | |
41 | $nya=\uE01E; | |
42 | $tta=\uE01F; | |
43 | $ttha=\uE020; | |
44 | $dda=\uE021; | |
45 | $ddha=\uE022; | |
46 | $nna=\uE023; | |
47 | $ta=\uE024; | |
48 | $tha=\uE025; | |
49 | $da=\uE026; | |
50 | $dha=\uE027; | |
51 | $na=\uE028; | |
52 | $ena=\uE029; #compatibility | |
53 | $pa=\uE02A; | |
54 | $pha=\uE02B; | |
55 | $ba=\uE02C; | |
56 | $bha=\uE02D; | |
57 | $ma=\uE02E; | |
58 | $ya=\uE02F; | |
59 | $ra=\uE030; | |
60 | $vva=\uE081; | |
61 | $rra=\uE031; | |
62 | $la=\uE032; | |
63 | $lla=\uE033; | |
64 | $ela=\uE034; #compatibility | |
65 | $va=\uE035; | |
66 | $sha=\uE036; | |
67 | $ssa=\uE037; | |
68 | $sa=\uE038; | |
69 | $ha=\uE039; | |
2ca993e8 A |
70 | #\uE03A Reserved |
71 | #\uE03B Reserved | |
73c04bcf A |
72 | $nukta=\uE03C; |
73 | $avagraha=\uE03D; # SIGN AVAGRAHA | |
2ca993e8 | 74 | # ←vowel→ represents the dependent form |
73c04bcf A |
75 | $aa=\uE03E; |
76 | $i=\uE03F; | |
77 | $ii=\uE040; | |
78 | $u=\uE041; | |
79 | $uu=\uE042; | |
80 | $rh=\uE043; | |
b331163b | 81 | $rrh=\uE044; |
73c04bcf A |
82 | $ce=\uE045; #VOWEL SIGN CANDRA E |
83 | $se=\uE046; #VOWEL SIGN SHORT E | |
84 | $e=\uE047; | |
85 | $ai=\uE048; | |
86 | $co=\uE049; # VOWEL SIGN CANDRA O | |
87 | $so=\uE04A; # VOWEL SIGN SHORT O | |
51004dcb | 88 | $o=\uE04B; # ो |
73c04bcf A |
89 | $au=\uE04C; |
90 | $virama=\uE04D; | |
2ca993e8 A |
91 | # \uE04E Reserved |
92 | # \uE04F Reserved | |
73c04bcf | 93 | $om=\uE050; # OM |
51004dcb A |
94 | \uE051→; # UNMAPPED STRESS SIGN UDATTA |
95 | \uE052→; # UNMAPPED STRESS SIGN ANUDATTA | |
96 | \uE053→; # UNMAPPED GRAVE ACCENT | |
97 | \uE054→; # UNMAPPED ACUTE ACCENT | |
98 | $lm = \uE055;# Telugu Length Mark | |
99 | $ailm=\uE056;# AI Length Mark | |
100 | $aulm=\uE057;# AU Length Mark | |
2ca993e8 | 101 | #urdu compatibility forms |
73c04bcf A |
102 | $uka=\uE058; |
103 | $ukha=\uE059; | |
104 | $ugha=\uE05A; | |
105 | $ujha=\uE05B; | |
106 | $uddha=\uE05C; | |
107 | $udha=\uE05D; | |
108 | $ufa=\uE05E; | |
109 | $uya=\uE05F; | |
110 | $wrr=\uE060; | |
111 | $wll=\uE061; | |
b331163b | 112 | $lh=\uE062; |
73c04bcf A |
113 | $llh=\uE063; |
114 | $danda=\uE064; | |
115 | $doubleDanda=\uE065; | |
51004dcb A |
116 | $zero=\uE066; # DIGIT ZERO |
117 | $one=\uE067; # DIGIT ONE | |
118 | $two=\uE068; # DIGIT TWO | |
119 | $three=\uE069; # DIGIT THREE | |
120 | $four=\uE06A; # DIGIT FOUR | |
121 | $five=\uE06B; # DIGIT FIVE | |
122 | $six=\uE06C; # DIGIT SIX | |
123 | $seven=\uE06D; # DIGIT SEVEN | |
124 | $eight=\uE06E; # DIGIT EIGHT | |
125 | $nine=\uE06F; # DIGIT NINE | |
2ca993e8 | 126 | # Glottal stop |
73c04bcf | 127 | $dgs=\uE082; |
2ca993e8 | 128 | #Khanda-ta |
73c04bcf A |
129 | $kta=\uE083; |
130 | $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # ranges cover $aa,$i,$ii and $ce..$au | |
131 | $depVowelBelow=[\uE041-\uE044]; # range covers $u,$uu,$rh,$rrh | |
2ca993e8 | 132 | # $x was originally called '§'; $z was '%' |
73c04bcf A |
133 | $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; # all dependent vowel signs; used below as the "}$x" following-context |
134 | $z=[bcdfghjklmnpqrstvwxyz]; # Latin consonant letters | |
135 | $vowels=[aeiour\u0304\u0325\u0306]; # Latin vowel letters plus r and three combining marks; used below as the "$vowels{" preceding-context | |
136 | $forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; # anything that is neither a letter nor in U+0300-U+034C; used below as a preceding-context | |
2ca993e8 A |
137 | ###################################################################### |
138 | # convert from Native letters to Latin letters | |
139 | ###################################################################### | |
140 | #transliterations for anusvara | |
51004dcb A |
141 | $anusvara} [$ka$kha$ga$gha$nga] → n\u0307; |
142 | $anusvara} [$ca$cha$ja$jha$nya] → n\u0304; | |
143 | $anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; | |
144 | $anusvara} [$ta$tha$da$dha$na] → n; | |
145 | $anusvara} [$pa$pha$ba$bha$ma] → m; | |
729e4ab9 A |
146 | $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n; |
147 | $anusvara→ m\u0307; | |
2ca993e8 | 148 | # Urdu compatibility |
51004dcb A |
149 | $ya$nukta}$x → y\u0307; |
150 | $ya$nukta$virama → y\u0307; | |
151 | $ya$nukta → y\u0307a; | |
152 | $la$nukta }$x → l\u0331; | |
153 | $la$nukta$virama → l\u0331; | |
154 | $la$nukta → l\u0331a; | |
155 | $na$nukta }$x → n\u0331; | |
156 | $na$nukta$virama → n\u0331; | |
157 | $na$nukta → n\u0331a; | |
158 | $ena }$x → n\u0331; | |
159 | $ena$virama → n\u0331; | |
160 | $ena → n\u0331a; | |
161 | $uka → qa; | |
162 | $ka$nukta }$x → q; | |
163 | $ka$nukta$virama → q; | |
164 | $ka$nukta → qa; | |
165 | $kha$nukta }$x → k\u0331h\u0331; | |
166 | $kha$nukta$virama → k\u0331h\u0331; | |
167 | $kha$nukta → k\u0331h\u0331a; | |
168 | $ukha$virama → k\u0331h\u0331; | |
169 | $ukha → k\u0331h\u0331a; | |
170 | $ugha → g\u0307a; | |
171 | $ga$nukta }$x → g\u0307; | |
172 | $ga$nukta$virama → g\u0307; | |
173 | $ga$nukta → g\u0307a; | |
174 | $ujha → za; | |
175 | $ja$nukta }$x → z; | |
176 | $ja$nukta$virama → z; | |
177 | $ja$nukta → za; | |
178 | $ddha$nukta}$x → r\u0323h; | |
179 | $ddha$nukta$virama → r\u0323h; | |
180 | $ddha$nukta → r\u0323ha; | |
181 | $uddha}$x → r\u0323; | |
182 | $uddha$virama → r\u0323; | |
183 | $uddha → r\u0323a; | |
184 | $udha → r\u0323a; | |
185 | $dda$nukta}$x → r\u0323; | |
186 | $dda$nukta$virama → r\u0323; | |
187 | $dda$nukta → r\u0323a; | |
188 | $pha$nukta }$x → f; | |
189 | $pha$nukta$virama → f; | |
190 | $pha$nukta → fa; | |
191 | $ufa }$x → f; | |
192 | $ufa$virama → f; | |
193 | $ufa → fa; | |
194 | $ra$nukta}$x → r\u0331; | |
195 | $ra$nukta$virama → r\u0331; | |
196 | $ra$nukta → r\u0331a; | |
197 | $lla$nukta}$x → l\u0331; | |
198 | $lla$nukta$virama → l\u0331; | |
199 | $lla$nukta → l\u0331a; | |
200 | $ela}$x → l\u0331; | |
201 | $ela$virama → l\u0331; | |
202 | $ela → l\u0331a; | |
203 | $uya}$x → y\u0307; | |
204 | $uya$virama → y\u0307; | |
205 | $uya → y\u0307a; | |
2ca993e8 | 206 | # normal consonants |
729e4ab9 A |
207 | $ka$virama}$ha→k''; |
208 | $ka}$x→k; | |
209 | $ka$virama→k; | |
210 | $ka→ka; | |
211 | $kha}$x→kh; | |
212 | $kha$virama→kh; | |
213 | $kha→kha; | |
214 | $ga$virama}$ha→g''; | |
215 | $ga}$x→g; | |
216 | $ga$virama→g; | |
217 | $ga→ga; | |
218 | $gha}$x→gh; | |
219 | $gha$virama→gh; | |
220 | $gha→gha; | |
221 | $nga}$x→n\u0307; | |
222 | $nga$virama→n\u0307; | |
223 | $nga→n\u0307a; | |
224 | $ca$virama}$ha→c''; | |
225 | $ca}$x→c; | |
226 | $ca$virama→c; | |
227 | $ca→ca; | |
228 | $cha}$x→ch; | |
229 | $cha$virama→ch; | |
230 | $cha→cha; | |
231 | $ja$virama}$ha→j''; | |
232 | $ja}$x→j; | |
233 | $ja$virama→j; | |
234 | $ja→ja; | |
235 | $jha}$x→jh; | |
236 | $jha$virama→jh; | |
237 | $jha→jha; | |
238 | $nya }$x→n\u0303; | |
239 | $nya$virama→n\u0303; | |
240 | $nya → n\u0303a; | |
241 | $tta$virama}$ha→t\u0323''; | |
242 | $tta}$x→t\u0323; | |
243 | $tta$virama→t\u0323; | |
244 | $tta→t\u0323a; | |
245 | $ttha}$x→t\u0323h; | |
246 | $ttha$virama→t\u0323h; | |
247 | $ttha→t\u0323ha; | |
248 | $dda$virama}$ha→d\u0323''; # separator keeps dda+virama+ha distinct from aspirated ddha, as for the other consonants |
249 | $dda}$x→d\u0323; | |
250 | $dda$virama→d\u0323; | |
251 | $dda→d\u0323a; | |
252 | $ddha}$x→d\u0323h; | |
253 | $ddha$virama→d\u0323h; | |
254 | $ddha→d\u0323ha; | |
255 | $nna}$x→n\u0323; | |
256 | $nna$virama→n\u0323; | |
257 | $nna→n\u0323a; | |
258 | $ta$virama}$ha→t''; | |
259 | $ta$virama}$ttha→t''; | |
260 | $ta$virama}$tta→t''; | |
261 | $ta$virama}$tha→t''; | |
262 | $ta}$x→t; | |
263 | $ta$virama→t; | |
264 | $ta→ta; | |
265 | $tha}$x→th; | |
266 | $tha$virama→th; | |
267 | $tha→tha; | |
268 | $da$virama}$ha→d''; | |
269 | $da$virama}$ddha→d''; | |
270 | $da$virama}$dda→d''; | |
271 | $da$virama}$dha→d''; | |
272 | $da}$x→d; | |
273 | $da$virama→d; | |
274 | $da→da; | |
275 | $dha}$x→dh; | |
276 | $dha$virama→dh; | |
277 | $dha→dha; | |
278 | $na$virama}$ga→n''; | |
279 | $na$virama}$ya→n''; | |
280 | $na}$x→n; | |
281 | $na$virama→n; | |
282 | $na→na; | |
283 | $pa$virama}$ha→p''; | |
284 | $pa}$x→p; | |
285 | $pa$virama→p; | |
286 | $pa→pa; | |
287 | $pha}$x→ph; | |
288 | $pha$virama→ph; | |
289 | $pha→pha; | |
290 | $ba$virama}$ha→b''; | |
291 | $ba}$x→b; | |
292 | $ba$virama→b; | |
293 | $ba→ba; | |
294 | $bha}$x→bh; | |
295 | $bha$virama→bh; | |
296 | $bha→bha; | |
297 | $ma$virama}$ma→m''; | |
298 | $ma}$x→m; | |
299 | $ma$virama→m; | |
300 | $ma→ma; | |
301 | $ya}$x→y; | |
302 | $ya$virama→y; | |
303 | $ya→ya; | |
304 | $ra$virama}$ha→r''; | |
305 | $ra}$x→r; | |
306 | $ra$virama→r; | |
307 | $ra→ra; | |
308 | $vva$virama}$ha→w\u0307''; | |
309 | $vva}$x→w\u0307; | |
310 | $vva$virama→w\u0307; | |
311 | $vva→w\u0307a; | |
312 | $rra$virama}$ha→r\u0331''; | |
313 | $rra}$x→r\u0331; | |
314 | $rra$virama→r\u0331; | |
315 | $rra→r\u0331a; | |
316 | $la$virama}$ha→l''; | |
317 | $la}$x→l; | |
318 | $la$virama→l; | |
319 | $la→la; | |
320 | $lla$virama}$ha→l\u0323''; | |
321 | $lla}$x→l\u0323; | |
322 | $lla$virama→l\u0323; | |
323 | $lla→l\u0323a; | |
324 | $va}$x→v; | |
325 | $va$virama→v; | |
326 | $va→va; | |
327 | $sa$virama}$ha→s''; | |
328 | $sa$virama}$sha→s''; | |
329 | $sa$virama}$ssa→s''; | |
330 | $sa$virama}$sa→s''; | |
331 | $sa}$x→s; | |
332 | $sa$virama→s; | |
2ca993e8 | 333 | #for gurmukhi |
729e4ab9 A |
334 | $sa$nukta}$x→s\u0301; |
335 | $sa$nukta$virama→s\u0301; | |
336 | $sa$nukta→s\u0301a; | |
337 | $sa→sa; | |
338 | $sha}$x→s\u0301; | |
339 | $sha$virama→s\u0301; | |
340 | $sha→s\u0301a; | |
341 | $ssa}$x→s\u0323; | |
342 | $ssa$virama→s\u0323; | |
343 | $ssa→s\u0323a; | |
344 | $ha}$x→h; | |
345 | $ha$virama→h; | |
346 | $ha→ha; | |
2ca993e8 | 347 | # dependent vowels (should never occur except following consonants) |
51004dcb A |
348 | $forceIndependentMatra{$aa → \u0314a\u0304; |
349 | $forceIndependentMatra{$ai → \u0314ai; | |
350 | $forceIndependentMatra{$au → \u0314au; | |
351 | $forceIndependentMatra{$ii → \u0314i\u0304; | |
352 | $forceIndependentMatra{$i → \u0314i; | |
353 | $forceIndependentMatra{$uu → \u0314u\u0304; | |
354 | $forceIndependentMatra{$u → \u0314u; | |
729e4ab9 | 355 | $forceIndependentMatra{$rrh → \u0314r\u0325\u0304; |
51004dcb | 356 | $forceIndependentMatra{$rh → \u0314r\u0325; |
729e4ab9 | 357 | $forceIndependentMatra{$llh → \u0314l\u0325\u0304; |
51004dcb A |
358 | $forceIndependentMatra{$lh → \u0314l\u0325; |
359 | $forceIndependentMatra{$e → \u0314e\u0304; | |
360 | $forceIndependentMatra{$o → \u0314o\u0304; | |
2ca993e8 | 361 | #extra vowels |
51004dcb A |
362 | $forceIndependentMatra{$ce → \u0314e\u0306; |
363 | $forceIndependentMatra{$co → \u0314o\u0306; | |
364 | $forceIndependentMatra{$se → \u0314e; | |
365 | $forceIndependentMatra{$so → \u0314o; | |
366 | $forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character | |
729e4ab9 | 367 | $forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character |
51004dcb A |
368 | $aa → a\u0304; |
369 | $ai → ai; | |
370 | $au → au; | |
371 | $ii → i\u0304; | |
372 | $i → i; | |
373 | $uu → u\u0304; | |
374 | $u → u; | |
729e4ab9 | 375 | $rrh → r\u0325\u0304; |
51004dcb | 376 | $rh → r\u0325; |
729e4ab9 | 377 | $llh → l\u0325\u0304; |
51004dcb A |
378 | $lh → l\u0325; |
379 | $e → e\u0304; | |
380 | $o → o\u0304; | |
2ca993e8 | 381 | #extra vowels |
51004dcb A |
382 | $ce → e\u0306; |
383 | $co → o\u0306; | |
384 | $se → e; | |
385 | $so → o; | |
2ca993e8 | 386 | #dependent vowels when following independent vowels. Generally illegal; handled only for round-tripping |
729e4ab9 A |
387 | $waa} $x → a\u0304\u0314; |
388 | $wai} $x → ai\u0314; | |
389 | $wau} $x → au\u0314; | |
390 | $wii} $x → i\u0304\u0314; | |
391 | $wi } $x → i\u0314; | |
392 | $wuu} $x → u\u0304\u0314; | |
393 | $wu } $x → u\u0314; | |
394 | $wrr} $x → r\u0325\u0304\u0314; | |
395 | $wr } $x → r\u0325\u0314; | |
396 | $wll} $x → l\u0325\u0304\u0314; | |
397 | $wl } $x → l\u0325\u0314; | |
398 | $we } $x → e\u0304\u0314; | |
399 | $wo } $x → o\u0304\u0314; | |
400 | $wa } $x → a\u0314; | |
2ca993e8 | 401 | #extra vowels |
729e4ab9 A |
402 | $wce} $x → e\u0306\u0314; |
403 | $wco} $x → o\u0306\u0314; | |
404 | $wse} $x → e\u0314; | |
405 | $wso} $x → o\u0314; | |
406 | $om} $x → ''om\u0314; | |
2ca993e8 | 407 | # independent vowels when preceded by vowels |
51004dcb A |
408 | $vowels{$waa → ''a\u0304; |
409 | $vowels{$wai → ''ai; | |
410 | $vowels{$wau → ''au; | |
411 | $vowels{$wii → ''i\u0304; | |
412 | $vowels{$wi → ''i; | |
413 | $vowels{$wuu → ''u\u0304; | |
414 | $vowels{$wu → ''u; | |
415 | $vowels{$wrr → ''r\u0325\u0304; | |
416 | $vowels{$wr → ''r\u0325; | |
417 | $vowels{$wll → ''l\u0325\u0304; | |
418 | $vowels{$wl → ''l\u0325; | |
419 | $vowels{$we → ''e\u0304; | |
420 | $vowels{$wo → ''o\u0304; | |
421 | $vowels{$wa → ''a; | |
2ca993e8 | 422 | #extra vowels |
51004dcb A |
423 | $vowels{$wce → ''e\u0306; |
424 | $vowels{$wco → ''o\u0306; | |
425 | $vowels{$wse → ''e; | |
426 | $vowels{$wso → ''o; | |
2ca993e8 | 427 | # independent vowels (otherwise) |
729e4ab9 A |
428 | $waa → a\u0304; |
429 | $wai → ai; | |
430 | $wau → au; | |
431 | $wii → i\u0304; | |
51004dcb | 432 | $wi → i; |
729e4ab9 | 433 | $wuu → u\u0304; |
51004dcb | 434 | $wu → u; |
729e4ab9 | 435 | $wrr → r\u0325\u0304; |
51004dcb | 436 | $wr → r\u0325; |
729e4ab9 | 437 | $wll → l\u0325\u0304; |
51004dcb A |
438 | $wl → l\u0325; |
439 | $we → e\u0304; | |
440 | $wo → o\u0304; | |
441 | $wa → a; | |
2ca993e8 | 442 | #extra vowels |
729e4ab9 A |
443 | $wce → e\u0306; |
444 | $wco → o\u0306; | |
445 | $wse → e; | |
446 | $wso → o; | |
447 | $om → ''om; | |
2ca993e8 | 448 | #stress marks |
729e4ab9 A |
449 | $avagraha → \u0315; |
450 | $chandrabindu$anusvara→\u0303; | |
451 | $chandrabindu → m\u0310; | |
452 | $visarga→h\u0323; | |
2ca993e8 | 453 | #numbers |
51004dcb A |
454 | $zero → 0; |
455 | $one → 1; | |
456 | $two → 2; | |
729e4ab9 | 457 | $three → 3; |
51004dcb A |
458 | $four → 4; |
459 | $five → 5; | |
460 | $six → 6; | |
729e4ab9 A |
461 | $seven → 7; |
462 | $eight → 8; | |
51004dcb A |
463 | $nine → 9; |
464 | $lm →; | |
729e4ab9 A |
465 | $ailm →; |
466 | $aulm →; | |
467 | $dgs→ʔ; | |
468 | $kta→t\u0331; | |
469 | $danda→'.'; | |
470 | $doubleDanda→'.'; | |
51004dcb | 471 | \uE070→; # ABBREVIATION SIGN |
2ca993e8 | 472 | # LETTER RA WITH MIDDLE DIAGONAL |
729e4ab9 A |
473 | \uE071}$x→r; # a dependent vowel follows, so the inherent a is dropped (same pattern as $ra}$x) |
474 | \uE071$virama→r; | |
475 | \uE071→ra; | |
2ca993e8 | 476 | # LETTER RA WITH LOWER DIAGONAL |
729e4ab9 A |
477 | \uE072}$x→r; # a dependent vowel follows, so the inherent a is dropped (same pattern as $ra}$x) |
478 | \uE072$virama→r; | |
479 | \uE072→ra; | |
51004dcb A |
480 | \uE073→; # RUPEE MARK |
481 | \uE074→; # RUPEE SIGN | |
482 | \uE075→; # CURRENCY NUMERATOR ONE | |
483 | \uE076→; # CURRENCY NUMERATOR TWO | |
484 | \uE077→; # CURRENCY NUMERATOR THREE | |
485 | \uE078→; # CURRENCY NUMERATOR FOUR | |
486 | \uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR | |
487 | \uE07A→; # CURRENCY DENOMINATOR SIXTEEN | |
488 | \uE07B→; # ISSHAR | |
489 | \uE07C→; # TIPPI | |
490 | \uE07D→; # ADDAK | |
491 | \uE07E→; # IRI | |
492 | \uE07F→; # URA | |
493 | \uE080→; # EK ONKAR | |
494 | \uE004→; # DEVANAGARI VOWEL SIGN SHORT A | |
2ca993e8 | 495 |