]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
73c04bcf | 4 | # File: Latin_InterIndic.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
73c04bcf | 6 | # |
2ca993e8 A |
7 | |
8 | # Latin-InterIndic | |
9 | #:: NFD; | |
10 | #\uE000 reserved | |
11 | # signs: chandrabindu, anusvara, visarga (the consonant variables start at $ka) | |
73c04bcf A |
12 | $chandrabindu=\uE001; |
13 | $anusvara=\uE002; | |
14 | $visarga=\uE003; | |
2ca993e8 A |
15 | #\uE004 reserved |
16 | # $w<vowel> names the stand-alone (independent) form of a vowel; the ones defined here occupy \uE005-\uE014 | |
73c04bcf A |
17 | $wa=\uE005; |
18 | $waa=\uE006; | |
19 | $wi=\uE007; | |
20 | $wii=\uE008; | |
21 | $wu=\uE009; | |
22 | $wuu=\uE00A; | |
23 | $wr=\uE00B; | |
24 | $wl=\uE00C; | |
25 | $wce=\uE00D; # LETTER CANDRA E | |
26 | $wse=\uE00E; # LETTER SHORT E | |
51004dcb | 27 | $we=\uE00F; # ए LETTER E |
73c04bcf A |
28 | $wai=\uE010; |
29 | $wco=\uE011; # LETTER CANDRA O | |
30 | $wso=\uE012; # LETTER SHORT O | |
51004dcb | 31 | $wo=\uE013; # ओ LETTER O |
73c04bcf A |
32 | $wau=\uE014; |
33 | $ka=\uE015; # first code point of the \uE015-\uE039 run that [$ka-$ha] spans | |
34 | $kha=\uE016; | |
35 | $ga=\uE017; | |
36 | $gha=\uE018; | |
37 | $nga=\uE019; | |
38 | $ca=\uE01A; | |
39 | $cha=\uE01B; | |
40 | $ja=\uE01C; | |
41 | $jha=\uE01D; | |
42 | $nya=\uE01E; | |
43 | $tta=\uE01F; | |
44 | $ttha=\uE020; | |
45 | $dda=\uE021; | |
46 | $ddha=\uE022; | |
47 | $nna=\uE023; | |
48 | $ta=\uE024; | |
49 | $tha=\uE025; | |
50 | $da=\uE026; | |
51 | $dha=\uE027; | |
52 | $na=\uE028; | |
53 | $ena=\uE029; #compatibility | |
54 | $pa=\uE02A; | |
55 | $pha=\uE02B; | |
56 | $ba=\uE02C; | |
57 | $bha=\uE02D; | |
58 | $ma=\uE02E; | |
59 | $ya=\uE02F; | |
60 | $ra=\uE030; | |
61 | $rra=\uE031; | |
62 | $la=\uE032; | |
63 | $lla=\uE033; | |
64 | $ela=\uE034; #compatibility | |
65 | $va=\uE035; | |
66 | $vva=\uE081; # lies outside \uE015-\uE039, so [$ka-$ha] does not include it | |
67 | $sha=\uE036; | |
68 | $ssa=\uE037; | |
69 | $sa=\uE038; | |
70 | $ha=\uE039; # last code point of the [$ka-$ha] range | |
2ca993e8 A |
71 | #\u093A Reserved |
72 | #\u093B Reserved | |
73c04bcf A |
73 | $nukta=\uE03C; # no rule visible in this file produces $nukta |
74 | $avagraha=\uE03D; # SIGN AVAGRAHA | |
2ca993e8 | 75 | # $<vowel> (no w prefix) names the dependent vowel sign; the ones defined here occupy \uE03E-\uE04C |
73c04bcf A |
76 | $aa=\uE03E; |
77 | $i=\uE03F; | |
78 | $ii=\uE040; | |
79 | $u=\uE041; | |
80 | $uu=\uE042; | |
81 | $rh=\uE043; | |
b331163b | 82 | $rrh=\uE044; |
73c04bcf A |
83 | $ce=\uE045; #VOWEL SIGN CANDRA E |
84 | $se=\uE046; #VOWEL SIGN SHORT E | |
85 | $e=\uE047; | |
86 | $ai=\uE048; | |
87 | $co=\uE049; # VOWEL SIGN CANDRA O | |
88 | $so=\uE04A; # VOWEL SIGN SHORT O | |
51004dcb | 89 | $o=\uE04B; # ो |
73c04bcf A |
90 | $au=\uE04C; |
91 | $virama=\uE04D; | |
2ca993e8 A |
92 | # \u094E Reserved |
93 | # \u094F Reserved | |
73c04bcf | 94 | $om = \uE050; # OM, produced by the ''om rule |
2ca993e8 A |
95 | # \u0951→; # UNMAPPED STRESS SIGN UDATTA |
96 | # \u0952→; # UNMAPPED STRESS SIGN ANUDATTA | |
97 | # \u0953→; # UNMAPPED GRAVE ACCENT | |
98 | # \u0954→; # UNMAPPED ACUTE ACCENT | |
51004dcb A |
99 | $lm = \uE055;# Telugu Length Mark |
100 | $ailm=\uE056;# AI Length Mark | |
101 | $aulm=\uE057;# AU Length Mark | |
2ca993e8 | 102 | #urdu compatibility forms ($uka-$uya, \uE058-\uE05F) |
73c04bcf A |
103 | $uka=\uE058; |
104 | $ukha=\uE059; | |
105 | $ugha=\uE05A; | |
106 | $ujha=\uE05B; | |
107 | $uddha=\uE05C; | |
108 | $udha=\uE05D; | |
109 | $ufa=\uE05E; | |
110 | $uya=\uE05F; | |
111 | $wrr=\uE060; | |
112 | $wll=\uE061; | |
b331163b | 113 | $lh=\uE062; |
73c04bcf A |
114 | $llh=\uE063; |
115 | $danda=\uE064; | |
116 | $doubleDanda=\uE065; | |
51004dcb A |
117 | $zero=\uE066; # DIGIT ZERO |
118 | $one=\uE067; # DIGIT ONE | |
119 | $two=\uE068; # DIGIT TWO | |
120 | $three=\uE069; # DIGIT THREE | |
121 | $four=\uE06A; # DIGIT FOUR | |
122 | $five=\uE06B; # DIGIT FIVE | |
123 | $six=\uE06C; # DIGIT SIX | |
124 | $seven=\uE06D; # DIGIT SEVEN | |
125 | $eight=\uE06E; # DIGIT EIGHT | |
126 | $nine=\uE06F; # DIGIT NINE | |
73c04bcf | 127 | $dgs=\uE082; # glottal stop; target of the ʔ rule |
2ca993e8 | 128 | # For all other scripts: $ecp0-$ecpF fill \uE070-\uE07F; no rule visible in this file uses them |
73c04bcf A |
129 | $ecp0=\uE070; |
130 | $ecp1=\uE071; | |
131 | $ecp2=\uE072; | |
132 | $ecp3=\uE073; | |
133 | $ecp4=\uE074; | |
134 | $ecp5=\uE075; | |
135 | $ecp6=\uE076; | |
136 | $ecp7=\uE077; | |
137 | $ecp8=\uE078; | |
138 | $ecp9=\uE079; | |
139 | $ecpA=\uE07A; | |
140 | $ecpB=\uE07B; | |
141 | $ecpC=\uE07C; | |
142 | $ecpD=\uE07D; | |
143 | $ecpE=\uE07E; | |
144 | $ecpF=\uE07F; | |
2ca993e8 | 145 | # Khanda-ta (target of the t\u0331 rule) |
73c04bcf | 146 | $kta=\uE083; |
2ca993e8 | 147 | # ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN |
73c04bcf A |
148 | $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; # $aa-$ii and $ce-$au |
149 | $depVowelBelow=[\uE041-\uE044]; # $u, $uu, $rh, $rrh | |
150 | $endThing=[$danda$doubleDanda]; # used as after-context by the rule that removes a $virama | |
2ca993e8 | 151 | # $x was originally called '§'; $z was '%'. No rule visible in this file references $x. |
73c04bcf A |
152 | $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; |
153 | $z=[bcdfghjklmnpqrstvwxyz]; # the lowercase ASCII letters other than a, e, i, o, u | |
154 | $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; # InterIndic $ka-$ha, Latin $z, and the ka-ha ranges of nine Indic scripts | |
729e4ab9 A |
155 | \u0315 → $avagraha; |
156 | \u0303→$chandrabindu$anusvara; | |
157 | m\u0310→$chandrabindu; | |
158 | h\u0323→$visarga; | |
159 | x→$ka$virama$sa; # Latin x expands to the cluster $ka + $virama + $sa | |
2ca993e8 A |
160 | # convert to independent forms at start of word or syllable: |
161 | # \u0314 followed by a vowel gives the dependent form (kept for roundtrip); \u0314a alone is removed | |
729e4ab9 A |
162 | \u0314a\u0304→$aa; |
163 | \u0314ai→$ai; | |
164 | \u0314au→$au; | |
165 | \u0314ii→$ii; | |
166 | \u0314i\u0304→$ii; | |
167 | \u0314i→$i; | |
168 | \u0314u\u0304→$uu; | |
169 | \u0314u→$u; | |
170 | \u0314r\u0325\u0304→$rrh; | |
171 | \u0314r\u0325→$rh; | |
172 | \u0314l\u0325\u0304→$llh; | |
173 | \u0314lh→$lh; | |
174 | \u0314l\u0325→$lh; | |
175 | \u0314e\u0304→$e; | |
176 | \u0314o\u0304→$o; | |
177 | \u0314a→; | |
178 | \u0314e\u0306→$ce; | |
179 | \u0314o\u0306→$co; | |
180 | \u0314e→$se; | |
181 | \u0314o→$so; | |
2ca993e8 | 182 | # preceded by consonants: with a $consonants member as before-context, a vowel becomes its dependent form |
729e4ab9 A |
183 | $consonants{ a\u0304→$aa; |
184 | $consonants{ ai→$ai; | |
185 | $consonants{ au→$au; | |
186 | $consonants{ ii→$ii; | |
187 | $consonants{ i\u0304→$ii; | |
188 | $consonants{ i→$i; | |
189 | $consonants{ u\u0304→$uu; | |
190 | $consonants{ u→$u; | |
191 | $consonants{ r\u0325\u0304→$rrh; | |
192 | $consonants{ r\u0325a→$rh; | |
193 | $consonants{ r\u0325→$rh; | |
194 | $consonants{ l\u0325\u0304→$llh; | |
195 | $consonants{ lh→$lh; | |
196 | $consonants{ l\u0325→$lh; | |
197 | $consonants{ e\u0304→$e; | |
198 | $consonants{ o\u0304→$o; | |
199 | $consonants{ e\u0306→$ce; | |
200 | $consonants{ o\u0306→$co; | |
201 | $consonants{ e→$se; | |
202 | $consonants{ o→$so; | |
2ca993e8 | 203 | # otherwise a vowel becomes its stand-alone form, e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai}) |
729e4ab9 A |
204 | a\u0304→$waa; |
205 | ai→$wai; | |
206 | au→$wau; | |
207 | i\u0304→$wii; | |
208 | i→$wi; | |
209 | u\u0304→$wuu; | |
210 | u→$wu; | |
211 | r\u0325\u0304→$wrr; | |
212 | r\u0325→$wr; | |
213 | l\u0325\u0304→$wll; | |
214 | lh→$wl; | |
215 | l\u0325→$wl; | |
216 | e\u0304→$we; | |
217 | o\u0304→$wo; | |
218 | a→$wa; | |
219 | e\u0306→$wce; | |
220 | o\u0306→$wco; | |
221 | e→$wse; | |
222 | ''om→$om; # '' is a literal apostrophe, so this matches 'om | |
223 | o→$wso; | |
2ca993e8 | 224 | # rules for anusvara: a nasal whose after-context (the text after the closing brace) is one of the listed letters becomes $anusvara; n before r\u0325, l\u0325 or na stays $na |
51004dcb A |
225 | n}r\u0325 → $na|$virama; |
226 | n}l\u0325 → $na|$virama; | |
227 | n}na → $na|$virama; | |
228 | n\u0307}[kg] → $anusvara; | |
229 | n\u0307}n\u0307 → $anusvara; | |
230 | n\u0304}[cj] → $anusvara; | |
231 | n\u0304}n\u0303 → $anusvara; | |
729e4ab9 | 232 | n\u0323}[tdn]\u0323 → $anusvara; |
51004dcb A |
233 | n}[tdn] → $anusvara; |
234 | m}[pbm] → $anusvara; | |
235 | n}[ylvshr] → $anusvara; | |
236 | m\u0307 → $anusvara; | |
2ca993e8 | 237 | #urdu compatibility: every rule but the last emits a letter plus $virama and puts the cursor marker before the $virama |
729e4ab9 A |
238 | q→$uka|$virama; |
239 | k\u0331h\u0331→$ukha |$virama; | |
240 | g\u0307→ $ugha | $virama; | |
241 | z → $ujha |$virama; | |
242 | f → $ufa|$virama; | |
243 | t\u0331→$kta; | |
2ca993e8 | 244 | # dev: longer spellings are listed ahead of the shorter spellings they begin with (e.g. kh before k) |
729e4ab9 A |
245 | y\u0307→$uya|$virama; |
246 | l\u0331→$ela|$virama; | |
247 | n\u0331→$ena|$virama; | |
248 | n\u0307→$nga|$virama; | |
249 | n\u0303→$nya|$virama; | |
250 | n\u0323→$nna|$virama; | |
251 | t\u0323h→$ttha|$virama; | |
252 | t\u0323→$tta|$virama; | |
253 | r\u0323h→$udha|$virama; | |
254 | r\u0323→$uddha|$virama; | |
255 | d\u0323h→$ddha|$virama; | |
256 | d\u0323→$dda|$virama; | |
257 | kh→$kha|$virama; | |
258 | k→$ka|$virama; | |
259 | gh→$gha|$virama; | |
260 | g→$ga|$virama; | |
261 | ch→$cha|$virama; | |
262 | c→$ca|$virama; | |
263 | jh→$jha|$virama; | |
264 | j→$ja|$virama; | |
265 | ny→$nya|$virama; | |
266 | tth→$ttha|$virama; | |
267 | ddh→$ddha|$virama; | |
268 | th→$tha|$virama; | |
269 | t→$ta|$virama; | |
270 | dh→$dha|$virama; | |
271 | d→$da|$virama; | |
272 | n→$na|$virama; | |
273 | ph→$pha|$virama; | |
274 | p→$pa|$virama; | |
275 | bh→$bha|$virama; | |
276 | b→$ba|$virama; | |
277 | m→$ma|$virama; | |
278 | y→$ya|$virama; | |
279 | r\u0331→$rra|$virama; | |
280 | r→$ra|$virama; | |
281 | l\u0323→$lla|$virama; | |
282 | l→$la|$virama; | |
283 | v→$va|$virama; | |
284 | w\u0307→$vva|$virama; | |
285 | w→$va|$virama; | |
286 | sh→$sha|$virama; | |
287 | ss→$ssa|$virama; | |
288 | s\u0323→$ssa|$virama; | |
289 | s\u0301→$sha|$virama; | |
290 | s→$sa|$virama; | |
291 | h→$ha|$virama; | |
292 | '.'→$danda; | |
293 | $danda'.'→$doubleDanda; | |
294 | $depVowelAbove{'~'→$anusvara; | |
295 | $depVowelBelow{'~'→$chandrabindu; | |
2ca993e8 A |
296 | # convert to dependent forms after consonant with no vowel: |
297 | # e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}; a plain a after $virama removes both, e.g. ka -→ {ka} | |
298 | #$virama aa→$aa; | |
729e4ab9 A |
299 | $virama a\u0304→$aa; |
300 | $virama ai→$ai; | |
301 | $virama au→$au; | |
302 | $virama ii→$ii; | |
303 | $virama i\u0304→$ii; | |
304 | $virama i→$i; | |
2ca993e8 | 305 | #$virama uu→$uu; |
729e4ab9 A |
306 | $virama u\u0304→$uu; |
307 | $virama u→$u; | |
2ca993e8 | 308 | #$virama rrh→$rrh; |
729e4ab9 | 309 | $virama r\u0325\u0304→$rrh; |
2ca993e8 | 310 | #$virama rh→$rh; |
729e4ab9 A |
311 | $virama r\u0325a→$rh; |
312 | $virama r\u0325→$rh; | |
313 | $virama l\u0325\u0304→$llh; | |
314 | $virama lh→$lh; | |
315 | $virama l\u0325→$lh; | |
316 | $virama e\u0304→$e; | |
317 | $virama o\u0304→$o; | |
318 | $virama a→; | |
319 | $virama e\u0306→$ce; | |
320 | $virama o\u0306→$co; | |
321 | $virama e→$se; | |
322 | $virama o→$so; | |
2ca993e8 A |
323 | # otherwise convert to independent forms when separated by ': k'ai -→ {ka}{virama}'ai -→ {ka}{wai} (the $virama is consumed) |
324 | #$virama''aa→$waa; | |
729e4ab9 A |
325 | $virama''a\u0304→$waa; |
326 | $virama''ai→$wai; | |
327 | $virama''au→$wau; | |
2ca993e8 | 328 | #$virama''ii→$wii; |
729e4ab9 A |
329 | $virama''i\u0304→$wii; |
330 | $virama''i→$wi; | |
2ca993e8 | 331 | #$virama''uu→$wuu; |
729e4ab9 A |
332 | $virama''u\u0304→$wuu; |
333 | $virama''u→$wu; | |
2ca993e8 | 334 | #$virama''rrh→$wrr; |
729e4ab9 | 335 | $virama''r\u0325\u0304→$wrr; |
2ca993e8 | 336 | #$virama''rh→$wr; |
729e4ab9 A |
337 | $virama''r\u0325→$wr; |
338 | $virama''l\u0325\u0304→$wll; | |
2ca993e8 | 339 | #$virama''lh→$wl; |
729e4ab9 A |
340 | $virama''l\u0325→$wl; |
341 | $virama''e\u0304→$we; | |
342 | $virama''o\u0304→$wo; | |
343 | $virama''a→$wa; | |
344 | $virama''e\u0306→$wce; | |
345 | $virama''o\u0306→$wco; | |
346 | $virama''e→$wse; | |
347 | $virama''o→$wso; | |
2ca993e8 | 348 | # no virama: an apostrophe directly before a vowel still selects the stand-alone form |
729e4ab9 A |
349 | ''a\u0304→$waa; |
350 | ''ai→$wai; | |
351 | ''au→$wau; | |
352 | ''i\u0304→$wii; | |
353 | ''i→$wi; | |
354 | ''u\u0304→$wuu; | |
355 | ''u→$wu; | |
356 | ''r\u0325\u0304→$wrr; | |
357 | ''r\u0325→$wr; | |
358 | ''l\u0325\u0304→$wll; | |
359 | ''l\u0325→$wl; | |
360 | ''e\u0304→$we; | |
361 | ''o\u0304→$wo; | |
362 | ''a→$wa; | |
363 | ''e\u0306→$wce; | |
364 | ''o\u0306→$wco; | |
365 | ''e→$wse; | |
366 | ''o→$wso; | |
367 | $virama } [$z] → $virama; | |
368 | $virama } ' ' → $virama ; | |
369 | $virama}$endThing→; # remove a $virama that is followed by a danda or double danda | |
370 | ʔ→$dgs; # Glottal Stop | |
371 | 0→$zero; | |
372 | 1→$one; | |
373 | 2→$two; | |
374 | 3→$three; | |
375 | 4→$four; | |
376 | 5→$five; | |
377 | 6→$six; | |
378 | 7→$seven; | |
379 | 8→$eight; | |
380 | 9→$nine; | |
381 | ''→; # remove any apostrophe not consumed by an earlier rule | |
2ca993e8 A |
382 | #:: NFC (NFD) ; |
383 |