]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/InterIndic_Latin.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / InterIndic_Latin.txt
CommitLineData
f3c0d7a5
A
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
73c04bcf 4# File: InterIndic_Latin.txt
f3c0d7a5 5# Generated from CLDR
73c04bcf 6#
2ca993e8
A
7
8# InterIndic-Latin
9#\uE000 reserved
10#consonants
73c04bcf
A
11$chandrabindu=\uE001;
12$anusvara=\uE002;
13$visarga=\uE003;
2ca993e8
A
14#\uE004 reserved
15# w←vowel→ represents the stand-alone form
73c04bcf
A
16$wa=\uE005;
17$waa=\uE006;
18$wi=\uE007;
19$wii=\uE008;
20$wu=\uE009;
21$wuu=\uE00A;
22$wr=\uE00B;
23$wl=\uE00C;
24$wce=\uE00D; # LETTER CANDRA E
25$wse=\uE00E; # LETTER SHORT E
51004dcb 26$we=\uE00F; # ए LETTER E
73c04bcf
A
27$wai=\uE010;
28$wco=\uE011; # LETTER CANDRA O
29$wso=\uE012; # LETTER SHORT O
51004dcb 30$wo=\uE013; # ओ LETTER O
73c04bcf
A
31$wau=\uE014;
32$ka=\uE015;
33$kha=\uE016;
34$ga=\uE017;
35$gha=\uE018;
36$nga=\uE019;
37$ca=\uE01A;
38$cha=\uE01B;
39$ja=\uE01C;
40$jha=\uE01D;
41$nya=\uE01E;
42$tta=\uE01F;
43$ttha=\uE020;
44$dda=\uE021;
45$ddha=\uE022;
46$nna=\uE023;
47$ta=\uE024;
48$tha=\uE025;
49$da=\uE026;
50$dha=\uE027;
51$na=\uE028;
52$ena=\uE029; #compatibility
53$pa=\uE02A;
54$pha=\uE02B;
55$ba=\uE02C;
56$bha=\uE02D;
57$ma=\uE02E;
58$ya=\uE02F;
59$ra=\uE030;
60$vva=\uE081;
61$rra=\uE031;
62$la=\uE032;
63$lla=\uE033;
64$ela=\uE034; #compatibility
65$va=\uE035;
66$sha=\uE036;
67$ssa=\uE037;
68$sa=\uE038;
69$ha=\uE039;
2ca993e8
A
70#\u093A Reserved
71#\u093B Reserved
73c04bcf
A
72$nukta=\uE03C;
73$avagraha=\uE03D; # SIGN AVAGRAHA
2ca993e8 74# ←vowel→ represents the dependent form
73c04bcf
A
75$aa=\uE03E;
76$i=\uE03F;
77$ii=\uE040;
78$u=\uE041;
79$uu=\uE042;
80$rh=\uE043;
b331163b 81$rrh=\uE044;
73c04bcf
A
82$ce=\uE045; #VOWEL SIGN CANDRA E
83$se=\uE046; #VOWEL SIGN SHORT E
84$e=\uE047;
85$ai=\uE048;
86$co=\uE049; # VOWEL SIGN CANDRA O
87$so=\uE04A; # VOWEL SIGN SHORT O
51004dcb 88$o=\uE04B; # ो
73c04bcf
A
89$au=\uE04C;
90$virama=\uE04D;
2ca993e8
A
91# \u094E Reserved
92# \u094F Reserved
73c04bcf 93$om=\uE050; # OM
51004dcb
A
94\uE051→; # UNMAPPED STRESS SIGN UDATTA
95\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
96\uE053→; # UNMAPPED GRAVE ACCENT
97\uE054→; # UNMAPPED ACUTE ACCENT
98$lm = \uE055;# Telugu Length Mark
99$ailm=\uE056;# AI Length Mark
100$aulm=\uE057;# AU Length Mark
2ca993e8 101#urdu compatibility forms
73c04bcf
A
102$uka=\uE058;
103$ukha=\uE059;
104$ugha=\uE05A;
105$ujha=\uE05B;
106$uddha=\uE05C;
107$udha=\uE05D;
108$ufa=\uE05E;
109$uya=\uE05F;
110$wrr=\uE060;
111$wll=\uE061;
b331163b 112$lh=\uE062;
73c04bcf
A
113$llh=\uE063;
114$danda=\uE064;
115$doubleDanda=\uE065;
51004dcb
A
116$zero=\uE066; # DIGIT ZERO
117$one=\uE067; # DIGIT ONE
118$two=\uE068; # DIGIT TWO
119$three=\uE069; # DIGIT THREE
120$four=\uE06A; # DIGIT FOUR
121$five=\uE06B; # DIGIT FIVE
122$six=\uE06C; # DIGIT SIX
123$seven=\uE06D; # DIGIT SEVEN
124$eight=\uE06E; # DIGIT EIGHT
125$nine=\uE06F; # DIGIT NINE
2ca993e8 126# Glottal stop
73c04bcf 127$dgs=\uE082;
2ca993e8 128#Khanda-ta
73c04bcf
A
129$kta=\uE083;
# Dependent vowel signs \uE03E-\uE040 ($aa, $i, $ii) and \uE045-\uE04C ($ce through $au).
# NOTE(review): not referenced by any rule in the portion of the file shown here.
130$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
# Dependent vowel signs \uE041-\uE044 ($u, $uu, $rh, $rrh).
# NOTE(review): not referenced by any rule in the portion of the file shown here.
131$depVowelBelow=[\uE041-\uE044];
2ca993e8 132# $x was originally called '§'; $z was '%'
73c04bcf
A
# $x: every dependent vowel sign. Used as the look-ahead context ("}$x") in the
# consonant rules so that a consonant followed by a vowel sign is emitted without
# the inherent "a".
133$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
# $z: lowercase Latin consonant letters.
# NOTE(review): not referenced by any rule in the portion of the file shown here.
134$z=[bcdfghjklmnpqrstvwxyz];
# $vowels: Latin letters a e i o u r plus U+0304 (combining macron), U+0325
# (combining ring below) and U+0306 (combining breve). Used as the preceding
# context ("$vowels{") in the "independent vowels when preceded by vowels" rules.
135$vowels=[aeiour\u0304\u0325\u0306];
# $forceIndependentMatra: any character that is neither a letter ([:L:]) nor in
# U+0300-U+034C. Used as the preceding context ("$forceIndependentMatra{") in the
# dependent vowel rules, which then prefix the output with \u0314.
136$forceIndependentMatra = [^[[:L:][\u0300-\u034C]]];
2ca993e8
A
137######################################################################
138# convert from Native letters to Latin letters
139######################################################################
140#transliterations for anusvara
51004dcb
A
141$anusvara} [$ka$kha$ga$gha$nga] → n\u0307;
142$anusvara} [$ca$cha$ja$jha$nya] → n\u0304;
143$anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323;
144$anusvara} [$ta$tha$da$dha$na] → n;
145$anusvara} [$pa$pha$ba$bha$ma] → m;
729e4ab9
A
146$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n;
147$anusvara→ m\u0307;
2ca993e8 148# Urdu compatibility
51004dcb
A
149$ya$nukta}$x → y\u0307;
150$ya$nukta$virama → y\u0307;
151$ya$nukta → y\u0307a;
152$la$nukta }$x → l\u0331;
153$la$nukta$virama → l\u0331;
154$la$nukta → l\u0331a;
155$na$nukta }$x → n\u0331;
156$na$nukta$virama → n\u0331;
157$na$nukta → n\u0331a;
158$ena }$x → n\u0331;
159$ena$virama → n\u0331;
160$ena → n\u0331a;
161$uka → qa;
162$ka$nukta }$x → q;
163$ka$nukta$virama → q;
164$ka$nukta → qa;
165$kha$nukta }$x → k\u0331h\u0331;
166$kha$nukta$virama → k\u0331h\u0331;
167$kha$nukta → k\u0331h\u0331a;
168$ukha$virama → k\u0331h\u0331;
169$ukha → k\u0331h\u0331a;
170$ugha → g\u0307a;
171$ga$nukta }$x → g\u0307;
172$ga$nukta$virama → g\u0307;
173$ga$nukta → g\u0307a;
174$ujha → za;
175$ja$nukta }$x → z;
176$ja$nukta$virama → z;
177$ja$nukta → za;
178$ddha$nukta}$x → r\u0323h;
179$ddha$nukta$virama → r\u0323h;
180$ddha$nukta → r\u0323ha;
181$uddha}$x → r\u0323;
182$uddha$virama → r\u0323;
183$uddha → r\u0323a;
184$udha → r\u0323a;
185$dda$nukta}$x → r\u0323;
186$dda$nukta$virama → r\u0323;
187$dda$nukta → r\u0323a;
188$pha$nukta }$x → f;
189$pha$nukta$virama → f;
190$pha$nukta → fa;
191$ufa }$x → f;
192$ufa$virama → f;
193$ufa → fa;
194$ra$nukta}$x → r\u0331;
195$ra$nukta$virama → r\u0331;
196$ra$nukta → r\u0331a;
197$lla$nukta}$x → l\u0331;
198$lla$nukta$virama → l\u0331;
199$lla$nukta → l\u0331a;
200$ela}$x → l\u0331;
201$ela$virama → l\u0331;
202$ela → l\u0331a;
203$uya}$x → y\u0307;
204$uya$virama → y\u0307;
205$uya → y\u0307a;
2ca993e8 206# normal consonants
729e4ab9
A
207$ka$virama}$ha→k'';
208$ka}$x→k;
209$ka$virama→k;
210$ka→ka;
211$kha}$x→kh;
212$kha$virama→kh;
213$kha→kha;
214$ga$virama}$ha→g'';
215$ga}$x→g;
216$ga$virama→g;
217$ga→ga;
218$gha}$x→gh;
219$gha$virama→gh;
220$gha→gha;
221$nga}$x→n\u0307;
222$nga$virama→n\u0307;
223$nga→n\u0307a;
224$ca$virama}$ha→c'';
225$ca}$x→c;
226$ca$virama→c;
227$ca→ca;
228$cha}$x→ch;
229$cha$virama→ch;
230$cha→cha;
231$ja$virama}$ha→j'';
232$ja}$x→j;
233$ja$virama→j;
234$ja→ja;
235$jha}$x→jh;
236$jha$virama→jh;
237$jha→jha;
238$nya }$x→n\u0303;
239$nya$virama→n\u0303;
240$nya → n\u0303a;
241$tta$virama}$ha→t\u0323'';
242$tta}$x→t\u0323;
243$tta$virama→t\u0323;
244$tta→t\u0323a;
245$ttha}$x→t\u0323h;
246$ttha$virama→t\u0323h;
247$ttha→t\u0323ha;
# $dda (retroflex d) rules, tried in order.
# First rule: dda + virama directly before ha emits an apostrophe after the
# consonant, so that d\u0323 + h is not confused with the $ddha output d\u0323h.
# This matches the pattern used for $tta, $ta, $da, etc.; the previous context
# "}$x$ha" required a dependent vowel before ha and never covered the
# virama case.
248$dda$virama}$ha→d\u0323'';
# dda followed by a dependent vowel sign: consonant only, the vowel rule supplies the vowel.
249$dda}$x→d\u0323;
# dda + virama: bare consonant.
250$dda$virama→d\u0323;
# dda alone: consonant plus inherent "a".
251$dda→d\u0323a;
252$ddha}$x→d\u0323h;
253$ddha$virama→d\u0323h;
254$ddha→d\u0323ha;
255$nna}$x→n\u0323;
256$nna$virama→n\u0323;
257$nna→n\u0323a;
258$ta$virama}$ha→t'';
259$ta$virama}$ttha→t'';
260$ta$virama}$tta→t'';
261$ta$virama}$tha→t'';
262$ta}$x→t;
263$ta$virama→t;
264$ta→ta;
265$tha}$x→th;
266$tha$virama→th;
267$tha→tha;
268$da$virama}$ha→d'';
269$da$virama}$ddha→d'';
270$da$virama}$dda→d'';
271$da$virama}$dha→d'';
272$da}$x→d;
273$da$virama→d;
274$da→da;
275$dha}$x→dh;
276$dha$virama→dh;
277$dha→dha;
278$na$virama}$ga→n'';
279$na$virama}$ya→n'';
280$na}$x→n;
281$na$virama→n;
282$na→na;
283$pa$virama}$ha→p'';
284$pa}$x→p;
285$pa$virama→p;
286$pa→pa;
287$pha}$x→ph;
288$pha$virama→ph;
289$pha→pha;
290$ba$virama}$ha→b'';
291$ba}$x→b;
292$ba$virama→b;
293$ba→ba;
294$bha}$x→bh;
295$bha$virama→bh;
296$bha→bha;
297$ma$virama}$ma→m'';
298$ma}$x→m;
299$ma$virama→m;
300$ma→ma;
301$ya}$x→y;
302$ya$virama→y;
303$ya→ya;
304$ra$virama}$ha→r'';
305$ra}$x→r;
306$ra$virama→r;
307$ra→ra;
308$vva$virama}$ha→w\u0307'';
309$vva}$x→w\u0307;
310$vva$virama→w\u0307;
311$vva→w\u0307a;
312$rra$virama}$ha→r\u0331'';
313$rra}$x→r\u0331;
314$rra$virama→r\u0331;
315$rra→r\u0331a;
316$la$virama}$ha→l'';
317$la}$x→l;
318$la$virama→l;
319$la→la;
320$lla$virama}$ha→l\u0323'';
321$lla}$x→l\u0323;
322$lla$virama→l\u0323;
323$lla→l\u0323a;
324$va}$x→v;
325$va$virama→v;
326$va→va;
327$sa$virama}$ha→s'';
328$sa$virama}$sha→s'';
329$sa$virama}$ssa→s'';
330$sa$virama}$sa→s'';
331$sa}$x→s;
332$sa$virama→s;
2ca993e8 333#for gurmukhi
729e4ab9
A
334$sa$nukta}$x→s\u0301;
335$sa$nukta$virama→s\u0301;
336$sa$nukta→s\u0301a;
337$sa→sa;
338$sha}$x→s\u0301;
339$sha$virama→s\u0301;
340$sha→s\u0301a;
341$ssa}$x→s\u0323;
342$ssa$virama→s\u0323;
343$ssa→s\u0323a;
344$ha}$x→h;
345$ha$virama→h;
346$ha→ha;
2ca993e8 347# dependent vowels (should never occur except following consonants)
51004dcb
A
348$forceIndependentMatra{$aa → \u0314a\u0304;
349$forceIndependentMatra{$ai → \u0314ai;
350$forceIndependentMatra{$au → \u0314au;
351$forceIndependentMatra{$ii → \u0314i\u0304;
352$forceIndependentMatra{$i → \u0314i;
353$forceIndependentMatra{$uu → \u0314u\u0304;
354$forceIndependentMatra{$u → \u0314u;
729e4ab9 355$forceIndependentMatra{$rrh → \u0314r\u0325\u0304;
51004dcb 356$forceIndependentMatra{$rh → \u0314r\u0325;
729e4ab9 357$forceIndependentMatra{$llh → \u0314l\u0325\u0304;
51004dcb
A
358$forceIndependentMatra{$lh → \u0314l\u0325;
359$forceIndependentMatra{$e → \u0314e\u0304;
360$forceIndependentMatra{$o → \u0314o\u0304;
2ca993e8 361#extra vowels
51004dcb
A
362$forceIndependentMatra{$ce → \u0314e\u0306;
363$forceIndependentMatra{$co → \u0314o\u0306;
364$forceIndependentMatra{$se → \u0314e;
365$forceIndependentMatra{$so → \u0314o;
366$forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
729e4ab9 367$forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
51004dcb
A
368$aa → a\u0304;
369$ai → ai;
370$au → au;
371$ii → i\u0304;
372$i → i;
373$uu → u\u0304;
374$u → u;
729e4ab9 375$rrh → r\u0325\u0304;
51004dcb 376$rh → r\u0325;
729e4ab9 377$llh → l\u0325\u0304;
51004dcb
A
378$lh → l\u0325;
379$e → e\u0304;
380$o → o\u0304;
2ca993e8 381#extra vowels
51004dcb
A
382$ce → e\u0306;
383$co → o\u0306;
384$se → e;
385$so → o;
2ca993e8 386#dependent vowels when following independent vowels. Generally illegal; handled only for round-tripping
729e4ab9
A
387$waa} $x → a\u0304\u0314;
388$wai} $x → ai\u0314;
389$wau} $x → au\u0314;
390$wii} $x → i\u0304\u0314;
391$wi } $x → i\u0314;
392$wuu} $x → u\u0304\u0314;
393$wu } $x → u\u0314;
394$wrr} $x → r\u0325\u0304\u0314;
395$wr } $x → r\u0325\u0314;
396$wll} $x → l\u0325\u0304\u0314;
397$wl } $x → l\u0325\u0314;
398$we } $x → e\u0304\u0314;
399$wo } $x → o\u0304\u0314;
400$wa } $x → a\u0314;
2ca993e8 401#extra vowels
729e4ab9
A
402$wce} $x → e\u0306\u0314;
403$wco} $x → o\u0306\u0314;
404$wse} $x → e\u0314;
405$wso} $x → o\u0314;
406$om} $x → ''om\u0314;
2ca993e8 407# independent vowels when preceded by vowels
51004dcb
A
408$vowels{$waa → ''a\u0304;
409$vowels{$wai → ''ai;
410$vowels{$wau → ''au;
411$vowels{$wii → ''i\u0304;
412$vowels{$wi → ''i;
413$vowels{$wuu → ''u\u0304;
414$vowels{$wu → ''u;
415$vowels{$wrr → ''r\u0325\u0304;
416$vowels{$wr → ''r\u0325;
417$vowels{$wll → ''l\u0325\u0304;
418$vowels{$wl → ''l\u0325;
419$vowels{$we → ''e\u0304;
420$vowels{$wo → ''o\u0304;
421$vowels{$wa → ''a;
2ca993e8 422#extra vowels
51004dcb
A
423$vowels{$wce → ''e\u0306;
424$vowels{$wco → ''o\u0306;
425$vowels{$wse → ''e;
426$vowels{$wso → ''o;
2ca993e8 427# independent vowels (otherwise)
729e4ab9
A
428$waa → a\u0304;
429$wai → ai;
430$wau → au;
431$wii → i\u0304;
51004dcb 432$wi → i;
729e4ab9 433$wuu → u\u0304;
51004dcb 434$wu → u;
729e4ab9 435$wrr → r\u0325\u0304;
51004dcb 436$wr → r\u0325;
729e4ab9 437$wll → l\u0325\u0304;
51004dcb
A
438$wl → l\u0325;
439$we → e\u0304;
440$wo → o\u0304;
441$wa → a;
2ca993e8 442#extra vowels
729e4ab9
A
443$wce → e\u0306;
444$wco → o\u0306;
445$wse → e;
446$wso → o;
447$om → ''om;
2ca993e8 448#stress marks
729e4ab9
A
449$avagraha → \u0315;
450$chandrabindu$anusvara→\u0303;
451$chandrabindu → m\u0310;
452$visarga→h\u0323;
2ca993e8 453#numbers
51004dcb
A
454$zero → 0;
455$one → 1;
456$two → 2;
729e4ab9 457$three → 3;
51004dcb
A
458$four → 4;
459$five → 5;
460$six → 6;
729e4ab9
A
461$seven → 7;
462$eight → 8;
51004dcb
A
463$nine → 9;
464$lm →;
729e4ab9
A
465$ailm →;
466$aulm →;
467$dgs→ʔ;
468$kta→t\u0331;
469$danda→'.';
470$doubleDanda→'.';
51004dcb 471\uE070→; # ABBREVIATION SIGN
2ca993e8 472# LETTER RA WITH MIDDLE DIAGONAL
729e4ab9
A
473\uE071}$x→ra;
474\uE071$virama→r;
475\uE071→ra;
2ca993e8 476# LETTER RA WITH LOWER DIAGONAL
729e4ab9
A
477\uE072}$x→ra;
478\uE072$virama→r;
479\uE072→ra;
51004dcb
A
480\uE073→; # RUPEE MARK
481\uE074→; # RUPEE SIGN
482\uE075→; # CURRENCY NUMERATOR ONE
483\uE076→; # CURRENCY NUMERATOR TWO
484\uE077→; # CURRENCY NUMERATOR THREE
485\uE078→; # CURRENCY NUMERATOR FOUR
486\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
487\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
488\uE07B→; # ISSHAR
489\uE07C→; # TIPPI
490\uE07D→; # ADDAK
491\uE07E→; # IRI
492\uE07F→; # URA
493\uE080→; # EK ONKAR
494\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
2ca993e8 495