]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/data/translit/Latin_InterIndic.txt
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Latin_InterIndic.txt
index 47e450101309c21285d22259884739218cf419a5..7a4f1feffbcb04fb49da1e1427150ab33c066b7a 100644 (file)
@@ -1,15 +1,19 @@
-# ***************************************************************************
-# *
-# *  Copyright (C) 2004-2008, International Business Machines
-# *  Corporation; Unicode, Inc.; and others.  All Rights Reserved.
-# *
-# ***************************************************************************
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
 # File: Latin_InterIndic.txt
-# Generated from CLDR 
+# Generated from CLDR
 #
+
+# Latin-InterIndic
+#:: NFD;
+#\uE000 reserved
+#consonants
 $chandrabindu=\uE001;
 $anusvara=\uE002;
 $visarga=\uE003;
+#\uE004 reserved
+# w<vowel> represents the stand-alone (independent) form
 $wa=\uE005;
 $waa=\uE006;
 $wi=\uE007;
@@ -64,15 +68,18 @@ $sha=\uE036;
 $ssa=\uE037;
 $sa=\uE038;
 $ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
 $nukta=\uE03C;
 $avagraha=\uE03D; # SIGN AVAGRAHA
+# <vowel> represents the dependent form
 $aa=\uE03E;
 $i=\uE03F;
 $ii=\uE040;
 $u=\uE041;
 $uu=\uE042;
 $rh=\uE043;
-$lh=\uE044;
+$rrh=\uE044;
 $ce=\uE045; #VOWEL SIGN CANDRA E
 $se=\uE046; #VOWEL SIGN SHORT E
 $e=\uE047;
@@ -82,10 +89,17 @@ $so=\uE04A; # VOWEL SIGN SHORT O
 $o=\uE04B;  # ो
 $au=\uE04C;
 $virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
 $om = \uE050; # OM
+# \u0951→;        # UNMAPPED STRESS SIGN UDATTA
+# \u0952→;        # UNMAPPED STRESS SIGN ANUDATTA
+# \u0953→;        # UNMAPPED GRAVE ACCENT
+# \u0954→;        # UNMAPPED ACUTE ACCENT
 $lm = \uE055;#  Telugu Length Mark
 $ailm=\uE056;#  AI Length Mark
 $aulm=\uE057;#  AU Length Mark
+#urdu compatibility forms
 $uka=\uE058;
 $ukha=\uE059;
 $ugha=\uE05A;
@@ -96,7 +110,7 @@ $ufa=\uE05E;
 $uya=\uE05F;
 $wrr=\uE060;
 $wll=\uE061;
-$rrh=\uE062;
+$lh=\uE062;
 $llh=\uE063;
 $danda=\uE064;
 $doubleDanda=\uE065;
@@ -111,6 +125,7 @@ $seven=\uE06D;    # DIGIT SEVEN
 $eight=\uE06E;    # DIGIT EIGHT
 $nine=\uE06F;     # DIGIT NINE
 $dgs=\uE082;
+# For all other scripts
 $ecp0=\uE070;
 $ecp1=\uE071;
 $ecp2=\uE072;
@@ -127,216 +142,242 @@ $ecpC=\uE07C;
 $ecpD=\uE07D;
 $ecpE=\uE07E;
 $ecpF=\uE07F;
+# Khanda-ta
 $kta=\uE083;
+# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
 $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
 $depVowelBelow=[\uE041-\uE044];
 $endThing=[$danda$doubleDanda];
+# $x was originally called '§'; $z was '%'
 $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
 $z=[bcdfghjklmnpqrstvwxyz];
 $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
-\u0315 > $avagraha;
-\u0303>$chandrabindu$anusvara;
-m\u0310>$chandrabindu;
-h\u0323>$visarga;
-x>$ka$virama$sa;
-\u0314a\u0304>$aa;
-\u0314ai>$ai;
-\u0314au>$au;
-\u0314ii>$ii;
-\u0314i\u0304>$ii;
-\u0314i>$i;
-\u0314u\u0304>$uu;
-\u0314u>$u;
-\u0314r\u0325\u0304>$rrh;
-\u0314r\u0325>$rh;
-\u0314l\u0325\u0304>$llh;
-\u0314lh>$lh;
-\u0314l\u0325>$lh;
-\u0314e\u0304>$e;
-\u0314o\u0304>$o;
-\u0314a>;
-\u0314e\u0306>$ce;
-\u0314o\u0306>$co;
-\u0314e>$se;
-\u0314o>$so;
-$consonants{ a\u0304>$aa;
-$consonants{ ai>$ai;
-$consonants{ au>$au;
-$consonants{ ii>$ii;
-$consonants{ i\u0304>$ii;
-$consonants{ i>$i;
-$consonants{ u\u0304>$uu;
-$consonants{ u>$u;
-$consonants{ r\u0325\u0304>$rrh;
-$consonants{ r\u0325a>$rh;
-$consonants{ r\u0325>$rh;
-$consonants{ l\u0325\u0304>$llh;
-$consonants{ lh>$lh;
-$consonants{ l\u0325>$lh;
-$consonants{ e\u0304>$e;
-$consonants{ o\u0304>$o;
-$consonants{ e\u0306>$ce;
-$consonants{ o\u0306>$co;
-$consonants{ e>$se;
-$consonants{ o>$so;
-a\u0304>$waa;
-ai>$wai;
-au>$wau;
-i\u0304>$wii;
-i>$wi;
-u\u0304>$wuu;
-u>$wu;
-r\u0325\u0304>$wrr;
-r\u0325>$wr;
-l\u0325\u0304>$wll;
-lh>$wl;
-l\u0325>$wl;
-e\u0304>$we;
-o\u0304>$wo;
-a>$wa;
-e\u0306>$wce;
-o\u0306>$wco;
-e>$wse;
-''om>$om;
-o>$wso;
-n}r\u0325           > $na|$virama;
-n}l\u0325           > $na|$virama;
-n}na                > $na|$virama;
-n\u0307}[kg]        > $anusvara;
-n\u0307}n\u0307     > $anusvara;
-n\u0304}[cj]        > $anusvara;
-n\u0304}n\u0303     > $anusvara;
-n\u0323}[tdn]\u0323 > $anusvara;
-n}[tdn]             > $anusvara;
-m}[pbm]             > $anusvara;
-n}[ylvshr]          > $anusvara;
-m\u0307             > $anusvara;
-q>$uka|$virama;
-k\u0331h\u0331>$ukha |$virama;
-g\u0307> $ugha | $virama;
-z > $ujha |$virama;
-f > $ufa|$virama;
-t\u0331>$kta;
-y\u0307>$uya|$virama;
-l\u0331>$ela|$virama;
-n\u0331>$ena|$virama;
-n\u0307>$nga|$virama;
-n\u0303>$nya|$virama;
-n\u0323>$nna|$virama;
-t\u0323h>$ttha|$virama;
-t\u0323>$tta|$virama;
-r\u0323h>$udha|$virama;
-r\u0323>$uddha|$virama;
-d\u0323h>$ddha|$virama;
-d\u0323>$dda|$virama;
-kh>$kha|$virama;
-k>$ka|$virama;
-gh>$gha|$virama;
-g>$ga|$virama;
-ch>$cha|$virama;
-c>$ca|$virama;
-jh>$jha|$virama;
-j>$ja|$virama;
-ny>$nya|$virama;
-tth>$ttha|$virama;
-ddh>$ddha|$virama;
-th>$tha|$virama;
-t>$ta|$virama;
-dh>$dha|$virama;
-d>$da|$virama;
-n>$na|$virama;
-ph>$pha|$virama;
-p>$pa|$virama;
-bh>$bha|$virama;
-b>$ba|$virama;
-m>$ma|$virama;
-y>$ya|$virama;
-r\u0331>$rra|$virama;
-r>$ra|$virama;
-l\u0323>$lla|$virama;
-l>$la|$virama;
-v>$va|$virama;
-w\u0307>$vva|$virama;
-w>$va|$virama;
-sh>$sha|$virama;
-ss>$ssa|$virama;
-s\u0323>$ssa|$virama;
-s\u0301>$sha|$virama;
-s>$sa|$virama;
-h>$ha|$virama;
-'.'>$danda;
-$danda'.'>$doubleDanda;
-$depVowelAbove{'~'>$anusvara;
-$depVowelBelow{'~'>$chandrabindu;
-$virama a\u0304>$aa;
-$virama ai>$ai;
-$virama au>$au;
-$virama ii>$ii;
-$virama i\u0304>$ii;
-$virama i>$i;
-$virama u\u0304>$uu;
-$virama u>$u;
-$virama r\u0325\u0304>$rrh;
-$virama r\u0325a>$rh;
-$virama r\u0325>$rh;
-$virama l\u0325\u0304>$llh;
-$virama lh>$lh;
-$virama l\u0325>$lh;
-$virama e\u0304>$e;
-$virama o\u0304>$o;
-$virama a>;
-$virama e\u0306>$ce;
-$virama o\u0306>$co;
-$virama e>$se;
-$virama o>$so;
-$virama''a\u0304>$waa;
-$virama''ai>$wai;
-$virama''au>$wau;
-$virama''i\u0304>$wii;
-$virama''i>$wi;
-$virama''u\u0304>$wuu;
-$virama''u>$wu;
-$virama''r\u0325\u0304>$wrr;
-$virama''r\u0325>$wr;
-$virama''l\u0325\u0304>$wll;
-$virama''l\u0325>$wl;
-$virama''e\u0304>$we;
-$virama''o\u0304>$wo;
-$virama''a>$wa;
-$virama''e\u0306>$wce;
-$virama''o\u0306>$wco;
-$virama''e>$wse;
-$virama''o>$wso;
-''a\u0304>$waa;
-''ai>$wai;
-''au>$wau;
-''i\u0304>$wii;
-''i>$wi;
-''u\u0304>$wuu;
-''u>$wu;
-''r\u0325\u0304>$wrr;
-''r\u0325>$wr;
-''l\u0325\u0304>$wll;
-''l\u0325>$wl;
-''e\u0304>$we;
-''o\u0304>$wo;
-''a>$wa;
-''e\u0306>$wce;
-''o\u0306>$wco;
-''e>$wse;
-''o>$wso;
-$virama } [$z] > $virama;
-$virama } ' ' > $virama ;
-$virama}$endThing>;
-ʔ>$dgs; # Glottal Stop
-0>$zero;
-1>$one;
-2>$two;
-3>$three;
-4>$four;
-5>$five;
-6>$six;
-7>$seven;
-8>$eight;
-9>$nine;
-''>;
+\u0315 → $avagraha;
+\u0303→$chandrabindu$anusvara;
+m\u0310→$chandrabindu;
+h\u0323→$visarga;
+x→$ka$virama$sa;
+# convert to independent forms at start of word or syllable:
+# dependent forms for roundtrip
+\u0314a\u0304→$aa;
+\u0314ai→$ai;
+\u0314au→$au;
+\u0314ii→$ii;
+\u0314i\u0304→$ii;
+\u0314i→$i;
+\u0314u\u0304→$uu;
+\u0314u→$u;
+\u0314r\u0325\u0304→$rrh;
+\u0314r\u0325→$rh;
+\u0314l\u0325\u0304→$llh;
+\u0314lh→$lh;
+\u0314l\u0325→$lh;
+\u0314e\u0304→$e;
+\u0314o\u0304→$o;
+\u0314a→;
+\u0314e\u0306→$ce;
+\u0314o\u0306→$co;
+\u0314e→$se;
+\u0314o→$so;
+# preceded by consonants
+$consonants{ a\u0304→$aa;
+$consonants{ ai→$ai;
+$consonants{ au→$au;
+$consonants{ ii→$ii;
+$consonants{ i\u0304→$ii;
+$consonants{ i→$i;
+$consonants{ u\u0304→$uu;
+$consonants{ u→$u;
+$consonants{ r\u0325\u0304→$rrh;
+$consonants{ r\u0325a→$rh;
+$consonants{ r\u0325→$rh;
+$consonants{ l\u0325\u0304→$llh;
+$consonants{ lh→$lh;
+$consonants{ l\u0325→$lh;
+$consonants{ e\u0304→$e;
+$consonants{ o\u0304→$o;
+$consonants{ e\u0306→$ce;
+$consonants{ o\u0306→$co;
+$consonants{ e→$se;
+$consonants{ o→$so;
+# e.g. keai → {ka}{e}{wai}; k'ai → {ka}{wai}; (ai) → ({wai})
+a\u0304→$waa;
+ai→$wai;
+au→$wau;
+i\u0304→$wii;
+i→$wi;
+u\u0304→$wuu;
+u→$wu;
+r\u0325\u0304→$wrr;
+r\u0325→$wr;
+l\u0325\u0304→$wll;
+lh→$wl;
+l\u0325→$wl;
+e\u0304→$we;
+o\u0304→$wo;
+a→$wa;
+e\u0306→$wce;
+o\u0306→$wco;
+e→$wse;
+''om→$om;
+o→$wso;
+# rules for anusvara
+n}r\u0325           → $na|$virama;
+n}l\u0325           → $na|$virama;
+n}na                → $na|$virama;
+n\u0307}[kg]        → $anusvara;
+n\u0307}n\u0307     → $anusvara;
+n\u0304}[cj]        → $anusvara;
+n\u0304}n\u0303     → $anusvara;
+n\u0323}[tdn]\u0323 → $anusvara;
+n}[tdn]             → $anusvara;
+m}[pbm]             → $anusvara;
+n}[ylvshr]          → $anusvara;
+m\u0307             → $anusvara;
+#urdu compatibility
+q→$uka|$virama;
+k\u0331h\u0331→$ukha |$virama;
+g\u0307→ $ugha | $virama;
+z → $ujha |$virama;
+f → $ufa|$virama;
+t\u0331→$kta;
+# dev
+y\u0307→$uya|$virama;
+l\u0331→$ela|$virama;
+n\u0331→$ena|$virama;
+n\u0307→$nga|$virama;
+n\u0303→$nya|$virama;
+n\u0323→$nna|$virama;
+t\u0323h→$ttha|$virama;
+t\u0323→$tta|$virama;
+r\u0323h→$udha|$virama;
+r\u0323→$uddha|$virama;
+d\u0323h→$ddha|$virama;
+d\u0323→$dda|$virama;
+kh→$kha|$virama;
+k→$ka|$virama;
+gh→$gha|$virama;
+g→$ga|$virama;
+ch→$cha|$virama;
+c→$ca|$virama;
+jh→$jha|$virama;
+j→$ja|$virama;
+ny→$nya|$virama;
+tth→$ttha|$virama;
+ddh→$ddha|$virama;
+th→$tha|$virama;
+t→$ta|$virama;
+dh→$dha|$virama;
+d→$da|$virama;
+n→$na|$virama;
+ph→$pha|$virama;
+p→$pa|$virama;
+bh→$bha|$virama;
+b→$ba|$virama;
+m→$ma|$virama;
+y→$ya|$virama;
+r\u0331→$rra|$virama;
+r→$ra|$virama;
+l\u0323→$lla|$virama;
+l→$la|$virama;
+v→$va|$virama;
+w\u0307→$vva|$virama;
+w→$va|$virama;
+sh→$sha|$virama;
+ss→$ssa|$virama;
+s\u0323→$ssa|$virama;
+s\u0301→$sha|$virama;
+s→$sa|$virama;
+h→$ha|$virama;
+'.'→$danda;
+$danda'.'→$doubleDanda;
+$depVowelAbove{'~'→$anusvara;
+$depVowelBelow{'~'→$chandrabindu;
+# convert to dependent forms after consonant with no vowel:
+# e.g. kai → {ka}{virama}ai → {ka}{ai}
+#$virama aa→$aa;
+$virama a\u0304→$aa;
+$virama ai→$ai;
+$virama au→$au;
+$virama ii→$ii;
+$virama i\u0304→$ii;
+$virama i→$i;
+#$virama uu→$uu;
+$virama u\u0304→$uu;
+$virama u→$u;
+#$virama rrh→$rrh;
+$virama r\u0325\u0304→$rrh;
+#$virama rh→$rh;
+$virama r\u0325a→$rh;
+$virama r\u0325→$rh;
+$virama l\u0325\u0304→$llh;
+$virama lh→$lh;
+$virama l\u0325→$lh;
+$virama e\u0304→$e;
+$virama o\u0304→$o;
+$virama a→;
+$virama e\u0306→$ce;
+$virama o\u0306→$co;
+$virama e→$se;
+$virama o→$so;
+# otherwise convert independent forms when separated by ': k'ai → {ka}{virama}{wai}
+#$virama''aa→$waa;
+$virama''a\u0304→$waa;
+$virama''ai→$wai;
+$virama''au→$wau;
+#$virama''ii→$wii;
+$virama''i\u0304→$wii;
+$virama''i→$wi;
+#$virama''uu→$wuu;
+$virama''u\u0304→$wuu;
+$virama''u→$wu;
+#$virama''rrh→$wrr;
+$virama''r\u0325\u0304→$wrr;
+#$virama''rh→$wr;
+$virama''r\u0325→$wr;
+$virama''l\u0325\u0304→$wll;
+#$virama''lh→$wl;
+$virama''l\u0325→$wl;
+$virama''e\u0304→$we;
+$virama''o\u0304→$wo;
+$virama''a→$wa;
+$virama''e\u0306→$wce;
+$virama''o\u0306→$wco;
+$virama''e→$wse;
+$virama''o→$wso;
+# no virama
+''a\u0304→$waa;
+''ai→$wai;
+''au→$wau;
+''i\u0304→$wii;
+''i→$wi;
+''u\u0304→$wuu;
+''u→$wu;
+''r\u0325\u0304→$wrr;
+''r\u0325→$wr;
+''l\u0325\u0304→$wll;
+''l\u0325→$wl;
+''e\u0304→$we;
+''o\u0304→$wo;
+''a→$wa;
+''e\u0306→$wce;
+''o\u0306→$wco;
+''e→$wse;
+''o→$wso;
+$virama } [$z] → $virama;
+$virama } ' ' → $virama ;
+$virama}$endThing→;
+ʔ→$dgs; # Glottal Stop
+0→$zero;
+1→$one;
+2→$two;
+3→$three;
+4→$four;
+5→$five;
+6→$six;
+7→$seven;
+8→$eight;
+9→$nine;
+''→;
+#:: NFC (NFD) ;
+