git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Arab_Latn.txt
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Arab_Latn.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: Arab_Latn.txt
5 # Generated from CLDR
6 #
7
8 # Generally follows UNGEGN
9 # http://www.eki.ee/wgrs/rom1_ar.pdf
10 # Occasionally deviates in the direction of ISO 233
11 # http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
12 # a) where required for disambiguation.
13 # b) with underdot instead of cedilla for letters like SAD,
14 # since those are explicitly in Unicode for transliteration.
15 # c) with extra non-Arabic-language letters, like PEH
16 #
17 # Does *not* do assimilation of "al", nor hyphenation.
18 # While it could be done, we need to determine whether a prefix "al" could
19 # occur other than as the definite article (since no space is used).
20 :: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ; # forward (Arabic to Latin) global filter: only characters in this set are touched by the rules below
21 :: NFKD (NFC); # forward: NFKD-decompose the input before the rules run; the parenthesized NFC is the reverse-direction counterpart (per ICU rule syntax)
22 $disambig = \u0331 ; # COMBINING MACRON BELOW: appended to Latin output that would otherwise collide with another mapping (e.g. KEHEH k+mark vs KAF k, extended digits vs Arabic-Indic digits)
23 $disambig2 = \u0330 ; # COMBINING TILDE BELOW: second-level disambiguator, used only for FARSI YEH (y is YEH, y+$disambig is ALEF MAKSURA)
24 $under = \u0323 ; # COMBINING DOT BELOW: the underdot used for SAD, DAD, TAH, ZAH, HAH and ALEF
25 $descender = ˌ; # spacing mark written after s, used only for the SEEN-with-dots letter in the non-Arabic-language section
26 $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; # characters whose canonical combining class is neither 0 nor 230 (marks not positioned above); used by the reverse TEH MARBUTA rule
27 # non-letters: numeric separators, punctuation and digits
28 [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR between two decimal digits ↔ plain comma between two decimal digits
29 [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR between two decimal digits ↔ plain full stop between two decimal digits
30 ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR not between digits: comma + $disambig, so it stays distinct from ARABIC COMMA
31 ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR not between digits: full stop + $disambig
32 # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
33 ، ↔ ',' ; # ARABIC COMMA
34 ؛ ↔ ';' ; # ARABIC SEMICOLON
35 ؟ ↔ '?' ; # ARABIC QUESTION MARK
36 ٪ ↔ '%' ; # ARABIC PERCENT SIGN
37 ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO (extended digits carry $disambig; the ARABIC-INDIC digits below map to the bare ASCII digit)
38 ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
39 ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
40 ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
41 ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
42 ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
43 ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
44 ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
45 ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
46 ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
47 ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
48 ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
49 ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
50 ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
51 ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
52 ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
53 ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
54 ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
55 ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
56 ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
57 # letters
58 # long vowels: short-vowel mark followed by its carrier letter ↔ Latin vowel + \u0304 (COMBINING MACRON); listed ahead of the single-character rules for the same marks and letters
59 \u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
60 \u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
61 \u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
62 # longer items moved here to prevent masking (their Latin side starts with t, d, s, z or g, which also appear as shorter Latin sides further down)
63 ث ↔ t h $disambig ; # ARABIC LETTER THEH
64 ذ ↔ d h $disambig ; # ARABIC LETTER THAL
65 ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
66 ص ↔ s $under ; # ARABIC LETTER SAD
67 ض ↔ d $under ; # ARABIC LETTER DAD
68 ط ↔ t $under ; # ARABIC LETTER TAH
69 ظ ↔ z $under ; # ARABIC LETTER ZAH
70 غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
71 # WARNING: special case
72 # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
73 # so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
74 # ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
75 ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA ↔ t + COMBINING DIAERESIS
76 ة | $1 ← t ($notAbove+) \u0308 ; # reverse only: t, one or more $notAbove marks, then the diaeresis → TEH MARBUTA; the captured marks ($1) are kept after the cursor so later rules still convert them
77 # non-Arabic language (letters whose Latin side needs a disambiguating mark)
78 ژ ↔ z h $disambig ; # ARABIC LETTER JEH
79 ڭ ↔ n $disambig g ; # ARABIC LETTER NG (the mark sits on the n, not at the end of the digraph)
80 ۋ ↔ v $disambig ; # ARABIC LETTER VE
81 ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
82 ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
83 # Arabic language
84 ء ↔ ʾ ; # ARABIC LETTER HAMZA
85 ا ↔ a $under; # ARABIC LETTER ALEF
86 ب ↔ b ; # ARABIC LETTER BEH
87 ت ↔ t ; # ARABIC LETTER TEH
88 ج ↔ j ; # ARABIC LETTER JEEM
89 ح ↔ h $under ; # ARABIC LETTER HAH
90 خ ↔ k h $disambig ; # ARABIC LETTER KHAH
91 د ↔ d ; # ARABIC LETTER DAL
92 ر ↔ r ; # ARABIC LETTER REH
93 ز ↔ z ; # ARABIC LETTER ZAIN
94 س ↔ s ; # ARABIC LETTER SEEN
95 ع ↔ ʿ ; # ARABIC LETTER AIN
96 ـ → ; # ARABIC TATWEEL (forward only: replaced by nothing, so it has no Latin form to reverse)
97 ف ↔ f ; # ARABIC LETTER FEH
98 ق ↔ q ; # ARABIC LETTER QAF
99 ک ↔ k $disambig ; # ARABIC LETTER KEHEH
100 ك ↔ k ; # ARABIC LETTER KAF
101 ل ↔ l ; # ARABIC LETTER LAM
102 م ↔ m ; # ARABIC LETTER MEEM
103 ن ↔ n ; # ARABIC LETTER NOON
104 ه ↔ h ; # ARABIC LETTER HEH
105 و ↔ w ; # ARABIC LETTER WAW
106 ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
107 ي ↔ y ; # ARABIC LETTER YEH
108 \u064B ↔ aⁿ ; # ARABIC FATHATAN
109 \u064C ↔ uⁿ ; # ARABIC DAMMATAN
110 \u064D ↔ iⁿ ; # ARABIC KASRATAN
111 \u064E ↔ a ; # ARABIC FATHA
112 \u064F ↔ u ; # ARABIC DAMMA
113 \u0650 ↔ i ; # ARABIC KASRA
114 \u0651 ↔ \u0303 ; # ARABIC SHADDA ↔ COMBINING TILDE
115 \u0652 ↔ \u030A ; # ARABIC SUKUN ↔ COMBINING RING ABOVE
116 # special combining marks
117 \u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE ↔ COMBINING CIRCUMFLEX ACCENT
118 \u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE ↔ COMBINING HOOK ABOVE
119 \u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW ↔ COMBINING RIGHT HALF RING BELOW
120 # Some non-Arabic language (not in UNGEGN)
121 پ ↔ p ; # ARABIC LETTER PEH
122 چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
123 ڤ ↔ v ; # ARABIC LETTER VEH
124 # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
125 # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
126 گ ↔ g ; # ARABIC LETTER GAF
127 # fallbacks: reverse-only (Latin to Arabic) rewrites of Latin input into other Latin text; the leading | leaves the cursor before the replacement so it is processed again by the rules above
128 | s ← c } [eiy]; # c immediately followed by e, i or y is treated as s
129 | k ← c ; # any other c is treated as k
130 | i ← e ; # e is treated as i
131 | u ← o ; # o is treated as u
132 | ks ← x ; # x is treated as the sequence k s
133 | n ← ‎ⁿ; # a superscript n not consumed by the FATHATAN/DAMMATAN/KASRATAN rules is treated as plain n
134 :: (lower) ; # reverse only: lowercase the Latin input (the forward side of this line is empty)
135 ::NFC (NFD); # forward: NFC-compose the Latin output after the rules; reverse: NFD-decompose the Latin input
136 :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); # reverse global filter: only Latin-script characters plus the listed punctuation, ASCII digits, modifier letters and combining marks are touched
137