]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: Arab_Latn.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
73c04bcf | 6 | # |
2ca993e8 A |
7 | |
8 | # Generally follows UNGEGN | |
9 | # http://www.eki.ee/wgrs/rom1_ar.pdf | |
10 | # Occasionally deviates in the direction of ISO 233 | |
11 | # http://homepage.mac.com/sirbinks/pdf/Arabic.pdf | |
12 | # a) where required for disambiguation. | |
13 | # b) with underdot instead of cedilla for letters like SAD, | |
14 | # since those are explicitly in Unicode for transliteration. | |
15 | # c) with extra non-Arabic-language letters, like PEH | |
16 | # | |
17 | # Does *not* do assimilation of "al", nor hyphenation. | |
18 | # While it could be done, we need to determine whether a prefix "al" could | |
19 | # occur other than as the definite article (since no space is used). | |
729e4ab9 | 20 | :: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ; # global filter: only characters in this set are touched in the Arabic→Latin direction |
374ca955 | 21 | :: NFKD (NFC); # Arabic→Latin: normalize input to NFKD before the rules; Latin→Arabic: normalize output to NFC after them |
51004dcb A |
22 | $disambig = \u0331 ; # COMBINING MACRON BELOW: appended so two Arabic characters sharing a Latin base stay distinct (e.g. ک ↔ k $disambig vs ك ↔ k) |
23 | $disambig2 = \u0330 ; # COMBINING TILDE BELOW: second disambiguator (ی ↔ y $disambig2, since y $disambig is used by ى) | |
24 | $under = \u0323 ; # COMBINING DOT BELOW | |
73c04bcf | 25 | $descender = ˌ; # MODIFIER LETTER LOW VERTICAL LINE (U+02CC) |
2ca993e8 A |
26 | $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; # marks whose canonical combining class is neither 0 nor 230 (Above); used by the TEH MARBUTA reverse rule |
27 | # non-letters: separators, punctuation and digits; where two Arabic characters map to the same Latin character, one of them carries $disambig so the reverse mapping stays unambiguous | |
729e4ab9 A |
28 | [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR (between two decimal digits) |
29 | [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR (between two decimal digits) | |
30 | ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR (any other position; marked so it does not collide with ARABIC COMMA) | |
31 | ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR (any other position) | |
2ca993e8 | 32 | # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
729e4ab9 A |
33 | ، ↔ ',' ; # ARABIC COMMA |
34 | ؛ ↔ ';' ; # ARABIC SEMICOLON | |
35 | ؟ ↔ '?' ; # ARABIC QUESTION MARK | |
36 | ٪ ↔ '%' ; # ARABIC PERCENT SIGN | |
37 | ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO | |
38 | ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE | |
39 | ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO | |
40 | ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE | |
41 | ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR | |
42 | ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE | |
43 | ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX | |
44 | ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN | |
45 | ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT | |
46 | ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE | |
47 | ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO | |
48 | ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE | |
49 | ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO | |
50 | ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE | |
51 | ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR | |
52 | ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE | |
53 | ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX | |
54 | ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN | |
55 | ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT | |
56 | ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE | |
2ca993e8 A |
57 | # letters |
58 | # long vowels: short-vowel mark followed by its carrier letter ↔ Latin vowel + COMBINING MACRON (U+0304); listed before the single-character vowel and letter rules so the pair is matched first | |
729e4ab9 A |
59 | \u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF |
60 | \u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW | |
61 | \u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH | |
2ca993e8 | 62 | # longer items moved here to prevent masking: these multi-character Latin sequences must be tried before the plain t, d, s, z, g rules further down |
729e4ab9 A |
63 | ث ↔ t h $disambig ; # ARABIC LETTER THEH |
64 | ذ ↔ d h $disambig ; # ARABIC LETTER THAL | |
65 | ش ↔ s h $disambig ; # ARABIC LETTER SHEEN | |
66 | ص ↔ s $under ; # ARABIC LETTER SAD | |
67 | ض ↔ d $under ; # ARABIC LETTER DAD | |
68 | ط ↔ t $under ; # ARABIC LETTER TAH | |
69 | ظ ↔ z $under ; # ARABIC LETTER ZAH | |
70 | غ ↔ g h $disambig ; # ARABIC LETTER GHAIN | |
2ca993e8 A |
71 | # WARNING: special case |
72 | # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→ | |
73 | # so on the return, we have to skip over (but preserve) the half-ring below (or others like it) | |
74 | # ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS | |
729e4ab9 A |
75 | ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA |
76 | ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA, reverse only: marks between t and the diaeresis are captured as $1 and re-emitted after the cursor so later rules can convert them | |
2ca993e8 | 77 | # non-Arabic language |
729e4ab9 A |
78 | ژ ↔ z h $disambig ; # ARABIC LETTER JEH |
79 | ڭ ↔ n $disambig g ; # ARABIC LETTER NG | |
80 | ۋ ↔ v $disambig ; # ARABIC LETTER VE | |
81 | ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH | |
82 | ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE | |
2ca993e8 | 83 | # Arabic language |
729e4ab9 A |
84 | ء ↔ ʾ ; # ARABIC LETTER HAMZA |
85 | ا ↔ a $under; # ARABIC LETTER ALEF | |
86 | ب ↔ b ; # ARABIC LETTER BEH | |
87 | ت ↔ t ; # ARABIC LETTER TEH | |
88 | ج ↔ j ; # ARABIC LETTER JEEM | |
89 | ح ↔ h $under ; # ARABIC LETTER HAH | |
90 | خ ↔ k h $disambig ; # ARABIC LETTER KHAH | |
91 | د ↔ d ; # ARABIC LETTER DAL | |
92 | ر ↔ r ; # ARABIC LETTER REH | |
93 | ز ↔ z ; # ARABIC LETTER ZAIN | |
94 | س ↔ s ; # ARABIC LETTER SEEN | |
95 | ع ↔ ʿ ; # ARABIC LETTER AIN | |
96 | ـ → ; # ARABIC TATWEEL (forward only: dropped from the Latin output, never regenerated) | |
97 | ف ↔ f ; # ARABIC LETTER FEH | |
98 | ق ↔ q ; # ARABIC LETTER QAF | |
99 | ک ↔ k $disambig ; # ARABIC LETTER KEHEH | |
100 | ك ↔ k ; # ARABIC LETTER KAF | |
101 | ل ↔ l ; # ARABIC LETTER LAM | |
102 | م ↔ m ; # ARABIC LETTER MEEM | |
103 | ن ↔ n ; # ARABIC LETTER NOON | |
104 | ه ↔ h ; # ARABIC LETTER HEH | |
105 | و ↔ w ; # ARABIC LETTER WAW | |
106 | ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA | |
107 | ي ↔ y ; # ARABIC LETTER YEH | |
108 | \u064B ↔ aⁿ ; # ARABIC FATHATAN | |
109 | \u064C ↔ uⁿ ; # ARABIC DAMMATAN | |
110 | \u064D ↔ iⁿ ; # ARABIC KASRATAN | |
111 | \u064E ↔ a ; # ARABIC FATHA | |
112 | \u064F ↔ u ; # ARABIC DAMMA | |
113 | \u0650 ↔ i ; # ARABIC KASRA | |
51004dcb A |
114 | \u0651 ↔ \u0303 ; # ARABIC SHADDA ↔ COMBINING TILDE |
115 | \u0652 ↔ \u030A ; # ARABIC SUKUN ↔ COMBINING RING ABOVE | |
2ca993e8 | 116 | # special combining marks |
51004dcb A |
117 | \u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE ↔ COMBINING CIRCUMFLEX ACCENT |
118 | \u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE ↔ COMBINING HOOK ABOVE | |
119 | \u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW ↔ COMBINING RIGHT HALF RING BELOW | |
2ca993e8 | 120 | # Some non-Arabic language (not in UNGEGN) |
729e4ab9 A |
121 | پ ↔ p ; # ARABIC LETTER PEH |
122 | چ ↔ c h $disambig ; # ARABIC LETTER TCHEH | |
123 | ڤ ↔ v ; # ARABIC LETTER VEH | |
2ca993e8 A |
124 | # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW |
125 | # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW | |
729e4ab9 | 126 | گ ↔ g ; # ARABIC LETTER GAF |
2ca993e8 | 127 | # fallbacks (Latin→Arabic only): rewrite Latin letters that have no rule of their own into ones that do; the leading cursor marker leaves the cursor before the replacement so it is transliterated again |
729e4ab9 A |
128 | | s ← c } [eiy]; # c followed by e, i or y → s
129 | | k ← c ; # any other c → k | |
130 | | i ← e ; # e → i | |
131 | | u ← o ; # o → u | |
132 | | ks ← x ; # x → ks | |
133 | | n ← ⁿ; # superscript n not consumed by the aⁿ / uⁿ / iⁿ rules → n | |
374ca955 A |
134 | :: (lower) ; # Latin→Arabic only: lowercase the Latin input before the rules run |
135 | ::NFC (NFD); # Arabic→Latin: compose the output to NFC; Latin→Arabic: decompose the input to NFD | |
73c04bcf | 136 | :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); # global filter for Latin→Arabic: Latin letters plus the listed punctuation, digits, modifier letters and combining marks (the repeated ';' is redundant but harmless) |
2ca993e8 | 137 |