]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | # *************************************************************************** |
2 | # * | |
2ca993e8 | 3 | # * Copyright (C) 2004-2016, International Business Machines |
73c04bcf A |
4 | # * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
5 | # * | |
6 | # *************************************************************************** | |
2ca993e8 | 7 | # File: Arab_Latn.txt |
46f4442e | 8 | # Generated from CLDR |
73c04bcf | 9 | # |
2ca993e8 A |
10 | |
11 | # Generally follows UNGEGN | |
12 | # http://www.eki.ee/wgrs/rom1_ar.pdf | |
13 | # Occasionally deviates in the direction of ISO 233 | |
14 | # http://homepage.mac.com/sirbinks/pdf/Arabic.pdf | |
15 | # a) where required for disambiguation. | |
16 | # b) with underdot instead of cedilla for letters like SAD, | |
17 | # since those are explicitly in Unicode for transliteration. | |
18 | # c) with extra non-Arabic-language letters, like PEH | |
19 | # | |
20 | # Does *not* do assimilation of "al", nor hyphenation. | |
21 | # While it could be done, we need to determine whether a prefix "al" could | |
22 | # occur other than as the definite article (since no space is used). | |
729e4ab9 | 23 | :: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ; |
374ca955 | 24 | :: NFKD (NFC); |
51004dcb A |
25 | $disambig = \u0331 ; |
26 | $disambig2 = \u0330 ; | |
27 | $under = \u0323 ; | |
73c04bcf | 28 | $descender = ˌ; |
2ca993e8 A |
29 | $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; |
30 | # non-letters | |
729e4ab9 A |
31 | [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR |
32 | [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR | |
33 | ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR | |
34 | ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR | |
2ca993e8 | 35 | # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
729e4ab9 A |
36 | ، ↔ ',' ; # ARABIC COMMA |
37 | ؛ ↔ ';' ; # ARABIC SEMICOLON | |
38 | ؟ ↔ '?' ; # ARABIC QUESTION MARK | |
39 | ٪ ↔ '%' ; # ARABIC PERCENT SIGN | |
40 | ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO | |
41 | ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE | |
42 | ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO | |
43 | ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE | |
44 | ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR | |
45 | ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE | |
46 | ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX | |
47 | ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN | |
48 | ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT | |
49 | ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE | |
50 | ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO | |
51 | ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE | |
52 | ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO | |
53 | ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE | |
54 | ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR | |
55 | ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE | |
56 | ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX | |
57 | ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN | |
58 | ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT | |
59 | ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE | |
2ca993e8 A |
60 | # letters |
61 | # long vowels | |
729e4ab9 A |
62 | \u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF |
63 | \u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW | |
64 | \u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH | |
2ca993e8 | 65 | # longer items moved here to prevent masking |
729e4ab9 A |
66 | ث ↔ t h $disambig ; # ARABIC LETTER THEH |
67 | ذ ↔ d h $disambig ; # ARABIC LETTER THAL | |
68 | ش ↔ s h $disambig ; # ARABIC LETTER SHEEN | |
69 | ص ↔ s $under ; # ARABIC LETTER SAD | |
70 | ض ↔ d $under ; # ARABIC LETTER DAD | |
71 | ط ↔ t $under ; # ARABIC LETTER TAH | |
72 | ظ ↔ z $under ; # ARABIC LETTER ZAH | |
73 | غ ↔ g h $disambig ; # ARABIC LETTER GHAIN | |
2ca993e8 A |
74 | # WARNING: special case |
75 | # <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut> | |
76 | # so in the reverse (Latin to Arabic) direction, we have to skip over (but preserve) the half-ring below (or other marks like it) | |
77 | # ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS | |
729e4ab9 A |
78 | ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA |
79 | ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA | |
2ca993e8 | 80 | # non-Arabic language |
729e4ab9 A |
81 | ژ ↔ z h $disambig ; # ARABIC LETTER JEH |
82 | ڭ ↔ n $disambig g ; # ARABIC LETTER NG | |
83 | ۋ ↔ v $disambig ; # ARABIC LETTER VE | |
84 | ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH | |
85 | ښ ↔ s $descender; | |
2ca993e8 | 86 | # Arabic language |
729e4ab9 A |
87 | ء ↔ ʾ ; # ARABIC LETTER HAMZA |
88 | ا ↔ a $under; # ARABIC LETTER ALEF | |
89 | ب ↔ b ; # ARABIC LETTER BEH | |
90 | ت ↔ t ; # ARABIC LETTER TEH | |
91 | ج ↔ j ; # ARABIC LETTER JEEM | |
92 | ح ↔ h $under ; # ARABIC LETTER HAH | |
93 | خ ↔ k h $disambig ; # ARABIC LETTER KHAH | |
94 | د ↔ d ; # ARABIC LETTER DAL | |
95 | ر ↔ r ; # ARABIC LETTER REH | |
96 | ز ↔ z ; # ARABIC LETTER ZAIN | |
97 | س ↔ s ; # ARABIC LETTER SEEN | |
98 | ع ↔ ʿ ; # ARABIC LETTER AIN | |
99 | ـ → ; # ARABIC TATWEEL | |
100 | ف ↔ f ; # ARABIC LETTER FEH | |
101 | ق ↔ q ; # ARABIC LETTER QAF | |
102 | ک ↔ k $disambig ; # ARABIC LETTER KEHEH | |
103 | ك ↔ k ; # ARABIC LETTER KAF | |
104 | ل ↔ l ; # ARABIC LETTER LAM | |
105 | م ↔ m ; # ARABIC LETTER MEEM | |
106 | ن ↔ n ; # ARABIC LETTER NOON | |
107 | ه ↔ h ; # ARABIC LETTER HEH | |
108 | و ↔ w ; # ARABIC LETTER WAW | |
109 | ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA | |
110 | ي ↔ y ; # ARABIC LETTER YEH | |
111 | \u064B ↔ aⁿ ; # ARABIC FATHATAN | |
112 | \u064C ↔ uⁿ ; # ARABIC DAMMATAN | |
113 | \u064D ↔ iⁿ ; # ARABIC KASRATAN | |
114 | \u064E ↔ a ; # ARABIC FATHA | |
115 | \u064F ↔ u ; # ARABIC DAMMA | |
116 | \u0650 ↔ i ; # ARABIC KASRA | |
51004dcb A |
117 | \u0651 ↔ \u0303 ; # ARABIC SHADDA |
118 | \u0652 ↔ \u030A ; # ARABIC SUKUN | |
2ca993e8 | 119 | # special combining marks |
51004dcb A |
120 | \u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE |
121 | \u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE | |
122 | \u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW | |
2ca993e8 | 123 | # Some non-Arabic language (not in UNGEGN) |
729e4ab9 A |
124 | پ ↔ p ; # ARABIC LETTER PEH |
125 | چ ↔ c h $disambig ; # ARABIC LETTER TCHEH | |
126 | ڤ ↔ v ; # ARABIC LETTER VEH | |
2ca993e8 A |
127 | # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW |
128 | # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW | |
729e4ab9 | 129 | گ ↔ g ; # ARABIC LETTER GAF |
2ca993e8 | 130 | # fallbacks |
729e4ab9 A |
131 | | s ← c } [eiy]; |
132 | | k ← c ; | |
133 | | i ← e ; | |
134 | | u ← o ; | |
135 | | ks ← x ; | |
136 | | n ← ⁿ; | |
374ca955 A |
137 | :: (lower) ; |
138 | ::NFC (NFD); | |
73c04bcf | 139 | :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); |
2ca993e8 | 140 |