]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: ar_ar_Latn_BGN.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
729e4ab9 | 6 | # |
2ca993e8 A |
7 | |
8 | # | |
9 | ######################################################################## | |
10 | # BGN/PCGN 1956 System | |
11 | # | |
12 | # This system was adopted by the BGN in 1946 and by the PCGN | |
13 | # in 1956 and has been applied in the systematic romanization | |
14 | # of geographic names in Bahrain, Egypt, Iraq, Jordan, | |
15 | # Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan, | |
16 | # Syria, Tunisia, the United Arab Emirates, and Yemen, all | |
17 | # of which have been covered by published BGN gazetteers. | |
18 | # | |
19 | # Originally prepared by Michael Everson <everson@evertype.com> | |
20 | ######################################################################## | |
21 | # | |
22 | # MINIMAL FILTER: Arabic-Latin (global filter: only characters in the set below are handed to the rules; NFKD is applied first, NFC is the reverse-direction counterpart) | |
23 | # | |
729e4ab9 A |
24 | :: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ; |
25 | :: NFKD (NFC) ; | |
2ca993e8 A |
26 | # |
27 | # | |
28 | ######################################################################## | |
29 | # | |
30 | ######################################################################## | |
31 | # | |
32 | # Define All Transformation Variables: $alef is emitted for hamza / alef wasla, $ayin for ain, and $disambig (U+0331) is appended on the Latin side of several two-way rules below | |
33 | # | |
34 | ######################################################################## | |
35 | # | |
729e4ab9 A |
36 | $alef = ’; |
37 | $ayin = ‘; | |
51004dcb | 38 | $disambig = \u0331 ; |
2ca993e8 A |
39 | # |
40 | # | |
41 | # Use this $wordBoundary until bug 2034 is fixed in ICU: | |
42 | # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest | |
43 | # $wordBoundary matches any single character that is not a letter, mark, or number. | |
51004dcb | 44 | $wordBoundary = [^[:L:][:M:][:N:]] ; |
2ca993e8 A |
45 | # |
46 | # | |
47 | ######################################################################## | |
48 | # non-letters: separators, punctuation, and digits; the first two rules apply only when the separator sits between two decimal digits | |
729e4ab9 A |
49 | [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR (between digits) |
50 | [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR (between digits) | |
51 | ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR (elsewhere; tagged with $disambig) | |
52 | ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR (elsewhere; tagged with $disambig) | |
2ca993e8 | 53 | # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
729e4ab9 A |
54 | ، ↔ ',' ; # ARABIC COMMA |
55 | ؛ ↔ ';' ; # ARABIC SEMICOLON | |
56 | ؟ ↔ '?' ; # ARABIC QUESTION MARK | |
57 | ٪ ↔ '%' ; # ARABIC PERCENT SIGN | |
58 | ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO (tagged with $disambig) | |
59 | ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE | |
60 | ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO | |
61 | ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE | |
62 | ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR | |
63 | ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE | |
64 | ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX | |
65 | ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN | |
66 | ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT | |
67 | ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE | |
68 | ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO (plain digit, no $disambig) | |
69 | ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE | |
70 | ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO | |
71 | ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE | |
72 | ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR | |
73 | ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE | |
74 | ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX | |
75 | ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN | |
76 | ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT | |
77 | ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE | |
2ca993e8 A |
78 | # |
79 | ######################################################################## | |
80 | # | |
81 | # Rules moved to front to avoid masking | |
82 | # | |
83 | ######################################################################## | |
84 | # | |
85 | ######################################################################## | |
86 | # | |
87 | # BGN Page 8 Rule 5 | |
88 | # | |
89 | # The character sequences ته , كه , ده , and سه may be romanized t·h, k·h, | |
90 | # d·h, and s·h in order to differentiate those romanizations from the | |
91 | # digraphs th, kh, dh, and sh. | |
92 | # | |
93 | ######################################################################## | |
94 | # | |
729e4ab9 A |
95 | ته → t·h ; # ARABIC LETTER TEH + HEH (middle dot keeps it distinct from the digraph th) |
96 | كه → k·h ; # ARABIC LETTER KAF + HEH (middle dot keeps it distinct from the digraph kh) | |
97 | ده → d·h ; # ARABIC LETTER DAL + HEH (middle dot keeps it distinct from the digraph dh) | |
98 | سه → s·h ; # ARABIC LETTER SEEN + HEH (middle dot keeps it distinct from the digraph sh) | |
2ca993e8 A |
99 | # |
100 | # | |
101 | ######################################################################## | |
102 | # | |
103 | # End Rule 5 | |
104 | # | |
105 | ######################################################################## | |
106 | ######################################################################## | |
107 | # | |
108 | # | |
109 | # BGN Page 8 Rule 9 | |
110 | # | |
111 | # Doubled consonant sounds are represented in Arabic script by placing | |
112 | # a shaddah ( \u0651 ) over a consonant character. In romanization the letter | |
113 | # should be doubled. [The remainder of this rule deals with the definite | |
114 | # article and is lexical.] | |
115 | # | |
116 | ######################################################################## | |
117 | # | |
729e4ab9 A |
118 | ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA |
119 | ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA | |
120 | ث\u0651 → thth ; # ARABIC LETTER THEH + SHADDA | |
121 | ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA | |
122 | ح\u0651 → ḩḩ ; # ARABIC LETTER HAH + SHADDA (cedilla form, same letter as the single HAH rule emits) | |
123 | خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA | |
124 | د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA | |
125 | ذ\u0651 → dhdh ; # ARABIC LETTER THAL + SHADDA | |
126 | ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA | |
127 | ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA | |
128 | س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA | |
129 | ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA | |
130 | ص\u0651 → şş ; # ARABIC LETTER SAD + SHADDA (cedilla form, same letter as the single SAD rule emits) | |
131 | ض\u0651 → ḑḑ ; # ARABIC LETTER DAD + SHADDA (cedilla form, same letter as the single DAD rule emits) | |
132 | ط\u0651 → ţţ ; # ARABIC LETTER TAH + SHADDA (cedilla form, same letter as the single TAH rule emits) | |
133 | ظ\u0651 → z\u0327z\u0327 ; # ARABIC LETTER ZAH + SHADDA (z + combining cedilla, same as the single ZAH rule emits) | |
134 | ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA | |
135 | غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA | |
136 | ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA | |
137 | ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA | |
138 | ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA | |
139 | ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA | |
140 | م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA | |
141 | ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA | |
142 | ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA | |
143 | و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA | |
144 | ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA | |
2ca993e8 A |
145 | # |
146 | # | |
147 | ######################################################################## | |
148 | # | |
149 | # End Rule 9 | |
150 | # | |
151 | ######################################################################## | |
152 | # | |
153 | ######################################################################## | |
154 | # | |
155 | # Start of Transformations | |
156 | # | |
157 | ######################################################################## | |
158 | # | |
51004dcb | 159 | $wordBoundary{ء → ; # ARABIC LETTER HAMZA (no output when preceded by a $wordBoundary character) |
729e4ab9 | 160 | ء → $alef ; # ARABIC LETTER HAMZA (all other positions) |
51004dcb | 161 | $wordBoundary{ا → ; # ARABIC LETTER ALEF (no output when preceded by a $wordBoundary character) |
729e4ab9 A |
162 | ٱ → $alef ; # ARABIC LETTER ALEF WASLA |
163 | $wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE (after a $wordBoundary character: no $alef prefix) | |
164 | آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE (all other positions) | |
165 | ب → b ; # ARABIC LETTER BEH | |
166 | ت → t ; # ARABIC LETTER TEH | |
167 | ة → h ; # ARABIC LETTER TEH MARBUTA | |
168 | ث → th ; # ARABIC LETTER THEH | |
169 | ج → j ; # ARABIC LETTER JEEM | |
170 | ح → ḩ ; # ARABIC LETTER HAH | |
171 | خ → kh ; # ARABIC LETTER KHAH | |
172 | د → d ; # ARABIC LETTER DAL | |
173 | ذ → dh ; # ARABIC LETTER THAL | |
174 | ر → r ; # ARABIC LETTER REH | |
175 | ز → z ; # ARABIC LETTER ZAIN | |
176 | س → s ; # ARABIC LETTER SEEN | |
177 | ش → sh ; # ARABIC LETTER SHEEN | |
178 | ص → ş ; # ARABIC LETTER SAD | |
179 | ض → ḑ ; # ARABIC LETTER DAD | |
180 | ط → ţ ; # ARABIC LETTER TAH | |
181 | ظ → z\u0327 ; # ARABIC LETTER ZAH (z followed by U+0327 COMBINING CEDILLA) | |
182 | ع → $ayin ; # ARABIC LETTER AIN | |
183 | غ → gh ; # ARABIC LETTER GHAIN | |
184 | ف → f ; # ARABIC LETTER FEH | |
185 | ق → q ; # ARABIC LETTER QAF | |
186 | ک ↔ k $disambig ; # ARABIC LETTER KEHEH (two-way; $disambig distinguishes it from KAF) | |
187 | ك ↔ k ; # ARABIC LETTER KAF | |
188 | ل → l ; # ARABIC LETTER LAM | |
189 | م → m ; # ARABIC LETTER MEEM | |
190 | ن → n ; # ARABIC LETTER NOON | |
191 | ه → h ; # ARABIC LETTER HEH | |
192 | و → w ; # ARABIC LETTER WAW | |
193 | ى → y ; # ARABIC LETTER YEH -- NOTE(review): the source letter here looks like U+0649 (ALEF MAKSURA), not U+064A (YEH); confirm bare U+064A is still covered | |
194 | \u064Eا → ā ; # ARABIC FATHA + ALEF | |
195 | \u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA | |
196 | \u064Eي\u0652 → ay ; # ARABIC FATHA + YEH + SUKUN | |
197 | \u064Eو\u0652 → aw ; # ARABIC FATHA + WAW + SUKUN | |
198 | \u064E → a ; # ARABIC FATHA (after the longer fatha sequences above) | |
199 | \u0650ي → ī ; # ARABIC KASRA + YEH | |
200 | \u0650 → i ; # ARABIC KASRA | |
201 | \u064Fو → ū ; # ARABIC DAMMA + WAW | |
202 | \u064F → u ; # ARABIC DAMMA | |
51004dcb | 203 | \u0652 → ; # ARABIC SUKUN (no output) |
729e4ab9 A |
204 | \u064B → aⁿ ; # ARABIC FATHATAN |
205 | \u064D → iⁿ ; # ARABIC KASRATAN | |
206 | \u064C → uⁿ ; # ARABIC DAMMATAN | |
207 | ::NFC (NFD) ; | |
2ca993e8 A |
208 | # |
209 | # | |
210 | ######################################################################## | |
211 |