]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/fa_fa_Latn_BGN.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / fa_fa_Latn_BGN.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: fa_fa_Latn_BGN.txt
5 # Generated from CLDR
6 #
7
8 #
9 ########################################################################
10 # BGN/PCGN 1958 System
11 #
12 # This system was adopted by the BGN in 1946 and by the PCGN in 1958.
13 # It is used for the romanization of geographic names in Iran and
14 # for Persian-language names in Afghanistan.
15 #
16 # Originally prepared by Michael Everson <everson@evertype.com>
17 ########################################################################
18 #
19 # MINIMAL FILTER: Persian-Latin. Only characters matched by the set below are handed to the rules; all other text passes through untouched.
20 # The input is then decomposed with NFKD in the forward direction; the parenthesized NFC is the form used when the transform runs in reverse.
21 :: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویي\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩پچژگی]] ;
22 :: NFKD (NFC) ;
23 #
24 #
25 ########################################################################
26 #
27 ########################################################################
28 #
29 # Define All Transformation Variables
30 #
31 ########################################################################
32 #
33 $alef = ’; # RIGHT SINGLE QUOTATION MARK (U+2019): Latin output for hamza, and prefix of the alef-with-madda output
34 $ayin = ‘; # LEFT SINGLE QUOTATION MARK (U+2018): Latin output for ain
35 $disambig = \u0331 ; # COMBINING MACRON BELOW: appended to the Latin side of some two-way (↔) rules
36 #
37 #
38 # Use this $wordBoundary until bug 2034 is fixed in ICU:
39 # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
40 #
41 $wordBoundary = [^[:L:][:M:][:N:]] ; # any single character that is not a letter, mark, or number
42 #
43 #
44 ########################################################################
45 # non-letters: punctuation and digits; every rule in this group is two-way (↔). A separator standing between two digits maps to a bare ',' or '.'; a separator elsewhere, and each ARABIC-INDIC digit, gets $disambig appended on the Latin side, while EXTENDED ARABIC-INDIC digits map to the bare ASCII digit.
46 [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
47 [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
48 ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
49 ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
50 # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
51 ، ↔ ',' ; # ARABIC COMMA
52 ؛ ↔ ';' ; # ARABIC SEMICOLON
53 ؟ ↔ '?' ; # ARABIC QUESTION MARK
54 ٪ ↔ '%' ; # ARABIC PERCENT SIGN
55 ٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO
56 ١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE
57 ٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO
58 ٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE
59 ٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR
60 ٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE
61 ٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX
62 ٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN
63 ٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT
64 ٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE
65 ۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO
66 ۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE
67 ۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO
68 ۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE
69 ۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR
70 ۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE
71 ۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX
72 ۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
73 ۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
74 ۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE
75 #
76 ########################################################################
77 #
78 # Rules moved to front to avoid masking
79 #
80 ########################################################################
81 #
82 ########################################################################
83 #
84 # BGN Page 89 Rule 4
85 #
86 # The character sequences كه , زه , سه , and گه may be romanized k·h, z·h,
87 # s·h, and g·h in order to differentiate those romanizations from the
88 # digraphs kh, zh, sh, and gh.
89 #
90 ########################################################################
91 #
92 [كک]ه → k·h ; # ARABIC LETTER KAF or KEHEH + HEH (KEHEH is otherwise romanized as plain k, so without this KEHEH + HEH would read as the digraph kh)
93 زه → z·h ; # ARABIC LETTER ZAIN + HEH
94 سه → s·h ; # ARABIC LETTER SEEN + HEH
95 گه → g·h ; # ARABIC LETTER GAF + HEH
96 #
97 #
98 ########################################################################
99 #
100 # End Rule 4
101 #
102 ########################################################################
103 #
104 ########################################################################
105 #
106 # BGN Page 91 Rule 7
107 #
108 # Doubled consonant sounds are represented in Arabic script by
109 # placing a shaddah ( \u0651 ) over a consonant character. In romanization
110 # the letter should be doubled. [The remainder of this rule deals with
111 # the definite article and is lexical.]
112 #
113 ########################################################################
114 #
115 ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA
116 پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA
117 ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA
118 ث\u0651 → s\u0304s\u0304 ; # ARABIC LETTER THEH + SHADDA
119 ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA
120 چ\u0651 → chch ; # ARABIC LETTER TCHEH + SHADDA
121 ح\u0651 → ḥḥ ; # ARABIC LETTER HAH + SHADDA
122 خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA
123 د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA
124 ذ\u0651 → z\u0304z\u0304 ; # ARABIC LETTER THAL + SHADDA
125 ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA
126 ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA
127 ژ\u0651 → zhzh ; # ARABIC LETTER JEH + SHADDA
128 س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA
129 ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA
130 ص\u0651 → ṣṣ ; # ARABIC LETTER SAD + SHADDA
131 ض\u0651 → ẕẕ ; # ARABIC LETTER DAD + SHADDA (doubles the ẕ that the single-letter DAD rule produces)
132 ط\u0651 → ṭṭ ; # ARABIC LETTER TAH + SHADDA
133 ظ\u0651 → ẓẓ ; # ARABIC LETTER ZAH + SHADDA
134 ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
135 غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA
136 ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA
137 ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA
138 ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA
139 ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA
140 م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA
141 ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA
142 ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA
143 و\u0651 → vv ; # ARABIC LETTER WAW + SHADDA (doubles the v that the single-letter WAW rule produces)
144 ی\u0651 → yy ; # ARABIC LETTER FARSI YEH + SHADDA
145 #
146 #
147 ########################################################################
148 #
149 # End Rule 7
150 #
151 ########################################################################
152 #
153 ########################################################################
154 #
155 # Start of Transformations
156 #
157 ########################################################################
158 #
159 $wordBoundary{ء → ; # ARABIC LETTER HAMZA directly after a $wordBoundary character: removed (empty replacement)
160 ء → $alef ; # ARABIC LETTER HAMZA in any other position
161 $wordBoundary{ا → ; # ARABIC LETTER ALEF directly after a $wordBoundary character: removed (empty replacement)
162 آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE. NOTE(review): the earlier NFKD step decomposes U+0622 into U+0627 U+0653, so if this literal is the precomposed letter the rule may never match; confirm
163 ب → b ; # ARABIC LETTER BEH
164 پ → p ; # ARABIC LETTER PEH
165 ت → t ; # ARABIC LETTER TEH
166 ة → h ; # ARABIC LETTER TEH MARBUTA
167 ث → s\u0304 ; # ARABIC LETTER THEH
168 ج → j ; # ARABIC LETTER JEEM
169 چ → ch ; # ARABIC LETTER TCHEH
170 ح → ḥ ; # ARABIC LETTER HAH
171 خ → kh ; # ARABIC LETTER KHAH
172 د → d ; # ARABIC LETTER DAL
173 ذ → z\u0304 ; # ARABIC LETTER THAL
174 ر → r ; # ARABIC LETTER REH
175 ز → z ; # ARABIC LETTER ZAIN
176 ژ → zh ; # ARABIC LETTER JEH
177 س → s ; # ARABIC LETTER SEEN
178 ش → sh ; # ARABIC LETTER SHEEN
179 ص → ṣ ; # ARABIC LETTER SAD
180 ض → ẕ ; # ARABIC LETTER DAD
181 ط → ṭ ; # ARABIC LETTER TAH
182 ظ → ẓ ; # ARABIC LETTER ZAH
183 ع → $ayin ; # ARABIC LETTER AIN
184 غ → gh ; # ARABIC LETTER GHAIN
185 ف → f ; # ARABIC LETTER FEH
186 ق → q ; # ARABIC LETTER QAF
187 ک ↔ k ; # ARABIC LETTER KEHEH (two-way: plain k maps back to KEHEH)
188 ك ↔ k $disambig ; # ARABIC LETTER KAF (two-way: k followed by $disambig maps back to KAF)
189 گ → g ; # ARABIC LETTER GAF
190 ل → l ; # ARABIC LETTER LAM
191 م → m ; # ARABIC LETTER MEEM
192 ن → n ; # ARABIC LETTER NOON
193 ه → h ; # ARABIC LETTER HEH
194 و → v ; # ARABIC LETTER WAW
195 ی → y ; # ARABIC LETTER FARSI YEH
196 \u064Eا → ā ; # ARABIC FATHA + ALEF
197 \u064Eی → á ; # ARABIC FATHA + FARSI YEH
198 \u064Eو\u0652 → ow ; # ARABIC FATHA + WAW + SUKUN
199 \u064E → a ; # ARABIC FATHA
200 \u0650[يی] → ī ; # ARABIC KASRA + YEH or FARSI YEH (both spellings of yeh give the long vowel; must precede the bare KASRA rule)
201 \u0650 → e ; # ARABIC KASRA
202 \u064Fو → ū ; # ARABIC DAMMA + WAW
203 \u064F → o ; # ARABIC DAMMA
204 \u0652 → ; # ARABIC SUKUN
205 ::NFC (NFD) ;
206 #
207 #
208 ########################################################################
209