]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/ru_ru_Latn_BGN.txt
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / ru_ru_Latn_BGN.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: ru_ru_Latn_BGN.txt
5 # Generated from CLDR
6 #
7
8 # BGN/PCGN 1947 System
9 #
10 # The BGN/PCGN system for Russian was adopted by the BGN in 1944 and
11 # by the PCGN in 1947 for use in romanizing names written in the
12 # Russian Cyrillic alphabet.
13 #
14 # The Russian Alphabet as defined by the BGN (Page 93):
15 # АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
16 # абвгдеёжзийклмнопрстуфхцчшщъыьэюя
17 #
18 # Originally prepared by Michael Everson everson@evertype.com
19 # Fixed by Frank Yung-Fong Tang ftang@google.com
20 #
21 # Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian
22 ########################################################################
23 # MINIMAL FILTER: Russian-Latin
24 ::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя]; # global filter: upper- and lowercase forms of the Russian alphabet listed in the header
25 ::NFC; # normalization step placed ahead of all conversion rules
26 ########################################################################
27 # Define All Transformation Variables
28 ########################################################################
29 $prime = ʹ ; # output used for the soft sign (Ь/ь)
30 $doublePrime = ʺ ; # output used for the hard sign (Ъ/ъ)
31 $wordBoundary = [^[:L:][:M:][:N:]] ; # any character that is not a letter, mark, or number
32 $upperVowels = [АЕЁЭИОУЫЮЯ] ; # the ten uppercase Cyrillic vowel letters
33 $lowerVowels = [аеёэиоуыюя] ; # the ten lowercase Cyrillic vowel letters
34 $vowels = [$upperVowels $lowerVowels] ; # union of the two sets above
35 $upperConsonants = [[:Uppercase:]-$vowels] ; # every uppercase character except the vowels, so Й, Ъ and Ь are included
36 $lowerConsonants = [[:Lowercase:]-$vowels] ; # every lowercase character except the vowels, so й, ъ and ь are included
37 $consonants = [$upperConsonants $lowerConsonants] ; # union of the two sets above
38 $upper = [:Uppercase:]; # used as following context to choose all-caps output
39 $lower = [:Lowercase:]; # used as following context to choose title-case output
40 ########################################################################
41 # Rules moved to front to avoid masking
42 ########################################################################
43 $lowerVowels { ы → ·y ; # Rule 3.3: ы after a lowercase Cyrillic vowel
44 $upperVowels { [Ыы] } $lower → ·y ; # Rule 3.3: Ы or ы after an uppercase vowel and before a lowercase letter (output is lowercase for both)
45 $upperVowels { [Ыы] } → ·Y ; # Rule 3.3: any other Ы or ы after an uppercase vowel
46 [$consonants - [Йй]]{Э → ·E ; # Rule 3.4: Э after any member of $consonants except Й/й
47 [$consonants - [Йй]]{э → ·e ; # Rule 3.4: э after any member of $consonants except Й/й
48 [$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE
49 [$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE
50 [$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE
51 [$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO
52 [$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO
53 [$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO
54 # Since the rules above look at the Cyrillic context before the Е/е/Ё/ё,
55 # we have to transform these in a separate pass before we change the vowels.
56 # The ::Null forces a separate pass.
57 ::Null;
58 ########################################################################
59 # Start of Alphabetic Transformations
60 ########################################################################
61 А → A ; # CYRILLIC CAPITAL LETTER A
62 а → a ; # CYRILLIC SMALL LETTER A
63 Б → B ; # CYRILLIC CAPITAL LETTER BE
64 б → b ; # CYRILLIC SMALL LETTER BE
65 В → V ; # CYRILLIC CAPITAL LETTER VE
66 в → v ; # CYRILLIC SMALL LETTER VE
67 Г → G ; # CYRILLIC CAPITAL LETTER GHE
68 г → g ; # CYRILLIC SMALL LETTER GHE
69 Д → D ; # CYRILLIC CAPITAL LETTER DE
70 д → d ; # CYRILLIC SMALL LETTER DE
71 ########################################################################
72 # BGN Page 94 Rule 1:
73 # The character e should be romanized ye
74 # initially, after the vowel characters a, e, ё, и, о, у, ы, э, ю,
75 # and я, and after й, ъ, and ь.
76 # In all other instances, it should
77 # be romanized e.
78 ########################################################################
79 # BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
80 # Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE
81 # Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE
82 $wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE (word-initial, before an uppercase letter)
83 $wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE (any other word-initial Е)
84 Е → E ; # CYRILLIC CAPITAL LETTER IE (every Е not matched above)
85 #
86 # BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
87 # е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE
88 $wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE (word-initial)
89 е → e ; # CYRILLIC SMALL LETTER IE (every е not matched above)
90 ########################################################################
91 # End of Rule 1
92 ########################################################################
93 ########################################################################
94 # BGN Page 94 Rule 2:
95 #
96 # The character ё is not considered a separate character of the
97 # Russian alphabet and the dieresis is generally not shown. When the
98 # dieresis is shown, the character should be romanized yë initially,
99 # after the vowel characters a, e, ё, и, о, у, ы, э, ю, and я, and
100 # after й, ъ, and ь. In all other instances, it should be romanized
101 # ë. When the dieresis is not shown, the character may still be
102 # romanized in the preceding manner or, alternatively, in accordance
103 # with note 1.
104 ########################################################################
105 # BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
106 # Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO
107 # Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO
108 $wordBoundary {Ё} [·]? $upper → YË ; # CYRILLIC CAPITAL LETTER IO (word-initial, before an optional · and an uppercase letter)
109 $wordBoundary {Ё → Yë ; # CYRILLIC CAPITAL LETTER IO (any other word-initial Ё, including a Ё not followed by a cased letter)
110 Ё → Ë ; # CYRILLIC CAPITAL LETTER IO (every Ё not matched above)
111 # BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
112 # ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO
113 $wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO (word-initial)
114 ё → ë ; # CYRILLIC SMALL LETTER IO (every ё not matched above)
115 ########################################################################
116 # End of Rule 2
117 ########################################################################
118 Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE (title case before a lowercase letter)
119 Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE
120 ж → zh ; # CYRILLIC SMALL LETTER ZHE
121 ########################################################################
122 # BGN Page 94 Rule 3.4
123 # э after any consonant character except
124 # й becomes ·e (implemented by the Э/э rules in the "Rules moved to front" section)
125 ########################################################################
126 З → Z ; # CYRILLIC CAPITAL LETTER ZE
127 з → z ; # CYRILLIC SMALL LETTER ZE
128 # BUG(ftang) The following two lines made the consonant, not the Э/э, become ·е
129 # [$consonants - [Йй]]}Э → ·Е ;
130 # [$consonants - [Йй]]}э → ·е ;
131 ########################################################################
132 # End of Rule 3.4
133 ########################################################################
134 И → I ; # CYRILLIC CAPITAL LETTER I
135 и → i ; # CYRILLIC SMALL LETTER I
136 ########################################################################
137 # BGN Page 94 Rule 3:
138 #
139 # Unusual Russian character sequences occurring primarily in
140 # non-Russian-language names may be romanized as shown below in order
141 # to provide differentiation from regularly-occurring digraphs and
142 # character sequences.
143 #
144 # BGN Page 94 Rule 3.1
145 # й before а, у, ы, or э becomes y·
146 ########################################################################
147 Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER SHORT I
148 й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER SHORT I
149 Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I
150 й → y ; # CYRILLIC SMALL LETTER SHORT I
151 ########################################################################
152 # End Rule 3.1
153 ########################################################################
154 К → K ; # CYRILLIC CAPITAL LETTER KA
155 к → k ; # CYRILLIC SMALL LETTER KA
156 Л → L ; # CYRILLIC CAPITAL LETTER EL
157 л → l ; # CYRILLIC SMALL LETTER EL
158 М → M ; # CYRILLIC CAPITAL LETTER EM
159 м → m ; # CYRILLIC SMALL LETTER EM
160 Н → N ; # CYRILLIC CAPITAL LETTER EN
161 н → n ; # CYRILLIC SMALL LETTER EN
162 О → O ; # CYRILLIC CAPITAL LETTER O
163 о → o ; # CYRILLIC SMALL LETTER O
164 П → P ; # CYRILLIC CAPITAL LETTER PE
165 п → p ; # CYRILLIC SMALL LETTER PE
166 Р → R ; # CYRILLIC CAPITAL LETTER ER
167 р → r ; # CYRILLIC SMALL LETTER ER
168 С → S ; # CYRILLIC CAPITAL LETTER ES
169 с → s ; # CYRILLIC SMALL LETTER ES
170 ########################################################################
171 # BGN Page 94 Rule 3.5
172 # тс becomes t·s
173 ########################################################################
174 ТС → T·S ; # CYRILLIC CAPITAL LETTER TE followed by CAPITAL LETTER ES
175 Тс → T·s ; # CYRILLIC CAPITAL LETTER TE followed by SMALL LETTER ES
176 тс → t·s ; # CYRILLIC SMALL LETTER TE followed by SMALL LETTER ES
177 Т → T ; # CYRILLIC CAPITAL LETTER TE
178 т → t ; # CYRILLIC SMALL LETTER TE
179 ########################################################################
180 # End Rule 3.5
181 ########################################################################
182 У → U ; # CYRILLIC CAPITAL LETTER U
183 у → u ; # CYRILLIC SMALL LETTER U
184 Ф → F ; # CYRILLIC CAPITAL LETTER EF
185 ф → f ; # CYRILLIC SMALL LETTER EF
186 Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA (title case before a lowercase letter)
187 Х → KH ; # CYRILLIC CAPITAL LETTER HA
188 х → kh ; # CYRILLIC SMALL LETTER HA
189 Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE (title case before a lowercase letter)
190 Ц → TS ; # CYRILLIC CAPITAL LETTER TSE
191 ц → ts ; # CYRILLIC SMALL LETTER TSE
192 Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE (title case before a lowercase letter)
193 Ч → CH ; # CYRILLIC CAPITAL LETTER CHE
194 ч → ch ; # CYRILLIC SMALL LETTER CHE
195 ########################################################################
196 # BGN Page 94 Rule 3.6
197 # шч becomes sh·ch
198 ########################################################################
199 ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA followed by CAPITAL LETTER CHE
200 Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA followed by SMALL LETTER CHE
201 шч → sh·ch ; # CYRILLIC SMALL LETTER SHA followed by SMALL LETTER CHE
202 Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA (title case before a lowercase letter)
203 Ш → SH ; # CYRILLIC CAPITAL LETTER SHA
204 ш → sh ; # CYRILLIC SMALL LETTER SHA
205 Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA (title case before a lowercase letter)
206 Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA
207 щ → shch ; # CYRILLIC SMALL LETTER SHCHA
208 ########################################################################
209 # End Rule 3.6
210 ########################################################################
211 Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN
212 ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN
213 ########################################################################
214 # BGN Page 94 Rule 3.2
215 # ы before а, у, ы, or э becomes y·
216 #
217 # BGN Page 94 Rule 3.3
218 # ы after any vowel character becomes ·y (implemented by the Ы/ы rules in the "Rules moved to front" section)
219 ########################################################################
220 #
221 # BUG(ftang) the following line said the vowels will change
222 # $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I
223 # $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I
224 Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU
225 ы}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER YERU (following vowel may be either case, as for Ы, Й and й)
226 Ы → Y ; # CYRILLIC CAPITAL LETTER YERU
227 ы → y ; # CYRILLIC SMALL LETTER YERU
228 ########################################################################
229 # End Rule 3.2 and 3.3
230 ########################################################################
231 Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN
232 ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN
233 Э → E ; # CYRILLIC CAPITAL LETTER E (any Э left after the Rule 3.4 rules in the "Rules moved to front" section)
234 э → e ; # CYRILLIC SMALL LETTER E (any э left after the Rule 3.4 rules in the "Rules moved to front" section)
235 Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU (title case before a lowercase letter)
236 Ю → YU ; # CYRILLIC CAPITAL LETTER YU
237 ю → yu ; # CYRILLIC SMALL LETTER YU
238 Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA (title case before a lowercase letter)
239 Я → YA ; # CYRILLIC CAPITAL LETTER YA
240 я → ya ; # CYRILLIC SMALL LETTER YA
241