1 # ***************************************************************************
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
6 # ***************************************************************************
7 # File: ru_ru_Latn_BGN.txt
11 # BGN/PCGN 1947 System
13 # The BGN/PCGN system for Russian was adopted by the BGN in 1944 and
14 # by the PCGN in 1947 for use in romanizing names written in the
15 # Russian Cyrillic alphabet.
17 # The Russian Alphabet as defined by the BGN (Page 93):
18 # АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
19 # абвгдеёжзийклмнопрстуфхцчшщъыьэюя
21 # Originally prepared by Michael Everson everson@evertype.com
22 # Fixed by Frank Yung-Fong Tang ftang@google.com
24 # Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian
25 ########################################################################
# MINIMAL FILTER: Russian-Latin
# Restricts conversion to the 33 capital and 33 small letters of the
# Russian alphabet (А-Я and а-я plus Ё and ё, which sit outside those ranges).
:: [А-ЯЁ а-яё] ;
28 # BUG(ftang) remove the following line. Otherwise the rule for
29 # Й й Ё ё will break since the rule is written in NFC but
30 # the line decomposes the text.
########################################################################
# Define All Transformation Variables
########################################################################
# Romanizations of the soft sign (Ь ь) and the hard sign (Ъ ъ).
$prime = ʹ ;        # U+02B9 MODIFIER LETTER PRIME
$doublePrime = ʺ ;  # U+02BA MODIFIER LETTER DOUBLE PRIME
# Anything that is not a letter, mark or number delimits a word.
$wordBoundary = [^[:L:][:M:][:N:]] ;
# Consonant letters. Э э are vowels and belong to the vowel sets below;
# З з are consonants and must be listed here so that they also reach
# $consonants, $upper and $lower.
$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩ] ;
$lowerConsonants = [бвгджзйклмнпрстфхцчшщ] ;
$consonants = [$upperConsonants $lowerConsonants] ;
# Vowel letters.
$upperVowels = [АЕЁИОУЫЭЮЯ] ;
$lowerVowels = [аеёиоуыэюя] ;
$vowels = [$upperVowels $lowerVowels] ;
# Case classes used as after-context to choose between ZH and Zh, YU and Yu, etc.
$lower = [$lowerConsonants $lowerVowels] ;
$upper = [$upperConsonants $upperVowels] ;
########################################################################
# Rules moved to front to avoid masking
#
# Every rule in this section inspects the Cyrillic letter BEFORE the one
# being converted, so the section must run as a pass of its own while the
# preceding text is still Cyrillic. The ":: Null ;" at the end of the
# section closes that pass.
########################################################################
# BGN Rule 3.3: ы after any vowel character becomes ·y. The case of the
# output follows the case of Ы/ы itself, not that of the preceding vowel.
$vowels { Ы → ·Y ;
$vowels { ы → ·y ;
# BGN Rule 3.4: э after any consonant character except й becomes ·e.
[$consonants - [Йй]] { Э → ·E ;
[$consonants - [Йй]] { э → ·e ;
# Е and Ё take a leading y after a vowel and after й, ъ, ь.
[$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE
[$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE
[$vowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE
[$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO
[$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO
[$vowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO
# Since the rules above use the context before the characters, we have
# to perform them in a separate pass before we change the vowels.
# The :: Null below forces that separate pass.
:: Null ;
########################################################################
# Start of Alphabetic Transformations
########################################################################
# А Б В Г Д: each letter maps to a single Latin letter of the same case.
А → A ;   # U+0410 CYRILLIC CAPITAL LETTER A
а → a ;   # U+0430 CYRILLIC SMALL LETTER A
Б → B ;   # U+0411 CYRILLIC CAPITAL LETTER BE
б → b ;   # U+0431 CYRILLIC SMALL LETTER BE
В → V ;   # U+0412 CYRILLIC CAPITAL LETTER VE
в → v ;   # U+0432 CYRILLIC SMALL LETTER VE
Г → G ;   # U+0413 CYRILLIC CAPITAL LETTER GHE
г → g ;   # U+0433 CYRILLIC SMALL LETTER GHE
Д → D ;   # U+0414 CYRILLIC CAPITAL LETTER DE
д → d ;   # U+0434 CYRILLIC SMALL LETTER DE
########################################################################
# Е / е
#
# The character е should be romanized ye initially, after the vowel
# characters а, е, ё, и, о, у, ы, э, ю, and я, and after й, ъ, and ь.
# In all other instances, it should be romanized e.
#
# Only the word-initial case is decided here; the cases that depend on
# the preceding letter are handled by the before-context rules at the
# front of the file.
########################################################################
# BUG(ftang): the three retired rules below tested the letters AFTER
# Е/е instead of the letters BEFORE it.
# Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE
# Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE
# е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE
$wordBoundary { Е } $upper → YE ;   # word-initial, next letter is a capital
$wordBoundary { Е → Ye ;            # word-initial, any other following text
Е → E ;                             # U+0415 everywhere else
$wordBoundary { е → ye ;            # word-initial small letter
е → e ;                             # U+0435 everywhere else
########################################################################
# Ё / ё
#
# The character ё is not considered a separate character of the
# Russian alphabet and the dieresis is generally not shown. When the
# dieresis is shown, the character should be romanized yë initially,
# after the vowel characters а, е, ё, и, о, у, ы, э, ю, and я, and
# after й, ъ, and ь. In all other instances, it should be romanized
# ë. When the dieresis is not shown, the character may still be
# romanized in the preceding manner or, alternatively, in accordance
# with the rules for е.
#
# Only the word-initial case is decided here; the cases that depend on
# the preceding letter are handled by the before-context rules at the
# front of the file.
########################################################################
# BUG(ftang): the three retired rules below tested the letters AFTER
# Ё/ё instead of the letters BEFORE it.
# Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO
# Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO
# ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO
$wordBoundary { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO
# No after-context here, matching the parallel rule for Е: a word-initial
# Ё must become Yë whatever follows it (lower-case letter, punctuation,
# or nothing), otherwise it would fall through to the plain Ë rule.
$wordBoundary { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO
Ё → Ë ; # CYRILLIC CAPITAL LETTER IO
$wordBoundary { ё → yë ; # CYRILLIC SMALL LETTER IO
ё → ë ; # CYRILLIC SMALL LETTER IO
########################################################################
# Ж / ж: digraph zh. A capital followed by a lower-case letter is written
# in title case (Zh); any other capital is written fully upper-case (ZH).
########################################################################
Ж } $lower → Zh ;   # U+0416 CYRILLIC CAPITAL LETTER ZHE, before lower case
Ж → ZH ;            # U+0416 CYRILLIC CAPITAL LETTER ZHE, otherwise
ж → zh ;            # U+0436 CYRILLIC SMALL LETTER ZHE
########################################################################
# З / з
########################################################################
З → Z ;   # U+0417 CYRILLIC CAPITAL LETTER ZE
з → z ;   # U+0437 CYRILLIC SMALL LETTER ZE
########################################################################
# BGN Page 94 Rule 3.4
# э after any consonant character except й becomes ·e.
# The working rules for this case are among the before-context rules at
# the front of the file.
# BUG(ftang): the two retired rules below rewrote the consonant itself
# instead of the э that follows it.
# [$consonants - [Йй]]}Э → ·Е ;
# [$consonants - [Йй]]}э → ·е ;
########################################################################
# И / и
########################################################################
И → I ;   # U+0418 CYRILLIC CAPITAL LETTER I
и → i ;   # U+0438 CYRILLIC SMALL LETTER I
########################################################################
# BGN Page 94 Rule 3:
# Unusual Russian character sequences occurring primarily in
# non-Russian-language names may be romanized as shown below in order
# to provide differentiation from regularly-occurring digraphs and
# character sequences.
#
# BGN Page 94 Rule 3.1
# й before а, у, ы, or э becomes y· (so that, for example, йа gives y·a
# and stays distinct from я, which gives ya).
########################################################################
Й } [АаУуЫыЭэ] → Y· ;   # U+0419 CYRILLIC CAPITAL LETTER SHORT I, Rule 3.1
й } [АаУуЫыЭэ] → y· ;   # U+0439 CYRILLIC SMALL LETTER SHORT I, Rule 3.1
Й → Y ;                 # U+0419 CYRILLIC CAPITAL LETTER SHORT I
й → y ;                 # U+0439 CYRILLIC SMALL LETTER SHORT I
########################################################################
# К Л М Н О П Р С: each letter maps to a single Latin letter of the same case.
########################################################################
К → K ;   # U+041A CYRILLIC CAPITAL LETTER KA
к → k ;   # U+043A CYRILLIC SMALL LETTER KA
Л → L ;   # U+041B CYRILLIC CAPITAL LETTER EL
л → l ;   # U+043B CYRILLIC SMALL LETTER EL
М → M ;   # U+041C CYRILLIC CAPITAL LETTER EM
м → m ;   # U+043C CYRILLIC SMALL LETTER EM
Н → N ;   # U+041D CYRILLIC CAPITAL LETTER EN
н → n ;   # U+043D CYRILLIC SMALL LETTER EN
О → O ;   # U+041E CYRILLIC CAPITAL LETTER O
о → o ;   # U+043E CYRILLIC SMALL LETTER O
П → P ;   # U+041F CYRILLIC CAPITAL LETTER PE
п → p ;   # U+043F CYRILLIC SMALL LETTER PE
Р → R ;   # U+0420 CYRILLIC CAPITAL LETTER ER
р → r ;   # U+0440 CYRILLIC SMALL LETTER ER
С → S ;   # U+0421 CYRILLIC CAPITAL LETTER ES
с → s ;   # U+0441 CYRILLIC SMALL LETTER ES
########################################################################
# BGN Page 94 Rule 3.5
# The sequence тс is written t·s, which keeps it distinct from ц (ts).
# The sequence rules come first so that they win over the single-letter
# rules for Т/т.
########################################################################
ТС → T·S ;   # both letters capital
Тс → T·s ;   # capital followed by small
тс → t·s ;   # both letters small
Т → T ;      # U+0422 CYRILLIC CAPITAL LETTER TE
т → t ;      # U+0442 CYRILLIC SMALL LETTER TE
########################################################################
# У Ф Х Ц Ч
# Х, Ц and Ч become digraphs: a capital followed by a lower-case letter is
# written in title case (Kh), any other capital fully upper-case (KH).
########################################################################
У → U ;             # U+0423 CYRILLIC CAPITAL LETTER U
у → u ;             # U+0443 CYRILLIC SMALL LETTER U
Ф → F ;             # U+0424 CYRILLIC CAPITAL LETTER EF
ф → f ;             # U+0444 CYRILLIC SMALL LETTER EF
Х } $lower → Kh ;   # U+0425 CYRILLIC CAPITAL LETTER HA, before lower case
Х → KH ;            # U+0425 CYRILLIC CAPITAL LETTER HA, otherwise
х → kh ;            # U+0445 CYRILLIC SMALL LETTER HA
Ц } $lower → Ts ;   # U+0426 CYRILLIC CAPITAL LETTER TSE, before lower case
Ц → TS ;            # U+0426 CYRILLIC CAPITAL LETTER TSE, otherwise
ц → ts ;            # U+0446 CYRILLIC SMALL LETTER TSE
Ч } $lower → Ch ;   # U+0427 CYRILLIC CAPITAL LETTER CHE, before lower case
Ч → CH ;            # U+0427 CYRILLIC CAPITAL LETTER CHE, otherwise
ч → ch ;            # U+0447 CYRILLIC SMALL LETTER CHE
########################################################################
# BGN Page 94 Rule 3.6
# The sequence шч is written sh·ch, which keeps it distinct from щ (shch).
# The sequence rules come first so that they win over the single-letter
# rules for Ш/ш.
########################################################################
ШЧ → SH·CH ;          # both letters capital
Шч → Sh·ch ;          # capital followed by small
шч → sh·ch ;          # both letters small
Ш } $lower → Sh ;     # U+0428 CYRILLIC CAPITAL LETTER SHA, before lower case
Ш → SH ;              # U+0428 CYRILLIC CAPITAL LETTER SHA, otherwise
ш → sh ;              # U+0448 CYRILLIC SMALL LETTER SHA
Щ } $lower → Shch ;   # U+0429 CYRILLIC CAPITAL LETTER SHCHA, before lower case
Щ → SHCH ;            # U+0429 CYRILLIC CAPITAL LETTER SHCHA, otherwise
щ → shch ;            # U+0449 CYRILLIC SMALL LETTER SHCHA
########################################################################
# Ъ / ъ: the hard sign has no case distinction in the output, so both
# forms (U+042A, U+044A) share one rule.
# NOTE(review): relies on $doublePrime being defined together with the
# other transformation variables.
########################################################################
[Ъъ] → $doublePrime ;   # CYRILLIC CAPITAL / SMALL LETTER HARD SIGN
########################################################################
# BGN Page 94 Rule 3.2
# ы before а, у, ы, or э becomes y·
#
# BGN Page 94 Rule 3.3
# ы after any vowel character becomes ·y
# (Rule 3.3 depends on the preceding letter and is handled by the
# before-context rules at the front of the file.)
########################################################################
# BUG(ftang): the two retired rules below rewrote the vowel itself
# instead of the ы that follows it.
# $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I
# $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I
# Rule 3.2. Both rules accept either case of the following vowel, the
# same way the Rule 3.1 rules for Й and й do.
Ы } [АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU
ы } [АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER YERU
Ы → Y ; # CYRILLIC CAPITAL LETTER YERU
ы → y ; # CYRILLIC SMALL LETTER YERU
########################################################################
# End Rule 3.2 and 3.3
########################################################################
# Ь / ь: the soft sign has no case distinction in the output, so both
# forms (U+042C, U+044C) share one rule.
# NOTE(review): relies on $prime being defined together with the other
# transformation variables.
[Ьь] → $prime ;     # CYRILLIC CAPITAL / SMALL LETTER SOFT SIGN
# Э / э outside the Rule 3.4 context.
Э → E ;             # U+042D CYRILLIC CAPITAL LETTER E
э → e ;             # U+044D CYRILLIC SMALL LETTER E
# Ю and Я become digraphs: a capital followed by a lower-case letter is
# written in title case (Yu), any other capital fully upper-case (YU).
Ю } $lower → Yu ;   # U+042E CYRILLIC CAPITAL LETTER YU, before lower case
Ю → YU ;            # U+042E CYRILLIC CAPITAL LETTER YU, otherwise
ю → yu ;            # U+044E CYRILLIC SMALL LETTER YU
Я } $lower → Ya ;   # U+042F CYRILLIC CAPITAL LETTER YA, before lower case
Я → YA ;            # U+042F CYRILLIC CAPITAL LETTER YA, otherwise
я → ya ;            # U+044F CYRILLIC SMALL LETTER YA