]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/ru_ru_Latn_BGN.txt
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / ru_ru_Latn_BGN.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: ru_ru_Latn_BGN.txt
5 # Generated from CLDR
6 #
7
8 # BGN/PCGN 1947 System
9 #
10 # The BGN/PCGN system for Russian was adopted by the BGN in 1944 and
11 # by the PCGN in 1947 for use in romanizing names written in the
12 # Russian Cyrillic alphabet.
13 #
14 # The Russian Alphabet as defined by the BGN (Page 93):
15 # АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
16 # абвгдеёжзийклмнопрстуфхцчшщъыьэюя
17 #
18 # Originally prepared by Michael Everson everson@evertype.com
19 # Fixed by Frank Yung-Fong Tang ftang@google.com
20 #
21 # Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian
22 ########################################################################
23 # MINIMAL FILTER: Russian-Latin
24 ::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя]; # global filter: only the upper- and lowercase letters of the Russian alphabet are rewritten
25 # BUG(ftang): the following line was removed (commented out). Otherwise the rules for
26 # Й й Ё ё would break, since the rules are written in NFC but
27 # the line would decompose the text.
28 # :: NFD (NFC) ;
29 ########################################################################
30 # Define All Transformation Variables
31 ########################################################################
32 $prime = ʹ ; # prime mark emitted for the soft sign Ь/ь
33 $doublePrime = ʺ ; # double prime mark emitted for the hard sign Ъ/ъ
34 $wordBoundary = [^[:L:][:M:][:N:]] ; # any character that is not a letter, mark, or number
35 $upperVowels = [АЕЁЭИОУЫЮЯ] ; # the ten uppercase Russian vowel letters
36 $lowerVowels = [аеёэиоуыюя] ; # the ten lowercase Russian vowel letters
37 $vowels = [$upperVowels $lowerVowels] ; # vowels of either case
38 $upperConsonants = [[:Uppercase:]-$vowels] ; # NOTE(review): every Uppercase character except the Russian vowels, so this is not limited to Cyrillic and includes Ъ/Ь
39 $lowerConsonants = [[:Lowercase:]-$vowels] ; # NOTE(review): every Lowercase character except the Russian vowels, so this is not limited to Cyrillic and includes ъ/ь
40 $consonants = [$upperConsonants $lowerConsonants] ; # union of the two sets above
41 $upper = [:Uppercase:]; # look-ahead used to choose all-caps output (e.g. YE, ZH)
42 $lower = [:Lowercase:]; # look-ahead used to choose title-case output (e.g. Zh, Yu)
43 ########################################################################
44 # Rules moved to front to avoid masking
45 ########################################################################
46 $vowels { ы → ·y ; # Rule 3.3: CYRILLIC SMALL LETTER YERU after a vowel of either case; the output stays lowercase
47 $vowels { Ы → ·Y ; # Rule 3.3: CYRILLIC CAPITAL LETTER YERU after a vowel of either case; the output stays uppercase
48 [$consonants - [Йй]]{Э → ·E ; # Rule 3.4: CYRILLIC CAPITAL LETTER E after any $consonants character other than Й/й
49 [$consonants - [Йй]]{э → ·e ; # Rule 3.4: CYRILLIC SMALL LETTER E after any $consonants character other than Й/й
50 [$upperVowels [ЙЪЬ]] { Е } $lower → Ye ; # CYRILLIC CAPITAL LETTER IE after an uppercase vowel/Й/Ъ/Ь and before a lowercase letter
51 [$upperVowels [ЙЪЬ]] { Е → YE ; # CYRILLIC CAPITAL LETTER IE after an uppercase vowel/Й/Ъ/Ь otherwise; keeps all-caps words uppercase (ДВОЕ → DVOYE)
52 [$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE after any vowel, Й/й, Ъ/ъ or Ь/ь
53 [$upperVowels [ЙЪЬ]] { Ё } $lower → Yë ; # CYRILLIC CAPITAL LETTER IO after an uppercase vowel/Й/Ъ/Ь and before a lowercase letter
54 [$upperVowels [ЙЪЬ]] { Ё → YË ; # CYRILLIC CAPITAL LETTER IO after an uppercase vowel/Й/Ъ/Ь otherwise; keeps all-caps words uppercase
55 [$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO after any vowel, Й/й, Ъ/ъ or Ь/ь
56 # Since the rules above use the context before the characters,
57 # we have to perform them in a separate pass before we change the vowels;
58 # the ::Null forces a separate pass.
59 ::Null;
60 ########################################################################
61 # Start of Alphabetic Transformations
62 ########################################################################
63 А → A ; # U+0410 CYRILLIC CAPITAL LETTER A
64 а → a ; # U+0430 CYRILLIC SMALL LETTER A
65 Б → B ; # U+0411 CYRILLIC CAPITAL LETTER BE
66 б → b ; # U+0431 CYRILLIC SMALL LETTER BE
67 В → V ; # U+0412 CYRILLIC CAPITAL LETTER VE
68 в → v ; # U+0432 CYRILLIC SMALL LETTER VE
69 Г → G ; # U+0413 CYRILLIC CAPITAL LETTER GHE
70 г → g ; # U+0433 CYRILLIC SMALL LETTER GHE
71 Д → D ; # U+0414 CYRILLIC CAPITAL LETTER DE
72 д → d ; # U+0434 CYRILLIC SMALL LETTER DE
73 ########################################################################
74 # BGN Page 94 Rule 1:
75 # The character е should be romanized ye
76 # initially, after the vowel characters а, е, ё, и, о, у, ы, э, ю,
77 # and я, and after й, ъ, and ь.
78 # In all other instances, it should
79 # be romanized e.
80 ########################################################################
81 # BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
82 # Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE
83 # Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE
84 $wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE preceded by a $wordBoundary character and followed by an uppercase letter
85 $wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE preceded by a $wordBoundary character, any other following context
86 Е → E ; # CYRILLIC CAPITAL LETTER IE in every remaining position (the after-vowel/Й/Ъ/Ь cases ran before ::Null)
87 #
88 # BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
89 # е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE
90 $wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE preceded by a $wordBoundary character
91 е → e ; # CYRILLIC SMALL LETTER IE in every remaining position
92 ########################################################################
93 # End of Rule 1
94 ########################################################################
95 ########################################################################
96 # BGN Page 94 Rule 2:
97 #
98 # The character ё is not considered a separate character of the
99 # Russian alphabet and the dieresis is generally not shown. When the
100 # dieresis is shown, the character should be romanized yë initially,
101 # after the vowel characters a, e, ё, и, о, у, ы, э, ю, and я, and
102 # after й, ъ, and ь. In all other instances, it should be romanized
103 # ë. When the dieresis is not shown, the character may still be
104 # romanized in the preceding manner or, alternatively, in accordance
105 # with note 1.
106 ########################################################################
107 # BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
108 # Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO
109 # Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO
110 $wordBoundary {Ё} $upper → YË ; # CYRILLIC CAPITAL LETTER IO preceded by a $wordBoundary character and followed by an uppercase letter
111 $wordBoundary {Ё → Yë ; # CYRILLIC CAPITAL LETTER IO preceded by a $wordBoundary character, any other following context (also a Ё followed by a non-letter or end of text)
112 Ё → Ë ; # CYRILLIC CAPITAL LETTER IO in every remaining position
113 # BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
114 # ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO
115 $wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO preceded by a $wordBoundary character
116 ё → ë ; # CYRILLIC SMALL LETTER IO in every remaining position
117 ########################################################################
118 # End of Rule 2
119 ########################################################################
120 Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE before a lowercase letter: title-case digraph
121 Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE otherwise: all-caps digraph
122 ж → zh ; # CYRILLIC SMALL LETTER ZHE
123 ########################################################################
124 # BGN Page 94 Rule 3.4
125 # э after any consonant character except
126 # й becomes ·e (the active rules run in the first pass, before ::Null)
127 ########################################################################
128 З → Z ; # U+0417 CYRILLIC CAPITAL LETTER ZE
129 з → z ; # U+0437 CYRILLIC SMALL LETTER ZE
130 # BUG(ftang) The following two lines said that those consonants become ·е
131 # [$consonants - [Йй]]}Э → ·Е ;
132 # [$consonants - [Йй]]}э → ·е ;
133 ########################################################################
134 # End of Rule 3.4
135 ########################################################################
136 И → I ; # U+0418 CYRILLIC CAPITAL LETTER I
137 и → i ; # U+0438 CYRILLIC SMALL LETTER I
138 ########################################################################
139 # BGN Page 94 Rule 3:
140 #
141 # Unusual Russian character sequences occurring primarily in
142 # non-Russian-language names may be romanized as shown below in order
143 # to provide differentiation from regularly-occurring digraphs and
144 # character sequences.
145 #
146 # BGN Page 94 Rule 3.1
147 # й before а, у, ы, or э becomes y·
148 ########################################################################
149 Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER SHORT I before а, у, ы or э of either case (Rule 3.1)
150 й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER SHORT I before а, у, ы or э of either case (Rule 3.1)
151 Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I
152 й → y ; # CYRILLIC SMALL LETTER SHORT I
153 ########################################################################
154 # End Rule 3.1
155 ########################################################################
156 К → K ; # U+041A CYRILLIC CAPITAL LETTER KA
157 к → k ; # U+043A CYRILLIC SMALL LETTER KA
158 Л → L ; # U+041B CYRILLIC CAPITAL LETTER EL
159 л → l ; # U+043B CYRILLIC SMALL LETTER EL
160 М → M ; # U+041C CYRILLIC CAPITAL LETTER EM
161 м → m ; # U+043C CYRILLIC SMALL LETTER EM
162 Н → N ; # U+041D CYRILLIC CAPITAL LETTER EN
163 н → n ; # U+043D CYRILLIC SMALL LETTER EN
164 О → O ; # U+041E CYRILLIC CAPITAL LETTER O
165 о → o ; # U+043E CYRILLIC SMALL LETTER O
166 П → P ; # U+041F CYRILLIC CAPITAL LETTER PE
167 п → p ; # U+043F CYRILLIC SMALL LETTER PE
168 Р → R ; # U+0420 CYRILLIC CAPITAL LETTER ER
169 р → r ; # U+0440 CYRILLIC SMALL LETTER ER
170 С → S ; # U+0421 CYRILLIC CAPITAL LETTER ES
171 с → s ; # U+0441 CYRILLIC SMALL LETTER ES
172 ########################################################################
173 # BGN Page 94 Rule 3.5
174 # тс becomes t·s
175 ########################################################################
176 ТС → T·S ; # CYRILLIC CAPITAL LETTER TE + CAPITAL ES: the middle dot keeps the pair distinct from Ц → TS
177 Тс → T·s ; # CYRILLIC CAPITAL LETTER TE + SMALL ES
178 тс → t·s ; # CYRILLIC SMALL LETTER TE + SMALL ES: the middle dot keeps the pair distinct from ц → ts
179 Т → T ; # CYRILLIC CAPITAL LETTER TE not followed by С/с as above
180 т → t ; # CYRILLIC SMALL LETTER TE not followed by с
181 ########################################################################
182 # End Rule 3.5
183 ########################################################################
184 У → U ; # U+0423 CYRILLIC CAPITAL LETTER U
185 у → u ; # U+0443 CYRILLIC SMALL LETTER U
186 Ф → F ; # U+0424 CYRILLIC CAPITAL LETTER EF
187 ф → f ; # U+0444 CYRILLIC SMALL LETTER EF
188 Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA before a lowercase letter: title-case digraph
189 Х → KH ; # CYRILLIC CAPITAL LETTER HA otherwise: all-caps digraph
190 х → kh ; # CYRILLIC SMALL LETTER HA
191 Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE before a lowercase letter: title-case digraph
192 Ц → TS ; # CYRILLIC CAPITAL LETTER TSE otherwise: all-caps digraph
193 ц → ts ; # CYRILLIC SMALL LETTER TSE
194 Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE before a lowercase letter: title-case digraph
195 Ч → CH ; # CYRILLIC CAPITAL LETTER CHE otherwise: all-caps digraph
196 ч → ch ; # CYRILLIC SMALL LETTER CHE
197 ########################################################################
198 # BGN Page 94 Rule 3.6
199 # шч becomes sh·ch
200 ########################################################################
201 ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA + CAPITAL CHE: the middle dot keeps the pair distinct from Щ → SHCH
202 Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA + SMALL CHE
203 шч → sh·ch ; # CYRILLIC SMALL LETTER SHA + SMALL CHE: the middle dot keeps the pair distinct from щ → shch
204 Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA before a lowercase letter: title-case digraph
205 Ш → SH ; # CYRILLIC CAPITAL LETTER SHA otherwise: all-caps digraph
206 ш → sh ; # CYRILLIC SMALL LETTER SHA
207 Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA before a lowercase letter: title-case output
208 Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA otherwise: all-caps output
209 щ → shch ; # CYRILLIC SMALL LETTER SHCHA
210 ########################################################################
211 # End Rule 3.6
212 ########################################################################
213 Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN; same output as the small letter
214 ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN
215 ########################################################################
216 # BGN Page 94 Rule 3.2
217 # ы before а, у, ы, or э becomes y·
218 #
219 # BGN Page 94 Rule 3.3
220 # ы after any vowel character becomes ·y
221 ########################################################################
222 #
223 # BUG(ftang) the following line said the vowels will change
224 # $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I
225 # $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I
226 Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU before а, у, ы or э of either case (Rule 3.2)
227 ы}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER YERU before а, у, ы or э of either case (Rule 3.2); same look-ahead set as the capital-letter rule
228 Ы → Y ; # CYRILLIC CAPITAL LETTER YERU
229 ы → y ; # CYRILLIC SMALL LETTER YERU
230 ########################################################################
231 # End Rule 3.2 and 3.3
232 ########################################################################
233 Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN; same output as the small letter
234 ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN
235 Э → E ; # CYRILLIC CAPITAL LETTER E not already rewritten to ·E in the first pass
236 э → e ; # CYRILLIC SMALL LETTER E not already rewritten to ·e in the first pass
237 Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU before a lowercase letter: title-case digraph
238 Ю → YU ; # CYRILLIC CAPITAL LETTER YU otherwise: all-caps digraph
239 ю → yu ; # CYRILLIC SMALL LETTER YU
240 Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA before a lowercase letter: title-case digraph
241 Я → YA ; # CYRILLIC CAPITAL LETTER YA otherwise: all-caps digraph
242 я → ya ; # CYRILLIC SMALL LETTER YA
243