]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | # *************************************************************************** |
2 | # * | |
2ca993e8 | 3 | # * Copyright (C) 2004-2016, International Business Machines |
729e4ab9 A |
4 | # * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
5 | # * | |
6 | # *************************************************************************** | |
7 | # File: ja_Latn_ru.txt | |
8 | # Generated from CLDR | |
9 | # | |
2ca993e8 A |
10 | |
11 | # Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU. | |
12 | # Can be run in sequence after e.g. Katakana-Latin. | |
13 | # | |
14 | # These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian. | |
15 | # | |
16 | # TODO: Cyrillization needs to respect morpheme/Kanji boundaries. | |
17 | # 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary | |
18 | # markup in the input in order to do that properly. | |
19 | # | |
729e4ab9 A |
20 | ::NFD(NFC); |
21 | ::[:Latin:] Lower(); | |
2ca993e8 A |
22 | # |
23 | # | |
729e4ab9 | 24 | $lengthMarker = [\u0302\u0304]; |
2ca993e8 A |
25 | # |
26 | # | |
27 | # Delete apostrophes. An apostrophe directly after "n" is consumed by the n\' rule below. | |
729e4ab9 | 28 | \' → ; |
2ca993e8 A |
29 | # |
30 | # | |
31 | # Turn long /e:/ into diphthong /ei/. | |
32 | # Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи. | |
729e4ab9 | 33 | e $lengthMarker → эй ; |
2ca993e8 A |
34 | # |
35 | # | |
36 | # Turn long /i:/ into two vowels /ii/. | |
729e4ab9 | 37 | i $lengthMarker → | i i ; |
2ca993e8 A |
38 | # |
39 | # | |
40 | # Ignore vowel length everywhere else. | |
729e4ab9 | 41 | $lengthMarker → ; |
2ca993e8 A |
42 | # |
43 | # | |
44 | # Vowels. | |
45 | # | |
46 | # TODO(mjansche): Enable diphthongs once we have Kanji boundaries. | |
47 | ## ai → ай ; | |
51004dcb | 48 | a → а ; |
729e4ab9 | 49 | i\~e → | ye ; |
51004dcb A |
50 | i → и ; |
51 | u\~ → в ; # ウィ etc. | |
2ca993e8 A |
52 | # |
53 | ## ui → уй ; | |
51004dcb A |
54 | u → у ; |
55 | e → э ; | |
56 | o → о ; | |
2ca993e8 A |
57 | # |
58 | # | |
59 | # Consonants. | |
60 | # | |
729e4ab9 | 61 | k → к ; |
2ca993e8 A |
62 | # |
63 | # | |
729e4ab9 A |
64 | sh → | sy ; |
65 | s → с ; | |
2ca993e8 A |
66 | # |
67 | # | |
51004dcb | 68 | ch → | ty ; |
729e4ab9 | 69 | c } ch → t ; |
51004dcb A |
70 | te\~ → | t ; # テュ |
71 | to\~ → | t ; # トゥ | |
72 | tsu\~ → | ts ; # ツァ, ツィ, etc. | |
729e4ab9 | 73 | ts → ц ; |
51004dcb | 74 | t → т ; |
2ca993e8 A |
75 | # |
76 | # | |
729e4ab9 | 77 | \~tsu → | tsu ; |
2ca993e8 A |
78 | # |
79 | # | |
51004dcb | 80 | n } [bpm] → м ; # 群馬 → Гумма |
729e4ab9 A |
81 | n\' → нъ ; |
82 | n → н ; | |
2ca993e8 A |
83 | # |
84 | # | |
729e4ab9 | 85 | h → х ; |
51004dcb | 86 | fu\~ → | f ; # フュ |
729e4ab9 | 87 | f → ф ; |
2ca993e8 A |
88 | # |
89 | # | |
729e4ab9 | 90 | m → м ; |
2ca993e8 A |
91 | # |
92 | # | |
729e4ab9 | 93 | ya → я ; |
51004dcb | 94 | yi → и ; # Added for convenience: sh, ch, j are rewritten to sy, ty, zy, so shi/chi/ji arrive here as yi. |
729e4ab9 | 95 | yu → ю ; |
51004dcb | 96 | ye → е ; # ?? unobserved |
729e4ab9 | 97 | yo → ё ; |
2ca993e8 A |
98 | # |
99 | # | |
729e4ab9 | 100 | r → р ; |
2ca993e8 A |
101 | # |
102 | # | |
729e4ab9 A |
103 | wa → ва ; |
104 | w → ; | |
2ca993e8 A |
105 | # |
106 | # | |
729e4ab9 | 107 | g → г ; |
2ca993e8 A |
108 | # |
109 | # | |
729e4ab9 A |
110 | j → | zy ; |
111 | z → дз ; | |
2ca993e8 A |
112 | # |
113 | # | |
51004dcb A |
114 | de\~ → | d ; # デュ |
115 | dji\~ → | z ; # ヂャ, ヂュ, etc. | |
116 | dj → | j ; # ヂ | |
117 | do\~ → | d ; # ドゥ | |
118 | dzu\~ → | z ; # ヅァ, ヅィ, etc. | |
119 | dz → | z ; # ヅ | |
729e4ab9 | 120 | d → д ; |
2ca993e8 A |
121 | # |
122 | # | |
729e4ab9 | 123 | b → б ; |
51004dcb A |
124 | vu\~ → | v ; # ヴァ, etc. |
125 | v → в ; # ?? unobserved | |
2ca993e8 A |
126 | # |
127 | # | |
729e4ab9 | 128 | p → п ; |
2ca993e8 A |
129 | # |
130 | # | |
729e4ab9 | 131 | ::NFC(NFD); |
2ca993e8 | 132 |