]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
729e4ab9 | 4 | # File: ja_Latn_ru.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
729e4ab9 | 6 | # |
2ca993e8 A |
7 | |
8 | # Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU. | |
9 | # Can be run in sequence after e.g. Katakana-Latin. | |
10 | # | |
11 | # These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian. | |
12 | # | |
13 | # TODO: Cyrillization needs to respect morpheme/Kanji boundaries. | |
14 | # 中井 (naka + i) becomes Накаи, but 北海道 (hok + kai + dō) becomes Хоккайдо: "ai" is written ай only inside a single morpheme. We need boundary | |
15 | # markup in the input in order to do that properly. | |
16 | # | |
729e4ab9 A |
17 | ::NFD(NFC); |
18 | ::[:Latin:] Lower(); | |
2ca993e8 A |
19 | # |
20 | # | |
729e4ab9 | 21 | $lengthMarker = [\u0302\u0304]; |
2ca993e8 A |
22 | # |
23 | # | |
24 | # Delete apostrophes. An apostrophe directly after "n" never reaches this rule: the n\' rule below matches at the "n" first and consumes both characters. | |
729e4ab9 | 25 | \' → ; |
2ca993e8 A |
26 | # |
27 | # | |
28 | # Turn long /e:/ into diphthong /ei/. | |
29 | # Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи. | |
729e4ab9 | 30 | e $lengthMarker → эй ; |
2ca993e8 A |
31 | # |
32 | # | |
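33 | # Turn long /i:/ into two vowels /ii/. The cursor is placed before the rewritten "ii" so the plain "i" rule below converts each one, giving ии. | |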
729e4ab9 | 34 | i $lengthMarker → | i i ; |
2ca993e8 A |
35 | # |
36 | # | |
37 | # Ignore vowel length everywhere else. | |
729e4ab9 | 38 | $lengthMarker → ; |
2ca993e8 A |
39 | # |
40 | # | |
41 | # Vowels. | |
42 | # | |
43 | # TODO(mjansche): Enable diphthongs once we have Kanji boundaries. | |
44 | ## ai → ай ; | |
51004dcb | 45 | a → а ; |
729e4ab9 | 46 | i\~e → | ye ; |
51004dcb A |
47 | i → и ; |
48 | u\~ → в ; # ウィ etc. | |
2ca993e8 A |
49 | # |
50 | ## ui → уй ; | |
51004dcb A |
51 | u → у ; |
52 | e → э ; | |
53 | o → о ; | |
2ca993e8 A |
54 | # |
55 | # | |
56 | # Consonants. | |
57 | # | |
729e4ab9 | 58 | k → к ; |
2ca993e8 A |
59 | # |
60 | # | |
729e4ab9 A |
61 | sh → | sy ; |
62 | s → с ; | |
2ca993e8 A |
63 | # |
64 | # | |
51004dcb | 65 | ch → | ty ; |
729e4ab9 | 66 | c } ch → t ; |
51004dcb A |
67 | te\~ → | t ; # テュ |
68 | to\~ → | t ; # トゥ | |
69 | tsu\~ → | ts ; # ツァ, ツィ, etc. | |
729e4ab9 | 70 | ts → ц ; |
51004dcb | 71 | t → т ; |
2ca993e8 A |
72 | # |
73 | # | |
729e4ab9 | 74 | \~tsu → | tsu ; |
2ca993e8 A |
75 | # |
76 | # | |
51004dcb | 77 | n } [bpm] → м ; # 群馬 → Гумма |
729e4ab9 A |
78 | n\' → нъ ; |
79 | n → н ; | |
2ca993e8 A |
80 | # |
81 | # | |
729e4ab9 | 82 | h → х ; |
51004dcb | 83 | fu\~ → | f ; # フュ |
729e4ab9 | 84 | f → ф ; |
2ca993e8 A |
85 | # |
86 | # | |
729e4ab9 | 87 | m → м ; |
2ca993e8 A |
88 | # |
89 | # | |
729e4ab9 | 90 | ya → я ; |
51004dcb | 91 | yi → и ; # Added for convenience, after sh, ch, j. |
729e4ab9 | 92 | yu → ю ; |
51004dcb | 93 | ye → е ; # ?? unobserved |
729e4ab9 | 94 | yo → ё ; |
2ca993e8 A |
95 | # |
96 | # | |
729e4ab9 | 97 | r → р ; |
2ca993e8 A |
98 | # |
99 | # | |
729e4ab9 A |
100 | wa → ва ; |
101 | w → ; | |
2ca993e8 A |
102 | # |
103 | # | |
729e4ab9 | 104 | g → г ; |
2ca993e8 A |
105 | # |
106 | # | |
729e4ab9 A |
107 | j → | zy ; |
108 | z → дз ; | |
2ca993e8 A |
109 | # |
110 | # | |
51004dcb A |
111 | de\~ → | d ; # デュ |
112 | dji\~ → | z ; # ヂャ, ヂュ, etc. | |
113 | dj → | j ; # ヂ | |
114 | do\~ → | d ; # ドゥ | |
115 | dzu\~ → | z ; # ヅァ, ヅィ, etc. | |
116 | dz → | z ; # ヅ | |
729e4ab9 | 117 | d → д ; |
2ca993e8 A |
118 | # |
119 | # | |
729e4ab9 | 120 | b → б ; |
51004dcb A |
121 | vu\~ → | v ; # ヴァ, etc. |
122 | v → в ; # ?? unobserved | |
2ca993e8 A |
123 | # |
124 | # | |
729e4ab9 | 125 | p → п ; |
2ca993e8 A |
126 | # |
127 | # | |
729e4ab9 | 128 | ::NFC(NFD); |
2ca993e8 | 129 |