]>
Commit | Line | Data |
---|---|---|
1 | # © 2016 and later: Unicode, Inc. and others. | |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
4 | # File: ja_Latn_ru.txt | |
5 | # Generated from CLDR | |
6 | # | |
7 | ||
8 | # Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU. | |
9 | # Can be run in sequence after e.g. Katakana-Latin. | |
10 | # | |
11 | # These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian. | |
12 | # | |
13 | # TODO: Cyrillization needs to respect morpheme/Kanji boundaries. | |
14 | # 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary | |
15 | # markup in the input in order to do that properly. | |
16 | # | |
17 | ::NFD(NFC); | |
18 | ::[:Latin:] Lower(); | |
19 | # After the two transforms above, the rules below see decomposed text with Latin letters lowercased. | |
20 | # Vowel-length marks: combining circumflex (U+0302) and combining macron (U+0304). | |
21 | $lengthMarker = [\u0302\u0304]; | |
22 | # | |
23 | # | |
24 | # Delete apostrophes. Apostrophes after "n" are consumed by the n\' rule below. | |
25 | \' → ; | |
26 | # | |
27 | # | |
28 | # Turn long /e:/ into diphthong /ei/. | |
29 | # Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи. | |
30 | e $lengthMarker → эй ; | |
31 | # | |
32 | # | |
33 | # Turn long /i:/ into two vowels /ii/; the cursor (|) is left before them so the i rule converts both. | |
34 | i $lengthMarker → | i i ; | |
35 | # | |
36 | # | |
37 | # Ignore vowel length everywhere else. | |
38 | $lengthMarker → ; | |
39 | # | |
40 | # | |
41 | # Vowels. | |
42 | # NOTE(review): \~ matches a literal tilde; judging by the kana examples in this file it marks small kana - confirm against the transform that produces the input. | |
43 | # TODO(mjansche): Enable diphthongs once we have Kanji boundaries. | |
44 | ## ai → ай ; | |
45 | a → а ; | |
46 | i\~e → | ye ; # rescanned by the ye rule below | |
47 | i → и ; | |
48 | u\~ → в ; # ウィ etc. | |
49 | # | |
50 | ## ui → уй ; | |
51 | u → у ; | |
52 | e → э ; | |
53 | o → о ; | |
54 | # | |
55 | # | |
56 | # Consonants. | |
57 | # Rules of the form "x → | y" emit an intermediate Latin spelling and leave the cursor before it, so that spelling is matched again by the rules in this file. | |
58 | k → к ; | |
59 | # | |
60 | # sh is respelled as s + y (e.g. sha → sya → ся). | |
61 | sh → | sy ; | |
62 | s → с ; | |
63 | # | |
64 | # ch is respelled as t + y. A c directly before ch (geminate cch) is respelled as t; the cursor is placed before that t so the t rule converts it to т instead of leaving a Latin letter in the output. | |
65 | ch → | ty ; | |
66 | c } ch → | t ; | |
67 | te\~ → | t ; # テュ | |
68 | to\~ → | t ; # トゥ | |
69 | tsu\~ → | ts ; # ツァ, ツィ, etc. | |
70 | ts → ц ; | |
71 | t → т ; | |
72 | # | |
73 | # A tilde directly before tsu is dropped; tsu is then rescanned. | |
74 | \~tsu → | tsu ; | |
75 | # | |
76 | # n becomes м before b, p or m, нъ when followed by an apostrophe, and н otherwise. | |
77 | n } [bpm] → м ; # 群馬 → Гумма | |
78 | n\' → нъ ; | |
79 | n → н ; | |
80 | # | |
81 | # fu followed by a tilde is reduced to f and rescanned; plain f becomes ф. | |
82 | h → х ; | |
83 | fu\~ → | f ; # フュ | |
84 | f → ф ; | |
85 | # | |
86 | # | |
87 | m → м ; | |
88 | # | |
89 | # y + vowel becomes a single Cyrillic vowel letter. | |
90 | ya → я ; | |
91 | yi → и ; # Added for convenience, after sh, ch, j. | |
92 | yu → ю ; | |
93 | ye → е ; # ?? unobserved | |
94 | yo → ё ; | |
95 | # | |
96 | # | |
97 | r → р ; | |
98 | # | |
99 | # Only wa keeps its w (as в); any other w is deleted. | |
100 | wa → ва ; | |
101 | w → ; | |
102 | # | |
103 | # | |
104 | g → г ; | |
105 | # | |
106 | # j is respelled as z + y, and z itself becomes дз (e.g. ja → zya → дзя). | |
107 | j → | zy ; | |
108 | z → дз ; | |
109 | # | |
110 | # d-series: dj and dz spellings are reduced to j or z and rescanned; de\~ and do\~ are reduced to d. | |
111 | de\~ → | d ; # デュ | |
112 | dji\~ → | z ; # ヂャ, ヂュ, etc. | |
113 | dj → | j ; # ヂ | |
114 | do\~ → | d ; # ドゥ | |
115 | dzu\~ → | z ; # ヅァ, ヅィ, etc. | |
116 | dz → | z ; # ヅ | |
117 | d → д ; | |
118 | # | |
119 | # | |
120 | b → б ; | |
121 | vu\~ → | v ; # ヴァ, etc. | |
122 | v → в ; # ?? unobserved | |
123 | # | |
124 | # | |
125 | p → п ; | |
126 | # | |
127 | # Compose the result (NFC); the parenthesized NFD names the transform used for the reverse direction. | |
128 | ::NFC(NFD); | |
129 |