]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/ja_Latn_ru.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / ja_Latn_ru.txt
CommitLineData
f3c0d7a5
A
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
729e4ab9 4# File: ja_Latn_ru.txt
f3c0d7a5 5# Generated from CLDR
729e4ab9 6#
2ca993e8
A
7
8# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
9# Can be run in sequence after e.g. Katakana-Latin.
10#
11# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
12#
13# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
14# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary
15# markup in the input in order to do that properly.
16#
729e4ab9
A
17::NFD(NFC); # Decompose first so vowel-length diacritics become separate combining marks that $lengthMarker can match.
18::[:Latin:] Lower(); # Lowercase Latin-script characters; every rule below is written in lowercase.
2ca993e8
A
19#
20#
729e4ab9 21$lengthMarker = [\u0302\u0304]; # U+0302 combining circumflex accent, U+0304 combining macron.
2ca993e8
A
22#
23#
24# Delete apostrophes. Apostrophes after "n" are consumed below.
729e4ab9 25\' → ; # Empty replacement deletes the apostrophe; an apostrophe right after n is matched together with the n by the n\' rule instead.
2ca993e8
A
26#
27#
28# Turn long /e:/ into diphthong /ei/.
29# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
729e4ab9 30e $lengthMarker → эй ; # Consumes e plus its length mark and emits final Cyrillic (no cursor reset, so nothing is rescanned).
2ca993e8
A
31#
32#
33# Turn long /i:/ into two vowels /ii/.
729e4ab9 34i $lengthMarker → | i i ; # The | leaves the cursor before the new Latin ii, so the plain i rule then turns it into ии.
2ca993e8
A
35#
36#
37# Ignore vowel length everywhere else.
729e4ab9 38$lengthMarker → ; # Any length mark not already consumed together with a preceding e or i is deleted.
2ca993e8
A
39#
40#
41# Vowels.
42#
43# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
44## ai → ай ;
51004dcb 45a → а ; # Latin a to Cyrillic а.
729e4ab9 46i\~e → | ye ; # Rewrites i~e to Latin ye and rescans it, so the ye rule emits е; listed ahead of the plain i rule so it is tried first.
51004dcb
A
47i → и ; # Plain i; also receives the ii produced by the long-i rule and the yi produced after sh/ch/j rewrites via the yi rule.
48u\~ → в ; # ウィ etc. Listed ahead of the plain u rule so that u followed by ~ is tried first.
2ca993e8
A
49#
50## ui → уй ;
51004dcb
A
51u → у ; # Plain u (u followed by ~ was already handled by the earlier u\~ rule).
52e → э ; # Plain e is э; Cyrillic е is only emitted by the ye rule.
53o → о ; # Latin o to Cyrillic о.
2ca993e8
A
54#
55#
56# Consonants.
57#
729e4ab9 58k → к ; # Latin k to Cyrillic к; the following vowel is converted by the vowel rules.
2ca993e8
A
59#
60#
729e4ab9
A
61sh → | sy ; # Rewrites sh to Latin sy and rescans: s becomes с, then ya/yi/yu/ye/yo become я/и/ю/е/ё (e.g. sha gives ся).
62s → с ; # Plain s, including the s produced by the sh rewrite.
2ca993e8
A
63#
64#
51004dcb 65ch → | ty ; # Rewrites ch to Latin ty and rescans: t becomes т, then the y-glide rules apply (e.g. cha gives тя).
729e4ab9 66c } ch → t ; # Doubled cch: a c that is followed by ch becomes t. NOTE(review): the replacement is a Latin t with no | cursor reset, so it does not appear to be rescanned by the t → т rule - confirm this is intended.
51004dcb
A
67te\~ → | t ; # テュ - drops the e~ and rescans the t so the next glide/vowel attaches to it.
68to\~ → | t ; # トゥ - drops the o~ and rescans the t.
69tsu\~ → | ts ; # ツァ, ツィ, etc. - drops the u~ and rescans ts, which the next rule turns into ц.
729e4ab9 70ts → ц ; # Must stay ahead of the plain t rule so ts is tried as a unit first.
51004dcb 71t → т ; # Plain t, including the t produced by the rewrites above and by the ch rule.
2ca993e8
A
72#
73#
729e4ab9 74\~tsu → | tsu ; # Drops the leading ~ and rescans tsu (ts → ц, u → у). Presumably ~tsu is how the upstream romanizer writes a stand-alone small っ - TODO confirm.
2ca993e8
A
75#
76#
51004dcb 77n } [bpm] → м ; # 群馬 → Гумма. Only the n is replaced; the following b, p or m is context and is converted by its own rule.
729e4ab9
A
78n\' → нъ ; # n plus apostrophe becomes н followed by a hard sign; this is where an apostrophe after n is consumed.
79n → н ; # Plain n (cases before b/p/m and before an apostrophe were handled by the two earlier n rules).
2ca993e8
A
80#
81#
729e4ab9 82h → х ; # Latin h to Cyrillic х.
51004dcb 83fu\~ → | f ; # フュ - drops the u~ and rescans the f so the next glide/vowel attaches to it.
729e4ab9 84f → ф ; # Plain f, including the f produced by the fu\~ rewrite.
2ca993e8
A
85#
86#
729e4ab9 87m → м ; # Latin m to Cyrillic м.
2ca993e8
A
88#
89#
729e4ab9 90ya → я ; # y plus vowel maps to a single iotated Cyrillic letter.
51004dcb 91yi → и ; # Added for convenience, after sh, ch, j.
729e4ab9 92yu → ю ;
51004dcb 93ye → е ; # ?? unobserved (the i\~e rule rewrites to Latin ye, which is then matched here)
729e4ab9 94yo → ё ;
2ca993e8
A
95#
96#
729e4ab9 97r → р ; # Latin r to Cyrillic р.
2ca993e8
A
98#
99#
729e4ab9
A
100wa → ва ; # wa is the only w-syllable that keeps its consonant.
101w → ; # Any other w is deleted, leaving just the following vowel (e.g. wo gives о).
2ca993e8
A
102#
103#
729e4ab9 104g → г ; # Latin g to Cyrillic г.
2ca993e8
A
105#
106#
729e4ab9
A
107j → | zy ; # Rewrites j to Latin zy and rescans: z becomes дз, then the y-glide rules apply (e.g. ja gives дзя).
108z → дз ; # Plain z, including the z produced by the j, dji\~, dzu\~ and dz rewrites.
2ca993e8
A
109#
110#
51004dcb
A
111de\~ → | d ; # デュ - drops the e~ and rescans the d.
112dji\~ → | z ; # ヂャ, ヂュ, etc. - collapses to Latin z, which is rescanned by the z rule.
113dj → | j ; # ヂ - drops the d and rescans j, which the j rule rewrites to zy.
114do\~ → | d ; # ドゥ - drops the o~ and rescans the d.
115dzu\~ → | z ; # ヅァ, ヅィ, etc. - collapses to Latin z, which is rescanned by the z rule.
116dz → | z ; # ヅ - drops the d and rescans z.
729e4ab9 117d → д ; # Plain d; listed last so the longer d-initial patterns above are tried first.
2ca993e8
A
118#
119#
729e4ab9 120b → б ; # Latin b to Cyrillic б.
51004dcb
A
121vu\~ → | v ; # ヴァ, etc. - drops the u~ and rescans the v.
122v → в ; # ?? unobserved (also matches the v left behind by the vu\~ rewrite)
2ca993e8
A
123#
124#
729e4ab9 125p → п ; # Latin p to Cyrillic п.
2ca993e8
A
126#
127#
729e4ab9 128::NFC(NFD); # Recompose the output; mirrors the NFD(NFC) step at the top of the rule set.
2ca993e8 129