]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/ja_Latn_ru.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / ja_Latn_ru.txt
CommitLineData
729e4ab9
A
1# ***************************************************************************
2# *
2ca993e8 3# * Copyright (C) 2004-2016, International Business Machines
729e4ab9
A
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
7# File: ja_Latn_ru.txt
8# Generated from CLDR
9#
2ca993e8
A
10
11# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
12# Can be run in sequence after e.g. Katakana-Latin.
13#
14# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
15#
16# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
17# 中井 (Nakai) becomes Накаи, but 北海道 (Hokkaidō) becomes Хоккайдо: "ai" is
18# written аи across a boundary and ай inside a morpheme. We need boundary markup in the input in order to do that properly.
19#
729e4ab9
A
# Decompose the input first, so that a long-vowel diacritic (for example the
# macron in "ō") becomes a separate combining mark that the rules below can
# match. The transform named in parentheses is the one used in the reverse
# direction.
20::NFD(NFC);
# Lowercase Latin letters only; every rule below is written for lowercase
# input. The empty parentheses mean there is no corresponding reverse step.
21::[:Latin:] Lower();
2ca993e8
A
22#
23#
# Combining marks treated as "this vowel is long" after NFD:
# U+0302 COMBINING CIRCUMFLEX ACCENT and U+0304 COMBINING MACRON.
729e4ab9 24$lengthMarker = [\u0302\u0304];
2ca993e8
A
25#
26#
27# Delete apostrophes. Apostrophes after "n" are consumed below.
# An apostrophe that directly follows "n" never reaches this rule: the cursor
# arrives at the "n" first, where the n\' → нъ rule matches and consumes both
# characters.
729e4ab9 28\' → ;
2ca993e8
A
29#
30#
31# Turn long /e:/ into diphthong /ei/.
32# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
# "e" plus a length mark is emitted directly as Cyrillic; a plain "e" followed
# by "i" is handled letter by letter by the vowel rules and yields эи.
729e4ab9 33e $lengthMarker → эй ;
2ca993e8
A
34#
35#
36# Turn long /i:/ into two vowels /ii/.
# The leading | puts the cursor in front of the replacement, so both letters
# are re-scanned and converted by the plain i → и rule. Unquoted spaces in a
# rule are not part of the output.
729e4ab9 37i $lengthMarker → | i i ;
2ca993e8
A
38#
39#
40# Ignore vowel length everywhere else.
# Must stay after the two rules above: they match at the vowel and consume the
# mark, so only marks on other letters are left for this rule to delete.
729e4ab9 41$lengthMarker → ;
2ca993e8
A
42#
43#
44# Vowels.
45#
# NOTE(review): the escaped tilde (\~) in these rules presumably marks a
# following small kana in the romanized input (compare the kana examples in
# the trailing comments) — confirm against the upstream romanizer.
#
46# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
# Lines starting with "##" are rules that are deliberately commented out.
47## ai → ай ;
51004dcb 48a → а ;
# "i~e" is rewritten to "ye" and re-scanned (cursor marker |), so the final
# letter comes from the ye → е rule. Listed before the plain "i" rule so that
# the longer match is tried first.
729e4ab9 49i\~e → | ye ;
51004dcb
A
50i → и ;
# "u~" becomes в; the vowel after the tilde is then converted by the ordinary
# vowel rules (e.g. "u~i" gives ви). Listed before the plain "u" rule.
51u\~ → в ; # ウィ etc.
2ca993e8
A
52#
53## ui → уй ;
51004dcb
A
54u → у ;
55e → э ;
56o → о ;
2ca993e8
A
57#
58#
59# Consonants.
60#
729e4ab9 61k → к ;
2ca993e8
A
62#
63#
729e4ab9
A
# "sh" is rewritten to "sy" and re-scanned (cursor marker |): the "s" becomes
# с and the "y" combines with the following vowel through the ya/yi/yu/ye/yo
# rules, e.g. "sha" gives ся and "shi" gives си.
64sh → | sy ;
65s → с ;
2ca993e8
A
66#
67#
# "ch" is rewritten to "ty" and re-scanned (cursor marker |): the "t" becomes
# т and the "y" combines with the following vowel through the ya/yi/yu/ye/yo
# rules, e.g. "cha" gives тя and "chi" gives ти.
51004dcb 68ch → | ty ;
# A "c" directly before "ch" (geminate written "cch") becomes "t". The text
# after } is context only and is not consumed. The replacement is re-scanned
# (cursor marker |) so that the t → т rule converts it; without the marker the
# cursor would be placed after the Latin "t" and it would be left unconverted
# in the output.
729e4ab9 69c } ch → | t ;
51004dcb
A
# The next three rules drop the vowel and tilde and re-scan the bare consonant,
# which then combines with whatever follows. They are listed before the
# shorter "ts" and "t" rules so that the longer match is tried first.
# NOTE(review): the escaped tilde (\~) presumably marks a following small kana
# in the romanized input (see the kana examples) — confirm against the
# upstream romanizer.
70te\~ → | t ; # テュ
71to\~ → | t ; # トゥ
72tsu\~ → | ts ; # ツァ, ツィ, etc.
729e4ab9 73ts → ц ;
51004dcb 74t → т ;
2ca993e8
A
75#
76#
# Drops a tilde that stands in front of "tsu" and re-scans the "tsu" (cursor
# marker |) so that the ordinary t rules convert it.
# NOTE(review): "~tsu" is presumably the romanization of a small tsu —
# confirm against the upstream romanizer.
729e4ab9 77\~tsu → | tsu ;
2ca993e8
A
78#
79#
# "n" before b, p or m is written м. The bracketed set after } is context only
# and is not consumed.
51004dcb 80n } [bpm] → м ; # 群馬 → Гумма
729e4ab9
A
# "n" followed by an apostrophe becomes нъ; the apostrophe is consumed here,
# so the general apostrophe-deleting rule never sees it.
81n\' → нъ ;
82n → н ;
2ca993e8
A
83#
84#
729e4ab9 85h → х ;
# "fu~" is reduced to "f" and re-scanned (cursor marker |), so the "f" becomes
# ф and the following "y" + vowel is converted by the y rules (e.g. "fu~yu"
# gives фю). Listed before the plain "f" rule so the longer match is tried
# first.
# NOTE(review): the escaped tilde (\~) presumably marks a following small kana
# in the romanized input — confirm against the upstream romanizer.
51004dcb 86fu\~ → | f ; # フュ
729e4ab9 87f → ф ;
2ca993e8
A
88#
89#
729e4ab9 90m → м ;
2ca993e8
A
91#
92#
# "y" + vowel maps to a single iotated Cyrillic letter. These rules also
# finish the sequences produced by the sh → sy, ch → ty, j → zy and i~e → ye
# rewrites. No rule in this file matches a "y" that is not followed by one of
# these five vowels.
729e4ab9 93ya → я ;
51004dcb 94yi → и ; # Added for convenience, after sh, ch, j.
729e4ab9 95yu → ю ;
51004dcb 96ye → е ; # ?? unobserved
729e4ab9 97yo → ё ;
2ca993e8
A
98#
99#
729e4ab9 100r → р ;
2ca993e8
A
101#
102#
729e4ab9
A
# Only "wa" keeps its consonant (ва). Any other "w" is deleted, leaving the
# following vowel to the vowel rules (e.g. "wo" gives о).
103wa → ва ;
104w → ;
2ca993e8
A
105#
106#
729e4ab9 107g → г ;
2ca993e8
A
108#
109#
729e4ab9
A
# "j" is rewritten to "zy" and re-scanned (cursor marker |): the "z" becomes
# дз and the "y" combines with the following vowel through the y rules, e.g.
# "ji" gives дзи.
110j → | zy ;
111z → дз ;
2ca993e8
A
112#
113#
51004dcb
A
# The d-initial digraph rules rewrite their input to a shorter Latin sequence
# and re-scan it (cursor marker |); the j, z and d rules then produce the
# Cyrillic. They are listed before the plain "d" rule, and the longer pattern
# of each pair comes first, so that the longest match is tried first.
# NOTE(review): the escaped tilde (\~) presumably marks a following small kana
# in the romanized input (see the kana examples) — confirm against the
# upstream romanizer.
114de\~ → | d ; # デュ
115dji\~ → | z ; # ヂャ, ヂュ, etc.
116dj → | j ; # ヂ
117do\~ → | d ; # ドゥ
118dzu\~ → | z ; # ヅァ, ヅィ, etc.
119dz → | z ; # ヅ
729e4ab9 120d → д ;
2ca993e8
A
121#
122#
729e4ab9 123b → б ;
51004dcb
A
# "vu~" is reduced to "v" and re-scanned, so the following vowel attaches
# directly to в.
124vu\~ → | v ; # ヴァ, etc.
125v → в ; # ?? unobserved
2ca993e8
A
126#
127#
729e4ab9 128p → п ;
2ca993e8
A
129#
130#
# Recompose the output. The transform named in parentheses (NFD) is the one
# used in the reverse direction, mirroring the ::NFD(NFC) step at the top.
729e4ab9 131::NFC(NFD);
2ca993e8 132