]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Greek_Latin_UNGEGN.txt
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / data / translit / Greek_Latin_UNGEGN.txt
1 #--------------------------------------------------------------------
2 # Copyright (c) 1999-2004, International Business Machines
3 # Corporation and others. All Rights Reserved.
4 #--------------------------------------------------------------------
5 # For modern Greek, based on UNGEGN rules.
6
7 # Rules are predicated on running NFD first, and NFC afterwards
8 # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
9 # WARNING: need to add accents to both filters ###
10 # :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
11
12 :: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ; # global filter: only Greek-script characters, combining marks (Mn, Me) and the listed punctuation are processed
13 ::NFD (NFC) ; # decompose before the rules run; the parenthesized NFC is the counterpart used in the reverse direction
14
15 # Useful variables (sets and marks shared by the rules below)
16
17 $lower = [[:latin:][:greek:] & [:Ll:]] ; # lowercase letters of either script
18 $upper = [[:latin:][:greek:] & [:Lu:]] ; # uppercase letters of either script; not referenced by any rule visible in this file
19 $accent = [[:Mn:][:Me:]] ; # any nonspacing or enclosing mark
20
21 $macron = ̄ ; # combining macron; not referenced by any rule visible in this file
22 $ddot = ̈ ; # combining diaeresis; not referenced by any rule visible in this file
23
24 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
25 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
26 $gvowel = [$lcgvowel $ucgvowel] ;
27 $lcgvowelC = [$lcgvowel $accent] ; # not referenced by any rule visible in this file
28
29 $evowel = [aeiouyAEIOUY]; # Latin vowels
30 $vowel = [ $evowel $gvowel] ; # vowels of either script
31
32 $beforeLower = $accent * $lower ; # following context: optional marks, then a lowercase letter (selects Ps/Th/Ch over PS/TH/CH)
33
34 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which gamma is written n
35 $egammaLike = [GKXCgkxc] ; # Latin counterparts, used as context by the reverse rules
36 $smooth = ̓ ; # smooth breathing mark, deleted by the ancient-character rules
37 $rough = ̔ ; # rough breathing mark, deleted by the ancient-character rules
38 $iotasub = ͅ ; # combining iota subscript, deleted by the ancient-character rules
39
40 $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # letters before which the upsilon of a diphthong becomes v rather than f
41
42 $under = ̱; # combining mark placed under a Latin letter to tag the second Greek source of that letter (e.g. η vs ι, ω vs ο)
43
44 $caron = ̌; # combining caron; not referenced by any rule visible in this file
45
46 $afterLetter = [:L:] [\'$accent]* ; # preceding context: a letter, then any apostrophes or marks
47 $beforeLetter = [\'$accent]* [:L:] ; # following context: any apostrophes or marks, then a letter
48
49 # Fix punctuation
50
51 # preserve original: a literal colon or question mark in the Greek text is tagged with $under so the reverse rules can restore it
52 \: <> \: $under ;
53 \? <> \? $under ;
54
55 \; <> \? ; # semicolon on the Greek side <-> question mark on the Latin side
56 · <> \: ; # raised dot on the Greek side <-> colon on the Latin side
57
58 # Fix any ancient characters that creep in: other accent marks collapse to the acute, breathings and iota subscripts are dropped (forward direction only)
59
60 ͂ > ́ ; # perispomeni -> acute
61 ̂ > ́ ; # circumflex -> acute
62 ̀ > ́ ; # grave -> acute
63 $smooth > ; # delete
64 $rough > ; # delete
65 $iotasub > ; # delete
66 ͺ > ; # delete the spacing iota-subscript character
67
68 # need to have these up here so the rules don't mask: rules are tried in file order, so the reverse mappings of "i $under", "o $under" and "ps" must come before the plain i, o, p and s rules further down
69
70 η <> i $under ;
71 Η <> I $under ;
72
73 Ψ } $beforeLower <> Ps ; # title case when a lowercase letter follows
74 Ψ <> PS ; # otherwise all caps
75 ψ <> ps ;
76
77 ω <> o $under ;
78 Ω <> O $under;
79
80 # at beginning or end of word, convert mp to b (a neighbour that is neither a letter nor an accent marks the word edge)
81
82 [^[:L:]$accent] { μπ > b ; # word-initial, lowercase
83 μπ } [^[:L:]$accent] > b ; # word-final, lowercase
84 [^[:L:]$accent] { [Μμ][Ππ] > B ; # word-initial, any remaining case combination
85 [Μμ][Ππ] } [^[:L:]$accent] > B ; # word-final, any remaining case combination
86
87 μπ < b ; # reverse direction: every b becomes μπ
88 Μπ < B } $beforeLower ; # reverse direction: B before a lowercase letter becomes title-case Μπ
89 ΜΠ < B ; # reverse direction: any other B becomes ΜΠ
90
91 # handle diphthongs ending with upsilon: ου maps to ou; after an already-converted a, e or i ($fmaker) upsilon maps to v before a $softener and to f otherwise, tagged with $under; any other upsilon maps to y
92
93 ου <> ou ;
94 ΟΥ <> OU ;
95 Ου <> Ou ;
96 οΥ <> oU ;
97
98 $fmaker = [aeiAEI] $under ? ; # Latin a/e/i, optionally carrying $under, as preceding context
99 $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
100
101 $fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ; # marks that followed the upsilon ($1) are moved in front of the v
102 υ $1 < ( $shiftForwardVowels )* v $under ; # reverse: the marks are moved back after the upsilon
103
104 $fmaker { υ ( $shiftForwardVowels )* } > $1 f $under; # same as above, but not before a $softener: f instead of v
105 υ $1 < ( $shiftForwardVowels )* f $under ;
106
107 $fmaker { Υ } $softener <> V $under ;
108 $fmaker { Υ <> U $under ; # NOTE(review): the lowercase counterpart above yields f $under; confirm that U (not F) is intended here
109
110 υ <> y ;
111 Υ <> Y ;
112
113 # NORMAL (one-to-one letter mappings; rules with a context must stay ahead of the plain rule for the same letter)
114
115 α <> a ;
116 Α <> A ;
117
118 β <> v ;
119 Β <> V ;
120
121 γ } $gammaLike <> n } $egammaLike ; # gamma before a $gammaLike letter is written n
122 γ <> g ;
123 Γ } $gammaLike <> N } $egammaLike ;
124 Γ <> G ;
125
126 δ <> d ;
127 Δ <> D ;
128
129 ε <> e ;
130 Ε <> E ;
131
132 ζ <> z ;
133 Ζ <> Z ;
134
135 θ <> th ;
136 Θ } $beforeLower <> Th ; # title case when a lowercase letter follows
137 Θ <> TH ; # otherwise all caps
138
139 ι <> i ;
140 Ι <> I ;
141
142 κ <> k ;
143 Κ <> K ;
144
145 λ <> l ;
146 Λ <> L ;
147
148 μ <> m ;
149 Μ <> M ;
150
151 ν } $gammaLike > n\' ; # apostrophe keeps a real nu distinct from the n produced by gamma; NOTE(review): forward-only, unlike the uppercase rule two lines below - confirm this is intentional
152 ν <> n ;
153 Ν } $gammaLike <> N\' ;
154 Ν <> N ;
155
156 ξ <> x ;
157 Ξ <> X ;
158
159 ο <> o ;
160 Ο <> O ;
161
162 π <> p ;
163 Π <> P ;
164
165 ρ <> r ;
166 Ρ <> R ;
167
168 # insert separator before things that turn into s, so that an already-converted p followed by a sigma-like letter does not read as the ps produced by psi
169 [Pp] { } [ςσΣϷϸϺϻ] > \' ;
170
171 # special S variants
172
173 Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
174 ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
175 Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
176 ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
177
178 # Caron means exception
179 # NOTE(review): the rules below tag the unexpected sigma form with $under, not with a caron - confirm and update the line above
180 # before a letter, initial
181 ς } $beforeLetter <> s $under } $beforeLetter; # final-form sigma in a non-final position is the exception
182 σ } $beforeLetter <> s } $beforeLetter;
183
184 # otherwise, after a letter = final
185 $afterLetter { σ <> $afterLetter { s $under; # non-final form in final position is the exception
186 $afterLetter { ς <> $afterLetter { s ;
187
188 # otherwise (isolated) = initial
189 ς <> s $under;
190 σ <> s ;
191
192 # [Pp] { Σ <> \'S ;
193 Σ <> S ;
194
195 τ <> t ;
196 Τ <> T ;
197
198 φ <> f ;
199 Φ <> F ;
200
201 χ <> ch ;
202 Χ } $beforeLower <> Ch ; # title case when a lowercase letter follows
203 Χ <> CH ; # otherwise all caps
204
205 # Completeness for ASCII: reverse-only rules for Latin letters that no Greek letter produces. Each is rewritten to the Latin text on the left, and the leading | leaves the cursor in front of it so the replacement is transliterated again.
206
207 # $ignore = [[:Mark:]''] * ;
208
209 | ch < h ;
210 | k < c ;
211 | i < j ;
212 | k < q ;
213 | b < u } $vowel ; # u or w before a vowel is treated as b
214 | b < w } $vowel ;
215 | y < u ; # any other u or w is treated as y
216 | y < w ;
217
218 | Ch < H ;
219 | K < C ;
220 | I < J ;
221 | K < Q ;
222 | B < W } $vowel ;
223 | B < U } $vowel ;
224 | Y < W ;
225 | Y < U ;
226
227 # Completeness for Greek: forward-only rules that replace symbol and variant letterforms with the ordinary letter; the | leaves the cursor in front of the replacement so it is transliterated again.
228
229 ϐ > | β ;
230 ϑ > | θ ;
231 ϒ > | Υ ;
232 ϕ > | φ ;
233 ϖ > | π ;
234
235 ϰ > | κ ;
236 ϱ > | ρ ;
237 ϲ > | σ ;
238 Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
239 ϳ > j ; # maps straight to Latin j, no second pass
240 ϴ > | Θ ;
241 ϵ > | ε ;
242 µ > | μ ; # NOTE(review): the micro sign may not be covered by [:Greek:] in the global filter, in which case this rule never sees it - confirm
243
244 # delete any trailing ' marks used for roundtripping (reverse direction only: the apostrophes inserted after p and n by the forward rules)
245
246 < [Ππ] { \' } [Ss] ;
247 < [Νν] { \' } $egammaLike ;
248
249 ::NFC (NFD) ; # recompose after the forward rules; the parenthesized NFD is the counterpart used in the reverse direction
250
251 # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
252 :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; # parenthesized, so it applies to the reverse direction only: Latin letters, combining marks, apostrophe, colon and question mark