]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Greek_Latin_UNGEGN.txt
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / data / translit / Greek_Latin_UNGEGN.txt
CommitLineData
374ca955
A
1#--------------------------------------------------------------------\r
2# Copyright (c) 1999-2004, International Business Machines\r
3# Corporation and others. All Rights Reserved.\r
4#--------------------------------------------------------------------\r
5# For modern Greek, based on UNGEGN rules.\r
6\r
7# Rules are predicated on running NFD first, and NFC afterwards\r
8# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN\r
9# WARNING: need to add accents to both filters ###\r
10# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;\r
11\r
12:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;\r
13::NFD (NFC) ;\r
14\r
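15# Useful variables (NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are not referenced by any rule visible in this file - confirm before removing)\r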
16\r
17$lower = [[:latin:][:greek:] & [:Ll:]] ;\r
18$upper = [[:latin:][:greek:] & [:Lu:]] ;\r
19$accent = [[:Mn:][:Me:]] ;\r
20\r
21$macron = ̄ ;\r
22$ddot = ̈ ;\r
23\r
24$lcgvowel = [αεηιουω] ;\r
25$ucgvowel = [ΑΕΗΙΟΥΩ] ;\r
26$gvowel = [$lcgvowel $ucgvowel] ;\r
27$lcgvowelC = [$lcgvowel $accent] ;\r
28\r
29$evowel = [aeiouyAEIOUY];\r
30$vowel = [ $evowel $gvowel] ;\r
31\r
32$beforeLower = $accent * $lower ;\r
33\r
34$gammaLike = [ΓΚΞΧγκξχϰ] ;\r
35$egammaLike = [GKXCgkxc] ;\r
36$smooth = ̓ ;\r
37$rough = ̔ ;\r
38$iotasub = ͅ ;\r
39\r
40$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;\r
41\r
42$under = ̱;\r
43\r
44$caron = ̌;\r
45\r
46$afterLetter = [:L:] [\'$accent]* ;\r
47$beforeLetter = [\'$accent]* [:L:] ;\r
48\r
49# Fix punctuation: Greek ';' maps to '?' and '·' maps to ':'\r
50\r
51# preserve original ':' and '?' by tagging them with $under, so they stay distinct from the ':' and '?' produced by the two rules after these\r
52\: <> \: $under ;\r
53\? <> \? $under ;\r
54\r
55\; <> \? ;\r
56· <> \: ;\r
57\r
58# Fix any ancient characters that creep in\r
59\r
60͂ > ́ ;\r
61̂ > ́ ;\r
62̀ > ́ ;\r
63$smooth > ;\r
64$rough > ;\r
65$iotasub > ;\r
66ͺ > ;\r
67\r
68# These must come before the single-letter rules further down, which would otherwise mask them (e.g. "i $under" has to be tried before plain "i", and "ps" before plain "p")\r
69\r
70η <> i $under ;\r
71Η <> I $under ;\r
72\r
73Ψ } $beforeLower <> Ps ;\r
74Ψ <> PS ;\r
75ψ <> ps ;\r
76\r
77ω <> o $under ;\r
78Ω <> O $under;\r
79\r
80# at beginning or end of word (i.e. next to a character that is neither a letter nor an accent), convert μπ to b; in the reverse direction every b/B becomes μπ/Μπ/ΜΠ\r
81\r
82[^[:L:]$accent] { μπ > b ;\r
83μπ } [^[:L:]$accent] > b ;\r
84[^[:L:]$accent] { [Μμ][Ππ] > B ;\r
85[Μμ][Ππ] } [^[:L:]$accent] > B ;\r
86\r
87μπ < b ;\r
88Μπ < B } $beforeLower ;\r
89ΜΠ < B ;\r
90\r
91# handle diphthongs ending with upsilon: ου <> ou; after $fmaker, υ becomes v $under before $softener and f $under otherwise (NOTE(review): capital Υ in the "otherwise" case yields U $under rather than F $under - confirm intended)\r
92\r
93ου <> ou ;\r
94ΟΥ <> OU ;\r
95Ου <> Ou ;\r
96οΥ <> oU ;\r
97\r
98$fmaker = [aeiAEI] $under ? ;\r
99$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate\r
100\r
101$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;\r
102υ $1 < ( $shiftForwardVowels )* v $under ;\r
103\r
104$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;\r
105υ $1 < ( $shiftForwardVowels )* f $under ;\r
106\r
107$fmaker { Υ } $softener <> V $under ;\r
108$fmaker { Υ <> U $under ;\r
109\r
110υ <> y ;\r
111Υ <> Y ;\r
112\r
113# NORMAL: letter-by-letter mappings, with context rules for gamma and nu before a gamma-like letter (NOTE(review): "ν } $gammaLike" is one-way (>) while the capital "Ν } $gammaLike" is two-way (<>) - confirm this asymmetry is intended)\r
114\r
115α <> a ;\r
116Α <> A ;\r
117\r
118β <> v ;\r
119Β <> V ;\r
120\r
121γ } $gammaLike <> n } $egammaLike ;\r
122γ <> g ;\r
123Γ } $gammaLike <> N } $egammaLike ;\r
124Γ <> G ;\r
125\r
126δ <> d ;\r
127Δ <> D ;\r
128\r
129ε <> e ;\r
130Ε <> E ;\r
131\r
132ζ <> z ;\r
133Ζ <> Z ;\r
134\r
135θ <> th ;\r
136Θ } $beforeLower <> Th ;\r
137Θ <> TH ;\r
138\r
139ι <> i ;\r
140Ι <> I ;\r
141\r
142κ <> k ;\r
143Κ <> K ;\r
144\r
145λ <> l ;\r
146Λ <> L ;\r
147\r
148μ <> m ;\r
149Μ <> M ;\r
150\r
151ν } $gammaLike > n\' ;\r
152ν <> n ;\r
153Ν } $gammaLike <> N\' ;\r
154Ν <> N ;\r
155\r
156ξ <> x ;\r
157Ξ <> X ;\r
158\r
159ο <> o ;\r
160Ο <> O ;\r
161\r
162π <> p ;\r
163Π <> P ;\r
164\r
165ρ <> r ;\r
166Ρ <> R ;\r
167\r
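168# insert ' as a separator between Latin P/p (already converted from Π/π) and a following sigma-like letter, so the result is not confused with the ps/PS produced from ψ/Ψ\r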
169[Pp] { } [ςσΣϷϸϺϻ] > \' ; \r
170\r
171# special S variants\r
172\r
173Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L \r
174ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L \r
175Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L \r
176ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L \r
177\r
178# $under marks the exceptional sigma form (the rules here use $under, not $caron): ς where σ is the default, or σ where ς is the default\r
179\r
180# before a letter = initial or medial position: σ is the default, ς is the exception\r
181ς } $beforeLetter <> s $under } $beforeLetter;\r
182σ } $beforeLetter <> s } $beforeLetter;\r
183\r
184# otherwise, after a letter = final position: ς is the default, σ is the exception\r
185$afterLetter { σ <> $afterLetter { s $under;\r
186$afterLetter { ς <> $afterLetter { s ;\r
187\r
188# otherwise (isolated) = treated like initial: σ is the default, ς is the exception\r
189ς <> s $under;\r
190σ <> s ;\r
191\r
192# [Pp] { Σ <> \'S ;\r
193Σ <> S ;\r
194\r
195τ <> t ;\r
196Τ <> T ;\r
197\r
198φ <> f ;\r
199Φ <> F ;\r
200\r
201χ <> ch ;\r
202Χ } $beforeLower <> Ch ;\r
203Χ <> CH ;\r
204\r
205# Completeness for ASCII (reverse direction only): Latin letters not otherwise handled are rewritten to another Latin spelling; the leading | leaves the cursor before that text so the earlier rules then convert it to Greek\r
206\r
207# $ignore = [[:Mark:]''] * ;\r
208\r
209| ch < h ;\r
210| k < c ;\r
211| i < j ;\r
212| k < q ;\r
213| b < u } $vowel ;\r
214| b < w } $vowel ;\r
215| y < u ;\r
216| y < w ;\r
217\r
218| Ch < H ;\r
219| K < C ;\r
220| I < J ;\r
221| K < Q ;\r
222| B < W } $vowel ;\r
223| B < U } $vowel ;\r
224| Y < W ;\r
225| Y < U ;\r
226\r
227# Completeness for Greek (forward direction only): variant and symbol forms are replaced by the ordinary letter with the cursor (|) left before it, so the earlier rules then convert that letter; the yot rule outputs j directly\r
228\r
229ϐ > | β ;\r
230ϑ > | θ ;\r
231ϒ > | Υ ;\r
232ϕ > | φ ;\r
233ϖ > | π ;\r
234\r
235ϰ > | κ ;\r
236ϱ > | ρ ;\r
237ϲ > | σ ;\r
238Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL\r
239ϳ > j ;\r
240ϴ > | Θ ;\r
241ϵ > | ε ;\r
242µ > | μ ;\r
243\r
244# Reverse direction only: delete the ' separators that were inserted for round-tripping (between Π/π and S/s, and between Ν/ν and a Latin gamma-like letter)\r
245\r
246 < [Ππ] { \' } [Ss] ;\r
247 < [Νν] { \' } $egammaLike ;\r
248\r
249::NFC (NFD) ;\r
250\r
251# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD\r
252:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;\r