]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | #--------------------------------------------------------------------\r |
2 | # Copyright (c) 1999-2004, International Business Machines\r | |
3 | # Corporation and others. All Rights Reserved.\r | |
4 | #--------------------------------------------------------------------\r | |
5 | # For modern Greek, based on UNGEGN rules.\r | |
6 | \r | |
7 | # Rules are predicated on running NFD first, and NFC afterwards\r | |
8 | # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN\r | |
9 | # WARNING: need to add accents to both filters ###\r | |
10 | # :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;\r | |
11 | \r | |
12 | :: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;\r | |
13 | ::NFD (NFC) ;\r | |
14 | \r | |
15 | # Useful variables: character classes, combining marks and context patterns referenced by the rules below. NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are defined in this section but not referenced by any rule visible in this file - confirm before removing or relying on them.\r | |
16 | \r | |
17 | $lower = [[:latin:][:greek:] & [:Ll:]] ;\r | |
18 | $upper = [[:latin:][:greek:] & [:Lu:]] ;\r | |
19 | $accent = [[:Mn:][:Me:]] ;\r | |
20 | \r | |
21 | $macron = ̄ ;\r | |
22 | $ddot = ̈ ;\r | |
23 | \r | |
24 | $lcgvowel = [αεηιουω] ;\r | |
25 | $ucgvowel = [ΑΕΗΙΟΥΩ] ;\r | |
26 | $gvowel = [$lcgvowel $ucgvowel] ;\r | |
27 | $lcgvowelC = [$lcgvowel $accent] ;\r | |
28 | \r | |
29 | $evowel = [aeiouyAEIOUY];\r | |
30 | $vowel = [ $evowel $gvowel] ;\r | |
31 | \r | |
32 | $beforeLower = $accent * $lower ;\r | |
33 | \r | |
34 | $gammaLike = [ΓΚΞΧγκξχϰ] ;\r | |
35 | $egammaLike = [GKXCgkxc] ;\r | |
36 | $smooth = ̓ ;\r | |
37 | $rough = ̔ ;\r | |
38 | $iotasub = ͅ ;\r | |
39 | \r | |
40 | $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;\r | |
41 | \r | |
42 | $under = ̱;\r | |
43 | \r | |
44 | $caron = ̌;\r | |
45 | \r | |
46 | $afterLetter = [:L:] [\'$accent]* ;\r | |
47 | $beforeLetter = [\'$accent]* [:L:] ;\r | |
48 | \r | |
49 | # Fix punctuation: Greek ';' corresponds to Latin '?', and Greek '·' corresponds to Latin ':'\r | |
50 | \r | |
51 | # preserve original: a ':' or '?' already present in the input is tagged with $under, keeping it distinct from the ':' and '?' generated by the two rules after this pair\r | |
52 | \: <> \: $under ;\r | |
53 | \? <> \? $under ;\r | |
54 | \r | |
55 | \; <> \? ;\r | |
56 | · <> \: ;\r | |
57 | \r | |
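58 | # Fix any ancient characters that creep in (forward direction only): the other polytonic accents are folded into the acute accent, and breathing marks and iota subscripts are deleted\r | |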
59 | \r | |
60 | ͂ > ́ ;\r | |
61 | ̂ > ́ ;\r | |
62 | ̀ > ́ ;\r | |
63 | $smooth > ;\r | |
64 | $rough > ;\r | |
65 | $iotasub > ;\r | |
66 | ͺ > ;\r | |
67 | \r | |
68 | # need to have these up here so the rules don't mask: presumably because rules are tried in order, the longer Latin forms (i or o followed by $under, and ps) have to be listed ahead of the plain i, o and p rules further down - confirm\r | |
69 | \r | |
70 | η <> i $under ;\r | |
71 | Η <> I $under ;\r | |
72 | \r | |
73 | Ψ } $beforeLower <> Ps ;\r | |
74 | Ψ <> PS ;\r | |
75 | ψ <> ps ;\r | |
76 | \r | |
77 | ω <> o $under ;\r | |
78 | Ω <> O $under;\r | |
79 | \r | |
80 | # at beginning or end of word, convert mp to b (a word edge here is a neighbouring character that is neither a letter nor an accent); in the reverse direction b becomes μπ, B before a lowercase letter becomes Μπ, and any other B becomes ΜΠ\r | |
81 | \r | |
82 | [^[:L:]$accent] { μπ > b ;\r | |
83 | μπ } [^[:L:]$accent] > b ;\r | |
84 | [^[:L:]$accent] { [Μμ][Ππ] > B ;\r | |
85 | [Μμ][Ππ] } [^[:L:]$accent] > B ;\r | |
86 | \r | |
87 | μπ < b ;\r | |
88 | Μπ < B } $beforeLower ;\r | |
89 | ΜΠ < B ;\r | |
90 | \r | |
91 | # handle diphthongs ending with upsilon: ου is ou; after an already-converted a, e or i ($fmaker) υ becomes v when a $softener letter follows and f otherwise, both tagged with $under, with any marks on the υ emitted in front of the new consonant; every other υ is y. NOTE(review): uppercase Υ after $fmaker with no $softener following maps to U $under rather than F $under - confirm this asymmetry is intended\r | |
92 | \r | |
93 | ου <> ou ;\r | |
94 | ΟΥ <> OU ;\r | |
95 | Ου <> Ou ;\r | |
96 | οΥ <> oU ;\r | |
97 | \r | |
98 | $fmaker = [aeiAEI] $under ? ;\r | |
99 | $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate\r | |
100 | \r | |
101 | $fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;\r | |
102 | υ $1 < ( $shiftForwardVowels )* v $under ;\r | |
103 | \r | |
104 | $fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;\r | |
105 | υ $1 < ( $shiftForwardVowels )* f $under ;\r | |
106 | \r | |
107 | $fmaker { Υ } $softener <> V $under ;\r | |
108 | $fmaker { Υ <> U $under ;\r | |
109 | \r | |
110 | υ <> y ;\r | |
111 | Υ <> Y ;\r | |
112 | \r | |
113 | # NORMAL: plain letter-by-letter mappings. Context-sensitive cases: γ before a $gammaLike letter is n, ν before a $gammaLike letter gets a trailing ' to stay distinct from that n, and Θ is Th before a lowercase letter and TH otherwise. NOTE(review): the lowercase ν + ' rule is forward-only (>) while the uppercase Ν + ' rule is bidirectional (<>) - confirm the asymmetry is intended\r | |
114 | \r | |
115 | α <> a ;\r | |
116 | Α <> A ;\r | |
117 | \r | |
118 | β <> v ;\r | |
119 | Β <> V ;\r | |
120 | \r | |
121 | γ } $gammaLike <> n } $egammaLike ;\r | |
122 | γ <> g ;\r | |
123 | Γ } $gammaLike <> N } $egammaLike ;\r | |
124 | Γ <> G ;\r | |
125 | \r | |
126 | δ <> d ;\r | |
127 | Δ <> D ;\r | |
128 | \r | |
129 | ε <> e ;\r | |
130 | Ε <> E ;\r | |
131 | \r | |
132 | ζ <> z ;\r | |
133 | Ζ <> Z ;\r | |
134 | \r | |
135 | θ <> th ;\r | |
136 | Θ } $beforeLower <> Th ;\r | |
137 | Θ <> TH ;\r | |
138 | \r | |
139 | ι <> i ;\r | |
140 | Ι <> I ;\r | |
141 | \r | |
142 | κ <> k ;\r | |
143 | Κ <> K ;\r | |
144 | \r | |
145 | λ <> l ;\r | |
146 | Λ <> L ;\r | |
147 | \r | |
148 | μ <> m ;\r | |
149 | Μ <> M ;\r | |
150 | \r | |
151 | ν } $gammaLike > n\' ;\r | |
152 | ν <> n ;\r | |
153 | Ν } $gammaLike <> N\' ;\r | |
154 | Ν <> N ;\r | |
155 | \r | |
156 | ξ <> x ;\r | |
157 | Ξ <> X ;\r | |
158 | \r | |
159 | ο <> o ;\r | |
160 | Ο <> O ;\r | |
161 | \r | |
162 | π <> p ;\r | |
163 | Π <> P ;\r | |
164 | \r | |
165 | ρ <> r ;\r | |
166 | Ρ <> R ;\r | |
167 | \r | |
168 | # insert separator before things that turn into s: a ' is placed between an already-produced P/p and a following sigma-like letter, so that π followed by σ does not come out as the same "ps" that ψ produces\r | |
169 | [Pp] { } [ςσΣϷϸϺϻ] > \' ; \r | |
170 | \r | |
171 | # special S variants: each maps to its own accented Latin S so it stays distinct from sigma\r | |
172 | \r | |
173 | Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L \r | |
174 | ϸ <> š ; # ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L \r | |
175 | Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L \r | |
176 | ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L \r | |
177 | \r | |
178 | # Caron means exception. NOTE(review): the sigma rules below tag the unexpected sigma form with $under and never reference $caron - this heading looks stale, confirm\r | |
179 | \r | |
180 | # before a letter, initial: σ is the expected form here and maps to plain s; a ς found here is the exception and gets $under\r | |
181 | ς } $beforeLetter <> s $under } $beforeLetter;\r | |
182 | σ } $beforeLetter <> s } $beforeLetter;\r | |
183 | \r | |
184 | # otherwise, after a letter = final: ς is the expected form here and maps to plain s; a σ found here is the exception and gets $under\r | |
185 | $afterLetter { σ <> $afterLetter { s $under;\r | |
186 | $afterLetter { ς <> $afterLetter { s ;\r | |
187 | \r | |
188 | # otherwise (isolated) = initial: σ maps to plain s and ς gets $under\r | |
189 | ς <> s $under;\r | |
190 | σ <> s ;\r | |
191 | \r | |
192 | # [Pp] { Σ <> \'S ;\r | |
193 | Σ <> S ;\r | |
194 | \r | |
195 | τ <> t ;\r | |
196 | Τ <> T ;\r | |
197 | \r | |
198 | φ <> f ;\r | |
199 | Φ <> F ;\r | |
200 | \r | |
201 | χ <> ch ;\r | |
202 | Χ } $beforeLower <> Ch ;\r | |
203 | Χ <> CH ;\r | |
204 | \r | |
205 | # Completeness for ASCII: reverse-only rules for Latin letters that have no single-letter rule of their own (h, c, j, q, u, w and their capitals); each is rewritten to a Latin spelling that does have a rule, and the leading | puts the cursor in front of that spelling so it is transliterated again. u and w become b only when a $vowel follows, otherwise y\r | |
206 | \r | |
207 | # $ignore = [[:Mark:]''] * ;\r | |
208 | \r | |
209 | | ch < h ;\r | |
210 | | k < c ;\r | |
211 | | i < j ;\r | |
212 | | k < q ;\r | |
213 | | b < u } $vowel ;\r | |
214 | | b < w } $vowel ;\r | |
215 | | y < u ;\r | |
216 | | y < w ;\r | |
217 | \r | |
218 | | Ch < H ;\r | |
219 | | K < C ;\r | |
220 | | I < J ;\r | |
221 | | K < Q ;\r | |
222 | | B < W } $vowel ;\r | |
223 | | B < U } $vowel ;\r | |
224 | | Y < W ;\r | |
225 | | Y < U ;\r | |
226 | \r | |
227 | # Completeness for Greek: forward-only rules that replace Greek symbol and variant letterforms with the ordinary letter; the | puts the cursor in front of the replacement so the regular rules then convert it. ϳ is the exception and maps directly to j\r | |
228 | \r | |
229 | ϐ > | β ;\r | |
230 | ϑ > | θ ;\r | |
231 | ϒ > | Υ ;\r | |
232 | ϕ > | φ ;\r | |
233 | ϖ > | π ;\r | |
234 | \r | |
235 | ϰ > | κ ;\r | |
236 | ϱ > | ρ ;\r | |
237 | ϲ > | σ ;\r | |
238 | Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL\r | |
239 | ϳ > j ;\r | |
240 | ϴ > | Θ ;\r | |
241 | ϵ > | ε ;\r | |
242 | µ > | μ ;\r | |
243 | \r | |
244 | # delete any trailing ' marks used for roundtripping (reverse direction only): the ' that separates an already-converted Π/π from a following S/s, and the ' that separates Ν/ν from a following $egammaLike letter\r | |
245 | \r | |
246 | < [Ππ] { \' } [Ss] ;\r | |
247 | < [Νν] { \' } $egammaLike ;\r | |
248 | \r | |
249 | ::NFC (NFD) ;\r | |
250 | \r | |
251 | # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD (the parenthesised filter below applies to the reverse, Latin-to-Greek, direction)\r | |
252 | :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;\r | |