]>
Commit | Line | Data |
---|---|---|
1 | # © 2016 and later: Unicode, Inc. and others. | |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
4 | # File: Grek_Latn_UNGEGN.txt | |
5 | # Generated from CLDR | |
6 | # | |
7 | ||
8 | # For modern Greek, based on UNGEGN rules. | |
9 | # Rules are predicated on running NFD first, and NFC afterwards | |
10 | # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN | |
11 | # WARNING: need to add accents to both filters ### | |
12 | # :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ; | |
13 | :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; # forward filter: only Greek-script characters, Mn/Me marks and the listed punctuation are processed | |
14 | ::NFD (NFC) ; # forward direction applies NFD here; the parenthesised NFC is the step used by the reverse transform | |
15 | # Useful variables | |
16 | $lower = [[:latin:][:greek:] & [:Ll:]] ; # Latin or Greek letters that are lowercase (Ll) | |
17 | $upper = [[:latin:][:greek:] & [:Lu:]] ; # Latin or Greek letters that are uppercase (Lu) | |
18 | $accent = [[:Mn:][:Me:]] ; # any nonspacing (Mn) or enclosing (Me) mark | |
19 | $macron = \u0304 ; # not referenced by name in the visible rules | |
20 | $ddot = \u0308 ; # diaeresis (see the note on $shiftForwardVowels); not referenced by name in the visible rules | |
21 | $lcgvowel = [αεηιουω] ; # lowercase Greek vowels | |
22 | $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels | |
23 | $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case | |
24 | $lcgvowelC = [$lcgvowel $accent] ; # lowercase Greek vowels plus marks; not referenced in the visible rules | |
25 | $evowel = [aeiouyAEIOUY]; # Latin vowels, y included | |
26 | $vowel = [ $evowel $gvowel] ; # any Latin or Greek vowel; lookahead for the u/w fallback rules | |
27 | $beforeLower = $accent * $lower ; # lookahead: optional marks, then a lowercase letter (selects Ps/Th/Ch over PS/TH/CH) | |
28 | $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which γ/Γ is written n/N | |
29 | $egammaLike = [GKXCgkxc] ; # Latin letters before which n/N is read back as γ/Γ | |
30 | $smooth = \u0313 ; # deleted by the ancient-character rules | |
31 | $rough = \u0314 ; # deleted by the ancient-character rules | |
32 | $iotasub = \u0345 ; # deleted by the ancient-character rules | |
33 | $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # when one of these follows, υ after $fmaker becomes v instead of f | |
34 | $under = \u0331; # mark appended to a Latin letter to tell apart Greek letters sharing a base (e.g. η vs ι) | |
35 | $caron = \u030C; # not referenced by name in the visible rules (the SHO rules use a literal \u030C) | |
36 | $afterLetter = [:L:] [\'$accent]* ; # lookbehind: a letter, then any apostrophes or marks | |
37 | $beforeLetter = [\'$accent]* [:L:] ; # lookahead: any apostrophes or marks, then a letter | |
38 | # Fix punctuation | |
39 | # preserve original | |
40 | \: ↔ \: $under ; # a colon already in the input is tagged with $under so it differs from the colon produced from · | |
41 | \? ↔ \? $under ; # likewise for ?, which is also the output of the ; rule | |
42 | \; ↔ \? ; | |
43 | · ↔ \: ; | |
44 | # Fix any ancient characters that creep in | |
45 | \u0342 → \u0301 ; # these three marks are all replaced by U+0301 (forward only) | |
46 | \u0302 → \u0301 ; | |
47 | \u0300 → \u0301 ; | |
48 | $smooth → ; # empty right side: the mark is deleted | |
49 | $rough → ; | |
50 | $iotasub → ; | |
51 | ͺ → ; | |
52 | # need to have these up here so the rules don't mask | |
53 | η ↔ i $under ; # $under distinguishes η from ι, which maps to plain i | |
54 | Η ↔ I $under ; | |
55 | Ψ } $beforeLower ↔ Ps ; # title-case form when a lowercase letter follows | |
56 | Ψ ↔ PS ; | |
57 | ψ ↔ ps ; | |
58 | ω ↔ o $under ; # $under distinguishes ω from ο, which maps to plain o | |
59 | Ω ↔ O $under; | |
60 | # at beginning or end of word, convert mp to b | |
61 | [^[:L:]$accent] { μπ → b ; # preceded by a character that is neither a letter nor a mark | |
62 | μπ } [^[:L:]$accent] → b ; # followed by a character that is neither a letter nor a mark | |
63 | [^[:L:]$accent] { [Μμ][Ππ] → B ; # remaining case combinations (all-lowercase μπ is caught by the rules above) | |
64 | [Μμ][Ππ] } [^[:L:]$accent] → B ; | |
65 | μπ ← b ; # reverse direction only: b becomes μπ | |
66 | Μπ ← B } $beforeLower ; # reverse: B before a lowercase letter gives title-case Μπ | |
67 | ΜΠ ← B ; # reverse: any other B | |
68 | # handle diphthongs ending with upsilon | |
69 | ου ↔ ou ; # all four case combinations of ου are listed explicitly | |
70 | ΟΥ ↔ OU ; | |
71 | Ου ↔ Ou ; | |
72 | οΥ ↔ oU ; | |
73 | $fmaker = [aeiAEI] $under ? ; # Latin a, e or i of either case, optionally followed by $under; preceding context for the υ rules | |
74 | $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate | |
75 | $fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; # marks captured after υ are emitted before the v | |
76 | υ $1 ← ( $shiftForwardVowels )* v $under ; # reverse: the captured marks are placed after υ again | |
77 | $fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; # no $softener follows, so f is used | |
78 | υ $1 ← ( $shiftForwardVowels )* f $under ; | |
79 | $fmaker { Υ } $softener ↔ V $under ; | |
80 | $fmaker { Υ ↔ U $under ; # NOTE(review): capital gives U here while lowercase gives f above; confirm intended | |
81 | υ ↔ y ; # any other υ | |
82 | Υ ↔ Y ; | |
83 | # NORMAL | |
84 | α ↔ a ; | |
85 | Α ↔ A ; | |
86 | β ↔ v ; | |
87 | Β ↔ V ; | |
88 | γ } $gammaLike ↔ n } $egammaLike ; # γ before a gamma-like letter is written n; n is read back as γ only before the matching Latin letters | |
89 | γ ↔ g ; | |
90 | Γ } $gammaLike ↔ N } $egammaLike ; | |
91 | Γ ↔ G ; | |
92 | δ ↔ d ; | |
93 | Δ ↔ D ; | |
94 | ε ↔ e ; | |
95 | Ε ↔ E ; | |
96 | ζ ↔ z ; | |
97 | Ζ ↔ Z ; | |
98 | θ ↔ th ; | |
99 | Θ } $beforeLower ↔ Th ; # title-case form when a lowercase letter follows | |
100 | Θ ↔ TH ; | |
101 | ι ↔ i ; | |
102 | Ι ↔ I ; | |
103 | κ ↔ k ; | |
104 | Κ ↔ K ; | |
105 | λ ↔ l ; | |
106 | Λ ↔ L ; | |
107 | μ ↔ m ; | |
108 | Μ ↔ M ; | |
109 | ν } $gammaLike → n\' ; # apostrophe keeps this n from being read back as γ; NOTE(review): one-way here but two-way for Ν below, confirm the asymmetry is intended | |
110 | ν ↔ n ; | |
111 | Ν } $gammaLike ↔ N\' ; | |
112 | Ν ↔ N ; | |
113 | ξ ↔ x ; | |
114 | Ξ ↔ X ; | |
115 | ο ↔ o ; | |
116 | Ο ↔ O ; | |
117 | π ↔ p ; | |
118 | Π ↔ P ; | |
119 | ρ ↔ r ; | |
120 | Ρ ↔ R ; | |
121 | # insert separator before things that turn into s | |
122 | [Pp] { } [ςσΣϷϸϺϻ] → \' ; # zero-length match: inserts an apostrophe after P/p when a sigma-type letter follows, presumably so the result is not confused with ps from ψ | |
123 | # special S variants | |
124 | Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L | |
125 | ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L | |
126 | Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L | |
127 | ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L | |
128 | # $under marks the exception (this comment originally said "Caron", but the sigma rules below use $under) | |
129 | # before a letter, initial | |
130 | ς } $beforeLetter ↔ s $under } $beforeLetter; # ς before a letter is the unexpected form there, so it is tagged with $under | |
131 | σ } $beforeLetter ↔ s } $beforeLetter; | |
132 | # otherwise, after a letter = final | |
133 | $afterLetter { σ ↔ $afterLetter { s $under; # σ in final position is the unexpected form there, so it is tagged with $under | |
134 | $afterLetter { ς ↔ $afterLetter { s ; | |
135 | # otherwise (isolated) = initial | |
136 | ς ↔ s $under; | |
137 | σ ↔ s ; | |
138 | # [Pp] { Σ ↔ \'S ; | |
139 | Σ ↔ S ; | |
140 | τ ↔ t ; | |
141 | Τ ↔ T ; | |
142 | φ ↔ f ; | |
143 | Φ ↔ F ; | |
144 | χ ↔ ch ; | |
145 | Χ } $beforeLower ↔ Ch ; # title-case form when a lowercase letter follows | |
146 | Χ ↔ CH ; | |
147 | # Completeness for ASCII | |
148 | # $ignore = [[:Mark:]''] * ; | |
149 | | ch ← h ; # reverse only: h is rewritten to ch with the cursor placed before it, so the ch rule then applies | |
150 | | k ← c ; | |
151 | | i ← j ; | |
152 | | k ← q ; | |
153 | | b ← u } $vowel ; # u or w before a vowel is rewritten to b; otherwise to y (rules below) | |
154 | | b ← w } $vowel ; | |
155 | | y ← u ; | |
156 | | y ← w ; | |
157 | | Ch ← H ; # uppercase counterparts of the rules above | |
158 | | K ← C ; | |
159 | | I ← J ; | |
160 | | K ← Q ; | |
161 | | B ← W } $vowel ; | |
162 | | B ← U } $vowel ; | |
163 | | Y ← W ; | |
164 | | Y ← U ; | |
165 | # Completeness for Greek | |
166 | ϐ → | β ; # forward only: the symbol variant is replaced by the base letter with the cursor before it, so the base-letter rule then applies | |
167 | ϑ → | θ ; | |
168 | ϒ → | Υ ; | |
169 | ϕ → | φ ; | |
170 | ϖ → | π ; | |
171 | ϰ → | κ ; | |
172 | ϱ → | ρ ; | |
173 | ϲ → | σ ; | |
174 | Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL | |
175 | ϳ → j ; # emitted directly as Latin j (no cursor reset) | |
176 | ϴ → | Θ ; | |
177 | ϵ → | ε ; | |
178 | µ → | μ ; | |
179 | # delete any trailing ' marks used for roundtripping | |
180 | ← [Ππ] { \' } [Ss] ; # reverse only: drop the apostrophe between Π/π and a following S/s | |
181 | ← [Νν] { \' } $egammaLike ; # reverse only: drop the apostrophe between Ν/ν and a following gamma-like Latin letter | |
182 | ::NFC (NFD) ; # forward direction applies NFC here; the parenthesised NFD is the step used by the reverse transform | |
183 | # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD | |
184 | :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; # parenthesised, so this filter applies to the reverse transform only | |
185 |