]>
Commit | Line | Data |
---|---|---|
1 | # © 2016 and later: Unicode, Inc. and others. | |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
4 | # File: Grek_Latn.txt | |
5 | # Generated from CLDR | |
6 | # | |
7 | ||
8 | # Rules are predicated on running NFD first, and NFC afterwards | |
9 | # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ; | |
10 | # MINIMAL FILTER GENERATED FOR: Greek-Latin | |
11 | :: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ; | |
12 | :: NFD (NFC) ; | |
13 | # TEST CASES | |
14 | # Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος | |
15 | # ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ | |
16 | # ᾳ ῃ ῳ ὃ ὄ | |
17 | # ὠς ὡς ὢς ὣς | |
18 | # Ὠς Ὡς Ὢς Ὣς | |
19 | # ὨΣ ὩΣ ὪΣ ὫΣ | |
20 | # Ạ, ạ, Ẹ, ẹ, Ọ, ọ | |
21 | # Useful variables: character sets and combining-mark shorthands used by the rules below | |
22 | $lower = [[:latin:][:greek:] & [:Ll:]]; # Latin or Greek lowercase letters | |
23 | $glower = [[:greek:] & [:Ll:]]; # Greek lowercase letters only | |
24 | $upper = [[:latin:][:greek:] & [:Lu:]] ; # Latin or Greek uppercase letters | |
25 | $accent = [:M:] ; # any mark | |
26 | # NOTE: restrict to just the Greek & Latin accents that we care about | |
27 | # TODO: broaden out once iteration is fixed | |
28 | $accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; # not the same set as $accent_minus, which is defined in the IOTA section | |
29 | $macron = \u0304 ; # U+0304 COMBINING MACRON | |
30 | $ddot = \u0308 ; # U+0308 COMBINING DIAERESIS | |
31 | $ddotmac = [$ddot$macron]; # either of the two marks above | |
32 | $lcgvowel = [αεηιουω] ; | |
33 | $ucgvowel = [ΑΕΗΙΟΥΩ] ; | |
34 | $gvowel = [$lcgvowel $ucgvowel] ; | |
35 | $lcgvowelC = [$lcgvowel $accent] ; | |
36 | $evowel = [aeiouyAEIOUY]; # Latin vowels, y included | |
37 | $evowel2 = [iuyIUY]; # used below as the second vowel of a two-vowel sequence | |
38 | $vowel = [ $evowel $gvowel] ; | |
39 | $gammaLike = [ΓΚΞΧγκξχϰ] ; # letters before which gamma is written n (see the NORMAL rules) | |
40 | $egammaLike = [GKXCgkxc] ; # Latin-side counterpart used by the reverse rules | |
41 | $smooth = \u0313 ; # U+0313 COMBINING COMMA ABOVE | |
42 | $rough = \u0314 ; # U+0314 COMBINING REVERSED COMMA ABOVE | |
43 | $iotasub = \u0345 ; # U+0345 COMBINING GREEK YPOGEGRAMMENI | |
44 | $evowel_i = [$evowel-[iI]] ; # $evowel without i and I | |
45 | $evowel2_i = [uyUY]; # $evowel2 without i and I | |
46 | $underbar = \u0331; # U+0331 COMBINING MACRON BELOW; marks exceptions in the sigma rules | |
47 | $afterLetter = [:L:] [[:M:]\']* ; # a letter followed by any number of marks or apostrophes | |
48 | $beforeLetter = [[:M:]\']* [:L:] ; # any number of marks or apostrophes, then a letter | |
49 | $beforeLower = $accent * $lower ; # any number of marks, then a lowercase letter | |
50 | $notLetter = [^[:L:][:M:]] ; # anything that is neither a letter nor a mark | |
51 | $under = \u0331; # same code point as $underbar; this name is used by the punctuation rules | |
52 | # Fix punctuation: Greek ';' and '·' become Latin '?' and ':' | |
53 | # preserve original: a ':' or '?' already in the source gets $under appended, keeping it distinct from the ones produced by the two rules after | |
54 | \: ↔ \: $under ; | |
55 | \? ↔ \? $under ; | |
56 | \; ↔ \? ; | |
57 | · ↔ \: ; | |
58 | # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve | |
59 | \u0342 ↔ \u0302 ; # U+0342 COMBINING GREEK PERISPOMENI <-> U+0302 COMBINING CIRCUMFLEX ACCENT | |
60 | # IOTA: convert iota subscript to iota | |
61 | # first make previous alpha long! | |
62 | $accent_minus = [[$accent]-[$iotasub$macron]]; # any mark except iota subscript and macron (differs from $accentMinus) | |
63 | Α } $accent_minus * $iotasub → | Α $macron ; # cursor is placed before the alpha, so the text is re-scanned; presumably excluding $macron above stops this rule from firing twice | |
64 | α } $accent_minus * $iotasub → | α $macron ; | |
65 | # now convert to uppercase if after uppercase, otherwise to lowercase | |
66 | $upper $accent * { $iotasub → I ; | |
67 | $iotasub → i ; | |
68 | | $1 $iotasub ← ($evowel $macron $accentMinus *) i ; # reverse only: i after a macron-bearing Latin vowel goes back to iota subscript | |
69 | | $1 $iotasub ← ($evowel $macron $accentMinus *) I ; # same for capital I | |
70 | # BREATHING | |
71 | # Convert rough breathing to h, and move before letters. | |
72 | # Capital vowel + rough breathing before a lowercase letter becomes titlecase: A ` x → H a x | |
73 | Α ($macron?) $rough } $beforeLower → H | α $1; | |
74 | Ε $rough } $beforeLower → H | ε; | |
75 | Η $rough } $beforeLower → H | η ; | |
76 | Ι ($ddot?) $rough } $beforeLower → H | ι $1; | |
77 | Ο $rough } $beforeLower → H | ο ; | |
78 | Υ $rough } $beforeLower → H | υ ; | |
79 | Ω ($ddot?) $rough } $beforeLower → H | ω $1; # NOTE(review): optional $ddot is captured for Ω but not for Υ - confirm this is intended | |
80 | # Capital vowel + lowercase Greek letter + rough breathing: A x ` → H a x | |
81 | Α ($glower $macron?) $rough → H | α $1 ; | |
82 | Ε ($glower) $rough → H | ε $1 ; | |
83 | Η ($glower) $rough → H | η $1 ; | |
84 | Ι ($glower $ddot?) $rough → H | ι $1 ; | |
85 | Ο ($glower) $rough → H | ο $1 ; | |
86 | Υ ($glower) $rough → H | υ $1 ; | |
87 | Ω ($glower $ddot?) $rough → H | ω $1 ; # NOTE(review): same Ω versus Υ asymmetry as in the group above | |
88 | #Otherwise, make x ` into h x and X ` into H X | |
89 | ($lcgvowel + $ddotmac? ) $rough → h | $1 ; | |
90 | ($gvowel + $ddotmac? ) $rough → H | $1 ; | |
91 | # Go backwards with H: reverse-only rules that turn a leading h/H back into $rough placed after the vowel sequence | |
92 | | $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ; | |
93 | | $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ; | |
94 | | $1 $rough ← h ($evowel $macron? $ddot?) ; | |
95 | | $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ; | |
96 | | $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ; | |
97 | | $1 $rough ← H ([AEIOUY] $macron? $ddot?) ; | |
98 | # titlecase (H followed by a lowercase vowel): the vowel is uppercased on the way back; have to fix individually | |
99 | # in the future, we should add &uppercase() to make this easier | |
100 | | A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ; | |
101 | | E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ; | |
102 | | I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ; | |
103 | | O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ; | |
104 | | U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ; | |
105 | | Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ; | |
106 | | A $1 $rough ← H a ($ddot? $evowel2 $macron?) ; | |
107 | | E $1 $rough ← H e ($ddot? $evowel2 $macron?) ; | |
108 | | I $1 $rough ← H i ($ddot? $evowel2 $macron?) ; | |
109 | | O $1 $rough ← H o ($ddot? $evowel2 $macron?) ; | |
110 | | U $1 $rough ← H u ($ddot? $evowel2 $macron?) ; | |
111 | | Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ; | |
112 | | A $1 $rough ← H a ($macron? $ddot? ) ; | |
113 | | E $1 $rough ← H e ($macron? $ddot? ) ; | |
114 | | I $1 $rough ← H i ($macron? $ddot? ) ; | |
115 | | O $1 $rough ← H o ($macron? $ddot? ) ; | |
116 | | U $1 $rough ← H u ($macron? $ddot? ) ; | |
117 | | Y $1 $rough ← H y ($macron? $ddot? ) ; | |
118 | # Now do smooth breathing | |
119 | # delete smooth breathing when going to Latin | |
120 | $smooth → ; | |
121 | # insert when going to Greek: after a non-letter, an initial r/R not followed by h/H, or an initial vowel or vowel pair, gets $smooth appended | |
122 | # the assumption is that all Marks are on letters. | |
123 | | $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ; | |
124 | | $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; | |
125 | | $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ; | |
126 | # TODO: preserve smooth/rough breathing if not | |
127 | # on initial vowel sequence | |
128 | # need to have these up here so the single-letter rules in the NORMAL section don't mask them | |
129 | # remove now superfluous macron when returning | |
130 | Α ← A $macron ; | |
131 | α ← a $macron ; | |
132 | η ↔ e $macron ; | |
133 | Η ↔ E $macron ; | |
134 | φ ↔ ph ; | |
135 | Ψ } $beforeLower ↔ Ps ; # titlecase form when a lowercase letter follows | |
136 | Ψ ↔ PS ; | |
137 | Φ } $beforeLower ↔ Ph ; # titlecase form when a lowercase letter follows | |
138 | Φ ↔ PH ; | |
139 | ψ ↔ ps ; | |
140 | ω ↔ o $macron ; | |
141 | Ω ↔ O $macron; | |
142 | # NORMAL: letter-by-letter mappings; a context-specific rule is listed before the general rule for the same letter | |
143 | α ↔ a ; | |
144 | Α ↔ A ; | |
145 | β ↔ b ; | |
146 | Β ↔ B ; | |
147 | γ } $gammaLike ↔ n } $egammaLike ; # gamma before a $gammaLike letter is written n | |
148 | γ ↔ g ; | |
149 | Γ } $gammaLike ↔ N } $egammaLike ; | |
150 | Γ ↔ G ; | |
151 | δ ↔ d ; | |
152 | Δ ↔ D ; | |
153 | ε ↔ e ; | |
154 | Ε ↔ E ; | |
155 | ζ ↔ z ; | |
156 | Ζ ↔ Z ; | |
157 | θ ↔ th ; | |
158 | Θ } $beforeLower ↔ Th ; | |
159 | Θ ↔ TH ; | |
160 | ι ↔ i ; | |
161 | Ι ↔ I ; | |
162 | κ ↔ k ; | |
163 | Κ ↔ K ; | |
164 | λ ↔ l ; | |
165 | Λ ↔ L ; | |
166 | μ ↔ m ; | |
167 | Μ ↔ M ; | |
168 | ν } $gammaLike → n\' ; # apostrophe separates a real nu from the n produced by gamma; NOTE(review): this rule is forward-only while the capital rule two lines below is two-way - confirm | |
169 | ν ↔ n ; | |
170 | Ν } $gammaLike ↔ N\' ; | |
171 | Ν ↔ N ; | |
172 | ξ ↔ x ; | |
173 | Ξ ↔ X ; | |
174 | ο ↔ o ; | |
175 | Ο ↔ O ; | |
176 | π ↔ p ; | |
177 | Π ↔ P ; | |
178 | ρ $rough ↔ rh; # rho with rough breathing; must precede the plain rho rules | |
179 | Ρ $rough } $beforeLower ↔ Rh ; | |
180 | Ρ $rough ↔ RH ; | |
181 | ρ ↔ r ; | |
182 | Ρ ↔ R ; | |
183 | # insert separator before things that turn into s (presumably so that p + s is not read back as ps, i.e. psi) | |
184 | [Pp] { } [ςσΣϷϸϺϻ] → \' ; | |
185 | # special S variants | |
186 | Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L | |
187 | ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L | |
188 | Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L | |
189 | ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L | |
190 | # underbar means exception: the sigma form that is unexpected for its position carries $underbar in Latin | |
191 | # before a letter, initial | |
192 | ς } $beforeLetter ↔ s $underbar } $beforeLetter; | |
193 | σ } $beforeLetter ↔ s } $beforeLetter; | |
194 | # otherwise, after a letter = final | |
195 | $afterLetter { σ ↔ $afterLetter { s $underbar; | |
196 | $afterLetter { ς ↔ $afterLetter { s ; | |
197 | # otherwise (isolated) = initial | |
198 | ς ↔ s $underbar; | |
199 | σ ↔ s ; | |
200 | # [Pp] { Σ ↔ \'S ; | |
201 | Σ ↔ S ; | |
202 | τ ↔ t ; | |
203 | Τ ↔ T ; | |
204 | $vowel {υ } ↔ u ; # upsilon directly after a vowel is u; otherwise the next rule gives y | |
205 | υ ↔ y ; | |
206 | $vowel { Υ ↔ U ; | |
207 | Υ ↔ Y ; | |
208 | χ ↔ ch ; | |
209 | Χ } $beforeLower ↔ Ch ; | |
210 | Χ ↔ CH ; | |
211 | # Completeness for ASCII: reverse-only rules that rewrite Latin letters with no mapping above into ones that have a mapping; the cursor is left before the replacement so it is scanned again | |
212 | $ignore = [[:Mark:]''] * ; # any run of marks or apostrophes | |
213 | | k ← c ; | |
214 | | ph ← f ; | |
215 | | i ← j ; | |
216 | | k ← q ; | |
217 | | b ← v } $vowel ; # v or w before a vowel becomes b; otherwise the later rules give u | |
218 | | b ← w } $vowel; | |
219 | | u ← v ; | |
220 | | u ← w; | |
221 | | K ← C ; | |
222 | | Ph ← F ; | |
223 | | I ← J ; | |
224 | | K ← Q ; | |
225 | | B ← V } $vowel ; | |
226 | | B ← W } $vowel ; | |
227 | | U ← V ; | |
228 | | U ← W ; | |
229 | $rough } $ignore [:UppercaseLetter:] → H ; # any rough breathing still left: H when next to an uppercase letter | |
230 | $ignore [:UppercaseLetter:] { $rough → H ; | |
231 | $rough ← H ; | |
232 | $rough ↔ h ; # otherwise h | |
233 | # Completeness for Greek: forward-only rules; symbol and variant forms are rewritten to the base letter with the cursor before it, so the rules above then apply; ϳ and ͺ map straight to Latin | |
234 | ϐ → | β ; | |
235 | ϑ → | θ ; | |
236 | ϒ → | Υ ; | |
237 | ϕ → | φ ; | |
238 | ϖ → | π ; | |
239 | ϰ → | κ ; | |
240 | ϱ → | ρ ; | |
241 | ϲ → | σ ; | |
242 | Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL | |
243 | ϳ → j ; | |
244 | ϴ → | Θ ; | |
245 | ϵ → | ε ; | |
246 | µ → | μ ; | |
247 | ͺ → i; | |
248 | # delete any trailing ' marks used for roundtripping (reverse direction only: the apostrophe is dropped once the neighbouring letters are already Greek) | |
249 | ← [Ππ] { \' } [Ss] ; | |
250 | ← [Νν] { \' } $egammaLike ; | |
251 | ::NFC (NFD) ; # forward: compose the result with NFC; the parenthesised NFD is what the reverse transform runs at this point | |
252 | # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; | |
253 | # ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ; | |
254 | # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD | |
255 | :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ; | |
256 |