]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Grek_Latn.txt
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: Grek_Latn.txt
5 # Generated from CLDR
6 #
7
8 # Rules are predicated on running NFD first, and NFC afterwards
9 # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
10 # MINIMAL FILTER GENERATED FOR: Greek-Latin
11 :: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
12 :: NFD (NFC) ; # forward direction: decompose before the rules run; the parenthesized NFC is the reverse-direction counterpart
13 # TEST CASES
14 # Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
15 # ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
16 # ᾳ ῃ ῳ ὃ ὄ
17 # ὠς ὡς ὢς ὣς
18 # Ὠς Ὡς Ὢς Ὣς
19 # ὨΣ ὩΣ ὪΣ ὫΣ
20 # Ạ, ạ, Ẹ, ẹ, Ọ, ọ
21 # Useful variables: character classes and single combining marks shared by the rules below
22 $lower = [[:latin:][:greek:] & [:Ll:]]; # Latin or Greek script, restricted to lowercase letters
23 $glower = [[:greek:] & [:Ll:]]; # Greek lowercase letters only
24 $upper = [[:latin:][:greek:] & [:Lu:]] ; # Latin or Greek script, restricted to uppercase letters
25 $accent = [:M:] ; # any mark
26 # NOTE: restrict to just the Greek & Latin accents that we care about
27 # TODO: broaden out once interation is fixed
28 $accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; # marks in U+0300..U+0345, minus U+0338 (long solidus overlay); used by the reverse iota rules
29 $macron = \u0304 ; # combining macron
30 $ddot = \u0308 ; # combining diaeresis
31 $ddotmac = [$ddot$macron]; # either of the two marks above
32 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
33 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
34 $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
35 $lcgvowelC = [$lcgvowel $accent] ; # NOTE(review): not referenced by any rule in this file as shown
36 $evowel = [aeiouyAEIOUY]; # Latin vowels, y included
37 $evowel2 = [iuyIUY]; # Latin vowels allowed as the second element of a vowel pair
38 $vowel = [ $evowel $gvowel] ; # Latin or Greek vowel
39 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which gamma is written n
40 $egammaLike = [GKXCgkxc] ; # the Latin letters those produce (first letter of ch counts as c)
41 $smooth = \u0313 ; # combining comma above (smooth breathing)
42 $rough = \u0314 ; # combining reversed comma above (rough breathing)
43 $iotasub = \u0345 ; # combining Greek ypogegrammeni (iota subscript)
44 $evowel_i = [$evowel-[iI]] ; # NOTE(review): not referenced by any rule in this file as shown
45 $evowel2_i = [uyUY]; # $evowel2 without i and I
46 $underbar = \u0331; # combining macron below; marks exceptional sigma forms in Latin output
# $afterLetter / $beforeLetter: a letter plus any run of marks or apostrophes, used as left / right context
47 $afterLetter = [:L:] [[:M:]\']* ;
48 $beforeLetter = [[:M:]\']* [:L:] ;
49 $beforeLower = $accent * $lower ; # right context: optional marks, then a lowercase letter (selects titlecase output)
50 $notLetter = [^[:L:][:M:]] ; # anything that is neither a letter nor a mark
51 $under = \u0331; # same code point as $underbar; this name is the one used by the punctuation rules
52 # Fix punctuation: Greek question mark and raised dot map to Latin ? and :
53 # preserve original: a literal : or ? in the Greek text is tagged with $under so the reverse direction can restore it
54 \: ↔ \: $under ;
55 \? ↔ \? $under ;
56 \; ↔ \? ; # semicolon serves as the Greek question mark
57 · ↔ \: ; # raised dot maps to colon
58 # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
59 \u0342 ↔ \u0302 ; # combining Greek perispomeni <-> combining circumflex accent
60 # IOTA: convert iota subscript to iota
61 # first make previous alpha long!
62 $accent_minus = [[$accent]-[$iotasub$macron]]; # any mark except iota subscript and macron; distinct from $accentMinus
63 Α } $accent_minus * $iotasub → | Α $macron ; # alpha carrying an iota subscript gains a macron; once the macron is present this rule no longer matches
64 α } $accent_minus * $iotasub → | α $macron ;
65 # now convert to uppercase if after uppercase, otherwise to lowercase
66 $upper $accent * { $iotasub → I ; # left context: uppercase letter plus optional marks
67 $iotasub → i ;
68 | $1 $iotasub ← ($evowel $macron $accentMinus *) i ; # reverse: i or I after a long Latin vowel becomes iota subscript; cursor goes before the vowel so it is converted next
69 | $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
70 # BREATHING
71 # Convert rough breathing to h, and move before letters.
72 # Titlecase, breathing on the capital: capital vowel + rough breathing followed by a lowercase letter
# becomes H + the lowercase vowel; the cursor (|) is left before that vowel so later rules convert it.
73 Α ($macron?) $rough } $beforeLower → H | α $1;
74 Ε $rough } $beforeLower → H | ε;
75 Η $rough } $beforeLower → H | η ;
76 Ι ($ddot?) $rough } $beforeLower → H | ι $1;
77 Ο $rough } $beforeLower → H | ο ;
78 Υ $rough } $beforeLower → H | υ ;
79 Ω ($ddot?) $rough } $beforeLower → H | ω $1;
80 # Titlecase, breathing on the second letter: capital vowel + Greek lowercase letter + rough breathing becomes H + lowercase vowel + that letter
81 Α ($glower $macron?) $rough → H | α $1 ;
82 Ε ($glower) $rough → H | ε $1 ;
83 Η ($glower) $rough → H | η $1 ;
84 Ι ($glower $ddot?) $rough → H | ι $1 ;
85 Ο ($glower) $rough → H | ο $1 ;
86 Υ ($glower) $rough → H | υ $1 ;
87 Ω ($glower $ddot?) $rough → H | ω $1 ;
88 # Otherwise move the breathing in front of the vowel run: h for a lowercase run, H for any other Greek vowel run
89 ($lcgvowel + $ddotmac? ) $rough → h | $1 ;
90 ($gvowel + $ddotmac? ) $rough → H | $1 ;
91 # Go backwards with H: in the reverse direction h or H followed by Latin vowels becomes those vowels plus rough breathing.
# Patterns run from the longest vowel pair down to a single vowel; the cursor is left before the vowels.
92 | $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
93 | $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
94 | $1 $rough ← h ($evowel $macron? $ddot?) ;
95 | $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
96 | $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
97 | $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
98 # titlecase, have to fix individually: H + lowercase vowel restores the capital vowel, one rule per vowel and pattern
99 # in the future, we should add &uppercase() to make this easier
100 | A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
101 | E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
102 | I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
103 | O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
104 | U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
105 | Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
106 | A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
107 | E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
108 | I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
109 | O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
110 | U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
111 | Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
112 | A $1 $rough ← H a ($macron? $ddot? ) ;
113 | E $1 $rough ← H e ($macron? $ddot? ) ;
114 | I $1 $rough ← H i ($macron? $ddot? ) ;
115 | O $1 $rough ← H o ($macron? $ddot? ) ;
116 | U $1 $rough ← H u ($macron? $ddot? ) ;
117 | Y $1 $rough ← H y ($macron? $ddot? ) ;
118 # Now do smooth
119 # delete smooth breathing for Latin (forward direction only)
120 $smooth → ;
121 # insert in Greek (reverse direction): add smooth breathing after a word-initial r or vowel sequence
122 # the assumption is that all Marks are on letters.
123 | $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ; # initial r, unless followed by h or H (rh is handled as rho with rough breathing) or an existing breathing
124 | $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; # initial vowel pair: breathing goes after the second vowel
125 | $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ; # initial single vowel not followed by a second-position vowel
126 # TODO: preserve smooth/rough breathing if not
127 # on initial vowel sequence
128 # need to have these up here so the rules don't mask
129 # remove now superfluous macron when returning
130 Α ← A $macron ; # reverse only: long a is plain alpha
131 α ← a $macron ;
132 η ↔ e $macron ; # eta is written as e with macron
133 Η ↔ E $macron ;
134 φ ↔ ph ;
135 Ψ } $beforeLower ↔ Ps ; # titlecase digraph when a lowercase letter follows
136 Ψ ↔ PS ; # otherwise all caps
137 Φ } $beforeLower ↔ Ph ;
138 Φ ↔ PH ;
139 ψ ↔ ps ;
140 ω ↔ o $macron ; # omega is written as o with macron
141 Ω ↔ O $macron;
142 # NORMAL: the basic letter-for-letter table; context-specific rules come before the plain rule for the same letter
143 α ↔ a ;
144 Α ↔ A ;
145 β ↔ b ;
146 Β ↔ B ;
147 γ } $gammaLike ↔ n } $egammaLike ; # gamma before a gamma-like letter is written n
148 γ ↔ g ;
149 Γ } $gammaLike ↔ N } $egammaLike ;
150 Γ ↔ G ;
151 δ ↔ d ;
152 Δ ↔ D ;
153 ε ↔ e ;
154 Ε ↔ E ;
155 ζ ↔ z ;
156 Ζ ↔ Z ;
157 θ ↔ th ;
158 Θ } $beforeLower ↔ Th ; # titlecase digraph when a lowercase letter follows
159 Θ ↔ TH ;
160 ι ↔ i ;
161 Ι ↔ I ;
162 κ ↔ k ;
163 Κ ↔ K ;
164 λ ↔ l ;
165 Λ ↔ L ;
166 μ ↔ m ;
167 Μ ↔ M ;
# nu before a gamma-like letter gets a trailing apostrophe so it is not confused with the n produced from gamma.
# NOTE(review): the lowercase rule is forward-only while the capital one is bidirectional - confirm this is intended.
168 ν } $gammaLike → n\' ;
169 ν ↔ n ;
170 Ν } $gammaLike ↔ N\' ;
171 Ν ↔ N ;
172 ξ ↔ x ;
173 Ξ ↔ X ;
174 ο ↔ o ;
175 Ο ↔ O ;
176 π ↔ p ;
177 Π ↔ P ;
178 ρ $rough ↔ rh; # rho with rough breathing
179 Ρ $rough } $beforeLower ↔ Rh ;
180 Ρ $rough ↔ RH ;
181 ρ ↔ r ;
182 Ρ ↔ R ;
183 # insert separator before things that turn into s, so that p followed by s is not read back as psi
184 [Pp] { } [ςσΣϷϸϺϻ] → \' ;
185 # special S variants
186 Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
187 ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
188 Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
189 ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
190 # underbar means exception: a sigma form that does not match its position is written s + $underbar
191 # before a letter, initial
192 ς } $beforeLetter ↔ s $underbar } $beforeLetter; # final-form sigma in non-final position (exception)
193 σ } $beforeLetter ↔ s } $beforeLetter; # regular sigma in non-final position
194 # otherwise, after a letter = final
195 $afterLetter { σ ↔ $afterLetter { s $underbar; # regular sigma in final position (exception)
196 $afterLetter { ς ↔ $afterLetter { s ; # final-form sigma in final position
197 # otherwise (isolated) = initial
198 ς ↔ s $underbar;
199 σ ↔ s ;
200 # [Pp] { Σ ↔ \'S ;
201 Σ ↔ S ;
202 τ ↔ t ;
203 Τ ↔ T ;
204 $vowel {υ } ↔ u ; # upsilon after a vowel is u
205 υ ↔ y ; # upsilon elsewhere is y
206 $vowel { Υ ↔ U ;
207 Υ ↔ Y ;
208 χ ↔ ch ;
209 Χ } $beforeLower ↔ Ch ; # titlecase digraph when a lowercase letter follows
210 Χ ↔ CH ;
211 # Completeness for ASCII: reverse-only rules for Latin letters that no Greek letter produces.
# Each is rewritten to a Latin spelling the main table understands, with the cursor placed before it so that table then applies.
212 $ignore = [[:Mark:]''] * ;
213 | k ← c ;
214 | ph ← f ;
215 | i ← j ;
216 | k ← q ;
217 | b ← v } $vowel ; # v or w before a vowel is treated as b
218 | b ← w } $vowel;
219 | u ← v ; # v or w elsewhere is treated as u
220 | u ← w;
221 | K ← C ;
222 | Ph ← F ;
223 | I ← J ;
224 | K ← Q ;
225 | B ← V } $vowel ;
226 | B ← W } $vowel ;
227 | U ← V ;
228 | U ← W ;
229 $rough } $ignore [:UppercaseLetter:] → H ; # rough breathing left over from the rules above: H when next to an uppercase letter (marks and apostrophes skipped)
230 $ignore [:UppercaseLetter:] { $rough → H ;
231 $rough ← H ; # reverse: a remaining H or h becomes rough breathing
232 $rough ↔ h ; # forward fallback: any other rough breathing is h
233 # Completeness for Greek: forward-only rules that replace symbol variants with the ordinary letter;
# the cursor is placed before the replacement so the pass continues from that letter.
234 ϐ → | β ; # beta symbol
235 ϑ → | θ ; # theta symbol
236 ϒ → | Υ ; # upsilon with hook symbol
237 ϕ → | φ ; # phi symbol
238 ϖ → | π ; # pi symbol
239 ϰ → | κ ; # kappa symbol
240 ϱ → | ρ ; # rho symbol
241 ϲ → | σ ; # lunate sigma symbol
242 Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
243 ϳ → j ; # letter yot maps straight to Latin j
244 ϴ → | Θ ; # capital theta symbol
245 ϵ → | ε ; # lunate epsilon symbol
246 µ → | μ ; # micro sign
247 ͺ → i; # spacing iota subscript maps straight to Latin i
248 # delete any trailing ' marks used for roundtripping
# (reverse direction: the separator after pi before s, and after nu before a gamma-like Latin letter)
249 ← [Ππ] { \' } [Ss] ;
250 ← [Νν] { \' } $egammaLike ;
251 ::NFC (NFD) ; # forward direction: recompose after the rules; the parenthesized NFD is the reverse-direction counterpart
252 # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
253 # ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
254 # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD (the parentheses make this filter apply to the reverse direction only)
255 :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
256