git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Grek_Latn.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn.txt
CommitLineData
f3c0d7a5
A
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
2ca993e8 4# File: Grek_Latn.txt
f3c0d7a5 5# Generated from CLDR
73c04bcf 6#
2ca993e8
A
7
8# Rules are predicated on running NFD first, and NFC afterwards
9# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
10# MINIMAL FILTER GENERATED FOR: Greek-Latin
73c04bcf
A
11:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
12:: NFD (NFC) ;
2ca993e8
A
13# TEST CASES
14# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
15# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
16# ᾳ ῃ ῳ ὃ ὄ
17# ὠς ὡς ὢς ὣς
18# Ὠς Ὡς Ὢς Ὣς
19# ὨΣ ὩΣ ὪΣ ὫΣ
20# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
21# Useful variables
73c04bcf
A
22$lower = [[:latin:][:greek:] & [:Ll:]];
23$glower = [[:greek:] & [:Ll:]];
24$upper = [[:latin:][:greek:] & [:Lu:]] ;
374ca955 25$accent = [:M:] ;
2ca993e8
A
26# NOTE: restrict to just the Greek & Latin accents that we care about
27# TODO: broaden out once iteration is fixed
374ca955 28$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
374ca955
A
29$macron = \u0304 ;
30$ddot = \u0308 ;
31$ddotmac = [$ddot$macron];
73c04bcf
A
32$lcgvowel = [αεηιουω] ;
33$ucgvowel = [ΑΕΗΙΟΥΩ] ;
34$gvowel = [$lcgvowel $ucgvowel] ;
35$lcgvowelC = [$lcgvowel $accent] ;
374ca955
A
36$evowel = [aeiouyAEIOUY];
37$evowel2 = [iuyIUY];
73c04bcf
A
38$vowel = [ $evowel $gvowel] ;
39$gammaLike = [ΓΚΞΧγκξχϰ] ;
40$egammaLike = [GKXCgkxc] ;
41$smooth = \u0313 ;
42$rough = \u0314 ;
43$iotasub = \u0345 ;
374ca955
A
44$evowel_i = [$evowel-[iI]] ;
45$evowel2_i = [uyUY];
374ca955 46$underbar = \u0331;
374ca955
A
47$afterLetter = [:L:] [[:M:]\']* ;
48$beforeLetter = [[:M:]\']* [:L:] ;
73c04bcf 49$beforeLower = $accent * $lower ;
374ca955 50$notLetter = [^[:L:][:M:]] ;
73c04bcf 51$under = \u0331;
2ca993e8
A
52# Fix punctuation
53# preserve original
729e4ab9
A
54\: ↔ \: $under ;
55\? ↔ \? $under ;
56\; ↔ \? ;
57· ↔ \: ;
2ca993e8 58# CIRCUMFLEX: convert the Greek circumflex (U+0342) to the ordinary circumflex (U+0302). Could use tilde or inverted breve instead.
729e4ab9 59\u0342 ↔ \u0302 ;
2ca993e8
A
60# IOTA: convert iota subscript to iota
61# first make previous alpha long!
374ca955 62$accent_minus = [[$accent]-[$iotasub$macron]];
729e4ab9
A
63Α } $accent_minus * $iotasub → | Α $macron ;
64α } $accent_minus * $iotasub → | α $macron ;
2ca993e8 65# now convert to uppercase if after uppercase, otherwise to lowercase
729e4ab9
A
66$upper $accent * { $iotasub → I ;
67$iotasub → i ;
68| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
69| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
2ca993e8
A
70# BREATHING
71# Convert rough breathing to h, and move before letters.
72# Make A ` x = → H a x
729e4ab9
A
73Α ($macron?) $rough } $beforeLower → H | α $1;
74Ε $rough } $beforeLower → H | ε;
75Η $rough } $beforeLower → H | η ;
51004dcb 76Ι ($ddot?) $rough } $beforeLower → H | ι $1;
729e4ab9
A
77Ο $rough } $beforeLower → H | ο ;
78Υ $rough } $beforeLower → H | υ ;
79Ω ($ddot?) $rough } $beforeLower → H | ω $1;
2ca993e8 80# Make A x ` = → H a x
729e4ab9
A
81Α ($glower $macron?) $rough → H | α $1 ;
82Ε ($glower) $rough → H | ε $1 ;
83Η ($glower) $rough → H | η $1 ;
84Ι ($glower $ddot?) $rough → H | ι $1 ;
85Ο ($glower) $rough → H | ο $1 ;
86Υ ($glower) $rough → H | υ $1 ;
51004dcb 87Ω ($glower $ddot?) $rough → H | ω $1 ;
2ca993e8 88#Otherwise, make x ` into h x and X ` into H X
729e4ab9
A
89($lcgvowel + $ddotmac? ) $rough → h | $1 ;
90($gvowel + $ddotmac? ) $rough → H | $1 ;
2ca993e8 91# Go backwards with H
729e4ab9
A
92| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
93| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
94| $1 $rough ← h ($evowel $macron? $ddot?) ;
95| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
96| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
97| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
2ca993e8
A
98# titlecase, have to fix individually
99# in the future, we should add &uppercase() to make this easier
51004dcb
A
100| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
101| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
102| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
103| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
729e4ab9
A
104| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
105| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
106| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
107| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
108| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
109| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
110| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
111| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
112| A $1 $rough ← H a ($macron? $ddot? ) ;
113| E $1 $rough ← H e ($macron? $ddot? ) ;
114| I $1 $rough ← H i ($macron? $ddot? ) ;
115| O $1 $rough ← H o ($macron? $ddot? ) ;
116| U $1 $rough ← H u ($macron? $ddot? ) ;
117| Y $1 $rough ← H y ($macron? $ddot? ) ;
2ca993e8
A
118# Now do smooth
119#delete smooth breathing for Latin
729e4ab9 120$smooth → ;
2ca993e8
A
121# insert in Greek
122# the assumption is that all Marks are on letters.
729e4ab9
A
123| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
124| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
125| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
2ca993e8
A
126# TODO: preserve smooth/rough breathing if not
127# on initial vowel sequence
128# need to have these up here so the rules don't mask
129# remove now superfluous macron when returning
729e4ab9
A
130Α ← A $macron ;
131α ← a $macron ;
132η ↔ e $macron ;
133Η ↔ E $macron ;
134φ ↔ ph ;
135Ψ } $beforeLower ↔ Ps ;
136Ψ ↔ PS ;
137Φ } $beforeLower ↔ Ph ;
138Φ ↔ PH ;
139ψ ↔ ps ;
140ω ↔ o $macron ;
51004dcb 141Ω ↔ O $macron;
2ca993e8 142# NORMAL
729e4ab9
A
143α ↔ a ;
144Α ↔ A ;
145β ↔ b ;
146Β ↔ B ;
147γ } $gammaLike ↔ n } $egammaLike ;
148γ ↔ g ;
149Γ } $gammaLike ↔ N } $egammaLike ;
150Γ ↔ G ;
151δ ↔ d ;
152Δ ↔ D ;
153ε ↔ e ;
154Ε ↔ E ;
155ζ ↔ z ;
156Ζ ↔ Z ;
157θ ↔ th ;
158Θ } $beforeLower ↔ Th ;
159Θ ↔ TH ;
160ι ↔ i ;
161Ι ↔ I ;
162κ ↔ k ;
163Κ ↔ K ;
164λ ↔ l ;
165Λ ↔ L ;
166μ ↔ m ;
167Μ ↔ M ;
168ν } $gammaLike → n\' ;
169ν ↔ n ;
170Ν } $gammaLike ↔ N\' ;
171Ν ↔ N ;
172ξ ↔ x ;
173Ξ ↔ X ;
174ο ↔ o ;
175Ο ↔ O ;
176π ↔ p ;
177Π ↔ P ;
178ρ $rough ↔ rh;
179Ρ $rough } $beforeLower ↔ Rh ;
180Ρ $rough ↔ RH ;
181ρ ↔ r ;
182Ρ ↔ R ;
2ca993e8 183# insert separator before things that turn into s
729e4ab9 184[Pp] { } [ςσΣϷϸϺϻ] → \' ;
2ca993e8 185# special S variants
729e4ab9
A
186Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
187ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
188Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
189ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
2ca993e8
A
190# underbar means exception
191# before a letter, initial
729e4ab9
A
192ς } $beforeLetter ↔ s $underbar } $beforeLetter;
193σ } $beforeLetter ↔ s } $beforeLetter;
2ca993e8 194# otherwise, after a letter = final
729e4ab9
A
195$afterLetter { σ ↔ $afterLetter { s $underbar;
196$afterLetter { ς ↔ $afterLetter { s ;
2ca993e8 197# otherwise (isolated) = initial
729e4ab9
A
198ς ↔ s $underbar;
199σ ↔ s ;
2ca993e8 200# [Pp] { Σ ↔ \'S ;
729e4ab9
A
201Σ ↔ S ;
202τ ↔ t ;
203Τ ↔ T ;
204$vowel {υ } ↔ u ;
205υ ↔ y ;
206$vowel { Υ ↔ U ;
207Υ ↔ Y ;
208χ ↔ ch ;
209Χ } $beforeLower ↔ Ch ;
210Χ ↔ CH ;
2ca993e8 211# Completeness for ASCII
374ca955 212$ignore = [[:Mark:]''] * ;
51004dcb 213| k ← c ;
729e4ab9 214| ph ← f ;
51004dcb 215| i ← j ;
729e4ab9
A
216| k ← q ;
217| b ← v } $vowel ;
218| b ← w } $vowel;
219| u ← v ;
220| u ← w;
221| K ← C ;
222| Ph ← F ;
223| I ← J ;
224| K ← Q ;
51004dcb
A
225| B ← V } $vowel ;
226| B ← W } $vowel ;
729e4ab9
A
227| U ← V ;
228| U ← W ;
229$rough } $ignore [:UppercaseLetter:] → H ;
230$ignore [:UppercaseLetter:] { $rough → H ;
231$rough ← H ;
232$rough ↔ h ;
2ca993e8 233# Completeness for Greek
729e4ab9
A
234ϐ → | β ;
235ϑ → | θ ;
236ϒ → | Υ ;
237ϕ → | φ ;
238ϖ → | π ;
239ϰ → | κ ;
240ϱ → | ρ ;
241ϲ → | σ ;
242Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
243ϳ → j ;
244ϴ → | Θ ;
245ϵ → | ε ;
246µ → | μ ;
247ͺ → i;
2ca993e8 248# delete any trailing ' marks used for roundtripping
729e4ab9
A
249← [Ππ] { \' } [Ss] ;
250← [Νν] { \' } $egammaLike ;
374ca955 251::NFC (NFD) ;
2ca993e8
A
252# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
253# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
254# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
73c04bcf 255:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
2ca993e8 256