]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Grek_Latn_UNGEGN.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn_UNGEGN.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: Grek_Latn_UNGEGN.txt
5 # Generated from CLDR
6 #
7
8 # For modern Greek, based on UNGEGN rules.
9 # Rules are predicated on running NFD first, and NFC afterwards
10 # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
11 # WARNING: need to add accents to both filters ###
12 # :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
# Forward global filter: only characters in this set (Greek-script characters,
# nonspacing/enclosing marks, and the punctuation listed) are handed to the
# forward rules.
13 :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
# Forward direction decomposes with NFD before the rules below run; the
# parenthesised NFC is the transform used at this step in the reverse direction.
14 ::NFD (NFC) ;
15 # Useful variables
# Character sets and context patterns shared by the rules in this file.
16 $lower = [[:latin:][:greek:] & [:Ll:]] ; # lowercase Latin or Greek letters
17 $upper = [[:latin:][:greek:] & [:Lu:]] ; # uppercase Latin or Greek letters (not referenced by any rule shown in this file)
18 $accent = [[:Mn:][:Me:]] ; # any nonspacing or enclosing mark
19 $macron = \u0304 ; # COMBINING MACRON (not referenced by any rule shown in this file)
20 $ddot = \u0308 ; # COMBINING DIAERESIS (not referenced by name; \u0308 is written literally where needed)
21 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
22 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
23 $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
24 $lcgvowelC = [$lcgvowel $accent] ; # lowercase Greek vowel or a mark (not referenced by any rule shown in this file)
25 $evowel = [aeiouyAEIOUY]; # Latin vowels (including y) of either case
26 $vowel = [ $evowel $gvowel] ; # Latin or Greek vowel
27 $beforeLower = $accent * $lower ; # after-context: optional marks, then a lowercase letter (selects title-case digraphs such as Ps/Th/Ch)
28 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which γ is written n
29 $egammaLike = [GKXCgkxc] ; # Latin letters used as the matching context in the reverse direction
30 $smooth = \u0313 ; # COMBINING COMMA ABOVE (smooth breathing)
31 $rough = \u0314 ; # COMBINING REVERSED COMMA ABOVE (rough breathing)
32 $iotasub = \u0345 ; # COMBINING GREEK YPOGEGRAMMENI (iota subscript)
33 $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # following letters that make a diphthong-final υ come out as v rather than f
34 $under = \u0331; # COMBINING MACRON BELOW; attached to Latin output to keep otherwise-ambiguous mappings reversible
35 $caron = \u030C; # COMBINING CARON (not referenced by name; \u030C is written literally in the SHO rules)
36 $afterLetter = [:L:] [\'$accent]* ; # before-context: a letter followed by any apostrophes/marks
37 $beforeLetter = [\'$accent]* [:L:] ; # after-context: any apostrophes/marks followed by a letter
38 # Fix punctuation
39 # Preserve the original: a ':' or '?' already present in the Greek text is tagged
# with $under so that, in reverse, it is not confused with the ':' and '?' that
# the Greek ano teleia (·) and Greek question mark (;) are mapped to below.
40 \: ↔ \: $under ;
41 \? ↔ \? $under ;
42 \; ↔ \? ; # Greek question mark <-> Latin question mark
43 · ↔ \: ; # Greek ano teleia <-> Latin colon
44 # Fix any ancient characters that creep in
# Forward-only normalisation of polytonic marks: the accent variants are
# collapsed to the acute (U+0301); breathings and iota subscript are removed.
45 \u0342 → \u0301 ; # COMBINING GREEK PERISPOMENI -> acute
46 \u0302 → \u0301 ; # COMBINING CIRCUMFLEX ACCENT -> acute
47 \u0300 → \u0301 ; # COMBINING GRAVE ACCENT -> acute
48 $smooth → ; # delete smooth breathing
49 $rough → ; # delete rough breathing
50 $iotasub → ; # delete combining iota subscript
51 ͺ → ; # delete spacing GREEK YPOGEGRAMMENI (U+037A)
52 # need to have these up here so the rules don't mask
# NOTE(review): presumably this means the longer Latin forms here (i/o + $under,
# "ps") must be tried before the shorter rules for plain i, o and p further
# down, which would otherwise match first in the reverse direction — confirm.
53 η ↔ i $under ; # eta is distinguished from iota by $under
54 Η ↔ I $under ;
55 Ψ } $beforeLower ↔ Ps ; # title-case form when a lowercase letter follows
56 Ψ ↔ PS ;
57 ψ ↔ ps ;
58 ω ↔ o $under ; # omega is distinguished from omicron by $under
59 Ω ↔ O $under;
60 # at beginning or end of word, convert mp to b
# "Word edge" is expressed as a neighbouring character that is neither a letter
# nor a mark. The four forward rules cover lowercase μπ and any case mix of
# Μ/μ + Π/π; the three reverse rules choose the Greek casing for b / B.
61 [^[:L:]$accent] { μπ → b ; # μπ preceded by a non-letter, non-mark
62 μπ } [^[:L:]$accent] → b ; # μπ followed by a non-letter, non-mark
63 [^[:L:]$accent] { [Μμ][Ππ] → B ; # remaining case combinations at word start
64 [Μμ][Ππ] } [^[:L:]$accent] → B ; # remaining case combinations at word end
65 μπ ← b ; # reverse: b always becomes μπ
66 Μπ ← B } $beforeLower ; # reverse: title case when a lowercase letter follows
67 ΜΠ ← B ; # reverse: otherwise all caps
68 # handle diphthongs ending with upsilon
# ου is handled first as a unit; after that, an upsilon following a/e/i is
# written v (before a $softener letter) or f (otherwise), tagged with $under.
# Any other upsilon falls through to y / Y.
69 ου ↔ ou ;
70 ΟΥ ↔ OU ;
71 Ου ↔ Ou ;
72 οΥ ↔ oU ;
# $fmaker: Latin a, e or i (either case), optionally followed by $under.
# NOTE(review): presumably this is the already-romanized preceding vowel — confirm.
73 $fmaker = [aeiAEI] $under ? ;
74 $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
75 $fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; # marks captured after υ are emitted before the v
76 υ $1 ← ( $shiftForwardVowels )* v $under ; # reverse: marks before v move back after υ
77 $fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; # no $softener follows: f instead of v
78 υ $1 ← ( $shiftForwardVowels )* f $under ; # reverse of the f form
79 $fmaker { Υ } $softener ↔ V $under ;
# NOTE(review): the lowercase counterpart above yields f + $under, whereas this
# capital rule yields U + $under — confirm the asymmetry is intended.
80 $fmaker { Υ ↔ U $under ;
81 υ ↔ y ; # any remaining upsilon
82 Υ ↔ Y ;
83 # NORMAL
# Letter-by-letter mappings alpha through rho. Rules with a context ( } ... )
# are listed before the plain rule for the same letter so they are tried first.
84 α ↔ a ;
85 Α ↔ A ;
86 β ↔ v ;
87 Β ↔ V ;
88 γ } $gammaLike ↔ n } $egammaLike ; # γ before a $gammaLike letter is written n; reverse needs a following $egammaLike letter
89 γ ↔ g ;
90 Γ } $gammaLike ↔ N } $egammaLike ;
91 Γ ↔ G ;
92 δ ↔ d ;
93 Δ ↔ D ;
94 ε ↔ e ;
95 Ε ↔ E ;
96 ζ ↔ z ;
97 Ζ ↔ Z ;
98 θ ↔ th ;
99 Θ } $beforeLower ↔ Th ; # title-case form when a lowercase letter follows
100 Θ ↔ TH ;
101 ι ↔ i ;
102 Ι ↔ I ;
103 κ ↔ k ;
104 Κ ↔ K ;
105 λ ↔ l ;
106 Λ ↔ L ;
107 μ ↔ m ;
108 Μ ↔ M ;
# ν before a $gammaLike letter gets a trailing apostrophe so it stays distinct
# from the n that γ produces in the same position.
# NOTE(review): the lowercase rule is forward-only (→) while the capital rule
# is bidirectional (↔) — confirm this difference is intended.
109 ν } $gammaLike → n\' ;
110 ν ↔ n ;
111 Ν } $gammaLike ↔ N\' ;
112 Ν ↔ N ;
113 ξ ↔ x ;
114 Ξ ↔ X ;
115 ο ↔ o ;
116 Ο ↔ O ;
117 π ↔ p ;
118 Π ↔ P ;
119 ρ ↔ r ;
120 Ρ ↔ R ;
121 # insert separator before things that turn into s
# The rule matches the empty position between P/p and a sigma-like letter and
# inserts an apostrophe there (forward only).
# NOTE(review): presumably this keeps p + s distinct from the "ps" written for ψ — confirm.
122 [Pp] { } [ςσΣϷϸϺϻ] → \' ;
123 # special S variants
124 Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
125 ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
126 Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
127 ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
128 # Caron means exception
# NOTE(review): the sigma rules below mark the exceptional form with $under
# (U+0331), not with a caron — the comment above looks stale; confirm.
# In each position the sigma form that is unexpected there (ς before a letter,
# σ at the end of a word) is the one that receives $under.
129 # before a letter, initial
130 ς } $beforeLetter ↔ s $under } $beforeLetter;
131 σ } $beforeLetter ↔ s } $beforeLetter;
132 # otherwise, after a letter = final
133 $afterLetter { σ ↔ $afterLetter { s $under;
134 $afterLetter { ς ↔ $afterLetter { s ;
135 # otherwise (isolated) = initial
136 ς ↔ s $under;
137 σ ↔ s ;
138 # [Pp] { Σ ↔ \'S ;
139 Σ ↔ S ;
# Remaining letter-by-letter mappings: tau, phi, chi.
140 τ ↔ t ;
141 Τ ↔ T ;
142 φ ↔ f ;
143 Φ ↔ F ;
144 χ ↔ ch ;
145 Χ } $beforeLower ↔ Ch ; # title-case form when a lowercase letter follows
146 Χ ↔ CH ;
147 # Completeness for ASCII
# Reverse-only (Latin -> Greek) fallbacks for Latin letters not consumed by an
# earlier reverse rule. Each one rewrites the letter to a Latin spelling that
# the rules above do handle; the leading '|' leaves the cursor in front of the
# replacement so that it is then transliterated in turn.
148 # $ignore = [[:Mark:]''] * ;
149 | ch ← h ;
150 | k ← c ;
151 | i ← j ;
152 | k ← q ;
153 | b ← u } $vowel ; # u / w followed by a vowel is rewritten to b
154 | b ← w } $vowel ;
155 | y ← u ; # any other u / w is rewritten to y
156 | y ← w ;
157 | Ch ← H ;
158 | K ← C ;
159 | I ← J ;
160 | K ← Q ;
161 | B ← W } $vowel ;
162 | B ← U } $vowel ;
163 | Y ← W ;
164 | Y ← U ;
165 # Completeness for Greek
# Forward-only: Greek symbol/variant letter forms are replaced by the ordinary
# letter, with the cursor ('|') left in front of it so the regular rules above
# then transliterate that letter. ϳ is the exception and maps straight to j.
166 ϐ → | β ;
167 ϑ → | θ ;
168 ϒ → | Υ ;
169 ϕ → | φ ;
170 ϖ → | π ;
171 ϰ → | κ ;
172 ϱ → | ρ ;
173 ϲ → | σ ;
174 Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
175 ϳ → j ;
176 ϴ → | Θ ;
177 ϵ → | ε ;
178 µ → | μ ; # MICRO SIGN (U+00B5) -> Greek small mu
179 # delete any trailing ' marks used for roundtripping
# Reverse-only rules with empty output: the apostrophe is removed when it sits
# between Greek Π/π and S/s, or between Greek Ν/ν and an $egammaLike letter.
180 ← [Ππ] { \' } [Ss] ;
181 ← [Νν] { \' } $egammaLike ;
# Forward direction recomposes with NFC after the rules; the parenthesised NFD
# is the transform used at this step in the reverse direction.
182 ::NFC (NFD) ;
183 # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
# Reverse global filter (parenthesised): only Latin-script characters,
# nonspacing/enclosing marks, and the characters ' : ? are handed to the
# reverse rules.
184 :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
185