]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/translit/Grek_Latn_UNGEGN.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn_UNGEGN.txt
... / ...
CommitLineData
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: Grek_Latn_UNGEGN.txt
5# Generated from CLDR
6#
7
8# For modern Greek, based on UNGEGN rules.
9# Rules are predicated on running NFD first, and NFC afterwards
10# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
11# WARNING: need to add accents to both filters ###
12# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
# Global filter for the forward (Greek-to-Latin) direction: Greek-script
# characters, nonspacing/enclosing marks ([:Mn:], [:Me:]) and the punctuation
# listed in the second bracket.
13:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
# Normalize to NFD before the rules run; the parenthesised NFC is the
# counterpart used when the rules are run in the reverse direction.
14::NFD (NFC) ;
15# Useful variables
# Lowercase / uppercase letters of the Latin and Greek scripts.
16$lower = [[:latin:][:greek:] & [:Ll:]] ;
# NOTE(review): $upper is not referenced by any rule visible in this file.
17$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any nonspacing or enclosing combining mark.
18$accent = [[:Mn:][:Me:]] ;
# U+0304 COMBINING MACRON and U+0308 COMBINING DIAERESIS.
# NOTE(review): neither $macron nor $ddot is referenced by a rule visible here
# (the diaeresis is written literally in $shiftForwardVowels).
19$macron = \u0304 ;
20$ddot = \u0308 ;
# Greek vowels: lowercase, uppercase, and both.
21$lcgvowel = [αεηιουω] ;
22$ucgvowel = [ΑΕΗΙΟΥΩ] ;
23$gvowel = [$lcgvowel $ucgvowel] ;
# NOTE(review): $lcgvowelC is not referenced by any rule visible here.
24$lcgvowelC = [$lcgvowel $accent] ;
# Latin vowels (including y), and the union of Latin and Greek vowels.
25$evowel = [aeiouyAEIOUY];
26$vowel = [ $evowel $gvowel] ;
# Look-ahead context: optional accents followed by a lowercase letter. Used to
# pick title-case digraphs (Ps, Th, Ch, Μπ) instead of all-caps ones.
27$beforeLower = $accent * $lower ;
# Greek letters before which gamma is written n/N, and the Latin letters used
# as the matching context in the reverse direction.
28$gammaLike = [ΓΚΞΧγκξχϰ] ;
29$egammaLike = [GKXCgkxc] ;
# U+0313 (smooth breathing), U+0314 (rough breathing), U+0345 (iota subscript).
30$smooth = \u0313 ;
31$rough = \u0314 ;
32$iotasub = \u0345 ;
# Letters before which upsilon (after a/e/i) becomes v rather than f.
33$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
# U+0331 COMBINING MACRON BELOW: marks Latin output that must round-trip to a
# different Greek source than the unmarked letter.
34$under = \u0331;
# U+030C COMBINING CARON.
# NOTE(review): $caron is not referenced by any rule visible here (the sho
# rules write \u030C literally).
35$caron = \u030C;
# A letter followed by any apostrophes/accents, and the mirror image; used as
# contexts when deciding between the two lowercase sigma forms.
36$afterLetter = [:L:] [\'$accent]* ;
37$beforeLetter = [\'$accent]* [:L:] ;
38# Fix punctuation
39# preserve original
# A literal ':' or '?' is tagged with $under so it stays distinct from the
# ':' and '?' produced by the two rules that follow.
40\: ↔ \: $under ;
41\? ↔ \? $under ;
# ';' is written as Latin '?', and '·' as Latin ':'.
42\; ↔ \? ;
43· ↔ \: ;
44# Fix any ancient characters that creep in
# Forward only: U+0342, U+0302 and U+0300 are all rewritten to U+0301.
45\u0342 → \u0301 ;
46\u0302 → \u0301 ;
47\u0300 → \u0301 ;
# Forward only: breathings, the combining iota subscript and the spacing
# character ͺ are deleted (empty right-hand side).
48$smooth → ;
49$rough → ;
50$iotasub → ;
51ͺ → ;
52# need to have these up here so the later rules don't mask them
# NOTE(review): presumably this is about the reverse direction, where
# "i"/"o" + $under and "ps" must be tried before the plain i, o, p and s
# rules further down -- confirm.
# Eta and omega share a base letter with iota and omicron; $under keeps them
# distinguishable.
53η ↔ i $under ;
54Η ↔ I $under ;
# Capital psi is title-cased (Ps) before a lowercase letter, otherwise PS.
55Ψ } $beforeLower ↔ Ps ;
56Ψ ↔ PS ;
57ψ ↔ ps ;
58ω ↔ o $under ;
59Ω ↔ O $under;
60# at beginning or end of word, convert mp to b
# Forward rules. The context [^[:L:]$accent] is one character that is neither
# a letter nor an accent; it must sit immediately before (first rule of each
# pair) or immediately after (second rule) the μπ pair.
61[^[:L:]$accent] { μπ → b ;
62μπ } [^[:L:]$accent] → b ;
# Remaining case combinations of mu + pi at a word edge give capital B.
63[^[:L:]$accent] { [Μμ][Ππ] → B ;
64[Μμ][Ππ] } [^[:L:]$accent] → B ;
# Reverse rules. b always becomes μπ; B becomes Μπ before a lowercase letter
# and ΜΠ otherwise. These have no word-edge context.
65μπ ← b ;
66Μπ ← B } $beforeLower ;
67ΜΠ ← B ;
68# handle diphthongs ending with upsilon
# omicron + upsilon in all four case combinations.
69ου ↔ ou ;
70ΟΥ ↔ OU ;
71Ου ↔ Ou ;
72οΥ ↔ oU ;
# Left context for the rules below: a Latin a/e/i of either case, optionally
# followed by $under (so it also matches the "i $under" produced for eta).
73$fmaker = [aeiAEI] $under ? ;
74$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
# After $fmaker, lowercase upsilon becomes "v $under" when a $softener letter
# follows and "f $under" otherwise. Any marks on the upsilon (captured as $1)
# are emitted in front of the v/f. Each forward rule is paired with a
# reverse-only rule that puts the marks back after the upsilon.
75$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;
76υ $1 ← ( $shiftForwardVowels )* v $under ;
77$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;
78υ $1 ← ( $shiftForwardVowels )* f $under ;
# Uppercase counterparts (no mark capture).
# NOTE(review): the lowercase fallback above yields "f $under", but the
# uppercase fallback here yields "U $under" rather than "F $under" -- confirm
# this asymmetry is intended.
79$fmaker { Υ } $softener ↔ V $under ;
80$fmaker { Υ ↔ U $under ;
# Any other upsilon.
81υ ↔ y ;
82Υ ↔ Y ;
83# NORMAL
# One-to-one letter mappings, alpha through rho. Context-dependent rules are
# listed before the plain rule for the same letter.
84α ↔ a ;
85Α ↔ A ;
86β ↔ v ;
87Β ↔ V ;
# Gamma before a $gammaLike letter is written n/N; in reverse, n/N before an
# $egammaLike letter becomes gamma again.
88γ } $gammaLike ↔ n } $egammaLike ;
89γ ↔ g ;
90Γ } $gammaLike ↔ N } $egammaLike ;
91Γ ↔ G ;
92δ ↔ d ;
93Δ ↔ D ;
94ε ↔ e ;
95Ε ↔ E ;
96ζ ↔ z ;
97Ζ ↔ Z ;
98θ ↔ th ;
# Capital theta is title-cased (Th) before a lowercase letter, otherwise TH.
99Θ } $beforeLower ↔ Th ;
100Θ ↔ TH ;
101ι ↔ i ;
102Ι ↔ I ;
103κ ↔ k ;
104Κ ↔ K ;
105λ ↔ l ;
106Λ ↔ L ;
107μ ↔ m ;
108Μ ↔ M ;
# A real nu before a $gammaLike letter gets a trailing apostrophe so that it
# is not confused with the n produced from gamma above.
# NOTE(review): the lowercase rule is forward-only (→) while the uppercase
# rule two lines below is bidirectional (↔) -- confirm this is intended.
109ν } $gammaLike → n\' ;
110ν ↔ n ;
111Ν } $gammaLike ↔ N\' ;
112Ν ↔ N ;
113ξ ↔ x ;
114Ξ ↔ X ;
115ο ↔ o ;
116Ο ↔ O ;
117π ↔ p ;
118Π ↔ P ;
119ρ ↔ r ;
120Ρ ↔ R ;
121# insert separator before things that turn into s
# Forward only: when a Latin P/p (already converted) is followed by a Greek
# letter that will become s/S, insert an apostrophe between them so the result
# is not read back as the "ps" of psi.
122[Pp] { } [ςσΣϷϸϺϻ] → \' ;
123# special S variants
124Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
125ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
126Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
127ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
128# Caron means exception
# NOTE(review): the sigma rules below mark the unexpected form with $under
# (U+0331), not with a caron; the caron is only used for sho above. The
# comment above may be stale -- confirm.
129# before a letter, initial
# A letter follows: σ is the expected form (plain s); ς here is the exception.
130ς } $beforeLetter ↔ s $under } $beforeLetter;
131σ } $beforeLetter ↔ s } $beforeLetter;
132# otherwise, after a letter = final
# No letter follows but one precedes: ς is expected (plain s); σ is the
# exception.
133$afterLetter { σ ↔ $afterLetter { s $under;
134$afterLetter { ς ↔ $afterLetter { s ;
135# otherwise (isolated) = initial
136ς ↔ s $under;
137σ ↔ s ;
138# [Pp] { Σ ↔ \'S ;
139Σ ↔ S ;
# Remaining one-to-one letters, tau through chi.
140τ ↔ t ;
141Τ ↔ T ;
142φ ↔ f ;
143Φ ↔ F ;
144χ ↔ ch ;
# Capital chi is title-cased (Ch) before a lowercase letter, otherwise CH.
145Χ } $beforeLower ↔ Ch ;
146Χ ↔ CH ;
147# Completeness for ASCII
# Reverse-only (Latin-to-Greek) rules for Latin letters that no rule above
# produces. Each one rewrites the letter to another Latin spelling; the
# leading '|' places the cursor before the replacement so that the earlier
# rules then convert it to Greek.
148# $ignore = [[:Mark:]''] * ;
149| ch ← h ;
150| k ← c ;
151| i ← j ;
152| k ← q ;
# u / w before a vowel are rewritten to b (which the b rules turn into μπ);
# elsewhere they are rewritten to y.
153| b ← u } $vowel ;
154| b ← w } $vowel ;
155| y ← u ;
156| y ← w ;
# Uppercase counterparts.
157| Ch ← H ;
158| K ← C ;
159| I ← J ;
160| K ← Q ;
161| B ← W } $vowel ;
162| B ← U } $vowel ;
163| Y ← W ;
164| Y ← U ;
165# Completeness for Greek
# Forward-only rules for Greek symbol/variant characters. Each is replaced by
# an ordinary Greek letter, with the cursor ('|') placed before it so that the
# main rules above then transliterate that letter.
166ϐ → | β ;
167ϑ → | θ ;
168ϒ → | Υ ;
169ϕ → | φ ;
170ϖ → | π ;
171ϰ → | κ ;
172ϱ → | ρ ;
173ϲ → | σ ;
174Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
# ϳ maps straight to Latin j (no cursor reset, no Greek intermediate).
175ϳ → j ;
176ϴ → | Θ ;
177ϵ → | ε ;
178µ → | μ ;
179# delete any trailing ' marks used for roundtripping
# Reverse only, empty output: drop the apostrophe that the forward rules
# inserted between pi and a following s/S, and between nu and a following
# $egammaLike letter. The left context is Greek and the right context Latin.
180← [Ππ] { \' } [Ss] ;
181← [Νν] { \' } $egammaLike ;
# Recompose after the forward rules; the parenthesised NFD is the counterpart
# used when the rules are run in the reverse direction.
182::NFC (NFD) ;
183# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
# Reverse-direction filter (parenthesised): Latin-script characters,
# nonspacing/enclosing marks, and the characters ' : ?
184:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
185