]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Grek_Latn_UNGEGN.txt
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn_UNGEGN.txt
1 # ***************************************************************************
2 # *
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5 # *
6 # ***************************************************************************
7 # File: Grek_Latn_UNGEGN.txt
8 # Generated from CLDR
9 #
10
11 # For modern Greek, based on UNGEGN rules.
12 # Rules are predicated on running NFD first, and NFC afterwards
13 # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
14 # WARNING: need to add accents to both filters ###
15 # :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
16 :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; # global filter for the forward (Greek -> Latin) direction: only Greek-script characters, Mn/Me marks and the punctuation listed here are handed to the rules below
17 ::NFD (NFC) ; # forward: decompose to NFD before the rules run; the parenthesised NFC is the counterpart step used when the rule set is run in reverse
18 # Useful variables (character classes and context patterns shared by the rules further down)
19 $lower = [[:latin:][:greek:] & [:Ll:]] ; # lowercase letters (Ll) of the Latin and Greek scripts
20 $upper = [[:latin:][:greek:] & [:Lu:]] ; # uppercase letters (Lu) of the Latin and Greek scripts; not referenced by any rule in this file
21 $accent = [[:Mn:][:Me:]] ; # nonspacing and enclosing combining marks
22 $macron = \u0304 ; # COMBINING MACRON; not referenced by any rule in this file
23 $ddot = \u0308 ; # COMBINING DIAERESIS; not referenced by name (the diphthong rules spell \u0308 literally)
24 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
25 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
26 $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
27 $lcgvowelC = [$lcgvowel $accent] ; # lowercase Greek vowel or a combining mark; not referenced by any rule in this file
28 $evowel = [aeiouyAEIOUY]; # Latin vowels (including y) of either case
29 $vowel = [ $evowel $gvowel] ; # any Latin or Greek vowel; used as right context by the ASCII-completeness rules for u/w
30 $beforeLower = $accent * $lower ; # right context "optional marks, then a lowercase letter"; selects title-case output (Ps, Th, Ch, Μπ)
31 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which gamma is written n (see the gamma and nu rules)
32 $egammaLike = [GKXCgkxc] ; # Latin-side counterpart of $gammaLike, used as context for the reverse direction
33 $smooth = \u0313 ; # COMBINING COMMA ABOVE (smooth breathing)
34 $rough = \u0314 ; # COMBINING REVERSED COMMA ABOVE (rough breathing)
35 $iotasub = \u0345 ; # COMBINING GREEK YPOGEGRAMMENI (iota subscript)
36 $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # following letters that make a diphthong upsilon come out as v instead of f
37 $under = \u0331; # COMBINING MACRON BELOW; appended to Latin output to mark a variant so the reverse direction can recover the original Greek
38 $caron = \u030C; # COMBINING CARON; not referenced by name (the SHO rules spell \u030C literally)
39 $afterLetter = [:L:] [\'$accent]* ; # left context: a letter followed by any number of apostrophes or marks
40 $beforeLetter = [\'$accent]* [:L:] ; # right context: any number of apostrophes or marks followed by a letter
41 # Fix punctuation
42 # preserve original: a literal ':' or '?' gets $under appended so it stays distinct from the ':' / '?' that the two rules after them produce
43 \: ↔ \: $under ;
44 \? ↔ \? $under ;
45 \; ↔ \? ; # ';' on the Greek side corresponds to '?' on the Latin side
46 · ↔ \: ; # the raised dot on the Greek side corresponds to ':' on the Latin side
47 # Fix any ancient characters that creep in (forward-only rules: other accents collapse to the acute, breathings and iota subscript are dropped)
48 \u0342 → \u0301 ; # COMBINING GREEK PERISPOMENI -> COMBINING ACUTE ACCENT
49 \u0302 → \u0301 ; # COMBINING CIRCUMFLEX ACCENT -> COMBINING ACUTE ACCENT
50 \u0300 → \u0301 ; # COMBINING GRAVE ACCENT -> COMBINING ACUTE ACCENT
51 $smooth → ; # delete smooth breathing
52 $rough → ; # delete rough breathing
53 $iotasub → ; # delete combining iota subscript
54 ͺ → ; # delete the spacing iota subscript (presumably U+037A GREEK YPOGEGRAMMENI -- confirm the code point)
55 # need to have these up here so the rules don't mask: their Latin sides (i+$under, ps, o+$under) must be tried before the plain i / p / o rules further down
56 η ↔ i $under ; # eta shares the base letter i with iota; $under keeps the two apart
57 Η ↔ I $under ;
58 Ψ } $beforeLower ↔ Ps ; # capital psi before a lowercase letter gives title-case Ps
59 Ψ ↔ PS ; # any other capital psi gives PS
60 ψ ↔ ps ;
61 ω ↔ o $under ; # omega shares the base letter o with omicron; $under keeps the two apart
62 Ω ↔ O $under;
63 # at beginning or end of word, convert mp to b
64 [^[:L:]$accent] { μπ → b ; # lowercase mu-pi preceded by a character that is neither a letter nor a mark
65 μπ } [^[:L:]$accent] → b ; # lowercase mu-pi followed by a character that is neither a letter nor a mark
66 [^[:L:]$accent] { [Μμ][Ππ] → B ; # same left context for the remaining casings (all-lowercase was already taken by the rules above)
67 [Μμ][Ππ] } [^[:L:]$accent] → B ; # same right context for the remaining casings
68 μπ ← b ; # reverse direction: b always becomes mu-pi
69 Μπ ← B } $beforeLower ; # reverse direction: B before a lowercase letter becomes title-case mu-pi
70 ΜΠ ← B ; # reverse direction: any other B becomes all-caps mu-pi
71 # handle diphthongs ending with upsilon
72 ου ↔ ou ; # omicron-upsilon maps to ou in every case combination (four rules)
73 ΟΥ ↔ OU ;
74 Ου ↔ Ou ;
75 οΥ ↔ oU ;
76 $fmaker = [aeiAEI] $under ? ; # left context: a Latin a, e or i, optionally followed by $under (the form the eta rule produces)
77 $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate (so every nonspacing mark except \u0308 is matched here)
78 $fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; # upsilon after a/e/i and before a softener becomes v+$under; marks that followed the upsilon are emitted before the v
79 υ $1 ← ( $shiftForwardVowels )* v $under ; # reverse: marks followed by v+$under become upsilon followed by those marks
80 $fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; # upsilon after a/e/i in any other position becomes f+$under, marks again moved in front
81 υ $1 ← ( $shiftForwardVowels )* f $under ; # reverse: marks followed by f+$under become upsilon followed by those marks
82 $fmaker { Υ } $softener ↔ V $under ; # capital upsilon after a/e/i and before a softener
83 $fmaker { Υ ↔ U $under ; # capital upsilon after a/e/i otherwise. NOTE(review): the lowercase counterpart emits f+$under, so U here looks inconsistent -- confirm against the UNGEGN table
84 υ ↔ y ; # any remaining upsilon
85 Υ ↔ Y ;
86 # NORMAL (letter-by-letter mappings; rules with a context are listed before the context-free rule for the same letter)
87 α ↔ a ;
88 Α ↔ A ;
89 β ↔ v ;
90 Β ↔ V ;
91 γ } $gammaLike ↔ n } $egammaLike ; # gamma before a gamma-like letter is written n; reverse: n before G/K/X/C becomes gamma
92 γ ↔ g ;
93 Γ } $gammaLike ↔ N } $egammaLike ;
94 Γ ↔ G ;
95 δ ↔ d ;
96 Δ ↔ D ;
97 ε ↔ e ;
98 Ε ↔ E ;
99 ζ ↔ z ;
100 Ζ ↔ Z ;
101 θ ↔ th ;
102 Θ } $beforeLower ↔ Th ; # capital theta before a lowercase letter gives title-case Th
103 Θ ↔ TH ;
104 ι ↔ i ;
105 Ι ↔ I ;
106 κ ↔ k ;
107 Κ ↔ K ;
108 λ ↔ l ;
109 Λ ↔ L ;
110 μ ↔ m ;
111 Μ ↔ M ;
112 ν } $gammaLike → n\' ; # nu before a gamma-like letter gets an apostrophe so it is not read back as gamma. NOTE(review): forward-only here while the capital rule two lines down is two-way -- confirm this asymmetry is intended
113 ν ↔ n ;
114 Ν } $gammaLike ↔ N\' ;
115 Ν ↔ N ;
116 ξ ↔ x ;
117 Ξ ↔ X ;
118 ο ↔ o ;
119 Ο ↔ O ;
120 π ↔ p ;
121 Π ↔ P ;
122 ρ ↔ r ;
123 Ρ ↔ R ;
124 # insert separator before things that turn into s (an apostrophe between an already-produced P/p and a following sigma-like letter, so the result differs from the ps/PS/Ps written for psi)
125 [Pp] { } [ςσΣϷϸϺϻ] → \' ;
126 # special S variants
127 Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L (S + combining caron)
128 ϸ ↔ s\u030C ; # ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L (s + combining caron)
129 Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L (S + combining circumflex)
130 ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L (s + combining circumflex)
131 # Caron means exception -- NOTE(review): the sigma rules below mark the exceptional form with $under (\u0331), not with a caron; this heading looks stale, confirm
132 # before a letter, initial (non-final position: plain sigma is the regular form, final-form sigma is the exception)
133 ς } $beforeLetter ↔ s $under } $beforeLetter;
134 σ } $beforeLetter ↔ s } $beforeLetter;
135 # otherwise, after a letter = final (final position: final-form sigma is the regular form, plain sigma is the exception)
136 $afterLetter { σ ↔ $afterLetter { s $under;
137 $afterLetter { ς ↔ $afterLetter { s ;
138 # otherwise (isolated) = initial (no letter on either side: treated like the non-final case)
139 ς ↔ s $under;
140 σ ↔ s ;
141 # [Pp] { Σ ↔ \'S ; (disabled rule, kept as found)
142 Σ ↔ S ;
143 τ ↔ t ;
144 Τ ↔ T ;
145 φ ↔ f ;
146 Φ ↔ F ;
147 χ ↔ ch ;
148 Χ } $beforeLower ↔ Ch ; # capital chi before a lowercase letter gives title-case Ch
149 Χ ↔ CH ;
150 # Completeness for ASCII (reverse-only rules: Latin letters with no rule of their own are rewritten to a Latin spelling that has one; the leading | puts the cursor before the rewritten text so it is processed again)
151 # $ignore = [[:Mark:]''] * ;
152 | ch ← h ; # h is treated as ch
153 | k ← c ; # c is treated as k
154 | i ← j ; # j is treated as i
155 | k ← q ; # q is treated as k
156 | b ← u } $vowel ; # u before a vowel is treated as b
157 | b ← w } $vowel ; # w before a vowel is treated as b
158 | y ← u ; # any other u is treated as y
159 | y ← w ; # any other w is treated as y
160 | Ch ← H ; # uppercase counterparts of the rules above
161 | K ← C ;
162 | I ← J ;
163 | K ← Q ;
164 | B ← W } $vowel ;
165 | B ← U } $vowel ;
166 | Y ← W ;
167 | Y ← U ;
168 # Completeness for Greek (forward-only rules: symbol and variant letter forms are replaced by the ordinary letter; the | puts the cursor before the replacement so the ordinary letter's rule then applies)
169 ϐ → | β ; # beta symbol
170 ϑ → | θ ; # theta symbol
171 ϒ → | Υ ; # upsilon with hook symbol
172 ϕ → | φ ; # phi symbol
173 ϖ → | π ; # pi symbol
174 ϰ → | κ ; # kappa symbol
175 ϱ → | ρ ; # rho symbol
176 ϲ → | σ ; # lunate sigma symbol
177 Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
178 ϳ → j ; # yot maps straight to Latin j (no cursor reset, the output is final)
179 ϴ → | Θ ; # capital theta symbol
180 ϵ → | ε ; # lunate epsilon symbol
181 µ → | μ ; # presumably U+00B5 MICRO SIGN rewritten to Greek mu -- confirm the code point
182 # delete any trailing ' marks used for roundtripping (reverse-only rules with an empty Greek side, so the matched apostrophe is removed)
183 ← [Ππ] { \' } [Ss] ; # apostrophe between an already-converted pi and a following S/s
184 ← [Νν] { \' } $egammaLike ; # apostrophe between an already-converted nu and a following G/K/X/C
185 ::NFC (NFD) ; # forward: recompose to NFC after the rules; the parenthesised NFD is the counterpart step used when the rule set is run in reverse
186 # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
187 :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; # parenthesised, so this global filter applies to the reverse (Latin -> Greek) direction: Latin-script characters, Mn/Me marks, apostrophe, ':' and '?'
188