]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Grek_Latn.txt
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Grek_Latn.txt
CommitLineData
73c04bcf
A
1# ***************************************************************************
2# *
2ca993e8 3# * Copyright (C) 2004-2016, International Business Machines
73c04bcf
A
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
2ca993e8 7# File: Grek_Latn.txt
46f4442e 8# Generated from CLDR
73c04bcf 9#
2ca993e8
A
10
11# Rules are predicated on running NFD first, and NFC afterwards
12# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
13# MINIMAL FILTER GENERATED FOR: Greek-Latin
73c04bcf
A
# Global filter for the forward (Greek-Latin) direction: only characters in
# this set are touched by the rules below.
# NOTE(review): the range Ϸ-\u07FB also covers the Cyrillic letters that are
# listed individually right after it (Ё, Ї, ...); confirm whether \u03FB (ϻ)
# was the intended upper bound.
14:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
# Decompose (NFD) before the forward rules run; the parenthesized NFC is the
# transform used at this position when the rule set runs in reverse.
15:: NFD (NFC) ;
2ca993e8
A
16# TEST CASES
17# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
18# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
19# ᾳ ῃ ῳ ὃ ὄ
20# ὠς ὡς ὢς ὣς
21# Ὠς Ὡς Ὢς Ὣς
22# ὨΣ ὩΣ ὪΣ ὫΣ
23# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
24# Useful variables
73c04bcf
A
# Lowercase (Ll) letters of the Latin or Greek script.
25$lower = [[:latin:][:greek:] & [:Ll:]];
# Lowercase (Ll) Greek letters only.
26$glower = [[:greek:] & [:Ll:]];
# Uppercase (Lu) letters of the Latin or Greek script.
27$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any mark (general category M).
374ca955 28$accent = [:M:] ;
2ca993e8
A
29# NOTE: restrict to just the Greek & Latin accents that we care about
30# TODO: broaden out once iteration is fixed
# Marks in U+0300..U+0345, excluding U+0338.
374ca955 31$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
374ca955
A
# Combining macron, combining diaeresis, and the set containing either.
32$macron = \u0304 ;
33$ddot = \u0308 ;
34$ddotmac = [$ddot$macron];
73c04bcf
A
# Greek vowels: lowercase, uppercase, both; $lcgvowelC is lowercase vowels or any mark.
35$lcgvowel = [αεηιουω] ;
36$ucgvowel = [ΑΕΗΙΟΥΩ] ;
37$gvowel = [$lcgvowel $ucgvowel] ;
38$lcgvowelC = [$lcgvowel $accent] ;
374ca955
A
# Latin vowels (y included); $evowel2 is the subset i/u/y in either case.
39$evowel = [aeiouyAEIOUY];
40$evowel2 = [iuyIUY];
73c04bcf
A
# Latin or Greek vowel.
41$vowel = [ $evowel $gvowel] ;
# Greek letters (and Latin counterparts) used as context by the gamma and nu rules.
42$gammaLike = [ΓΚΞΧγκξχϰ] ;
43$egammaLike = [GKXCgkxc] ;
# Combining breathing marks and the iota subscript (ypogegrammeni).
44$smooth = \u0313 ;
45$rough = \u0314 ;
46$iotasub = \u0345 ;
374ca955
A
# The Latin vowel sets with i/I removed.
47$evowel_i = [$evowel-[iI]] ;
48$evowel2_i = [uyUY];
# U+0331; note that $under further down is defined as the same code point.
374ca955 49$underbar = \u0331;
374ca955
A
# A letter followed by any marks/apostrophes, and the mirror image of that.
50$afterLetter = [:L:] [[:M:]\']* ;
51$beforeLetter = [[:M:]\']* [:L:] ;
# Optional marks followed by a lowercase Latin/Greek letter.
73c04bcf 52$beforeLower = $accent * $lower ;
# Anything that is neither a letter nor a mark.
374ca955 53$notLetter = [^[:L:][:M:]] ;
# Same code point as $underbar above.
73c04bcf 54$under = \u0331;
2ca993e8
A
55# Fix punctuation
56# preserve original
729e4ab9
A
# A ':' or '?' already present in the Greek-side text is tagged with U+0331
# ($under) so that it stays distinct from the ':' and '?' produced from the
# Greek punctuation below and can be restored in the reverse direction.
57\: ↔ \: $under ;
58\? ↔ \? $under ;
# Semicolon <-> question mark, and middle dot <-> colon.
59\; ↔ \? ;
60· ↔ \: ;
2ca993e8 61# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
729e4ab9 62\u0342 ↔ \u0302 ;
2ca993e8
A
63# IOTA: convert iota subscript to iota
64# first make previous alpha long!
# Any mark except the iota subscript and the macron (distinct from $accentMinus).
374ca955 65$accent_minus = [[$accent]-[$iotasub$macron]];
729e4ab9
A
# Alpha followed (after other marks) by an iota subscript gets a macron; the
# cursor (|) is placed before the alpha so the result is processed again.
66Α } $accent_minus * $iotasub → | Α $macron ;
67α } $accent_minus * $iotasub → | α $macron ;
2ca993e8 68# now convert to uppercase if after uppercase, otherwise to lowercase
729e4ab9
A
69$upper $accent * { $iotasub → I ;
70$iotasub → i ;
# Reverse only: an i/I following a Latin vowel with macron (plus optional
# accents from $accentMinus) becomes an iota subscript again.
71| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
72| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
2ca993e8
A
73# BREATHING
74# Convert rough breathing to h, and move before letters.
75# Titlecase: capital vowel + rough breathing before a lowercase letter (A ` x) becomes H a x
729e4ab9
A
# Capital Greek vowel carrying the rough breathing, followed by a lowercase
# letter: emit H and leave the lowercased vowel (plus any captured macron or
# diaeresis) after the cursor so it is transliterated by later rules.
76Α ($macron?) $rough } $beforeLower → H | α $1;
77Ε $rough } $beforeLower → H | ε;
78Η $rough } $beforeLower → H | η ;
51004dcb 79Ι ($ddot?) $rough } $beforeLower → H | ι $1;
729e4ab9
A
80Ο $rough } $beforeLower → H | ο ;
81Υ $rough } $beforeLower → H | υ ;
82Ω ($ddot?) $rough } $beforeLower → H | ω $1;
2ca993e8 83# Same when the rough breathing sits on a following lowercase Greek letter: A x ` → H a x
729e4ab9
A
84Α ($glower $macron?) $rough → H | α $1 ;
85Ε ($glower) $rough → H | ε $1 ;
86Η ($glower) $rough → H | η $1 ;
87Ι ($glower $ddot?) $rough → H | ι $1 ;
88Ο ($glower) $rough → H | ο $1 ;
89Υ ($glower) $rough → H | υ $1 ;
51004dcb 90Ω ($glower $ddot?) $rough → H | ω $1 ;
2ca993e8 91# Otherwise, make x ` into h x and X ` into H X
729e4ab9
A
# The all-lowercase rule comes first; any remaining vowel run gets H.
92($lcgvowel + $ddotmac? ) $rough → h | $1 ;
93($gvowel + $ddotmac? ) $rough → H | $1 ;
2ca993e8 94# Go backwards with H
729e4ab9
A
# Reverse only: h/H followed by a Latin vowel group becomes that group with a
# rough breathing appended; longer vowel groups are tried before shorter ones.
95| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
96| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
97| $1 $rough ← h ($evowel $macron? $ddot?) ;
98| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
99| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
100| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
2ca993e8
A
101# titlecase, have to fix individually
102# in the future, we should add &uppercase() to make this easier
51004dcb
A
# Reverse only: "H" + lowercase Latin vowel (titlecase input) becomes the
# capital vowel, the captured marks/second vowel, and a rough breathing.
# One rule per vowel because the rule syntax has no uppercase function here.
# First group: vowel with macron followed by u/y.
103| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
104| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
105| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
106| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
729e4ab9
A
107| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
108| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
# Second group: vowel followed by i/u/y.
109| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
110| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
111| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
112| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
113| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
114| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
# Third group: single vowel with optional macron/diaeresis.
115| A $1 $rough ← H a ($macron? $ddot? ) ;
116| E $1 $rough ← H e ($macron? $ddot? ) ;
117| I $1 $rough ← H i ($macron? $ddot? ) ;
118| O $1 $rough ← H o ($macron? $ddot? ) ;
119| U $1 $rough ← H u ($macron? $ddot? ) ;
120| Y $1 $rough ← H y ($macron? $ddot? ) ;
2ca993e8
A
121# Now do smooth
122#delete smooth breathing for Latin
# Forward: the smooth breathing has no Latin representation and is dropped.
729e4ab9 123$smooth → ;
2ca993e8
A
124# insert in Greek
125# the assumption is that all Marks are on letters.
729e4ab9
A
# Reverse only: after a non-letter, add a smooth breathing to an initial r/R
# not followed by h/H, or to an initial vowel group, unless a breathing mark
# already follows. The two-vowel form is tried before the single-vowel form.
126| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
127| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
128| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
2ca993e8
A
129# TODO: preserve smooth/rough breathing if not
130# on initial vowel sequence
131# need to have these up here so the rules don't mask
132# remove now superfluous macron when returning
729e4ab9
A
# Reverse only: A/a with macron maps back to plain alpha (the macron is dropped).
133Α ← A $macron ;
134α ← a $macron ;
# Eta <-> e with macron.
135η ↔ e $macron ;
136Η ↔ E $macron ;
# Phi and psi become digraphs; a capital before a lowercase letter yields the
# titlecase digraph (Ps, Ph), otherwise the all-caps digraph (PS, PH).
137φ ↔ ph ;
138Ψ } $beforeLower ↔ Ps ;
139Ψ ↔ PS ;
140Φ } $beforeLower ↔ Ph ;
141Φ ↔ PH ;
142ψ ↔ ps ;
# Omega <-> o with macron.
143ω ↔ o $macron ;
51004dcb 144Ω ↔ O $macron;
2ca993e8 145# NORMAL
729e4ab9
A
# One-to-one letter mappings, alpha through rho. Context-dependent rules are
# listed before the plain rule for the same letter so they are matched first.
146α ↔ a ;
147Α ↔ A ;
148β ↔ b ;
149Β ↔ B ;
# Gamma before a $gammaLike letter is written n (N); the reverse rule maps n
# before a $egammaLike Latin letter back to gamma.
150γ } $gammaLike ↔ n } $egammaLike ;
151γ ↔ g ;
152Γ } $gammaLike ↔ N } $egammaLike ;
153Γ ↔ G ;
154δ ↔ d ;
155Δ ↔ D ;
156ε ↔ e ;
157Ε ↔ E ;
158ζ ↔ z ;
159Ζ ↔ Z ;
# Theta: th / Th (before lowercase) / TH.
160θ ↔ th ;
161Θ } $beforeLower ↔ Th ;
162Θ ↔ TH ;
163ι ↔ i ;
164Ι ↔ I ;
165κ ↔ k ;
166Κ ↔ K ;
167λ ↔ l ;
168Λ ↔ L ;
169μ ↔ m ;
170Μ ↔ M ;
# Nu before a $gammaLike letter gets a trailing apostrophe so it is not
# confused with the n produced from gamma above.
# NOTE(review): the lowercase rule is forward-only (→) while the uppercase
# rule is bidirectional (↔); confirm whether the asymmetry is intended.
171ν } $gammaLike → n\' ;
172ν ↔ n ;
173Ν } $gammaLike ↔ N\' ;
174Ν ↔ N ;
175ξ ↔ x ;
176Ξ ↔ X ;
177ο ↔ o ;
178Ο ↔ O ;
179π ↔ p ;
180Π ↔ P ;
# Rho with rough breathing: rh / Rh (before lowercase) / RH; plain rho is r.
181ρ $rough ↔ rh;
182Ρ $rough } $beforeLower ↔ Rh ;
183Ρ $rough ↔ RH ;
184ρ ↔ r ;
185Ρ ↔ R ;
2ca993e8 186# insert separator before things that turn into s
# Forward only: insert an apostrophe between an already-produced P/p and a
# following sigma-like letter.
729e4ab9 187[Pp] { } [ςσΣϷϸϺϻ] → \' ;
2ca993e8 188# special S variants
729e4ab9
A
189Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
190ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
191Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
192ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
2ca993e8
A
193# underbar means exception
194# before a letter, initial
729e4ab9
A
# Before a letter: σ is the regular form (plain s); ς there is the exception
# and is tagged with the underbar.
195ς } $beforeLetter ↔ s $underbar } $beforeLetter;
196σ } $beforeLetter ↔ s } $beforeLetter;
2ca993e8 197# otherwise, after a letter = final
729e4ab9
A
# After a letter (and not before one): ς is the regular form; σ is the exception.
198$afterLetter { σ ↔ $afterLetter { s $underbar;
199$afterLetter { ς ↔ $afterLetter { s ;
2ca993e8 200# otherwise (isolated) = initial
729e4ab9
A
201ς ↔ s $underbar;
202σ ↔ s ;
2ca993e8 203# [Pp] { Σ ↔ \'S ;
729e4ab9
A
204Σ ↔ S ;
205τ ↔ t ;
206Τ ↔ T ;
# Upsilon is u/U after a vowel (Latin or Greek), otherwise y/Y.
207$vowel {υ } ↔ u ;
208υ ↔ y ;
209$vowel { Υ ↔ U ;
210Υ ↔ Y ;
# Chi: ch / Ch (before lowercase) / CH.
211χ ↔ ch ;
212Χ } $beforeLower ↔ Ch ;
213Χ ↔ CH ;
2ca993e8 214# Completeness for ASCII
# Zero or more marks or apostrophes.
374ca955 215$ignore = [[:Mark:]''] * ;
# Reverse only: Latin letters with no direct rule above are rewritten to a
# Latin letter/digraph that has one; the cursor (|) is placed before the
# replacement so it is then converted by the earlier rules.
51004dcb 216| k ← c ;
729e4ab9 217| ph ← f ;
51004dcb 218| i ← j ;
729e4ab9
A
219| k ← q ;
# v and w become b before a vowel, otherwise u.
220| b ← v } $vowel ;
221| b ← w } $vowel;
222| u ← v ;
223| u ← w;
224| K ← C ;
225| Ph ← F ;
226| I ← J ;
227| K ← Q ;
51004dcb
A
228| B ← V } $vowel ;
229| B ← W } $vowel ;
729e4ab9
A
230| U ← V ;
231| U ← W ;
# Any rough breathing still left: H when next to an uppercase letter
# (ignoring marks/apostrophes in between), otherwise h. In reverse, H and h
# both become the rough breathing.
232$rough } $ignore [:UppercaseLetter:] → H ;
233$ignore [:UppercaseLetter:] { $rough → H ;
234$rough ← H ;
235$rough ↔ h ;
2ca993e8 236# Completeness for Greek
729e4ab9
A
# Forward only: Greek symbol/variant letter forms are rewritten to the basic
# letter, with the cursor (|) placed before it so the basic-letter rules then
# apply. ϳ and ͺ map straight to their Latin output.
237ϐ → | β ;
238ϑ → | θ ;
239ϒ → | Υ ;
240ϕ → | φ ;
241ϖ → | π ;
242ϰ → | κ ;
243ϱ → | ρ ;
244ϲ → | σ ;
245Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
246ϳ → j ;
247ϴ → | Θ ;
248ϵ → | ε ;
249µ → | μ ;
250ͺ → i;
2ca993e8 251# delete any trailing ' marks used for roundtripping
729e4ab9
A
# Reverse only: drop the apostrophe between pi and S/s, and between nu and a
# $egammaLike Latin letter.
252← [Ππ] { \' } [Ss] ;
253← [Νν] { \' } $egammaLike ;
# Recompose (NFC) after the forward rules; the parenthesized NFD is the
# transform used at this position when the rule set runs in reverse.
374ca955 254::NFC (NFD) ;
2ca993e8
A
255# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
256# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
257# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
# Global filter for the reverse (Latin-Greek) direction; the parentheses mark
# it as applying only when the rule set is run backwards.
73c04bcf 258:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
2ca993e8 259