1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
4 # File: und_FONIPA_fa.txt
10 # In these rules, we produce ی و ا both for short and for long vowels.
11 # This would be wrong for writing Farsi or Arabic, but when transliterating
12 # foreign words and names, it is strongly preferred to vowel marks.
13 # Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
14 # unless at the end of the word, in which case we emit ه whose Farsi
15 # word-final pronunciation comes close to [ə]. At the beginning of words,
16 # Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
17 # dark vowels; note that this use of آ is quite different from Arabic.
# Character classes shared by the rules in this file. Inside a [...] set,
# a {...} group is a multi-character string (here: a base letter plus a
# combining diacritic, or a single letter) that is matched as one element.
#
# $IVowel: vowels that the word-initial $IVowel rule writes as ای.
18 $IVowel = [i ɪ e {e\u031E}];
# $UVowel: vowels that the word-initial $UVowel rule writes as او. The set
# also contains w, w\u0325, ʍ and the modifier letter ʷ.
19 $UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];
# $AVowel: vowels that the word-initial $AVowel rule writes as ا.
20 $AVowel = [ɛ œ ɜ æ ɶ];
21 $DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words
# $SchwaVowel: schwa and schwa-like vowels; see the header comment for how
# they are treated depending on their position in the word.
22 $SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
# $Click: click letters, referenced by the "velar stop + click" rule.
23 $Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# $Boundary: any character that is NOT a letter (L), mark (M) or number (N).
# Used as left-hand context by the word-initial vowel rules.
24 $Boundary = [^[:L:][:M:][:N:]];
# Delete modifiers from the input; the empty right-hand side removes the match.
# Covered: aspiration letters (ʰ ʱ), the ejective mark (ʼ), combining tilde
# above/below (\u0303 \u0330), combining tone accents (\u030B \u0301 \u0304
# \u0300 \u030F \u030C \u0302), tone letters (˥ ˦ ˧ ˨ ˩), downstep/upstep
# (ꜜ ꜛ), the rise/fall arrows (↗ ↘), tie bars (\u0361 \u035C) and the
# combining inverted breve below (\u032F).
26 [ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ;
32 # TODO: Diphthongs probably need more work.
33 # Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
# Matches three segments in a row: a $UVowel, then j or an $IVowel, then
# e / e\u031E / a $SchwaVowel. The rule has no {...} context braces, so all
# three segments are consumed and replaced by the literal string "uia".
# NOTE(review): this also rewrites the leading vowel to "u" and j to "i",
# whereas the example above only changes e to a -- confirm this is intended.
34 $UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
35 # Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# Vowels at word edges. In a rule of the form "before {match} after → out",
# only the text inside {...} is replaced; text outside the braces is context
# that must be present but is left untouched. "ː?" lets an optional length
# mark be consumed together with the vowel.
#
# Schwa-like vowel right after a $Boundary character is written as: ای
39 $Boundary {$SchwaVowel ː?} → ای;
# Schwa-like vowel or e / e\u031E is written as ه when the next character is
# not a letter, mark, number or period.
# NOTE(review): the period is presumably excluded because "." is the syllable
# marker that a later pass strips -- confirm against the full file.
41 {[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;
# $IVowel right after a $Boundary character is written as: ای
43 $Boundary {$IVowel ː?} → ای;
# $UVowel right after a $Boundary character is written as: او
45 $Boundary {$UVowel ː?} → او;
# $AVowel right after a $Boundary character is written as: ا
47 $Boundary {$AVowel ː?} → ا;
# $DarkAVowel right after a $Boundary character is written as: آ
49 $Boundary {$DarkAVowel ː?} → آ;
51 # Shadda for long (geminated) consonants
# One of ɡ, g, ɠ or k immediately followed by a $Click letter: both
# characters are consumed and written as: کچ
56 [ɡ g ɠ k] $Click → کچ;
# n, ɳ and ɲ, including their variants carrying \u033C, \u030A or \u0325
# (alone or combined as listed), are all written as: ن
60 [{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
# ŋ or ɴ (or their \u030A / \u0325 variants) followed by k; both are consumed.
# This rule is listed before the next one, which would otherwise match the
# nasal on its own and leave the k to be handled separately.
61 [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;
# ŋ or ɴ (or their variants), together with a directly following ɡ or g if
# there is one, is written as: نگ
62 [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;
# d, d\u033C, ɗ and ᶑ are all written as: د
66 [{d\u033C} d ɗ ᶑ] → د;
81 # Non-sibilant fricatives
# θ, plus its variants with \u033C or \u0331, is written as: ث
84 [{θ\u033C} θ {θ\u0331}] → ث;
# ð, plus its variants with \u033C or \u0320, is written as: ذ
85 [{ð\u033C} ð {ð\u0320}] → ذ;
93 # Approximants, trills, flaps
# ɹ, ɻ, ɾ, ɽ and r, including the listed variants carrying \u0325, \u030A or
# \u033C, are all written as: ر
97 [{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
# ʎ (or ʎ\u0325) followed by a character that is NOT an $IVowel, j or ʝ is
# written with an extra ی. Only the ʎ inside {...} is replaced; the following
# character is context and is left for later rules.
105 {[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;
# All remaining laterals are written as ل. ʎ appears in this set too, so it
# reaches this rule whenever the context of the preceding rule does not match
# (e.g. when it is followed by an $IVowel, j or ʝ).
106 [{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
108 # Independent pass for misc cleanup.
110 # Strip off syllable markers
112 # Sequences of three or more ووو look very confusing; we shorten them.
113 # Polish Darłowo [darwɔvɔ] → داروو → داروووو