]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # File: und_FONIPA_ar.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
2ca993e8 A |
6 | # |
7 | ||
8 | # Vowels | |
9 | # ------ | |
10 | # In these rules, we produce ي و ا both for short and for long vowels. | |
11 | # This would be wrong for writing Arabic, but when transliterating | |
12 | # foreign words and names, it is strongly preferred to vowel marks. | |
13 | # However, we omit short schwa [ə] and a few other schwa-like vowels, except directly after a boundary or when they are long. | |
14 | $IVowel = [i ɪ e {e\u031E}]; # vowels that the general vowel rule renders as ي | |
15 | $UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɞ ɔ w {w\u0325} ʍ ʷ]; # rendered as و; also holds w, ʍ and ʷ | |
16 | $AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ\u0308} ɑ ɒ]; # rendered as ا | |
17 | $SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; # deleted when short and not directly after $Boundary | |
18 | $Vowel = [$IVowel $UVowel $AVowel $SchwaVowel]; # union of the four classes above | |
19 | $Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; # NOTE(review): ʄ also appears in the ش rule under Sibilant fricatives, which comes after the $Click rules - confirm intent | |
20 | $Boundary = [^[:L:][:M:][:N:]]; # any character that is not a letter, mark or number | |
21 | ::NFD; # decompose, so the combining marks listed below can be matched on their own | |
22 | [ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; # delete these modifier letters, combining marks, tone letters and tie bars | |
23 | ʲ → j; # this and the next three rules turn superscript modifier letters into full letters | |
24 | ᵐ → m; | |
25 | ⁿ → n; | |
26 | ᵑ → ŋ; | |
27 | ::NFC; # recompose before the remaining rules run | |
28 | # Diphthong rewrites. These rules output IPA, not Arabic; the Arabic letters are produced by the rules after ::NULL. TODO: Diphthongs probably need more work. | |
29 | # Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك | |
30 | $UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia; | |
31 | # Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz] (U+032F has already been deleted by the mark-stripping rule above); TODO: state the intended Arabic output | |
32 | yʉ → iu; | |
33 | ::NULL; | |
34 | # Vowels. Only the text inside { } is replaced; $Boundary or [$Vowel] outside the braces is context. Within each vowel class the context-specific rules come before the general rule, so order matters. U+0650 is kasra. | |
35 | $Boundary {ʔ? $IVowel ː} → إ\u0650ي; | |
36 | $Boundary {ʔ? $IVowel} → إ\u0650; | |
37 | {$IVowel ʔ} $Boundary → ئ; | |
38 | {$IVowel ː ʔ} $Boundary → يء; | |
39 | {$IVowel ː ʔ} [$Vowel] → ئ; | |
40 | $IVowel ː? → ي; | |
41 | $Boundary {ʔ? $UVowel ː} → أو; | |
42 | $Boundary {ʔ? $UVowel} → أ; | |
43 | {$UVowel ʔ} $Boundary → ؤ; | |
44 | {$UVowel ː ʔ} $Boundary → وء; | |
45 | $UVowel ː? → و; | |
46 | $Boundary {ʔ? $AVowel ː} → آ; | |
47 | $Boundary {ʔ? $AVowel} → أ; | |
48 | {$AVowel ʔ} $Boundary → أ; | |
49 | {$AVowel ː ʔ} $Boundary → اء; | |
50 | $AVowel ː? ʔ $AVowel ː? → اءا; | |
51 | $AVowel ː? → ا; | |
52 | $Boundary {ʔ? $SchwaVowel ː} → إ\u0650ي; | |
53 | $Boundary {ʔ? $SchwaVowel} → أ; | |
54 | $SchwaVowel ː → ي; | |
55 | $SchwaVowel → ; | |
56 | # TODO: Handle glottal stop. For now any ʔ not consumed by the vowel rules above is deleted. | |
57 | ʔ → ; | |
58 | # Shadda for long (geminated) consonants: any ː not consumed by the vowel rules above becomes U+0651 | |
59 | ː → \u0651; | |
60 | # Affricates (U+0652 is sukun). NOTE(review): U+0361 is deleted by the earlier mark-stripping rule, so the {t\u0361ʃ} alternative presumably never matches - confirm. | |
61 | [{t\u0361ʃ} ʧ] → ت\u0652ش; | |
62 | # Clicks: a g-like stop or k followed by a click is written ك + sukun + ش; a bare click gets the same output as the affricate rule | |
63 | [ɡ g ɠ k] $Click → ك\u0652ش; | |
64 | $Click → ت\u0652ش; | |
65 | # Nasal stops. [ŋ] and [ɴ] consume a following stop: with k the pair becomes نك, otherwise (with or without a following g-like stop) ن + sukun + غ. | |
66 | [{m\u0325} m ɱ] → م; | |
67 | [{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن; | |
68 | [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نك; | |
69 | [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g ɠ]? → ن\u0652غ; | |
70 | # Non-nasal stops. [p] and [b] both map to ب, and the g-like stops map to غ. | |
71 | [p b {p\u032A} {b\u032A} ɓ] → ب; | |
72 | [{d\u033C} d ɗ ᶑ] → د; | |
73 | [{t\u033C} t] → ت; | |
74 | [ʈ] → ط; | |
75 | [ɖ] → ض; | |
76 | c → ت\u0652ش; | |
77 | ɟ → دج; | |
78 | k → ك; | |
79 | [ɡ g ɠ] → غ; | |
80 | [q ɢ ʡ ʛ] → ق; | |
81 | # Sibilant fricatives. NOTE(review): ʄ is also a member of $Click, whose rules come earlier in this pass, so it presumably never reaches the ش rule - confirm. | |
82 | s → س; | |
83 | z → ز; | |
84 | [ʃ ʂ ɕ ʄ] → ش; | |
85 | [ʒ ʐ ʑ] → ج; | |
86 | # Non-sibilant fricatives. ʝ also consumes a following i-like vowel and length mark, so only one ي is written. NOTE(review): a bare ʔ is deleted by an earlier rule in this pass, so {ʔ\u031E} presumably never matches - confirm. | |
87 | [ɸ f v] → ف; | |
88 | β → ب; | |
89 | [{θ\u033C} θ {θ\u0331}] → ث; | |
90 | [{ð\u033C} ð {ð\u0320}] → ذ; | |
91 | ç → ش; | |
92 | ʝ $IVowel? ː? → ي; | |
93 | [x χ] → خ; | |
94 | [ɣ ʁ] → غ; | |
95 | ħ → ح; | |
96 | ʕ → ع; | |
97 | [h ɦ {ʔ\u031E}] → ه; | |
98 | # Approximants, trills, flaps. {r\u031D} is listed before the plain r rule so the marked form wins; j consumes a following i-like vowel and length mark, so only one ي is written. | |
99 | ʋ → و; | |
100 | ʙ → بر; | |
101 | {r\u031D} → رش; | |
102 | [{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر; | |
103 | [{ʀ\u0325} ʀ] → غ; | |
104 | ʜ → ح; | |
105 | ʢ → ع; | |
106 | j $IVowel? ː? → ي; | |
107 | # Laterals. ʎ becomes لي when followed by a character other than an i-like vowel, j or ʝ; otherwise the general rule after it writes plain ل. | |
108 | ɬ → ش\u0652ل; | |
109 | ɮ → ج\u0652ل; | |
110 | {[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لي; | |
111 | [{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; | |
112 | [ʟ {ʟ\u0320}] → غ; | |
113 | # Independent pass for misc cleanup: the rules after ::NULL run on the output of the rules above. | |
114 | ::NULL; | |
115 | # Strip off syllable markers | |
116 | \. → ; | |
117 | # Runs of three or more و look very confusing; we shorten them to two. | |
118 | # Polish Darłowo [darwɔvɔ] → داروو, not داروووو | |
119 | ووو+ → وو; | |
120 |