1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
4 # File: am_am_FONIPA.txt
8 # Transforms Amharic (am) to Amharic in phonemic IPA transcription (am_FONIPA).
10 # Long vowels, long/geminated consonants:
11 # In the direction from am_FONIPA to am, we emit Ethiopic gemination
12 # and vowel length markers (U+135D, U+135E, U+135F) although
13 # they are rarely written in Amharic text. Exceptions include
14 # school books and textbooks for non-native speakers.
15 # Clients who do not want these markers can easily strip them off
16 # in a post-processing step.
19 # Amharic speakers will usually say ሟ as [mʷa] instead of [mwa];
20 # labializing [m] instead of saying [m] followed by a separate [w].
21 # Most Amharic consonants can get labialized. To keep the phonemic
22 # transcription simple, we emit /m/ + /w/; otherwise, our phoneme
23 # set would almost double, and it would include very unusual phonemes
24 # such as /ɲʷ/ or /t\u0361ʃʼʷ/.
27 # [1] The Ge’ez Frontier Foundation: “Principles and Specification
28 # for Mnemonic Ethiopic Keyboards.” Version of January 17, 2009;
29 # retrieved on November 4, 2014.
30 # http://keyboards.ethiopic.org/specification/GFF-MnemonicEthiopicKeyboardSpecification.pdf
31 # Unlike most online sources, this report uses correct IPA notation
32 # with the exception of /j/, which it consistently (but wrongly)
34 $IPA_VOWEL = [aeəiɨou];
35 $IPA_CONSONANT = [mnɲɴ p{pʼ}bt{tʼ}dk{kʼ}ɡʔʕ fvs{sʼ}zʃʒxh lr {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}];
36 # Some consonants have a special syllable when labialized, such as ፗ ↔ /pʷa/.
37 # Amharic restricts this mostly to /a/ syllables. While the Ethiopic script
38 # does offer labialized syllables for other vowels, these are typically
39 # not written in Amharic.
40 $LABIALIZABLE_BEFORE_A = [p{pʼ}t{tʼ} {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}{d\u0361ʒʼ} s{sʼ}zʃʒ fv r];
41 ← [ ʼ \u0361 \u035C \u032F];
43 # Appendix B of [1] transcribes ሀ as /hə/. However, according to
44 # an Amharic-speaking person, there is no /hə/ sequence
45 # in Amharic; instead, it gets pronounced as /ha/.
54 ሇ → ho; # Dizi, Me’en, Mursi, Suri /hɔ/ ([1], Appendix E); not used in Amharic.
63 ⶀ → lo; # Dizi, Me’en, Mursi, Suri /lɔ/ ([1], Appendix E); not used in Amharic.
66 # Appendix B of [1] transcribes ሐ as Voiceless pharyngeal fricative
67 # /ħə/. However, according to an Amharic-speaking person, Amharic
68 # makes no difference in pronunciation between ሐ...ሓ and ሀ...ሃ; both
69 # are pronounced as Voiceless glottal fricative /h/. Also, according
70 # to the speaker there is no /hə/ sequence in Amharic; instead, it
71 # gets pronounced as /ha/.
87 ⶁ → mo; # Dizi, Me’en, Mursi, Suri /mɔ/ ([1], Appendix E); not used in Amharic.
88 ᎀ → mwə; # Sebatbeit /mwə/ ([1], Appendix H); not used in Amharic.
89 ᎃ → mwu; # Sebatbeit /mwu/ ([1], Appendix H); not used in Amharic.
90 ᎁ → mwi; # Sebatbeit /mwi/ ([1], Appendix H); not used in Amharic.
92 ᎂ → mwe; # Sebatbeit /mwe/ ([1], Appendix H); not used in Amharic.
93 ፙ → mja; # Unclear which language; Appendix L of [1] transcribes ፙ as /mʲa/.
110 ⶂ → ro; # Dizi, Me’en, Mursi, Suri /rɔ/ ([1], Appendix E); not used in Amharic.
112 ፘ → rja; # Unclear which language; Appendix L of [1] transcribes ፘ as /rʲa/.
114 # Amharic speakers pronounce ⶠ like ሸ. Source: [1], Appendix B.
129 ⶄ → ʃo; # Dizi, Me’en, Mursi, Suri /ʃɔ/ ([1], Appendix E); not used in Amharic.
139 ቇ → kʼo; # Dizi, Me’en, Mursi, Suri /kʼɔ/ ([1], Appendix E); not used in Amharic.
146 # In Awngi, Blin, Qimant, and Xamtanga, ቐ is spoken as voiced uvular fricative [ʁ].
147 # Source: [1], Appendix C. However, */ʁ/ is not an Amharic phoneme.
148 # When reading foreign words with ቐ, Amharic speakers pronounce
149 # ቐ like ቀ, i.e. as velar ejective /kʼ/.
162 # In Sebatbeit, ⷀ is spoken as palatalized velar ejective /kʼʲ/ ([1], Appendix H).
163 # In Amharic, the syllable is not used, but it might appear in names.
178 ⶅ → bo; # Dizi, Me’en, Mursi, Suri /bɔ/ ([1], Appendix E); not used in Amharic.
179 ᎄ → bwə; # Sebatbeit /bʷə/ ([1], Appendix H); not used in Amharic.
180 ᎇ → bwu; # Sebatbeit /bʷu/ ([1], Appendix H); not used in Amharic.
181 ᎅ → bwi; # Sebatbeit /bʷi/ ([1], Appendix H); not used in Amharic.
182 ቧ → bwa; # Sebatbeit /bʷa/ ([1], Appendix H); not used in Amharic.
183 ᎆ → bwe; # Sebatbeit /bʷe/ ([1], Appendix H); not used in Amharic.
194 # Unclear which Ethiopic language uses ⶨ. It only appears in the
195 # “Language Neutral” list of Appendix L in [1], which transcribes it as t\u0361ʃ.
196 # For Amharic, we pronounce ⶨ therefore like ቸ.
204 # In Amharic, ኀ is pronounced like ሀ.
205 # Source: [1], section on “Phonological Redundancy” for Amharic, page 5.
206 # Appendix B of [1] transcribes ሀ as /hə/. However, according to
207 # an Amharic-speaking person, there is no /hə/ sequence in Amharic.
208 # Instead, ሀ (and hence also ኀ) gets pronounced as /ha/.
216 ኇ → ho; # Dizi, Me’en, Mursi, Suri /hɔ/ ([1], Appendix E); not used in Amharic.
229 ⶈ → no; # Dizi, Me’en, Mursi, Suri /nɔ/ ([1], Appendix E); not used in Amharic.
239 ⶉ → ɲo; # Dizi, Me’en, Mursi, Suri /ɲɔ/ ([1], Appendix E); not used in Amharic.
242 # Amharic speakers pronounce ኸ as [h] because Amharic has no [x] sound.
243 # However, in transliterations of foreign (e.g. Spanish) words with [x],
244 # several Amharic speakers have confirmed that they prefer ኻ over ሃ.
278 ⶊ → ʔo; # Dizi, Me’en, Mursi, Suri /ʔɔ/ ([1], Appendix E); not used in Amharic.
293 # In Sebatbeit, ⷈ is spoken as palatalized velar plosive /kʲ/ ([1], Appendix H).
294 # Amharic speakers pronounce it as /k/ without palatalization.
302 # In Sebatbeit, ⷐ is spoken as palatalized voiceless velar fricative /xʲə/
303 # according to [1], Appendix H. When the syllable appears in names,
304 # Amharic speakers pronounce it as /kə/ without palatalization.
319 ዏ → wo; # Dizi, Me’en, Mursi, Suri /wɔ/ ([1], Appendix E); not used in Amharic.
336 ⶋ → zo; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic.
348 # Unclear which Ethiopic language uses ⶰ. It only appears in the
349 # “Language Neutral” list of Appendix L in [1], which transcribes it as ʒ.
350 # For Amharic, we pronounce ⶰ therefore like ዠ.
365 ዯ → jo; # Dizi, Me’en, Mursi, Suri /jɔ/ ([1], Appendix E); not used in Amharic.
383 ⶌ → do; # Dizi, Me’en, Mursi, Suri /dɔ/ ([1], Appendix E); not used in Amharic.
399 # In Awngi, Blin, Qimant, and Xamtanga, ጘ is spoken as voiced velar nasal [ŋ].
400 # Source: [1], Appendix C. While /ŋ/ is not an Amharic phoneme, Amharic speakers
401 # still can pronounce it according to our source. However, when transliterating
402 # foreign words with [ŋ], Amharic uses the sequence ንግ /nɡ/. For example,
403 # the Amharic transliteration of Washington /waʃiŋtən/ is ዋሺንግተን.
416 # Since there is no uvular nasal [ɴ] in Amharic, we use the velar nasal [ŋ].
425 # In Sebatbeit, ⷘ is spoken as palatalized voiced velar stop /ɡʲ/ ([1], Appendix H).
426 # Amharic speakers pronounce it as voiced velar stop /ɡ/ without palatalization.
450 ⶐ → t\u0361ʃʼo; # Dizi, Me’en, Mursi, Suri /t\u0361ʃʼɔ/ ([1], Appendix E); not used in Amharic.
453 # According to Appendix B of [1], the following are used in the Bench language
454 # (aka Benchnon, Gimira). In Bench, ⶻ is pronounced as /ʈ\u0361ʂʼ/ Retroflex
455 # ejective affricate; with a phonemic distinction from the non-retroflex version.
456 # Amharic does not have retroflex phonemes, so we go with /t\u0361ʃʼ/.
480 ⶆ → to; # Dizi, Me’en, Mursi, Suri /tɔ/ ([1], Appendix E); not used in Amharic.
490 ⶑ → pʼo; # Dizi, Me’en, Mursi, Suri /pʼɔ/ ([1], Appendix E); not used in Amharic.
502 # In Amharic, ፀ is pronounced like ጸ.
503 # Source: [1], section on “Phonological Redundancy” for Amharic, page 5.
511 ፇ → sʼo; # Dizi, Me’en, Mursi, Suri /sʼɔ/ ([1], Appendix E); not used in Amharic.
512 # Amharic speakers pronounce ሰ like ሠ. Source: [1], Appendix B.
520 ⶃ → so; # Dizi, Me’en, Mursi, Suri /sɔ/ ([1], Appendix E); not used in Amharic.
530 ᎈ → fwə; # Sebatbeit /fwə/ ([1], Appendix H); not used in Amharic.
531 ᎉ → fwu; # Sebatbeit /fwu/ ([1], Appendix H); not used in Amharic.
532 ᎋ → fwi; # Sebatbeit /fwi/ ([1], Appendix H); not used in Amharic.
534 ᎊ → fwe; # Sebatbeit /fwe/ ([1], Appendix H); not used in Amharic.
535 ፚ → fja; # Unclear which language; Appendix L of [1] transcribes ፚ as /fʲa/.
544 ⶒ → po; # Dizi, Me’en, Mursi, Suri /pɔ/ ([1], Appendix E); not used in Amharic.
545 ᎌ → pwə; # Sebatbeit /pwə/ ([1], Appendix H); not used in Amharic.
546 ᎍ → pwu; # Sebatbeit /pwu/ ([1], Appendix H); not used in Amharic.
547 ᎏ → pwi; # Sebatbeit /pwi/ ([1], Appendix H); not used in Amharic.
549 ᎎ → pwe; # Sebatbeit /pwe/ ([1], Appendix H); not used in Amharic.
552 ኡ ← u; # ኡላዓን ባዓታር ← Ulaan Baatar /ulaʕan baʕatar/
553 አ ← a; # አምስተርዳም ← Amsterdam /amstərdam/
556 ኦ ← o; # ፖርት ኦፍ ስፔን ← Port of Spain /port of speːn/
557 ኢ ← i; # ኢስላማባድ ← Islamabad /islamabad/
558 # Applications will typically split words before calling our rules.
559 # To be resilient, we replace punctuation by whitespace in IPA.
560 ፠ → ' '; # U+1360 ETHIOPIC SECTION MARK
561 ፡ → ' '; # U+1361 ETHIOPIC WORDSPACE
562 ። → ' '; # U+1362 ETHIOPIC FULL STOP
563 ፣ → ' '; # U+1363 ETHIOPIC COMMA
564 ፤ → ' '; # U+1364 ETHIOPIC SEMICOLON
565 ፥ → ' '; # U+1365 ETHIOPIC COLON
566 ፦ → ' '; # U+1366 ETHIOPIC PREFACE COLON
567 ፧ → ' '; # U+1367 ETHIOPIC QUESTION MARK
568 ፨ → ' '; # U+1368 ETHIOPIC PARAGRAPH SEPARATOR
569 # Likewise, Ethiopic numerals cannot be pronounced by these rules,
570 # so we replace them by whitespace in the output IPA notation.
571 # Applications will typically pre-process text before calling
572 # the am → am_FONIPA transform.
573 ፩ → ' '; # U+1369 ETHIOPIC DIGIT ONE
574 ፪ → ' '; # U+136A ETHIOPIC DIGIT TWO
575 ፫ → ' '; # U+136B ETHIOPIC DIGIT THREE
576 ፬ → ' '; # U+136C ETHIOPIC DIGIT FOUR
577 ፭ → ' '; # U+136D ETHIOPIC DIGIT FIVE
578 ፮ → ' '; # U+136E ETHIOPIC DIGIT SIX
579 ፯ → ' '; # U+136F ETHIOPIC DIGIT SEVEN
580 ፰ → ' '; # U+1370 ETHIOPIC DIGIT EIGHT
581 ፱ → ' '; # U+1371 ETHIOPIC DIGIT NINE
582 ፲ → ' '; # U+1372 ETHIOPIC NUMBER TEN
583 ፳ → ' '; # U+1373 ETHIOPIC NUMBER TWENTY
584 ፴ → ' '; # U+1374 ETHIOPIC NUMBER THIRTY
585 ፵ → ' '; # U+1375 ETHIOPIC NUMBER FORTY
586 ፶ → ' '; # U+1376 ETHIOPIC NUMBER FIFTY
587 ፷ → ' '; # U+1377 ETHIOPIC NUMBER SIXTY
588 ፸ → ' '; # U+1378 ETHIOPIC NUMBER SEVENTY
589 ፹ → ' '; # U+1379 ETHIOPIC NUMBER EIGHTY
590 ፺ → ' '; # U+137A ETHIOPIC NUMBER NINETY
591 ፻ → ' '; # U+137B ETHIOPIC NUMBER HUNDRED
592 ፼ → ' '; # U+137C ETHIOPIC NUMBER TEN THOUSAND
593 # Transform IPA length markers to one of these:
594 # U+135D ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
595 # U+135E ETHIOPIC COMBINING VOWEL LENGTH MARK
596 # U+135F ETHIOPIC COMBINING GEMINATION MARK
598 ← ː ; # Strip off any remaining IPA length markers.
600 ($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135D → $1 ː $2 ː; # U+135D: lengthen both the consonant and the vowel.
601 ($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135E → $1 $2 ː; # U+135E: lengthen the vowel only.
602 ($IPA_CONSONANT) ([jw]? $IPA_VOWEL?) \u135F → $1 ː $2; # U+135F: lengthen (geminate) the consonant only.
603 [\u135D \u135E \u135F] → ; # Strip off any remaining length markers.
604 $1 wa \u135D ← ($LABIALIZABLE_BEFORE_A) ː waː; # ቷ\u135D ← [tːʷaː]
605 $1 wa \u135E ← ($LABIALIZABLE_BEFORE_A) waː; # ቷ\u135E ← [tʷaː]
606 $1 wa \u135F ← ($LABIALIZABLE_BEFORE_A) ː wa; # አቷ\u135F ← [tːʷa]
607 $1 \u135F $2 \u135E ← ([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL) ː; # Long consonant + glide + long vowel: U+135F after the consonant, U+135E after glide+vowel.
608 $1 \u135F $2 ← {([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL?)}; # Long consonant + glide (+ optional short vowel): U+135F after the consonant only.
609 $1 \u135E ← ($IPA_VOWEL ː); # Long vowel: append U+135E.
610 $1 \u135D ← (jː $IPA_VOWEL ː); # Long /j/ + long vowel: append U+135D.
611 $1 \u135E ← ([jw] $IPA_VOWEL ː); # Glide + long vowel: append U+135E.
612 $1 \u135F ← (jː $IPA_VOWEL?); # Long /j/ with short or no vowel: append U+135F.
613 $1 \u135D ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL ː); # Long consonant (+ optional /w/) + long vowel: append U+135D.
614 $1 \u135E ← ($IPA_CONSONANT [w]? $IPA_VOWEL ː); # Short consonant (+ optional /w/) + long vowel: append U+135E.
615 $1 \u135F ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL?); # Long consonant (+ optional /w/) with short or no vowel: append U+135F.
616 # Insert syllable markers in a separate pass.
618 {($IPA_VOWEL ː?)} [[:L:]] → $1 \.; # Insert '.' after a (possibly long) vowel that is followed by a letter.
620 ← [ˈˌ\. \u0303\u032F]; # Reverse direction: delete stress marks, '.', U+0303 and U+032F.
621 aj ← ai; # Nairobi /nairobi/ ናይሮቢ, Cairo /kairo/ ካይሮ
622 aw ← au; # Bissau /bisːau/ ቢሳው
623 eji ← ei; # Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ
624 ewo ← eo; # Montevideo /montevideo/ ሞንቴቪዴዎ
625 ija ← ia; # Monrovia /monrovia/ ሞንሮቪያ
626 ijə ← iə; # Reunion /rijunijən/ ሪዩኒየን
627 iw ← iu; # Vilnius /vilnius/ ቪልኒውስ, New Delhi /niu deːli/ ኒው ዴሊ
628 jo ← io; # Tokyo /tokio/ ቶክዮ
629 nɡ ← ŋɡ; # Kongo /koŋɡo/ ኮንጎ, Hungary /həŋɡari/ ሀንጋሪ; must precede the bare ŋ rule so that ŋɡ does not become nɡɡ.
630 nɡ ← ŋ; # Bangkok /baŋkok/ ባንግኮክ, Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ
631 uwa ← ua; # Kuala Lumpur /kuala lumpur/ ኩዋላ ሉምፑር, Ruanda /ruanda/ ሩዋንዳ
632 bwe ← bue; # Buenos Aires /buenos aires/ ብዌኖስ አይሬስ
633 sʼ ← t\u0361s; # Podgorica /podɡorit\u0361sa/ ፖድጎሪጻ, Vaduz /fadut\u0361s/ ፋዱጽ
634 uwi ← ui; # Port Louis /port luis/ ፖርት ሉዊስ
635 uwe ← ue; # Lithuania /lituenia/ ሊቱዌኒያ, Venezuela /venɨzuela/ ቬንዙዌላ
644 $1 w ← {($IPA_VOWEL ː?) \u032F} $IPA_VOWEL; # /ewowa/ ← /e\u032Fo\u032Fa/
646 n ← [n {n\u033C} {n\u033C\u030A} {m\u033A} {n\u030A} {n\u0325} ⁿ ᵑ]; # Fold n-like nasals (diacritic variants, superscripts ⁿ ᵑ) to /n/.
647 m ← [ɱ {m\u0325} {m\u032A} ᵐ]; # Fold ɱ, diacritic variants of m and superscript ᵐ to /m/.
648 ɲ ← [{ɳ\u030A} {ɳ\u0325} ɳ {ɲ\u030A} {ɲ\u0325} ɲ]; # Fold ɳ and the diacritic variants of ɳ and ɲ to /ɲ/.
649 ŋ ← [{ŋ\u030A} {ŋ\u0325} ŋ]; # Drop U+030A / U+0325 from ŋ.
650 ɴ ← [{ɴ\u030A} {ɴ\u0325} ɴ]; # Drop U+030A / U+0325 from ɴ.
651 p ← [{t\u033C} {p\u033A}]; # Map t+U+033C and p+U+033A to plain /p/.
653 b ← [{d\u033C} {b\u033A} {ɾ\u033C} ɓ]; # Map d/ɾ+U+033C, b+U+033A and ɓ to plain /b/.
662 s ← [θ {θ\u0331} {θ\u031E} {θ\u033C} {ɸ\u033A}]; # Map θ (with or without diacritics) and ɸ+U+033A to /s/.
663 z ← [ð {ð\u0320} {ð\u033C} {β\u033A}]; # Map ð (with or without diacritics) and β+U+033A to /z/.
664 sʼ ← [{t\u0361s} {t\u035Cs} ʦ]; # Map the listed ts spellings to /sʼ/.
665 t\u0361ʃ ← [{t\u035Cʃ} ʧ {t\u0361ɕ} {t\u035Cɕ} ʨ {ʈ\u0361ʂ} c]; # Map the listed affricate spellings, and c, to /t\u0361ʃ/.
666 t\u0361ʃʼ ← [ǀ ʇ ǂ ʄ]; # Map ǀ ʇ ǂ ʄ to /t\u0361ʃʼ/.
667 d\u0361ʒ ← [ʤ ʣ {d\u0361z} {d\u035Cz} {d\u0361ɕ} ʥ {d\u0361ʑ} {d\u035Cʑ} {ɖ\u0361ʐ} {d\u0361ʐ} ɟ]; # Map the listed affricate spellings, and ɟ, to /d\u0361ʒ/.
668 pf ← [{p\u032A} {p\u0346} ȹ {p\u0361f} {p\u032Af} {p\u032A\u035Cf}]; # Map the listed p-based spellings to the sequence /pf/.
669 bv ← [{b\u032A} {b\u0346} ȸ {b\u0361v} {b\u032A\u0361v}]; # Map the listed b-based spellings to the sequence /bv/.
681 t\u0361ʃl ← [{t\u0361ɬ} {tɬ}]; # Map tɬ (with or without U+0361) to /t\u0361ʃl/.
683 w ← {u\u032F} $IPA_VOWEL; # u + U+032F before a vowel becomes /w/.
685 ʼː ← ːʼ; # /pʼː/ ← /pːʼ/; /sʼː/ ← /sːʼ/; etc.
695 $1ːʲ ← ([pbtd])ʲː; # [bːʲeː] ← [bʲːeː]
696 $1ːʷ ← ([pbtd])ʷː; # [bːʷeː] ← [bʷːeː]
698 ← [ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ]; # Reverse direction: delete U+0303, U+0330, accent-type combining marks, tone letters and arrows.