Index: source/data/translit/am_am_FONIPA.txt |
diff --git a/source/data/translit/am_am_FONIPA.txt b/source/data/translit/am_am_FONIPA.txt |
old mode 100755 |
new mode 100644 |
index 20467a7e3e40ae9d6675737d2b17780f70ed44bc..a3390db715b489bea94440ed585114ae74940723 |
--- a/source/data/translit/am_am_FONIPA.txt |
+++ b/source/data/translit/am_am_FONIPA.txt |
@@ -1,36 +1,74 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
# File: am_am_FONIPA.txt |
-# Generated from CLDR |
+# Generated from CLDR |
# |
-\u135D → ''; # U+135D ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK |
-\u135E → ''; # U+135E ETHIOPIC COMBINING VOWEL LENGTH MARK |
-\u135F → ''; # U+135F ETHIOPIC COMBINING GEMINATION MARK |
- |
+# Transforms Amharic (am) to Amharic in phonemic IPA transcription (am_FONIPA). |
+# |
+# Long vowels, long/geminated consonants: |
+# In the direction from am_FONIPA to am, we emit Ethiopic gemination |
+# and vowel length markers (U+135D, U+135E, U+135F) although |
+# they are rarely written in Amharic text. Exceptions include |
+# school books and textbooks for non-native speakers. |
+# Clients who do not want these markers can easily strip them off |
+# in a post-processing step. |
+# |
+# Labialization: |
+# Amharic speakers will usually say ሟ as [mʷa] instead of [mwa]; |
+# labializing [m] instead of saying [m] followed by a separate [w]. |
+# Most Amharic consonants can get labialized. To keep the phonemic |
+# transcription simple, we emit /m/ + /w/; otherwise, our phoneme |
+# set would almost double, and it would include very unusual phonemes |
+# such as /ɲʷ/ or /t\u0361ʃʼʷ/. |
+# |
+# References: |
+# [1] The Ge’ez Frontier Foundation: “Principles and Specification |
+# for Mnemonic Ethiopic Keyboards.” Version of January 17, 2009; |
+# retrieved on November 4, 2014. |
+# http://keyboards.ethiopic.org/specification/GFF-MnemonicEthiopicKeyboardSpecification.pdf |
+# Unlike most online sources, this report uses correct IPA notation |
+# with the exception of /j/, which it consistently (but wrongly) |
+# writes as */y/. |
+$IPA_VOWEL = [aeəiɨou]; |
+$IPA_CONSONANT = [mnɲɴ p{pʼ}bt{tʼ}dk{kʼ}ɡʔʕ fvs{sʼ}zʃʒxh lr {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}]; |
+# Some consonants have a special syllable when labialized, such as ፗ ↔ /pʷa/. |
+# Amharic restricts this mostly to /a/ syllables. While the Ethiopic script |
+# does offer labialized syllables for other vowels, these are typically |
+# not written in Amharic. |
+$LABIALIZABLE_BEFORE_A = [p{pʼ}t{tʼ} {t\u0361ʃ}{t\u0361ʃʼ}{d\u0361ʒ}{d\u0361ʒʼ} s{sʼ}zʃʒ fv r]; |
+← [ ʼ \u0361 \u035C \u032F]; |
+::(null); |
+# Appendix B of [1] transcribes ሀ as /hə/. However, according to |
+# an Amharic-speaking person, there is no /hə/ sequence |
+# in Amharic; instead, it gets pronounced as /ha/. |
ሀ → ha; |
-ሁ → hu; |
-ሂ → hi; |
-ሃ → ha; |
-ሄ → he; |
-ህ → hɨ; |
-ሆ → ho; |
+ሀ ← hə; |
+ሁ ↔ hu; |
+ሂ ↔ hi; |
+ሃ ↔ ha; |
+ሄ ↔ he; |
+ህ ↔ hɨ; |
+ሆ ↔ ho; |
ሇ → ho; # Dizi, Me’en, Mursi, Suri /hɔ/ ([1], Appendix E); not used in Amharic. |
- |
-ለ → lə; |
-ሉ → lu; |
-ሊ → li; |
-ላ → la; |
-ሌ → le; |
-ል → lɨ; |
-ሎ → lo; |
+ህ ← h; |
+ለ ↔ lə; |
+ሉ ↔ lu; |
+ሊ ↔ li; |
+ላ ↔ la; |
+ሌ ↔ le; |
+ል ↔ lɨ; |
+ሎ ↔ lo; |
ⶀ → lo; # Dizi, Me’en, Mursi, Suri /lɔ/ ([1], Appendix E); not used in Amharic. |
-ሏ → lwa; |
- |
+ሏ ↔ lwa; |
+ል ← l; |
+# Appendix B of [1] transcribes ሐ as Voiceless pharyngeal fricative |
+# /ħə/. However, according to an Amharic-speaking person, Amharic |
+# makes no difference in pronunciation between ሐ...ሓ and ሀ...ሃ; both |
+# are pronounced as Voiceless glottal fricative /h/. Also, according |
+# to the speaker there is no /hə/ sequence in Amharic; instead, it |
+# gets pronounced as /ha/. |
ሐ → ha; |
ሑ → hu; |
ሒ → hi; |
@@ -39,22 +77,21 @@ |
ሕ → hɨ; |
ሖ → ho; |
ሗ → hwa; |
- |
-መ → mə; |
-ሙ → mu; |
-ሚ → mi; |
-ማ → ma; |
-ሜ → me; |
-ም → mɨ; |
-ሞ → mo; |
+መ ↔ mə; |
+ሙ ↔ mu; |
+ሚ ↔ mi; |
+ማ ↔ ma; |
+ሜ ↔ me; |
+ም ↔ mɨ; |
+ሞ ↔ mo; |
ⶁ → mo; # Dizi, Me’en, Mursi, Suri /mɔ/ ([1], Appendix E); not used in Amharic. |
ᎀ → mwə; # Sebatbeit /mwə/ ([1], Appendix H); not used in Amharic. |
ᎃ → mwu; # Sebatbeit /mwu/ ([1], Appendix H); not used in Amharic. |
ᎁ → mwi; # Sebatbeit /mwi/ ([1], Appendix H); not used in Amharic. |
-ሟ → mwa; |
+ሟ ↔ mwa; |
ᎂ → mwe; # Sebatbeit /mwe/ ([1], Appendix H); not used in Amharic. |
ፙ → mja; # Unclear which language; Appendix L of [1] transcribes ፙ as /mʲa/. |
- |
+ም ← m; |
ሠ → sə; |
ሡ → su; |
ሢ → si; |
@@ -63,38 +100,18 @@ |
ሥ → sɨ; |
ሦ → so; |
ሧ → swa; |
- |
-ረ → rə; |
-ሩ → ru; |
-ሪ → ri; |
-ራ → ra; |
-ሬ → re; |
-ር → rɨ; |
-ሮ → ro; |
+ረ ↔ rə; |
+ሩ ↔ ru; |
+ሪ ↔ ri; |
+ራ ↔ ra; |
+ሬ ↔ re; |
+ር ↔ rɨ; |
+ሮ ↔ ro; |
ⶂ → ro; # Dizi, Me’en, Mursi, Suri /rɔ/ ([1], Appendix E); not used in Amharic. |
-ሯ → rwa; |
+ሯ ↔ rwa; |
ፘ → rja; # Unclear which language; Appendix L of [1] transcribes ፘ as /rʲa/. |
- |
-ሰ → sə; |
-ሱ → su; |
-ሲ → si; |
-ሳ → sa; |
-ሴ → se; |
-ስ → sɨ; |
-ሶ → so; |
-ⶃ → so; # Dizi, Me’en, Mursi, Suri /sɔ/ ([1], Appendix E); not used in Amharic. |
-ሷ → swa; |
- |
-ሸ → ʃə; |
-ሹ → ʃu; |
-ሺ → ʃi; |
-ሻ → ʃa; |
-ሼ → ʃe; |
-ሽ → ʃɨ; |
-ሾ → ʃo; |
-ⶄ → ʃo; # Dizi, Me’en, Mursi, Suri /ʃɔ/ ([1], Appendix E); not used in Amharic. |
-ሿ → ʃwa; |
- |
+ር ← r; |
+# Amharic speakers pronounce ⶠ like ሸ. Source: [1], Appendix B. |
ⶠ → ʃə; |
ⶡ → ʃu; |
ⶢ → ʃi; |
@@ -102,21 +119,34 @@ |
ⶤ → ʃe; |
ⶥ → ʃɨ; |
ⶦ → ʃo; |
- |
-ቀ → kʼə; |
-ቁ → kʼu; |
-ቂ → kʼi; |
-ቃ → kʼa; |
-ቄ → kʼe; |
-ቅ → kʼɨ; |
-ቆ → kʼo; |
+ሸ ↔ ʃə; |
+ሹ ↔ ʃu; |
+ሺ ↔ ʃi; |
+ሻ ↔ ʃa; |
+ሼ ↔ ʃe; |
+ሽ ↔ ʃɨ; |
+ሾ ↔ ʃo; |
+ⶄ → ʃo; # Dizi, Me’en, Mursi, Suri /ʃɔ/ ([1], Appendix E); not used in Amharic. |
+ሿ ↔ ʃwa; |
+ሽ ← ʃ; |
+ቀ ↔ kʼə; |
+ቁ ↔ kʼu; |
+ቂ ↔ kʼi; |
+ቃ ↔ kʼa; |
+ቄ ↔ kʼe; |
+ቅ ↔ kʼɨ; |
+ቆ ↔ kʼo; |
ቇ → kʼo; # Dizi, Me’en, Mursi, Suri /kʼɔ/ ([1], Appendix E); not used in Amharic. |
-ቈ → kʼwə; |
-ቍ → kʼwu; |
-ቊ → kʼwi; |
-ቋ → kʼwa; |
-ቌ → kʼwe; |
- |
+ቈ ↔ kʼwə; |
+ቍ ↔ kʼwu; |
+ቊ ↔ kʼwi; |
+ቋ ↔ kʼwa; |
+ቌ ↔ kʼwe; |
+ቅ ← kʼ; |
+# In Awngi, Blin, Qimant, and Xamtanga, ቐ is spoken as voiced uvular fricative [ʁ]. |
+# Source: [1], Appendix C. However, */ʁ/ is not an Amharic phoneme. |
+# When reading foreign words with ቐ, Amharic speakers pronounce |
+# ቐ like ቀ, i.e. as velar ejective /kʼ/. |
ቐ → kʼə; |
ቑ → kʼu; |
ቒ → kʼi; |
@@ -129,7 +159,8 @@ |
ቚ → kʼwi; |
ቛ → kʼwa; |
ቜ → kʼwe; |
- |
+# In Sebatbeit, ⷀ is spoken as palatalized velar ejective /kʼʲ/ ([1], Appendix H). |
+# In Amharic, the syllable is not used, but it might appear in names. |
ⷀ → kʼjə; |
ⷁ → kʼju; |
ⷂ → kʼji; |
@@ -137,49 +168,32 @@ |
ⷄ → kʼje; |
ⷅ → kʼjɨ; |
ⷆ → kʼjo; |
- |
-በ → bə; |
-ቡ → bu; |
-ቢ → bi; |
-ባ → ba; |
-ቤ → be; |
-ብ → bɨ; |
-ቦ → bo; |
+በ ↔ bə; |
+ቡ ↔ bu; |
+ቢ ↔ bi; |
+ባ ↔ ba; |
+ቤ ↔ be; |
+ብ ↔ bɨ; |
+ቦ ↔ bo; |
ⶅ → bo; # Dizi, Me’en, Mursi, Suri /bɔ/ ([1], Appendix E); not used in Amharic. |
ᎄ → bwə; # Sebatbeit /bʷə/ ([1], Appendix H); not used in Amharic. |
ᎇ → bwu; # Sebatbeit /bʷu/ ([1], Appendix H); not used in Amharic. |
ᎅ → bwi; # Sebatbeit /bʷi/ ([1], Appendix H); not used in Amharic. |
ቧ → bwa; # Sebatbeit /bʷa/ ([1], Appendix H); not used in Amharic. |
ᎆ → bwe; # Sebatbeit /bʷe/ ([1], Appendix H); not used in Amharic. |
- |
-ቨ → və; |
-ቩ → vu; |
-ቪ → vi; |
-ቫ → va; |
-ቬ → ve; |
-ቭ → vɨ; |
-ቮ → vo; |
-ቯ → vwa; |
- |
-ተ → tə; |
-ቱ → tu; |
-ቲ → ti; |
-ታ → ta; |
-ቴ → te; |
-ት → tɨ; |
-ቶ → to; |
-ⶆ → to; # Dizi, Me’en, Mursi, Suri /tɔ/ ([1], Appendix E); not used in Amharic. |
-ቷ → twa; |
- |
-ቸ → t\u0361ʃə; |
-ቹ → t\u0361ʃu; |
-ቺ → t\u0361ʃi; |
-ቻ → t\u0361ʃa; |
-ቼ → t\u0361ʃe; |
-ች → t\u0361ʃɨ; |
-ቾ → t\u0361ʃo; |
-ቿ → t\u0361ʃwa; |
- |
+ብ ← b; |
+ቨ ↔ və; |
+ቩ ↔ vu; |
+ቪ ↔ vi; |
+ቫ ↔ va; |
+ቬ ↔ ve; |
+ቭ ↔ vɨ; |
+ቮ ↔ vo; |
+ቯ ↔ vwa; |
+ቭ ← v; |
+# Unclear which Ethiopic language uses ⶨ. It only appears in the |
+# “Language Neutral” list of Appendix L in [1], which transcribes it as t\u0361ʃ. |
+# For Amharic, we therefore pronounce ⶨ like ቸ. |
ⶨ → t\u0361ʃə; |
ⶩ → t\u0361ʃu; |
ⶪ → t\u0361ʃi; |
@@ -187,8 +201,11 @@ |
ⶬ → t\u0361ʃe; |
ⶭ → t\u0361ʃɨ; |
ⶮ → t\u0361ʃo; |
- |
- |
+# In Amharic, ኀ is pronounced like ሀ. |
+# Source: [1], section on “Phonological Redundancy” for Amharic, page 5. |
+# Appendix B of [1] transcribes ሀ as /hə/. However, according to |
+# an Amharic-speaking person, there is no /hə/ sequence in Amharic. |
+# Instead, ሀ (and hence also ኀ) gets pronounced as /ha/. |
ኀ → ha; |
ኁ → hu; |
ኂ → hi; |
@@ -202,49 +219,79 @@ |
ኊ → hwi; |
ኋ → hwa; |
ኌ → hwe; |
- |
-ነ → nə; |
-ኑ → nu; |
-ኒ → ni; |
-ና → na; |
-ኔ → ne; |
-ን → nɨ; |
-ኖ → no; |
+ነ ↔ nə; |
+ኑ ↔ nu; |
+ኒ ↔ ni; |
+ና ↔ na; |
+ኔ ↔ ne; |
+ን ↔ nɨ; |
+ኖ ↔ no; |
ⶈ → no; # Dizi, Me’en, Mursi, Suri /nɔ/ ([1], Appendix E); not used in Amharic. |
-ኗ → nwa; |
- |
-ኘ → ɲə; |
-ኙ → ɲu; |
-ኚ → ɲi; |
-ኛ → ɲa; |
-ኜ → ɲe; |
-ኝ → ɲɨ; |
-ኞ → ɲo; |
+ኗ ↔ nwa; |
+ን ← n; |
+ኘ ↔ ɲə; |
+ኙ ↔ ɲu; |
+ኚ ↔ ɲi; |
+ኛ ↔ ɲa; |
+ኜ ↔ ɲe; |
+ኝ ↔ ɲɨ; |
+ኞ ↔ ɲo; |
ⶉ → ɲo; # Dizi, Me’en, Mursi, Suri /ɲɔ/ ([1], Appendix E); not used in Amharic. |
-ኟ → ɲwa; |
- |
-አ → ʔə; |
-ኡ → ʔu; |
-ኢ → ʔi; |
-ኣ → ʔa; |
-ኤ → ʔe; |
-እ → ʔɨ; |
-ኦ → ʔo; |
+ኟ ↔ ɲwa; |
+ኝ ← ɲ; |
+# Amharic speakers pronounce ኸ as [h] because Amharic has no [x] sound. |
+# However, in transliterations of foreign (e.g. Spanish) words with [x], |
+# several Amharic speakers have confirmed that they prefer ኻ over ሃ. |
+ዀ → hwə; |
+ዂ → hwi; |
+ዃ → hwa; |
+ዄ → hwe; |
+ዅ → hwɨ; |
+ኸ → hə; |
+ኹ → hu; |
+ኺ → hi; |
+ኻ → ha; |
+ኼ → he; |
+ኽ → hɨ; |
+ኾ → ho; |
+ዀ ← xwə; |
+ዂ ← xwi; |
+ዃ ← xwa; |
+ዄ ← xwe; |
+ዅ ← xwɨ; |
+ዅ ← xw; |
+ኸ ← xə; |
+ኹ ← xu; |
+ኺ ← xi; |
+ኻ ← xa; |
+ኼ ← xe; |
+ኽ ← xɨ; |
+ኾ ← xo; |
+ኽ ← x; |
+አ ↔ ʔə; |
+ኡ ↔ ʔu; |
+ኢ ↔ ʔi; |
+ኣ ↔ ʔa; |
+ኤ ↔ ʔe; |
+እ ↔ ʔɨ; |
+ኦ ↔ ʔo; |
ⶊ → ʔo; # Dizi, Me’en, Mursi, Suri /ɲɔ/ ([1], Appendix E); not used in Amharic. |
- |
-ከ → kə; |
-ኩ → ku; |
-ኪ → ki; |
-ካ → ka; |
-ኬ → ke; |
-ክ → kɨ; |
-ኮ → ko; |
-ኰ → kwə; |
-ኵ → kwu; |
-ኲ → kwi; |
-ኳ → kwa; |
-ኴ → kwe; |
- |
+እ ← ʔ; |
+ከ ↔ kə; |
+ኩ ↔ ku; |
+ኪ ↔ ki; |
+ካ ↔ ka; |
+ኬ ↔ ke; |
+ክ ↔ kɨ; |
+ኮ ↔ ko; |
+ኰ ↔ kwə; |
+ኵ ↔ kwu; |
+ኲ ↔ kwi; |
+ኳ ↔ kwa; |
+ኴ ↔ kwe; |
+ክ ← k; |
+# In Sebatbeit, ⷈ is spoken as palatalized velar plosive /kʲ/ ([1], Appendix H). |
+# Amharic speakers pronounce it as /k/ without palatalization. |
ⷈ → kə; |
ⷉ → ku; |
ⷊ → ki; |
@@ -252,7 +299,9 @@ |
ⷌ → ke; |
ⷍ → kɨ; |
ⷎ → ko; |
- |
+# In Sebatbeit, ⷐ is spoken as palatalized voiceless velar fricative /xʲə/ |
+# according to [1], Appendix H. When the syllable appears in names, |
+# Amharic speakers pronounce it as /kə/ without palatalization. |
ⷐ → kə; |
ⷑ → ku; |
ⷒ → ki; |
@@ -260,43 +309,45 @@ |
ⷔ → ke; |
ⷕ → kɨ; |
ⷖ → ko; |
- |
-ወ → wə; |
-ዉ → wu; |
-ዊ → wi; |
-ዋ → wa; |
-ዌ → we; |
-ው → wɨ; |
-ዎ → wo; |
+ወ ↔ wə; |
+ዉ ↔ wu; |
+ዊ ↔ wi; |
+ዋ ↔ wa; |
+ዌ ↔ we; |
+ው ↔ wɨ; |
+ዎ ↔ wo; |
ዏ → wo; # Dizi, Me’en, Mursi, Suri /wɔ/ ([1], Appendix E); not used in Amharic. |
- |
-ዐ → ʕə; |
-ዑ → ʕu; |
-ዒ → ʕi; |
-ዓ → ʕa; |
-ዔ → ʕe; |
-ዕ → ʕɨ; |
-ዖ → ʕo; |
- |
-ዘ → zə; |
-ዙ → zu; |
-ዚ → zi; |
-ዛ → za; |
-ዜ → ze; |
-ዝ → zɨ; |
-ዞ → zo; |
+ው ← w; |
+ዐ ↔ ʕə; |
+ዑ ↔ ʕu; |
+ዒ ↔ ʕi; |
+ዓ ↔ ʕa; |
+ዔ ↔ ʕe; |
+ዕ ↔ ʕɨ; |
+ዖ ↔ ʕo; |
+ዒ ← ʕ; |
+ዘ ↔ zə; |
+ዙ ↔ zu; |
+ዚ ↔ zi; |
+ዛ ↔ za; |
+ዜ ↔ ze; |
+ዝ ↔ zɨ; |
+ዞ ↔ zo; |
ⶋ → zo; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. |
-ዟ → zwa; |
- |
-ዠ → ʒə; |
-ዡ → ʒu; |
-ዢ → ʒi; |
-ዣ → ʒa; |
-ዤ → ʒe; |
-ዥ → ʒɨ; |
-ዦ → ʒo; |
-ዧ → ʒwa; |
- |
+ዟ ↔ zwa; |
+ዝ ← z; |
+ዠ ↔ ʒə; |
+ዡ ↔ ʒu; |
+ዢ ↔ ʒi; |
+ዣ ↔ ʒa; |
+ዤ ↔ ʒe; |
+ዥ ↔ ʒɨ; |
+ዦ ↔ ʒo; |
+ዧ ↔ ʒwa; |
+ዢ ← ʒ; |
+# Unclear which Ethiopic language uses ⶰ. It only appears in the |
+# “Language Neutral” list of Appendix L in [1], which transcribes it as ʒ. |
+# For Amharic, we therefore pronounce ⶰ like ዠ. |
ⶰ → ʒə; |
ⶱ → ʒu; |
ⶲ → ʒi; |
@@ -304,48 +355,52 @@ |
ⶴ → ʒe; |
ⶵ → ʒɨ; |
ⶶ → ʒo; |
- |
-የ → jə; |
-ዩ → ju; |
-ዪ → ji; |
-ያ → ja; |
-ዬ → je; |
-ይ → jɨ; |
-ዮ → jo; |
+የ ↔ jə; |
+ዩ ↔ ju; |
+ዪ ↔ ji; |
+ያ ↔ ja; |
+ዬ ↔ je; |
+ይ ↔ jɨ; |
+ዮ ↔ jo; |
ዯ → jo; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. |
- |
-ደ → də; |
-ዱ → du; |
-ዲ → di; |
-ዳ → da; |
-ዴ → de; |
-ድ → dɨ; |
-ዶ → do; |
+ይ ← j; |
+ጀ ↔ d\u0361ʒə; |
+ጁ ↔ d\u0361ʒu; |
+ጂ ↔ d\u0361ʒi; |
+ጃ ↔ d\u0361ʒa; |
+ጄ ↔ d\u0361ʒe; |
+ጅ ↔ d\u0361ʒɨ; |
+ጆ ↔ d\u0361ʒo; |
+ጇ ↔ d\u0361ʒwa; |
+ጅ ← d\u0361ʒ; |
+ደ ↔ də; |
+ዱ ↔ du; |
+ዲ ↔ di; |
+ዳ ↔ da; |
+ዴ ↔ de; |
+ድ ↔ dɨ; |
+ዶ ↔ do; |
ⶌ → do; # Dizi, Me’en, Mursi, Suri /zɔ/ ([1], Appendix E); not used in Amharic. |
-ዷ → dwa; |
- |
-ጀ → d\u0361ʒə; |
-ጁ → d\u0361ʒu; |
-ጂ → d\u0361ʒi; |
-ጃ → d\u0361ʒa; |
-ጄ → d\u0361ʒe; |
-ጅ → d\u0361ʒɨ; |
-ጆ → d\u0361ʒo; |
-ጇ → d\u0361ʒwa; |
- |
-ገ → ɡə; |
-ጉ → ɡu; |
-ጊ → ɡi; |
-ጋ → ɡa; |
-ጌ → ɡe; |
-ግ → ɡɨ; |
-ጎ → ɡo; |
-ጐ → ɡwə; |
-ጕ → ɡwu; |
-ጒ → ɡwi; |
-ጓ → ɡwa; |
-ጔ → ɡwe; |
- |
+ዷ ↔ dwa; |
+ድ ← d; |
+ገ ↔ ɡə; |
+ጉ ↔ ɡu; |
+ጊ ↔ ɡi; |
+ጋ ↔ ɡa; |
+ጌ ↔ ɡe; |
+ግ ↔ ɡɨ; |
+ጎ ↔ ɡo; |
+ጐ ↔ ɡwə; |
+ጕ ↔ ɡwu; |
+ጒ ↔ ɡwi; |
+ጓ ↔ ɡwa; |
+ጔ ↔ ɡwe; |
+ግ ← ɡ; |
+# In Awngi, Blin, Qimant, and Xamtanga, ጘ is spoken as voiced velar nasal [ŋ]. |
+# Source: [1], Appendix C. While /ŋ/ is not an Amharic phoneme, Amharic speakers |
+# can still pronounce it according to our source. However, when transliterating |
+# foreign words with [ŋ], Amharic uses the sequence ንግ /nɡ/. For example, |
+# the Amharic transliteration of Washington /waʃiŋtən/ is ዋሺንግተን. |
ጘ → ŋə; |
ጙ → ŋu; |
ጚ → ŋi; |
@@ -358,7 +413,17 @@ |
ⶔ → ŋwi; |
ጟ → ŋwa; |
ⶕ → ŋwe; |
- |
+# Since there is no uvular nasal [ɴ] in Amharic, we use the velar nasal [ŋ]. |
+ጘ ← ɴə; |
+ጙ ← ɴu; |
+ጚ ← ɴi; |
+ጛ ← ɴa; |
+ጜ ← ɴe; |
+ጝ ← ɴɨ; |
+ጞ ← ɴo; |
+ጝ ← ɴ; |
+# In Sebatbeit, ⷘ is spoken as palatalized voiced velar stop /ɡj/ ([1], Appendix H). |
+# Amharic speakers pronounce it as voiced velar stop /ɡ/ without palatalization. |
ⷘ → ɡə; |
ⷙ → ɡu; |
ⷚ → ɡi; |
@@ -366,26 +431,29 @@ |
ⷜ → ɡe; |
ⷝ → ɡɨ; |
ⷞ → ɡo; |
- |
-ጠ → tʼə; |
-ጡ → tʼu; |
-ጢ → tʼi; |
-ጣ → tʼa; |
-ጤ → tʼe; |
-ጥ → tʼɨ; |
-ጦ → tʼo; |
-ጧ → tʼwa; |
- |
-ጨ → t\u0361ʃʼə; |
-ጩ → t\u0361ʃʼu; |
-ጪ → t\u0361ʃʼi; |
-ጫ → t\u0361ʃʼa; |
-ጬ → t\u0361ʃʼe; |
-ጭ → t\u0361ʃʼɨ; |
-ጮ → t\u0361ʃʼo; |
+ጠ ↔ tʼə; |
+ጡ ↔ tʼu; |
+ጢ ↔ tʼi; |
+ጣ ↔ tʼa; |
+ጤ ↔ tʼe; |
+ጥ ↔ tʼɨ; |
+ጦ ↔ tʼo; |
+ጧ ↔ tʼwa; |
+ጢ ← tʼ; |
+ጨ ↔ t\u0361ʃʼə; |
+ጩ ↔ t\u0361ʃʼu; |
+ጪ ↔ t\u0361ʃʼi; |
+ጫ ↔ t\u0361ʃʼa; |
+ጬ ↔ t\u0361ʃʼe; |
+ጭ ↔ t\u0361ʃʼɨ; |
+ጮ ↔ t\u0361ʃʼo; |
ⶐ → t\u0361ʃʼo; # Dizi, Me’en, Mursi, Suri /t\u0361ʃʼɔ/ ([1], Appendix E); not used in Amharic. |
-ጯ → t\u0361ʃʼwa; |
- |
+ጯ ↔ t\u0361ʃʼwa; |
+ጪ ← t\u0361ʃʼ; |
+# According to Appendix B of [1], the following are used in the Bench language |
+# (aka Benchnon, Gimira). In Bench, ⶻ is pronounced as /ʈ\u0361ʂʼ/ Retroflex |
+# ejective affricate; with a phonemic distinction to the non-retroflex version. |
+# Amharic does not have retroflex phonemes, so we go with /t\u0361ʃʼ/. |
ⶸ → t\u0361ʃʼə; |
ⶹ → t\u0361ʃʼu; |
ⶺ → t\u0361ʃʼi; |
@@ -393,26 +461,46 @@ |
ⶼ → t\u0361ʃʼe; |
ⶽ → t\u0361ʃʼɨ; |
ⶾ → t\u0361ʃʼo; |
- |
-ጰ → pʼə; |
-ጱ → pʼu; |
-ጲ → pʼi; |
-ጳ → pʼa; |
-ጴ → pʼe; |
-ጵ → pʼɨ; |
-ጶ → pʼo; |
+ቸ ↔ t\u0361ʃə; |
+ቹ ↔ t\u0361ʃu; |
+ቺ ↔ t\u0361ʃi; |
+ቻ ↔ t\u0361ʃa; |
+ቼ ↔ t\u0361ʃe; |
+ች ↔ t\u0361ʃɨ; |
+ቾ ↔ t\u0361ʃo; |
+ቿ ↔ t\u0361ʃwa; |
+ች ← t\u0361ʃ; |
+ተ ↔ tə; |
+ቱ ↔ tu; |
+ቲ ↔ ti; |
+ታ ↔ ta; |
+ቴ ↔ te; |
+ት ↔ tɨ; |
+ቶ ↔ to; |
+ⶆ → to; # Dizi, Me’en, Mursi, Suri /tɔ/ ([1], Appendix E); not used in Amharic. |
+ቷ ↔ twa; |
+ት ← t; |
+ጰ ↔ pʼə; |
+ጱ ↔ pʼu; |
+ጲ ↔ pʼi; |
+ጳ ↔ pʼa; |
+ጴ ↔ pʼe; |
+ጵ ↔ pʼɨ; |
+ጶ ↔ pʼo; |
ⶑ → pʼo; # Dizi, Me’en, Mursi, Suri /pʼɔ/ ([1], Appendix E); not used in Amharic. |
-ጷ → pʼwa; |
- |
-ጸ → sʼə; |
-ጹ → sʼu; |
-ጺ → sʼi; |
-ጻ → sʼa; |
-ጼ → sʼe; |
-ጽ → sʼɨ; |
-ጾ → sʼo; |
-ጿ → sʼwa; |
- |
+ጷ ↔ pʼwa; |
+ጵ ← pʼ; |
+ጸ ↔ sʼə; |
+ጹ ↔ sʼu; |
+ጺ ↔ sʼi; |
+ጻ ↔ sʼa; |
+ጼ ↔ sʼe; |
+ጽ ↔ sʼɨ; |
+ጾ ↔ sʼo; |
+ጿ ↔ sʼwa; |
+ጽ ← sʼ; |
+# In Amharic, ፀ is pronounced like ጸ. |
+# Source: [1], section on “Phonological Redundancy” for Amharic, page 5. |
ፀ → sʼə; |
ፁ → sʼu; |
ፂ → sʼi; |
@@ -421,37 +509,54 @@ |
ፅ → sʼɨ; |
ፆ → sʼo; |
ፇ → sʼo; # Dizi, Me’en, Mursi, Suri /sʼɔ/ ([1], Appendix E); not used in Amharic. |
- |
-ፈ → fə; |
-ፉ → fu; |
-ፊ → fi; |
-ፋ → fa; |
-ፌ → fe; |
-ፍ → fɨ; |
-ፎ → fo; |
+# Amharic speakers pronounce ሰ like ሠ. Source: [1], Appendix B. |
+ሰ ↔ sə; |
+ሱ ↔ su; |
+ሲ ↔ si; |
+ሳ ↔ sa; |
+ሴ ↔ se; |
+ስ ↔ sɨ; |
+ሶ ↔ so; |
+ⶃ → so; # Dizi, Me’en, Mursi, Suri /sɔ/ ([1], Appendix E); not used in Amharic. |
+ሷ ↔ swa; |
+ስ ← s; |
+ፈ ↔ fə; |
+ፉ ↔ fu; |
+ፊ ↔ fi; |
+ፋ ↔ fa; |
+ፌ ↔ fe; |
+ፍ ↔ fɨ; |
+ፎ ↔ fo; |
ᎈ → fwə; # Sebatbeit /fwə/ ([1], Appendix H); not used in Amharic. |
ᎉ → fwu; # Sebatbeit /fwu/ ([1], Appendix H); not used in Amharic. |
ᎋ → fwi; # Sebatbeit /fwi/ ([1], Appendix H); not used in Amharic. |
-ፏ → fwa; |
+ፏ ↔ fwa; |
ᎊ → fwe; # Sebatbeit /fwe/ ([1], Appendix H); not used in Amharic. |
ፚ → fja; # Unclear which language; Appendix L of [1] transcribes ፚ as /fja/. |
- |
-ፐ → pə; |
-ፑ → pu; |
-ፒ → pi; |
-ፓ → pa; |
-ፔ → pe; |
-ፕ → pɨ; |
-ፖ → po; |
+ፍ ← f; |
+ፐ ↔ pə; |
+ፑ ↔ pu; |
+ፒ ↔ pi; |
+ፓ ↔ pa; |
+ፔ ↔ pe; |
+ፕ ↔ pɨ; |
+ፖ ↔ po; |
ⶒ → po; # Dizi, Me’en, Mursi, Suri /pɔ/ ([1], Appendix E); not used in Amharic. |
ᎌ → pwə; # Sebatbeit /pwə/ ([1], Appendix H); not used in Amharic. |
ᎍ → pwu; # Sebatbeit /pwu/ ([1], Appendix H); not used in Amharic. |
ᎏ → pwi; # Sebatbeit /pwi/ ([1], Appendix H); not used in Amharic. |
-ፗ → pwa; |
+ፗ ↔ pwa; |
ᎎ → pwe; # Sebatbeit /pwe/ ([1], Appendix H); not used in Amharic. |
- |
-ኧ → ə; |
- |
+ፕ ← p; |
+ኧ ↔ ə; |
+ኡ ← u; # ኡላዓን ባዓታር ← Ulaan Baatar /ulaʕan baʕatar/ |
+አ ← a; # አምስተርዳም ← Amsterdam /amstərdam/ |
+ኤ ← e; |
+እ ← ɨ; |
+ኦ ← o; # ፖርት ኦፍ ስፔን ← Port of Spain /port of speːn/ |
+ኢ ← i; # ኢስላማባድ ← Islamabad /islamabad/ |
+# Applications will typically split words before calling our rules. |
+# To be resilient, we replace punctuation by whitespace in IPA. |
፠ → ' '; # U+1360 ETHIOPIC SECTION MARK |
፡ → ' '; # U+1361 ETHIOPIC WORDSPACE |
። → ' '; # U+1362 ETHIOPIC FULL STOP |
@@ -461,7 +566,10 @@ |
፦ → ' '; # U+1366 ETHIOPIC PREFACE COLON |
፧ → ' '; # U+1367 ETHIOPIC QUESTION MARK |
፨ → ' '; # U+1368 ETHIOPIC PARAGRAPH SEPARATOR |
- |
+# Likewise, Ethiopic numerals cannot be pronounced by these rules, |
+# so we replace them by whitespace in the output IPA notation. |
+# Applications will typically pre-process text before calling |
+# the am → am_FONIPA transform. |
፩ → ' '; # U+1369 ETHIOPIC DIGIT ONE |
፪ → ' '; # U+136A ETHIOPIC DIGIT TWO |
፫ → ' '; # U+136B ETHIOPIC DIGIT THREE |
@@ -482,12 +590,111 @@ |
፺ → ' '; # U+137A ETHIOPIC NUMBER NINETY |
፻ → ' '; # U+137B ETHIOPIC NUMBER HUNDRED |
፼ → ' '; # U+137C ETHIOPIC NUMBER TEN THOUSAND |
+# Transform IPA length markers to one of these: |
+# U+135D ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK |
+# U+135E ETHIOPIC COMBINING VOWEL LENGTH MARK |
+# U+135F ETHIOPIC COMBINING GEMINATION MARK |
+::null(); |
+← ː ; # Strip off any remaining IPA length markers. |
+::(null); |
+($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135D → $1 ː $2 ː; |
+($IPA_CONSONANT) ([jw]? $IPA_VOWEL) \u135E → $1 $2 ː; |
+($IPA_CONSONANT) ([jw]? $IPA_VOWEL?) \u135F → $1 ː $2; |
+[\u135D \u135E \u135F] → ; # Strip off any remaining length markers. |
+$1 wa \u135D ← ($LABIALIZABLE_BEFORE_A) ː waː; # ቷ\u135D ← [tːʷaː] |
+$1 wa \u135E ← ($LABIALIZABLE_BEFORE_A) waː; # ቷ\u135E ← [tʷaː] |
+$1 wa \u135F ← ($LABIALIZABLE_BEFORE_A) ː wa; # አቷ\u135F ← [tːʷa] |
+$1 \u135F $2 \u135E ← ([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL) ː; |
+$1 \u135F $2 ← {([b $LABIALIZABLE_BEFORE_A]) ː ([jw] $IPA_VOWEL?)}; |
+$1 \u135E ← ($IPA_VOWEL ː); |
+$1 \u135D ← (jː $IPA_VOWEL ː); |
+$1 \u135E ← ([jw] $IPA_VOWEL ː); |
+$1 \u135F ← (jː $IPA_VOWEL?); |
+$1 \u135D ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL ː); |
+$1 \u135E ← ($IPA_CONSONANT [w]? $IPA_VOWEL ː); |
+$1 \u135F ← ($IPA_CONSONANT ː [w]? $IPA_VOWEL?); |
+# Insert syllable markers in a separate pass. |
+::null; |
+{($IPA_VOWEL ː?)} [[:L:]] → $1 \.; |
+::(null); |
+← [ˈˌ\. \u0303\u032F]; |
+aj ← ai; # Nairobi /nairobi/ ናይሮቢ, Cairo /kairo/ ካይሮ |
+aw ← au; # Bissau /bisːau/ ቢሳው |
+eji ← ei; # Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ |
+ewo ← eo; # Montevideo /montevideo/ ሞንቴቪዴዎ |
+ija ← ia; # Monrovia /monrovia/ ሞንሮቪያ |
+ijə ← iə; # Reunion /rijunijən/ ሪዩኒየን |
+iw ← iu; # Vilnius /vilnius/ ቪልኒውስ, New Delhi /niu deːli/ ኒው ዴሊ |
+jo ← io; # Tokyo /tokio/ ቶክዮ |
+nɡ ← ŋɡ; # Kongo /koŋɡo/ ኮንጎ, Hungary /həŋɡari/ ሀንጋሪ |
+nɡ ← ŋ; # Bangkok /baŋkok/ ባንግኮክ, Beijing /beid\u0361ʒiŋ/ ቤዪጂንግ |
+uwa ← ua; # Kuala Lumpur /kuala lumpur/ ኩዋላ ሉምፑር, Ruanda /ruanda/ ሩዋንዳ |
+bwe ← bue; # Buenos Aires /buenos aires/ ብዌኖስ አይሬስ |
+sʼ ← t\u0361s; # Podgorica /podɡorit\u0361sa/ ፖድጎሪጻ, Vaduz /fadut\u0361s/ ፋዱጽ |
+uwi ← ui; # Port Luis /port luis/ ፖርት ሉዊስ |
+uwe ← ue; # Lithuania /lituenia/ ሊቱዌኒያ, Venezuela /venɨzuela/ ቬንዙዌላ |
+::(null); |
+ʔə ← \. ə; |
+ʔu ← \. u; |
+ʔi ← \. i; |
+ʔa ← \. a; |
+ʔe ← \. e; |
+ʔɨ ← \. ɨ; |
+ʔo ← \. o; |
+$1 w ← {($IPA_VOWEL ː?) \u032F} $IPA_VOWEL; # /ewowa/ ← /e\u032Fo\u032Fa/ |
+::(null); |
+n ← [n {n\u033C} {n\u033C\u030A} {m\u033A} {n\u030A} {n\u0325} ⁿ ᵑ]; |
+m ← [ɱ {m\u0325} {m\u032A} ᵐ]; |
+ɲ ← [{ɳ\u030A} {ɳ\u0325} ɳ {ɲ\u030A} {ɲ\u0325} ɲ]; |
+ŋ ← [{ŋ\u030A} {ŋ\u0325} ŋ]; |
+ɴ ← [{ɴ\u030A} {ɴ\u0325} ɴ]; |
+p ← [{t\u033C} {p\u033A}]; |
+pʼ ← [ʘ ɋ]; |
+b ← [{d\u033C} {b\u033A} {ɾ\u033C} ɓ]; |
+t ← [{t\u032A} ʈ]; |
+tʼ ← [ǁ ʖ]; |
+d ← [ɖ ɗ ᶑ]; |
+k ← q; |
+kʼ ← [ǃ ʗ]; |
+ɡ ← [g ɢ ɣ ɠ ʛ]; |
+nɡ ← ᵑɡ; |
+ʔ ← ʡ; |
+s ← [θ {θ\u0331} {θ\u031E} {θ\u033C} {ɸ\u033A}]; |
+z ← [ð {ð\u0320} {ð\u033C} {β\u033A}]; |
+sʼ ← [{t\u0361s} {t\u035Cs} ʦ]; |
+t\u0361ʃ ← [{t\u035Cʃ} ʧ {t\u0361ɕ} {t\u035Cɕ} ʨ {ʈ\u0361ʂ} c]; |
+t\u0361ʃʼ ← [ǀ ʇ ǂ ʄ]; |
+d\u0361ʒ ← [ʤ ʣ {d\u0361z} {d\u035Cz} {d\u0361ɕ} ʥ {d\u0361ʑ} {d\u035Cʑ} {ɖ\u0361ʐ} {d\u0361ʐ} ɟ]; |
+pf ← [{p\u032A} {p\u0346} ȹ {p\u0361f} {p\u032Af} {p\u032A\u035Cf}]; |
+bv ← [{b\u032A} {b\u0346} ȸ {b\u0361v} {b\u032A\u0361v}]; |
+ʃ ← [ʂ ɕ]; |
+ʒ ← [ʐ ʑ]; |
+r ← [ɾ ɽ ʁ]; |
+rːʒ ← r\u031Dː; |
+rʒ ← r\u031D; |
+v ← β; |
+x ← [ç x χ]; |
+ʕ ← ʕ\u031D; |
+h ← ɦ; |
+j ← [ʝ ʲ]; |
+lj ← ʎ [iɨ]? [jʝʲ]?; |
+t\u0361ʃl ← [{t\u0361ɬ} {tɬ}]; |
+ʃl ← ɬ; |
+w ← {u\u032F} $IPA_VOWEL; |
+w ← ʷ; |
+ʼː ← ːʼ; # /pʼː/ ← /pːʼ/; /sʼː/ ← /sːʼ/; etc. |
+::(null); |
+i ← y; |
+ɨ ← [ɪ ʉ]; |
+u ← [ʊ ɯ]; |
+ə ← [ɛ æ ɘ]; |
+o ← [ɔ ø]; |
+a ← ɑ; |
+ʼ ← ʰ; |
+← [ʱ]; |
+$1ːʲ ← ([pbtd])ʲː; # [bːʲeː] ← [bʲːeː] |
+$1ːʷ ← ([pbtd])ʷː; # [bːʷeː] ← [bʷːeː] |
+::(NFC); |
+← [ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ]; |
+::(NFD); |
-::NULL; |
-{i} [[:L:]] → i \.; |
-{ɨ} [[:L:]] → ɨ \.; |
-{u} [[:L:]] → u \.; |
-{e} [[:L:]] → e \.; |
-{o} [[:L:]] → o \.; |
-{ə} [[:L:]] → ə \.; |
-{a} [[:L:]] → a \.; |