| Index: source/data/translit/xh_xh_FONIPA.txt
|
| diff --git a/source/data/translit/xh_xh_FONIPA.txt b/source/data/translit/xh_xh_FONIPA.txt
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f9b3ebc6c8fe8b84ed64a2dd85d4ca6d94aa3881
|
| --- /dev/null
|
| +++ b/source/data/translit/xh_xh_FONIPA.txt
|
| @@ -0,0 +1,91 @@
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| +# File: xh_xh_FONIPA.txt
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# Pronunciation rules for isiXhosa.
|
| +#
|
| +# Author: mjansche@google.com (Martin Jansche)
|
| +#
|
| +# These rules transcribe isiXhosa into the phoneme inventory used within the
|
| +# NCHLT Speech Corpus (https://sites.google.com/site/nchltspeechcorpus/home).
|
| +#
|
| +# The rules were tested using the NCHLT-inlang isiXhosa pronunciation dictionary
|
| +# (http://rma.nwu.ac.za/index.php/resource-catalogue/nchlt-inlang-dictionaries.html).
|
| +# They correctly account for 14,999 out of 15,000 entries in the dictionary.
|
| +#
|
| +# The NCHLT 2013 phone set does not distinguish short and long vowels and does
|
| +# not indicate tone in any way. Transcription of tone is out of scope without a
|
| +# dictionary, since tone is generally not indicated in the orthography. Nasal
|
| +# clicks are not treated as separate phonemes in the NCHLT 2013 phone set and
|
| +# are transcribed as a sequence of nasal plus click instead.
|
| +#
|
| +# One minor notational deviation from the NCHLT 2013 phone set is that we use a
|
| +# tie bar within the complex (slack voiced) clicks, e.g. ɡ\u0361ǀ instead of ɡǀ, to
|
| +# avoid ambiguity and make the phoneme inventory uniquely decodable.
|
| +::Lower;  # lowercase the input first; every rule below is written for lowercase letters
|
| +nyh → ɲʰ;  # trigraph listed before <ny> and <nh> so the longer spelling is tried first
|
| +n { tsh → t\u0361ʃʼ;  # <tsh> preceded by <n>: ʼ instead of ʰ; the <n> is context only, not consumed
|
| +tsh → t\u0361ʃʰ;  # must precede <ts>, <sh> and <th>, which share letters with it
|
| +tyh → cʰ;  # must precede <ty>
|
| +bh → bʰ;
|
| +ch → ǀʰ;
|
| +dl → ɮ;
|
| +dy → ɟ;
|
| +gc → ɡ\u0361ǀ;  # tie bar (U+0361) joins ɡ and the click symbol, as explained in the header
|
| +gq → ɡ\u0361ǃ;
|
| +gr → ɣ;
|
| +gx → ɡ\u0361ǁ;
|
| +hl → ɬ;
|
| +kh → kʰ;
|
| +kr → k\u0361x;
|
| +mh } [^l] → mʰ; # <mhl> denotes /mɬ/ instead
|
| +nh → nʰ;
|
| +ny → ɲ;
|
| +ph → pʰ;
|
| +qh → ǃʰ;
|
| +sh → ʃ;
|
| +th → tʰ;
|
| +tl → t\u0361ɬʼ;
|
| +ts → t\u0361sʼ;
|
| +ty → cʼ;
|
| +xh → ǁʰ;
|
| +aa → | a;  # doubled vowel respelled as one letter; | puts the cursor before the output so it is rescanned
|
| +ee → | e;  # rescanned, so it ends up going through the single-letter <e> rule
|
| +ii → | i;
|
| +kc → | c;  # respelled as <c> and rescanned, so it comes out the same as <c>
|
| +kq → | q;  # respelled as <q> and rescanned, so it comes out the same as <q>
|
| +mm → | m;  # doubled <m> respelled as a single <m> and rescanned
|
| +oo → | o;
|
| +rh → | r;  # respelled as <r> and rescanned, so it comes out the same as <r>
|
| +uu → | u;
|
| +a → a;  # single-letter fallbacks start here; they apply only when no multi-letter rule above matched
|
| +b → ɓ;  # plain <b>; the spelling <bh> is handled by an earlier rule
|
| +c → ǀ;
|
| +d → d;
|
| +e → ɛ;
|
| +f → f;
|
| +g → ɡ;  # output is U+0261 (IPA ɡ), not the ASCII letter g
|
| +h → h;
|
| +i → i;
|
| +j → d\u0361ʒ;  # tie bar (U+0361) keeps the affricate together as one unit
|
| +k → kʼ;
|
| +l → l;
|
| +m → m;
|
| +n } g → ŋ;  # <n> directly before <g>; the <g> is context only and is transcribed by its own rule
|
| +n → n;
|
| +o → ɔ;
|
| +p → pʼ;
|
| +q → ǃ;
|
| +r → r;
|
| +s → s;
|
| +t → tʼ;
|
| +u → u;
|
| +v → v;
|
| +w → w;
|
| +x → ǁ;
|
| +y → j;  # orthographic <y> becomes IPA j; orthographic <j> is the affricate rule above
|
| +z → z;
|
| +
|
|
|