| Index: source/data/translit/my_my_FONIPA.txt
|
| diff --git a/source/data/translit/my_my_FONIPA.txt b/source/data/translit/my_my_FONIPA.txt
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..7713d6eb2d7d42f6e142e539b08fc51d199ff57c
|
| --- /dev/null
|
| +++ b/source/data/translit/my_my_FONIPA.txt
|
| @@ -0,0 +1,331 @@
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| +# File: my_my_FONIPA.txt
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# Pronunciation rules for Burmese.
|
| +#
|
| +# The following rules are lexical and heuristic: lexical in the sense
|
| +# that they generate phoneme strings which may further undergo
|
| +# post-lexical phonological processes, in particular voicing, to
|
| +# result in actual surface forms; heuristic in the sense that they try
|
| +# to resolve ambiguities, especially around reduced vowels, in a
|
| +# systematic way that may be incorrect in many situations. Vowel
|
| +# reduction depends on many factors, such as morphemic structure,
|
| +# which are not available here.
|
| +#
|
| +# Definitions
|
| +#
|
| +# Dependent vowel signs
|
| +$vs_AA = \u102B;
|
| +$vs_aa = \u102C;
|
| +$vs_i = \u102D;
|
| +$vs_ii = \u102E;
|
| +$vs_u = \u102F;
|
| +$vs_uu = \u1030;
|
| +$vs_e = \u1031;
|
| +$vs_ai = \u1032;
|
| +# Various signs
|
| +$anusvara = \u1036;
|
| +$visarga = \u1038;
|
| +$virama = \u1039;
|
| +$asat = \u103A;
|
| +# Dependent (medial) consonant signs
|
| +$med_y = \u103B;
|
| +$med_r = \u103C;
|
| +$med_w = \u103D;
|
| +$med_h = \u103E;
|
| +# Independent letters and letter-like punctuation symbols
|
| +$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
|
| +$creaky = \u0330;
|
| +$high = \u0301;
|
| +$low = \u0300;
|
| +$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
|
| +#
|
| +# Preprocessing
|
| +#
|
| +::NFC;
|
| +# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
|
| +$vs_AA → $vs_aa;
|
| +# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
|
| +# Hmm, what would happen if the syllable ending in kinzi had non-low tone?
|
| +င\u103A $virama → င\u103A;
|
| +# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
|
| +$virama → $asat;
|
| +# Unstack U+103F GREAT SA.
|
| +ဿ → သ\u103Aသ;
|
| +# Insert a syllable boundary marker /./ before every independent letter.
|
| +::Null;
|
| +[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
|
| +# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
|
| +::Null;
|
| +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
|
| +([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
|
| +# Allow for additional coda consonants.
|
| +#
|
| +# This only covers a few of the cases in which full coda consonants
|
| +# can appear in loanwords. The general situation is somewhat rare and
|
| +# is more easily dealt with in a formalism that can impose structural
|
| +# constraints on syllables more easily.
|
| +::Null;
|
| +$asat ($visarga)? [\u1000-\u102A] { $asat → ;
|
| +# Deal with ၎င\u103Aး early.
|
| +၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
|
| +#
|
| +# Rhymes
|
| +#
|
| +::Null;
|
| +က\u103A → ɛʔ;
|
| +ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
|
| +င\u1037\u103A → ɪ $creaky ɴ;
|
| +င\u103Aး → ɪ $high ɴ;
|
| +င\u103A → ɪ $low ɴ;
|
| +စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
|
| +ဉ\u1037\u103A → ɪ $creaky ɴ;
|
| +ဉ\u103Aး → ɪ $high ɴ;
|
| +ဉ\u103A → ɪ $low ɴ;
|
| +ည\u1037\u103A → ɛ $creaky;
|
| +ည\u103Aး → ɛ $high;
|
| +ည\u103A → ɛ $low;
|
| +ဏ\u1037\u103A → a $creaky ɴ;
|
| +ဏ\u103Aး → a $high ɴ;
|
| +ဏ\u103A → a $low ɴ;
|
| +တ\u103A → aʔ;
|
| +န\u1037\u103A → a $creaky ɴ;
|
| +န\u103Aး → a $high ɴ;
|
| +န\u103A → a $low ɴ;
|
| +ပ\u103A → aʔ;
|
| +မ\u1037\u103A → a $creaky ɴ;
|
| +မ\u103Aး → a $high ɴ;
|
| +မ\u103A → a $low ɴ;
|
| +ယ\u1037\u103A → ɛ $creaky;
|
| +ယ\u103Aး → ɛ $high;
|
| +ယ\u103A → ɛ $low;
|
| +သ\u103A → aʔ;
|
| +$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
|
| +$vs_aa ဉ\u103Aး → ɪ $high ɴ;
|
| +$vs_aa ဉ\u103A → ɪ $low ɴ;
|
| +$vs_aa တ\u103A → aʔ;
|
| +$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
|
| +$vs_aa ဏ\u103Aး → a $high ɴ;
|
| +$vs_aa ဏ\u103A → a $low ɴ;
|
| +$vs_aa န\u1037\u103A → a $creaky ɴ;
|
| +$vs_aa န\u103Aး → a $high ɴ;
|
| +$vs_aa န\u103A → a $low ɴ;
|
| +$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
|
| +$vs_aa ယ\u1037\u103A → ɛ $creaky;
|
| +$vs_aa ယ\u103Aး → ɛ $high;
|
| +$vs_aa ယ\u103A → ɛ $low;
|
| +$vs_aa \u1037 → a $creaky; # redundant creaky tone
|
| +$vs_aa း → a $high;
|
| +$vs_aa → a $low;
|
| +$vs_i က\u103A → eɪ\u032Fʔ;
|
| +$vs_i စ\u103A → eɪ\u032Fʔ;
|
| +$vs_i တ\u103A → eɪ\u032Fʔ;
|
| +$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
|
| +$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
|
| +$vs_i န\u103A → e $low ɪ\u032Fɴ;
|
| +$vs_i ပ\u103A → eɪ\u032Fʔ;
|
| +$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
|
| +$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
|
| +$vs_i မ\u103A → e $low ɪ\u032Fɴ;
|
| +$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
|
| +$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
|
| +$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
|
| +$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
|
| +$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
|
| +$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
|
| +$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
|
| +$vs_i $vs_u ယ\u1037\u103A → o $creaky;
|
| +$vs_i $vs_u ယ\u103Aး → o $high;
|
| +$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
|
| +$vs_i $vs_u \u1037 → o $creaky;
|
| +$vs_i $vs_u း → o $high;
|
| +$vs_i $vs_u → o $low;
|
| +$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
|
| +$vs_i $anusvara း → e $high ɪ\u032Fɴ;
|
| +$vs_i $anusvara → e $low ɪ\u032Fɴ;
|
| +$vs_i → i $creaky;
|
| +$vs_ii \u1037 → i $creaky; # this does not usually occur
|
| +$vs_ii း → i $high;
|
| +$vs_ii → i $low;
|
| +$vs_u က\u103A → oʊ\u032Fʔ;
|
| +$vs_u ဂ\u103A → oʊ\u032Fʔ;
|
| +$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
|
| +$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
|
| +$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
|
| +$vs_u တ\u103A → oʊ\u032Fʔ;
|
| +$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
|
| +$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
|
| +$vs_u န\u103A → o $low ʊ\u032Fɴ;
|
| +$vs_u ပ\u103A → oʊ\u032Fʔ;
|
| +$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
|
| +$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
|
| +$vs_u မ\u103A → o $low ʊ\u032Fɴ;
|
| +$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
|
| +$vs_u $anusvara း → o $high ʊ\u032Fɴ;
|
| +$vs_u $anusvara → o $low ʊ\u032Fɴ;
|
| +$vs_u → u $creaky;
|
| +$vs_uu \u1037 → u $creaky; # this does not usually occur
|
| +$vs_uu း → u $high;
|
| +$vs_uu → u $low;
|
| +$vs_e တ\u103A → ɪʔ;
|
| +$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
|
| +$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
|
| +$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
|
| +$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
|
| +$vs_e $vs_aa \u1037 → ɔ $creaky;
|
| +$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
|
| +$vs_e $vs_aa \u103A → ɔ $low;
|
| +$vs_e $vs_aa → ɔ $high;
|
| +$vs_e \u1037 → e $creaky;
|
| +$vs_e း → e $high;
|
| +$vs_e → e $low;
|
| +$vs_ai \u1037 → ɛ $creaky;
|
| +$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
|
| +$vs_ai → ɛ $high;
|
| +$anusvara \u1037 → a $creaky ɴ;
|
| +$anusvara း → a $high ɴ;
|
| +$anusvara → a $low ɴ;
|
| +$med_w တ\u103A → ʊʔ;
|
| +$med_w န\u1037\u103A → ʊ $creaky ɴ;
|
| +$med_w န\u103Aး → ʊ $high ɴ;
|
| +$med_w န\u103A → ʊ $low ɴ;
|
| +$med_w ပ\u103A → ʊʔ;
|
| +$med_w မ\u1037\u103A → ʊ $creaky ɴ;
|
| +$med_w မ\u103Aး → ʊ $high ɴ;
|
| +$med_w မ\u103A → ʊ $low ɴ;
|
| +#
|
| +# Medials
|
| +#
|
| +::Null;
|
| +# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
|
| +# velar + /j/ ==> modern palatals.
|
| +ကျ → t\u0361ɕ;
|
| +ချ → t\u0361ɕʰ;
|
| +ဂျ → d\u0361ʑ;
|
| +ဃျ → d\u0361ʑ;
|
| +ကြ → t\u0361ɕ;
|
| +ခြ → t\u0361ɕʰ;
|
| +ဂြ → d\u0361ʑ;
|
| +ဃြ → d\u0361ʑ;
|
| +# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
|
| +ယ { [$med_y $med_r] → ;
|
| +# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
|
| +# other medials.
|
| +# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
|
| +\u103D \u103E → \u103E \u103D;
|
| +::Null;
|
| +# Now MEDIAL WA comes last.
|
| +# Produce the palatal ʃ from (SA|LA)+YA+HA.
|
| +သျ\u103E → ʃ;
|
| +လျ\u103E → ʃ;
|
| +# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
|
| +\u103C \u103E → \u103E \u103C;
|
| +::Null;
|
| +# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
|
| +\u103B \u103E → \u103E \u103B;
|
| +::Null;
|
| +# Consume MEDIAL HA and apply devoicing.
|
| +င\u103E → ŋ\u030A;
|
| +ဉ\u103E → ɲ\u0325;
|
| +ည\u103E → ɲ\u0325;
|
| +ဏ\u103E → n\u0325;
|
| +န\u103E → n\u0325;
|
| +မ\u103E → m\u0325;
|
| +ယ\u103E → ʃ;
|
| +ရ\u103E → ʃ;
|
| +လ\u103E → l\u0325;
|
| +ဝ\u103E → w\u0325;
|
| +ဠ\u103E → l\u0325;
|
| +# Drop any remaining U+103E MEDIAL HA.
|
| +\u103E → ;
|
| +# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
|
| +# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
|
| +\u103B } \u103D → ;
|
| +\u103C } \u103D → ;
|
| +\u103B → j;
|
| +\u103C → j;
|
| +\u103D → w;
|
| +#
|
| +# Initials
|
| +#
|
| +# Velars
|
| +က → k;
|
| +ခ → kʰ;
|
| +ဂ → ɡ;
|
| +ဃ → ɡ;
|
| +င → ŋ;
|
| +# Historic palatals
|
| +စ → s;
|
| +ဆ → sʰ;
|
| +ဇ → z;
|
| +ဈ → z;
|
| +ဉ → ɲ;
|
| +ည → ɲ;
|
| +# Alveolars
|
| +ဋ → t;
|
| +ဌ → tʰ;
|
| +ဍ → d;
|
| +ဎ → d;
|
| +ဏ → n;
|
| +# Historic dentals ==> alveolars
|
| +တ → t;
|
| +ထ → tʰ;
|
| +ဒ → d;
|
| +ဓ → d;
|
| +န → n;
|
| +# Labials
|
| +ပ → p;
|
| +ဖ → pʰ;
|
| +ဗ → b;
|
| +ဘ → b;
|
| +မ → m;
|
| +# Other letters
|
| +ယ → j;
|
| +ရ → j; # historic /r/
|
| +လ\u103A → ; # final, typically not pronounced in native words
|
| +လ → l;
|
| +ဝ → w;
|
| +သ → θ; # historic /s/ ==> modern dental
|
| +ဟ → h;
|
| +ဠ → l;
|
| +အ → ʔ;
|
| +# Independent vowels
|
| +ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
|
| +ဣး → ʔí; # this does not usually occur
|
| +ဣ → ʔḭ;
|
| +ဤ\u1037 → ʔḭ; # this does not usually occur
|
| +ဤး → ʔí; # this does not usually occur
|
| +ဤ → ʔì;
|
| +ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
|
| +ဥး → ʔú; # this does not usually occur
|
| +ဥ → ʔṵ;
|
| +ဦ\u1037 → ʔṵ; # this does not usually occur
|
| +ဦး → ʔú;
|
| +ဦ → ʔù;
|
| +ဧ\u1037 → ʔḛ; # this does not usually occur
|
| +ဧး → ʔé;
|
| +ဧ → ʔè;
|
| +ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
|
| +ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
|
| +ဩ → ʔɔ\u0301;
|
| +ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
|
| +ဪး → ʔɔ\u0301; # this does not usually occur
|
| +ဪ → ʔɔ\u0300;
|
| +# Various signs
|
| +၌ → n\u0325aɪ\u032Fʔ;
|
| +၍ → jwḛ;
|
| +# ၎င\u103Aး was handled earlier.
|
| +၏ → ʔḭ;
|
| +#
|
| +# Postprocessing
|
| +#
|
| +# Delete any remaining U+103A ASAT.
|
| +$asat → ;
|
| +# Delete zero-width space, non-joiner, joiner.
|
| +[\u200B-\u200D] → ;
|
| +::NFC;
|
| +
|
|
|