| Index: source/data/translit/und_FONIPA_fa.txt
|
| diff --git a/source/data/translit/und_FONIPA_fa.txt b/source/data/translit/und_FONIPA_fa.txt
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..5a1a322c8e3484e23b77b9f1aabd083333378dfd
|
| --- /dev/null
|
| +++ b/source/data/translit/und_FONIPA_fa.txt
|
| @@ -0,0 +1,115 @@
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| +# File: und_FONIPA_fa.txt
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# Vowels
|
| +# ------
|
| +# In these rules, we produce ی و ا both for short and for long vowels.
|
| +# This would be wrong for writing Farsi or Arabic, but when transliterating
|
| +# foreign words and names, it is strongly preferred to vowel marks.
|
| +# Short schwa [ə] and a few other schwa-like vowels get omitted entirely
|
| +# unless at the end of the word, in which case we emit ه whose Farsi
|
| +# word-final pronunciation comes close to [ə]. At the beginning of words,
|
| +# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
|
| +# dark vowels; note that this use of آ is quite different from Arabic.
|
| +$IVowel = [i ɪ e {e\u031E}];  # written ی, or ای right after $Boundary; e and e\u031E become ه word-finally
|
| +$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];  # written و, or او right after $Boundary; also holds w ʍ ʷ
|
| +$AVowel = [ɛ œ ɜ æ ɶ];  # written ا in every position
|
| +$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words
|
| +$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];  # dropped unless right after $Boundary, long, or word-final (see the vowel rules)
|
| +$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];  # consumed by the click rules, which emit کچ or نچ
|
| +$Boundary = [^[:L:][:M:][:N:]];  # any code point that is not a letter, a mark or a number
|
| +::NFD;  # decompose so that combining marks can be matched one by one
|
| +# A tie bar (above or below) between t and ʃ marks an affricate. Fold the sequence into the
|
| +# ligature ʧ before the tie bars are deleted by the next rule; otherwise t͡ʃ would reach the
|
| +# consonant rules as plain t + ʃ and come out as تش instead of چ.
|
| +t [\u0361 \u035C] ʃ → ʧ;
|
| +# Delete the modifier letters, combining marks, tone letters, arrows and tie bars listed here; none of them is written.
|
| +[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ;
|
| +ʲ → j;  # superscript letters become their full-size counterparts
|
| +ᵐ → m;
|
| +ⁿ → n;
|
| +ᵑ → ŋ;
|
| +::NFC;  # recompose what is left
|
| +# TODO: Diphthongs probably need more work.
|
| +# Romanian [sekujesk] → [sekuiask], for emitting سیکویاسک not سیکویسک
|
| +$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;  # whichever $UVowel matched, the replacement is the literal text "uia"
|
| +# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
|
| +yʉ → iu;  # the \u032F of the example was already deleted by the diacritic-stripping rule
|
| +::NULL;  # pass break: the rules that follow run over the text as rewritten so far
|
| +# Vowels. Order matters: at each position the first rule that matches is applied.
|
| +$Boundary {$SchwaVowel ː?} → ای;  # schwa right after a boundary, with or without a length mark
|
| +$SchwaVowel ː → ی;  # long schwa anywhere else
|
| +{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;  # schwa, e or e\u031E when the next character is not a letter, mark, number or syllable dot
|
| +$SchwaVowel → ;  # every remaining schwa is left unwritten
|
| +$Boundary {$IVowel ː?} → ای;
|
| +$IVowel ː? j? → ی;  # an optional length mark and a following j are absorbed into the same ی
|
| +$Boundary {$UVowel ː?} → او;
|
| +$UVowel ː? → و;
|
| +$Boundary {$AVowel ː?} → ا;  # same output as the next rule
|
| +$AVowel ː? → ا;
|
| +$Boundary {$DarkAVowel ː?} → آ;
|
| +$DarkAVowel ː? → ا;
|
| +# Shadda for long (geminated) consonants
|
| +ː → \u0651;  # reached only by length marks that no vowel rule consumed
|
| +# Affricates
|
| +[{t\u0361ʃ} ʧ] → چ;
|
| +# Clicks
|
| +[ɡ g ɠ k] $Click → کچ;  # a click preceded by ɡ, g, ɠ or k
|
| +[n ɲ]? $Click → نچ;  # every other click, with or without a preceding n or ɲ
|
| +# Nasal stops
|
| +[{m\u0325} m ɱ] → م;
|
| +[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
|
| +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;  # ŋ or ɴ followed by k: the k is consumed and written as ک
|
| +[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;  # otherwise نگ, absorbing a following ɡ or g if there is one
|
| +# Non-nasal stops
|
| +[p {p\u032A}] → پ;
|
| +[b {b\u032A} ɓ] → ب;
|
| +[{d\u033C} d ɗ ᶑ] → د;
|
| +[{t\u033C} t] → ت;
|
| +[ʈ] → ط;
|
| +[ɖ] → ض;
|
| +c → چ;
|
| +ɟ → دج;
|
| +k → ک;
|
| +[ɡ g ɠ] → گ;
|
| +[q ɢ ʡ ʛ] → ق;
|
| +{ʔ} [^\u031E] → ;  # ʔ is not written; ʔ followed by \u031E is skipped here so the later ه rule can match it whole
|
| +# Sibilant fricatives
|
| +s → س;
|
| +z → ز;
|
| +[ʃ ʂ ɕ ʄ] → ش;  # NOTE(review): ʄ is also in $Click, whose rules run earlier, so ʄ looks unreachable here; confirm which mapping is intended
|
| +[ʒ ʐ ʑ] → ژ;
|
| +# Non-sibilant fricatives
|
| +[ɸ f] → ف;
|
| +[β v] → و;  # same letter as the $UVowel rules produce
|
| +[{θ\u033C} θ {θ\u0331}] → ث;
|
| +[{ð\u033C} ð {ð\u0320}] → ذ;
|
| +ç → ش;
|
| +ʝ $IVowel? ː? → ی;  # a following $IVowel and length mark are absorbed into the same ی
|
| +[x χ] → خ;
|
| +[ɣ ʁ] → غ;
|
| +ħ → ح;
|
| +ʕ → ع;
|
| +[h ɦ {ʔ\u031E}] → ه;
|
| +# Approximants, trills, flaps
|
| +ʋ → و;
|
| +ʙ → بر;
|
| +{r\u031D} → رژ;  # listed before the general r set so that the bare r is not consumed first
|
| +[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
|
| +[{ʀ\u0325} ʀ] → غ;
|
| +ʜ → ح;
|
| +ʢ → ع;
|
| +j $IVowel? ː? → ی;  # a following $IVowel and length mark are absorbed into the same ی
|
| +# Laterals
|
| +ɬ → شل;
|
| +ɮ → ژل;
|
| +{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;  # ʎ gets an explicit ی unless an $IVowel, j or ʝ follows
|
| +[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;  # also catches ʎ when the previous rule did not apply
|
| +[ʟ {ʟ\u0320}] → غ;
|
| +# Independent pass for misc cleanup.
|
| +::NULL;
|
| +# Strip off syllable markers
|
| +\. → ;
|
| +# Sequences of three or more ووو look very confusing; we shorten them.
|
| +# Polish Darłowo [darwɔvɔ]: داروووو is shortened to داروو
|
| +ووو+ → وو;  # three or more و in a row collapse to exactly two
|
| +
|
|
|