Index: source/data/translit/und_FONIPA_ar.txt |
diff --git a/source/data/translit/und_FONIPA_ar.txt b/source/data/translit/und_FONIPA_ar.txt |
new file mode 100644 |
index 0000000000000000000000000000000000000000..1e79833c4943ee34f2f89d3c58cd2c94b01c8342 |
--- /dev/null |
+++ b/source/data/translit/und_FONIPA_ar.txt |
@@ -0,0 +1,120 @@ |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: und_FONIPA_ar.txt |
+# Generated from CLDR |
+# |
+ |
+# Vowels |
+# ------ |
+# In these rules, we produce ي و ا both for short and for long vowels. |
+# This would be wrong for writing Arabic, but when transliterating |
+# foreign words and names, it is strongly preferred to vowel marks. |
+# However, we emit short schwa [ə] and a few other, schwa-like vowels. |
+$IVowel = [i ɪ e {e\u031E}]; |
+$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɞ ɔ w {w\u0325} ʍ ʷ]; |
+$AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ\u0308} ɑ ɒ]; |
+$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; |
+$Vowel = [$IVowel $UVowel $AVowel $SchwaVowel]; |
+$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; |
+$Boundary = [^[:L:][:M:][:N:]]; |
+::NFD; |
+[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; |
+ʲ → j; |
+ᵐ → m; |
+ⁿ → n; |
+ᵑ → ŋ; |
+::NFC; |
+# TODO: Diphthongs probably need more work. |
+# Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك |
+$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia; |
+# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit TODO |
+yʉ → iu; |
+::NULL; |
+# Vowels |
+$Boundary {ʔ? $IVowel ː} → إ\u0650ي; |
+$Boundary {ʔ? $IVowel} → إ\u0650; |
+{$IVowel ʔ} $Boundary → ئ; |
+{$IVowel ː ʔ} $Boundary → يء; |
+{$IVowel ː ʔ} [$Vowel] → ئ; |
+$IVowel ː? → ي; |
+$Boundary {ʔ? $UVowel ː} → أو; |
+$Boundary {ʔ? $UVowel} → أ; |
+{$UVowel ʔ} $Boundary → ؤ; |
+{$UVowel ː ʔ} $Boundary → وء; |
+$UVowel ː? → و; |
+$Boundary {ʔ? $AVowel ː} → آ; |
+$Boundary {ʔ? $AVowel} → أ; |
+{$AVowel ʔ} $Boundary → أ; |
+{$AVowel ː ʔ} $Boundary → اء; |
+$AVowel ː? ʔ $AVowel ː? → اءا; |
+$AVowel ː? → ا; |
+$Boundary {ʔ? $SchwaVowel ː} → إ\u0650ي; |
+$Boundary {ʔ? $SchwaVowel} → أ; |
+$SchwaVowel ː → ي; |
+$SchwaVowel → ; |
+# TODO: Handle glottal stop. |
+ʔ → ; |
+# Shadda for long (geminated) consonants |
+ː → \u0651; |
+# Affricates |
+[{t\u0361ʃ} ʧ] → ت\u0652ش; |
+# Clicks |
+[ɡ g ɠ k] $Click → ك\u0652ش; |
+$Click → ت\u0652ش; |
+# Nasal stops |
+[{m\u0325} m ɱ] → م; |
+[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن; |
+[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نك; |
+[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g ɠ]? → ن\u0652غ; |
+# Non-nasal stops |
+[p b {p\u032A} {b\u032A} ɓ] → ب; |
+[{d\u033C} d ɗ ᶑ] → د; |
+[{t\u033C} t] → ت; |
+[ʈ] → ط; |
+[ɖ] → ض; |
+c → ت\u0652ش; |
+ɟ → دج; |
+k → ك; |
+[ɡ g ɠ] → غ; |
+[q ɢ ʡ ʛ] → ق; |
+# Sibilant fricatives |
+s → س; |
+z → ز; |
+[ʃ ʂ ɕ ʄ] → ش; |
+[ʒ ʐ ʑ] → ج; |
+# Non-sibilant fricatives |
+[ɸ f v] → ف; |
+β → ب; |
+[{θ\u033C} θ {θ\u0331}] → ث; |
+[{ð\u033C} ð {ð\u0320}] → ذ; |
+ç → ش; |
+ʝ $IVowel? ː? → ي; |
+[x χ] → خ; |
+[ɣ ʁ] → غ; |
+ħ → ح; |
+ʕ → ع; |
+[h ɦ {ʔ\u031E}] → ه; |
+# Approximants, trills, flaps |
+ʋ → و; |
+ʙ → بر; |
+{r\u031D} → رش; |
+[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر; |
+[{ʀ\u0325} ʀ] → غ; |
+ʜ → ح; |
+ʢ → ع; |
+j $IVowel? ː? → ي; |
+# Laterals |
+ɬ → ش\u0652ل; |
+ɮ → ج\u0652ل; |
+{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لي; |
+[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; |
+[ʟ {ʟ\u0320}] → غ; |
+# Independent pass for misc cleanup. |
+::NULL; |
+# Strip off syllable markers |
+\. → ; |
+# Sequences of three or more ووو look very confusing; we shorten them. |
+# Polish Darłowo [darwɔvɔ] → داروو → داروووو |
+ووو+ → وو; |
+ |