Index: source/data/translit/ar_ar_Latn_BGN.txt |
diff --git a/source/data/translit/Arabic_Latin_BGN.txt b/source/data/translit/ar_ar_Latn_BGN.txt |
similarity index 61% |
rename from source/data/translit/Arabic_Latin_BGN.txt |
rename to source/data/translit/ar_ar_Latn_BGN.txt |
index 6248e62b46a2e4d10a055c4582e5936ace3fb5e9..ecd9363c7ea0e44723f010f31dbd724ec2d15ad3 100644 |
--- a/source/data/translit/Arabic_Latin_BGN.txt |
+++ b/source/data/translit/ar_ar_Latn_BGN.txt |
@@ -1,22 +1,56 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Arabic_Latin_BGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: ar_ar_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# |
+######################################################################## |
+# BGN/PCGN 1956 System |
+# |
+# This system was adopted by the BGN in 1946 and by the PCGN |
+# in 1956 and has been applied in the systematic romanization |
+# of geographic names in Bahrain, Egypt, Iraq, Jordan, |
+# Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan, |
+# Syria, Tunisia, the United Arab Emirates, and Yemen, all |
+# of which have been covered by published BGN gazetteers. |
+# |
+# Originally prepared by Michael Everson <everson@evertype.com> |
+######################################################################## |
+# |
+# MINIMAL FILTER: Arabic-Latin |
# |
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ; |
:: NFKD (NFC) ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Define All Transformation Variables |
+# |
+######################################################################## |
+# |
$alef = ’; |
$ayin = ‘; |
$disambig = \u0331 ; |
+# |
+# |
+# Use this $wordBoundary until bug 2034 is fixed in ICU: |
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
+# |
$wordBoundary = [^[:L:][:M:][:N:]] ; |
+# |
+# |
+######################################################################## |
+# non-letters |
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR |
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR |
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR |
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR |
+# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
، ↔ ',' ; # ARABIC COMMA |
؛ ↔ ';' ; # ARABIC SEMICOLON |
؟ ↔ '?' ; # ARABIC QUESTION MARK |
@@ -41,10 +75,46 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN |
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT |
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE |
+# |
+######################################################################## |
+# |
+# Rules moved to front to avoid masking |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 8 Rule 5 |
+# |
+# The character sequences ته , كه , ده , and سه may be romanized t·h, k·h, |
+# d·h, and s·h in order to differentiate those romanizations from the |
+# digraphs th, kh, dh, and sh. |
+# |
+######################################################################## |
+# |
ته → t·h ; # ARABIC LETTER TEH + HEH |
كه → k·h ; # ARABIC LETTER KAF + HEH |
ده → d·h ; # ARABIC LETTER DAL + HEH |
سه → s·h ; # ARABIC LETTER SEEN + HEH |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 5 |
+# |
+######################################################################## |
+######################################################################## |
+# |
+# |
+# BGN Page 8 Rule 9 |
+# |
+# Doubled consonant sounds are represented in Arabic script by placing |
+# a shaddah ( \u0651 ) over a consonant character. In romanization the letter |
+# should be doubled. [The remainder of this rule deals with the definite |
+# article and is lexical.] |
+# |
+######################################################################## |
+# |
ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA |
ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA |
ث\u0651 → thth ; # ARABIC LETTER THEH + SHADDA |
@@ -72,6 +142,20 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA |
و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA |
ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 9 |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Start of Transformations |
+# |
+######################################################################## |
+# |
$wordBoundary{ء → ; # ARABIC LETTER HAMZA |
ء → $alef ; # ARABIC LETTER HAMZA |
$wordBoundary{ا → ; # ARABIC LETTER ALEF |
@@ -121,3 +205,7 @@ $wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE |
\u064D → iⁿ ; # ARABIC KASRATAN |
\u064C → uⁿ ; # ARABIC DAMMATAN |
::NFC (NFD) ; |
+# |
+# |
+######################################################################## |
+ |