Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: source/data/translit/ps_ps_Latn_BGN.txt

Issue 2440913002: Update ICU to 58.1
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/data/translit/pl_pl_FONIPA.txt ('k') | source/data/translit/rm_SURSILV_am.txt » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/data/translit/ps_ps_Latn_BGN.txt
diff --git a/source/data/translit/Pashto_Latin_BGN.txt b/source/data/translit/ps_ps_Latn_BGN.txt
similarity index 68%
rename from source/data/translit/Pashto_Latin_BGN.txt
rename to source/data/translit/ps_ps_Latn_BGN.txt
index f6c0d2fddcf0ea37c323f766c9236cee120656ef..90f48df9ef7f6bc901240d0317e9eeef61ce5609 100644
--- a/source/data/translit/Pashto_Latin_BGN.txt
+++ b/source/data/translit/ps_ps_Latn_BGN.txt
@@ -1,22 +1,53 @@
-# ***************************************************************************
-# *
-# * Copyright (C) 2004-2015, International Business Machines
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
-# *
-# ***************************************************************************
-# File: Pashto_Latin_BGN.txt
-# Generated from CLDR
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: ps_ps_Latn_BGN.txt
+# Generated from CLDR
+#
+
+#
+########################################################################
+# BGN/PCGN 1968 System
+#
+# This system was adopted in 1968 for the romanization of Pashto
+# geographic names in Afghanistan. Persian names in Afghanistan are
+# romanized in accordance with the Romanization System for Persian
+# (BGN/PCGN 1958 System), shown on pages 87-92.
+#
+# Originally prepared by Michael Everson <everson@evertype.com>
+########################################################################
+#
+# MINIMAL FILTER: Pashto-Latin
#
:: [ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064E\u064F\u0650\u0651\u0652\u0654٠١٢٣٤٥٦٧٨٩ټپځڅچډړږژښگڰڼیۍې] ;
:: NFD (NFC) ;
+#
+#
+########################################################################
+#
+########################################################################
+#
+# Define All Transformation Variables
+#
+########################################################################
+#
$alef = ’;
$ayin = ‘;
$disambig = \u0331 ;
+#
+#
+# Use this $wordBoundary until bug 2034 is fixed in ICU:
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
+#
$wordBoundary = [^[:L:][:M:][:N:]] ;
+#
+#
+########################################################################
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
+# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
@@ -41,10 +72,46 @@ $wordBoundary = [^[:L:][:M:][:N:]] ;
۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE
+#
+########################################################################
+#
+# Rules moved to front to avoid masking
+#
+########################################################################
+#
+########################################################################
+#
+# BGN Page 89 Rule 4
+#
+# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h,
+# s·h, and g·h in order to differentiate those romanizations from the
+# digraphs kh, zh, sh, and gh.
+#
+########################################################################
+#
كه → k·h ; # ARABIC LETTER KAF + HEH
زه → z·h ; # ARABIC LETTER ZAIN + HEH
سه → s·h ; # ARABIC LETTER SEEN + HEH
گه → g·h ; # ARABIC LETTER GAF + HEH
+#
+#
+########################################################################
+#
+# End Rule 4
+#
+########################################################################
+#
+########################################################################
+#
+# BGN Page 91 Rule 7
+#
+# Doubled consonant sounds are represented in Arabic script by
+# placing a shaddah ( \u0651 ) over a consonant character. In romanization
+# the letter should be doubled. [The remainder of this rule deals with
+# the definite article and is lexical.]
+#
+########################################################################
+#
ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA
پ\u0651 → pp ; # ARABIC LETTER PEH + SHADDA
ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA
@@ -86,6 +153,20 @@ $wordBoundary = [^[:L:][:M:][:N:]] ;
و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA
\u0651ی → yy ; # ARABIC LETTER FARSI YEH + SHADDA
ى\u0651 → yy ; # ARABIC LETTER YEH + SHADDA
+#
+#
+########################################################################
+#
+# End Rule 7
+#
+########################################################################
+#
+########################################################################
+#
+# Start of Transformations
+#
+########################################################################
+#
$wordBoundary{ء → ; # ARABIC LETTER HAMZA
ء → $alef ; # ARABIC LETTER HAMZA
$wordBoundary{ا → ; # ARABIC LETTER ALEF
@@ -135,7 +216,7 @@ $wordBoundary{ا → ; # ARABIC LETTER ALEF
ى → y ; # ARABIC LETTER YEH
ې → e ; # ARABIC LETTER E
\u064Eا → ā ; # ARABIC FATHA + ALEF
-\u064Eى\u0652 → ay ; # ARABIC FATHA + FARSI YEH + SUKUN
+\u064Eى\u0652 → ay ; # ARABIC FATHA + FARSI YEH + SUKUN
\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA
\u064E\u0652ۍ → êy ; # ARABIC FATHA + SUKUN + YEH WITH TAIL
\u064E\u0652 → ê ; # ARABIC FATHA + SUKUN
@@ -150,3 +231,7 @@ $wordBoundary{ا → ; # ARABIC LETTER ALEF
\u064Fو → ū ; # ARABIC DAMMA + WAW
\u064F → u ; # ARABIC DAMMA
\u0652 → ; # ARABIC SUKUN
+#
+#
+########################################################################
+
« no previous file with comments | « source/data/translit/pl_pl_FONIPA.txt ('k') | source/data/translit/rm_SURSILV_am.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698