Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: source/data/translit/Latin_InterIndic.txt

Issue 2440913002: Update ICU to 58.1
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/data/translit/Latin_Hangul.txt ('k') | source/data/translit/Latin_Jamo.txt » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/data/translit/Latin_InterIndic.txt
diff --git a/source/data/translit/Latin_InterIndic.txt b/source/data/translit/Latin_InterIndic.txt
index 385d91666e58f0d694bbaef2089fa3a279033213..7a4f1feffbcb04fb49da1e1427150ab33c066b7a 100644
--- a/source/data/translit/Latin_InterIndic.txt
+++ b/source/data/translit/Latin_InterIndic.txt
@@ -1,15 +1,19 @@
-# ***************************************************************************
-# *
-# * Copyright (C) 2004-2015, International Business Machines
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
-# *
-# ***************************************************************************
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
# File: Latin_InterIndic.txt
-# Generated from CLDR
+# Generated from CLDR
#
+
+# Latin-InterIndic
+#:: NFD;
+#\uE000 reserved
+#consonants
$chandrabindu=\uE001;
$anusvara=\uE002;
$visarga=\uE003;
+#\uE004 reserved
+# w←vowel→ represents the stand-alone form
$wa=\uE005;
$waa=\uE006;
$wi=\uE007;
@@ -64,8 +68,11 @@ $sha=\uE036;
$ssa=\uE037;
$sa=\uE038;
$ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
$nukta=\uE03C;
$avagraha=\uE03D; # SIGN AVAGRAHA
+# ←vowel→ represents the dependent form
$aa=\uE03E;
$i=\uE03F;
$ii=\uE040;
@@ -82,10 +89,17 @@ $so=\uE04A; # VOWEL SIGN SHORT O
$o=\uE04B; # ो
$au=\uE04C;
$virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
$om = \uE050; # OM
+# \u0951→; # UNMAPPED STRESS SIGN UDATTA
+# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
+# \u0953→; # UNMAPPED GRAVE ACCENT
+# \u0954→; # UNMAPPED ACUTE ACCENT
$lm = \uE055;# Telugu Length Mark
$ailm=\uE056;# AI Length Mark
$aulm=\uE057;# AU Length Mark
+#urdu compatibility forms
$uka=\uE058;
$ukha=\uE059;
$ugha=\uE05A;
@@ -111,6 +125,7 @@ $seven=\uE06D; # DIGIT SEVEN
$eight=\uE06E; # DIGIT EIGHT
$nine=\uE06F; # DIGIT NINE
$dgs=\uE082;
+# For all other scripts
$ecp0=\uE070;
$ecp1=\uE071;
$ecp2=\uE072;
@@ -127,10 +142,13 @@ $ecpC=\uE07C;
$ecpD=\uE07D;
$ecpE=\uE07E;
$ecpF=\uE07F;
+# Khanda-ta
$kta=\uE083;
+# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow=[\uE041-\uE044];
$endThing=[$danda$doubleDanda];
+# $x was originally called '§'; $z was '%'
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
@@ -139,6 +157,8 @@ $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][
m\u0310→$chandrabindu;
h\u0323→$visarga;
x→$ka$virama$sa;
+# convert to independent forms at start of word or syllable:
+# dependent forms for roundtrip
\u0314a\u0304→$aa;
\u0314ai→$ai;
\u0314au→$au;
@@ -159,6 +179,7 @@ x→$ka$virama$sa;
\u0314o\u0306→$co;
\u0314e→$se;
\u0314o→$so;
+# preceded by consonants
$consonants{ a\u0304→$aa;
$consonants{ ai→$ai;
$consonants{ au→$au;
@@ -179,6 +200,7 @@ $consonants{ e\u0306→$ce;
$consonants{ o\u0306→$co;
$consonants{ e→$se;
$consonants{ o→$so;
+# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
a\u0304→$waa;
ai→$wai;
au→$wau;
@@ -199,6 +221,7 @@ o\u0306→$wco;
e→$wse;
''om→$om;
o→$wso;
+# rules for anusvara
n}r\u0325 → $na|$virama;
n}l\u0325 → $na|$virama;
n}na → $na|$virama;
@@ -211,12 +234,14 @@ n}[tdn] → $anusvara;
m}[pbm] → $anusvara;
n}[ylvshr] → $anusvara;
m\u0307 → $anusvara;
+#urdu compatibility
q→$uka|$virama;
k\u0331h\u0331→$ukha |$virama;
g\u0307→ $ugha | $virama;
z → $ujha |$virama;
f → $ufa|$virama;
t\u0331→$kta;
+# dev
y\u0307→$uya|$virama;
l\u0331→$ela|$virama;
n\u0331→$ena|$virama;
@@ -268,15 +293,21 @@ h→$ha|$virama;
$danda'.'→$doubleDanda;
$depVowelAbove{'~'→$anusvara;
$depVowelBelow{'~'→$chandrabindu;
+# convert to dependent forms after consonant with no vowel:
+# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
+#$virama aa→$aa;
$virama a\u0304→$aa;
$virama ai→$ai;
$virama au→$au;
$virama ii→$ii;
$virama i\u0304→$ii;
$virama i→$i;
+#$virama uu→$uu;
$virama u\u0304→$uu;
$virama u→$u;
+#$virama rrh→$rrh;
$virama r\u0325\u0304→$rrh;
+#$virama rh→$rh;
$virama r\u0325a→$rh;
$virama r\u0325→$rh;
$virama l\u0325\u0304→$llh;
@@ -289,16 +320,23 @@ $virama e\u0306→$ce;
$virama o\u0306→$co;
$virama e→$se;
$virama o→$so;
+# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
+#$virama''aa→$waa;
$virama''a\u0304→$waa;
$virama''ai→$wai;
$virama''au→$wau;
+#$virama''ii→$wii;
$virama''i\u0304→$wii;
$virama''i→$wi;
+#$virama''uu→$wuu;
$virama''u\u0304→$wuu;
$virama''u→$wu;
+#$virama''rrh→$wrr;
$virama''r\u0325\u0304→$wrr;
+#$virama''rh→$wr;
$virama''r\u0325→$wr;
$virama''l\u0325\u0304→$wll;
+#$virama''lh→$wl;
$virama''l\u0325→$wl;
$virama''e\u0304→$we;
$virama''o\u0304→$wo;
@@ -307,6 +345,7 @@ $virama''e\u0306→$wce;
$virama''o\u0306→$wco;
$virama''e→$wse;
$virama''o→$wso;
+# no virama
''a\u0304→$waa;
''ai→$wai;
''au→$wau;
@@ -340,3 +379,5 @@ $virama}$endThing→;
8→$eight;
9→$nine;
''→;
+#:: NFC (NFD) ;
+
« no previous file with comments | « source/data/translit/Latin_Hangul.txt ('k') | source/data/translit/Latin_Jamo.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698