Index: source/data/translit/Latin_InterIndic.txt |
diff --git a/source/data/translit/Latin_InterIndic.txt b/source/data/translit/Latin_InterIndic.txt |
index 385d91666e58f0d694bbaef2089fa3a279033213..7a4f1feffbcb04fb49da1e1427150ab33c066b7a 100644 |
--- a/source/data/translit/Latin_InterIndic.txt |
+++ b/source/data/translit/Latin_InterIndic.txt |
@@ -1,15 +1,19 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
# File: Latin_InterIndic.txt |
-# Generated from CLDR |
+# Generated from CLDR |
# |
+ |
+# Latin-InterIndic |
+#:: NFD; |
+#\uE000 reserved |
+#consonants |
$chandrabindu=\uE001; |
$anusvara=\uE002; |
$visarga=\uE003; |
+#\uE004 reserved |
+# w←vowel→ represents the stand-alone form |
$wa=\uE005; |
$waa=\uE006; |
$wi=\uE007; |
@@ -64,8 +68,11 @@ $sha=\uE036; |
$ssa=\uE037; |
$sa=\uE038; |
$ha=\uE039; |
+#\u093A Reserved |
+#\u093B Reserved |
$nukta=\uE03C; |
$avagraha=\uE03D; # SIGN AVAGRAHA |
+# ←vowel→ represents the dependent form |
$aa=\uE03E; |
$i=\uE03F; |
$ii=\uE040; |
@@ -82,10 +89,17 @@ $so=\uE04A; # VOWEL SIGN SHORT O |
$o=\uE04B; # ो |
$au=\uE04C; |
$virama=\uE04D; |
+# \u094E Reserved |
+# \u094F Reserved |
$om = \uE050; # OM |
+# \u0951→; # UNMAPPED STRESS SIGN UDATTA |
+# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA |
+# \u0953→; # UNMAPPED GRAVE ACCENT |
+# \u0954→; # UNMAPPED ACUTE ACCENT |
$lm = \uE055;# Telugu Length Mark |
$ailm=\uE056;# AI Length Mark |
$aulm=\uE057;# AU Length Mark |
+# Urdu compatibility forms |
$uka=\uE058; |
$ukha=\uE059; |
$ugha=\uE05A; |
@@ -111,6 +125,7 @@ $seven=\uE06D; # DIGIT SEVEN |
$eight=\uE06E; # DIGIT EIGHT |
$nine=\uE06F; # DIGIT NINE |
$dgs=\uE082; |
+# For all other scripts |
$ecp0=\uE070; |
$ecp1=\uE071; |
$ecp2=\uE072; |
@@ -127,10 +142,13 @@ $ecpC=\uE07C; |
$ecpD=\uE07D; |
$ecpE=\uE07E; |
$ecpF=\uE07F; |
+# Khanda-ta |
$kta=\uE083; |
+# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN |
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; |
$depVowelBelow=[\uE041-\uE044]; |
$endThing=[$danda$doubleDanda]; |
+# $x was originally called '§'; $z was '%' |
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; |
$z=[bcdfghjklmnpqrstvwxyz]; |
$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]]; |
@@ -139,6 +157,8 @@ $consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][ |
m\u0310→$chandrabindu; |
h\u0323→$visarga; |
x→$ka$virama$sa; |
+# convert to independent forms at start of word or syllable: |
+# dependent forms for roundtrip |
\u0314a\u0304→$aa; |
\u0314ai→$ai; |
\u0314au→$au; |
@@ -159,6 +179,7 @@ x→$ka$virama$sa; |
\u0314o\u0306→$co; |
\u0314e→$se; |
\u0314o→$so; |
+# preceded by consonants |
$consonants{ a\u0304→$aa; |
$consonants{ ai→$ai; |
$consonants{ au→$au; |
@@ -179,6 +200,7 @@ $consonants{ e\u0306→$ce; |
$consonants{ o\u0306→$co; |
$consonants{ e→$se; |
$consonants{ o→$so; |
+# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai}) |
a\u0304→$waa; |
ai→$wai; |
au→$wau; |
@@ -199,6 +221,7 @@ o\u0306→$wco; |
e→$wse; |
''om→$om; |
o→$wso; |
+# rules for anusvara |
n}r\u0325 → $na|$virama; |
n}l\u0325 → $na|$virama; |
n}na → $na|$virama; |
@@ -211,12 +234,14 @@ n}[tdn] → $anusvara; |
m}[pbm] → $anusvara; |
n}[ylvshr] → $anusvara; |
m\u0307 → $anusvara; |
+# Urdu compatibility |
q→$uka|$virama; |
k\u0331h\u0331→$ukha |$virama; |
g\u0307→ $ugha | $virama; |
z → $ujha |$virama; |
f → $ufa|$virama; |
t\u0331→$kta; |
+# dev |
y\u0307→$uya|$virama; |
l\u0331→$ela|$virama; |
n\u0331→$ena|$virama; |
@@ -268,15 +293,21 @@ h→$ha|$virama; |
$danda'.'→$doubleDanda; |
$depVowelAbove{'~'→$anusvara; |
$depVowelBelow{'~'→$chandrabindu; |
+# convert to dependent forms after consonant with no vowel: |
+# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai} |
+#$virama aa→$aa; |
$virama a\u0304→$aa; |
$virama ai→$ai; |
$virama au→$au; |
$virama ii→$ii; |
$virama i\u0304→$ii; |
$virama i→$i; |
+#$virama uu→$uu; |
$virama u\u0304→$uu; |
$virama u→$u; |
+#$virama rrh→$rrh; |
$virama r\u0325\u0304→$rrh; |
+#$virama rh→$rh; |
$virama r\u0325a→$rh; |
$virama r\u0325→$rh; |
$virama l\u0325\u0304→$llh; |
@@ -289,16 +320,23 @@ $virama e\u0306→$ce; |
$virama o\u0306→$co; |
$virama e→$se; |
$virama o→$so; |
+# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai} |
+#$virama''aa→$waa; |
$virama''a\u0304→$waa; |
$virama''ai→$wai; |
$virama''au→$wau; |
+#$virama''ii→$wii; |
$virama''i\u0304→$wii; |
$virama''i→$wi; |
+#$virama''uu→$wuu; |
$virama''u\u0304→$wuu; |
$virama''u→$wu; |
+#$virama''rrh→$wrr; |
$virama''r\u0325\u0304→$wrr; |
+#$virama''rh→$wr; |
$virama''r\u0325→$wr; |
$virama''l\u0325\u0304→$wll; |
+#$virama''lh→$wl; |
$virama''l\u0325→$wl; |
$virama''e\u0304→$we; |
$virama''o\u0304→$wo; |
@@ -307,6 +345,7 @@ $virama''e\u0306→$wce; |
$virama''o\u0306→$wco; |
$virama''e→$wse; |
$virama''o→$wso; |
+# no virama |
''a\u0304→$waa; |
''ai→$wai; |
''au→$wau; |
@@ -340,3 +379,5 @@ $virama}$endThing→; |
8→$eight; |
9→$nine; |
''→; |
+#:: NFC (NFD) ; |
+ |