Index: source/data/translit/InterIndic_Latin.txt |
diff --git a/source/data/translit/InterIndic_Latin.txt b/source/data/translit/InterIndic_Latin.txt |
index 0c85a5a446b786726ceed38b6da529cd9b6bd5af..13cd64a7211cd99b3eff533b82208c7ec9f79a83 100644 |
--- a/source/data/translit/InterIndic_Latin.txt |
+++ b/source/data/translit/InterIndic_Latin.txt |
@@ -1,15 +1,18 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
# File: InterIndic_Latin.txt |
-# Generated from CLDR |
+# Generated from CLDR |
# |
+ |
+# InterIndic-Latin |
+#\uE000 reserved |
+#consonants |
$chandrabindu=\uE001; |
$anusvara=\uE002; |
$visarga=\uE003; |
+#\uE004 reserved |
+# w←vowel→ represents the stand-alone form |
$wa=\uE005; |
$waa=\uE006; |
$wi=\uE007; |
@@ -64,8 +67,11 @@ $sha=\uE036; |
$ssa=\uE037; |
$sa=\uE038; |
$ha=\uE039; |
+#\u093A Reserved |
+#\u093B Reserved |
$nukta=\uE03C; |
$avagraha=\uE03D; # SIGN AVAGRAHA |
+# ←vowel→ represents the dependent form |
$aa=\uE03E; |
$i=\uE03F; |
$ii=\uE040; |
@@ -82,6 +88,8 @@ $so=\uE04A; # VOWEL SIGN SHORT O |
$o=\uE04B; # ो |
$au=\uE04C; |
$virama=\uE04D; |
+# \u094E Reserved |
+# \u094F Reserved |
$om=\uE050; # OM |
\uE051→; # UNMAPPED STRESS SIGN UDATTA |
\uE052→; # UNMAPPED STRESS SIGN ANUDATTA |
@@ -90,6 +98,7 @@ $om=\uE050; # OM |
$lm = \uE055;# Telugu Length Mark |
$ailm=\uE056;# AI Length Mark |
$aulm=\uE057;# AU Length Mark |
+# Urdu compatibility forms |
$uka=\uE058; |
$ukha=\uE059; |
$ugha=\uE05A; |
@@ -114,14 +123,21 @@ $six=\uE06C; # DIGIT SIX |
$seven=\uE06D; # DIGIT SEVEN |
$eight=\uE06E; # DIGIT EIGHT |
$nine=\uE06F; # DIGIT NINE |
+# Glottal stop |
$dgs=\uE082; |
+#Khanda-ta |
$kta=\uE083; |
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; |
$depVowelBelow=[\uE041-\uE044]; |
+# $x was originally called '§'; $z was '%' |
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; |
$z=[bcdfghjklmnpqrstvwxyz]; |
$vowels=[aeiour\u0304\u0325\u0306]; |
$forceIndependentMatra = [^[[:L:][\u0300-\u034C]]]; |
+###################################################################### |
+# convert from Native letters to Latin letters |
+###################################################################### |
+#transliterations for anusvara |
$anusvara} [$ka$kha$ga$gha$nga] → n\u0307; |
$anusvara} [$ca$cha$ja$jha$nya] → n\u0304; |
$anusvara} [$tta$ttha$dda$ddha$nna] → n\u0323; |
@@ -129,6 +145,7 @@ $anusvara} [$ta$tha$da$dha$na] → n; |
$anusvara} [$pa$pha$ba$bha$ma] → m; |
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n; |
$anusvara→ m\u0307; |
+# Urdu compatibility |
$ya$nukta}$x → y\u0307; |
$ya$nukta$virama → y\u0307; |
$ya$nukta → y\u0307a; |
@@ -186,6 +203,7 @@ $ela → l\u0331a; |
$uya}$x → y\u0307; |
$uya$virama → y\u0307; |
$uya → y\u0307a; |
+# normal consonants |
$ka$virama}$ha→k''; |
$ka}$x→k; |
$ka$virama→k; |
@@ -312,6 +330,7 @@ $sa$virama}$ssa→s''; |
$sa$virama}$sa→s''; |
$sa}$x→s; |
$sa$virama→s; |
+#for gurmukhi |
$sa$nukta}$x→s\u0301; |
$sa$nukta$virama→s\u0301; |
$sa$nukta→s\u0301a; |
@@ -325,6 +344,7 @@ $ssa→s\u0323a; |
$ha}$x→h; |
$ha$virama→h; |
$ha→ha; |
+# dependent vowels (should never occur except following consonants) |
$forceIndependentMatra{$aa → \u0314a\u0304; |
$forceIndependentMatra{$ai → \u0314ai; |
$forceIndependentMatra{$au → \u0314au; |
@@ -338,6 +358,7 @@ $forceIndependentMatra{$llh → \u0314l\u0325\u0304; |
$forceIndependentMatra{$lh → \u0314l\u0325; |
$forceIndependentMatra{$e → \u0314e\u0304; |
$forceIndependentMatra{$o → \u0314o\u0304; |
+#extra vowels |
$forceIndependentMatra{$ce → \u0314e\u0306; |
$forceIndependentMatra{$co → \u0314o\u0306; |
$forceIndependentMatra{$se → \u0314e; |
@@ -357,10 +378,12 @@ $llh → l\u0325\u0304; |
$lh → l\u0325; |
$e → e\u0304; |
$o → o\u0304; |
+#extra vowels |
$ce → e\u0306; |
$co → o\u0306; |
$se → e; |
$so → o; |
+#dependent vowels when following independent vowels. Generally illegal; only for round-tripping |
$waa} $x → a\u0304\u0314; |
$wai} $x → ai\u0314; |
$wau} $x → au\u0314; |
@@ -375,11 +398,13 @@ $wl } $x → l\u0325\u0314; |
$we } $x → e\u0304\u0314; |
$wo } $x → o\u0304\u0314; |
$wa } $x → a\u0314; |
+#extra vowels |
$wce} $x → e\u0306\u0314; |
$wco} $x → o\u0306\u0314; |
$wse} $x → e\u0314; |
$wso} $x → o\u0314; |
$om} $x → ''om\u0314; |
+# independent vowels when preceded by vowels |
$vowels{$waa → ''a\u0304; |
$vowels{$wai → ''ai; |
$vowels{$wau → ''au; |
@@ -394,10 +419,12 @@ $vowels{$wl → ''l\u0325; |
$vowels{$we → ''e\u0304; |
$vowels{$wo → ''o\u0304; |
$vowels{$wa → ''a; |
+#extra vowels |
$vowels{$wce → ''e\u0306; |
$vowels{$wco → ''o\u0306; |
$vowels{$wse → ''e; |
$vowels{$wso → ''o; |
+# independent vowels (otherwise) |
$waa → a\u0304; |
$wai → ai; |
$wau → au; |
@@ -412,15 +439,18 @@ $wl → l\u0325; |
$we → e\u0304; |
$wo → o\u0304; |
$wa → a; |
+#extra vowels |
$wce → e\u0306; |
$wco → o\u0306; |
$wse → e; |
$wso → o; |
$om → ''om; |
+#stress marks |
$avagraha → \u0315; |
$chandrabindu$anusvara→\u0303; |
$chandrabindu → m\u0310; |
$visarga→h\u0323; |
+#numbers |
$zero → 0; |
$one → 1; |
$two → 2; |
@@ -439,9 +469,11 @@ $kta→t\u0331; |
$danda→'.'; |
$doubleDanda→'.'; |
\uE070→; # ABBREVIATION SIGN |
+# LETTER RA WITH MIDDLE DIAGONAL |
\uE071}$x→ra; |
\uE071$virama→r; |
\uE071→ra; |
+# LETTER RA WITH LOWER DIAGONAL |
\uE072}$x→ra; |
\uE072$virama→r; |
\uE072→ra; |
@@ -460,3 +492,4 @@ $doubleDanda→'.'; |
\uE07F→; # URA |
\uE080→; # EK ONKAR |
\uE004→; # DEVANAGARI VOWEL SIGN SHORT A |
+ |