Index: source/data/translit/Hebr_Latn.txt |
diff --git a/source/data/translit/Hebrew_Latin.txt b/source/data/translit/Hebr_Latn.txt |
similarity index 62% |
rename from source/data/translit/Hebrew_Latin.txt |
rename to source/data/translit/Hebr_Latn.txt |
index 16dda62fc8315400c9da295232863dc84b3afc8e..63791d721484291be4d59572799aa51e42e6bd3c 100644 |
--- a/source/data/translit/Hebrew_Latin.txt |
+++ b/source/data/translit/Hebr_Latn.txt |
@@ -1,15 +1,33 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Hebrew_Latin.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
# |
+# File: Hebr_Latn.txt |
+# Generated from CLDR |
+# |
+ |
+# Transliteration table for Hebrew |
+# Based on the UNGEGN table at: |
+# http://www.eki.ee/wgrs/rom1_he.pdf |
+# |
+# Exceptions: |
+# - Accents are added to disambiguate letters |
+# - Combinations of a letter with dagesh or a shin/sin dot that produce |
+# different letters are not yet encoded. |
+# |
+# To test, open: |
+# http://www.ibm.com/software/globalization/icu/demo/transform |
+# Click Edit, paste in this file, Save As hebrew-latin/XXX |
+# (where XXX is a username) |
+# Now go back to the main window, and try it out. |
+# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2 |
+# Paste in Hebrew text in Input, and hit Transliterate. |
+# |
+# For more information, see: |
+# http://icu.sourceforge.net/userguide/Transform.html |
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2ℵ-ℸ\u0304\u05BF] - [\u05BD]] ; |
:: nfkd (nfc) ; |
$letterAfter = [:M:]* [:L:] ; |
+# Longer items are placed first so that shorter rules do not mask them. |
ח ↔ h\u0331 ; |
צ ↔ z\u0331 } $letterAfter; |
ץ ↔ z\u0331 ; |
@@ -43,6 +61,7 @@ $letterAfter = [:M:]* [:L:] ; |
\u05BC ↔ \u0307 ; # dagesh just goes to overdot for now |
\u05C1 ↔ \u030C ; # shin dot -→ sh |
\u05C2 ↔ \u0302 ; # sin dot -→ s |
+# points |
$above = [^[:ccc=0:][:ccc=230:]]*; |
\u05B2 → à ; |
\u05B2 $1← a ($above) \u0300; |
@@ -62,6 +81,7 @@ $above = [^[:ccc=0:][:ccc=230:]]*; |
\u05B6 ↔ e ; |
\u05B3 ↔ o ; |
\u05BF ↔ \u0304 ; |
+# fallbacks |
ק ← c ; |
פ ← f } $letterAfter; |
ף ← f ; |
@@ -71,3 +91,4 @@ $above = [^[:ccc=0:][:ccc=230:]]*; |
:: (lower); |
:: nfc (nfd) ; |
:: ([[:Latin:] [:^ccc=0:] [ʻ-ʼ\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 \u0304 ]]); |
+ |