Index: source/data/translit/Grek_Latn_UNGEGN.txt |
diff --git a/source/data/translit/Greek_Latin_UNGEGN.txt b/source/data/translit/Grek_Latn_UNGEGN.txt |
similarity index 69% |
rename from source/data/translit/Greek_Latin_UNGEGN.txt |
rename to source/data/translit/Grek_Latn_UNGEGN.txt |
index 6c8ae5247dc57748dba3898f6555ca7081b3344f..fa29c428277509c6846e12d15a1ddfbda4738cfb 100644 |
--- a/source/data/translit/Greek_Latin_UNGEGN.txt |
+++ b/source/data/translit/Grek_Latn_UNGEGN.txt |
@@ -1,14 +1,18 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Greek_Latin_UNGEGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
# |
+# File: Grek_Latn_UNGEGN.txt |
+# Generated from CLDR |
+# |
+ |
+# For modern Greek, based on UNGEGN rules. |
+# Rules are predicated on running NFD first, and NFC afterwards |
+# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN |
+# WARNING: need to add accents to both filters ### |
+# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ; |
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; |
::NFD (NFC) ; |
+# Useful variables |
$lower = [[:latin:][:greek:] & [:Ll:]] ; |
$upper = [[:latin:][:greek:] & [:Lu:]] ; |
$accent = [[:Mn:][:Me:]] ; |
@@ -31,10 +35,13 @@ $under = \u0331; |
$caron = \u030C; |
$afterLetter = [:L:] [\'$accent]* ; |
$beforeLetter = [\'$accent]* [:L:] ; |
+# Fix punctuation |
+# preserve original |
\: ↔ \: $under ; |
\? ↔ \? $under ; |
\; ↔ \? ; |
· ↔ \: ; |
+# Fix any ancient characters that creep in |
\u0342 → \u0301 ; |
\u0302 → \u0301 ; |
\u0300 → \u0301 ; |
@@ -42,6 +49,7 @@ $smooth → ; |
$rough → ; |
$iotasub → ; |
ͺ → ; |
+# need to have these up here so the rules don't mask |
η ↔ i $under ; |
Η ↔ I $under ; |
Ψ } $beforeLower ↔ Ps ; |
@@ -49,6 +57,7 @@ $iotasub → ; |
ψ ↔ ps ; |
ω ↔ o $under ; |
Ω ↔ O $under; |
+# at beginning or end of word, convert mp to b |
[^[:L:]$accent] { μπ → b ; |
μπ } [^[:L:]$accent] → b ; |
[^[:L:]$accent] { [Μμ][Ππ] → B ; |
@@ -56,6 +65,7 @@ $iotasub → ; |
μπ ← b ; |
Μπ ← B } $beforeLower ; |
ΜΠ ← B ; |
+# handle diphthongs ending with upsilon |
ου ↔ ou ; |
ΟΥ ↔ OU ; |
Ου ↔ Ou ; |
@@ -70,6 +80,7 @@ $fmaker { Υ } $softener ↔ V $under ; |
$fmaker { Υ ↔ U $under ; |
υ ↔ y ; |
Υ ↔ Y ; |
+# NORMAL |
α ↔ a ; |
Α ↔ A ; |
β ↔ v ; |
@@ -107,17 +118,24 @@ $fmaker { Υ ↔ U $under ; |
Π ↔ P ; |
ρ ↔ r ; |
Ρ ↔ R ; |
+# insert separator before things that turn into s |
[Pp] { } [ςσΣϷϸϺϻ] → \' ; |
+# special S variants |
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L |
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L |
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L |
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L |
+# Caron means exception |
+# before a letter, initial |
ς } $beforeLetter ↔ s $under } $beforeLetter; |
σ } $beforeLetter ↔ s } $beforeLetter; |
+# otherwise, after a letter = final |
$afterLetter { σ ↔ $afterLetter { s $under; |
$afterLetter { ς ↔ $afterLetter { s ; |
+# otherwise (isolated) = initial |
ς ↔ s $under; |
σ ↔ s ; |
+# [Pp] { Σ ↔ \'S ; |
Σ ↔ S ; |
τ ↔ t ; |
Τ ↔ T ; |
@@ -126,6 +144,8 @@ $afterLetter { ς ↔ $afterLetter { s ; |
χ ↔ ch ; |
Χ } $beforeLower ↔ Ch ; |
Χ ↔ CH ; |
+# Completeness for ASCII |
+# $ignore = [[:Mark:]''] * ; |
| ch ← h ; |
| k ← c ; |
| i ← j ; |
@@ -142,6 +162,7 @@ $afterLetter { ς ↔ $afterLetter { s ; |
| B ← U } $vowel ; |
| Y ← W ; |
| Y ← U ; |
+# Completeness for Greek |
ϐ → | β ; |
ϑ → | θ ; |
ϒ → | Υ ; |
@@ -155,7 +176,10 @@ $afterLetter { ς ↔ $afterLetter { s ; |
ϴ → | Θ ; |
ϵ → | ε ; |
µ → | μ ; |
+# delete any trailing ' marks used for roundtripping |
← [Ππ] { \' } [Ss] ; |
← [Νν] { \' } $egammaLike ; |
::NFC (NFD) ; |
+# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD |
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; |
+ |