| Index: source/data/translit/Grek_Latn_UNGEGN.txt
|
| diff --git a/source/data/translit/Greek_Latin_UNGEGN.txt b/source/data/translit/Grek_Latn_UNGEGN.txt
|
| similarity index 69%
|
| rename from source/data/translit/Greek_Latin_UNGEGN.txt
|
| rename to source/data/translit/Grek_Latn_UNGEGN.txt
|
| index 6c8ae5247dc57748dba3898f6555ca7081b3344f..fa29c428277509c6846e12d15a1ddfbda4738cfb 100644
|
| --- a/source/data/translit/Greek_Latin_UNGEGN.txt
|
| +++ b/source/data/translit/Grek_Latn_UNGEGN.txt
|
| @@ -1,14 +1,18 @@
|
| -# ***************************************************************************
|
| -# *
|
| -# * Copyright (C) 2004-2015, International Business Machines
|
| -# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
| -# *
|
| -# ***************************************************************************
|
| -# File: Greek_Latin_UNGEGN.txt
|
| -# Generated from CLDR
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| #
|
| +# File: Grek_Latn_UNGEGN.txt
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# For modern Greek, based on UNGEGN rules.
|
| +# Rules are predicated on running NFD first, and NFC afterwards
|
| +# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
| +# WARNING: need to add accents to both filters ###
|
| +# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
|
| :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
|
| ::NFD (NFC) ;
|
| +# Useful variables
|
| $lower = [[:latin:][:greek:] & [:Ll:]] ;
|
| $upper = [[:latin:][:greek:] & [:Lu:]] ;
|
| $accent = [[:Mn:][:Me:]] ;
|
| @@ -31,10 +35,13 @@ $under = \u0331;
|
| $caron = \u030C;
|
| $afterLetter = [:L:] [\'$accent]* ;
|
| $beforeLetter = [\'$accent]* [:L:] ;
|
| +# Fix punctuation
|
| +# preserve original
|
| \: ↔ \: $under ;
|
| \? ↔ \? $under ;
|
| \; ↔ \? ;
|
| · ↔ \: ;
|
| +# Fix any ancient characters that creep in
|
| \u0342 → \u0301 ;
|
| \u0302 → \u0301 ;
|
| \u0300 → \u0301 ;
|
| @@ -42,6 +49,7 @@ $smooth → ;
|
| $rough → ;
|
| $iotasub → ;
|
| ͺ → ;
|
| +# need to have these up here so the rules don't mask
|
| η ↔ i $under ;
|
| Η ↔ I $under ;
|
| Ψ } $beforeLower ↔ Ps ;
|
| @@ -49,6 +57,7 @@ $iotasub → ;
|
| ψ ↔ ps ;
|
| ω ↔ o $under ;
|
| Ω ↔ O $under;
|
| +# at beginning or end of word, convert mp to b
|
| [^[:L:]$accent] { μπ → b ;
|
| μπ } [^[:L:]$accent] → b ;
|
| [^[:L:]$accent] { [Μμ][Ππ] → B ;
|
| @@ -56,6 +65,7 @@ $iotasub → ;
|
| μπ ← b ;
|
| Μπ ← B } $beforeLower ;
|
| ΜΠ ← B ;
|
| +# handle diphthongs ending with upsilon
|
| ου ↔ ou ;
|
| ΟΥ ↔ OU ;
|
| Ου ↔ Ou ;
|
| @@ -70,6 +80,7 @@ $fmaker { Υ } $softener ↔ V $under ;
|
| $fmaker { Υ ↔ U $under ;
|
| υ ↔ y ;
|
| Υ ↔ Y ;
|
| +# NORMAL
|
| α ↔ a ;
|
| Α ↔ A ;
|
| β ↔ v ;
|
| @@ -107,17 +118,24 @@ $fmaker { Υ ↔ U $under ;
|
| Π ↔ P ;
|
| ρ ↔ r ;
|
| Ρ ↔ R ;
|
| +# insert separator before things that turn into s
|
| [Pp] { } [ςσΣϷϸϺϻ] → \' ;
|
| +# special S variants
|
| Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
| ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
| Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
| ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
| +# Caron means exception
|
| +# before a letter, initial
|
| ς } $beforeLetter ↔ s $under } $beforeLetter;
|
| σ } $beforeLetter ↔ s } $beforeLetter;
|
| +# otherwise, after a letter = final
|
| $afterLetter { σ ↔ $afterLetter { s $under;
|
| $afterLetter { ς ↔ $afterLetter { s ;
|
| +# otherwise (isolated) = initial
|
| ς ↔ s $under;
|
| σ ↔ s ;
|
| +# [Pp] { Σ ↔ \'S ;
|
| Σ ↔ S ;
|
| τ ↔ t ;
|
| Τ ↔ T ;
|
| @@ -126,6 +144,8 @@ $afterLetter { ς ↔ $afterLetter { s ;
|
| χ ↔ ch ;
|
| Χ } $beforeLower ↔ Ch ;
|
| Χ ↔ CH ;
|
| +# Completeness for ASCII
|
| +# $ignore = [[:Mark:]''] * ;
|
| | ch ← h ;
|
| | k ← c ;
|
| | i ← j ;
|
| @@ -142,6 +162,7 @@ $afterLetter { ς ↔ $afterLetter { s ;
|
| | B ← U } $vowel ;
|
| | Y ← W ;
|
| | Y ← U ;
|
| +# Completeness for Greek
|
| ϐ → | β ;
|
| ϑ → | θ ;
|
| ϒ → | Υ ;
|
| @@ -155,7 +176,10 @@ $afterLetter { ς ↔ $afterLetter { s ;
|
| ϴ → | Θ ;
|
| ϵ → | ε ;
|
| µ → | μ ;
|
| +# delete any trailing ' marks used for roundtripping
|
| ← [Ππ] { \' } [Ss] ;
|
| ← [Νν] { \' } $egammaLike ;
|
| ::NFC (NFD) ;
|
| +# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
| :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
|
| +
|
|
|