| Index: source/data/translit/Grek_Latn.txt
|
| diff --git a/source/data/translit/Greek_Latin.txt b/source/data/translit/Grek_Latn.txt
|
| similarity index 75%
|
| rename from source/data/translit/Greek_Latin.txt
|
| rename to source/data/translit/Grek_Latn.txt
|
| index 5118c6fe6e56948a533e97bd5872cb2572017806..a682aff6712b54f27e2c5a30ef698b9b7b762834 100644
|
| --- a/source/data/translit/Greek_Latin.txt
|
| +++ b/source/data/translit/Grek_Latn.txt
|
| @@ -1,18 +1,30 @@
|
| -# ***************************************************************************
|
| -# *
|
| -# * Copyright (C) 2004-2015, International Business Machines
|
| -# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
| -# *
|
| -# ***************************************************************************
|
| -# File: Greek_Latin.txt
|
| -# Generated from CLDR
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| #
|
| +# File: Grek_Latn.txt
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# Rules are predicated on running NFD first, and NFC afterwards
|
| +# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
|
| +# MINIMAL FILTER GENERATED FOR: Greek-Latin
|
| :: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
|
| :: NFD (NFC) ;
|
| +# TEST CASES
|
| +# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
| +# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
| +# ᾳ ῃ ῳ ὃ ὄ
|
| +# ὠς ὡς ὢς ὣς
|
| +# Ὠς Ὡς Ὢς Ὣς
|
| +# ὨΣ ὩΣ ὪΣ ὫΣ
|
| +# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
| +# Useful variables
|
| $lower = [[:latin:][:greek:] & [:Ll:]];
|
| $glower = [[:greek:] & [:Ll:]];
|
| $upper = [[:latin:][:greek:] & [:Lu:]] ;
|
| $accent = [:M:] ;
|
| +# NOTE: restrict to just the Greek & Latin accents that we care about
|
| +# TODO: broaden out once iteration is fixed
|
| $accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
|
| $macron = \u0304 ;
|
| $ddot = \u0308 ;
|
| @@ -37,18 +49,27 @@ $beforeLetter = [[:M:]\']* [:L:] ;
|
| $beforeLower = $accent * $lower ;
|
| $notLetter = [^[:L:][:M:]] ;
|
| $under = \u0331;
|
| +# Fix punctuation
|
| +# preserve original
|
| \: ↔ \: $under ;
|
| \? ↔ \? $under ;
|
| \; ↔ \? ;
|
| · ↔ \: ;
|
| +# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
| \u0342 ↔ \u0302 ;
|
| +# IOTA: convert iota subscript to iota
|
| +# first make previous alpha long!
|
| $accent_minus = [[$accent]-[$iotasub$macron]];
|
| Α } $accent_minus * $iotasub → | Α $macron ;
|
| α } $accent_minus * $iotasub → | α $macron ;
|
| +# now convert to uppercase if after uppercase, otherwise to lowercase
|
| $upper $accent * { $iotasub → I ;
|
| $iotasub → i ;
|
| | $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
|
| | $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
|
| +# BREATHING
|
| +# Convert rough breathing to h, and move before letters.
|
| +# Make A ` x = → H a x
|
| Α ($macron?) $rough } $beforeLower → H | α $1;
|
| Ε $rough } $beforeLower → H | ε;
|
| Η $rough } $beforeLower → H | η ;
|
| @@ -56,6 +77,7 @@ $iotasub → i ;
|
| Ο $rough } $beforeLower → H | ο ;
|
| Υ $rough } $beforeLower → H | υ ;
|
| Ω ($ddot?) $rough } $beforeLower → H | ω $1;
|
| +# Make A x ` = → H a x
|
| Α ($glower $macron?) $rough → H | α $1 ;
|
| Ε ($glower) $rough → H | ε $1 ;
|
| Η ($glower) $rough → H | η $1 ;
|
| @@ -63,14 +85,18 @@ $iotasub → i ;
|
| Ο ($glower) $rough → H | ο $1 ;
|
| Υ ($glower) $rough → H | υ $1 ;
|
| Ω ($glower $ddot?) $rough → H | ω $1 ;
|
| +#Otherwise, make x ` into h x and X ` into H X
|
| ($lcgvowel + $ddotmac? ) $rough → h | $1 ;
|
| ($gvowel + $ddotmac? ) $rough → H | $1 ;
|
| +# Go backwards with H
|
| | $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
|
| | $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
|
| | $1 $rough ← h ($evowel $macron? $ddot?) ;
|
| | $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
|
| | $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
|
| | $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
|
| +# titlecase, have to fix individually
|
| +# in the future, we should add &uppercase() to make this easier
|
| | A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
|
| | E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
|
| | I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
|
| @@ -89,10 +115,18 @@ $iotasub → i ;
|
| | O $1 $rough ← H o ($macron? $ddot? ) ;
|
| | U $1 $rough ← H u ($macron? $ddot? ) ;
|
| | Y $1 $rough ← H y ($macron? $ddot? ) ;
|
| +# Now do smooth
|
| +#delete smooth breathing for Latin
|
| $smooth → ;
|
| +# insert in Greek
|
| +# the assumption is that all Marks are on letters.
|
| | $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
|
| | $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
|
| | $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
|
| +# TODO: preserve smooth/rough breathing if not
|
| +# on initial vowel sequence
|
| +# need to have these up here so the rules don't mask
|
| +# remove now superfluous macron when returning
|
| Α ← A $macron ;
|
| α ← a $macron ;
|
| η ↔ e $macron ;
|
| @@ -105,6 +139,7 @@ $smooth → ;
|
| ψ ↔ ps ;
|
| ω ↔ o $macron ;
|
| Ω ↔ O $macron;
|
| +# NORMAL
|
| α ↔ a ;
|
| Α ↔ A ;
|
| β ↔ b ;
|
| @@ -145,17 +180,24 @@ $smooth → ;
|
| Ρ $rough ↔ RH ;
|
| ρ ↔ r ;
|
| Ρ ↔ R ;
|
| +# insert separator before things that turn into s
|
| [Pp] { } [ςσΣϷϸϺϻ] → \' ;
|
| +# special S variants
|
| Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
| ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
| Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
| ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
| +# underbar means exception
|
| +# before a letter, initial
|
| ς } $beforeLetter ↔ s $underbar } $beforeLetter;
|
| σ } $beforeLetter ↔ s } $beforeLetter;
|
| +# otherwise, after a letter = final
|
| $afterLetter { σ ↔ $afterLetter { s $underbar;
|
| $afterLetter { ς ↔ $afterLetter { s ;
|
| +# otherwise (isolated) = initial
|
| ς ↔ s $underbar;
|
| σ ↔ s ;
|
| +# [Pp] { Σ ↔ \'S ;
|
| Σ ↔ S ;
|
| τ ↔ t ;
|
| Τ ↔ T ;
|
| @@ -166,6 +208,7 @@ $vowel { Υ ↔ U ;
|
| χ ↔ ch ;
|
| Χ } $beforeLower ↔ Ch ;
|
| Χ ↔ CH ;
|
| +# Completeness for ASCII
|
| $ignore = [[:Mark:]''] * ;
|
| | k ← c ;
|
| | ph ← f ;
|
| @@ -187,6 +230,7 @@ $rough } $ignore [:UppercaseLetter:] → H ;
|
| $ignore [:UppercaseLetter:] { $rough → H ;
|
| $rough ← H ;
|
| $rough ↔ h ;
|
| +# Completeness for Greek
|
| ϐ → | β ;
|
| ϑ → | θ ;
|
| ϒ → | Υ ;
|
| @@ -201,7 +245,12 @@ $rough ↔ h ;
|
| ϵ → | ε ;
|
| µ → | μ ;
|
| ͺ → i;
|
| +# delete any trailing ' marks used for roundtripping
|
| ← [Ππ] { \' } [Ss] ;
|
| ← [Νν] { \' } $egammaLike ;
|
| ::NFC (NFD) ;
|
| +# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
| +# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
|
| +# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
| :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
|
| +
|
|
|