source/data/translit/Grek_Latn.txt - Issue 2440913002: Update ICU to 58.1

Unified Diff: source/data/translit/Grek_Latn.txt

Issue 2440913002: Update ICU to 58.1

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: source/data/translit/Grek_Latn.txt

diff --git a/source/data/translit/Greek_Latin.txt b/source/data/translit/Grek_Latn.txt

similarity index 75%

rename from source/data/translit/Greek_Latin.txt

rename to source/data/translit/Grek_Latn.txt

index 5118c6fe6e56948a533e97bd5872cb2572017806..a682aff6712b54f27e2c5a30ef698b9b7b762834 100644

--- a/source/data/translit/Greek_Latin.txt

+++ b/source/data/translit/Grek_Latn.txt

@@ -1,18 +1,30 @@

-# ***************************************************************************

-# *

-# ***************************************************************************

-# File: Greek_Latin.txt

-# Generated from CLDR

+# License & terms of use: http://www.unicode.org/copyright.html#License

+# File: Grek_Latn.txt

+# Generated from CLDR

+# Rules are predicated on running NFD first, and NFC afterwards

+# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;

+# MINIMAL FILTER GENERATED FOR: Greek-Latin

:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;

:: NFD (NFC) ;

+# TEST CASES

+# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος

+# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ

+# ᾳ ῃ ῳ ὃ ὄ

+# ὠς ὡς ὢς ὣς

+# Ὠς Ὡς Ὢς Ὣς

+# ὨΣ ὩΣ ὪΣ ὫΣ

+# Ạ, ạ, Ẹ, ẹ, Ọ, ọ

+# Useful variables

$lower = [[:latin:][:greek:] & [:Ll:]];

$glower = [[:greek:] & [:Ll:]];

$upper = [[:latin:][:greek:] & [:Lu:]] ;

$accent = [:M:] ;

+# NOTE: restrict to just the Greek & Latin accents that we care about

+# TODO: broaden out once iteration is fixed

$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;

$macron = \u0304 ;

$ddot = \u0308 ;

@@ -37,18 +49,27 @@ $beforeLetter = [[:M:]\']* [:L:] ;

$beforeLower = $accent * $lower ;

$notLetter = [^[:L:][:M:]] ;

$under = \u0331;

+# Fix punctuation

+# preserve original

\: ↔ \: $under ;

\? ↔ \? $under ;

\; ↔ \? ;

· ↔ \: ;

+# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve

\u0342 ↔ \u0302 ;

+# IOTA: convert iota subscript to iota

+# first make previous alpha long!

$accent_minus = [[$accent]-[$iotasub$macron]];

Α } $accent_minus * $iotasub → | Α $macron ;

α } $accent_minus * $iotasub → | α $macron ;

+# now convert to uppercase if after uppercase, otherwise to lowercase

$upper $accent * { $iotasub → I ;

$iotasub → i ;

| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;

| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;

+# BREATHING

+# Convert rough breathing to h, and move before letters.

+# Make A ` x = → H a x

Α ($macron?) $rough } $beforeLower → H | α $1;

Ε $rough } $beforeLower → H | ε;

Η $rough } $beforeLower → H | η ;

@@ -56,6 +77,7 @@ $iotasub → i ;

Ο $rough } $beforeLower → H | ο ;

Υ $rough } $beforeLower → H | υ ;

Ω ($ddot?) $rough } $beforeLower → H | ω $1;

+# Make A x ` = → H a x

Α ($glower $macron?) $rough → H | α $1 ;

Ε ($glower) $rough → H | ε $1 ;

Η ($glower) $rough → H | η $1 ;

@@ -63,14 +85,18 @@ $iotasub → i ;

Ο ($glower) $rough → H | ο $1 ;

Υ ($glower) $rough → H | υ $1 ;

Ω ($glower $ddot?) $rough → H | ω $1 ;

+# Otherwise, make x ` into h x and X ` into H X

($lcgvowel + $ddotmac? ) $rough → h | $1 ;

($gvowel + $ddotmac? ) $rough → H | $1 ;

+# Go backwards with H

| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;

| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;

| $1 $rough ← h ($evowel $macron? $ddot?) ;

| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;

| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;

| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;

+# titlecase, have to fix individually

+# in the future, we should add &uppercase() to make this easier

| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;

| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;

| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;

@@ -89,10 +115,18 @@ $iotasub → i ;

| O $1 $rough ← H o ($macron? $ddot? ) ;

| U $1 $rough ← H u ($macron? $ddot? ) ;

| Y $1 $rough ← H y ($macron? $ddot? ) ;

+# Now do smooth

+# Delete smooth breathing for Latin

$smooth → ;

+# insert in Greek

+# the assumption is that all Marks are on letters.

| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;

| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;

| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;

+# TODO: preserve smooth/rough breathing if not

+# on initial vowel sequence

+# need to have these up here so the rules don't mask

+# remove now superfluous macron when returning

Α ← A $macron ;

α ← a $macron ;

η ↔ e $macron ;

@@ -105,6 +139,7 @@ $smooth → ;

ψ ↔ ps ;

ω ↔ o $macron ;

Ω ↔ O $macron;

+# NORMAL

α ↔ a ;

Α ↔ A ;

β ↔ b ;

@@ -145,17 +180,24 @@ $smooth → ;

Ρ $rough ↔ RH ;

ρ ↔ r ;

Ρ ↔ R ;

+# insert separator before things that turn into s

[Pp] { } [ςσΣϷϸϺϻ] → \' ;

+# special S variants

Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L

ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L

Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L

ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L

+# underbar means exception

+# before a letter, initial

ς } $beforeLetter ↔ s $underbar } $beforeLetter;

σ } $beforeLetter ↔ s } $beforeLetter;

+# otherwise, after a letter = final

$afterLetter { σ ↔ $afterLetter { s $underbar;

$afterLetter { ς ↔ $afterLetter { s ;

+# otherwise (isolated) = initial

ς ↔ s $underbar;

σ ↔ s ;

+# [Pp] { Σ ↔ \'S ;

Σ ↔ S ;

τ ↔ t ;

Τ ↔ T ;

@@ -166,6 +208,7 @@ $vowel { Υ ↔ U ;

χ ↔ ch ;

Χ } $beforeLower ↔ Ch ;

Χ ↔ CH ;

+# Completeness for ASCII

$ignore = [[:Mark:]''] * ;

| k ← c ;

| ph ← f ;

@@ -187,6 +230,7 @@ $rough } $ignore [:UppercaseLetter:] → H ;

$ignore [:UppercaseLetter:] { $rough → H ;

$rough ← H ;

$rough ↔ h ;

+# Completeness for Greek

ϐ → | β ;

ϑ → | θ ;

ϒ → | Υ ;

@@ -201,7 +245,12 @@ $rough ↔ h ;

ϵ → | ε ;

µ → | μ ;

ͺ → i;

+# delete any trailing ' marks used for roundtripping

← [Ππ] { \' } [Ss] ;

← [Νν] { \' } $egammaLike ;

::NFC (NFD) ;

+# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;

+# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;

+# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD

:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;

« no previous file with comments | « source/data/translit/Greek_Latin_UNGEGN.txt ('k') | source/data/translit/Grek_Latn_UNGEGN.txt » ('j') | no next file with comments »