Index: source/data/translit/el_el_Latn_BGN.txt |
diff --git a/source/data/translit/Greek_Latin_BGN.txt b/source/data/translit/el_el_Latn_BGN.txt |
similarity index 57% |
rename from source/data/translit/Greek_Latin_BGN.txt |
rename to source/data/translit/el_el_Latn_BGN.txt |
index a81e88596d6178f043aaf0115829faacfd0f88ca..6d7fbaf5d35fad13797bac3b8fbc49c9911955e3 100644 |
--- a/source/data/translit/Greek_Latin_BGN.txt |
+++ b/source/data/translit/el_el_Latn_BGN.txt |
@@ -1,14 +1,47 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Greek_Latin_BGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: el_el_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# |
+######################################################################## |
+# BGN/PCGN 1962 System |
+# |
+# This system is a simplified version of the system devised by the PCGN |
+# in 1941 and later adopted by the BGN. In 1962 the two organizations |
+# agreed to joint adoption of certain changes in the original system, |
+# specifically the omission of special rules for the treatment of Greek |
+# geographic names of Albanian, Bulgarian, Italian, Macedonian, and |
+# Turkish origin. That revision eliminated the need to consider the |
+# origin of names and removed ambiguity from the romanization of Greek |
+# expressions of possible non-Greek origin. This system is based on |
+# the pronunciation of modern Greek and is not intended for use in |
+# the romanization of classical Greek. |
+# |
+# The Greek Alphabet as defined by the BGN (Pages 29-31): |
+# |
+# ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ |
+# αβγδεζηθικλμνξοπρσςτυφχψω |
+# |
+# Originally prepared by Michael Everson <everson@evertype.com> |
+######################################################################## |
+# |
+# MINIMAL FILTER: Greek-Latin |
# |
:: [ΆΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺΆᾼῂῃῄῆῇῈΈῊΉῌῖῚΊῤῥῦῪΎῲῳῴῶῷῸΌῺΏῼ῾] ; |
:: NFD (NFC) ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Define All Transformation Variables |
+# |
+######################################################################## |
+# |
$upperConsonants = [ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨ] ; |
$lowerConsonants = [βγδζθκλμνξπρσςτφχψ] ; |
$consonants = [$upperConsonants $lowerConsonants] ; |
@@ -16,7 +49,53 @@ $upperVowels = [ΑΕΗΙΟΥΩ] ; |
$lowerVowels = [αεηιουω] ; |
$vowels = [$upperVowels $lowerVowels] ; |
$lower = [$lowerConsonants $lowerVowels] ; |
+# |
+# |
+# Use this $wordBoundary until bug 2034 is fixed in ICU: |
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
+# |
$wordBoundary = [^[:L:][:M:][:N:]] ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Rules moved to front to avoid masking |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 32 Rule 1: |
+# |
+# The apostrophe and reversed apostrophe, one or the other of which is |
+# written in Greek in front of all initial uppercase vowel characters, |
+# above all initial lowercase vowel characters, and above the second |
+# character of all initial two-vowel character sequences, should not |
+# be romanized, e.g., Ἀθῆναι → Athínai, Ἠράκλειον → Iráklion, |
+# Οἰνόφυτα → Oinófita. These apostrophes must be distinguished from |
+# accent marks when they occur together, e.g. Ἄβατον → Ávaton, |
+# Ἤλια → Ília, Οἴτη → Oíti. The reversed apostrophe is sometimes found |
+# also with ρ and should, likewise, not be romanized: ῥέμα → réma. |
+# |
+# BGN Page 32 Rule 2a: |
+# |
+# Stress is shown in Greek by the use of the tilde or circumflex, |
+# the acute accent, or the grave accent; all of those marks should |
+# be represented in romanization by an acute accent, e.g., |
+# Ἀθῆναι → Athínai, Νδία → Día, Ζεμενὸν → Zemenón. |
+# |
+# BGN Page 32 Rule 4: |
+# |
+# The character ι (ióta) is sometimes found written under, or, |
+# in uppercase, to the right of the vowel characters α, η, and ω. |
+# This "subscript iota" should not be romanized, e.g., |
+# Μυρτῷον Πέλαγος or ΜΥΡΤῼΟΝ ΠΕΛΑΓΟΣ [but not ΜΥΡΤΩΙΟΝ ΠΕΛΑΓΟΣ] |
+# → Mirtóön Pélagos. |
+# |
+######################################################################## |
+# |
[ἈἉᾼᾈᾉ] → Α ; # GREEK CAPITAL LETTER ALPHA |
[ἀἁᾳᾀᾁ] → α ; # GREEK SMALL LETTER ALPHA |
[ἊἋἌἍἎἏᾊᾋᾌᾍᾎᾏᾺΆ] → Ά ; # GREEK CAPITAL LETTER ALPHA WITH TONOS |
@@ -47,6 +126,29 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
[ὢὣὤὥὦὧὼώᾢᾣᾤᾥᾦᾧῲῴῶῷ] → ώ ; # GREEK SMALL LETTER OMEGA WITH TONOS |
Ῥ → Ρ ; # GREEK CAPITAL LETTER RHO |
[ῤῥ] → ρ ; # GREEK SMALL LETTER RHO |
+# |
+# |
+######################################################################## |
+# |
+# End of Rules 1, 2a, and 4 |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 32 Rules 2b and 2c: |
+# |
+# If the stressed vowel is written as a sequence of two vowel characters |
+# in Greek, the second vowel character should carry the accent; |
+# similarly, in Romanization the acute accent should be placed over the |
+# second vowel letter, e.g., Οἰνοῦσαι → Oinoúsai, Οἴτη → Oíti, |
+# Θεσπιαὶ → Thespiaí. |
+# |
+# Where a syllable containing one of the combinations αυ, ευ, or ηυ |
+# carries the stress, this is marked in Greek on the character υ. |
+# In romanization it should be shown on the preceding vowel |
+# letter, e.g., Πειραιεύς → Piraiévs, Αὔρα → Ávra. |
+# |
Αί → Aí ; |
αί → aí ; |
Οί → Oí ; |
@@ -59,6 +161,24 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
εύ → έυ ; |
Ηύ → Ήυ ; |
ηύ → ήυ ; |
+# |
+# |
+######################################################################## |
+# |
+# End of Rules 2b and 2c |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 32 Rule 3: |
+# |
+# The dieresis should be shown in romanization where it occurs in Greek, |
+# e.g., Μαρινέϊκα → Marinéïka, Ἀχαΐα → Akhaï\u0301a; and over the second vowel |
+# letter in romanization of the following combinations of Greek vowel |
+# characters: αε, e.g., Ἀετὸς → Aëtós; αη, e.g., Ἀηδὼν → Aïdhón; οη, |
+# e.g. Οἰνόη → Oinóï; ωο, e.g., Ἠρῶον → Iróön. |
+# |
[ΪΫ] → Ï ; |
[ϊϋ] → ï ; |
[ΐΰ] → ï\u0301 ; |
@@ -78,6 +198,20 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
όη → óï ; |
Ώο → Óö ; |
ώο → óö ; |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 3 |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Start of Alphabetic Transformations |
+# |
+######################################################################## |
+# |
ΑΙ → AI ; # GREEK CAPITAL LETTER ALPHA + CAPITAL IOTA |
Αι → Ai ; # GREEK CAPITAL LETTER ALPHA + SMALL IOTA |
αι → ai ; # GREEK SMALL LETTER ALPHA + SMALL IOTA |
@@ -99,25 +233,95 @@ $wordBoundary{γκ → g ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA |
ΓΚ → NG ; # GREEK CAPITAL LETTER GAMMA + CAPITAL KAPPA |
Γκ → Ng ; # GREEK CAPITAL LETTER GAMMA + SMALL KAPPA |
γκ → ng ; # GREEK SMALL LETTER GAMMA + SMALL KAPPA |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 29 Rule 3a: |
+# |
+# The character γ should be romanized g before α, ο, ου, ω, and |
+# consonants other than γ, ξ, and χ. |
+# |
+######################################################################## |
+# |
Γ}[ΑΟΩ [$upperConsonants - [ΓΞΧ]]] → G ; # GREEK CAPITAL LETTER GAMMA |
Γ}[αοω [$lowerConsonants - [γξχ]]] → G ; # GREEK CAPITAL LETTER GAMMA |
Γ}ΟΥ → G ; # GREEK CAPITAL LETTER GAMMA |
Γ}ου → G ; # GREEK CAPITAL LETTER GAMMA |
γ}[αοω [$lowerConsonants - [γξχ]]] → g ; # GREEK SMALL LETTER GAMMA |
γ}ου → g ; # GREEK SMALL LETTER GAMMA |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 3a |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 29 Rule 3b: |
+# |
+# The character γ should be romanized y before αι, ε, ει, η, ι, οι, υ, |
+# and υι. |
+# |
+######################################################################## |
+# |
Γ}[ΑΕΟΥ]Ι → Y ; # GREEK CAPITAL LETTER GAMMA |
Γ}[ΕΗΙΥ] → Y ; # GREEK CAPITAL LETTER GAMMA |
Γ}[αεου]ι → Y ; # GREEK CAPITAL LETTER GAMMA |
Γ}[εηιυ] → Y ; # GREEK CAPITAL LETTER GAMMA |
γ}[αεου]ι → y ; # GREEK SMALL LETTER GAMMA |
γ}[εηιυ] → y ; # GREEK SMALL LETTER GAMMA |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 3b |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 29 Rule 3c: |
+# |
+# The character γ should be romanized n before ξ and χ. |
+# |
+######################################################################## |
+# |
Γ}[ΞΧ] → N ; # GREEK CAPITAL LETTER GAMMA |
Γ}[ξχ] → N ; # GREEK CAPITAL LETTER GAMMA |
γ}[ξχ] → n ; # GREEK SMALL LETTER GAMMA |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 3c |
+# |
+######################################################################## |
+# |
Γ → G ; # GREEK CAPITAL LETTER GAMMA |
γ → g ; # GREEK SMALL LETTER GAMMA |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 29 Rule 4a: |
+# |
+# The character δ should be romanized d when between ν and ρ. |
+# |
+######################################################################## |
+# |
Ν{Δ}Ρ → D ; # GREEK CAPITAL LETTER DELTA |
ν{δ}ρ → d ; # GREEK SMALL LETTER GAMMA |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 4a |
+# |
+######################################################################## |
+# |
Δ} $lower → Dh ; # GREEK CAPITAL LETTER PSI |
Δ → DH ; # GREEK CAPITAL LETTER DELTA |
δ → dh ; # GREEK SMALL LETTER DELTA |
@@ -191,6 +395,14 @@ $wordBoundary{ντ → d ; # GREEK SMALL LETTER NU + SMALL TAU |
ς → s ; # GREEK SMALL LETTER FINAL SIGMA |
Τ → T ; # GREEK CAPITAL LETTER TAU |
τ → t ; # GREEK SMALL LETTER TAU |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 3.5 |
+# |
+######################################################################## |
+# |
Υ → I ; # GREEK CAPITAL LETTER UPSILON |
υ → i ; # GREEK SMALL LETTER UPSILON |
Ύ → Í ; # GREEK CAPITAL LETTER UPSILON WITH TONOS |
@@ -207,3 +419,7 @@ $wordBoundary{ντ → d ; # GREEK SMALL LETTER NU + SMALL TAU |
ω → o ; # GREEK SMALL LETTER OMEGA |
Ώ → Ó ; # GREEK CAPITAL LETTER OMEGA WITH TONOS |
ώ → ó ; # GREEK SMALL LETTER OMEGA WITH TONOS |
+# |
+# |
+######################################################################## |
+ |