Index: source/data/translit/ru_ru_Latn_BGN.txt |
diff --git a/source/data/translit/ru_ru_Latn_BGN.txt b/source/data/translit/ru_ru_Latn_BGN.txt |
new file mode 100644 |
index 0000000000000000000000000000000000000000..8c205b68d524372768227d8255e52b7a020ca949 |
--- /dev/null |
+++ b/source/data/translit/ru_ru_Latn_BGN.txt |
@@ -0,0 +1,243 @@ |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: ru_ru_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# BGN/PCGN 1947 System |
+# |
+# The BGN/PCGN system for Russian was adopted by the BGN in 1944 and |
+# by the PCGN in 1947 for use in romanizing names written in the |
+# Russian Cyrillic alphabet. |
+# |
+# The Russian Alphabet as defined by the BGN (Page 93): |
+# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ |
+# абвгдеёжзийклмнопрстуфхцчшщъыьэюя |
+# |
+# Originally prepared by Michael Everson everson@evertype.com |
+# Fixed by Frank Yung-Fong Tang ftang@google.com |
+# |
+# Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian |
+######################################################################## |
+# MINIMAL FILTER: Russian-Latin |
+::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя]; |
+# BUG(ftang) remove the following line. Otherwise the rule for |
+# Й й Ё ё will break since the rule is written in NFC but |
+# the line decomposes the text. |
+# :: NFD (NFC) ; |
+######################################################################## |
+# Define All Transformation Variables |
+######################################################################## |
+$prime = ʹ ; |
+$doublePrime = ʺ ; |
+$wordBoundary = [^[:L:][:M:][:N:]] ; |
+$upperVowels = [АЕЁЭИОУЫЮЯ] ; |
+$lowerVowels = [аеёэиоуыюя] ; |
+$vowels = [$upperVowels $lowerVowels] ; |
+$upperConsonants = [[:Uppercase:]-$vowels] ; |
+$lowerConsonants = [[:Lowercase:]-$vowels] ; |
+$consonants = [$upperConsonants $lowerConsonants] ; |
+$upper = [:Uppercase:]; |
+$lower = [:Lowercase:]; |
+######################################################################## |
+# Rules moved to front to avoid masking |
+######################################################################## |
+$lowerVowels { ы → ·y ; |
+$upperVowels { [Ыы] → ·Y ; |
+[$consonants - [Йй]]{Э → ·E ; |
+[$consonants - [Йй]]{э → ·e ; |
+[$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE |
+[$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE |
+[$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE |
+[$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO |
+[$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO |
+[$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO |
+# Since in the above rule we look use the context before the characters, |
+# we have to perform them in a separate pass before we change the vowels |
+# the ::Null forces a separate pass. |
+::Null; |
+######################################################################## |
+# Start of Alphabetic Transformations |
+######################################################################## |
+А → A ; # CYRILLIC CAPITAL LETTER A |
+а → a ; # CYRILLIC SMALL LETTER A |
+Б → B ; # CYRILLIC CAPITAL LETTER BE |
+б → b ; # CYRILLIC SMALL LETTER BE |
+В → V ; # CYRILLIC CAPITAL LETTER VE |
+в → v ; # CYRILLIC SMALL LETTER VE |
+Г → G ; # CYRILLIC CAPITAL LETTER GHE |
+г → g ; # CYRILLIC SMALL LETTER GHE |
+Д → D ; # CYRILLIC CAPITAL LETTER DE |
+д → d ; # CYRILLIC SMALL LETTER DE |
+######################################################################## |
+# BGN Page 94 Rule 1: |
+# # The character e should be romanized ye |
+# initially, after the vowel # characters a, e, ё, и, о, у, ы, э, ю, |
+# and я, and after й, ъ, and ь. |
+# In all other instances, it should |
+# be romanized e. |
+######################################################################## |
+# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER |
+# Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE |
+# Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE |
+$wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE |
+$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE |
+Е → E ; # CYRILLIC CAPITAL LETTER IE |
+# |
+# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER |
+# е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE |
+$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE |
+е → e ; # CYRILLIC SMALL LETTER IE |
+######################################################################## |
+# End of Rule 1 |
+######################################################################## |
+######################################################################## |
+# BGN Page 94 Rule 2: |
+# |
+# The character ё is not considered a separate character of the |
+# Russian alphabet and the dieresis is generally not shown. When the |
+# dieresis is shown, the character should be romanized yë initially, |
+# after the vowel characters a, e, ё, и, о, у, ы, э, ю, and я, and |
+# after й, ъ, and ь, In all other instances, it should be romanized |
+# ё. When the dieresis is not shown, the character may still be |
+# romanized in the preceding manner or, alternatively, in accordance |
+# with note 1. |
+######################################################################## |
+# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER |
+# Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO |
+# Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO |
+$wordBoundary {Ё} $upper → YË ; # CYRILLIC CAPITAL LETTER IO |
+$wordBoundary {Ё} $lower → Yë ; # CYRILLIC CAPITAL LETTER IO |
+Ё → Ë ; # CYRILLIC CAPITAL LETTER IO |
+# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER |
+# ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO |
+$wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO |
+ё → ë ; # CYRILLIC SMALL LETTER IO |
+######################################################################## |
+# End of Rule 2 |
+######################################################################## |
+Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE |
+Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE |
+ж → zh ; # CYRILLIC SMALL LETTER ZHE |
+######################################################################## |
+# BGN Page 94 Rule 3.4 |
+# э after any consonant character except |
+# й becomes ·е |
+######################################################################## |
+З → Z ; # CYRILLIC CAPITAL LETTER ZE |
+з → z ; # CYRILLIC SMALL LETTER ZE |
+# BUG(ftang) The following two lines said those consonant becomes ·е |
+# [$consonants - [Йй]]}Э → ·Е ; |
+# [$consonants - [Йй]]}э → ·е ; |
+######################################################################## |
+# End of Rule 3.4 |
+######################################################################## |
+И → I ; # CYRILLIC CAPITAL LETTER I |
+и → i ; # CYRILLIC SMALL LETTER I |
+######################################################################## |
+# BGN Page 94 Rule 3: |
+# |
+# Unusual Russian character sequences occurring primarily in |
+# non-Russian-language names may be romanized as shown below in order |
+# to provide differentiation from regularly-occurring digraphs and |
+# character sequences. |
+# |
+# BGN Page 94 Rule 3.1 |
+# й before а, у, ы, or э becomes у· |
+######################################################################## |
+Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER I |
+й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER I |
+Й → Y ; # CYRILLIC CAPITAL LETTER I |
+й → y ; # CYRILLIC SMALL LETTER I |
+######################################################################## |
+# End Rule 3.1 |
+######################################################################## |
+К → K ; # CYRILLIC CAPITAL LETTER KA |
+к → k ; # CYRILLIC SMALL LETTER KA |
+Л → L ; # CYRILLIC CAPITAL LETTER EL |
+л → l ; # CYRILLIC SMALL LETTER EL |
+М → M ; # CYRILLIC CAPITAL LETTER EM |
+м → m ; # CYRILLIC SMALL LETTER EM |
+Н → N ; # CYRILLIC CAPITAL LETTER EN |
+н → n ; # CYRILLIC SMALL LETTER EN |
+О → O ; # CYRILLIC CAPITAL LETTER O |
+о → o ; # CYRILLIC SMALL LETTER O |
+П → P ; # CYRILLIC CAPITAL LETTER PE |
+п → p ; # CYRILLIC SMALL LETTER PE |
+Р → R ; # CYRILLIC CAPITAL LETTER ER |
+р → r ; # CYRILLIC SMALL LETTER ER |
+С → S ; # CYRILLIC CAPITAL LETTER ES |
+с → s ; # CYRILLIC SMALL LETTER ES |
+######################################################################## |
+# BGN Page 94 Rule 3.5 |
+# тс becomes t·s |
+######################################################################## |
+ТС → T·S ; # CYRILLIC CAPITAL LETTER TE |
+Тс → T·s ; # CYRILLIC CAPITAL LETTER TE |
+тс → t·s ; # CYRILLIC SMALL LETTER TE |
+Т → T ; # CYRILLIC CAPITAL LETTER TE |
+т → t ; # CYRILLIC SMALL LETTER TE |
+######################################################################## |
+# End Rule 3.5 |
+######################################################################## |
+У → U ; # CYRILLIC CAPITAL LETTER U |
+у → u ; # CYRILLIC SMALL LETTER U |
+Ф → F ; # CYRILLIC CAPITAL LETTER EF |
+ф → f ; # CYRILLIC SMALL LETTER EF |
+Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA |
+Х → KH ; # CYRILLIC CAPITAL LETTER HA |
+х → kh ; # CYRILLIC SMALL LETTER HA |
+Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE |
+Ц → TS ; # CYRILLIC CAPITAL LETTER TSE |
+ц → ts ; # CYRILLIC SMALL LETTER TSE |
+Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE |
+Ч → CH ; # CYRILLIC CAPITAL LETTER CHE |
+ч → ch ; # CYRILLIC SMALL LETTER CHE |
+######################################################################## |
+# BGN Page 94 Rule 3.6 |
+# шч becomes sh·ch |
+######################################################################## |
+ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA |
+Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA |
+шч → sh·ch ; # CYRILLIC SMALL LETTER SHA |
+Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA |
+Ш → SH ; # CYRILLIC CAPITAL LETTER SHA |
+ш → sh ; # CYRILLIC SMALL LETTER SHA |
+Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA |
+Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA |
+щ → shch ; # CYRILLIC SMALL LETTER SHCHA |
+######################################################################## |
+# End Rule 3.6 |
+######################################################################## |
+Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN |
+ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN |
+######################################################################## |
+# BGN Page 94 Rule 3.2 |
+# ы before а, у, ы, or э becomes у· |
+# |
+# BGN Page 94 Rule 3.3 |
+# ы after any vowel character becomes ·у |
+######################################################################## |
+# |
+# BUG(ftang) the following line said the vowels will change |
+# $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I |
+# $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I |
+Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU |
+ы}[ауыэ] → y· ; # CYRILLIC SMALL LETTER YERU |
+Ы → Y ; # CYRILLIC CAPITAL LETTER YERU |
+ы → y ; # CYRILLIC SMALL LETTER YERU |
+######################################################################## |
+# End Rule 3.2 and 3.3 |
+######################################################################## |
+Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN |
+ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN |
+Э → E ; # CYRILLIC CAPITAL LETTER E |
+э → e ; # CYRILLIC SMALL LETTER E |
+Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU |
+Ю → YU ; # CYRILLIC CAPITAL LETTER YU |
+ю → yu ; # CYRILLIC SMALL LETTER YU |
+Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA |
+Я → YA ; # CYRILLIC CAPITAL LETTER YA |
+я → ya ; # CYRILLIC SMALL LETTER YA |
+ |