| Index: source/data/translit/ja_Latn_ru.txt
|
| diff --git a/source/data/translit/ja_Latn_ru.txt b/source/data/translit/ja_Latn_ru.txt
|
| index 6a5f3fb3af01096f3009802f8d348c36a923fd6f..fc2bf525a2ee32c5fe23d9e4e8ea61fcaa88eaa2 100644
|
| --- a/source/data/translit/ja_Latn_ru.txt
|
| +++ b/source/data/translit/ja_Latn_ru.txt
|
| @@ -1,29 +1,67 @@
|
| -# ***************************************************************************
|
| -# *
|
| -# * Copyright (C) 2004-2015, International Business Machines
|
| -# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
| -# *
|
| -# ***************************************************************************
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| # File: ja_Latn_ru.txt
|
| -# Generated from CLDR
|
| +# Generated from CLDR
|
| +#
|
| +
|
| +# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
|
| +# Can be run in sequence after e.g. Katakana-Latin.
|
| +#
|
| +# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
|
| +#
|
| +# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
|
| +# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary
|
| +# markup in the input in order to do that properly.
|
| #
|
| ::NFD(NFC);
|
| ::[:Latin:] Lower();
|
| +#
|
| +#
|
| $lengthMarker = [\u0302\u0304];
|
| +#
|
| +#
|
| +# Delete apostrophes. Apostrophes after "n" are consumed below.
|
| \' → ;
|
| +#
|
| +#
|
| +# Turn long /e:/ into diphthong /ei/.
|
| +# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
|
| e $lengthMarker → эй ;
|
| +#
|
| +#
|
| +# Turn long /i:/ into two vowels /ii/.
|
| i $lengthMarker → | i i ;
|
| +#
|
| +#
|
| +# Ignore vowel length everywhere else.
|
| $lengthMarker → ;
|
| +#
|
| +#
|
| +# Vowels.
|
| +#
|
| +# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
|
| +## ai → ай ;
|
| a → а ;
|
| i\~e → | ye ;
|
| i → и ;
|
| u\~ → в ; # ウィ etc.
|
| +#
|
| +## ui → уй ;
|
| u → у ;
|
| e → э ;
|
| o → о ;
|
| +#
|
| +#
|
| +# Consonants.
|
| +#
|
| k → к ;
|
| +#
|
| +#
|
| sh → | sy ;
|
| s → с ;
|
| +#
|
| +#
|
| ch → | ty ;
|
| c } ch → t ;
|
| te\~ → | t ; # テュ
|
| @@ -31,25 +69,45 @@ to\~ → | t ; # トゥ
|
| tsu\~ → | ts ; # ツァ, ツィ, etc.
|
| ts → ц ;
|
| t → т ;
|
| +#
|
| +#
|
| \~tsu → | tsu ;
|
| +#
|
| +#
|
| n } [bpm] → м ; # 群馬 → Гумма
|
| n\' → нъ ;
|
| n → н ;
|
| +#
|
| +#
|
| h → х ;
|
| fu\~ → | f ; # フュ
|
| f → ф ;
|
| +#
|
| +#
|
| m → м ;
|
| +#
|
| +#
|
| ya → я ;
|
| yi → и ; # Added for convenience, after sh, ch, j.
|
| yu → ю ;
|
| ye → е ; # ?? unobserved
|
| yo → ё ;
|
| +#
|
| +#
|
| r → р ;
|
| +#
|
| +#
|
| wa → ва ;
|
| w → ;
|
| +#
|
| +#
|
| g → г ;
|
| +#
|
| +#
|
| j → | zy ;
|
| z → дз ;
|
| +#
|
| +#
|
| de\~ → | d ; # デュ
|
| dji\~ → | z ; # ヂャ, ヂュ, etc.
|
| dj → | j ; # ヂ
|
| @@ -57,8 +115,15 @@ do\~ → | d ; # ドゥ
|
| dzu\~ → | z ; # ヅァ, ヅィ, etc.
|
| dz → | z ; # ヅ
|
| d → д ;
|
| +#
|
| +#
|
| b → б ;
|
| vu\~ → | v ; # ヴァ, etc.
|
| v → в ; # ?? unobserved
|
| +#
|
| +#
|
| p → п ;
|
| +#
|
| +#
|
| ::NFC(NFD);
|
| +
|
|
|