Index: source/data/translit/kk_kk_Latn_BGN.txt |
diff --git a/source/data/translit/Kazakh_Latin_BGN.txt b/source/data/translit/kk_kk_Latn_BGN.txt |
similarity index 51% |
rename from source/data/translit/Kazakh_Latin_BGN.txt |
rename to source/data/translit/kk_kk_Latn_BGN.txt |
index ed695acbddaebe9961cbdc903ff269273d19a435..d9ef30e974cbf2112a904c536b8e15c8a6b2c5e0 100644 |
--- a/source/data/translit/Kazakh_Latin_BGN.txt |
+++ b/source/data/translit/kk_kk_Latn_BGN.txt |
@@ -1,14 +1,41 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Kazakh_Latin_BGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: kk_kk_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# |
+######################################################################## |
+# BGN/PCGN 1979 System |
+# |
+# The BGN/PCGN system for Kazakh Cyrillic was designed for use in |
+# romanizing names written in the Kazakh Cyrillic alphabet. |
+# The Kazakh Cyrillic alphabet contains nine letters not present |
+# in the Russian alphabet: Әә, Ғғ, Ққ, Ңң, Өө, Ұұ, Үү, Һһ, and Іі. |
+# |
+# The Kazakh Cyrillic Alphabet as defined by the BGN (Page 47): |
+# |
+# АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ |
+# аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя |
+# |
+# Originally prepared by Michael Everson <everson@evertype.com> |
+######################################################################## |
+# |
+# MINIMAL FILTER: KazakhCyrl-Latin |
# |
:: [АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯаәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя] ; |
:: NFD (NFC) ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Define All Transformation Variables |
+# |
+######################################################################## |
+# |
$prime = ʹ ; |
$doublePrime = ʺ ; |
$upperConsonants = [БВГҒДЖЗЙКҚЛМНҢПРСТФХҺЦЧШЩЪЬ] ; |
@@ -18,7 +45,22 @@ $upperVowels = [АӘЕЁИОӨУҰҮЫІЭЮЯ] ; |
$lowerVowels = [аәеёиоөуұүыіэюя] ; |
$vowels = [$upperVowels $lowerVowels] ; |
$lower = [$lowerConsonants $lowerVowels] ; |
+# |
+# |
+# Use this $wordBoundary until bug 2034 is fixed in ICU: |
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
+# |
$wordBoundary = [^[:L:][:M:][:N:]] ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Start of Alphabetic Transformations |
+# |
+######################################################################## |
+# |
А → A ; # CYRILLIC CAPITAL LETTER A |
а → a ; # CYRILLIC SMALL LETTER A |
Ә → Ä ; # CYRILLIC CAPITAL LETTER SCHWA |
@@ -27,11 +69,33 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
б → b ; # CYRILLIC SMALL LETTER BE |
В → V ; # CYRILLIC CAPITAL LETTER VE |
в → v ; # CYRILLIC SMALL LETTER VE |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized |
+# g·h, z·h, k·h, n·g, s·h and ts·h in order to differentiate those |
+# romanizations from the digraphs gh, zh, kh, ng, sh, and the letter |
+# sequence tsh, which are used to render the characters г, ж, х, ң, ш, |
+# and the character sequence тш. |
+# |
+######################################################################## |
+# |
ГҺ → G·H ; # CYRILLIC CAPITAL LETTER GHE |
Гһ → G·h ; # CYRILLIC CAPITAL LETTER GHE |
гһ → g·h ; # CYRILLIC SMALL LETTER GHE |
Г → G ; # CYRILLIC CAPITAL LETTER GHE |
г → g ; # CYRILLIC SMALL LETTER GHE |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Ғ} $lower → Gh ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE |
Ғ → GH ; # CYRILLIC CAPITAL LETTER GHE WITH STROKE |
ғ → gh ; # CYRILLIC SMALL LETTER GHE WITH STROKE |
@@ -45,31 +109,85 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE |
Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE |
ж → zh ; # CYRILLIC SMALL LETTER ZHE |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# зһ becomes z·h |
+# |
+######################################################################## |
+# |
ЗҺ → Z·H ; # CYRILLIC CAPITAL LETTER ZE |
Зһ → Z·h ; # CYRILLIC CAPITAL LETTER ZE |
зһ → z·h ; # CYRILLIC SMALL LETTER ZE |
З → Z ; # CYRILLIC CAPITAL LETTER ZE |
з → z ; # CYRILLIC SMALL LETTER ZE |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
И → Ī ; # CYRILLIC CAPITAL LETTER I |
и → ī ; # CYRILLIC SMALL LETTER I |
Й → Y ; # CYRILLIC CAPITAL LETTER I |
й → y ; # CYRILLIC SMALL LETTER I |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# кһ becomes k·h |
+# |
+######################################################################## |
+# |
КҺ → K·H ; # CYRILLIC CAPITAL LETTER KA |
Кһ → K·h ; # CYRILLIC CAPITAL LETTER KA |
кһ → k·h ; # CYRILLIC SMALL LETTER KA |
К → K ; # CYRILLIC CAPITAL LETTER KA |
к → k ; # CYRILLIC SMALL LETTER KA |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Қ → Q ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER |
қ → q ; # CYRILLIC SMALL LETTER KA WITH DESCENDER |
Л → L ; # CYRILLIC CAPITAL LETTER EL |
л → l ; # CYRILLIC SMALL LETTER EL |
М → M ; # CYRILLIC CAPITAL LETTER EM |
м → m ; # CYRILLIC SMALL LETTER EM |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# нг becomes n·g |
+# |
+######################################################################## |
+# |
НГ → N·G ; # CYRILLIC CAPITAL LETTER EN |
Нг → N·g ; # CYRILLIC CAPITAL LETTER EN |
нг → n·g ; # CYRILLIC SMALL LETTER EN |
Н → N ; # CYRILLIC CAPITAL LETTER EN |
н → n ; # CYRILLIC SMALL LETTER EN |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Ң} $lower → Ng ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER |
Ң → NG ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER |
ң → ng ; # CYRILLIC SMALL LETTER EN WITH DESCENDER |
@@ -81,11 +199,29 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
п → p ; # CYRILLIC SMALL LETTER PE |
Р → R ; # CYRILLIC CAPITAL LETTER ER |
р → r ; # CYRILLIC SMALL LETTER ER |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# сһ becomes s·h |
+# |
+######################################################################## |
+# |
СҺ → S·H ; # CYRILLIC CAPITAL LETTER ES |
Сһ → S·h ; # CYRILLIC CAPITAL LETTER ES |
сһ → s·h ; # CYRILLIC SMALL LETTER ES |
С → S ; # CYRILLIC CAPITAL LETTER ES |
с → s ; # CYRILLIC SMALL LETTER ES |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Т → T ; # CYRILLIC CAPITAL LETTER TE |
т → t ; # CYRILLIC SMALL LETTER TE |
У → Ū ; # CYRILLIC CAPITAL LETTER U |
@@ -101,15 +237,43 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
х → kh ; # CYRILLIC SMALL LETTER HA |
Һ → H ; # CYRILLIC CAPITAL LETTER SHHA |
һ → h ; # CYRILLIC SMALL LETTER SHHA |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Rule 1 |
+# |
+# цһ becomes ts·h |
+# |
+######################################################################## |
+# |
ЦҺ → TS·H ; # CYRILLIC CAPITAL LETTER GHE |
Цһ → Ts·h ; # CYRILLIC CAPITAL LETTER GHE |
цһ → ts·h ; # CYRILLIC SMALL LETTER GHE |
Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE |
Ц → TS ; # CYRILLIC CAPITAL LETTER TSE |
ц → ts ; # CYRILLIC SMALL LETTER TSE |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE |
Ч → CH ; # CYRILLIC CAPITAL LETTER CHE |
ч → ch ; # CYRILLIC SMALL LETTER CHE |
+# |
+# |
+######################################################################## |
+# |
+# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). |
+# |
+# шч becomes sh·ch |
+# |
+######################################################################## |
+# |
ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA |
Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA |
шч → sh·ch ; # CYRILLIC SMALL LETTER SHA |
@@ -119,10 +283,43 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA |
Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA |
щ → shch ; # CYRILLIC SMALL LETTER SHCHA |
+# |
+# |
+######################################################################## |
+# |
+# End Implied rule |
+# |
+######################################################################## |
+# |
Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN |
ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 48 Note 2 |
+# |
+# The character Ыы may be romanized Ɨɨ instead of Yy, if so desired. |
+# |
+######################################################################## |
+# |
Ы → Y ; # CYRILLIC CAPITAL LETTER YERU |
ы → y ; # CYRILLIC SMALL LETTER YERU |
+# |
+# |
+# Alternative rule to implement the option described here. To apply |
+# uncomment the following by removing the '#' mark at the start of the |
+# line and insert before the two rule lines above. |
+# |
+#Ы → Ɨ ; # CYRILLIC CAPITAL LETTER YERU |
+#ы → ɨ ; # CYRILLIC SMALL LETTER YERU |
+# |
+######################################################################## |
+# |
+# End BGN Page 48 Note 2 |
+# |
+######################################################################## |
+# |
І → I ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I |
і → i ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN |
@@ -135,3 +332,7 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA |
Я → YA ; # CYRILLIC CAPITAL LETTER YA |
я → ya ; # CYRILLIC SMALL LETTER YA |
+# |
+# |
+######################################################################## |
+ |