Index: source/data/translit/mk_mk_Latn_BGN.txt |
diff --git a/source/data/translit/Macedonian_Latin_BGN.txt b/source/data/translit/mk_mk_Latn_BGN.txt |
similarity index 53% |
rename from source/data/translit/Macedonian_Latin_BGN.txt |
rename to source/data/translit/mk_mk_Latn_BGN.txt |
index e0289da2a8b8a7288638512e7aa5f5282f3991bf..4dbd9999f86998073f4098ca77702dc844759816 100644 |
--- a/source/data/translit/Macedonian_Latin_BGN.txt |
+++ b/source/data/translit/mk_mk_Latn_BGN.txt |
@@ -1,14 +1,42 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Macedonian_Latin_BGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: mk_mk_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# |
+######################################################################## |
+# BGN/PCGN 1981 System |
+# |
+# Macedonian was officially established as a literary language in |
+# Yugoslavia during World War II and is now the official language |
+# of Macedonia. Its alphabet is identical to Serbian, except |
+# that the letters Ђђ and Ћћ are replaced by Ѓѓ and Ќќ, and |
+# the letter Ѕѕ and the apostrophe are added. |
+# |
+# The Macedonian Alphabet as defined by the BGN (Page 69): |
+# |
+# АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ |
+# абвгдѓежзѕијклљмнњопрстќуфхцчџш’ |
+# |
+# Originally prepared by Michael Everson <everson@evertype.com> |
+######################################################################## |
+# |
+# MINIMAL FILTER: Macedonian-Latin |
# |
:: [АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш’] ; |
:: NFD (NFC) ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Define All Transformation Variables |
+# |
+######################################################################## |
+# |
$prime = ’ ; |
$upperConsonants = [БВГДЃЖЗЅЈКЛЉМНЊПРСТЌФХЦЧЏШ] ; |
$lowerConsonants = [бвгдѓжзѕјклљмнњпрстќфхцчџш’] ; |
@@ -17,7 +45,22 @@ $upperVowels = [АЕИОУ] ; |
$lowerVowels = [аеиоу] ; |
$vowels = [$upperVowels $lowerVowels] ; |
$lower = [$lowerConsonants $lowerVowels] ; |
+# |
+# |
+# Use this $wordBoundary until bug 2034 is fixed in ICU: |
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
+# |
$wordBoundary = [^[:L:][:M:][:N:]] ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Start of Alphabetic Transformations |
+# |
+######################################################################## |
+# |
А → A ; # CYRILLIC CAPITAL LETTER A |
а → a ; # CYRILLIC SMALL LETTER A |
Б → B ; # CYRILLIC CAPITAL LETTER BE |
@@ -28,10 +71,29 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
г → g ; # CYRILLIC SMALL LETTER GHE |
Д → D ; # CYRILLIC CAPITAL LETTER DE |
д → d ; # CYRILLIC SMALL LETTER DE |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 70 Rule 1: |
+# |
+# The character ѓ should be romanized g when it occurs before е |
+# and и. In all other instances, it should be romanized đ (Đ). |
+# |
+######################################################################## |
+# |
Ѓ}[ЕеИи] → G ; # CYRILLIC CAPITAL LETTER GJE |
ѓ}[ЕеИи] → g ; # CYRILLIC SMALL LETTER GJE |
Ѓ → Đ ; # CYRILLIC CAPITAL LETTER GJE |
ѓ → đ ; # CYRILLIC SMALL LETTER GJE |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 1 |
+# |
+######################################################################## |
+# |
Е → E ; # CYRILLIC CAPITAL LETTER DE |
е → e ; # CYRILLIC SMALL LETTER DE |
Ж → Ž ; # CYRILLIC CAPITAL LETTER ZHE |
@@ -44,7 +106,7 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
И → I ; # CYRILLIC CAPITAL LETTER I |
и → i ; # CYRILLIC SMALL LETTER I |
Ј → J ; # CYRILLIC CAPITAL LETTER JE |
-ј → J ; # CYRILLIC SMALL LETTER JE |
+ј → j ; # CYRILLIC SMALL LETTER JE |
К → K ; # CYRILLIC CAPITAL LETTER KA |
к → k ; # CYRILLIC SMALL LETTER KA |
Л → L ; # CYRILLIC CAPITAL LETTER EL |
@@ -69,10 +131,29 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
с → s ; # CYRILLIC SMALL LETTER ES |
Т → T ; # CYRILLIC CAPITAL LETTER TE |
т → t ; # CYRILLIC SMALL LETTER TE |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 70 Rule 2: |
+# |
+# The character ќ should be romanized k when it occurs before е |
+# and и. In all other instances, it should be romanized c\u0301. |
+# |
+######################################################################## |
+# |
Ќ}[ЕеИи] → K ; # CYRILLIC CAPITAL LETTER KJE |
ќ}[ЕеИи] → k ; # CYRILLIC SMALL LETTER KJE |
Ќ → C\u0301 ; # CYRILLIC CAPITAL LETTER KJE |
ќ → c\u0301 ; # CYRILLIC SMALL LETTER KJE |
+# |
+# |
+######################################################################## |
+# |
+# End Rule 2 |
+# |
+######################################################################## |
+# |
У → U ; # CYRILLIC CAPITAL LETTER U |
у → u ; # CYRILLIC SMALL LETTER U |
Ф → F ; # CYRILLIC CAPITAL LETTER EF |
@@ -88,3 +169,14 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
џ → dž ; # CYRILLIC SMALL LETTER SHA |
Ш → Š ; # CYRILLIC CAPITAL LETTER SHA |
ш → š ; # CYRILLIC SMALL LETTER SHA |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 69 Rule 32 maps the symbol onto itself, so it |
+# is omitted here for computational efficiency. |
+# |
+# $prime → $prime ; # RIGHT SINGLE QUOTATION MARK |
+# |
+######################################################################## |
+ |