Index: source/data/translit/ja_Hrkt_ja_Latn_BGN.txt |
diff --git a/source/data/translit/Katakana_Latin_BGN.txt b/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt |
old mode 100755 |
new mode 100644 |
similarity index 82% |
rename from source/data/translit/Katakana_Latin_BGN.txt |
rename to source/data/translit/ja_Hrkt_ja_Latn_BGN.txt |
index 38f8738d12f1c1e3e19b42524e91b6a0a7174170..04040d09e35a9165c5bb1910abb75dca488da880 |
--- a/source/data/translit/Katakana_Latin_BGN.txt |
+++ b/source/data/translit/ja_Hrkt_ja_Latn_BGN.txt |
@@ -1,16 +1,59 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
-# File: Katakana_Latin_BGN.txt |
-# Generated from CLDR |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
+# File: ja_Hrkt_ja_Latn_BGN.txt |
+# Generated from CLDR |
+# |
+ |
+# |
+######################################################################## |
+# BGN/PCGN Agreement |
+# |
+# The modified Hepburn system for the romanization of Japanese has been |
+# in use by the U.S. Board on Geographic Names since about 1930 and has |
+# been used extensively in the romanization of Japanese geographic names. |
+# The system is well adapted to the general needs of speakers of English |
+# and is the most widely used system for the romanization of Japanese. |
+# |
+# Originally prepared by Michael Everson <everson@evertype.com> |
+######################################################################## |
+# |
+# MINIMAL FILTER: Japanese-Latin |
# |
:: [あいうえおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわゐゑをんゔアイウエオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヰヱヲンヴ] ; |
:: NFD (NFC) ; |
+# |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Define All Transformation Variables |
+# |
+######################################################################## |
$apostrophe = ’; |
+# |
+# Use this $wordBoundary until bug 2034 is fixed in ICU: |
+# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
+# |
$wordBoundary = [^[:L:][:M:][:N:]] ; |
+# |
+######################################################################## |
+# |
+# Rules moved to the front so that other rules do not mask them |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# BGN Page 45 Rule 2: |
+# |
+# A small-script tsu form (ッ or っ) is inserted between kana symbols |
+# to indicate a double consonant and is romanized as k before k; |
+# as s before s or sh; as t before t, ts, or ch; and as p before p. |
+# |
+######################################################################## |
+# |
ッ}[カキクケコ] → k ; # KATAKANA LETTER SMALL TU |
っ}[かきくけこ] → k ; # HIRAGANA LETTER SMALL TU |
ッ}[サシスセソ] → s ; # KATAKANA LETTER SMALL TU |
@@ -19,6 +62,20 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
っ}[たちつてと] → t ; # HIRAGANA LETTER SMALL TU |
ッ}[パピプペポ] → p ; # KATAKANA LETTER SMALL TU |
っ}[ぱぴぷぺぽ] → p ; # HIRAGANA LETTER SMALL TU |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 2 |
+# |
+######################################################################## |
+# |
+######################################################################## |
+# |
+# Start of Syllabic Transformations |
+# |
+######################################################################## |
+# |
ア → a ; # KATAKANA LETTER A |
イ → i ; # KATAKANA LETTER I |
ウ → u ; # KATAKANA LETTER U |
@@ -110,10 +167,31 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
ヰ → i ; # KATAKANA LETTER WI |
ヱ → e ; # KATAKANA LETTER WE |
ヲ → o ; # KATAKANA LETTER WO |
+# |
+# |
+######################################################################## |
+# |
+# BGN Page 45 Rule 3: |
+# |
+# The character ン should be romanized m before b, p, or m. |
+# The character ん should be romanized m before b, p, or m. |
+# The character ン should be romanized n’ before y or a vowel letter. |
+# The character ん should be romanized n’ before y or a vowel letter. |
+# |
+######################################################################## |
+# |
ン}[バビブベボパピプペポマミムメモ] → m ; # KATAKANA LETTER N |
ん}[ばびぶべぼぱぴぷぺぽまみむめも] → m ; # HIRAGANA LETTER N |
ン}[ヤユヨアイウエオ] → n $apostrophe ; # KATAKANA LETTER N |
ん}[やゆよあいうえお] → n $apostrophe ; # HIRAGANA LETTER N |
+# |
+# |
+######################################################################## |
+# |
+# End of Rule 3 |
+# |
+######################################################################## |
+# |
ン → n ; # KATAKANA LETTER N |
ガ → ga ; # KATAKANA LETTER GA |
ギョウ → gyō ; # KATAKANA LETTER GI + SMALL YO + U |
@@ -309,3 +387,7 @@ $wordBoundary = [^[:L:][:M:][:N:]] ; |
ぽう → pō ; # HIRAGANA LETTER PO + U |
ぽ → po ; # HIRAGANA LETTER PO |
ゔ → v ; # HIRAGANA LETTER VU |
+# |
+# |
+######################################################################## |
+ |