Index: source/data/translit/Latin_NumericPinyin.txt |
diff --git a/source/data/translit/Latin_NumericPinyin.txt b/source/data/translit/Latin_NumericPinyin.txt |
index e907d55c6b8e628a0033e9c19e7b16a2b0e4a7f4..316e0758ab1ea58194bcc3e2c16123e435da2ea4 100644 |
--- a/source/data/translit/Latin_NumericPinyin.txt |
+++ b/source/data/translit/Latin_NumericPinyin.txt |
@@ -1,17 +1,29 @@ |
-# *************************************************************************** |
-# * |
-# * Copyright (C) 2004-2015, International Business Machines |
-# * Corporation; Unicode, Inc.; and others. All Rights Reserved. |
-# * |
-# *************************************************************************** |
+# © 2016 and later: Unicode, Inc. and others. |
+# License & terms of use: http://www.unicode.org/copyright.html#License |
+# |
# File: Latin_NumericPinyin.txt |
-# Generated from CLDR |
+# Generated from CLDR |
# |
+ |
+# According to the pinyin definitions I've been able to find: |
+# 'a', 'e' are the preferred bases |
+# otherwise 'o' |
+# otherwise last vowel |
+# The trailing forms of syllables are the following: |
+# "a", "ai", "ao", "an", "ang", |
+# "o", "ou", "ong", |
+# "e", "ei", "er", "en", "eng", |
+# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong", |
+# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng", |
+# "ü", "üe", "üan", "ün" |
+# so the letters the tone will 'hop' over are the ones matched in the rules below: |
::NFD (NFC); |
$tone = [\u0304\u0301\u030C\u0300\u0306] ; |
+# Forward direction: move the tone mark past the trailing letter(s) of its syllable (r after e; i, o, n, u, on, ng), and convert it to a number via Pinyin-NumericPinyin |
e {($tone) r} → r &Pinyin-NumericPinyin($1); |
($tone) ( [i o n u {o n} {n g}]) → $2 &Pinyin-NumericPinyin($1); |
($tone) → &Pinyin-NumericPinyin($1); |
+# Reverse direction: the rules below back up from the tone digit until they find the right vowel, then deposit the tone there |
$vowel = [aAeEiIoOuU {u\u0308} {U\u0308} vV]; |
$consonant = [[a-z A-Z] - [$vowel]]; |
$digit = [1-5]; |
@@ -20,3 +32,4 @@ $1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digi |
$1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit); |
&NumericPinyin-Pinyin($1) ← [:letter:] {($digit)}; |
::NFC (NFD); |
+ |