| Index: source/data/translit/es_FONIPA_zh.txt
|
| diff --git a/source/data/translit/es_FONIPA_zh.txt b/source/data/translit/es_FONIPA_zh.txt
|
| index cece3beef367d510e1608c93b45aaf37b9aceec9..e7798c936153d2763ab9f82f3e9b3108bbc356f0 100644
|
| --- a/source/data/translit/es_FONIPA_zh.txt
|
| +++ b/source/data/translit/es_FONIPA_zh.txt
|
| @@ -1,15 +1,16 @@
|
| -# ***************************************************************************
|
| -# *
|
| -# * Copyright (C) 2004-2015, International Business Machines
|
| -# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
| -# *
|
| -# ***************************************************************************
|
| +# © 2016 and later: Unicode, Inc. and others.
|
| +# License & terms of use: http://www.unicode.org/copyright.html#License
|
| +#
|
| # File: es_FONIPA_zh.txt
|
| -# Generated from CLDR
|
| +# Generated from CLDR
|
| #
|
| +
|
| +# Transforms Spanish to Mandarin Chinese. The input Spanish string must be in
|
| +# phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese.
|
| $word_boundary = [-\ $];
|
| $vowel = [aeijouw]; # Vowels and glides
|
| $not_vowel = [^$vowel];
|
| +# First pass: Collapse phonetic distinctions not preserved in Mandarin.
|
| ð → | d;
|
| ɣ → | g;
|
| ŋ → | n;
|
| @@ -35,13 +36,20 @@ s[θs] → s; # GB/T 17693.5-2009, 5.3.4
|
| [^ʧ] { jo → io; # GB/T 17693.5-2009 表 1, 注 7
|
| ::Null;
|
| j } an $not_vowel → i ; # GB/T 17693.5-2009 表 1, 注 8
|
| +# GB/T 17693.5-2009 表 1, 注 8 also says that <uai> should be treated as if
|
| +# it were <u> plus <ai>. This is not borne out by the observed data, which
|
| +# suggests that <ua> plus <i> is the more appropriate choice in some
|
| +# situations.
|
| [g.$] { wai\u032F → wai ;
|
| wai\u032F → uai\u032F ;
|
| [g.$] { wau\u032F → wau ;
|
| wau\u032F → uau\u032F ;
|
| jau\u032F → iau\u032F ;
|
| +# Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one.
|
| [^jw] { ao } [^n] → au\u032F ;
|
| [^jw] { ao } n $vowel → au\u032F ;
|
| +# Main pass: Phoneme to Hanzi conversion.
|
| +# This generally follows GB/T 17693.5-2009 表 1, unless otherwise noted.
|
| ::Null;
|
| '.' → ;
|
| ai\u032F → 艾 ;
|
| @@ -145,6 +153,11 @@ fwen } $not_vowel → 丰 ;
|
| fwe → 富埃 ;
|
| fwi → 富伊 ;
|
| fwo → 福 ;
|
| +# The choice of 弗 vs. 夫 sounds simple according to the GB/T standard, but the
|
| +# data suggest otherwise. Ideally, 弗 should occur at the beginning of a
|
| +# morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else. Since
|
| +# we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of
|
| +# a word and 弗 everywhere else.
|
| f } $word_boundary → 夫 ;
|
| f → 弗 ;
|
| gai\u032F → 盖 ;
|
| @@ -410,6 +423,9 @@ tje → 铁 ;
|
| tju → 蒂乌 ;
|
| ton } $not_vowel → 通 ;
|
| to → 托 ;
|
| +# The rules for /ts/ (tz in the orthography) are nonstandard and derived
|
| +# entirely from the observed data. They apply mostly to native toponyms
|
| +# in Mexico.
|
| tsa → 察 ;
|
| tsen } $not_vowel → 岑 ;
|
| tse → 采 ;
|
| @@ -487,12 +503,26 @@ xwe → 胡埃 ;
|
| xwi → 惠 ;
|
| xwo → 霍 ;
|
| x → 赫 ;
|
| +# 尔 simplification pass. The idea is to drop most occurrences of 尔
|
| +# corresponding to <r> (not to <l> or <ll>) from a word if there is another /l/
|
| +# sound nearby. There is a vague pattern like this in the data, but the details
|
| +# remain to be determined. At the moment, this does nothing; it just puts 尔 in
|
| +# for every <r> in a syllable coda.
|
| ::Null;
|
| $r = [R利拉];
|
| +#
|
| +#
|
| +# R } . $r → ;
|
| +# R } .. $r → ;
|
| +# R } ... $r → ;
|
| +# R } .... $r → ;
|
| R → 尔 ;
|
| +# Dong-nan-xi-hai pass. Per GB/T 17693.5-2009 表 1, 注 4, replace confusing
|
| +# characters at the beginning and end of a word.
|
| ::Null;
|
| $word_boundary { 东 → 栋 ;
|
| $word_boundary { 南 → 楠 ;
|
| $word_boundary { 西 → 锡 ;
|
| 海 } $word_boundary → 亥 ;
|
| ::NFC;
|
| +
|
|
|