X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..0f5d89e82340278ed3d7d50029f37cab2c41a57e:/icuSources/data/translit/es_FONIPA_zh.txt diff --git a/icuSources/data/translit/es_FONIPA_zh.txt b/icuSources/data/translit/es_FONIPA_zh.txt index 67dc66d4..e7798c93 100644 --- a/icuSources/data/translit/es_FONIPA_zh.txt +++ b/icuSources/data/translit/es_FONIPA_zh.txt @@ -1,15 +1,16 @@ -# *************************************************************************** -# * -# * Copyright (C) 2004-2013, International Business Machines -# * Corporation; Unicode, Inc.; and others. All Rights Reserved. -# * -# *************************************************************************** +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# # File: es_FONIPA_zh.txt -# Generated from CLDR +# Generated from CLDR # + +# Transforms Spanish to Mandarin Chinese. The input Spanish string must be in +# phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese. $word_boundary = [-\ $]; $vowel = [aeijouw]; # Vowels and glides $not_vowel = [^$vowel]; +# First pass: Collapse phonetic distinctions not preserved in Mandarin. ð → | d; ɣ → | g; ŋ → | n; @@ -35,13 +36,20 @@ s[θs] → s; # GB/T 17693.5-2009, 5.3.4 [^ʧ] { jo → io; # GB/T 17693.5-2009 表 1, 注 7 ::Null; j } an $not_vowel → i ; # GB/T 17693.5-2009 表 1, 注 8 +# GB/T 17693.5-2009 表 1, 注 8 also says that should be treated as if +# it was plus . This is not borne out by the observed data, which +# suggests that plus is the more appropriate choice in some +# situations. [g.$] { wai\u032F → wai ; wai\u032F → uai\u032F ; [g.$] { wau\u032F → wau ; wau\u032F → uau\u032F ; jau\u032F → iau\u032F ; +# Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one. [^jw] { ao } [^n] → au\u032F ; [^jw] { ao } n $vowel → au\u032F ; +# Main pass: Phoneme to Hanzi conversion. 
+# This generally follows GB/T 17693.5-2009 表 1, unless otherwise noted. ::Null; '.' → ; ai\u032F → 艾 ; @@ -145,6 +153,11 @@ fwen } $not_vowel → 丰 ; fwe → 富埃 ; fwi → 富伊 ; fwo → 福 ; +# The choice of 弗 vs. 夫 sounds simple according to the GB/T standard, but the +# data suggest otherwise. Ideally, 弗 should occur at the beginning of a +# morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else. Since +# we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of +# a word and 弗 everywhere else. f } $word_boundary → 夫 ; f → 弗 ; gai\u032F → 盖 ; @@ -410,6 +423,9 @@ tje → 铁 ; tju → 蒂乌 ; ton } $not_vowel → 通 ; to → 托 ; +# The rules for /ts/ (tz in the orthography) are nonstandard and derived +# entirely from the observed data. They apply mostly to native toponyms +# in Mexico. tsa → 察 ; tsen } $not_vowel → 岑 ; tse → 采 ; @@ -487,12 +503,26 @@ xwe → 胡埃 ; xwi → 惠 ; xwo → 霍 ; x → 赫 ; +# 尔 simplification pass. The idea is to drop most occurrences of 尔 +# corresponding to (not to or ) from a word if there is another /l/ +# sound nearby. There is a vague pattern like this in the data, but the details +# remain to be determined. At the moment, this does nothing; it just puts 尔 in +# for every in a syllable coda. ::Null; $r = [R利拉]; +# +# +# R } . $r → ; +# R } .. $r → ; +# R } ... $r → ; +# R } .... $r → ; R → 尔 ; +# Dong-nan-xi-hai pass. Per GB/T 17693.5-2009 表 1, 注 4, replace confusing +# characters at the beginning and end of a word. ::Null; $word_boundary { 东 → 栋 ; $word_boundary { 南 → 楠 ; $word_boundary { 西 → 锡 ; 海 } $word_boundary → 亥 ; ::NFC; +