icuSources/data/translit/es_FONIPA_zh.txt

   1 # ***************************************************************************
   2 # *
   3 # *  Copyright (C) 2004-2016, International Business Machines
   4 # *  Corporation; Unicode, Inc.; and others.  All Rights Reserved.
   5 # *
   6 # ***************************************************************************
   7 # File: es_FONIPA_zh.txt
   8 # Generated from CLDR
   9 #
  10
  11 # Transforms Spanish to Mandarin Chinese. The input Spanish string must be in
  12 # phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese.
  13 $word_boundary = [-\ $];  # Hyphen, space, or (presumably, via the "$" right before "]") the edge of the text -- confirm against the ICU UnicodeSet docs.
  14 $vowel = [aeijouw];       # Vowels and glides
  15 $not_vowel = [^$vowel];   # Complement of $vowel; used below as the right-hand context of rules whose pattern ends in "n".
  16 # First pass: Collapse phonetic distinctions not preserved in Mandarin.
  17 ð → | d;  # "|" leaves the cursor before the output, so the result can be matched again by later rules of this pass (e.g. the s from "θ → | s" can feed "s[θs] → s").
  18 ɣ → | g;
  19 ŋ → | n;
  20 θ → | s;
  21 ɾ → | r;
  22 ff → f ;  # Doubled consonants are reduced to a single one.
  23 kk → k ;
  24 mm → m ;
  25 nn → n ;
  26 pp → p ;
  27 tt → t ;
  28 tʧ → ʧ ;
  29 aa → a ;  # Doubled vowels are reduced as well (aa, oo, uu).
  30 oi\u032F → oi ;  # \u032F is COMBINING INVERTED BREVE BELOW; here it is dropped after "oi".
  31 oo → o ;
  32 uu → u ;
  33 [^dgktx] { ei\u032F → e ;  # Not applied after d, g, k, t, x: the main pass has dedicated dei/gei/kei/tei/xei rules.
  34 [^-\ .$] { eu\u032F → eu ;  # Skipped when the preceding character is "-", space or "." (the trailing "$" presumably also excludes the text edge).
  35 [^-\ .$] { ou\u032F → o;
  36 [^j]     { ui → wi ;  # "ui" becomes "wi" unless preceded by j.
  37 [^$word_boundary] { m } [bp] → n;  # GB/T 17693.5-2009, 5.3.2
  38 s[θs] → s;               # GB/T 17693.5-2009, 5.3.4
  39 [^ʧ] { jo → io;          # GB/T 17693.5-2009 Table 1, Note 7
  40 ::Null;  # Pass boundary: the rules below run on the output of the rules above.
  41 j } an $not_vowel → i ;  # GB/T 17693.5-2009 Table 1, Note 8
  42 # GB/T 17693.5-2009 Table 1, Note 8 also says that <uai> should be treated as if
  43 # it was <u> plus <ai>.  This is not borne out by the observed data, which
  44 # suggests that <ua> plus <i> is the more appropriate choice in some
  45 # situations.
  46 [g.$] { wai\u032F → wai ;  # After "g" or "." (and presumably at the text edge): keep the glide w, drop the \u032F mark.
  47 wai\u032F → uai\u032F ;    # Everywhere else: w becomes u and the \u032F mark is kept.
  48 [g.$] { wau\u032F → wau ;  # Same split as for wai above.
  49 wau\u032F → uau\u032F ;
  50 jau\u032F → iau\u032F ;
  51 # Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one.
  52 [^jw] { ao } [^n]     → au\u032F ;  # "ao" not preceded by j/w and not followed by n.
  53 [^jw] { ao } n $vowel → au\u032F ;  # "ao" followed by n is rewritten only when that n is itself followed by a vowel or glide.
  54 # Main pass: Phoneme to Hanzi conversion.
  55 # This generally follows GB/T 17693.5-2009 Table 1, unless otherwise noted.
  56 ::Null;  # Pass boundary: the rules below run on the output of the earlier passes.
  57 '.' → ;  # Remove every "." from the text.
  58 ai\u032F → 艾 ;  # Within each group the longer patterns are listed before their prefixes (bare "a" is last) because the first matching rule wins.
  59 an } $not_vowel → 安 ;  # "}" starts right-hand context: "an" is consumed only when followed by something that is not a vowel or glide.
  60 au\u032F → 奥 ;
  61 a → 阿 ;
  62 bai\u032F → 拜 ;  # Onset b.
  63 ban } $not_vowel → 班 ;
  64 bau\u032F → 包 ;
  65 ba → 巴 ;
  66 ben } $not_vowel → 本 ;
  67 be → 贝 ;
  68 bin } $not_vowel → 宾 ;
  69 bi → 比 ;
  70 bja → 比亚 ;
  71 bjen } $not_vowel → 比恩 ;
  72 bje → 别 ;
  73 bju → 比乌 ;
  74 bon } $not_vowel → 邦 ;
  75 bo → 博 ;
  76 bun } $not_vowel → 本 ;
  77 bu → 布 ;
  78 bwan } $not_vowel → 布安 ;
  79 bwa → 布阿 ;
  80 bwen } $not_vowel → 布恩 ;  # Should be 本, per GB/T 17693.5-2009 Table 1.
  81 bwe → 布埃 ;
  82 bwin } $not_vowel → 布因 ;  # Nonstandard, but fits observed data.
  83 bwi → 布伊 ;
  84 bwo → 博 ;
  85 b → 布 ;  # Fallback for a b that none of the longer b-rules above matched.
  86 βai\u032F → 瓦伊 ;  # Onset β.
  87 βan } $not_vowel → 万 ;
  88 βau\u032F → 沃 ;
  89 βa → 瓦 ;
  90 βen } $not_vowel → 文 ;
  91 βe → 韦 ;
  92 βin } $not_vowel → 温 ;
  93 βi → 维 ;
  94 βja → 维亚 ;
  95 βjen } $not_vowel → 维恩 ;
  96 βje → 维耶 ;
  97 βju → 维乌 ;
  98 βon } $not_vowel → 翁 ;
  99 βo → 沃 ;
 100 βun } $not_vowel → 文 ;
 101 βu → 武 ;
 102 βwan } $not_vowel → 万 ;
 103 βwa → 瓦 ;
 104 βwen } $not_vowel → 文 ;
 105 βwe → 武埃 ;
 106 βwi → 维 ;
 107 βwo → 沃 ;
 108 β → 夫 ;  # Fallback for a β that none of the longer β-rules above matched.
 109 dai\u032F → 代 ;  # Onset d: longest patterns first, bare "d" last, because the first matching rule wins.
 110 dan } $not_vowel → 丹 ;  # "}" starts right-hand context: the "n" is consumed only when no vowel or glide follows.
 111 dau\u032F → 道 ;
 112 da → 达 ;
 113 dei\u032F → 代 ;
 114 den } $not_vowel → 登 ;
 115 de → 德 ;
 116 din } $not_vowel → 丁 ;
 117 di → 迪 ;
 118 dja → 迪亚 ;
 119 djen } $not_vowel → 迪恩 ;
 120 dje → 迭 ;
 121 dju → 迪乌 ;
 122 don } $not_vowel → 东 ;
 123 do → 多 ;
 124 dun } $not_vowel → 敦 ;
 125 du → 杜 ;
 126 dwan } $not_vowel → 端 ;
 127 dwa → 杜阿 ;
 128 dwen } $not_vowel → 敦 ;
 129 dwe → 杜埃 ;
 130 dwi → 杜伊 ;
 131 dwo → 多 ;
 132 d } $word_boundary → ;  # A d directly before a word boundary is deleted (empty output).
 133 d → 德 ;  # Any other leftover d.
 134 ei\u032F → 埃 ;  # Syllables without an onset consonant, starting with e.
 135 en } $not_vowel → 恩 ;
 136 eu\u032F → 欧 ;
 137 e → 埃 ;
 138 fai\u032F → 法伊 ;  # Onset f.
 139 fan } $not_vowel → 凡 ;
 140 fau\u032F → 福 ;
 141 fa → 法 ;
 142 fe → 费 ;
 143 fin } $not_vowel → 芬 ;
 144 fi → 菲 ;
 145 fja → 菲亚 ;
 146 fjen } $not_vowel → 菲恩 ;
 147 fje → 菲耶 ;
 148 fju → 菲乌 ;
 149 fon } $not_vowel → 丰 ;
 150 fo → 福 ;
 151 fun } $not_vowel → 丰 ;
 152 fu → 富 ;
 153 fwan } $not_vowel → 富安 ;
 154 fwa → 富阿 ;
 155 fwen } $not_vowel → 丰 ;
 156 fwe → 富埃 ;
 157 fwi → 富伊 ;
 158 fwo → 福 ;
 159 # The choice of 弗 vs. 夫 sounds simple according to the GB/T standard, but the
 160 # data suggest otherwise.  Ideally, 弗 should occur at the beginning of a
 161 # morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else.  Since
 162 # we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of
 163 # a word and 弗 everywhere else.
 164 f } $word_boundary → 夫 ;  # f directly before a word boundary.
 165 f → 弗 ;  # Any other leftover f.
 166 gai\u032F → 盖 ;  # Onset g: longest patterns first, bare "g" last, because the first matching rule wins.
 167 gan } $not_vowel → 甘 ;  # "}" starts right-hand context: the "n" is consumed only when no vowel or glide follows.
 168 gau\u032F → 高 ;
 169 ga → 加 ;
 170 gei\u032F → 盖 ;
 171 gen } $not_vowel → 根 ;
 172 ge → 格 ;
 173 gin } $not_vowel → 金 ;
 174 gi → 吉 ;
 175 gja → 吉亚 ;
 176 gjen } $not_vowel → 吉恩 ;
 177 gje → 吉耶 ;
 178 gju → 吉乌 ;
 179 gon } $not_vowel → 贡 ;
 180 go → 戈 ;
 181 gun } $not_vowel → 贡 ;
 182 gu → 古 ;
 183 gwan } [$] → 古安 ;        # Nonstandard, but fits observed data.
 184 gwan } $not_vowel → 关 ;  # Must stay after the more specific "gwan } [$]" rule above.
 185 gwa → 瓜 ;
 186 gwen } $not_vowel → 古恩 ;
 187 gwe → 圭 ;
 188 gwi → 圭 ;
 189 gwo → 果 ;
 190 g → 格 ;  # Any other leftover g.
 191 in } $not_vowel → 因 ;  # Syllables without an onset consonant, starting with i.
 192 i → 伊 ;
 193 ʝai\u032F → 亚伊 ;  # Onset ʝ.
 194 ʝan } $not_vowel → 扬 ;
 195 ʝau\u032F → 尧 ;
 196 ʝa → 亚 ;
 197 ʝen } $not_vowel → 延 ;
 198 ʝe → 耶 ;
 199 ʝin } $not_vowel → 因 ;
 200 ʝi → 伊 ;
 201 ʝon } $not_vowel → 永 ;
 202 ʝo → 约 ;
 203 ʝun } $not_vowel → 云 ;
 204 ʝu → 尤 ;
 205 ʝwan } $not_vowel → 元 ;
 206 ʝwa → 尤阿 ;
 207 ʝwen } $not_vowel → 云 ;
 208 ʝwe → 尤埃 ;
 209 ʝwi → 尤伊 ;
 210 ʝwo → 约 ;
 211 ʝ → 伊 ;  # Any other leftover ʝ.
 212 kai\u032F → 凯 ;  # Onset k.
 213 kan } $not_vowel → 坎 ;
 214 kau\u032F → 考 ;
 215 ka → 卡 ;
 216 kei\u032F → 凯 ;
 217 ken } $not_vowel → 肯 ;
 218 ke → 克 ;
 219 kin } $not_vowel → 金 ;
 220 ki → 基 ;
 221 kja → 基亚 ;
 222 kjen } $not_vowel → 基恩 ;
 223 kje → 基耶 ;
 224 kju → 基乌 ;
 225 kon } $not_vowel → 孔 ;
 226 ko → 科 ;
 227 kun } $not_vowel → 昆 ;
 228 ku → 库 ;
 229 kwan } $not_vowel → 宽 ;
 230 kwa → 夸 ;
 231 kwen } $not_vowel → 昆 ;
 232 kwe → 库埃 ;
 233 kwin } $not_vowel → 昆 ;
 234 kwi → 奎 ;
 235 kwo → 阔 ;
 236 k → 克 ;  # Any other leftover k.
 237 lae } [^n] → 莱 ;  # Onset l: "lae" not followed by n is written with a single character.
 238 lai\u032F → 莱 ;
 239 lan } $not_vowel → 兰 ;  # "}" starts right-hand context: the "n" is consumed only when no vowel or glide follows.
 240 lau\u032F → 劳 ;
 241 la → 拉 ;
 242 len } $not_vowel → 伦 ;
 243 le → 莱 ;
 244 lin } $not_vowel → 林 ;
 245 li → 利 ;
 246 lja → 利亚 ;
 247 ljen } $not_vowel → 连 ;
 248 lje → 列 ;
 249 lju → 柳 ;
 250 lon } $not_vowel → 隆 ;
 251 lo → 洛 ;
 252 lun } $not_vowel → 伦 ;
 253 lu → 卢 ;
 254 lwan } $not_vowel → 卢安 ;
 255 lwa → 卢阿 ;
 256 lwen } $not_vowel → 伦 ;
 257 lwe → 卢埃 ;
 258 lwi → 卢伊 ;
 259 lwo → 洛 ;
 260 l → 尔 ;  # Any other leftover l.
 261 ʎan } $not_vowel → 良 ;  # Onset ʎ.
 262 ʎau\u032F → 廖 ;
 263 ʎa → 利亚 ;
 264 ʎen } $not_vowel → 连 ;
 265 ʎe → 列 ;
 266 ʎin } $not_vowel → 林 ;
 267 ʎi → 利 ;
 268 ʎon } $not_vowel → 利翁 ;
 269 ʎo → 略 ;
 270 ʎu → 柳 ;
 271 ʎwan } $not_vowel → 柳安 ;
 272 ʎwa → 柳阿 ;
 273 ʎwen } $not_vowel → 柳恩 ;
 274 ʎwe → 柳埃 ;
 275 ʎwi → 柳伊 ;
 276 ʎwo → 略 ;
 277 ʎ → 尔 ;  # Any other leftover ʎ.
 278 mai\u032F → 迈 ;  # Onset m.
 279 man } $not_vowel → 曼 ;
 280 martin → 马丁 ;  # Whole-sequence special case; listed before the shorter "ma" rule so that it is tried first.
 281 mau\u032F → 毛 ;
 282 ma → 马 ;
 283 men } $not_vowel → 门 ;
 284 me → 梅 ;
 285 min } $not_vowel → 明 ;
 286 mi → 米 ;
 287 mja → 米亚 ;
 288 mjen } $not_vowel → 缅 ;
 289 mje → 米耶 ;
 290 mju → 缪 ;
 291 mon } $not_vowel → 蒙 ;
 292 mo → 莫 ;
 293 mun } $not_vowel → 蒙 ;
 294 mu → 穆 ;
 295 mwan } $not_vowel → 穆安 ;
 296 mwa → 穆阿 ;
 297 mwen } $not_vowel → 门 ;
 298 mwe → 穆埃 ;
 299 mwin } $not_vowel → 穆因 ;  # Nonstandard, but fits observed data.
 300 mwi → 穆伊 ;
 301 mwo → 莫 ;
 302 m → 姆 ;  # Any other leftover m.
 303 nai\u032F → 奈 ;  # Onset n: longest patterns first, bare "n" last, because the first matching rule wins.
 304 nan } $not_vowel → 南 ;  # "}" starts right-hand context: the final "n" is consumed only when no vowel or glide follows.
 305 nau\u032F → 瑙 ;
 306 na → 纳 ;
 307 nen } $not_vowel → 嫩 ;
 308 ne → 内 ;
 309 nin } $not_vowel → 宁 ;
 310 ni → 尼 ;
 311 nja → 尼亚 ;
 312 njen } $not_vowel → 年 ;
 313 nje → 涅 ;
 314 nju → 纽 ;
 315 non } $not_vowel → 农 ;
 316 no → 诺 ;
 317 nun } $not_vowel → 嫩 ;
 318 nu → 努 ;
 319 nwan } $not_vowel → 努安 ;
 320 nwa → 努阿 ;
 321 nwen } $not_vowel → 农 ;
 322 nwe → 努埃 ;
 323 nwi → 努伊 ;
 324 nwo → 诺 ;
 325 n → 恩 ;  # Any other leftover n.
 326 ɲan } $not_vowel → 尼扬 ;  # Onset ɲ; note that this group has no bare "ɲ" fallback rule.
 327 ɲau\u032F → 尼奥 ;
 328 ɲa → 尼亚 ;
 329 ɲen } $not_vowel → 年 ;
 330 ɲe → 涅 ;
 331 ɲin } $not_vowel → 宁 ;
 332 ɲi → 尼 ;
 333 ɲon } $not_vowel → 尼翁 ;
 334 ɲo → 尼奥 ;
 335 ɲu → 纽 ;
 336 ɲwan } $not_vowel → 纽安 ;
 337 ɲwa → 纽阿 ;
 338 ɲwen } $not_vowel → 纽恩 ;
 339 ɲwe → 纽埃 ;
 340 ɲwi → 纽伊 ;
 341 ɲwo → 尼奥 ;
 342 on } $not_vowel → 翁 ;  # Syllables without an onset consonant, starting with o.
 343 ou\u032F → 欧 ;
 344 o → 奥 ;
 345 pai\u032F → 派 ;  # Onset p.
 346 pan } $not_vowel → 潘 ;
 347 pau\u032F → 保 ;
 348 pa → 帕 ;
 349 pen } $not_vowel → 彭 ;
 350 pe → 佩 ;
 351 pin } $not_vowel → 平 ;
 352 pi → 皮 ;
 353 pja → 皮亚 ;
 354 pjen } $not_vowel → 皮恩 ;
 355 pje → 彼 ;
 356 pju → 皮乌 ;
 357 pon } $not_vowel → 蓬 ;
 358 po → 波 ;
 359 pun } $not_vowel → 蓬 ;
 360 pu → 普 ;
 361 pwan } $not_vowel → 普安 ;
 362 pwa → 普阿 ;
 363 pwen } $not_vowel → 蓬 ;
 364 pwe → 普埃 ;
 365 pwi → 普伊 ;
 366 pwo → 波 ;
 367 p → 普 ;  # Any other leftover p.
 368 rai\u032F → 赖 ;  # Onset r: longest patterns first, bare "r" last, because the first matching rule wins.
 369 ran } $not_vowel → 兰 ;  # "}" starts right-hand context: the "n" is consumed only when no vowel or glide follows.
 370 rau\u032F → 劳 ;
 371 ra → 拉 ;
 372 ren } $not_vowel → 伦 ;
 373 re → 雷 ;
 374 rin } $not_vowel → 林 ;
 375 ri → 里 ;
 376 rja → 里亚 ;
 377 rjen } $not_vowel → 连 ;
 378 rje → 列 ;
 379 rju → 留 ;
 380 ron } $not_vowel → 龙 ;
 381 ro → 罗 ;
 382 run } $not_vowel → 伦 ;
 383 ru → 鲁 ;
 384 rwan } $not_vowel → 鲁安 ;
 385 rwa → 鲁阿 ;
 386 rwen } $not_vowel → 伦 ;
 387 rwe → 鲁埃 ;
 388 rwi → 鲁伊 ;
 389 rwo → 罗 ;
 390 r → R ;  # Leftover r becomes the placeholder "R"; the later "R → 尔" rule turns it into 尔.
 391 sai\u032F → 赛 ;  # Onset s.
 392 san } $not_vowel → 桑 ;
 393 sau\u032F → 绍 ;
 394 sa → 萨 ;
 395 sen } $not_vowel → 森 ;
 396 se → 塞 ;
 397 sin } $not_vowel → 辛 ;
 398 si → 西 ;
 399 sja → 西亚 ;
 400 sjen } $not_vowel → 先 ;
 401 sje → 谢 ;
 402 sju → 休 ;
 403 son } $not_vowel → 松 ;
 404 so → 索 ;
 405 sun } $not_vowel → 孙 ;
 406 su → 苏 ;
 407 swan } $not_vowel → 苏安 ;
 408 swa → 苏阿 ;
 409 swen } $not_vowel → 孙 ;
 410 swe → 苏埃 ;
 411 swi → 绥 ;
 412 swo → 索 ;
 413 s → 斯 ;  # Any other leftover s.
 414 tai\u032F → 泰 ;  # Onset t.
 415 tan } $not_vowel → 坦 ;
 416 tau\u032F → 陶 ;
 417 ta → 塔 ;
 418 tei\u032F → 泰 ;
 419 ten } $not_vowel → 滕 ;
 420 te → 特 ;
 421 tin } $not_vowel → 廷 ;
 422 ti → 蒂 ;
 423 tja → 蒂亚 ;
 424 tjen } $not_vowel → 蒂恩 ;
 425 tje → 铁 ;
 426 tju → 蒂乌 ;
 427 ton } $not_vowel → 通 ;
 428 to → 托 ;
 429 # The rules for /ts/ (tz in the orthography) are nonstandard and derived
 430 # entirely from the observed data.  They apply mostly to native toponyms
 431 # in Mexico.
 432 tsa → 察 ;
 433 tsen } $not_vowel → 岑 ;
 434 tse → 采 ;
 435 tsin } $not_vowel → 钦 ;
 436 tsi → 齐 ;
 437 tso → 措 ;
 438 tsun } $not_vowel → 聪 ;
 439 tsu → 楚 ;
 440 ts → 茨 ;  # Leftover ts; listed before the bare "t" rule so that "ts" is tried first.
 441 tun } $not_vowel → 通 ;
 442 tu → 图 ;
 443 twan } $not_vowel → 图安    ;
 444 twa → 图阿 ;
 445 twen } $not_vowel → 通 ;
 446 twe → 图埃 ;
 447 twi → 图伊 ;
 448 two → 托 ;
 449 t → 特 ;  # Any other leftover t.
 450 ʧai\u032F → 柴 ;  # Onset ʧ: longest patterns first, bare "ʧ" last, because the first matching rule wins.
 451 ʧan } $not_vowel → 钱 ;  # "}" starts right-hand context: the "n" is consumed only when no vowel or glide follows.
 452 ʧau\u032F → 乔 ;
 453 ʧa → 查 ;
 454 ʧen } $not_vowel → 琴 ;
 455 ʧe → 切 ;
 456 ʧin } $not_vowel → 钦 ;
 457 ʧi → 奇 ;
 458 ʧjan } $not_vowel → 钱 ;
 459 ʧja → 恰 ;
 460 ʧjen } $not_vowel → 钱 ;
 461 ʧje → 切 ;
 462 ʧjon } $not_vowel → 琼 ;
 463 ʧju → 丘 ;
 464 ʧon } $not_vowel → 琼 ;
 465 ʧo → 乔 ;
 466 ʧun } $not_vowel → 琼 ;  # Should be 春, per GB/T 17693.5-2009 Table 1.
 467 ʧu → 丘 ;
 468 ʧwan } $not_vowel → 丘安 ;
 469 ʧwa → 丘阿 ;
 470 ʧwen } $not_vowel → 琼 ;
 471 ʧwe → 丘埃 ;
 472 ʧwi → 崔 ;
 473 ʧwo → 乔 ;
 474 ʧ → 奇 ;  # Any other leftover ʧ.
 475 un } $not_vowel → 温 ;  # Syllables without an onset consonant, starting with u.
 476 u → 乌 ;
 477 wan } $not_vowel → 万 ;  # Glide w used as the onset; this group has no bare "w" fallback rule.
 478 wa → 瓦 ;
 479 wen } $not_vowel → 温 ;
 480 we → 韦 ;
 481 win } $not_vowel → 温 ;
 482 wi → 维 ;
 483 won } $not_vowel → 翁 ;  # Unseen.
 484 wo → 沃 ;
 485 xai\u032F → 海 ;  # Onset x.
 486 xan } $not_vowel → 汉 ;
 487 xau\u032F → 豪 ;
 488 xa → 哈 ;
 489 xei\u032F → 黑 ;
 490 xen } $not_vowel → 亨 ;
 491 xe → 赫 ;
 492 xin } $not_vowel → 欣 ;
 493 xi → 希 ;
 494 xja → 希亚 ;
 495 xjen } $not_vowel → 希恩 ;
 496 xje → 希耶 ;
 497 xju → 休 ;
 498 xon } $not_vowel → 洪 ;
 499 xo → 霍 ;
 500 xun } $not_vowel → 洪 ;
 501 xu → 胡 ;
 502 xwan } $not_vowel → 胡安 ;
 503 xwa → 华 ;
 504 xwen } $not_vowel → 洪 ;
 505 xwe → 胡埃 ;
 506 xwi → 惠 ;
 507 xwo → 霍 ;
 508 x → 赫 ;  # Any other leftover x.
 509 # 尔 simplification pass.  The idea is to drop most occurrences of 尔
 510 # corresponding to <r> (not to <l> or <ll>) from a word if there is another /l/
 511 # sound nearby.  There is a vague pattern like this in the data, but the details
 512 # remain to be determined.  At the moment, this does nothing, it just puts 尔 in
 513 # for every <r> in a syllable coda.
 514 ::Null;  # Pass boundary: the rules below run on the output of the main pass.
 515 $r = [R利拉];  # Referenced only by the commented-out rules below.
 516 #
 517 #
 518 # R } . $r → ;
 519 # R } .. $r → ;
 520 # R } ... $r → ;
 521 # R } .... $r → ;
 522 R → 尔 ;  # Every placeholder R (produced by "r → R" in the main pass) becomes 尔.
 523 # Dong-nan-xi-hai pass.  Per GB/T 17693.5-2009 Table 1, Note 4, replace confusing
 524 # characters at the beginning and end of a word.
 525 ::Null;  # Pass boundary: the rules below run on the output of the previous pass.
 526 $word_boundary { 东 → 栋 ;  # "{" ends the left-hand context: only a 东 right after a word boundary is replaced.
 527 $word_boundary { 南 → 楠 ;
 528 $word_boundary { 西 → 锡 ;
 529 海 } $word_boundary → 亥 ;  # "}" starts the right-hand context: only a 海 right before a word boundary is replaced.
 530 ::NFC;  # Finally, normalize the result to NFC.
 531