1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
4 # File: es_FONIPA_zh.txt
8 # Transforms Spanish to Mandarin Chinese. The input Spanish string must be in
9 # phonemic IPA transcription (es_FONIPA); the output is in Simplified Chinese.
10 $word_boundary = [-\ $]; # Hyphen, escaped space, or (via the trailing $ anchor) start/end of text
11 $vowel = [aeijouw]; # Vowels and glides
12 $not_vowel = [^$vowel]; # Complement of $vowel: anything that is not a vowel or glide
13 # First pass: Collapse phonetic distinctions not preserved in Mandarin.
# Rule syntax: "x { y" rewrites y only when it is preceded by x, and "y } z"
# rewrites y only when it is followed by z. \u032F is U+032F COMBINING INVERTED
# BREVE BELOW, the IPA mark for a non-syllabic vowel.
30 [^dgktx] { ei\u032F → e ; # ei + U+032F collapses to e unless preceded by d, g, k, t or x
31 [^-\ .$] { eu\u032F → eu ; # Drops the non-syllabic mark; the negated set lists hyphen, space, "." and the $ anchor
32 [^-\ .$] { ou\u032F → o; # ou + U+032F collapses to o under the same preceding-context set
34 [^$word_boundary] { m } [bp] → n; # GB/T 17693.5-2009, 5.3.2: m before b or p becomes n
35 s[θs] → s; # GB/T 17693.5-2009, 5.3.4: s followed by θ or s collapses to a single s
36 [^ʧ] { jo → io; # GB/T 17693.5-2009 Table 1, Note 7: not applied after ʧ
38 j } an $not_vowel → i ; # GB/T 17693.5-2009 Table 1, Note 8
39 # GB/T 17693.5-2009 Table 1, Note 8 also says that <uai> should be treated as if
40 # it was <u> plus <ai>. This is not borne out by the observed data, which
41 # suggests that <ua> plus <i> is the more appropriate choice in some
43 [g.$] { wai\u032F → wai ; # After g, "." or the $ anchor only the non-syllabic mark is removed; must precede the general rule
44 wai\u032F → uai\u032F ; # Everywhere else the glide w is rewritten as the vowel u
45 [g.$] { wau\u032F → wau ; # Same context-specific treatment for wau; must precede the general rule
46 wau\u032F → uau\u032F ; # General case: w becomes u
47 jau\u032F → iau\u032F ; # The glide j is rewritten as the vowel i
48 # Even though "ao" is not a diphthong in Spanish, Mandarin treats it as one.
49 [^jw] { ao } [^n] → au\u032F ; # Not after j or w, and followed by something other than n
50 [^jw] { ao } n $vowel → au\u032F ; # Before n only when that n is itself followed by a vowel or glide
51 # Main pass: Phoneme to Hanzi conversion.
52 # This generally follows GB/T 17693.5-2009 Table 1, unless otherwise noted.
# Every "...n } $not_vowel" rule below rewrites a sequence ending in n only when
# the next character is outside $vowel, so the n is not consumed when a vowel
# or glide follows it. Rules are tried in the order listed, so a more specific
# rule must appear before the general rule it overrides.
60 ban } $not_vowel → 班 ;
63 ben } $not_vowel → 本 ;
65 bin } $not_vowel → 宾 ;
68 bjen } $not_vowel → 比恩 ;
71 bon } $not_vowel → 邦 ;
73 bun } $not_vowel → 本 ;
75 bwan } $not_vowel → 布安 ;
77 bwen } $not_vowel → 布恩 ; # Should be 本, per GB/T 17693.5-2009 Table 1.
79 bwin } $not_vowel → 布因 ; # Nonstandard, but fits observed data.
84 βan } $not_vowel → 万 ;
87 βen } $not_vowel → 文 ;
89 βin } $not_vowel → 温 ;
92 βjen } $not_vowel → 维恩 ;
95 βon } $not_vowel → 翁 ;
97 βun } $not_vowel → 文 ;
99 βwan } $not_vowel → 万 ;
101 βwen } $not_vowel → 文 ;
107 dan } $not_vowel → 丹 ;
111 den } $not_vowel → 登 ;
113 din } $not_vowel → 丁 ;
116 djen } $not_vowel → 迪恩 ;
119 don } $not_vowel → 东 ;
121 dun } $not_vowel → 敦 ;
123 dwan } $not_vowel → 端 ;
125 dwen } $not_vowel → 敦 ;
129 d } $word_boundary → ; # Empty replacement: a d directly before a word boundary is deleted
132 en } $not_vowel → 恩 ;
136 fan } $not_vowel → 凡 ;
140 fin } $not_vowel → 芬 ;
143 fjen } $not_vowel → 菲恩 ;
146 fon } $not_vowel → 丰 ;
148 fun } $not_vowel → 丰 ;
150 fwan } $not_vowel → 富安 ;
152 fwen } $not_vowel → 丰 ;
156 # The choice of 弗 vs. 夫 sounds simple according to the GB/T standard, but the
157 # data suggest otherwise. Ideally, 弗 should occur at the beginning of a
158 # morpheme (e.g. in "villafranca" 比利亚弗兰卡) and 夫 everywhere else. Since
159 # we don't have morpheme boundaries, we'll fudge it by writing 夫 at the end of
160 # a word and 弗 everywhere else.
161 f } $word_boundary → 夫 ;
164 gan } $not_vowel → 甘 ;
168 gen } $not_vowel → 根 ;
170 gin } $not_vowel → 金 ;
173 gjen } $not_vowel → 吉恩 ;
176 gon } $not_vowel → 贡 ;
178 gun } $not_vowel → 贡 ;
180 gwan } [$] → 古安 ; # Nonstandard, but fits observed data. [$] is the text-boundary anchor; listed first so it wins over the general gwan rule.
181 gwan } $not_vowel → 关 ;
183 gwen } $not_vowel → 古恩 ;
188 in } $not_vowel → 因 ;
191 ʝan } $not_vowel → 扬 ;
194 ʝen } $not_vowel → 延 ;
196 ʝin } $not_vowel → 因 ;
198 ʝon } $not_vowel → 永 ;
200 ʝun } $not_vowel → 云 ;
202 ʝwan } $not_vowel → 元 ;
204 ʝwen } $not_vowel → 云 ;
210 kan } $not_vowel → 坎 ;
214 ken } $not_vowel → 肯 ;
216 kin } $not_vowel → 金 ;
219 kjen } $not_vowel → 基恩 ;
222 kon } $not_vowel → 孔 ;
224 kun } $not_vowel → 昆 ;
226 kwan } $not_vowel → 宽 ;
228 kwen } $not_vowel → 昆 ;
230 kwin } $not_vowel → 昆 ;
236 lan } $not_vowel → 兰 ;
239 len } $not_vowel → 伦 ;
241 lin } $not_vowel → 林 ;
244 ljen } $not_vowel → 连 ;
247 lon } $not_vowel → 隆 ;
249 lun } $not_vowel → 伦 ;
251 lwan } $not_vowel → 卢安 ;
253 lwen } $not_vowel → 伦 ;
258 ʎan } $not_vowel → 良 ;
261 ʎen } $not_vowel → 连 ;
263 ʎin } $not_vowel → 林 ;
265 ʎon } $not_vowel → 利翁 ;
268 ʎwan } $not_vowel → 柳安 ;
270 ʎwen } $not_vowel → 柳恩 ;
276 man } $not_vowel → 曼 ;
280 men } $not_vowel → 门 ;
282 min } $not_vowel → 明 ;
285 mjen } $not_vowel → 缅 ;
288 mon } $not_vowel → 蒙 ;
290 mun } $not_vowel → 蒙 ;
292 mwan } $not_vowel → 穆安 ;
294 mwen } $not_vowel → 门 ;
296 mwin } $not_vowel → 穆因 ; # Nonstandard, but fits observed data.
301 nan } $not_vowel → 南 ;
304 nen } $not_vowel → 嫩 ;
306 nin } $not_vowel → 宁 ;
309 njen } $not_vowel → 年 ;
312 non } $not_vowel → 农 ;
314 nun } $not_vowel → 嫩 ;
316 nwan } $not_vowel → 努安 ;
318 nwen } $not_vowel → 农 ;
323 ɲan } $not_vowel → 尼扬 ;
326 ɲen } $not_vowel → 年 ;
328 ɲin } $not_vowel → 宁 ;
330 ɲon } $not_vowel → 尼翁 ;
333 ɲwan } $not_vowel → 纽安 ;
335 ɲwen } $not_vowel → 纽恩 ;
339 on } $not_vowel → 翁 ;
343 pan } $not_vowel → 潘 ;
346 pen } $not_vowel → 彭 ;
348 pin } $not_vowel → 平 ;
351 pjen } $not_vowel → 皮恩 ;
354 pon } $not_vowel → 蓬 ;
356 pun } $not_vowel → 蓬 ;
358 pwan } $not_vowel → 普安 ;
360 pwen } $not_vowel → 蓬 ;
366 ran } $not_vowel → 兰 ;
369 ren } $not_vowel → 伦 ;
371 rin } $not_vowel → 林 ;
374 rjen } $not_vowel → 连 ;
377 ron } $not_vowel → 龙 ;
379 run } $not_vowel → 伦 ;
381 rwan } $not_vowel → 鲁安 ;
383 rwen } $not_vowel → 伦 ;
389 san } $not_vowel → 桑 ;
392 sen } $not_vowel → 森 ;
394 sin } $not_vowel → 辛 ;
397 sjen } $not_vowel → 先 ;
400 son } $not_vowel → 松 ;
402 sun } $not_vowel → 孙 ;
404 swan } $not_vowel → 苏安 ;
406 swen } $not_vowel → 孙 ;
412 tan } $not_vowel → 坦 ;
416 ten } $not_vowel → 滕 ;
418 tin } $not_vowel → 廷 ;
421 tjen } $not_vowel → 蒂恩 ;
424 ton } $not_vowel → 通 ;
426 # The rules for /ts/ (tz in the orthography) are nonstandard and derived
427 # entirely from the observed data. They apply mostly to native toponyms
430 tsen } $not_vowel → 岑 ;
432 tsin } $not_vowel → 钦 ;
435 tsun } $not_vowel → 聪 ;
438 tun } $not_vowel → 通 ;
440 twan } $not_vowel → 图安 ;
442 twen } $not_vowel → 通 ;
448 ʧan } $not_vowel → 钱 ;
451 ʧen } $not_vowel → 琴 ;
453 ʧin } $not_vowel → 钦 ;
455 ʧjan } $not_vowel → 钱 ;
457 ʧjen } $not_vowel → 钱 ;
459 ʧjon } $not_vowel → 琼 ;
461 ʧon } $not_vowel → 琼 ;
463 ʧun } $not_vowel → 琼 ; # Should be 春, per GB/T 17693.5-2009 Table 1.
465 ʧwan } $not_vowel → 丘安 ;
467 ʧwen } $not_vowel → 琼 ;
472 un } $not_vowel → 温 ;
474 wan } $not_vowel → 万 ;
476 wen } $not_vowel → 温 ;
478 win } $not_vowel → 温 ;
480 won } $not_vowel → 翁 ; # Unseen.
483 xan } $not_vowel → 汉 ;
487 xen } $not_vowel → 亨 ;
489 xin } $not_vowel → 欣 ;
492 xjen } $not_vowel → 希恩 ;
495 xon } $not_vowel → 洪 ;
497 xun } $not_vowel → 洪 ;
499 xwan } $not_vowel → 胡安 ;
501 xwen } $not_vowel → 洪 ;
506 # 尔 simplification pass. The idea is to drop most occurrences of 尔
507 # corresponding to <r> (not to <l> or <ll>) from a word if there is another /l/
508 # sound nearby. There is a vague pattern like this in the data, but the details
509 # remain to be determined. At the moment, this does nothing, it just puts 尔 in
510 # for every <r> in a syllable coda.
520 # Dong-nan-xi-hai pass. Per GB/T 17693.5-2009 Table 1, Note 4, replace
521 # confusing characters at the beginning and end of a word.
523 $word_boundary { 东 → 栋 ; # Word-initial only: 东 directly after a word boundary
524 $word_boundary { 南 → 楠 ; # Word-initial only: 南 directly after a word boundary
525 $word_boundary { 西 → 锡 ; # Word-initial only: 西 directly after a word boundary
526 海 } $word_boundary → 亥 ; # Word-final only: 海 directly before a word boundary