ICU-66108.tar.gz

[apple/icu.git] / icuSources / data / translit / Han_Latin_Names.txt
diff --git a/icuSources/data/translit/Han_Latin_Names.txt b/icuSources/data/translit/Han_Latin_Names.txt

index 4f82fb457e8613c0863a513ffed7e423d22ffbc2..fed312619f41122a6c45f974178561195f442f28 100644 (file)
--- a/icuSources/data/translit/Han_Latin_Names.txt
+++ b/icuSources/data/translit/Han_Latin_Names.txt
@@ -1,57 +1,73 @@
-# ***************************************************************************
-# *
-# *  Copyright (C) 2004-2013, International Business Machines
-# *  Corporation; Unicode, Inc.; and others.  All Rights Reserved.
-# *
-# ***************************************************************************
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
  # File: Han_Latin_Names.txt
-# Generated from CLDR 
+# Generated from CLDR
  #
  
-$startOfHan = [^[:Han:]] \u0020?; # pre-context for surname-only readings
+# This transform is primarily intended to produce readings for Chinese surnames, or for full
+# Chinese personal names - surname first - that occur at the beginning of a contiguous Han substring
+# (i.e. at the beginning of text, or immediately preceded by space or other non-Han characters).
+# Several Han characters have readings in surnames that differ from the readings found in Han-Latin.
+# ----
+# Insert marker at start of each Han sequence (including Han after space).
+# Do this before ::Han-Spacedhan() to catch Han after space in original text,
+# and to apply before all other rules.
+$startOfHanMarker = \uFDD1;
+[:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1;
+# Need Spacedhan so the name transliterations get spaced properly
  ::Han-Spacedhan();
-# After the above, Hanzi are separated by space from each other and from letters;
-# take this into account when specifying context.
-# First specify mappings that depend on specific adjacent hanzi:
+# Convert special name readings that depend on next character
  令 } \u0020? 狐 →líng;
  万 } \u0020? 俟 →mò;
  澹 } \u0020? 台 →tán;
-# The following maps 长 to the standard reading zhǎng for this case,
-# to override the normal name reading 长 →cháng below.
-$startOfHan { 长 } \u0020? 孙 →zhǎng;
-# Now mappings that depend on no context or $startOfHan only
-$startOfHan { 秘 →bì;
-$startOfHan { 卜 →bǔ;
-长 →cháng;
-$startOfHan { 种 →chóng;
-$startOfHan { 重 →chóng;
-$startOfHan { 刀 →diāo;
-干 →gān;
-葛 →gě;
-$startOfHan { 盖 →gě;
-$startOfHan { 过 →guō;
-$startOfHan { 华 →huà;
-$startOfHan { 纪 →jǐ;
-筠 →jūn;
-$startOfHan { 牟 →mù;
-$startOfHan { 区 →ōu;
-$startOfHan { 繁 →pó;
-仇 →qiú;
-$startOfHan { 任 →rén;
-$startOfHan { 单 →shàn;
-$startOfHan { 召 →shào;
-$startOfHan { 折 →shé;
-$startOfHan { 舍 →shè;
-$startOfHan { 沈 →shěn;
-峙 →shì;
-隗 →wěi;
-$startOfHan { 解 →xiè;
-莘 →xīn;
-$startOfHan { 燕 →yān;
-$startOfHan { 尉 →yù;
-$startOfHan { 乐 →yuè;
-$startOfHan { 员 →yùn;
-$startOfHan { 查 →zhā;
-翟 →zhái;
-曾 →zēng;
+# The following maps 长 to the standard Han-Latin reading zhǎng when it starts a Han sequence
+# and is followed by 孙, overriding the normal Han-Latin/Names reading 长→cháng further below.
+$startOfHanMarker{ 长 } \u0020? 孙 →zhǎng;
+# Convert single characters with special name readings
+$startOfHanMarker{ 秘→bì;
+$startOfHanMarker{ 卜→bǔ;
+长→cháng;
+$startOfHanMarker{ 种→chóng;
+$startOfHanMarker{ 重→chóng;
+$startOfHanMarker{ 刀→diāo;
+干→gān;
+葛→gě;
+$startOfHanMarker{ 盖→gě;
+$startOfHanMarker{ 过→guō;
+$startOfHanMarker{ 华→huà;
+$startOfHanMarker{ 纪→jǐ;
+筠→jūn;
+$startOfHanMarker{ 阚→kàn;
+靓→liàng;
+$startOfHanMarker{ 牟→mù;
+$startOfHanMarker{ 粘→nián;
+$startOfHanMarker{ 区→ōu;
+$startOfHanMarker{ 繁→pó;
+$startOfHanMarker{ 覃→qín;
+仇→qiú;
+$startOfHanMarker{ 任→rén;
+$startOfHanMarker{ 单→shàn;
+$startOfHanMarker{ 召→shào;
+$startOfHanMarker{ 折→shé;
+$startOfHanMarker{ 舍→shè;
+$startOfHanMarker{ 沈→shěn;
+峙→shì;
+隗→wěi;
+$startOfHanMarker{ 解→xiè;
+莘→xīn;
+$startOfHanMarker{ 燕→yān;
+$startOfHanMarker{ 尉→yù;
+$startOfHanMarker{ 乐→yuè;
+$startOfHanMarker{ 员→yùn;
+$startOfHanMarker{ 查→zhā;
+翟→zhái;
+曾→zēng;
+# Convert each $startOfHanMarker to a space, or to nothing if it is at the beginning of the text.
+# Need to do this as a separate pass to get the spacing right.
+::Null();
+[^$]{ $startOfHanMarker →\u0020;
+$startOfHanMarker →;
+# Then run the normal Han-Latin transform for the rest
  ::Han-Latin();
+