ICU-62107.0.1.tar.gz

[apple/icu.git] / icuSources / data / translit / Han_Spacedhan.txt
diff --git a/icuSources/data/translit/Han_Spacedhan.txt b/icuSources/data/translit/Han_Spacedhan.txt

index 85607b58e72c15431247c1a5945249ada338217a..9428d4dd9c8f1317a7500d517db6fdfa48ae5ebf 100644 (file)
--- a/icuSources/data/translit/Han_Spacedhan.txt
+++ b/icuSources/data/translit/Han_Spacedhan.txt
@@ -1,24 +1,24 @@
-#--------------------------------------------------------------------
-# Copyright (c) 1999-2004, International Business Machines
-# Corporation and others. All Rights Reserved.
-#--------------------------------------------------------------------
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: Han_Spacedhan.txt
+# Generated from CLDR
+#
  
  # Only intended for internal use
  
  # Only intended for internal use
+# Make sure Han characters are normalized, including characters whose compatibility decompositions contain Han.
+# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
+# where XXX is the resolved [:ideographic:][:sc=han:]. That first set needs updating with each Unicode release!
+:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
  :: fullwidth-halfwidth;
  :: fullwidth-halfwidth;
-
-｡ > '.';
-
+｡ → '.';
  $terminalPunct = [\.\,\:\;\?\!．，：？！｡、；[:Pe:][:Pf:]];
  $initialPunct = [:Ps:][:Pi:];
  $terminalPunct = [\.\,\:\;\?\!．，：？！｡、；[:Pe:][:Pf:]];
  $initialPunct = [:Ps:][:Pi:];
-
  # add space between any Han or terminal punctuation and letters, and
  # between letters and Han or initial punct
  # add space between any Han or terminal punctuation and letters, and
  # between letters and Han or initial punct
-
-[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;
-[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;
-
+[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
+[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
  # remove spacing between ideographs and other letters
  # remove spacing between ideographs and other letters
-
- < [:Ideographic:] { ' ' } [:Letter:] ;
- < [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
+← [:Ideographic:] { ' ' } [:Letter:] ;
+← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;