X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..0f5d89e82340278ed3d7d50029f37cab2c41a57e:/icuSources/data/translit/Han_Spacedhan.txt diff --git a/icuSources/data/translit/Han_Spacedhan.txt b/icuSources/data/translit/Han_Spacedhan.txt index 85607b58..9428d4dd 100644 --- a/icuSources/data/translit/Han_Spacedhan.txt +++ b/icuSources/data/translit/Han_Spacedhan.txt @@ -1,24 +1,24 @@ -#-------------------------------------------------------------------- -# Copyright (c) 1999-2004, International Business Machines -# Corporation and others. All Rights Reserved. -#-------------------------------------------------------------------- +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# +# File: Han_Spacedhan.txt +# Generated from CLDR +# # Only intended for internal use +# Make sure Han are normalized, including characters that contain them. +# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:] +# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release! +:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc; :: fullwidth-halfwidth; - -。 > '.'; - +。 → '.'; $terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]]; $initialPunct = [:Ps:][:Pi:]; - # add space between any Han or terminal punctuation and letters, and # between letters and Han or initial punct - -[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ; -[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ; - +[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ; +[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ; # remove spacing between ideographs and other letters - - < [:Ideographic:] { ' ' } [:Letter:] ; - < [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ; +← [:Ideographic:] { ' ' } [:Letter:] ; +← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;