git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/testdata/break_rules/word_POSIX.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / testdata / break_rules / word_POSIX.txt
index 04bcb321ae93cee894c4fadb26ae54ee9f0d0e41..7fdc1a1ee0773133dda1741b5562f4684631d4db 100644 (file)
 type = word;      # one of grapheme | word | line | sentence
 locale = en_US_POSIX;
 
+Han            = [:Han:];
 
 CR                 = [\p{Word_Break = CR}];
 LF                 = [\p{Word_Break = LF}];
 Newline            = [\p{Word_Break = Newline}];
-Extend             = [\p{Word_Break = Extend}];
+Extend             = [\p{Word_Break = Extend}-Han];
 ZWJ                = [\p{Word_Break = ZWJ}];
 Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
 Format             = [\p{Word_Break = Format}];
@@ -29,14 +30,13 @@ Double_Quote       = [\p{Word_Break = Double_Quote}];
 MidNumLet          = [\p{Word_Break = MidNumLet} - [.]];
 MidLetter          = [\p{Word_Break = MidLetter} - [\:]];
 MidNum             = [\p{Word_Break = MidNum} [.]];
-Numeric            = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]];  # Patch for ICU-12079;
+Numeric            = [\p{Word_Break = Numeric}];
 ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
 WSegSpace          = [\p{Word_Break = WSegSpace}];
 Extended_Pict      = [:ExtPict:];
 
 #define dictionary, with the effect being that those characters don't appear in test data.
 
-Han            = [:Han:];
 Hiragana       = [:Hiragana:];
 
 Control        = [\p{Grapheme_Cluster_Break = Control}];