]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/testdata/break_rules/word.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / testdata / break_rules / word.txt
index a86f94fec816ae730afc44d5a1ed526d73160dcf..13d0e63791233279d4728959b8940ed8e14cbf8b 100644 (file)
 type = word;      # one of grapheme | word | line | sentence
 locale = en;
 
+Han            = [:Han:];
 
 CR                 = [\p{Word_Break = CR}];
 LF                 = [\p{Word_Break = LF}];
 Newline            = [\p{Word_Break = Newline}];
-Extend             = [\p{Word_Break = Extend}];
+Extend             = [\p{Word_Break = Extend}-Han];
 ZWJ                = [\p{Word_Break = ZWJ}];
 Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
 Format             = [\p{Word_Break = Format}];
@@ -30,14 +31,13 @@ Double_Quote       = [\p{Word_Break = Double_Quote}];
 MidNumLet          = [\p{Word_Break = MidNumLet}];
 MidLetter          = [\p{Word_Break = MidLetter} - [\:]];
 MidNum             = [\p{Word_Break = MidNum}];
-Numeric            = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]];  # Patch for ICU-12079;
+Numeric            = [\p{Word_Break = Numeric}];
 ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
 WSegSpace          = [\p{Word_Break = WSegSpace}];
 Extended_Pict      = [:ExtPict:];
 
 # Define dictionary, with the effect that those characters don't appear in test data.
 
-Han            = [:Han:];
 Hiragana       = [:Hiragana:];
 
 Control        = [\p{Grapheme_Cluster_Break = Control}];