# Header settings: the kind of boundary these rules describe, and the locale
# they are associated with.
type = word; # one of grapheme | word | line | sentence
# NOTE(review): en_US_POSIX suggests these are the POSIX-locale variant of the
# word rules -- confirm against the consumer of this file.
locale = en_US_POSIX;
# Character-class definitions. Each statement has the form
#     Name = [set expression];
# and most sets here are selected by the Unicode Word_Break property.
#
# NOTE(review): several lines start with '+' or '-'. These look like
# unified-diff markers ('+' added, '-' removed) rather than rule syntax.
# Confirm whether this file is intended to be a patch fragment before
# editing those lines. Where the same name appears with both markers
# (Extend, Numeric), the two lines are alternative definitions of one set.
#
# Han is defined here, ahead of the '+' Extend definition that references it.
# NOTE(review): presumably it was moved up for that reason -- confirm.
+Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
# Extend: the '-' variant is the plain Word_Break=Extend set; the '+' variant
# subtracts Han from it (Han appears to be a by-name reference to the set above).
-Extend = [\p{Word_Break = Extend}];
+Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
# '.' is removed from MidNumLet and added to MidNum below, so a full stop is
# classified as MidNum rather than MidNumLet. ':' is removed from MidLetter.
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
# Numeric: the '-' variant adds U+FF10..U+FF19 (the fullwidth digits) to the
# Word_Break=Numeric set as a patch for ICU-12079; the '+' variant uses the
# property value alone.
-Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
+Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
# Selected with the [:ExtPict:] property shorthand rather than by Word_Break.
Extended_Pict = [:ExtPict:];
# Define the dictionary set; the effect is that its characters don't appear in the test data.
# Script-based sets and the control-character set.
# NOTE(review): the leading '-' on the Han line looks like a diff "removed"
# marker; an identical definition carrying '+' appears earlier in the file,
# before the Extend definitions -- confirm.
-Han = [:Han:];
Hiragana = [:Hiragana:];
# Control is selected by Grapheme_Cluster_Break, not by Word_Break.
Control = [\p{Grapheme_Cluster_Break = Control}];