-$CR = \u000d;
-$LF = \u000a;
-$Extend = [[:Grapheme_Extend = TRUE:]];
-$Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:] - $Extend];
-$Format = [[:Cf:] - $Extend];
-$Hiragana = [:Hiragana:];
-$Ideographic = [:IDEOGRAPHIC:];
-
-$ALetterEx = $ALetter $Extend*;
-$NumericEx = $Numeric $Extend*;
-$MidNumEx = $MidNum $Extend*;
-$MidLetterEx = $MidLetter $Extend*;
-$KatakanaEx = $Katakana $Extend*;
-$ExtendNumLetEx = $ExtendNumLet $Extend*;
+$CR = [\p{Word_Break = CR}];
+$LF = [\p{Word_Break = LF}];
+$Newline = [\p{Word_Break = Newline}];
+$Extend = [\p{Word_Break = Extend}];
+$Format = [\p{Word_Break = Format}];
+$Katakana = [\p{Word_Break = Katakana}];
+$ALetter = [\p{Word_Break = ALetter}];
+$MidNumLet = [\p{Word_Break = MidNumLet}];
+$MidLetter = [\p{Word_Break = MidLetter}];
+$MidNum = [\p{Word_Break = MidNum}];
+$Numeric = [\p{Word_Break = Numeric}];
+$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
+
+
+# Dictionary character set, for triggering language-based break engines. Currently
+# limited to LineBreak=Complex_Context. Note that this set only works with Unicode
+# 5.0 or later, where the definition of Complex_Context was corrected to include all
+# characters requiring dictionary-based breaking.
+
+$dictionary = [:LineBreak = Complex_Context:];
+$Control = [\p{Grapheme_Cluster_Break = Control}];
+$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not
+ # include the dictionary characters.
+
+#
+# Rule 4: Ignore Format and Extend characters,
+# except when they appear at the beginning of a region of text.
+#
+$KatakanaEx = $Katakana ($Extend | $Format)*;
+$ALetterEx = $ALetterPlus ($Extend | $Format)*;
+$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
+$MidLetterEx = $MidLetter ($Extend | $Format)*;
+$MidNumEx = $MidNum ($Extend | $Format)*;
+$NumericEx = $Numeric ($Extend | $Format)*;
+$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
+
+$Hiragana = [\p{script=Hiragana}];
+$Ideographic = [\p{Ideographic}];
+$HiraganaEx = $Hiragana ($Extend | $Format)*;
+$IdeographicEx = $Ideographic ($Extend | $Format)*;