#
-# Copyright (C) 2002-2006, International Business Machines Corporation and others.
+# Copyright (C) 2002-2010, International Business Machines Corporation and others.
# All Rights Reserved.
#
# file: char.txt
#
# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
# See Unicode Standard Annex #29.
-# These rules are based on TR29 Version 5.0.0
-# Includes post-5.0 change to treat Japanese half width voicing marks
-# as Grapheme Extend.
+# These rules are based on TR29 Revision 16, for Unicode Version 6.0
#
#
# Character Class Definitions.
#
-$CR = [\p{Grapheme_Cluster_Break = CR}];
-$LF = [\p{Grapheme_Cluster_Break = LF}];
-$Control = [\p{Grapheme_Cluster_Break = Control}];
-
-# add Japanese Half Width voicing marks to $Extend
-$VoiceMarks = [\uff9e\uff9f];
-$Extend = [\p{Grapheme_Cluster_Break = Extend} $VoiceMarks];
+$CR = [\p{Grapheme_Cluster_Break = CR}];
+$LF = [\p{Grapheme_Cluster_Break = LF}];
+$Control = [\p{Grapheme_Cluster_Break = Control}];
+$Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
+$Extend = [\p{Grapheme_Cluster_Break = Extend}];
+$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
# Korean Syllable Definitions
$LV = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];
-$HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;
## -------------------------------------------------
+!!chain;
!!forward;
$CR $LF;
-([^$Control $CR $LF] | $HangulSyllable) $Extend*;
+
+$L ($L | $V | $LV | $LVT);
+($LV | $V) ($V | $T);
+($LVT | $T) $T;
+
+[^$Control $CR $LF] $Extend;
+
+[^$Control $CR $LF] $SpacingMark;
+$Prepend [^$Control $CR $LF];
+
## -------------------------------------------------
!!reverse;
+$LF $CR;
+($L | $V | $LV | $LVT) $L;
+($V | $T) ($LV | $V);
+$T ($LVT | $T);
+
+$Extend [^$Control $CR $LF];
+$SpacingMark [^$Control $CR $LF];
+[^$Control $CR $LF] $Prepend;
-$BackHangulSyllable = $L+ | ($T* ($V+$LV? | $LV | $LVT) $L*) | $T+;
-$BackOneCluster = ($LF $CR) | ($Extend* ([^$Control $CR $LF] | $BackHangulSyllable));
-$BackOneCluster;
## -------------------------------------------------
!!safe_reverse;
-# rule 6, 7, 8
-$V+ $L;
## -------------------------------------------------
!!safe_forward;
-# rule 6, 7, 8
-$V+ $T;