-# Copyright (c) 2001-2006 International Business Machines
+# Copyright (c) 2001-2008 International Business Machines
# Corporation and others. All Rights Reserved.
#
# RBBI Test Data
# Hindi combining chars. (An old test)
-<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
-•\u0939•\u094c•\u0964•</data>
-<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
+# TODO: Update these tests for Unicode 5.1 Extended Grapheme clusters
+#<data>•भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•
+#•\u0939•\u094c•\u0964•</data>
+#<data>•\u0916\u0947•\u0938\u0941\u0902•\u0926•\u0930•\u0939•\u094c•\u0964•</data>
-# Bug 1587. Tamil. \u0baa\u0bc1 should be two separate characters, even though
-# Hyangmi would perfer that it be one.
-<data>•\u0baa•\u0bc1•\u0baa•\u0bc1•</data>
+# Bug 1587. Tamil. \u0baa\u0bc1 is an Extended Grapheme Cluster
+<data>•\u0baa\u0bc1•\u0baa\u0bc1•</data>
# Regression test for bug 1889
<data>•\u0f40\u0f7d•\u0000•\u0f7e•</data>
# Treat Japanese Half Width voicing marks as combining
<data>•A\uff9e•B\uff9f\uff9e\uff9f•C•</data>
+########################################################################################
+#
+#
+# E x t e n d e d    G r a p h e m e    C l u s t e r    T e s t s
+#
+#
+##########################################################################################
+#<xgc>
+
+# Plain Vanilla grapheme clusters
+#<data>•a•b•c•</data>
+#<data>•a\u0301\u0302• •b\u0303\u0304•</data>
+
+# Assorted Hindi combining marks
+#<data>•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C•</data>
+
+# Thai Clusters
+# $Prepend $Extend* $PrependBase $Extend*;
+#
+#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
+
+
########################################################################################
#
#
<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
# Regression for bug 836
-<data>•AAA(AAA •</data>
+# Note: Unicode 5.1 changed this behavior; there is now a break preceding the '('.
+# ICU will want to change it back before releasing,
+# so that there is once again no break preceding the '('
+<data>•AAA•(AAA •</data>
# Try some words from other scripts.
# Greek, Cyrillic, Hebrew, Arabic, Arabic, Georgian, Latin
<data>•ΑΒΓ •БВГ •אבג֓ •ابت •١٢٣ •\u10A0\u10A1\u10A2 •ABC •</data>
+
########################################################################################
#
#