ICU-511.27.tar.gz

[apple/icu.git] / icuSources / test / intltest / rbbitst.cpp
diff --git a/icuSources/test/intltest/rbbitst.cpp b/icuSources/test/intltest/rbbitst.cpp

index d8feb991ed43c9ba32ee040b08c9e2e6ea09cb01..eddb99a4eb42a5886a266eab6d6fa59607969404 100644 (file)
--- a/icuSources/test/intltest/rbbitst.cpp
+++ b/icuSources/test/intltest/rbbitst.cpp
@@ -1,3801 +1,4312 @@
-/********************************************************************\r
- * COPYRIGHT:\r
- * Copyright (c) 1999-2004, International Business Machines Corporation and\r
- * others. All Rights Reserved.\r
- ********************************************************************/\r
-/************************************************************************\r
-*   Date        Name        Description\r
-*   12/15/99    Madhu        Creation.\r
-*   01/12/2000  Madhu        Updated for changed API and added new tests\r
-************************************************************************/\r
-\r
-#include "unicode/utypes.h"\r
-\r
-#if !UCONFIG_NO_BREAK_ITERATION\r
-\r
-#include "unicode/utypes.h"\r
-#include "unicode/brkiter.h"\r
-#include "unicode/rbbi.h"\r
-#include "unicode/uchar.h"\r
-#include "unicode/utf16.h"\r
-#include "unicode/ucnv.h"\r
-#include "unicode/schriter.h"\r
-#include "unicode/uniset.h"\r
-#include "unicode/regex.h"        // TODO: make conditional on regexp being built.\r
-#include "unicode/ustring.h"\r
-\r
-#include "intltest.h"\r
-#include "rbbitst.h"\r
-#include <string.h>\r
-#include "uvector.h"\r
-#include "uvectr32.h"\r
-#include <string.h>\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-\r
-\r
-\r
-//---------------------------------------------------------------------------\r
-//\r
-//   class BITestData   Holds a set of Break iterator test data and results\r
-//                      Includes\r
-//                         - the string data to be broken\r
-//                         - a vector of the expected break positions.\r
-//                         - a vector of source line numbers for the data,\r
-//                               (to help see where errors occurred.)\r
-//                         - The expected break tag values.\r
-//                         - Vectors of actual break positions and tag values.\r
-//                         - Functions for comparing actual with expected and\r
-//                            reporting errors.\r
-//\r
-//----------------------------------------------------------------------------\r
-// BITestData bundles the text under test with the expected and the actual\r
-//   break results.  All members are public; the RBBITest helpers in this file\r
-//   read fDataToBreak and append to the fActual* vectors directly.\r
-class BITestData {\r
-public:\r
-    UnicodeString    fDataToBreak;             // All chunks given to addDataChunk(), concatenated.\r
-    UVector          fExpectedBreakPositions;  // Length of fDataToBreak after each chunk was added.\r
-    UVector          fExpectedTags;            // The tag argument recorded for each chunk.\r
-    UVector          fLineNum;                 // The lineNum argument recorded for each chunk.\r
-    UVector          fActualBreakPositions;   // Test Results.\r
-    UVector          fActualTags;              // Rule status values saved alongside fActualBreakPositions.\r
-\r
-    BITestData(UErrorCode &status);\r
-    // NOTE(review): status is taken by value here, so a failure raised inside\r
-    //   addDataChunk() is not reported back to the caller.\r
-    void             addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status);\r
-    // Compares actual against expected results and reports differences through test.\r
-    void             checkResults(const char *heading, RBBITest *test);\r
-    // Reports a single position mismatch through test.\r
-    void             err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx);\r
-    // Empties the two fActual* vectors; the expected data is kept.\r
-    void             clearResults();\r
-};\r
-\r
-//\r
-// Constructor.\r
-//\r
-// Each UVector member is constructed with the caller's status; fDataToBreak\r
-//   is default-constructed.  The body itself has nothing to do.\r
-BITestData::BITestData(UErrorCode &status)\r
-: fExpectedBreakPositions(status), fExpectedTags(status),  fLineNum(status), fActualBreakPositions(status),\r
-  fActualTags(status)\r
-{\r
-};\r
-\r
-//\r
-// addDataChunk.   Add a section (non-breaking) piece of data to the test data.\r
-//                 The macro form collects the line number, which is helpful\r
-//                 when tracking down failures.\r
-//\r
-//                 A null data item is inserted at the start of each test's data\r
-//                  to put the starting zero into the data list.  The position saved for\r
-//                  each non-null item is its ending position.\r
-//\r
-// ADD_DATACHUNK forwards to td.addDataChunk(), supplying __LINE__ as the line number.\r
-//   The expansion already ends in ';', so writing "ADD_DATACHUNK(...);" leaves\r
-//   one extra empty statement behind.\r
-#define ADD_DATACHUNK(td, data, tag, status)   td.addDataChunk(data, tag, __LINE__, status);\r
-// Parameters:\r
-//     data     escaped text for one chunk, converted with CharsToUnicodeString();\r
-//              NULL appends no text.\r
-//     tag      value stored in fExpectedTags for this chunk.\r
-//     lineNum  value stored in fLineNum for this chunk.\r
-//     status   nothing is added when this is already a failure.\r
-// NOTE(review): status is a by-value copy, so an error set by addElement()\r
-//   below is not seen by the caller.\r
-void BITestData::addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status) {\r
-    if (U_FAILURE(status)) {return;}\r
-    if (data != NULL) {\r
-        fDataToBreak.append(CharsToUnicodeString(data));\r
-    }\r
-    // The saved position is the total length so far, i.e. the end of this chunk\r
-    //   (or the current length, unchanged, for a NULL chunk).\r
-    fExpectedBreakPositions.addElement(fDataToBreak.length(), status);\r
-    fExpectedTags.addElement(tag, status);\r
-    fLineNum.addElement(lineNum, status);\r
-};\r
-\r
-\r
-//\r
-//  checkResults.   Compare the actual and expected break positions, report any differences.\r
-//\r
-void BITestData::checkResults(const char *heading, RBBITest *test) {\r
-    int32_t   expectedIndex = 0;\r
-    int32_t   actualIndex = 0;\r
-\r
-    for (;;) {\r
-        // If we've run through both the expected and actual results vectors, we're done.\r
-        //   break out of the loop.\r
-        if (expectedIndex >= fExpectedBreakPositions.size() &&\r
-            actualIndex   >= fActualBreakPositions.size()) {\r
-            break;\r
-        }\r
-\r
-\r
-        if (expectedIndex >= fExpectedBreakPositions.size()) {\r
-            err(heading, test, expectedIndex-1, actualIndex);\r
-            actualIndex++;\r
-            continue;\r
-        }\r
-\r
-        if (actualIndex >= fActualBreakPositions.size()) {\r
-            err(heading, test, expectedIndex, actualIndex-1);\r
-            expectedIndex++;\r
-            continue;\r
-        }\r
-\r
-        if (fActualBreakPositions.elementAti(actualIndex) != fExpectedBreakPositions.elementAti(expectedIndex)) {\r
-            err(heading, test, expectedIndex, actualIndex);\r
-            // Try to resync the positions of the indices, to avoid a rash of spurious erros.\r
-            if (fActualBreakPositions.elementAti(actualIndex) < fExpectedBreakPositions.elementAti(expectedIndex)) {\r
-                actualIndex++;\r
-            } else {\r
-                expectedIndex++;\r
-            }\r
-            continue;\r
-        }\r
-\r
-        if (fActualTags.elementAti(actualIndex) != fExpectedTags.elementAti(expectedIndex)) {\r
-            test->errln("%s, tag mismatch.  Test Line = %d, expected tag=%d, got %d",\r
-                heading, fLineNum.elementAt(expectedIndex),\r
-                fExpectedTags.elementAti(expectedIndex), fActualTags.elementAti(actualIndex));\r
-        }\r
-\r
-        actualIndex++;\r
-        expectedIndex++;\r
-    }\r
-}\r
-\r
-//\r
-//  err   -  An error was found.  Report it, along with information about where the\r
-//                                incorrectly broken test data appeared in the source file.\r
-//\r
-void    BITestData::err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx)\r
-{\r
-    int32_t   expected = fExpectedBreakPositions.elementAti(expectedIdx);\r
-    int32_t   actual   = fActualBreakPositions.elementAti(actualIdx);\r
-    int32_t   o        = 0;\r
-    int32_t   line     = fLineNum.elementAti(expectedIdx);\r
-    if (expectedIdx > 0) {\r
-        // The line numbers are off by one because a premature break occurs somewhere\r
-        //    within the previous item, rather than at the start of the current (expected) item.\r
-        //    We want to report the offset of the unexpected break from the start of\r
-        //      this previous item.\r
-        o    = actual - fExpectedBreakPositions.elementAti(expectedIdx-1);\r
-    }\r
-    if (actual < expected) {\r
-        test->errln("%s unexpected break at offset %d in test item from line %d", heading, o, line);\r
-    } else {\r
-        test->errln("%s Failed to find break at end of item from line %d", heading, line);\r
-    }\r
-}\r
-\r
-\r
-// Discards the recorded actual positions and tags.  The test text and the\r
-//   expected data are left in place so the same fixture can be run again.\r
-void BITestData::clearResults() {\r
-    fActualBreakPositions.removeAllElements();\r
-    fActualTags.removeAllElements();\r
-}\r
-\r
-\r
-//-----------------------------------------------------------------------------------\r
-//\r
-//    Canned Test Characters\r
-//\r
-//-----------------------------------------------------------------------------------\r
-\r
-// Fixed set of UChar values, ending with a 0x0000 terminator.  The RBBITest\r
-//   constructor builds a UnicodeString from this array.\r
-static const UChar cannedTestArray[] = {\r
-    0x0001, 0x0002, 0x0003, 0x0004, 0x0020, 0x0021, '\\', 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0029, 0x002b, 0x002d, 0x0030, 0x0031,\r
-    0x0032, 0x0033, 0x0034, 0x003c, 0x003d, 0x003e, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x005b, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x007b,\r
-    0x007d, 0x007c, 0x002c, 0x00a0, 0x00a2,\r
-    0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b2, 0x00b3,\r
-    0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300, 0x0301, 0x0302, 0x0303,\r
-    0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949, 0x0f3a, 0x0f3b, 0x2000,\r
-    0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029, 0x202a, 0x203e, 0x203f,\r
-    0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000\r
-};\r
-\r
-// File-wide string allocated by the RBBITest constructor and deleted by the\r
-//   RBBITest destructor.\r
-static UnicodeString* cannedTestChars = 0;\r
-\r
-// Escaped code-point sequences, written with a doubled backslash so the C\r
-//   string holds the literal text "\uXXXX".  The half* macros are a consonant\r
-//   followed by U+094D and U+200D; deadTA is a consonant followed by U+094D.\r
-// NOTE(review): presumably meant to be unescaped with CharsToUnicodeString(),\r
-//   as addDataChunk() does - no use is visible in this part of the file.\r
-#define  halfNA     "\\u0928\\u094d\\u200d"\r
-#define  halfSA     "\\u0938\\u094d\\u200d"\r
-#define  halfCHA    "\\u091a\\u094d\\u200d"\r
-#define  halfKA     "\\u0915\\u094d\\u200d"\r
-#define  deadTA     "\\u0924\\u094d"\r
-\r
-//--------------------------------------------------------------------------------------\r
-//\r
-//    RBBITest    constructor and destructor\r
-//\r
-//--------------------------------------------------------------------------------------\r
-\r
-// Allocates the file-wide cannedTestChars string and fills it with a leading\r
-//   U+0000 followed by the contents of cannedTestArray.\r
-// NOTE(review): the pointer is a file-level static that is overwritten on\r
-//   every construction, so creating a second RBBITest while one is alive\r
-//   loses the earlier allocation.\r
-RBBITest::RBBITest() {\r
-    UnicodeString temp(cannedTestArray);\r
-    cannedTestChars = new UnicodeString();\r
-    *cannedTestChars += (UChar)0x0000;\r
-    *cannedTestChars += temp;\r
-}\r
-\r
-\r
-// Releases the file-wide cannedTestChars string.\r
-RBBITest::~RBBITest() {\r
-    delete cannedTestChars;\r
-    // cannedTestChars is a file-level static shared by all RBBITest objects.\r
-    //   Reset it so that a second destructor run deletes a null pointer\r
-    //   (harmless) instead of freeing the same string twice.\r
-    cannedTestChars = 0;\r
-}\r
-\r
-\r
-// Named tag values.\r
-// NOTE(review): TestJapaneseWordBreak passes the literals 300 and 400 as\r
-//   expected tags; presumably those correspond to T_H_OR_K and T_IDEO - confirm.\r
-static const int T_NUMBER = 100;\r
-static const int T_LETTER = 200;\r
-static const int T_H_OR_K = 300;\r
-static const int T_IDEO   = 400;\r
-\r
-\r
-\r
-\r
-\r
-\r
-//--------------------------------------------------------------------\r
-//Testing the BreakIterator for devanagari script\r
-//--------------------------------------------------------------------\r
-\r
-// Escaped code-point sequences (doubled backslash, so the C string holds the\r
-//   literal text "\uXXXX").  Each dead* macro is a consonant followed by U+094D.\r
-#define deadRA   "\\u0930\\u094d"         /*deadform RA = devanagari RA + virama*/\r
-#define deadPHA  "\\u092b\\u094d"         /*deadform PHA = devanagari PHA + virama*/\r
-#define deadTTHA "\\u0920\\u094d"\r
-#define deadPA   "\\u092a\\u094d"\r
-#define deadSA   "\\u0938\\u094d"\r
-#define visarga  "\\u0903"                /*devanagari visarga looks like an English colon*/\r
-\r
-\r
-\r
-\r
-\r
-\r
-//-----------------------------------------------------------------------------------\r
-//\r
-//   Test for status {tag} return value from break rules.\r
-//        TODO:  a more thorough test.\r
-//\r
-//-----------------------------------------------------------------------------------\r
-// Builds a RuleBasedBreakIterator from the inline rules below, iterates over\r
-//   testString1 with first()/next(), and checks each boundary against bounds1\r
-//   and each getRuleStatus() value against brkStatus.  Iteration stops at the\r
-//   first mismatch.\r
-void RBBITest::TestStatusReturn() {\r
-     // Three of the rules carry a {n} value (1, 2 and 4); those are the non-zero\r
-     //   values that appear in brkStatus below.\r
-     UnicodeString rulesString1 = "$Letters = [:L:];\n"\r
-                                  "$Numbers = [:N:];\n"\r
-                                  "$Letters+{1};\n"\r
-                                  "$Numbers+{2};\n"\r
-                                  "Help\\ {4}/me\\!;\n"\r
-                                  "[^$Letters $Numbers];\n"\r
-                                  "!.*;\n";\r
-     UnicodeString testString1  = "abc123..abc Help me Help me!";\r
-                                // 01234567890123456789012345678\r
-     // Both tables end with -1.  If the iterator yields more boundaries than\r
-     //   listed, the comparison against that -1 entry fails and ends the loop.\r
-     int32_t bounds1[]   = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1};\r
-     int32_t brkStatus[] = {0, 1, 2, 0, 0,  1,  0,  1,  0,  1,  0,  4,  1,  0, -1};\r
-\r
-     UErrorCode status=U_ZERO_ERROR;\r
-     UParseError    parseError;\r
-\r
-     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);\r
-     if(U_FAILURE(status)) {\r
-         errln("FAIL : in construction");\r
-     } else {\r
-         int32_t  pos;\r
-         int32_t  i = 0;\r
-         bi->setText(testString1);\r
-         for (pos=bi->first(); pos!= BreakIterator::DONE; pos=bi->next()) {\r
-             if (pos != bounds1[i]) {\r
-                 errln("FAIL:  expected break at %d, got %d\n", bounds1[i], pos);\r
-                 break;\r
-             }\r
-\r
-             int tag = bi->getRuleStatus();\r
-             if (tag != brkStatus[i]) {\r
-                 errln("FAIL:  break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);\r
-                 break;\r
-             }\r
-             i++;\r
-         }\r
-         // NOTE(review): nothing checks that i reached the end of bounds1, so an\r
-         //   iterator that stops early is not reported here.\r
-     }\r
-     // Runs on both the success and the failure path.\r
-     delete bi;\r
-}\r
-\r
-\r
-static void printStringBreaks(UnicodeString ustr, int expected[],\r
-                              int expectedcount)\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    char name[100];\r
-    printf("code    alpha extend alphanum type line name\n");\r
-    int j;\r
-    for (j = 0; j < ustr.length(); j ++) {\r
-        if (expectedcount > 0) {\r
-            int k;\r
-            for (k = 0; k < expectedcount; k ++) {\r
-                if (j == expected[k]) {\r
-                    printf("------------------------------------------------ %d\n",\r
-                           j);\r
-                }\r
-            }\r
-        }\r
-        UChar32 c = ustr.char32At(j);\r
-        if (c > 0xffff) {\r
-            j ++;\r
-        }\r
-        u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);\r
-        printf("%7x %5d %6d %8d %4s %4s %s\n", (int)c, \r
-                           u_isUAlphabetic(c), \r
-                           u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),\r
-                           u_isalnum(c), \r
-                           u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, \r
-                                                  u_charType(c), \r
-                                                  U_SHORT_PROPERTY_NAME), \r
-                           u_getPropertyValueName(UCHAR_LINE_BREAK, \r
-                                                  u_getIntPropertyValue(c, \r
-                                                             UCHAR_LINE_BREAK), \r
-                                                  U_SHORT_PROPERTY_NAME),\r
-                           name);\r
-    }\r
-}\r
-\r
-// Builds a list of Thai text chunks, each expected to end at a line-break\r
-//   boundary, and runs generalIteratorTest() on them with a line break\r
-//   iterator created for Locale("th").  Returns early, without running the\r
-//   iterator tests, if creating the iterator reports a failure.\r
-void RBBITest::TestThaiLineBreak() {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BITestData thaiLineSelection(status);\r
-\r
-    // \u0e2f-- the Thai paiyannoi character-- isn't a letter.  It's a symbol that\r
-    // represents elided letters at the end of a long word.  It should be bound to\r
-    // the end of the word and not treated as an independent punctuation mark.\r
-\r
-\r
-    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e2a\\u0e16\\u0e32\\u0e19\\u0e35\\u0e2f", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e08\\u0e30", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e21", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e08\\u0e49\\u0e32", 0, status);\r
-//        ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32", 0, status);\r
-//        ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48", 0, status);\r
-    // the commented-out lines (I think) are the preferred result; this line is what our current dictionary is giving us\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e2d\\u0e01", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e23\\u0e48\\u0e07", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e1a\\u0e32\\u0e22", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e22\\u0e48\\u0e32\\u0e07", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e15\\u0e47\\u0e21", 0, status);\r
-\r
-    // the one time where the paiyannoi occurs somewhere other than at the end\r
-    // of a word is in the Thai abbreviation for "etc.", which both begins and\r
-    // ends with a paiyannoi\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e2f\\u0e25\\u0e2f", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e31\\u0e49\\u0e19", 0, status);\r
-\r
-    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(\r
-        Locale("th"), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Thai locale in TestThaiLineBreak.\n");\r
-        return;\r
-    }\r
-\r
-    generalIteratorTest(*e, thaiLineSelection);\r
-    delete e;\r
-}\r
-\r
-\r
-\r
-// Line-break test over Thai text mixed with quotes, periods, a currency\r
-//   amount and parentheses, using a line break iterator for Locale("th").\r
-//   The large block comment below holds test data that is currently disabled.\r
-//   Returns early if creating the iterator reports a failure.\r
-void RBBITest::TestMixedThaiLineBreak()\r
-{\r
-    UErrorCode   status = U_ZERO_ERROR;\r
-    BITestData   thaiLineSelection(status);\r
-\r
-    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data\r
-\r
-    // Arabic numerals should always be separated from surrounding Thai text\r
-/*\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e04\\u0e48\\u0e32", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e07\\u0e34\\u0e19", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e15\\u0e30", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e31\\u0e1a", 0, status);\r
-        thaiLineSelection->addElement("39");\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17 ", 0, status);\r
-\r
-        // words in non-Thai scripts should always be separated from surrounding Thai text\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e14", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e2a\\u0e2d\\u0e1a", 0, status);\r
-        thaiLineSelection->addElement("Java");\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e19", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e04\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e2d\\u0e1a\\u0e35\\u0e40\\u0e2d\\u0e47\\u0e21 ", 0, status);\r
-\r
-        // Thai numerals should always be separated from the text surrounding them\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e04\\u0e48\\u0e32", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e07\\u0e34\\u0e19", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e15\\u0e30", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e31\\u0e1a", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e53\\u0e59", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17 ", 0, status);\r
-\r
-        // Thai text should interact correctly with punctuation and symbols\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e2d\\u0e1a\\u0e35\\u0e40\\u0e2d\\u0e47\\u0e21", 0, status);\r
-//        ADD_DATACHUNK(thaiLineSelection, "(\\u0e1b\\u0e23\\u0e30\\u0e40\\u0e17\\u0e28", 0, status);\r
-//        ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e17\\u0e22)", 0, status);\r
-ADD_DATACHUNK(thaiLineSelection, "(\\u0e1b\\u0e23\\u0e30\\u0e40\\u0e17\\u0e28\\u0e44\\u0e17\\u0e22)", 0, status);\r
-// I believe the commented-out reading above to be the correct one, but this is what passes with our current dictionary\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e08\\u0e33\\u0e01\\u0e31\\u0e14", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e1b\\u0e34\\u0e14", 0, status);\r
-        ADD_DATACHUNK(thaiLineSelection, "\\u0e15\\u0e31\\u0e27\"", 0, status);\r
-*/\r
-\r
-    // The Unicode Linebreak TR says do not break before or after quotes.\r
-    //    So this test is changed to not break around the quote.\r
-    //    TODO:  should Thai break around the quotes, like the original behavior here?\r
-//    ADD_DATACHUNK(thaiLineSelection, "\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\"", 0, status);\r
-//    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e38\\u0e48\\u0e19", 0, status);\r
-      ADD_DATACHUNK(thaiLineSelection, "\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\""\r
-                                                         "\\u0e23\\u0e38\\u0e48\\u0e19", 0, status);\r
-\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e43\\u0e2b\\u0e21\\u0e48", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e14\\u0e37\\u0e2d\\u0e19\\u0e21\\u0e34.", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e22.", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e35\\u0e49", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e32\\u0e04\\u0e32", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "$200", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e17\\u0e48\\u0e32", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e31\\u0e49\\u0e19 ", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "(\"\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\").", 0, status);\r
-\r
-    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale("th"), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Thai locale in TestMixedThaiLineBreak.\n");\r
-        return;\r
-    }\r
-\r
-\r
-    generalIteratorTest(*e, thaiLineSelection);\r
-    delete e;\r
-}\r
-\r
-\r
-// Line-break test for text containing the Thai maiyamok character (\u0e46),\r
-//   using a line break iterator for Locale("th").  Returns early if creating\r
-//   the iterator reports a failure.\r
-void RBBITest::TestMaiyamok()\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BITestData   thaiLineSelection(status);\r
-    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data\r
-    // the Thai maiyamok character is a shorthand symbol that means "repeat the previous\r
-    // word".  Instead of appearing as a word unto itself, however, it's kept together\r
-    // with the word before it\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e1b\\u0e46", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32\\u0e46", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e2b\\u0e27\\u0e48\\u0e32\\u0e07", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e01\\u0e23\\u0e38\\u0e07\\u0e40\\u0e17\\u0e1e", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e25\\u0e30", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e03\\u0e35\\u0e22\\u0e07", 0, status);\r
-    ADD_DATACHUNK(thaiLineSelection, "\\u0e43\\u0e2b\\u0e21\\u0e48", 0, status);\r
-\r
-    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(\r
-        Locale("th"), status);\r
-\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Thai locale in TestMaiyamok.\n");\r
-        return;\r
-    }\r
-    generalIteratorTest(*e, thaiLineSelection);\r
-    delete e;\r
-}\r
-\r
-// Word-break test over Thai text, using a word break iterator for\r
-//   Locale("th").  The trailing number on each data line is the expected\r
-//   break position after that chunk.  Where the dictionary result differs\r
-//   from the preferred one, the preferred chunks are kept as comments and the\r
-//   active chunks encode what the dictionary produces.\r
-//   Returns early if creating the iterator reports a failure.\r
-void RBBITest::TestThaiWordBreak() {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BITestData   thaiWordSelection(status);\r
-\r
-    ADD_DATACHUNK(thaiWordSelection, NULL, 0, status);           // Break at start of data\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E1A\\u0E17", 0, status); //2\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E17\\u0E35\\u0E48", 0, status); //5\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E51", 0, status); //6\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E1E\\u0E32\\u0E22\\u0E38", 0, status); //10\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E44\\u0E0B\\u0E42\\u0E04\\u0E25\\u0E19", 0, status); //16\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u000D\\u000A", 0, status); //18\r
-\r
-    // This is the correct result\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E42\\u0E14\\u0E42\\u0E23\\u0E18\\u0E35", 0, status); //24\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E2D\\u0E32\\u0E28\\u0E31\\u0E22", 0, status); //29\r
-\r
-    // and this is what the dictionary does...\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E42\\u0E14", 0, status); // 20\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E42\\u0E23\\u0E18\\u0E35\\u0E2D\\u0E32\\u0E28\\u0E31\\u0E22", 0, status); //29\r
-\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E2D\\u0E22\\u0E39\\u0E48", 0, status); //33\r
-\r
-    // This is the correct result\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E17\\u0E48\\u0E32\\u0E21", 0, status); //37\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E01\\u0E25\\u0E32\\u0E07", 0, status); //41\r
-\r
-    // and this is what the dictionary does\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E17\\u0E48\\u0E32\\u0E21\\u0E01\\u0E25\\u0E32\\u0E07", 0, status); //41\r
-\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E17\\u0E38\\u0E48\\u0E07", 0, status); //45\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E43\\u0E2B\\u0E0D\\u0E48", 0, status); //49\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E43\\u0E19", 0, status); //51\r
-\r
-    // This is the correct result\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E41\\u0E04\\u0E19\\u0E0B\\u0E31\\u0E2A", 0, status); //57\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E01\\u0E31\\u0E1A", 0, status); //60\r
-\r
-    // and this is what the dictionary does\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E41\\u0E04\\u0E19", 0, status); // 54\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E0B\\u0E31\\u0E2A\\u0E01\\u0E31\\u0E1A", 0, status); //60\r
-\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E25\\u0E38\\u0E07", 0, status); //63\r
-\r
-    // This is the correct result\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E40\\u0E2E\\u0E19\\u0E23\\u0E35", 0, status); //68\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E0A\\u0E32\\u0E27", 0, status); //71\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E44\\u0E23\\u0E48", 0, status); //74\r
-    //ADD_DATACHUNK(thaiWordSelection, "\\u0E41\\u0E25\\u0E30", 0, status); //77\r
-\r
-    // and this is what the dictionary does\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E40\\u0E2E", 0, status); // 65\r
-    ADD_DATACHUNK(thaiWordSelection, "\\u0E19\\u0E23\\u0E35\\u0E0A\\u0E32\\u0E27\\u0E44\\u0E23\\u0E48\\u0E41\\u0E25\\u0E30", 0, status); //77\r
-\r
-    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(\r
-        Locale("th"), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Thai locale in TestThaiWordBreak.\n");\r
-        return;\r
-    }\r
-\r
-    generalIteratorTest(*e, thaiWordSelection);\r
-    delete e;\r
-}\r
-\r
-\r
-// Checks that following(1) and following(0) both return 4 on a string made\r
-//   of the same four-UChar Thai word repeated four times, using a word break\r
-//   iterator for Locale("th").  Returns early if the iterator cannot be created.\r
-void RBBITest::TestBug3818() {\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-\r
-    // Four Thai words...\r
-    static const UChar thaiWordData[] = {  0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, \r
-                                           0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 }; \r
-    UnicodeString  thaiStr(thaiWordData);\r
-\r
-    RuleBasedBreakIterator* bi = \r
-        (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale("th"), status);\r
-    if (U_FAILURE(status) || bi == NULL) {\r
-        errln("Fail at file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));\r
-        return;\r
-    }\r
-    bi->setText(thaiStr);\r
-\r
-    // Starting from inside the first word (offset 1).\r
-    int32_t  startOfSecondWord = bi->following(1);\r
-    if (startOfSecondWord != 4) {\r
-        errln("Fail at file %s, line %d expected start of word at 4, got %d",\r
-            __FILE__, __LINE__, startOfSecondWord);\r
-    }\r
-    // Starting from the very beginning of the text (offset 0).\r
-    startOfSecondWord = bi->following(0);\r
-    if (startOfSecondWord != 4) {\r
-        errln("Fail at file %s, line %d expected start of word at 4, got %d",\r
-            __FILE__, __LINE__, startOfSecondWord);\r
-    }\r
-    delete bi;\r
-}\r
-\r
-\r
-// Word-break test over a short Japanese sentence, using a word break iterator\r
-//   for Locale("ja").  Unlike the Thai tests, the chunks here carry non-zero\r
-//   expected tags (400 and 300).  Returns early if creating the iterator\r
-//   reports a failure.\r
-void RBBITest::TestJapaneseWordBreak() {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BITestData   japaneseWordSelection(status);\r
-\r
-    ADD_DATACHUNK(japaneseWordSelection, NULL, 0, status);           // Break at start of data\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u4ECA\\u65E5", 400, status); //2\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u306F\\u3044\\u3044", 300, status); //5\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u5929\\u6C17", 400, status); //7\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u3067\\u3059\\u306D", 300, status); //10\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u3002", 0, status); //11\r
-    ADD_DATACHUNK(japaneseWordSelection, "\\u000D\\u000A", 0, status); //12\r
-\r
-    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(\r
-        Locale("ja"), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Japanese locale in TestJapaneseWordBreak.\n");\r
-        return;\r
-    }\r
-\r
-    generalIteratorTest(*e, japaneseWordSelection);\r
-    delete e;\r
-}\r
-\r
-//---------------------------------------------\r
-// runIndexedTest\r
-//---------------------------------------------\r
-\r
-// Maps a test index to its name and, when exec is true, runs that test.\r
-//     index   selects the test; an index with no case sets name to "".\r
-//     exec    when false only name is set and no test is run.\r
-//     name    receives the name of the selected test.\r
-//     params  passed on to TestMonkey only; no other test uses it.\r
-void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params )\r
-{\r
-    if (exec) logln("TestSuite RuleBasedBreakIterator: ");\r
-    \r
-    switch (index) {\r
-        case 0: name = "TestBug4153072";\r
-            if(exec) TestBug4153072();                         break;\r
-        case 1: name = "TestJapaneseLineBreak";\r
-            if(exec) TestJapaneseLineBreak();                 break;\r
-        case 2: name = "TestStatusReturn";\r
-            if(exec) TestStatusReturn();                       break;\r
-\r
-        case 3: name = "TestLineBreakData";\r
-            if(exec) TestLineBreakData();                      break;\r
-        case 4: name = "TestEmptyString";\r
-            if(exec) TestEmptyString();                        break;\r
-\r
-        case 5: name = "TestGetAvailableLocales";\r
-            if(exec) TestGetAvailableLocales();                break;\r
-\r
-        case 6: name = "TestGetDisplayName";\r
-            if(exec) TestGetDisplayName();                     break;\r
-\r
-        case 7: name = "TestEndBehaviour";\r
-            if(exec) TestEndBehaviour();                       break;\r
-        case 8: name = "TestMixedThaiLineBreak";\r
-             if(exec) TestMixedThaiLineBreak();                break;\r
-        case 9: name = "TestThaiWordBreak";\r
-             if(exec) TestThaiWordBreak();                     break;\r
-        case 10: name = "TestThaiLineBreak";\r
-             if(exec) TestThaiLineBreak();                     break;\r
-        case 11: name = "TestMaiyamok";\r
-             if(exec) TestMaiyamok();                          break;\r
-        case 12: name = "TestWordBreaks";\r
-             if(exec) TestWordBreaks();                        break;\r
-        case 13: name = "TestWordBoundary";\r
-             if(exec) TestWordBoundary();                      break;\r
-        case 14: name = "TestLineBreaks";\r
-             if(exec) TestLineBreaks();                        break;\r
-        case 15: name = "TestSentBreaks";\r
-             if(exec) TestSentBreaks();                        break;\r
-        case 16: name = "TestExtended";\r
-             if(exec) TestExtended();                          break;\r
-        case 17: name = "TestMonkey";\r
-             if(exec) {\r
-                 // TestMonkey is compiled in only when regular expressions are\r
-                 //   available; otherwise the skip is logged.\r
- #if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-               TestMonkey(params);\r
- #else\r
-               logln("skipping TestMonkey (UCONFIG_NO_REGULAR_EXPRESSIONS)");\r
- #endif\r
-             }\r
-                                                               break;\r
-        case 18: name = "TestBug3818";\r
-            if(exec) TestBug3818();                            break;\r
-        case 19: name = "TestJapaneseWordBreak";\r
-            if(exec) TestJapaneseWordBreak();                  break;\r
-\r
-        default: name = ""; break; //needed to end loop\r
-    }\r
-}\r
-\r
-\r
-//----------------------------------------------------------------------------\r
-//\r
-// generalIteratorTest      Given a break iterator and a set of test data,\r
-//                          Run the tests and report the results.\r
-//\r
-//----------------------------------------------------------------------------\r
-void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td)\r
-{\r
-\r
-    bi.setText(td.fDataToBreak);\r
-\r
-    testFirstAndNext(bi, td);\r
-\r
-    testLastAndPrevious(bi, td);\r
-\r
-    testFollowing(bi, td);\r
-    testPreceding(bi, td);\r
-    testIsBoundary(bi, td);\r
-    doMultipleSelectionTest(bi, td);\r
-}\r
-\r
-\r
-//\r
-//   testFirstAndNext.   Run the iterator forwards in the obvious first(), next()\r
-//                       kind of loop.\r
-//\r
-void RBBITest::testFirstAndNext(RuleBasedBreakIterator& bi, BITestData &td)\r
-{\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-    int32_t     p;\r
-    int32_t     lastP = -1;\r
-    int32_t     tag;\r
-\r
-    logln("Test first and next");\r
-    bi.setText(td.fDataToBreak);\r
-    td.clearResults();\r
-\r
-    for (p=bi.first(); p!=RuleBasedBreakIterator::DONE; p=bi.next()) {\r
-        td.fActualBreakPositions.addElement(p, status);  // Save result.\r
-        tag = bi.getRuleStatus();\r
-        td.fActualTags.addElement(tag, status);\r
-        if (p <= lastP) {\r
-            // If the iterator is not making forward progress, stop.\r
-            //  No need to raise an error here, it'll be detected in the normal check of results.\r
-            break;\r
-        }\r
-        lastP = p;\r
-    }\r
-    td.checkResults("testFirstAndNext", this);\r
-}\r
-\r
-\r
-//\r
-//  TestLastAndPrevious.   Run the iterator backwards, starting with last().\r
-//\r
-void  RBBITest::testLastAndPrevious(RuleBasedBreakIterator& bi,  BITestData &td)\r
-{\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-    int32_t     p;\r
-    int32_t     lastP  = 0x7ffffffe;\r
-    int32_t     tag;\r
-\r
-    logln("Test first and next");\r
-    bi.setText(td.fDataToBreak);\r
-    td.clearResults();\r
-\r
-    for (p=bi.last(); p!=RuleBasedBreakIterator::DONE; p=bi.previous()) {\r
-        // Save break position.  Insert it at start of vector of results, shoving\r
-        //    already-saved results further towards the end.\r
-        td.fActualBreakPositions.insertElementAt(p, 0, status);\r
-        // bi.previous();   // TODO:  Why does this fix things up????\r
-        // bi.next();\r
-        tag = bi.getRuleStatus();\r
-        td.fActualTags.insertElementAt(tag, 0, status);\r
-        if (p >= lastP) {\r
-            // If the iterator is not making progress, stop.\r
-            //  No need to raise an error here, it'll be detected in the normal check of results.\r
-            break;\r
-        }\r
-        lastP = p;\r
-    }\r
-    td.checkResults("testLastAndPrevious", this);\r
-}\r
-\r
-\r
-void RBBITest::testFollowing(RuleBasedBreakIterator& bi, BITestData &td)\r
-{\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-    int32_t     p;\r
-    int32_t     tag;\r
-    int32_t     lastP  = -2;     // A value that will never be returned as a break position.\r
-                                 //   cannot be -1; that is returned for DONE.\r
-    int         i;\r
-\r
-    logln("testFollowing():");\r
-    bi.setText(td.fDataToBreak);\r
-    td.clearResults();\r
-\r
-    // Save the starting point, since we won't get that out of following.\r
-    p = bi.first();\r
-    td.fActualBreakPositions.addElement(p, status);  // Save result.\r
-    tag = bi.getRuleStatus();\r
-    td.fActualTags.addElement(tag, status);\r
-\r
-    for (i = 0; i <= td.fDataToBreak.length()+1; i++) {\r
-        p = bi.following(i);\r
-        if (p != lastP) {\r
-            if (p == RuleBasedBreakIterator::DONE) {\r
-                break;\r
-            }\r
-            // We've reached a new break position.  Save it.\r
-            td.fActualBreakPositions.addElement(p, status);  // Save result.\r
-            tag = bi.getRuleStatus();\r
-            td.fActualTags.addElement(tag, status);\r
-            lastP = p;\r
-        }\r
-    }\r
-    // The loop normally exits by means of the break in the middle.\r
-    // Make sure that the index was at the correct position for the break iterator to have\r
-    //   returned DONE.\r
-    if (i != td.fDataToBreak.length()) {\r
-        errln("testFollowing():  iterator returned DONE prematurely.");\r
-    }\r
-\r
-    // Full check of all results.\r
-    td.checkResults("testFollowing", this);\r
-}\r
-\r
-\r
-\r
-void RBBITest::testPreceding(RuleBasedBreakIterator& bi,  BITestData &td) {\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-    int32_t     p;\r
-    int32_t     tag;\r
-    int32_t     lastP  = 0x7ffffffe;\r
-    int         i;\r
-\r
-    logln("testPreceding():");\r
-    bi.setText(td.fDataToBreak);\r
-    td.clearResults();\r
-\r
-    p = bi.last();\r
-    td.fActualBreakPositions.addElement(p, status);\r
-    tag = bi.getRuleStatus();\r
-    td.fActualTags.addElement(tag, status);\r
-\r
-    for (i = td.fDataToBreak.length(); i>=-1; i--) {\r
-        p = bi.preceding(i);\r
-        if (p != lastP) {\r
-            if (p == RuleBasedBreakIterator::DONE) {\r
-                break;\r
-            }\r
-            // We've reached a new break position.  Save it.\r
-            td.fActualBreakPositions.insertElementAt(p, 0, status);\r
-            lastP = p;\r
-            tag = bi.getRuleStatus();\r
-            td.fActualTags.insertElementAt(tag, 0, status);\r
-        }\r
-    }\r
-    // The loop normally exits by means of the break in the middle.\r
-    // Make sure that the index was at the correct position for the break iterator to have\r
-    //   returned DONE.\r
-    if (i != 0) {\r
-        errln("testPreceding():  iterator returned DONE prematurely.");\r
-    }\r
-\r
-    // Full check of all results.\r
-    td.checkResults("testPreceding", this);\r
-}\r
-\r
-\r
-\r
-void RBBITest::testIsBoundary(RuleBasedBreakIterator& bi,  BITestData &td) {\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-    int         i;\r
-    int32_t     tag;\r
-\r
-    logln("testIsBoundary():");\r
-    bi.setText(td.fDataToBreak);\r
-    td.clearResults();\r
-\r
-    for (i = 0; i <= td.fDataToBreak.length(); i++) {\r
-        if (bi.isBoundary(i)) {\r
-            td.fActualBreakPositions.addElement(i, status);  // Save result.\r
-            tag = bi.getRuleStatus();\r
-            td.fActualTags.addElement(tag, status);\r
-        }\r
-    }\r
-    td.checkResults("testIsBoundary: ", this);\r
-}\r
-\r
-\r
-\r
-void RBBITest::doMultipleSelectionTest(RuleBasedBreakIterator& iterator, BITestData &td)\r
-{\r
-    iterator.setText(td.fDataToBreak);\r
-\r
-    RuleBasedBreakIterator* testIterator =(RuleBasedBreakIterator*)iterator.clone();\r
-    int32_t offset = iterator.first();\r
-    int32_t testOffset;\r
-    int32_t count = 0;\r
-\r
-    logln("doMultipleSelectionTest text of length: %d", td.fDataToBreak.length());\r
-\r
-    if (*testIterator != iterator)\r
-        errln("clone() or operator!= failed: two clones compared unequal");\r
-\r
-    do {\r
-        testOffset = testIterator->first();\r
-        testOffset = testIterator->next(count);\r
-        if (offset != testOffset)\r
-            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);\r
-\r
-        if (offset != RuleBasedBreakIterator::DONE) {\r
-            count++;\r
-            offset = iterator.next();\r
-\r
-            if (offset != RuleBasedBreakIterator::DONE && *testIterator == iterator) {\r
-                errln("operator== failed: Two unequal iterators compared equal. count=%d offset=%d", count, offset);\r
-                if (count > 10000 || offset == -1) {\r
-                    errln("operator== failed too many times. Stopping test.");\r
-                    if (offset == -1) {\r
-                        errln("Does (RuleBasedBreakIterator::DONE == -1)?");\r
-                    }\r
-                    return;\r
-                }\r
-            }\r
-        }\r
-    } while (offset != RuleBasedBreakIterator::DONE);\r
-\r
-    // now do it backwards...\r
-    offset = iterator.last();\r
-    count = 0;\r
-\r
-    do {\r
-        testOffset = testIterator->last();\r
-        testOffset = testIterator->next(count);   // next() with a negative arg is same as previous\r
-        if (offset != testOffset)\r
-            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);\r
-\r
-        if (offset != RuleBasedBreakIterator::DONE) {\r
-            count--;\r
-            offset = iterator.previous();\r
-        }\r
-    } while (offset != RuleBasedBreakIterator::DONE);\r
-\r
-    delete testIterator;\r
-}\r
-\r
-\r
-\r
-//--------------------------------------------------------------------------------------------\r
-//\r
-//    Break Iterator Invariants Tests\r
-//\r
-//--------------------------------------------------------------------------------------------\r
-\r
-void RBBITest::TestCharacterInvariants()\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BreakIterator *e = BreakIterator::createCharacterInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestCharacterInvariants.\n");\r
-        return;\r
-    }\r
-    UnicodeString s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");\r
-    doBreakInvariantTest(*e, s);\r
-    s = *cannedTestChars + CharsToUnicodeString("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");\r
-    doOtherInvariantTest(*e, s);\r
-    delete e;\r
-}\r
-\r
-\r
-void RBBITest::TestWordInvariants()\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BreakIterator *e = BreakIterator::createWordInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestWordInvariants.\n");\r
-        return;\r
-    }\r
-    UnicodeString s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");\r
-    doBreakInvariantTest(*e, s);\r
-    s = *cannedTestChars + CharsToUnicodeString("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");\r
-    doOtherInvariantTest(*e, s);\r
-    delete e;\r
-}\r
-\r
-\r
-void RBBITest::TestSentenceInvariants()\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BreakIterator *e = BreakIterator::createSentenceInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestSentenceInvariant.\n");\r
-        return;\r
-    }\r
-    UnicodeString s = *cannedTestChars + CharsToUnicodeString(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff");\r
-    doOtherInvariantTest(*e, s);\r
-    delete e;\r
-}\r
-\r
-\r
-\r
-\r
-void RBBITest::doBreakInvariantTest(BreakIterator& tb, UnicodeString& testChars)\r
-{\r
-    UnicodeString work("aaa");\r
-    int32_t errCount = 0, testCharsLen = testChars.length(), breaksLen;\r
-\r
-    // a break should always occur after CR (unless followed by LF), LF, PS, and LS\r
-    UnicodeString breaks = CharsToUnicodeString("\r\n\\u2029\\u2028");\r
-    int32_t i, j;\r
-\r
-    breaksLen = breaks.length();\r
-    for (i = 0; i < breaksLen; i++) {\r
-        UChar c1 = breaks[i];\r
-        work.setCharAt(1, c1);\r
-        for (j = 0; j < testCharsLen; j++) {\r
-            UChar c0 = testChars[j];\r
-            work.setCharAt(0, c0);\r
-            int k;\r
-            for (k = 0; k < testCharsLen; k++) {\r
-                UChar c2 = testChars[k];\r
-                work.setCharAt(2, c2);\r
-\r
-                // if a cr is followed by lf, ps, ls or etx, don't do the check (that's\r
-                // not supposed to work)\r
-                if (c1 == '\r' && (c2 == '\n' || c2 == 0x2029\r
-                        || c2 == 0x2028 || c2 == 0x0003))\r
-                    continue;\r
-\r
-                if (u_charType(c1) == U_CONTROL_CHAR &&\r
-                    (u_charType(c2) == U_NON_SPACING_MARK ||\r
-                     u_charType(c2) == U_ENCLOSING_MARK ||\r
-                     u_charType(c2) == U_COMBINING_SPACING_MARK)\r
-                    ) {\r
-                    // Combining marks don't combine with controls.\r
-                    //  TODO:  enhance test to verify that the break actually occurs,\r
-                    //         not just ignore the case.\r
-                    continue;\r
-                }\r
-\r
-\r
-                tb.setText(work);\r
-                UBool seen2 = FALSE;\r
-                int l;\r
-                for (l = tb.first(); l != BreakIterator::DONE; l = tb.next()) {\r
-                    if (l == 2) {\r
-                        seen2 = TRUE;\r
-                        break;\r
-                    }\r
-                }\r
-                if (!seen2) {\r
-                    printStringBreaks(work, NULL, 0); \r
-                    errln("No Break between \\U%04x and \\U%04x", c1, c2);\r
-                    errCount++;\r
-                    if (errCount >= 75)\r
-                        return;\r
-                }\r
-            }\r
-        }\r
-    }\r
-}\r
-\r
-\r
-\r
-void RBBITest::doOtherInvariantTest(BreakIterator& tb, UnicodeString& testChars)\r
-{\r
-    UnicodeString work("a\r\na");\r
-    int32_t errCount = 0, testCharsLen = testChars.length();\r
-    int32_t i, j;\r
-    int8_t type;\r
-\r
-    // a break should never occur between CR and LF\r
-    for (i = 0; i < testCharsLen; i++) {\r
-        work.setCharAt(0, testChars[i]);\r
-        for (j = 0; j < testCharsLen; j++) {\r
-            work.setCharAt(3, testChars[j]);\r
-            tb.setText(work);\r
-            int32_t k;\r
-            for (k = tb.first(); k != BreakIterator::DONE; k = tb.next())\r
-                if (k == 2) {\r
-                    errln("Break between CR and LF in string U\\%04x U\\%04x U\\%04x U\\%04x",\r
-                        work[0], work[1], work[2], work[3]);\r
-                    errCount++;\r
-                    if (errCount >= 75)\r
-                        return;\r
-                }\r
-        }\r
-    }\r
-\r
-    // a break should never occur before a non-spacing mark, unless the preceding\r
-    // character is CR, LF, PS, or LS\r
-    //   Or the general category == Control.\r
-    work.remove();\r
-    work += "aaaa";\r
-    for (i = 0; i < testCharsLen; i++) {\r
-        UChar c1 = testChars[i];\r
-        if (c1 == '\n' || c1 == '\r' || c1 == 0x2029 || c1 == 0x2028 || c1 == 0x0003 ||\r
-            u_charType(c1) == U_CONTROL_CHAR  ||  u_charType(c1) == U_FORMAT_CHAR) {\r
-            continue;\r
-        }\r
-        work.setCharAt(1, c1);\r
-        for (j = 0; j < testCharsLen; j++) {\r
-            UChar c2 = testChars[j];\r
-            type = u_charType(c2);\r
-            if ((type != U_NON_SPACING_MARK) &&\r
-                (type != U_ENCLOSING_MARK)) {\r
-                continue;\r
-            }\r
-            work.setCharAt(2, c2);\r
-            tb.setText(work);\r
-            int k;\r
-            for (k = tb.first(); k != BreakIterator::DONE; k = tb.next())\r
-                if (k == 2) {\r
-                    //errln("Break between U+" + UCharToUnicodeString(work[1])\r
-                    //        + " and U+" + UCharToUnicodeString(work[2]));\r
-                    errln("Unexpected Break between %6x and %6x", c1, c2);\r
-                    errCount++;\r
-                    if (errCount >= 75)\r
-                        return;\r
-                }\r
-        }\r
-    }\r
-}\r
-\r
-\r
-\r
-\r
-//---------------------------------------------\r
-//\r
-//     other tests\r
-//\r
-//---------------------------------------------\r
-void RBBITest::TestEmptyString()\r
-{\r
-    UnicodeString text = "";\r
-    UErrorCode status = U_ZERO_ERROR;\r
-\r
-    BITestData x(status);\r
-    ADD_DATACHUNK(x, "", 0, status);           // Break at start of data\r
-    RuleBasedBreakIterator* bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestEmptyString.\n");\r
-        return;\r
-    }\r
-    generalIteratorTest(*bi, x);\r
-    delete bi;\r
-}\r
-\r
-void RBBITest::TestGetAvailableLocales()\r
-{\r
-    int32_t locCount = 0;\r
-    const Locale* locList = BreakIterator::getAvailableLocales(locCount);\r
-\r
-    if (locCount == 0)\r
-        errln("getAvailableLocales() returned an empty list!");\r
-    // Just make sure that it's returning good memory.\r
-    int32_t i;\r
-    for (i = 0; i < locCount; ++i) {\r
-        logln(locList[i].getName());\r
-    }\r
-}\r
-\r
-//Testing the BreakIterator::getDisplayName() function\r
-void RBBITest::TestGetDisplayName()\r
-{\r
-    UnicodeString   result;\r
-\r
-    BreakIterator::getDisplayName(Locale::getUS(), result);\r
-    if (Locale::getDefault() == Locale::getUS() && result != "English (United States)")\r
-        errln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \""\r
-                + result);\r
-\r
-    BreakIterator::getDisplayName(Locale::getFrance(), Locale::getUS(), result);\r
-    if (result != "French (France)")\r
-        errln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \""\r
-                + result);\r
-}\r
-/**\r
- * Test End Behaviour\r
- * @bug 4068137\r
- */\r
-void RBBITest::TestEndBehaviour()\r
-{\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    UnicodeString testString("boo.");\r
-    BreakIterator *wb = BreakIterator::createWordInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestEndBehaviour.\n");\r
-        return;\r
-    }\r
-    wb->setText(testString);\r
-\r
-    if (wb->first() != 0)\r
-        errln("Didn't get break at beginning of string.");\r
-    if (wb->next() != 3)\r
-        errln("Didn't get break before period in \"boo.\"");\r
-    if (wb->current() != 4 && wb->next() != 4)\r
-        errln("Didn't get break at end of string.");\r
-    delete wb;\r
-}\r
-/*\r
- * @bug 4153072\r
- */\r
-void RBBITest::TestBug4153072() {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    BreakIterator *iter = BreakIterator::createWordInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for default locale in TestBug4153072\n");\r
-        return;\r
-    }\r
-    UnicodeString str("...Hello, World!...");\r
-    int32_t begin = 3;\r
-    int32_t end = str.length() - 3;\r
-    UBool dummy;\r
-\r
-    StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin);\r
-    iter->adoptText(textIterator);\r
-    int index;\r
-    for (index = -1; index < begin + 1; ++index) {\r
-        dummy = iter->isBoundary(index);\r
-        if (index < begin && dummy == TRUE) {\r
-            errln((UnicodeString)"Didn't handle preceeding correctly with offset = " + index +\r
-                            " and begin index = " + begin);\r
-        }\r
-    }\r
-    delete iter;\r
-}\r
-\r
-\r
-/**\r
- * Test Japanese Line Break\r
- * @bug 4095322\r
- */\r
-void RBBITest::TestJapaneseLineBreak()\r
-{\r
-#if 0\r
-    // Test needs updating some more...   Dump it for now.\r
-\r
-\r
-    // Change for Unicode TR 14:  Punctuation characters with categories Pi and Pf do not count\r
-    //        as opening and closing punctuation for line breaking.\r
-    //        Also, \u30fc and \u30fe are not counted as hyphens.   Remove these chars\r
-    //        from these tests.    6-13-2002\r
-    //\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    UnicodeString testString = CharsToUnicodeString("\\u4e00x\\u4e8c");\r
-    UnicodeString precedingChars = CharsToUnicodeString(\r
-        //"([{\\u00ab$\\u00a5\\u00a3\\u00a4\\u2018\\u201a\\u201c\\u201e\\u201b\\u201f");\r
-        "([{$\\u00a5\\u00a3\\u00a4\\u201a\\u201e");\r
-    UnicodeString followingChars = CharsToUnicodeString(\r
-        // ")]}\\u00bb!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7\\u30fc"\r
-        ")]}!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7"\r
-        // ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u30fe\\u2019\\u201d\\u00b0\\u2032\\u2033\\u2034"\r
-        ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u00b0\\u2032\\u2033\\u2034"\r
-        "\\u2030\\u2031\\u2103\\u2109\\u00a2\\u0300\\u0301\\u0302");\r
-    BreakIterator *iter = BreakIterator::createLineInstance(Locale::getJapan(), status);\r
-\r
-    int32_t i;\r
-    if (U_FAILURE(status))\r
-    {\r
-        errln("Failed to create the BreakIterator for Japanese locale in TestJapaneseLineBreak.\n");\r
-        return;\r
-    }\r
-\r
-    for (i = 0; i < precedingChars.length(); i++) {\r
-        testString.setCharAt(1, precedingChars[i]);\r
-        iter->setText(testString);\r
-        int32_t j = iter->first();\r
-        if (j != 0)\r
-            errln("ja line break failure: failed to start at 0");\r
-        j = iter->next();\r
-        if (j != 1)\r
-            errln("ja line break failure: failed to stop before '" + UCharToUnicodeString(precedingChars[i])\r
-                        + "' (" + ((int)(precedingChars[i])) + ")");\r
-        j = iter->next();\r
-        if (j != 3)\r
-            errln("ja line break failure: failed to skip position after '" + UCharToUnicodeString(precedingChars[i])\r
-                        + "' (" + ((int)(precedingChars[i])) + ")");\r
-    }\r
-\r
-    for (i = 0; i < followingChars.length(); i++) {\r
-        testString.setCharAt(1, followingChars[i]);\r
-        iter->setText(testString);\r
-        int j = iter->first();\r
-        if (j != 0)\r
-            errln("ja line break failure: failed to start at 0");\r
-        j = iter->next();\r
-        if (j != 2)\r
-            errln("ja line break failure: failed to skip position before '" + UCharToUnicodeString(followingChars[i])\r
-                        + "' (" + ((int)(followingChars[i])) + ")");\r
-        j = iter->next();\r
-        if (j != 3)\r
-            errln("ja line break failure: failed to stop after '" + UCharToUnicodeString(followingChars[i])\r
-                        + "' (" + ((int)(followingChars[i])) + ")");\r
-    }\r
-    delete iter;\r
-#endif\r
-}\r
-\r
-\r
-//------------------------------------------------------------------------------\r
-//\r
-//   RBBITest::Extended    Run  RBBI Tests from an external test data file\r
-//\r
-//------------------------------------------------------------------------------\r
-\r
-struct TestParams {\r
-    BreakIterator   *bi;\r
-    UnicodeString    dataToBreak;\r
-    UVector32       *expectedBreaks;\r
-    UVector32       *srcLine;\r
-    UVector32       *srcCol;\r
-};\r
-\r
-void RBBITest::executeTest(TestParams *t) {\r
-    int32_t    bp;\r
-    int32_t    prevBP;\r
-    int32_t    i;\r
-\r
-    t->bi->setText(t->dataToBreak);\r
-    //\r
-    //  Run the iterator forward\r
-    //\r
-    prevBP = -1;\r
-    for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {\r
-        if (prevBP ==  bp) {\r
-            // Fail for lack of forward progress.\r
-            errln("Forward Iteration, no forward progress.  Break Pos=%4d  File line,col=%4d,%4d",\r
-                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));\r
-            break;\r
-        }\r
-\r
-        // Check that there were we didn't miss an expected break between the last one\r
-        //  and this one.\r
-        for (i=prevBP+1; i<bp; i++) {\r
-            if (t->expectedBreaks->elementAti(i) != 0) {\r
-                int expected[] = {0, i};\r
-                printStringBreaks(t->dataToBreak, expected, 2);\r
-                errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",\r
-                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));\r
-            }\r
-        }\r
-\r
-        // Check that the break we did find was expected\r
-        if (t->expectedBreaks->elementAti(bp) == 0) {\r
-            int expected[] = {0, bp};\r
-            printStringBreaks(t->dataToBreak, expected, 2);\r
-            errln("Forward Iteration, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",\r
-                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));\r
-        } else {\r
-            // The break was expected.\r
-            //   Check that the {nnn} tag value is correct.\r
-            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);\r
-            if (expectedTagVal == -1) {\r
-                expectedTagVal = 0;\r
-            }\r
-            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();\r
-            if (rs != expectedTagVal) {\r
-                errln("Incorrect status for forward break.  Pos=%4d  File line,col= %4d,%4d.\n"\r
-                      "          Actual, Expected status = %4d, %4d",\r
-                    bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp), rs, expectedTagVal);\r
-            }\r
-        }\r
-\r
-\r
-        prevBP = bp;\r
-    }\r
-\r
-    // Verify that there were no missed expected breaks after the last one found\r
-    for (i=prevBP+1; i<t->expectedBreaks->size(); i++) {\r
-        if (t->expectedBreaks->elementAti(i) != 0) {\r
-            errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",\r
-                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));\r
-        }\r
-    }\r
-\r
-    //\r
-    //  Run the iterator backwards, verify that the same breaks are found.\r
-    //\r
-    prevBP = t->dataToBreak.length()+2;  // start with a phony value for the last break pos seen.\r
-    for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {\r
-        if (prevBP ==  bp) {\r
-            // Fail for lack of progress.\r
-            errln("Reverse Iteration, no progress.  Break Pos=%4d  File line,col=%4d,%4d",\r
-                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));\r
-            break;\r
-        }\r
-\r
-        // Check that there were we didn't miss an expected break between the last one\r
-        //  and this one.  (UVector returns zeros for index out of bounds.)\r
-        for (i=prevBP-1; i>bp; i--) {\r
-            if (t->expectedBreaks->elementAti(i) != 0) {\r
-                errln("Reverse Itertion, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",\r
-                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));\r
-            }\r
-        }\r
-\r
-        // Check that the break we did find was expected\r
-        if (t->expectedBreaks->elementAti(bp) == 0) {\r
-            errln("Reverse Itertion, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",\r
-                   bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));\r
-        } else {\r
-            // The break was expected.\r
-            //   Check that the {nnn} tag value is correct.\r
-            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);\r
-            if (expectedTagVal == -1) {\r
-                expectedTagVal = 0;\r
-            }\r
-            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();\r
-            if (rs != expectedTagVal) {\r
-                errln("Incorrect status for reverse break.  Pos=%4d  File line,col= %4d,%4d.\n"\r
-                      "          Actual, Expected status = %4d, %4d",\r
-                    bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp), rs, expectedTagVal);\r
-            }\r
-        }\r
-\r
-        prevBP = bp;\r
-    }\r
-\r
-    // Verify that there were no missed breaks prior to the last one found\r
-    for (i=prevBP-1; i>=0; i--) {\r
-        if (t->expectedBreaks->elementAti(i) != 0) {\r
-            errln("Forward Itertion, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",\r
-                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));\r
-        }\r
-    }\r
-}\r
-\r
-\r
-void RBBITest::TestExtended() {\r
-    UErrorCode      status  = U_ZERO_ERROR;\r
-    Locale          locale   = Locale::getDefault();\r
-\r
-    UnicodeString       rules;\r
-    TestParams          tp;\r
-    tp.bi             = NULL;\r
-    tp.expectedBreaks = new UVector32(status);\r
-    tp.srcLine        = new UVector32(status);\r
-    tp.srcCol         = new UVector32(status);\r
-\r
-\r
-    //\r
-    //  Open and read the test data file.\r
-    //\r
-    const char *testDataDirectory = IntlTest::getSourceTestData(status);\r
-    char testFileName[1000];\r
-    if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {\r
-        errln("Can't open test data.  Path too long.");\r
-        return;\r
-    }\r
-    strcpy(testFileName, testDataDirectory);\r
-    strcat(testFileName, "rbbitst.txt");\r
-\r
-    int    len;\r
-    UChar *testFile = ReadAndConvertFile(testFileName, len, status);\r
-    if (U_FAILURE(status)) {\r
-        return; /* something went wrong, error already output */\r
-    }\r
-\r
-\r
-\r
-    //\r
-    //  Put the test data into a UnicodeString\r
-    //\r
-    UnicodeString testString(FALSE, testFile, len);\r
-\r
-    enum EParseState{\r
-        PARSE_COMMENT,\r
-        PARSE_TAG,\r
-        PARSE_DATA,\r
-        PARSE_NUM\r
-    }\r
-    parseState = PARSE_TAG;\r
-\r
-    EParseState savedState = PARSE_TAG;\r
-\r
-    static const UChar CH_LF        = 0x0a;\r
-    static const UChar CH_CR        = 0x0d;\r
-    static const UChar CH_HASH      = 0x23;\r
-    /*static const UChar CH_PERIOD    = 0x2e;*/\r
-    static const UChar CH_LT        = 0x3c;\r
-    static const UChar CH_GT        = 0x3e;\r
-    static const UChar CH_BACKSLASH = 0x5c;\r
-    static const UChar CH_BULLET    = 0x2022;\r
-\r
-    int32_t    lineNum  = 1;\r
-    int32_t    colStart = 0;\r
-    int32_t    column   = 0;\r
-    int32_t    charIdx  = 0;\r
-\r
-    int32_t    tagValue = 0;       // The numeric value of a <nnn> tag.\r
-\r
-    for (charIdx = 0; charIdx < len; ) {\r
-        UChar  c = testString.charAt(charIdx);\r
-        charIdx++;\r
-        if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) {\r
-            // treat CRLF as a unit\r
-            c = CH_LF;\r
-            charIdx++;\r
-        }\r
-        if (c == CH_LF || c == CH_CR) {\r
-            lineNum++;\r
-            colStart = charIdx;\r
-        }\r
-        column = charIdx - colStart + 1;\r
-\r
-        switch (parseState) {\r
-        case PARSE_COMMENT:\r
-            if (c == 0x0a || c == 0x0d) {\r
-                parseState = savedState;\r
-            }\r
-            break;\r
-\r
-        case PARSE_TAG:\r
-            {\r
-            if (c == CH_HASH) {\r
-                parseState = PARSE_COMMENT;\r
-                savedState = PARSE_TAG;\r
-                break;\r
-            }\r
-            if (u_isUWhiteSpace(c)) {\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 6, "<word>") == 0) {\r
-                delete tp.bi;\r
-                tp.bi = BreakIterator::createWordInstance(locale,  status);\r
-                charIdx += 5;\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 6, "<char>") == 0) {\r
-                delete tp.bi;\r
-                tp.bi = BreakIterator::createCharacterInstance(locale,  status);\r
-                charIdx += 5;\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 6, "<line>") == 0) {\r
-                delete tp.bi;\r
-                tp.bi = BreakIterator::createLineInstance(locale,  status);\r
-                charIdx += 5;\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 6, "<sent>") == 0) {\r
-                delete tp.bi;\r
-                tp.bi = BreakIterator::createSentenceInstance(locale,  status);\r
-                charIdx += 5;\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 7, "<title>") == 0) {\r
-                delete tp.bi;\r
-                tp.bi = BreakIterator::createTitleInstance(locale,  status);\r
-                charIdx += 6;\r
-                break;\r
-            }\r
-            if (testString.compare(charIdx-1, 6, "<data>") == 0) {\r
-                parseState = PARSE_DATA;\r
-                charIdx += 5;\r
-                tp.dataToBreak = "";\r
-                tp.expectedBreaks->removeAllElements();\r
-                tp.srcCol ->removeAllElements();\r
-                tp.srcLine->removeAllElements();\r
-                break;\r
-            }\r
-\r
-            errln("line %d: Tag expected in test file.", lineNum);\r
-            goto end_test;\r
-            parseState = PARSE_COMMENT;\r
-            savedState = PARSE_DATA;\r
-            }\r
-            break;\r
-\r
-        case PARSE_DATA:\r
-            if (c == CH_BULLET) {\r
-                int32_t  breakIdx = tp.dataToBreak.length();\r
-                tp.expectedBreaks->setSize(breakIdx+1);\r
-                tp.expectedBreaks->setElementAt(-1, breakIdx);\r
-                tp.srcLine->setSize(breakIdx+1);\r
-                tp.srcLine->setElementAt(lineNum, breakIdx);\r
-                tp.srcCol ->setSize(breakIdx+1);\r
-                tp.srcCol ->setElementAt(column, breakIdx);\r
-                break;\r
-            }\r
-\r
-            if (testString.compare(charIdx-1, 7, "</data>") == 0) {\r
-                // Add final entry to mappings from break location to source file position.\r
-                //  Need one extra because last break position returned is after the\r
-                //    last char in the data, not at the last char.\r
-                tp.srcLine->addElement(lineNum, status);\r
-                tp.srcCol ->addElement(column, status);\r
-\r
-                parseState = PARSE_TAG;\r
-                charIdx += 7;\r
-\r
-                // RUN THE TEST!\r
-                executeTest(&tp);\r
-                break;\r
-            }\r
-\r
-            if (testString.compare(charIdx-1, 3, "\\N{") == 0) {\r
-                // Named character, e.g. \N{COMBINING GRAVE ACCENT}\r
-                // Get the code point from the name and insert it into the test data.\r
-                //   (Damn, no API takes names in Unicode  !!!\r
-                //    we've got to take it back to char *)\r
-                int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);\r
-                int32_t nameLength = nameEndIdx - (charIdx+2);\r
-                char charNameBuf[200];\r
-                UChar32 theChar = -1;\r
-                if (nameEndIdx != -1) {\r
-                    UErrorCode status = U_ZERO_ERROR;\r
-                    testString.extract(charIdx+2, nameLength, charNameBuf, sizeof(charNameBuf));\r
-                    charNameBuf[sizeof(charNameBuf)-1] = 0;\r
-                    theChar = u_charFromName(U_UNICODE_CHAR_NAME, charNameBuf, &status);\r
-                    if (U_FAILURE(status)) {\r
-                        theChar = -1;\r
-                    }\r
-                }\r
-                if (theChar == -1) {\r
-                    errln("Error in named character in test file at line %d, col %d",\r
-                        lineNum, column);\r
-                } else {\r
-                    // Named code point was recognized.  Insert it\r
-                    //   into the test data.\r
-                    tp.dataToBreak.append(theChar);\r
-                    while (tp.dataToBreak.length() > tp.srcLine->size()) {\r
-                        tp.srcLine->addElement(lineNum, status);\r
-                        tp.srcCol ->addElement(column, status);\r
-                    }\r
-                }\r
-                if (nameEndIdx > charIdx) {\r
-                    charIdx = nameEndIdx+1;\r
-                }\r
-                break;\r
-            }\r
-\r
-\r
-\r
-\r
-            if (testString.compare(charIdx-1, 2, "<>") == 0) {\r
-                charIdx++;\r
-                int32_t  breakIdx = tp.dataToBreak.length();\r
-                tp.expectedBreaks->setSize(breakIdx+1);\r
-                tp.expectedBreaks->setElementAt(-1, breakIdx);\r
-                tp.srcLine->setSize(breakIdx+1);\r
-                tp.srcLine->setElementAt(lineNum, breakIdx);\r
-                tp.srcCol ->setSize(breakIdx+1);\r
-                tp.srcCol ->setElementAt(column, breakIdx);\r
-                break;\r
-            }\r
-\r
-            if (c == CH_LT) {\r
-                tagValue   = 0;\r
-                parseState = PARSE_NUM;\r
-                break;\r
-            }\r
-\r
-            if (c == CH_HASH && column==3) {   // TODO:  why is column off so far?\r
-                parseState = PARSE_COMMENT;\r
-                savedState = PARSE_DATA;\r
-                break;\r
-            }\r
-\r
-            if (c == CH_BACKSLASH) {\r
-                // Check for \ at end of line, a line continuation.\r
-                //     Advance over (discard) the newline\r
-                UChar32 cp = testString.char32At(charIdx);\r
-                if (cp == CH_CR && charIdx<len && testString.charAt(charIdx+1) == CH_LF) {\r
-                    // We have a CR LF\r
-                    //  Need an extra increment of the input ptr to move over both of them\r
-                    charIdx++;\r
-                }\r
-                if (cp == CH_LF || cp == CH_CR) {\r
-                    lineNum++;\r
-                    colStart = charIdx;\r
-                    charIdx++;\r
-                    break;\r
-                }\r
-\r
-                // Let unescape handle the back slash.\r
-                cp = testString.unescapeAt(charIdx);\r
-                if (cp != -1) {\r
-                    // Escape sequence was recognized.  Insert the char\r
-                    //   into the test data.\r
-                    tp.dataToBreak.append(cp);\r
-                    while (tp.dataToBreak.length() > tp.srcLine->size()) {\r
-                        tp.srcLine->addElement(lineNum, status);\r
-                        tp.srcCol ->addElement(column, status);\r
-                    }\r
-                    break;\r
-                }\r
-\r
-\r
-                // Not a recognized backslash escape sequence.\r
-                // Take the next char as a literal.\r
-                //  TODO:  Should this be an error?\r
-                c = testString.charAt(charIdx);\r
-                charIdx = testString.moveIndex32(charIdx, 1);\r
-            }\r
-\r
-            // Normal, non-escaped data char.\r
-            tp.dataToBreak.append(c);\r
-\r
-            // Save the mapping from offset in the data to line/column numbers in\r
-            //   the original input file.  Will be used for better error messages only.\r
-            //   If there's an expected break before this char, the slot in the mapping\r
-            //     vector will already be set for this char; don't overwrite it.\r
-            if (tp.dataToBreak.length() > tp.srcLine->size()) {\r
-                tp.srcLine->addElement(lineNum, status);\r
-                tp.srcCol ->addElement(column, status);\r
-            }\r
-            break;\r
-\r
-\r
-        case PARSE_NUM:\r
-            // We are parsing an expected numeric tag value, like <1234>,\r
-            //   within a chunk of data.\r
-            if (u_isUWhiteSpace(c)) {\r
-                break;\r
-            }\r
-\r
-            if (c == CH_GT) {\r
-                // Finished the number.  Add the info to the expected break data,\r
-                //   and switch parse state back to doing plain data.\r
-                parseState = PARSE_DATA;\r
-                if (tagValue == 0) {\r
-                    tagValue = -1;\r
-                }\r
-                int32_t  breakIdx = tp.dataToBreak.length();\r
-                tp.expectedBreaks->setSize(breakIdx+1);\r
-                tp.expectedBreaks->setElementAt(tagValue, breakIdx);\r
-                tp.srcLine->setSize(breakIdx+1);\r
-                tp.srcLine->setElementAt(lineNum, breakIdx);\r
-                tp.srcCol ->setSize(breakIdx+1);\r
-                tp.srcCol ->setElementAt(column, breakIdx);\r
-                break;\r
-            }\r
-\r
-            if (u_isdigit(c)) {\r
-                tagValue = tagValue*10 + u_charDigitValue(c);\r
-                break;\r
-            }\r
-\r
-            errln("Syntax Error in test file at line %d, col %d",\r
-                lineNum, column);\r
-            goto end_test;\r
-            parseState = PARSE_COMMENT;\r
-            break;\r
-        }\r
-\r
-\r
-        if (U_FAILURE(status)) {\r
-            errln("ICU Error %s while parsing test file at line %d.",\r
-                u_errorName(status), lineNum);\r
-            goto end_test;\r
-            status = U_ZERO_ERROR;\r
-        }\r
-\r
-    }\r
-\r
-end_test:\r
-    delete tp.bi;\r
-    delete tp.expectedBreaks;\r
-    delete tp.srcLine;\r
-    delete tp.srcCol;\r
-    delete [] testFile;\r
-}\r
-\r
-\r
-//-------------------------------------------------------------------------------\r
-//\r
-//    ReadAndConvertFile   Read a text data file, convert it to UChars, and\r
-//    return the datain one big UChar * buffer, which the caller must delete.\r
-//\r
-//    TODO:  This is a clone of RegexTest::ReadAndConvertFile.\r
-//           Move this function to some common place.\r
-//\r
-//--------------------------------------------------------------------------------\r
-UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, UErrorCode &status) {\r
-    UChar       *retPtr  = NULL;\r
-    char        *fileBuf = NULL;\r
-    UConverter* conv     = NULL;\r
-    FILE        *f       = NULL;\r
-\r
-    ulen = 0;\r
-    if (U_FAILURE(status)) {\r
-        return retPtr;\r
-    }\r
-\r
-    //\r
-    //  Open the file.\r
-    //\r
-    f = fopen(fileName, "rb");\r
-    if (f == 0) {\r
-        errln("Error opening test data file %s\n", fileName);\r
-        status = U_FILE_ACCESS_ERROR;\r
-        return NULL;\r
-    }\r
-    //\r
-    //  Read it in\r
-    //\r
-    int   fileSize;\r
-    int   amt_read;\r
-\r
-    fseek( f, 0, SEEK_END);\r
-    fileSize = ftell(f);\r
-    fileBuf = new char[fileSize];\r
-    fseek(f, 0, SEEK_SET);\r
-    amt_read = fread(fileBuf, 1, fileSize, f);\r
-    if (amt_read != fileSize || fileSize <= 0) {\r
-        errln("Error reading test data file.");\r
-        goto cleanUpAndReturn;\r
-    }\r
-\r
-    //\r
-    // Look for a Unicode Signature (BOM) on the data just read\r
-    //\r
-    int32_t        signatureLength;\r
-    const char *   fileBufC;\r
-    const char*    encoding;\r
-\r
-    fileBufC = fileBuf;\r
-    encoding = ucnv_detectUnicodeSignature(\r
-        fileBuf, fileSize, &signatureLength, &status);\r
-    if(encoding!=NULL ){\r
-        fileBufC  += signatureLength;\r
-        fileSize  -= signatureLength;\r
-    }\r
-\r
-    //\r
-    // Open a converter to take the rule file to UTF-16\r
-    //\r
-    conv = ucnv_open(encoding, &status);\r
-    if (U_FAILURE(status)) {\r
-        goto cleanUpAndReturn;\r
-    }\r
-\r
-    //\r
-    // Convert the rules to UChar.\r
-    //  Preflight first to determine required buffer size.\r
-    //\r
-    ulen = ucnv_toUChars(conv,\r
-        NULL,           //  dest,\r
-        0,              //  destCapacity,\r
-        fileBufC,\r
-        fileSize,\r
-        &status);\r
-    if (status == U_BUFFER_OVERFLOW_ERROR) {\r
-        // Buffer Overflow is expected from the preflight operation.\r
-        status = U_ZERO_ERROR;\r
-\r
-        retPtr = new UChar[ulen+1];\r
-        ucnv_toUChars(conv,\r
-            retPtr,       //  dest,\r
-            ulen+1,\r
-            fileBufC,\r
-            fileSize,\r
-            &status);\r
-    }\r
-\r
-cleanUpAndReturn:\r
-    fclose(f);\r
-    delete fileBuf;\r
-    ucnv_close(conv);\r
-    if (U_FAILURE(status)) {\r
-        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));\r
-        delete retPtr;\r
-        retPtr = 0;\r
-        ulen   = 0;\r
-    };\r
-    return retPtr;\r
-}\r
-\r
-\r
-//--------------------------------------------------------------------------------------------\r
-//\r
-//     Exhaustive Tests, using Unicode Data Files.\r
-//\r
-//--------------------------------------------------------------------------------------------\r
-\r
-//\r
-//  Token level scanner for the Unicode Line Break Test Data file.\r
-//      Return the next token, as follows:\r
-//          >= 0:       a UChar32 character, scanned from hex in the file.\r
-//          -1:         a break position, a division sign in the file.\r
-//          -2:         end of rule.  A new line in the file.\r
-//          -3:         end of file.  No more rules.\r
-//          -4:         Error\r
-//\r
-//   The scanner\r
-//       strips comments, ('#' to end of line)\r
-//       Recognizes CR, CR/LF and LF as new lines.\r
-//       Skips over spaces and  Xs (don't break here) in the data.\r
-//\r
-struct ScanState {\r
-    int32_t     fPeekChar;\r
-    UBool       fPeeked;\r
-    int32_t     fLineNum;\r
-    FILE        *fFile;\r
-    ScanState() :fPeeked(FALSE), fLineNum(0), fFile(NULL) {};\r
-};\r
-\r
-//  Literal characters that are of interest.  In hex to keep EBCDIC based machines happy.\r
-//  The data itself is latin-1 on all platforms.\r
-static const int32_t chSpace  = 0x20;\r
-static const int32_t chTab    = 0x09;\r
-static const int32_t chCR     = 0x0D;\r
-static const int32_t chLF     = 0x0A;\r
-static const int32_t chHash   = 0x23;\r
-static const int32_t chMult   = 0xD7;\r
-static const int32_t chDivide = 0xF7;\r
-\r
-static int32_t   nextLBDToken(ScanState *s) {\r
-    int32_t     c;\r
-\r
-    // Read  characters from the input file until we get something interesting\r
-    //   to return.  The file is in latin-1 encoding.\r
-    for (;;) {\r
-        // Get the next character to look at,\r
-        if (s->fPeeked) {\r
-            c = s->fPeekChar;\r
-            s->fPeeked = FALSE;\r
-        } else {\r
-            c = getc(s->fFile);\r
-        }\r
-\r
-        // EOF.  Return immediately.\r
-        if (c == EOF) {\r
-            return -3;\r
-        }\r
-\r
-        // Spaces.  Treat the multiply sign as a space - it indicates a no-break position\r
-        //          in the data, and the test program doesn't want to see them.\r
-        //          Continue the next char loop, looking for something significant.\r
-        if (c == chSpace || c == chTab || c == chMult) {\r
-            continue;\r
-        }\r
-\r
-        //  Divide sign.  Indicates an expected break position.\r
-        if (c == chDivide) {\r
-            return -1;\r
-        }\r
-\r
-        // New Line Handling.  Keep track of line number in the file, which in turn\r
-        //   requires keeping track of CR/LF as a single new line.\r
-        if (c == chCR) {\r
-            s->fLineNum++;\r
-            s->fPeekChar = getc(s->fFile);\r
-            if (s->fPeekChar != chLF) {s->fPeeked = TRUE;};\r
-            return -2;\r
-        }\r
-        if (c == chLF) {\r
-            s->fLineNum++;\r
-            return -2;\r
-        }\r
-\r
-        // Comments.  Consume everything up to the next new line.\r
-        if (c == chHash) {\r
-            do {\r
-                c = getc(s->fFile);\r
-            } while (!(c == EOF || c == chCR || c == chLF));\r
-            s->fPeekChar = c;\r
-            s->fPeeked = TRUE;\r
-            return nextLBDToken(s);\r
-        }\r
-\r
-        // Scan a hex character (UChar32) value.\r
-        if (u_digit(c, 16) >= 0) {\r
-            int32_t   v = u_digit(c, 16);\r
-            for (;;) {\r
-                c = getc(s->fFile);\r
-                if (u_digit(c, 16) < 0) {break;};\r
-                v <<= 4;\r
-                v += u_digit(c, 16);\r
-            }\r
-            s->fPeekChar = c;\r
-            s->fPeeked   = TRUE;\r
-            return v;\r
-        }\r
-\r
-        // Error.  Character was something unexpected.\r
-        return -4;\r
-    }\r
-}\r
-\r
-\r
-\r
-void RBBITest::TestLineBreakData() {\r
-\r
-    UErrorCode      status = U_ZERO_ERROR;\r
-    UnicodeString   testString;\r
-    UVector         expectedBreaks(status);\r
-    ScanState       ss;\r
-    int32_t         tok;\r
-\r
-    BreakIterator *bi = BreakIterator::createLineInstance(Locale::getDefault(), status);\r
-    if (U_FAILURE(status)) {\r
-        errln("Failure creating break iterator");\r
-        return;\r
-    }\r
-\r
-    const char *    lbdfName = "LBTest.txt";\r
-\r
-    // Open the test data file.\r
-    //   TODO:  a proper way to handle this data.\r
-    ss.fFile = fopen(lbdfName, "rb");\r
-    if (ss.fFile == NULL) {\r
-        logln("Unable to open Line Break Test Data file.  Skipping test.");\r
-        delete bi;\r
-        return;\r
-    }\r
-\r
-    // Loop once per line from the test data file.\r
-    for (;;) {\r
-        // Zero out test data from previous line.\r
-        testString.truncate(0);\r
-        expectedBreaks.removeAllElements();\r
-\r
-        // Read one test's (line's) worth of data from the file.\r
-        //   Loop once per token on the input file line.\r
-        for(;;)  {\r
-            tok = nextLBDToken(&ss);\r
-\r
-            // If we scanned a character number in the file.\r
-            //   save it in the test data array.\r
-            if (tok >= 0) {\r
-                testString.append((UChar32)tok);\r
-                continue;\r
-            }\r
-\r
-            // If we scanned a break position in the data, record it.\r
-            if (tok == -1) {\r
-                expectedBreaks.addElement(testString.length(), status);\r
-                continue;\r
-            }\r
-\r
-            // If we scanned a new line, or EOF\r
-            //    drop out of scan loop and run the test case.\r
-            if (tok == -2 || tok == -3) {break;};\r
-\r
-            // None of above.  Error.\r
-            errln("Failure:  Unrecognized data format,  test file line %d", ss.fLineNum);\r
-            break;\r
-        }\r
-\r
-        // If this line from the test data file actually contained test data,\r
-        //   run the test.\r
-        if (testString.length() > 0) {\r
-            int32_t pos;                 // Break Position in the test string\r
-            int32_t expectedI = 0;       // Index of expected break position in vector of same.\r
-            int32_t expectedPos;         // Expected break position (index into test string)\r
-\r
-            bi->setText(testString);\r
-            pos = bi->first();       // TODO:  break iterators always return a match at pos 0.\r
-            pos = bi->next();        //        Line Break TR says no match at position 0.\r
-                                     //        Resolve.\r
-\r
-            for (; pos != BreakIterator::DONE; ) {\r
-                expectedPos = expectedBreaks.elementAti(expectedI);\r
-                if (pos < expectedPos) {\r
-                    errln("Failure: Test file line %d, unexpected break found at position %d",\r
-                        ss.fLineNum, pos);\r
-                    break;\r
-                }\r
-                if (pos > expectedPos) {\r
-                    errln("Failure: Test file line %d, failed to find break at position %d",\r
-                        ss.fLineNum, expectedPos);\r
-                    break;\r
-                }\r
-                pos = bi->next();\r
-                expectedI++;\r
-            }\r
-        }\r
-\r
-        // If we've hit EOF on the input file, we're done.\r
-        if (tok == -3) {\r
-            break;\r
-        }\r
-\r
-    }\r
-\r
-    fclose(ss.fFile);\r
-    delete bi;\r
-\r
-}\r
-\r
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-\r
-//---------------------------------------------------------------------------------------\r
-//\r
-//   classs RBBIMonkeyKind\r
-//\r
-//      Monkey Test for Break Iteration\r
-//      Abstract interface class.   Concrete derived classes independently\r
-//      implement the break rules for different iterator types.\r
-//\r
-//      The Monkey Test itself uses doesn't know which type of break iterator it is\r
-//      testing, but works purely in terms of the interface defined here.\r
-//\r
-//---------------------------------------------------------------------------------------\r
-class RBBIMonkeyKind {\r
-public:\r
-    // Return a UVector of UnicodeSets, representing the character classes used\r
-    //   for this type of iterator.\r
-    virtual  UVector  *charClasses() = 0;\r
-\r
-    // Set the test text on which subsequent calls to next() will operate\r
-    virtual  void      setText(const UnicodeString &s) = 0;\r
-\r
-    // Find the next break postion, starting from the prev break position, or from zero.\r
-    // Return -1 after reaching end of string.\r
-    virtual  int32_t   next(int32_t i) = 0;\r
-\r
-    virtual ~RBBIMonkeyKind();\r
-    UErrorCode       deferredStatus;\r
-\r
-\r
-protected:\r
-    RBBIMonkeyKind();\r
-\r
-private:\r
-};\r
-\r
-RBBIMonkeyKind::RBBIMonkeyKind() {\r
-    deferredStatus = U_ZERO_ERROR;\r
-}\r
-\r
-RBBIMonkeyKind::~RBBIMonkeyKind() {\r
-}\r
-\r
-\r
-//----------------------------------------------------------------------------------------\r
-//\r
-//   Random Numbers.  Similar to standard lib rand() and srand()\r
-//                    Not using library to\r
-//                      1.  Get same results on all platforms.\r
-//                      2.  Get access to current seed, to more easily reproduce failures.\r
-//\r
-//---------------------------------------------------------------------------------------\r
-static uint32_t m_seed = 1;\r
-\r
-static uint32_t m_rand()\r
-{\r
-    m_seed = m_seed * 1103515245 + 12345;\r
-    return (uint32_t)(m_seed/65536) % 32768;\r
-}\r
-\r
-\r
-//------------------------------------------------------------------------------------------\r
-//\r
-//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation\r
-//                             of RBBIMonkeyKind.\r
-//\r
-//------------------------------------------------------------------------------------------\r
-class RBBICharMonkey: public RBBIMonkeyKind {\r
-public:\r
-    RBBICharMonkey();\r
-    virtual          ~RBBICharMonkey();\r
-    virtual  UVector *charClasses();\r
-    virtual  void     setText(const UnicodeString &s);\r
-    virtual  int32_t  next(int32_t i);\r
-private:\r
-    UVector   *fSets;\r
-\r
-    UnicodeSet  *fCRLFSet;\r
-    UnicodeSet  *fControlSet;\r
-    UnicodeSet  *fExtendSet;\r
-    UnicodeSet  *fHangulSet;\r
-    UnicodeSet  *fAnySet;\r
-\r
-    RegexMatcher  *fMatcher;\r
-    const UnicodeString *fText;\r
-};\r
-\r
-\r
-RBBICharMonkey::RBBICharMonkey() {\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-\r
-    fText = NULL;\r
-    fMatcher = new RegexMatcher("\\X", 0, status);     // Pattern to match a grampheme cluster\r
-\r
-    fCRLFSet    = new UnicodeSet("[\\r\\n]", status);\r
-    fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]-\\p{Grapheme_Extend}]", status);\r
-    fExtendSet  = new UnicodeSet("[\\p{Grapheme_Extend}]", status);\r
-    fHangulSet  = new UnicodeSet(\r
-        "[\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=T}"\r
-         "\\p{Hangul_Syllable_Type=LV}\\p{Hangul_Syllable_Type=LVT}]", status);\r
-    fAnySet     = new UnicodeSet("[\\u0000-\\U0010ffff]", status);\r
-\r
-    fSets       = new UVector(status);\r
-    fSets->addElement(fCRLFSet,    status);\r
-    fSets->addElement(fControlSet, status);\r
-    fSets->addElement(fExtendSet,  status);\r
-    fSets->addElement(fHangulSet,  status);\r
-    fSets->addElement(fAnySet,     status);\r
-    if (U_FAILURE(status)) {\r
-        deferredStatus = status;\r
-    }\r
-};\r
-\r
-\r
-void RBBICharMonkey::setText(const UnicodeString &s) {\r
-    fText = &s;\r
-    fMatcher->reset(s);\r
-}\r
-\r
-\r
-int32_t RBBICharMonkey::next(int32_t i) {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    int32_t  retVal = -1;\r
-\r
-    if (fMatcher->find(i, status)) {\r
-        retVal = fMatcher->end(status);\r
-    }\r
-    if (U_FAILURE(status)){\r
-        retVal = -1;\r
-    }\r
-    return retVal;\r
-}\r
-\r
-\r
-UVector  *RBBICharMonkey::charClasses() {\r
-    return fSets;\r
-}\r
-\r
-\r
-RBBICharMonkey::~RBBICharMonkey() {\r
-    delete fSets;\r
-    delete fCRLFSet;\r
-    delete fControlSet;\r
-    delete fExtendSet;\r
-    delete fHangulSet;\r
-    delete fAnySet;\r
-\r
-    delete fMatcher;\r
-}\r
-\r
-//------------------------------------------------------------------------------------------\r
-//\r
-//   class RBBIWordMonkey      Word Break specific implementation\r
-//                             of RBBIMonkeyKind.\r
-//\r
-//------------------------------------------------------------------------------------------\r
-class RBBIWordMonkey: public RBBIMonkeyKind {\r
-public:\r
-    RBBIWordMonkey();\r
-    virtual          ~RBBIWordMonkey();\r
-    virtual  UVector *charClasses();\r
-    virtual  void     setText(const UnicodeString &s);\r
-    virtual int32_t   next(int32_t i);\r
-private:\r
-    UVector      *fSets;\r
-\r
-    UnicodeSet  *fKatakanaSet;\r
-    UnicodeSet  *fALetterSet;\r
-    UnicodeSet  *fMidLetterSet;\r
-    UnicodeSet  *fMidNumSet;\r
-    UnicodeSet  *fNumericSet;\r
-    UnicodeSet  *fFormatSet;\r
-    UnicodeSet  *fOtherSet;\r
-    UnicodeSet  *fExtendSet;\r
-    UnicodeSet  *fExtendNumLetSet;\r
-\r
-    RegexMatcher  *fMatcher;\r
-\r
-    const UnicodeString  *fText;\r
-\r
-    RegexMatcher         *fGCFMatcher;\r
-    RegexMatcher         *fGCMatcher;\r
-\r
-};\r
-\r
-\r
-RBBIWordMonkey::RBBIWordMonkey() : fGCFMatcher(0),\r
-                                   fGCMatcher(0)\r
-{\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-\r
-    fSets          = new UVector(status);\r
-\r
-    fKatakanaSet   = new UnicodeSet("[\\p{script=KATAKANA}"\r
-        "\\u3031-\\u3035\\u309b\\u309c\\u30a0"\r
-        "\\u30fc\\uff70\\uff9e\\uff9f]", status);\r
-\r
-    const UnicodeString ALetterStr( "[[\\p{Alphabetic}"\r
-                                        "\\u00a0"         // NBSP\r
-                                        "\\u05f3]"        // Hebrew punct Geresh\r
-                                        "-[\\p{Ideographic}]"\r
-                                        "-[\\p{Script=Lao}]"\r
-                                        "-[\\p{Script=Hiragana}]"\r
-                                        "-[\\p{Grapheme_Extend}]]");\r
-    fALetterSet    = new UnicodeSet(ALetterStr, status);\r
-    fALetterSet->removeAll(*fKatakanaSet);\r
-\r
-    fMidLetterSet  = new UnicodeSet("[\\u0027\\u00b7\\u05f4\\u2019\\u2027\\u003a]", status);\r
-    fMidNumSet     = new UnicodeSet("[[\\p{Line_Break=Infix_Numeric}]-[\\u003a]]", status);\r
-    fNumericSet    = new UnicodeSet("[\\p{Nd}\\u066b\\u066c]", status);\r
-    fFormatSet     = new UnicodeSet("[\\p{Format}-[\\u200c\\u200d]]", status);\r
-    fExtendSet     = new UnicodeSet("[\\p{Grapheme_Extend}]", status);\r
-    fExtendNumLetSet = new UnicodeSet("[\\p{Pc}-[\\u30fb\\uff65]]", status);\r
-    fOtherSet      = new UnicodeSet();\r
-    if(U_FAILURE(status)) {\r
-      deferredStatus = status;\r
-      return;\r
-    }\r
-\r
-    fOtherSet->complement();\r
-    fOtherSet->removeAll(*fKatakanaSet);\r
-    fOtherSet->removeAll(*fALetterSet);\r
-    fOtherSet->removeAll(*fMidLetterSet);\r
-    fOtherSet->removeAll(*fMidNumSet);\r
-    fOtherSet->removeAll(*fNumericSet);\r
-    fOtherSet->removeAll(*fExtendNumLetSet);\r
-\r
-    fSets->addElement(fALetterSet,   status);\r
-    fSets->addElement(fKatakanaSet,  status);\r
-    fSets->addElement(fMidLetterSet, status);\r
-    fSets->addElement(fMidNumSet,    status);\r
-    fSets->addElement(fNumericSet,   status);\r
-    fSets->addElement(fFormatSet,    status);\r
-    fSets->addElement(fOtherSet,     status);\r
-    fSets->addElement(fExtendNumLetSet, status);\r
-\r
-\r
-    fGCFMatcher = new RegexMatcher("\\X(?:[\\p{Format}-\\p{Grapheme_Extend}])*", 0, status);\r
-    fGCMatcher  = new RegexMatcher("\\X", 0, status);\r
-\r
-    if (U_FAILURE(status)) {\r
-        deferredStatus = status;\r
-    }\r
-};\r
-\r
-void RBBIWordMonkey::setText(const UnicodeString &s) {\r
-    fText       = &s;\r
-    fGCMatcher->reset(*fText);\r
-    fGCFMatcher->reset(*fText);\r
-}\r
-\r
-\r
-int32_t RBBIWordMonkey::next(int32_t prevPos) {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-\r
-    int    p0, p1, p2, p3;    // Indices of the significant code points around the \r
-                              //   break position being tested.  The candidate break\r
-                              //   location is before p2.\r
-\r
-    int     breakPos = -1;\r
-\r
-    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.\r
-\r
-    // Prev break at end of string.  return DONE.\r
-    if (prevPos >= fText->length()) {\r
-        return -1;\r
-    }\r
-    p0 = p1 = p2 = p3 = prevPos;\r
-    c3 =  fText->char32At(prevPos);\r
-    c0 = c1 = c2 = 0;\r
-\r
-\r
-    // Format char after prev break?  Special case, see last Note for Word Boundaries TR.\r
-    //    break immdiately after the format char.\r
-    if (fFormatSet->contains(c3)) {\r
-        breakPos = fText->moveIndex32(prevPos, 1);\r
-        return breakPos;\r
-    }\r
-\r
-\r
-    // Loop runs once per "significant" character position in the input text.\r
-    for (;;) {\r
-        // Move all of the positions forward in the input string.\r
-        p0 = p1;  c0 = c1;\r
-        p1 = p2;  c1 = c2;\r
-        p2 = p3;  c2 = c3;\r
-        // Advancd p3 by    (GC Format*)   Rules 3, 4\r
-        status = U_ZERO_ERROR;\r
-        if  (fGCFMatcher->find(p3, status) == FALSE) {\r
-            p3 = fText->length();\r
-            c3 = 0;\r
-        } else {\r
-            p3 = fGCFMatcher->end(0, status);\r
-            U_ASSERT(U_SUCCESS(status));\r
-            c3 = fText->char32At(p3);\r
-        }\r
-        \r
-        if (p1 == p2) {\r
-            // Still warming up the loop.  (won't work with zero length strings, but we don't care)\r
-            continue;\r
-        }\r
-        if (p2 == fText->length()) {\r
-            // Reached end of string.  Always a break position.\r
-            break;\r
-        }\r
-\r
-        // Rule (5).   ALetter x ALetter\r
-        if (fALetterSet->contains(c1) &&\r
-            fALetterSet->contains(c2))  {\r
-            continue;\r
-        }\r
-\r
-        // Rule (6)  ALetter  x  (MidLetter | MidNumLet) ALetter\r
-        //\r
-        //    Also incorporates rule 7 by skipping pos ahead to position of the\r
-        //    terminating ALetter.\r
-        if ( fALetterSet->contains(c1)   &&\r
-             fMidLetterSet->contains(c2) &&\r
-             fALetterSet->contains(c3)) {\r
-            continue;\r
-        }\r
-\r
-\r
-        // Rule (7)  ALetter (MidLetter | MidNumLet)  x  ALetter\r
-        if (fALetterSet->contains(c0) &&\r
-            (fMidLetterSet->contains(c1)  ) &&\r
-            fALetterSet->contains(c2)) {\r
-            continue;\r
-        }\r
-\r
-        // Rule (8)    Numeric x Numeric\r
-        if (fNumericSet->contains(c1) &&\r
-            fNumericSet->contains(c2))  {\r
-            continue;\r
-        }\r
-\r
-        // Rule (9)    ALetter x Numeric\r
-        if (fALetterSet->contains(c1) &&\r
-            fNumericSet->contains(c2))  {\r
-            continue;\r
-        }\r
-\r
-        // Rule (10)    Numeric x ALetter\r
-        if (fNumericSet->contains(c1) &&\r
-            fALetterSet->contains(c2))  {\r
-            continue;\r
-        }\r
-\r
-        // Rule (11)   Numeric (MidNum | MidNumLet)  x  Numeric\r
-        if ( fNumericSet->contains(c0) &&\r
-             fMidNumSet->contains(c1)  && \r
-            fNumericSet->contains(c2)) {\r
-            continue;\r
-        }\r
-\r
-        // Rule (12)  Numeric x (MidNum | MidNumLet) Numeric\r
-        if (fNumericSet->contains(c1) &&\r
-            fMidNumSet->contains(c2)  &&\r
-            fNumericSet->contains(c3)) {\r
-            continue;\r
-        }\r
-        \r
-        // Rule (13)  Katakana x Katakana\r
-        if (fKatakanaSet->contains(c1) &&\r
-            fKatakanaSet->contains(c2))  {\r
-            continue;\r
-        }\r
-\r
-        // Rule 13a\r
-        if ((fALetterSet->contains(c1) || fNumericSet->contains(c1) ||\r
-             fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) &&\r
-             fExtendNumLetSet->contains(c2)) {\r
-                continue;\r
-             }\r
-\r
-        // Rule 13b\r
-        if (fExtendNumLetSet->contains(c1) && \r
-                (fALetterSet->contains(c2) || fNumericSet->contains(c2) ||\r
-                fKatakanaSet->contains(c2)))  {\r
-                continue;\r
-             }\r
-\r
-\r
-        // Rule 14.  Break found here.\r
-        break;\r
-    }\r
-\r
-\r
-    //  Rule 4 fixup,  back up before any trailing\r
-    //                 format characters at the end of the word.\r
-    breakPos = p2;\r
-    status = U_ZERO_ERROR;\r
-    if  (fGCMatcher->find(p1, status)) {\r
-        breakPos = fGCMatcher->end(0, status);\r
-        U_ASSERT(U_SUCCESS(status));\r
-    }\r
-    return breakPos;\r
-}\r
-\r
-\r
-// Returns the vector of character-class sets held in fSets.\r
-// The vector is deleted by the destructor, so the returned pointer is borrowed.\r
-UVector  *RBBIWordMonkey::charClasses() {\r
-    return fSets;\r
-}\r
-\r
-\r
-// Destructor.  Releases the vector of character classes, each character-class\r
-// set, and the two regular-expression matchers used by next().\r
-RBBIWordMonkey::~RBBIWordMonkey() {\r
-    delete fSets;\r
-    delete fKatakanaSet;\r
-    delete fALetterSet;\r
-    delete fMidLetterSet;\r
-    delete fMidNumSet;\r
-    delete fNumericSet;\r
-    delete fFormatSet;\r
-    delete fExtendSet;\r
-    // fExtendNumLetSet is consulted by rules 13a/13b in next() but was never released.\r
-    // NOTE(review): assumes the constructor allocates it with new, like the\r
-    //               sibling sets - confirm against the constructor.\r
-    delete fExtendNumLetSet;\r
-    delete fOtherSet;\r
-\r
-    delete fGCFMatcher;\r
-    delete fGCMatcher;\r
-}\r
-\r
-\r
-\r
-\r
-//-------------------------------------------------------------------------------------------\r
-//\r
-//  RBBILineMonkey\r
-//\r
-//-------------------------------------------------------------------------------------------\r
-\r
-// Monkey-test reference implementation for line breaking.\r
-// next() applies the numbered LB rules directly to the text supplied through\r
-// setText(), using one UnicodeSet per Line_Break property value.\r
-class RBBILineMonkey: public RBBIMonkeyKind {\r
-public:\r
-    RBBILineMonkey();\r
-    virtual          ~RBBILineMonkey();\r
-    // Returns fSets, the vector listing the character-class sets.\r
-    virtual  UVector *charClasses();\r
-    // Stores the address of s in fText and hands s to fCharBI and fNumberMatcher.\r
-    virtual  void     setText(const UnicodeString &s);\r
-    // Returns the first break position found scanning forward from i, or -1 when i\r
-    //   is at or past the end of the text.\r
-    virtual  int32_t  next(int32_t i);\r
-    // Helper for next(): applies the LB 6 / LB 7 adjustments (Hangul syllables,\r
-    //   combining marks) around position pos.\r
-    virtual  void     rule67Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar);\r
-private:\r
-    // Vector listing the character-class sets; returned by charClasses().\r
-    UVector      *fSets;\r
-\r
-    // One set per Line_Break property value, named after the two-letter value.\r
-    // Each is created in the constructor from a [\p{Line_break=..}] pattern,\r
-    //   except fSG, which the constructor never assigns and the destructor never deletes.\r
-    UnicodeSet  *fBK;\r
-    UnicodeSet  *fCR;\r
-    UnicodeSet  *fLF;\r
-    UnicodeSet  *fCM;\r
-    UnicodeSet  *fNL;\r
-    UnicodeSet  *fSG;\r
-    UnicodeSet  *fWJ;\r
-    UnicodeSet  *fZW;\r
-    UnicodeSet  *fGL;\r
-    UnicodeSet  *fCB;\r
-    UnicodeSet  *fSP;\r
-    UnicodeSet  *fB2;\r
-    UnicodeSet  *fBA;\r
-    UnicodeSet  *fBB;\r
-    UnicodeSet  *fHY;\r
-    UnicodeSet  *fCL;\r
-    UnicodeSet  *fEX;\r
-    UnicodeSet  *fIN;\r
-    UnicodeSet  *fNS;\r
-    UnicodeSet  *fOP;\r
-    UnicodeSet  *fQU;\r
-    UnicodeSet  *fIS;\r
-    UnicodeSet  *fNU;\r
-    UnicodeSet  *fPO;\r
-    UnicodeSet  *fPR;\r
-    UnicodeSet  *fSY;\r
-    UnicodeSet  *fAI;\r
-    UnicodeSet  *fAL;     // The constructor also folds XX, AI and SA into this set.\r
-    UnicodeSet  *fID;\r
-    UnicodeSet  *fSA;\r
-    UnicodeSet  *fXX;\r
-\r
-    // Character break iterator; used by rule67Adjust() and by the LB 18 number handling in next().\r
-    BreakIterator  *fCharBI;\r
-\r
-    // Text being analyzed.  Points at the caller's string; set by setText().\r
-    const UnicodeString  *fText;\r
-    // NOTE(review): not assigned or read by any RBBILineMonkey code visible here.\r
-    int32_t              *fOrigPositions;\r
-\r
-    // Regular expressions for LB 18 (numbers), LB 10 (QU SP* x OP) and LB 11 (CL SP* x NS).\r
-    RegexMatcher         *fNumberMatcher;\r
-    RegexMatcher         *fLB10Matcher;\r
-    RegexMatcher         *fLB11Matcher;\r
-};\r
-\r
-\r
-// Constructor.  Builds one UnicodeSet per Line_Break property value, lists the\r
-// sets in fSets, and creates the regular expressions and the character break\r
-// iterator used by next().  Any failure is recorded in deferredStatus.\r
-RBBILineMonkey::RBBILineMonkey()\r
-{\r
-    UErrorCode  status = U_ZERO_ERROR;\r
-\r
-    // These members are not allocated here.  Give them a defined value so that\r
-    // they never hold indeterminate pointers.\r
-    fSG            = NULL;\r
-    fText          = NULL;\r
-    fOrigPositions = NULL;\r
-\r
-    fSets    = new UVector(status);\r
-\r
-    fBK    = new UnicodeSet("[\\p{Line_Break=BK}]", status);\r
-    fCR    = new UnicodeSet("[\\p{Line_break=CR}]", status);\r
-    fLF    = new UnicodeSet("[\\p{Line_break=LF}]", status);\r
-    fCM    = new UnicodeSet("[\\p{Line_break=CM}]", status);\r
-    fNL    = new UnicodeSet("[\\p{Line_break=NL}]", status);\r
-    fWJ    = new UnicodeSet("[\\p{Line_break=WJ}]", status);\r
-    fZW    = new UnicodeSet("[\\p{Line_break=ZW}]", status);\r
-    fGL    = new UnicodeSet("[\\p{Line_break=GL}]", status);\r
-    fCB    = new UnicodeSet("[\\p{Line_break=CB}]", status);\r
-    fSP    = new UnicodeSet("[\\p{Line_break=SP}]", status);\r
-    fB2    = new UnicodeSet("[\\p{Line_break=B2}]", status);\r
-    fBA    = new UnicodeSet("[\\p{Line_break=BA}]", status);\r
-    fBB    = new UnicodeSet("[\\p{Line_break=BB}]", status);\r
-    fHY    = new UnicodeSet("[\\p{Line_break=HY}]", status);\r
-    fCL    = new UnicodeSet("[\\p{Line_break=CL}]", status);\r
-    fEX    = new UnicodeSet("[\\p{Line_break=EX}]", status);\r
-    fIN    = new UnicodeSet("[\\p{Line_break=IN}]", status);\r
-    fNS    = new UnicodeSet("[\\p{Line_break=NS}]", status);\r
-    fOP    = new UnicodeSet("[\\p{Line_break=OP}]", status);\r
-    fQU    = new UnicodeSet("[\\p{Line_break=QU}]", status);\r
-    fIS    = new UnicodeSet("[\\p{Line_break=IS}]", status);\r
-    fNU    = new UnicodeSet("[\\p{Line_break=NU}]", status);\r
-    fPO    = new UnicodeSet("[\\p{Line_break=PO}]", status);\r
-    fPR    = new UnicodeSet("[\\p{Line_break=PR}]", status);\r
-    fSY    = new UnicodeSet("[\\p{Line_break=SY}]", status);\r
-    fAI    = new UnicodeSet("[\\p{Line_break=AI}]", status);\r
-    fAL    = new UnicodeSet("[\\p{Line_break=AL}]", status);\r
-    fID    = new UnicodeSet("[\\p{Line_break=ID}]", status);\r
-    fSA    = new UnicodeSet("[\\p{Line_break=SA}]", status);\r
-    fXX    = new UnicodeSet("[\\p{Line_break=XX}]", status);\r
-\r
-    fAL->addAll(*fXX);     // Default behavior for XX is identical to AL\r
-    fAL->addAll(*fAI);     // Default behavior for AI is identical to AL\r
-    fAL->addAll(*fSA);     // Default behavior for SA is XX, which defaults to AL\r
-\r
-\r
-\r
-    fSets->addElement(fBK, status);\r
-    fSets->addElement(fCR, status);\r
-    fSets->addElement(fLF, status);\r
-    fSets->addElement(fCM, status);\r
-    fSets->addElement(fNL, status);\r
-    fSets->addElement(fWJ, status);\r
-    fSets->addElement(fZW, status);\r
-    fSets->addElement(fGL, status);\r
-    fSets->addElement(fCB, status);\r
-    fSets->addElement(fSP, status);\r
-    fSets->addElement(fB2, status);\r
-    fSets->addElement(fBA, status);\r
-    fSets->addElement(fBB, status);\r
-    fSets->addElement(fHY, status);\r
-    fSets->addElement(fCL, status);\r
-    fSets->addElement(fEX, status);\r
-    fSets->addElement(fIN, status);\r
-    fSets->addElement(fNS, status);\r
-    fSets->addElement(fOP, status);\r
-    fSets->addElement(fQU, status);\r
-    fSets->addElement(fIS, status);\r
-    fSets->addElement(fNU, status);\r
-    fSets->addElement(fPO, status);\r
-    fSets->addElement(fPR, status);\r
-    fSets->addElement(fSY, status);\r
-    fSets->addElement(fAI, status);\r
-    fSets->addElement(fAL, status);\r
-    fSets->addElement(fID, status);\r
-    // NOTE(review): fWJ is listed a second time here (it was already added above).\r
-    //               Kept as-is because dropping it would change the vector that\r
-    //               charClasses() returns.\r
-    fSets->addElement(fWJ, status);\r
-    fSets->addElement(fSA, status);\r
-    // fSets->addElement(fXX, status);\r
-\r
-\r
-\r
-    // LB 18:  PR? ( OP | HY )? NU (NU | IS | SY)* CL? PO?   each item followed by CM*\r
-    fNumberMatcher = new RegexMatcher(\r
-        "(\\p{Line_Break=PR}\\p{Line_Break=CM}*)?"\r
-        "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"\r
-        "\\p{Line_Break=NU}\\p{Line_Break=CM}*"\r
-        "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"\r
-        "(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"\r
-        "(\\p{Line_Break=PO}\\p{Line_Break=CM}*)?",\r
-        0, status);\r
-\r
-    // LB 10:  QU CM* SP* (OP) CM*\r
-    fLB10Matcher = new RegexMatcher(\r
-        "\\p{Line_Break=QU}\\p{Line_Break=CM}*"\r
-        "\\p{Line_Break=SP}*"\r
-        "(\\p{Line_Break=OP})\\p{Line_Break=CM}*",\r
-        0, status);\r
-\r
-    // LB 11:  CL CM* SP* (NS) CM*\r
-    fLB11Matcher = new RegexMatcher(\r
-        "\\p{Line_Break=CL}\\p{Line_Break=CM}*"\r
-        "\\p{Line_Break=SP}*"\r
-        "(\\p{Line_Break=NS})\\p{Line_Break=CM}*",\r
-        0, status);\r
-\r
-    fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);\r
-\r
-    if (U_FAILURE(status)) {\r
-        deferredStatus = status;\r
-    }\r
-}\r
-\r
-\r
-// Selects the string to be analyzed.  Only the address of s is kept in fText,\r
-// so the caller's string has to outlive the subsequent calls to next().\r
-void RBBILineMonkey::setText(const UnicodeString &s) {\r
-    fNumberMatcher->reset(s);\r
-    fCharBI->setText(s);\r
-    fText = &s;\r
-}\r
-\r
-//\r
-//  rule67Adjust\r
-//     Line Break TR rules 6 and 7 implementation.\r
-//     This deals with combining marks, Hangul Syllables, and other sequences\r
-//     that must be treated as if they were something other than what they actually are.\r
-//\r
-//     This is factored out into a separate function because it must be applied twice for\r
-//     each potential break, once to the chars before the position being checked, then\r
-//     again to the text following the possible break.\r
-//\r
-//     pos        index in fText of the character being adjusted; -1 means "no character yet"\r
-//                and makes the function return without touching anything.\r
-//     posChar    in/out:  the character at pos.  May be rewritten to 0x4e00 (ID) or 0x41 (AL).\r
-//     nextPos    in/out:  index of the following character; advanced past any Hangul\r
-//                syllable remainder and Line Break CM characters that were absorbed.\r
-//     nextChar   out:     the character at the updated *nextPos.\r
-//\r
-void RBBILineMonkey::rule67Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar) {\r
-    if (pos == -1) {\r
-        // Invalid initial position.  Happens during the warmup iteration of the \r
-        //   main loop in next().\r
-        return;\r
-    }\r
-\r
-    int32_t  nPos = *nextPos;\r
-    \r
-    // LB 6  Treat Korean Syllables as a single unit\r
-    int32_t  hangultype = u_getIntPropertyValue(*posChar, UCHAR_HANGUL_SYLLABLE_TYPE);\r
-    if (hangultype != U_HST_NOT_APPLICABLE) {\r
-        nPos = fCharBI->following(pos);   // Advance by grapheme cluster, which\r
-                                          //  contains the logic to locate Hangul syllables.\r
-        // Grapheme Cluster Ugliness: some Grapheme_Extend chars, which are absorbed\r
-        //   into a grapheme cluster, are NOT Line Break CM. (Some are GL, for example.)\r
-        //   We don't want to consume any of these.  The approach is\r
-        //      1.  Back nPos up, undoing the consumption of any\r
-        //          Grapheme_Extend chars by the char break iterator.\r
-        //      2.  Let the LB 7b logic below reconsume any Line Break CM chars.\r
-        // The loop only exits when a char of class ID is found while stepping backwards.\r
-        for (;;) {\r
-            nPos = fText->moveIndex32(nPos, -1);\r
-            UChar32 possiblyExtendChar = fText->char32At(nPos);\r
-            if (fID->contains(possiblyExtendChar)) {\r
-                // We hit into the Hangul Syllable itself, class is ID.\r
-                nPos = fText->moveIndex32(nPos, +1);\r
-                break;\r
-            }\r
-        }\r
-    }\r
-    \r
-    // LB 7b  Keep combining sequences together.\r
-    //  advance over any CM class chars.  (Line Break CM class is different from\r
-    //    grapheme cluster CM, so we need to do this even for Hangul Syllables.\r
-    //    Line Break may eat additional stuff as combining, beyond what grapheme cluster did.)\r
-    //  Not done after BK, ZW, LF (0x0a), CR (0x0d) or NEL (0x85).\r
-    if (!(fBK->contains(*posChar) || fZW->contains(*posChar) || *posChar==0x0a \r
-        || *posChar==0x0d || *posChar==0x85)) {\r
-        for (;;) {\r
-            *nextChar = fText->char32At(nPos);\r
-            if (!fCM->contains(*nextChar)) {\r
-                break;\r
-            }\r
-            nPos = fText->moveIndex32(nPos, 1);\r
-        }\r
-    }\r
-    \r
-    \r
-    // LB 7a In a SP CM* sequence, treat the SP as an ID\r
-    //       (nPos != *nextPos means that at least one character was absorbed above.)\r
-    if (nPos != *nextPos && fSP->contains(*posChar)) {\r
-        *posChar = 0x4e00;   // 0x4e00 is a CJK Ideograph, linebreak type is ID.\r
-    }\r
-    \r
-    // LB 7b Treat X CM* as if it were x.\r
-    //       No explicit action required.  \r
-    \r
-    // LB 7c  Treat any remaining combining mark as AL\r
-    if (fCM->contains(*posChar)) {\r
-        *posChar = 0x41;   // thisChar = 'A';\r
-    }\r
-\r
-    // Push the updated nextPos and nextChar back to our caller.\r
-    // This only makes a difference if posChar got bigger, by slurping up a\r
-    // combining sequence or Hangul syllable.\r
-    *nextPos  = nPos;\r
-    *nextChar = fText->char32At(nPos);\r
-}\r
-\r
-\r
-\r
-//\r
-//  next\r
-//     Reference implementation of line-break detection for the monkey test.\r
-//     Scans forward from startPos through the text given to setText(), applying the\r
-//     numbered LB rules below in order, and returns the index of the first break\r
-//     position found.  Reaching the end of the text counts as a break.\r
-//     Returns -1 when startPos is at or beyond the end of the text.\r
-//\r
-//     Within the loop, "continue" means "no break here, look at the next position"\r
-//     and "break" means "a break was found at pos".\r
-//\r
-int32_t RBBILineMonkey::next(int32_t startPos) {\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    int32_t    pos;       //  Index of the char following a potential break position\r
-    UChar32    thisChar;  //  Character at above position "pos"\r
-\r
-    int32_t    prevPos;   //  Index of the char preceding a potential break position\r
-    UChar32    prevChar;  //  Character at above position.  Note that prevChar\r
-                          //   and thisChar may not be adjacent because combining\r
-                          //   characters between them will be ignored.\r
-\r
-    int32_t    nextPos;   //  Index of the next character following pos.\r
-                          //     Usually skips over combining marks.\r
-    int32_t    nextCPPos; //  Index of the code point following "pos."\r
-                          //     May point to a combining mark.\r
-    int32_t    tPos;      //  temp value.\r
-    UChar32    c;\r
-\r
-    if (startPos >= fText->length()) {\r
-        return -1;\r
-    }\r
-\r
-\r
-    // Initial values for loop.  Loop will run the first time without finding breaks,\r
-    //                           while the invalid values shift out and the "this" and\r
-    //                           "prev" positions are filled in with good values.\r
-    pos      = prevPos   = -1;    // Invalid value, serves as flag for initial loop iteration.\r
-    thisChar = prevChar  = 0;\r
-    nextPos  = nextCPPos = startPos;\r
-\r
-\r
-    // Loop runs once per position in the test text, until a break position\r
-    //  is found.\r
-    for (;;) {\r
-        // Shift the window forward by one position:  this -> prev,  next -> this.\r
-        prevPos   = pos;\r
-        prevChar  = thisChar;\r
-\r
-        pos       = nextPos;\r
-        thisChar  = fText->char32At(pos);\r
-\r
-        nextCPPos = fText->moveIndex32(pos, 1);\r
-        nextPos   = nextCPPos;\r
-\r
-        // Break at end of text.\r
-        if (pos >= fText->length()) {\r
-            break;\r
-        }\r
-\r
-        // LB 3a  Always break after hard line breaks,\r
-        if (fBK->contains(prevChar)) {\r
-            break;\r
-        }\r
-\r
-        // LB 3b  Break after CR, LF, NL, but not inside CR LF\r
-        if (prevChar == 0x0d && thisChar == 0x0a) {\r
-            continue;\r
-        }\r
-        if (prevChar == 0x0d ||\r
-            prevChar == 0x0a ||\r
-            prevChar == 0x85)  {\r
-            break;\r
-        }\r
-\r
-        // LB 3c  Don't break before hard line breaks\r
-        if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 ||\r
-            fBK->contains(thisChar)) {\r
-                continue;\r
-        }\r
-\r
-        // LB 10    QU SP* x OP\r
-        //          The regex is applied to a copy of the text starting at prevPos, so\r
-        //          match offsets are relative to prevPos.\r
-        if (prevPos >= 0) {\r
-            UnicodeString  subStr10(*fText, prevPos);\r
-            fLB10Matcher->reset(subStr10);\r
-            status = U_ZERO_ERROR;\r
-            if (fLB10Matcher->lookingAt(status)) {  //   /QU CM* SP* (OP) CM*/;\r
-                // TODO:  Check status codes\r
-                // Reposition on the OP (capture group 1); resume after the whole match.\r
-                pos      = prevPos + fLB10Matcher->start(1, status);\r
-                nextPos  = prevPos + fLB10Matcher->end(0, status);\r
-                thisChar = fText->char32At(pos);\r
-                continue;\r
-            }\r
-        }\r
-\r
-        // LB 11   CL SP* x NS\r
-        if (prevPos >= 0) {\r
-            UnicodeString  subStr11(*fText, prevPos);\r
-            fLB11Matcher->reset(subStr11);\r
-            status = U_ZERO_ERROR;\r
-            if (fLB11Matcher->lookingAt(status)) {  //   /CL CM* SP* (NS) CM*/;\r
-                // TODO:  Check status codes\r
-                // Reposition on the NS (capture group 1); resume after the whole match.\r
-                pos      = prevPos + fLB11Matcher->start(1, status);\r
-                nextPos  = prevPos + fLB11Matcher->end(0, status);\r
-                thisChar = fText->char32At(pos);\r
-                continue;\r
-            }\r
-        }\r
-\r
-        // LB 4  Don't break before spaces or zero-width space.\r
-        if (fSP->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        if (fZW->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 5  Break after zero width space\r
-        if (fZW->contains(prevChar)) {\r
-            break;\r
-        }\r
-\r
-        // LB 6, LB 7\r
-        //   First adjust the char before the potential break (may move pos forward),\r
-        //   then, further down, the char after it (may move nextPos forward).\r
-        /*int32_t oldpos = pos;*/\r
-        rule67Adjust(prevPos, &prevChar, &pos,     &thisChar);\r
-        \r
-        nextCPPos = fText->moveIndex32(pos, 1);\r
-        nextPos   = nextCPPos;\r
-        c = fText->char32At(nextPos);\r
-        // another peculiarity of LB 4 - Don't break before space\r
-        if (fSP->contains(thisChar)) {\r
-            continue;\r
-        }\r
-        rule67Adjust(pos,     &thisChar, &nextPos, &c);\r
-\r
-        // If the loop is still warming up - if we haven't shifted the initial\r
-        //   -1 positions out of prevPos yet - loop back to advance the\r
-        //    position in the input without any further looking for breaks.\r
-        if (prevPos == -1) {\r
-            continue;\r
-        }\r
-\r
-        // Re-apply rules 3c, 4 because these could be affected by having\r
-        //                      a new thisChar from doing rule 6 or 7.\r
-        if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 ||   // 3c\r
-            fBK->contains(thisChar)) {\r
-                continue;\r
-        }\r
-        if (fSP->contains(thisChar)) {    // LB 4\r
-            continue;\r
-        }\r
-        if (fZW->contains(thisChar)) {    // LB 4\r
-            continue;\r
-        }\r
-\r
-\r
-        // LB 8  Don't break before closings.\r
-        //       NU x CL  and NU x IS are not matched here so that they will\r
-        //       fall into LB 17 and the more general number regular expression.\r
-        //       Note:  && binds tighter than ||, so the EX test stands alone (x EX always).\r
-        //\r
-        if (!fNU->contains(prevChar) && fCL->contains(thisChar) ||\r
-                                        fEX->contains(thisChar) ||\r
-            !fNU->contains(prevChar) && fIS->contains(thisChar) ||\r
-            !fNU->contains(prevChar) && fSY->contains(thisChar))    {\r
-            continue;\r
-        }\r
-\r
-        // LB 9  Don't break after OP SP*\r
-        //       Scan backwards, checking for this sequence.\r
-        //       The OP char could include combining marks, so we actually check for\r
-        //           OP CM* SP*\r
-        //       Another Twist: The Rule 67 fixes may have changed a SP CM\r
-        //       sequence into a ID char, so before scanning back through spaces,\r
-        //       verify that prevChar is indeed a space.  The prevChar variable\r
-        //       may differ from fText[prevPos]\r
-        tPos = prevPos;\r
-        if (fSP->contains(prevChar)) {\r
-            while (tPos > 0 && fSP->contains(fText->char32At(tPos))) {\r
-                tPos=fText->moveIndex32(tPos, -1);\r
-            }\r
-        }\r
-        while (tPos > 0 && fCM->contains(fText->char32At(tPos))) {\r
-            tPos=fText->moveIndex32(tPos, -1);\r
-        }\r
-        if (fOP->contains(fText->char32At(tPos))) {\r
-            continue;\r
-        }\r
-\r
-\r
-        // LB 11a        B2 x B2\r
-        if (fB2->contains(thisChar) && fB2->contains(prevChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 11b   \r
-        //    x  GL\r
-        //    GL  x\r
-        //    (the same treatment is given to WJ)\r
-        if (fGL->contains(thisChar) || fGL->contains(prevChar)) {\r
-            continue;\r
-        }\r
-        if (fWJ->contains(thisChar) || fWJ->contains(prevChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 12    break after space\r
-        if (fSP->contains(prevChar)) {\r
-            break;\r
-        }\r
-\r
-        // LB 14\r
-        //    x   QU\r
-        //    QU  x\r
-        if (fQU->contains(thisChar) || fQU->contains(prevChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 14a  Break around a CB\r
-        if (fCB->contains(thisChar) || fCB->contains(prevChar)) {\r
-            break;\r
-        }\r
-\r
-        // LB 15    x BA,  x HY,  x NS,  BB x\r
-        if (fBA->contains(thisChar) ||\r
-            fHY->contains(thisChar) ||\r
-            fNS->contains(thisChar) ||\r
-            fBB->contains(prevChar) )   {\r
-            continue;\r
-        }\r
-\r
-        // LB 16    (AL | ID | IN | NU) x IN\r
-        if (fAL->contains(prevChar) && fIN->contains(thisChar) ||\r
-            fID->contains(prevChar) && fIN->contains(thisChar) ||\r
-            fIN->contains(prevChar) && fIN->contains(thisChar) ||\r
-            fNU->contains(prevChar) && fIN->contains(thisChar) )   {\r
-            continue; \r
-        }\r
-\r
-\r
-        // LB 17    ID x PO    (Note:  Leading CM behaves like ID)\r
-        //          AL x NU\r
-        //          NU x AL\r
-        if (fID->contains(prevChar) && fPO->contains(thisChar) ||\r
-            fCM->contains(prevChar) && fPO->contains(thisChar) || \r
-            fAL->contains(prevChar) && fNU->contains(thisChar) ||\r
-            fNU->contains(prevChar) && fAL->contains(thisChar) )   {\r
-            continue; \r
-        }\r
-\r
-        // LB 18    Numbers\r
-        //          As with LB 10 / LB 11, the regex runs on a copy of the text starting at prevPos.\r
-        //          NOTE(review): status is not reset to U_ZERO_ERROR before this lookingAt(),\r
-        //                        unlike the LB 10 / LB 11 matchers above.\r
-        UnicodeString  subStr18(*fText, prevPos);\r
-        fNumberMatcher->reset(subStr18);\r
-        if (fNumberMatcher->lookingAt(status)) {\r
-            // TODO:  Check status codes\r
-            // Matched a number.  But could have been just a single digit, which would\r
-            //    not represent a "no break here" between prevChar and thisChar\r
-            int32_t numEndIdx = prevPos + fNumberMatcher->end(status);  // idx of first char following num\r
-            if (numEndIdx > pos) {\r
-                // Number match includes at least our two chars being checked\r
-                if (numEndIdx > nextPos) {\r
-                    // Number match includes additional chars.  Update pos and nextPos\r
-                    //   so that next loop iteration will continue at the end of the number,\r
-                    //   checking for breaks between last char in number & whatever follows.\r
-                    nextPos = numEndIdx;\r
-                    pos = fCharBI->preceding(numEndIdx); \r
-                    thisChar = fText->char32At(pos);\r
-                    // Back up over trailing combining marks to the base char of the number's end.\r
-                    while (fCM->contains(thisChar)) {\r
-                        pos = fCharBI->preceding(pos);\r
-                        thisChar = fText->char32At(pos);\r
-                    }\r
-                }\r
-                continue;\r
-            }\r
-        }\r
-\r
-        //          PR x AL\r
-        if (fPR->contains(prevChar) && fAL->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        //          PR x ID\r
-        if (fPR->contains(prevChar) && fID->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 18b   Break after HY, break before BB\r
-        if (fHY->contains(prevChar) || fBB->contains(thisChar)) {\r
-            break;\r
-        }\r
-\r
-        // LB 19    AL x AL\r
-        if (fAL->contains(prevChar) && fAL->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 19b   IS x AL\r
-        if (fIS->contains(prevChar) && fAL->contains(thisChar)) {\r
-            continue;\r
-        }\r
-\r
-        // LB 20    Break everywhere else\r
-        break;\r
-            \r
-    }\r
-    \r
-    return pos;\r
-}\r
-\r
-\r
-// Returns the vector of line-break character-class sets held in fSets.\r
-// The vector is deleted by the destructor, so the returned pointer is borrowed.\r
-UVector  *RBBILineMonkey::charClasses() {\r
-    return fSets;\r
-}\r
-\r
-\r
-RBBILineMonkey::~RBBILineMonkey() {\r
-    delete fSets;\r
-\r
-    delete fBK;\r
-    delete fCR;\r
-    delete fLF;\r
-    delete fCM;\r
-    delete fNL;\r
-    delete fWJ;\r
-    delete fZW;\r
-    delete fGL;\r
-    delete fCB;\r
-    delete fSP;\r
-    delete fB2;\r
-    delete fBA;\r
-    delete fBB;\r
-    delete fHY;\r
-    delete fCL;\r
-    delete fEX;\r
-    delete fIN;\r
-    delete fNS;\r
-    delete fOP;\r
-    delete fQU;\r
-    delete fIS;\r
-    delete fNU;\r
-    delete fPO;\r
-    delete fPR;\r
-    delete fSY;\r
-    delete fAI;\r
-    delete fAL;\r
-    delete fID;\r
-    delete fSA;\r
-    delete fXX;\r
-\r
-    delete fCharBI;\r
-    delete fNumberMatcher;\r
-    delete fLB10Matcher;\r
-    delete fLB11Matcher;\r
-}\r
-\r
-\r
-//-------------------------------------------------------------------------------------------\r
-//\r
-//   TestMonkey\r
-//\r
-//     params\r
-//       seed=nnnnn        Random number starting seed.\r
-//                         Setting the seed allows errors to be reproduced.\r
-//       loop=nnn          Looping count.  Controls running time.\r
-//                         -1:  run forever.\r
-//                          0 or greater:  run length.\r
-//\r
-//       type = char | word | line | sent | title\r
-//\r
-//-------------------------------------------------------------------------------------------\r
-\r
-static int32_t  getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {\r
-    int32_t val = defaultVal;\r
-    name.append(" *= *(-?\\d+)");\r
-    UErrorCode status = U_ZERO_ERROR;\r
-    RegexMatcher m(name, params, 0, status);\r
-    if (m.find()) {\r
-        // The param exists.  Convert the string to an int.\r
-        char valString[100];\r
-        int32_t paramLength = m.end(1, status) - m.start(1, status);\r
-        if (paramLength >= (int32_t)(sizeof(valString)-1)) {\r
-            paramLength = (int32_t)(sizeof(valString)-2);\r
-        }\r
-        params.extract(m.start(1, status), paramLength, valString, sizeof(valString));\r
-        val = strtol(valString,  NULL, 10);\r
-\r
-        // Delete this parameter from the params string.\r
-        m.reset();\r
-        params = m.replaceFirst("", status);\r
-    }\r
-    U_ASSERT(U_SUCCESS(status));\r
-    return val;\r
-}\r
-#endif\r
-\r
-// Checks a break iterator against a list of expected boundaries.\r
-//   test           test object used to report failures.\r
-//   ustr           text to iterate over.\r
-//   bi             break iterator under test; its text is set to ustr.\r
-//   expected       expected boundary positions, in ascending order.\r
-//   expectedcount  number of entries in expected.\r
-// Four passes are made: forward iteration with next(), isBoundary() on every\r
-// position between the first and last expected boundary, reverse iteration with\r
-// previous(), and preceding() from every position after the first boundary.\r
-// The first failure is reported through test->errln() and ends the check.\r
-static void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr,\r
-                                    BreakIterator *bi,\r
-                                    int expected[],\r
-                                    int expectedcount)\r
-{\r
-    const int kForwardCapacity = 50;\r
-    int count = 0;\r
-    int i = 0;\r
-    int forward[kForwardCapacity];\r
-    bi->setText(ustr);\r
-    for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {\r
-        if (count >= kForwardCapacity) {\r
-            // The iterator produced more boundaries than forward[] can record.\r
-            // Stop here rather than write past the end of the array.\r
-            printStringBreaks(ustr, expected, expectedcount);\r
-            test->errln("break forward test failed: more than %d boundaries found",\r
-                        kForwardCapacity);\r
-            return;\r
-        }\r
-        forward[count] = i;\r
-        if (count < expectedcount && expected[count] != i) {\r
-            test->errln("break forward test failed: expected %d but got %d",\r
-                        expected[count], i);\r
-            break;\r
-        }\r
-        count ++;\r
-    }\r
-    if (count != expectedcount) {\r
-        printStringBreaks(ustr, expected, expectedcount);\r
-        test->errln("break test failed: missed %d match",\r
-                    expectedcount - count);\r
-        return;\r
-    }\r
-    // testing boundaries\r
-    for (i = 1; i < expectedcount; i ++) {\r
-        int j = expected[i - 1];\r
-        if (!bi->isBoundary(j)) {\r
-            printStringBreaks(ustr, expected, expectedcount);\r
-            test->errln("Expected boundary at position %d", j);\r
-            return;\r
-        }\r
-        for (j = expected[i - 1] + 1; j < expected[i]; j ++) {\r
-            if (bi->isBoundary(j)) {\r
-                printStringBreaks(ustr, expected, expectedcount);\r
-                test->errln("Not expecting boundary at position %d", j);\r
-                return;\r
-            }\r
-        }\r
-    }\r
-\r
-    // Reverse iteration must visit the same boundaries as the forward pass,\r
-    // in the opposite order.\r
-    for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {\r
-        if (count == 0) {\r
-            // More boundaries going backwards than going forwards; there is\r
-            // nothing left in forward[] to compare against.\r
-            printStringBreaks(ustr, expected, expectedcount);\r
-            test->errln("happy break test reverse failed: unexpected extra boundary at %d", i);\r
-            return;\r
-        }\r
-        count --;\r
-        if (forward[count] != i) {\r
-            test->errln("happy break test reverse failed: expected %d but got %d",\r
-                        forward[count], i);\r
-            break;\r
-        }\r
-    }\r
-    if (count != 0) {\r
-        printStringBreaks(ustr, expected, expectedcount);\r
-        test->errln("happy break test failed: missed a match");\r
-        return;\r
-    }\r
-\r
-    // testing preceding\r
-    for (i = 0; i < expectedcount - 1; i ++) {\r
-        int j = expected[i] + 1;\r
-        for (; j <= expected[i + 1]; j ++) {\r
-            if (bi->preceding(j) != expected[i]) {\r
-                printStringBreaks(ustr, expected, expectedcount);\r
-                test->errln("Not expecting backwards boundary at position %d", j);\r
-                return;\r
-            }\r
-        }\r
-    }\r
-}\r
-\r
-// Regression test for word breaking.  For each escaped test string, the expected\r
-// boundaries are computed with RBBIWordMonkey and then compared against the\r
-// English word break iterator by testBreakBoundPreceding().\r
-// Compiled to an empty function when regular expressions are configured out.\r
-void RBBITest::TestWordBreaks(void)\r
-{\r
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-\r
-    // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data>\r
-    Locale        locale("en");\r
-    UErrorCode    status = U_ZERO_ERROR;\r
-    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);\r
-    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);\r
-    UChar         str[300];\r
-    // At most this many UTF-16 units of each test string are used.  This keeps\r
-    // the number of boundaries well below the capacity of expected[] below.\r
-    const int32_t kMaxTestUnits = 25;\r
-    const int     kMaxBreaks    = 50;\r
-    static const char *strlist[] =\r
-    {\r
-    "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",\r
-    "\\U000e0037\\u4666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u591c\\U000e0040\\u003b",\r
-    "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u179c\\u0027\\U000e0061\\u003a",\r
-    "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622",\r
-    "\\u90ca\\u3588\\u009c\\u0953\\u194b",\r
-    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",\r
-    "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",\r
-    "\\u7f1f\\uc634\\u65f8\\u0944\\u04f2\\uacdf\\u1f9c\\u05f4\\u002e",\r
-    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",\r
-    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",\r
-    "\\u2027\\U000e0067\\u0a47\\u00b7",\r
-    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",\r
-    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",\r
-    "\\u0589\\U000e006e\\u0a42\\U000104a5",\r
-    "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",\r
-    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",\r
-    "\\u0027\\u11af\\U000e0057\\u0602",\r
-    // NOTE(review): the second escape in the next string has only seven hex digits,\r
-    //               so u_unescape() rejects the string and an empty string is tested.\r
-    //               The well-formed variant follows two entries below.\r
-    "\\U0001d7f2\\U000e007\\u0004\\u0589",\r
-    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",\r
-    "\\U0001d7f2\\U000e007d\\u0004\\u0589",\r
-    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",\r
-    "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",\r
-    "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",\r
-    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",\r
-    "\\u0233\\U000e0020\\u0a69\\u0d6a",\r
-    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",\r
-    "\\u58f4\\U000e0049\\u20e7\\u2027",\r
-    "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",\r
-    "\\ua183\\u102d\\u0bec\\u003a",\r
-    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",\r
-    "\\u003a\\u0e57\\u0fad\\u002e",\r
-    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",\r
-    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",\r
-    "\\U000e005d\\u2044\\u0731\\u0650\\u0061",\r
-    "\\u003a\\u0664\\u00b7\\u1fba",\r
-    "\\u003b\\u0027\\u00b7\\u47a3",\r
-    "\\u2027\\U000e0067\\u0a42\\u00b7\\ubddf\\uc26c\\u003a\\u4186\\u041b",\r
-    "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673",\r
-    "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c",\r
-    };\r
-    int loop;\r
-    if (U_FAILURE(status)) {\r
-        errln("Creation of break iterator failed %s", u_errorName(status));\r
-        return;\r
-    }\r
-    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {\r
-        // printf("looping %d\n", loop);\r
-        // u_unescape() returns the full unescaped length and does not write a\r
-        // terminating NUL when the output fills the given capacity, so build the\r
-        // string from an explicit, clamped length instead of relying on a NUL.\r
-        int32_t len = u_unescape(strlist[loop], str, kMaxTestUnits);\r
-        if (len > kMaxTestUnits) {\r
-            len = kMaxTestUnits;\r
-        }\r
-        UnicodeString ustr(str, len);\r
-        // RBBICharMonkey monkey;\r
-        RBBIWordMonkey monkey;\r
-\r
-        int expected[kMaxBreaks];\r
-        int expectedcount = 0;\r
-        UBool tooManyBreaks = FALSE;\r
-\r
-        monkey.setText(ustr);\r
-        int i;\r
-        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {\r
-            if (expectedcount >= kMaxBreaks) {\r
-                // Never write past the end of expected[].\r
-                tooManyBreaks = TRUE;\r
-                break;\r
-            }\r
-            expected[expectedcount ++] = i;\r
-        }\r
-        if (tooManyBreaks) {\r
-            errln("TestWordBreaks: more than %d expected breaks for test string %d",\r
-                  kMaxBreaks, loop);\r
-            continue;\r
-        }\r
-\r
-        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);\r
-    }\r
-    delete bi;\r
-#endif\r
-}\r
-\r
-void RBBITest::TestWordBoundary(void)\r
-{\r
-    // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data>\r
-    Locale        locale("en");\r
-    UErrorCode    status = U_ZERO_ERROR;\r
-    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);\r
-    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);\r
-    UChar         str[50]; \r
-    static const char *strlist[] = \r
-    {\r
-    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",\r
-    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",\r
-    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",\r
-    "\\u2027\\U000e0067\\u0a47\\u00b7",\r
-    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",\r
-    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",\r
-    "\\u0589\\U000e006e\\u0a42\\U000104a5",\r
-    "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",\r
-    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",\r
-    "\\u0027\\u11af\\U000e0057\\u0602",\r
-    "\\U0001d7f2\\U000e007\\u0004\\u0589",\r
-    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",\r
-    "\\U0001d7f2\\U000e007d\\u0004\\u0589",\r
-    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",\r
-    "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",\r
-    "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",\r
-    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",\r
-    "\\u0233\\U000e0020\\u0a69\\u0d6a",\r
-    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",\r
-    "\\u58f4\\U000e0049\\u20e7\\u2027",\r
-    "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",\r
-    "\\ua183\\u102d\\u0bec\\u003a",\r
-    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",\r
-    "\\u003a\\u0e57\\u0fad\\u002e",\r
-    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",\r
-    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",\r
-    "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019",\r
-    "\\u003a\\u0664\\u00b7\\u1fba",\r
-    "\\u003b\\u0027\\u00b7\\u47a3",\r
-    };\r
-    int loop;\r
-    if (U_FAILURE(status)) {\r
-        errln("Creation of break iterator failed %s", u_errorName(status));\r
-        return;\r
-    }\r
-    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {\r
-        // printf("looping %d\n", loop);\r
-        u_unescape(strlist[loop], str, 20);\r
-        UnicodeString ustr(str);\r
-        int forward[50];\r
-        int count = 0;\r
-        \r
-        bi->setText(ustr);\r
-        int prev = 0;\r
-        int i;\r
-        for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {\r
-            forward[count ++] = i;\r
-            if (i > prev) {\r
-                int j;\r
-                for (j = prev + 1; j < i; j ++) {\r
-                    if (bi->isBoundary(j)) {\r
-                        printStringBreaks(ustr, forward, count);\r
-                        errln("happy boundary test failed: expected %d not a boundary", \r
-                               j);\r
-                        return;\r
-                    }\r
-                }\r
-            }\r
-            if (!bi->isBoundary(i)) {\r
-                printStringBreaks(ustr, forward, count);\r
-                errln("happy boundary test failed: expected %d a boundary", \r
-                       i);\r
-                return;\r
-            }\r
-            prev = i;\r
-        }\r
-    }\r
-    delete bi;\r
-}\r
-\r
-void RBBITest::TestLineBreaks(void)\r
-{\r
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-    Locale        locale("en");\r
-    UErrorCode    status = U_ZERO_ERROR;\r
-    BreakIterator *bi = BreakIterator::createLineInstance(locale, status);\r
-    UChar         str[50]; \r
-    static const char *strlist[] = \r
-    {\r
-     "\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f",\r
-     "\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",\r
-     "\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4",\r
-     "\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",\r
-     "\\u002d\\uff1b\\u02c8\\u2029\\ufeff\\u0f22\\u2044\\ufe09\\u003a\\u096d\\u2009\\u000a\\u06f7\\u02cc\\u1019\\u2060",\r
-     "\\u1781\\u0b68\\u0f0c\\u3010\\u0085\\U00011f7a\\u0020\\u0dd6\\u200b\\U000e007a\\u000a\\u2060\\u2026\\u002f\\u2026\\u24dc\\u101e\\u2014\\u2007\\u30a5",\r
-     "\\u2770\\u0020\\U000e010f\\u0020\\u2060\\u000a\\u02cc\\u0bcc\\u060d\\u30e7\\u0f3b\\u002f",\r
-     "\\ufeff\\u0028\\u003b\\U00012fec\\u2010\\u0020\\u0004\\u200b\\u0020\\u275c\\u002f\\u17b1",\r
-     "\\u20a9\\u2014\\u00a2\\u31f1\\u002f\\u0020\\u05b8\\u200b\\u0cc2\\u003b\\u060d\\u02c8\\ua4e8\\u002f\\u17d5",\r
-     "\\u002d\\u136f\\uff63\\u0084\\ua933\\u2028\\u002d\\u431b\\u200b\\u20b0",\r
-     "\\uade3\\u11d6\\u000a\\U0001107d\\u203a\\u201d\\ub070\\u000d\\u2024\\ufffc",\r
-     "\\uff5b\\u101c\\u1806\\u002f\\u2213\\uff5f",\r
-     "\\u2014\\u0a83\\ufdfc\\u003f\\u00a0\\u0020\\u000a\\u2991\\U0001d179\\u0020\\u201d\\U000125f6\\u0a67\\u20a7\\ufeff\\u043f",\r
-     "\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b",\r
-     "\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085",\r
-     "\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac",\r
-     "\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9",\r
-     "\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025",\r
-     "\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763",\r
-     "\\u002f\\u2563\\u202f\\u0085\\u17d5\\u200b\\u0020\\U000e0043\\u2014\\u058a\\u3d0a\\ufe57\\u2035\\u2028\\u2029",\r
-     "\\u20ae\\U0001d169\\u9293\\uff1f\\uff1f\\u0021\\u2012\\u2039\\u0085\\u02cc\\u00a2\\u0020\\U000e01ab\\u3085\\u0f3a\\u1806\\u0f0c\\u1945\\u000a\\U0001d7e7",\r
-     "\\uffe6\\u00a0\\u200b\\u0085\\u2116\\u255b\\U0001d7f7\\u178c\\ufffc",\r
-     "\\u02cc\\ufe6a\\u00a0\\u0021\\u002d\\u7490\\uec2e\\u200b\\u000a",\r
-     "\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014\\u8945",\r
-     "\\u7490\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014",\r
-     "\\u0020\\u2028\\u2014\\u8945\\u002c\\u005b",\r
-     "\\u000a\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0",\r
-     "\\u2473\\u0e9d\\u0020\\u0085\\u000a\\ufe3c\\u201c\\u000d\\u2025",\r
-     "\\U0001d16e\\ufffc\\u2025\\u0021\\u002d",\r
-     "\\ufffc\\u301b\\u0fa5\\U000e0103\\u2060\\u208e\\u17d5\\u034f\\u1009\\u003a\\u180e\\u2009\\u3111",\r
-     "\\u2014\\u0020\\u000a\\u17c5\\u24fc",\r
-     "\\ufffc\\u0020\\u2116\\uff6c\\u200b\\u0ac3\\U0001028f",\r
-     "\\uaeb0\\u0344\\u0085\\ufffc\\u073b\\u2010",\r
-     "\\ufeff\\u0589\\u0085\\u0eb8\\u30fd\\u002f\\u003a\\u2014\\ufe43",\r
-     "\\u09cc\\u256a\\u276d\\u002d\\u3085\\u000d\\u0e05\\u2028\\u0fbb",\r
-     "\\u2034\\u00bb\\u0ae6\\u300c\\u0020\\u31f8\\ufffc",\r
-     "\\u2116\\u0ed2\\uff64\\u02cd\\u2001\\u2060",\r
-    };\r
-    int loop;\r
-    if (U_FAILURE(status)) {\r
-        errln("Creation of break iterator failed %s", u_errorName(status));\r
-        return;\r
-    }\r
-    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {\r
-        // printf("looping %d\n", loop);\r
-        u_unescape(strlist[loop], str, 20);\r
-        UnicodeString ustr(str);\r
-        RBBILineMonkey monkey;\r
-\r
-        int expected[50];\r
-        int expectedcount = 0;\r
-\r
-        monkey.setText(ustr);\r
-        int i;\r
-        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {\r
-            expected[expectedcount ++] = i;\r
-        }\r
-\r
-        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);\r
-    }\r
-    delete bi;\r
-#endif\r
-}\r
-\r
-void RBBITest::TestSentBreaks(void)\r
-{\r
-    Locale        locale("en");\r
-    UErrorCode    status = U_ZERO_ERROR;\r
-    BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);\r
-    UChar         str[100]; \r
-    static const char *strlist[] = \r
-    {\r
-     "Now\ris\nthe\r\ntime\n\rfor\r\r",\r
-     "This\n",\r
-     "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.",\r
-     "\"Sentence ending with a quote.\" Bye.",\r
-     "  (This is it).  Testing the sentence iterator. \"This isn't it.\"", \r
-     "Hi! This is a simple sample sentence. (This is it.) This is a simple sample sentence. \"This isn't it.\"",\r
-     "Hi! This is a simple sample sentence. It does not have to make any sense as you can see. ",\r
-     "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ",\r
-     "Che la dritta via aveo smarrita. He said, that I said, that you said!! ",\r
-     "Don't rock the boat.\\u2029Because I am the daddy, that is why. Not on my time (el timo.)!",\r
-    };\r
-    int loop;\r
-    int forward[100];\r
-    if (U_FAILURE(status)) {\r
-        errln("Creation of break iterator failed %s", u_errorName(status));\r
-        return;\r
-    }\r
-    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {\r
-        u_unescape(strlist[loop], str, 100);\r
-        UnicodeString ustr(str);\r
-\r
-        int count = 0;\r
-        bi->setText(ustr);\r
-        int i;\r
-        for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {\r
-            forward[count ++] = i;\r
-        }\r
-        testBreakBoundPreceding(this, ustr, bi, forward, count);\r
-    }\r
-    delete bi;\r
-}\r
-\r
-void RBBITest::TestMonkey(char *params) {\r
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-\r
-    UErrorCode     status    = U_ZERO_ERROR;\r
-    int32_t        loopCount = 500;\r
-    int32_t        seed      = 1;\r
-    UnicodeString  breakType = "all";\r
-    Locale         locale("en");\r
-\r
-    if (quick == FALSE) {\r
-        loopCount = 10000;\r
-    }\r
-\r
-    if (params) {\r
-        UnicodeString p(params);\r
-        loopCount = getIntParam("loop", p, loopCount);\r
-        seed      = getIntParam("seed", p, seed);\r
-\r
-        RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);\r
-        if (m.find()) {\r
-            breakType = m.group(1, status);\r
-            m.reset();\r
-            p = m.replaceFirst("", status);\r
-        }\r
-\r
-        m.reset(p);\r
-        if (RegexMatcher("\\S", p, 0, status).find()) {\r
-            // Each option is stripped out of the option string as it is processed.\r
-            // All options have been checked.  The option string should have been completely emptied..\r
-            char buf[100];\r
-            p.extract(buf, sizeof(buf), NULL, status);\r
-            buf[sizeof(buf)-1] = 0;\r
-            errln("Unrecognized or extra parameter:  %s\n", buf);\r
-            return;\r
-        }\r
-\r
-    }\r
-\r
-    if (breakType == "char" || breakType == "all") {\r
-        RBBICharMonkey  m;\r
-        BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);\r
-        if (U_SUCCESS(status)) {\r
-            RunMonkey(bi, m, "char", seed, loopCount);\r
-        }\r
-        else {\r
-            errln("Creation of character break iterator failed %s", u_errorName(status));\r
-        }\r
-        delete bi;\r
-    }\r
-\r
-    if (breakType == "word" || breakType == "all") {\r
-        logln("Word Break Monkey Test");\r
-        RBBIWordMonkey  m;\r
-        BreakIterator  *bi = BreakIterator::createWordInstance(locale, status);\r
-        if (U_SUCCESS(status)) {\r
-            RunMonkey(bi, m, "word", seed, loopCount);\r
-        }\r
-        else {\r
-            errln("Creation of word break iterator failed %s", u_errorName(status));\r
-        }\r
-        delete bi;\r
-    }\r
-\r
-    if (breakType == "line" || breakType == "all") {\r
-        logln("Line Break Monkey Test");\r
-        RBBILineMonkey  m;\r
-        BreakIterator  *bi = BreakIterator::createLineInstance(locale, status);\r
-        if (params == NULL) {\r
-            loopCount = 50;\r
-        }\r
-        if (U_SUCCESS(status)) {\r
-            RunMonkey(bi, m, "line", seed, loopCount);\r
-        }\r
-        else {\r
-            errln("Creation of line break iterator failed %s", u_errorName(status));\r
-        }\r
-        delete bi;\r
-    }\r
-\r
-\r
-#endif\r
-}\r
-\r
-//\r
-//  Run a RBBI monkey test.  Common routine, for all break iterator types.\r
-//    Parameters:\r
-//       bi      - the break iterator to use\r
-//       mk      - MonkeyKind, abstraction for obtaining expected results\r
-//       name    - Name of test (char, word, etc.) for use in error messages\r
-//       seed    - Seed for starting random number generator (parameter from user)\r
-//       numIterations\r
-//\r
-void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t  seed, int32_t numIterations) {\r
-\r
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS\r
-\r
-    const int32_t    TESTSTRINGLEN = 500;\r
-    UnicodeString    testText;\r
-    int32_t          numCharClasses;\r
-    UVector          *chClasses;\r
-    int              expected[TESTSTRINGLEN*2 + 1];\r
-    int              expectedCount = 0;\r
-    char             expectedBreaks[TESTSTRINGLEN*2 + 1];\r
-    char             forwardBreaks[TESTSTRINGLEN*2 + 1];\r
-    char             reverseBreaks[TESTSTRINGLEN*2+1];\r
-    char             isBoundaryBreaks[TESTSTRINGLEN*2+1];\r
-    char             followingBreaks[TESTSTRINGLEN*2+1];\r
-    char             precedingBreaks[TESTSTRINGLEN*2+1];\r
-    int              i;\r
-    int              loopCount = 0;\r
-\r
-    m_seed = seed;\r
-\r
-    numCharClasses = mk.charClasses()->size();\r
-    chClasses      = mk.charClasses();\r
-\r
-    // Check for errors that occured during the construction of the MonkeyKind object.\r
-    //  Can't report them where they occured because errln() is a method coming from intlTest,\r
-    //  and is not visible outside of RBBITest :-(\r
-    if (U_FAILURE(mk.deferredStatus)) {\r
-        errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus));\r
-        return;\r
-    }\r
-\r
-    // Verify that the character classes all have at least one member.\r
-    for (i=0; i<numCharClasses; i++) {\r
-        UnicodeSet *s = (UnicodeSet *)chClasses->elementAt(i);\r
-        if (s == NULL || s->size() == 0) {\r
-            errln("Character Class #%d is null or of zero size.", i);\r
-            return;\r
-        }\r
-    }\r
-\r
-    while (loopCount < numIterations || numIterations == -1) {\r
-        if (numIterations == -1 && loopCount % 10 == 0) {\r
-            // If test is running in an infinite loop, display a periodic tic so\r
-            //   we can tell that it is making progress.\r
-            fprintf(stderr, ".");\r
-        }\r
-        // Save current random number seed, so that we can recreate the random numbers\r
-        //   for this loop iteration in event of an error.\r
-        seed = m_seed;\r
-\r
-        // Populate a test string with data.\r
-        testText.truncate(0);\r
-        for (i=0; i<TESTSTRINGLEN; i++) {\r
-            int32_t  aClassNum = m_rand() % numCharClasses;\r
-            UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAt(aClassNum);\r
-            int32_t   charIdx = m_rand() % classSet->size();\r
-            UChar32   c = classSet->charAt(charIdx);\r
-            if (c < 0) {   // TODO:  deal with sets containing strings.\r
-                errln("c < 0");\r
-            }\r
-            testText.append(c);\r
-        }\r
-\r
-        // Calculate the expected results for this test string.\r
-        mk.setText(testText);\r
-        memset(expectedBreaks, 0, sizeof(expectedBreaks));\r
-        expectedBreaks[0] = 1;\r
-        int32_t breakPos = 0;\r
-        expectedCount = 0;\r
-        for (;;) {\r
-            breakPos = mk.next(breakPos);\r
-            if (breakPos == -1) {\r
-                break;\r
-            }\r
-            if (breakPos > testText.length()) {\r
-                errln("breakPos > testText.length()");\r
-            }\r
-            expectedBreaks[breakPos] = 1;\r
-            expected[expectedCount ++] = breakPos;\r
-        }\r
-\r
-        // Find the break positions using forward iteration\r
-        memset(forwardBreaks, 0, sizeof(forwardBreaks));\r
-        bi->setText(testText);\r
-        for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) {\r
-            if (i < 0 || i > testText.length()) {\r
-                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);\r
-                break;\r
-            }\r
-            forwardBreaks[i] = 1;\r
-        }\r
-\r
-        // Find the break positions using reverse iteration\r
-        memset(reverseBreaks, 0, sizeof(reverseBreaks));\r
-        for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) {\r
-            if (i < 0 || i > testText.length()) {\r
-                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);\r
-                break;\r
-            }\r
-            reverseBreaks[i] = 1;\r
-        }\r
-\r
-        // Find the break positions using isBoundary() tests.\r
-        memset(isBoundaryBreaks, 0, sizeof(isBoundaryBreaks));\r
-        U_ASSERT(sizeof(isBoundaryBreaks) > testText.length());\r
-        for (i=0; i<=testText.length(); i++) {\r
-            isBoundaryBreaks[i] = bi->isBoundary(i);\r
-        }\r
-\r
-\r
-        // Find the break positions using the following() function.\r
-        // printf(".");\r
-        memset(followingBreaks, 0, sizeof(followingBreaks));\r
-        int32_t   lastBreakPos = 0;\r
-        followingBreaks[0] = 1;\r
-        for (i=0; i<testText.length(); i++) {\r
-            breakPos = bi->following(i);\r
-            if (breakPos <= i ||\r
-                breakPos < lastBreakPos ||\r
-                breakPos > testText.length() ||\r
-                breakPos > lastBreakPos && lastBreakPos > i ) {\r
-                errln("%s break monkey test: "\r
-                    "Out of range value returned by BreakIterator::following().\n"\r
-                    "Random seed=%d",  name, seed);\r
-                break;\r
-            }\r
-            followingBreaks[breakPos] = 1;\r
-            lastBreakPos = breakPos;\r
-        }\r
-\r
-        // Find the break positions using the preceding() function.\r
-        memset(precedingBreaks, 0, sizeof(followingBreaks));\r
-        lastBreakPos = testText.length();\r
-        precedingBreaks[testText.length()] = 1;\r
-        for (i=testText.length(); i>0; i--) {\r
-            breakPos = bi->preceding(i);\r
-            if (breakPos >= i ||\r
-                breakPos > lastBreakPos ||\r
-                breakPos < 0 ||\r
-                breakPos < lastBreakPos && lastBreakPos < i ) {\r
-                errln("%s break monkey test: "\r
-                    "Out of range value returned by BreakIterator::preceding().\n"\r
-                    "index=%d;  prev returned %d; lastBreak=%d" ,\r
-                    name,  i, breakPos, lastBreakPos);\r
-                precedingBreaks[i] = 2;   // Forces an error.\r
-            } else {\r
-                precedingBreaks[breakPos] = 1;\r
-                lastBreakPos = breakPos;\r
-            }\r
-        }\r
-\r
-        // Compare the expected and actual results.\r
-        for (i=0; i<=testText.length(); i++) {\r
-            const char *errorType = NULL;\r
-            if  (forwardBreaks[i] != expectedBreaks[i]) {\r
-                errorType = "next()";\r
-            } else if (reverseBreaks[i] != forwardBreaks[i]) {\r
-                errorType = "previous()";\r
-            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {\r
-                errorType = "isBoundary()";\r
-            } else if (followingBreaks[i] != expectedBreaks[i]) {\r
-                errorType = "following()";\r
-            } else if (precedingBreaks[i] != expectedBreaks[i]) {\r
-                errorType = "preceding()";\r
-            }\r
-\r
-\r
-            if (errorType != NULL) {\r
-                // Format a range of the test text that includes the failure as\r
-                //  a data item that can be included in the rbbi test data file.\r
-\r
-                // Start of the range is the last point where expected and actual results\r
-                //   both agreed that there was a break position.\r
-                int startContext = i;\r
-                int32_t count = 0;\r
-                for (;;) {\r
-                    if (startContext==0) { break; }\r
-                    startContext --;\r
-                    if (expectedBreaks[startContext] != 0) {\r
-                        if (count == 2) break;\r
-                        count ++;\r
-                    }\r
-                }\r
-\r
-                // End of range is two expected breaks past the start position.\r
-                int endContext = i + 1;\r
-                int ci;\r
-                for (ci=0; ci<2; ci++) {  // Number of items to include in error text.\r
-                    for (;;) {\r
-                        if (endContext >= testText.length()) {break;}\r
-                        if (expectedBreaks[endContext-1] != 0) { \r
-                            if (count == 0) break;\r
-                            count --;\r
-                        }\r
-                        endContext ++;\r
-                    }\r
-                }\r
-\r
-                // Format looks like   "<data><>\uabcd\uabcd<>\U0001abcd...</data>"\r
-                UnicodeString errorText = "<data>";\r
-                /***if (strcmp(errorType, "next()") == 0) {\r
-                    startContext = 0;\r
-                    endContext = testText.length();\r
-                   \r
-                    printStringBreaks(testText, expected, expectedCount);\r
-                }***/\r
-\r
-                for (ci=startContext; ci<endContext;) {\r
-                    UnicodeString hexChars("0123456789abcdef");\r
-                    UChar32  c;\r
-                    int      bn;\r
-                    c = testText.char32At(ci);\r
-                    if (ci == i) {\r
-                        // This is the location of the error.\r
-                        errorText.append("<?>");\r
-                    } else if (expectedBreaks[ci] != 0) {\r
-                        // This a non-error expected break position.\r
-                        errorText.append("<>");\r
-                    }\r
-                    if (c < 0x10000) {\r
-                        errorText.append("\\u");\r
-                        for (bn=12; bn>=0; bn-=4) {\r
-                            errorText.append(hexChars.charAt((c>>bn)&0xf));\r
-                        }\r
-                    } else {\r
-                        errorText.append("\\U");\r
-                        for (bn=28; bn>=0; bn-=4) {\r
-                            errorText.append(hexChars.charAt((c>>bn)&0xf));\r
-                        }\r
-                    }\r
-                    ci = testText.moveIndex32(ci, 1);\r
-                }\r
-                errorText.append("<>");\r
-                errorText.append("</data>\n");\r
-\r
-                // Output the error\r
-                char  charErrorTxt[500];\r
-                UErrorCode status = U_ZERO_ERROR;\r
-                errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status);\r
-                charErrorTxt[sizeof(charErrorTxt)-1] = 0;\r
-                errln("%s break monkey test error.  %s. Operation = %s; Random seed = %d;  buf Idx = %d\n%s",\r
-                    name, (expectedBreaks[i]? "break expected but not found" : "break found but not expected"),\r
-                    errorType, seed, i, charErrorTxt);\r
-                break;\r
-            }\r
-        }\r
-\r
-        loopCount++;\r
-    }\r
-#endif\r
-}\r
-\r
-\r
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */\r
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1999-2013, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+/************************************************************************
+*   Date        Name        Description
+*   12/15/99    Madhu        Creation.
+*   01/12/2000  Madhu        Updated for changed API and added new tests
+************************************************************************/
+
+#include "utypeinfo.h"  // for 'typeid' to work
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/rbbi.h"
+#include "unicode/uchar.h"
+#include "unicode/utf16.h"
+#include "unicode/ucnv.h"
+#include "unicode/schriter.h"
+#include "unicode/uniset.h"
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+#include "unicode/regex.h"
+#endif
+#include "unicode/ustring.h"
+#include "unicode/utext.h"
+#include "intltest.h"
+#include "rbbitst.h"
+#include <string.h>
+#include "uvector.h"
+#include "uvectr32.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/numfmt.h"
+#include "unicode/uscript.h"
+// TEST_ASSERT(x): when x is false, reports file and line through errln().  The macro itself contains no return.
+#define TEST_ASSERT(x) {if (!(x)) { \
+    errln("Failure in file %s, line %d", __FILE__, __LINE__);}}
+// TEST_ASSERT_SUCCESS(errcode): when errcode is a failure status, reports file, line and status name through errcheckln().
+#define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \
+    errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(errcode));}}
+
+
+//---------------------------------------------
+// runIndexedTest   Sets name for the given test index and, when exec is true, runs that test.
+//                  params is forwarded only to TestMonkey.  An unrecognized index sets name to "".
+//---------------------------------------------
+
+//  Note:  Before adding new tests to this file, check whether the desired test data can 
+//         simply be added to the file testdata/rbbitest.txt.  In most cases it can,
+//         it's much less work than writing a new test, diagnostic output in the event of failures
+//         is good, and the test data file is shared with ICU4J, so eventually the test
+//         will run there as well, without additional effort.
+
+void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params )
+{
+    if (exec) logln("TestSuite RuleBasedBreakIterator: ");
+
+    switch (index) {   // Some entries are compiled only when the UCONFIG_* options allow; otherwise the slot is named "skip".
+#if !UCONFIG_NO_FILE_IO
+        case 0: name = "TestBug4153072";
+            if(exec) TestBug4153072();                         break;
+#else
+        case 0: name = "skip";
+            break;
+#endif
+
+        case 1: name = "skip";   // NOTE(review): unconditional "skip" presumably keeps a retired test's index slot -- confirm
+            break;
+        case 2: name = "TestStatusReturn";
+            if(exec) TestStatusReturn();                       break;
+ 
+#if !UCONFIG_NO_FILE_IO
+        case 3: name = "TestUnicodeFiles";
+            if(exec) TestUnicodeFiles();                       break;
+        case 4: name = "TestEmptyString";
+            if(exec) TestEmptyString();                        break;
+#else
+        case 3: case 4: name = "skip";
+            break;
+#endif
+
+        case 5: name = "TestGetAvailableLocales";
+            if(exec) TestGetAvailableLocales();                break;
+
+        case 6: name = "TestGetDisplayName";
+            if(exec) TestGetDisplayName();                     break;
+
+#if !UCONFIG_NO_FILE_IO
+        case 7: name = "TestEndBehaviour";
+            if(exec) TestEndBehaviour();                       break;
+        case 8: case 9: case 10: name = "skip";
+             break;
+        case 11: name = "TestWordBreaks";
+             if(exec) TestWordBreaks();                        break;
+        case 12: name = "TestWordBoundary";
+             if(exec) TestWordBoundary();                      break;
+        case 13: name = "TestLineBreaks";
+             if(exec) TestLineBreaks();                        break;
+        case 14: name = "TestSentBreaks";
+             if(exec) TestSentBreaks();                        break;
+        case 15: name = "TestExtended";
+             if(exec) TestExtended();                          break;
+#else
+        case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: name = "skip";
+             break;
+#endif
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO
+        case 16:  
+            name = "TestMonkey"; if(exec)  TestMonkey(params); break;   // the only test here that receives params
+#else
+        case 16:
+             name = "skip";                                    break;
+#endif
+
+#if !UCONFIG_NO_FILE_IO
+        case 17: name = "TestBug3818";
+            if(exec) TestBug3818();                            break;
+#else
+        case 17: name = "skip";
+            break;
+#endif
+
+        case 18: name = "skip";
+            break;
+        case 19: name = "TestDebug";
+            if(exec) TestDebug();                              break;
+        case 20: name = "skip";
+            break;
+
+#if !UCONFIG_NO_FILE_IO
+        case 21: name = "TestBug5775";
+            if (exec) TestBug5775();                           break;
+#else
+        case 21: name = "skip";
+            break;
+#endif
+
+        case 22: name = "TestBug9983";
+            if (exec) TestBug9983();                           break;
+        case 23: name = "TestDictRules";
+            if (exec) TestDictRules();                         break;
+        case 24: name = "TestBug5532";
+            if (exec) TestBug5532();                           break;
+        default: name = ""; break; //needed to end loop
+    }
+}
+
+
+//---------------------------------------------------------------------------
+//
+//   class BITestData   Holds a set of Break iterator test data and results
+//                      Includes
+//                         - the string data to be broken
+//                         - a vector of the expected break positions.
+//                         - a vector of source line numbers for the data,
+//                               (to help see where errors occurred.)
+//                         - The expected break tag values.
+//                         - Vectors of actual break positions and tag values.
+//                         - Functions for comparing actual with expected and
+//                            reporting errors.
+//
+//----------------------------------------------------------------------------
+class BITestData {
+public:
+    UnicodeString    fDataToBreak;            // All chunks passed to addDataChunk(), concatenated.
+    UVector          fExpectedBreakPositions; // Length of fDataToBreak after each chunk was added.
+    UVector          fExpectedTags;           // Tag supplied with each chunk.
+    UVector          fLineNum;                // Source line number supplied with each chunk.
+    UVector          fActualBreakPositions;   // Test Results.
+    UVector          fActualTags;
+
+    BITestData(UErrorCode &status);
+    void             addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status);   // NOTE(review): status is taken by value
+    void             checkResults(const char *heading, RBBITest *test);   // Compares the actual vectors with the expected ones.
+    void             err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx);
+    void             clearResults();
+};
+
+//
+// Constructor.  Each of the five UVector members is constructed with the caller's status.
+//
+BITestData::BITestData(UErrorCode &status)
+    : fExpectedBreakPositions(status), fExpectedTags(status), fLineNum(status),
+      fActualBreakPositions(status), fActualTags(status)
+{
+}
+
+//
+// addDataChunk.   Add a section (non-breaking) piece if data to the test data.
+//                 The macro form collects the line number, which is helpful
+//                 when tracking down failures.
+//
+//                 A null data item is inserted at the start of each test's data
+//                  to put the starting zero into the data list.  The position saved for
+//                  each non-null item is its ending position.
+//
+#define ADD_DATACHUNK(td, data, tag, status)   td.addDataChunk(data, tag, __LINE__, status);
+void BITestData::addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status) {
+    if (U_FAILURE(status)) {return;}
+    if (data != NULL) {
+        fDataToBreak.append(CharsToUnicodeString(data));
+    }
+    fExpectedBreakPositions.addElement(fDataToBreak.length(), status);
+    fExpectedTags.addElement(tag, status);
+    fLineNum.addElement(lineNum, status);
+}
+
+
+//
+//  checkResults.   Compare the actual and expected break positions, report any differences.
+//
+void BITestData::checkResults(const char *heading, RBBITest *test) {
+    int32_t   expectedIndex = 0;
+    int32_t   actualIndex = 0;
+
+    for (;;) {
+        // If we've run through both the expected and actual results vectors, we're done.
+        //   break out of the loop.
+        if (expectedIndex >= fExpectedBreakPositions.size() &&
+            actualIndex   >= fActualBreakPositions.size()) {
+            break;
+        }
+
+
+        if (expectedIndex >= fExpectedBreakPositions.size()) {
+            err(heading, test, expectedIndex-1, actualIndex);
+            actualIndex++;
+            continue;
+        }
+
+        if (actualIndex >= fActualBreakPositions.size()) {
+            err(heading, test, expectedIndex, actualIndex-1);
+            expectedIndex++;
+            continue;
+        }
+
+        if (fActualBreakPositions.elementAti(actualIndex) != fExpectedBreakPositions.elementAti(expectedIndex)) {
+            err(heading, test, expectedIndex, actualIndex);
+            // Try to resync the positions of the indices, to avoid a rash of spurious erros.
+            if (fActualBreakPositions.elementAti(actualIndex) < fExpectedBreakPositions.elementAti(expectedIndex)) {
+                actualIndex++;
+            } else {
+                expectedIndex++;
+            }
+            continue;
+        }
+
+        if (fActualTags.elementAti(actualIndex) != fExpectedTags.elementAti(expectedIndex)) {
+            test->errln("%s, tag mismatch.  Test Line = %d, expected tag=%d, got %d",
+                heading, fLineNum.elementAt(expectedIndex),
+                fExpectedTags.elementAti(expectedIndex), fActualTags.elementAti(actualIndex));
+        }
+
+        actualIndex++;
+        expectedIndex++;
+    }
+}
+
+//
+//  err   -  An error was found.  Report it, along with information about where the
+//                                incorrectly broken test data appeared in the source file.
+//
+void    BITestData::err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx)
+{
+    int32_t   expected = fExpectedBreakPositions.elementAti(expectedIdx);
+    int32_t   actual   = fActualBreakPositions.elementAti(actualIdx);
+    int32_t   o        = 0;
+    int32_t   line     = fLineNum.elementAti(expectedIdx);
+    if (expectedIdx > 0) {
+        // The line numbers are off by one because a premature break occurs somewhere
+        //    within the previous item, rather than at the start of the current (expected) item.
+        //    We want to report the offset of the unexpected break from the start of
+        //      this previous item.
+        o    = actual - fExpectedBreakPositions.elementAti(expectedIdx-1);
+    }
+    if (actual < expected) {
+        test->errln("%s unexpected break at offset %d in test item from line %d. actual break: %d  expected break: %d", heading, o, line, actual, expected);
+    } else {
+        test->errln("%s Failed to find break at end of item from line %d. actual break: %d  expected break: %d", heading, line, actual, expected);
+    }
+}
+
+
+// Discard the results of the previous run; the expected data is left untouched.
+void BITestData::clearResults()
+{
+    fActualTags.removeAllElements();
+    fActualBreakPositions.removeAllElements();
+}
+
+
+//--------------------------------------------------------------------------------------
+//
+//    RBBITest    constructor and destructor
+//
+//--------------------------------------------------------------------------------------
+
+// Empty body: nothing is initialized here.
+RBBITest::RBBITest()
+{
+}
+
+
+// Empty body: nothing is released here.
+RBBITest::~RBBITest()
+{
+}
+
+//-----------------------------------------------------------------------------------
+//
+//   Test for status {tag} return value from break rules.
+//        TODO:  a more thorough test.
+//
+//-----------------------------------------------------------------------------------
+void RBBITest::TestStatusReturn() {
+     UnicodeString rulesString1("$Letters = [:L:];\n"
+                                  "$Numbers = [:N:];\n"
+                                  "$Letters+{1};\n"
+                                  "$Numbers+{2};\n"
+                                  "Help\\ {4}/me\\!;\n"
+                                  "[^$Letters $Numbers];\n"
+                                  "!.*;\n", -1, US_INV);
+     UnicodeString testString1  = "abc123..abc Help me Help me!";
+                                // 01234567890123456789012345678
+     int32_t bounds1[]   = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1};
+     int32_t brkStatus[] = {0, 1, 2, 0, 0,  1,  0,  1,  0,  1,  0,  4,  1,  0, -1};
+
+     UErrorCode status=U_ZERO_ERROR;
+     UParseError    parseError;
+
+     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
+     if(U_FAILURE(status)) {
+         dataerrln("FAIL : in construction - %s", u_errorName(status));
+     } else {
+         int32_t  pos;
+         int32_t  i = 0;
+         bi->setText(testString1);
+         for (pos=bi->first(); pos!= BreakIterator::DONE; pos=bi->next()) {
+             if (pos != bounds1[i]) {
+                 errln("FAIL:  expected break at %d, got %d\n", bounds1[i], pos);
+                 break;
+             }
+
+             int tag = bi->getRuleStatus();
+             if (tag != brkStatus[i]) {
+                 errln("FAIL:  break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);
+                 break;
+             }
+             i++;
+         }
+     }
+     delete bi;
+}
+
+
+static void printStringBreaks(UnicodeString ustr, int expected[],
+                              int expectedcount)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    char name[100];
+    printf("code    alpha extend alphanum type word sent line name\n");
+    int j;
+    for (j = 0; j < ustr.length(); j ++) {
+        if (expectedcount > 0) {
+            int k;
+            for (k = 0; k < expectedcount; k ++) {
+                if (j == expected[k]) {
+                    printf("------------------------------------------------ %d\n",
+                           j);
+                }
+            }
+        }
+        UChar32 c = ustr.char32At(j);
+        if (c > 0xffff) {
+            j ++;
+        }
+        u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);
+        printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,
+                           u_isUAlphabetic(c),
+                           u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),
+                           u_isalnum(c),
+                           u_getPropertyValueName(UCHAR_GENERAL_CATEGORY,
+                                                  u_charType(c),
+                                                  U_SHORT_PROPERTY_NAME),
+                           u_getPropertyValueName(UCHAR_WORD_BREAK,
+                                                  u_getIntPropertyValue(c,
+                                                          UCHAR_WORD_BREAK),
+                                                  U_SHORT_PROPERTY_NAME),
+                           u_getPropertyValueName(UCHAR_SENTENCE_BREAK,
+                                   u_getIntPropertyValue(c,
+                                           UCHAR_SENTENCE_BREAK),
+                                   U_SHORT_PROPERTY_NAME),
+                           u_getPropertyValueName(UCHAR_LINE_BREAK,
+                                   u_getIntPropertyValue(c,
+                                           UCHAR_LINE_BREAK),
+                                   U_SHORT_PROPERTY_NAME),
+                           name);
+    }
+}
+
+
+void RBBITest::TestBug3818() {
+    UErrorCode  status = U_ZERO_ERROR;
+
+    // Four Thai words...
+    static const UChar thaiWordData[] = {  0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48,
+                                           0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 };
+    UnicodeString  thaiStr(thaiWordData);
+
+    RuleBasedBreakIterator* bi =
+        (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale("th"), status);
+    if (U_FAILURE(status) || bi == NULL) {
+        errcheckln(status, "Fail at file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
+        return;
+    }
+    bi->setText(thaiStr);
+
+    int32_t  startOfSecondWord = bi->following(1);
+    if (startOfSecondWord != 4) {
+        errln("Fail at file %s, line %d expected start of word at 4, got %d",
+            __FILE__, __LINE__, startOfSecondWord);
+    }
+    startOfSecondWord = bi->following(0);
+    if (startOfSecondWord != 4) {
+        errln("Fail at file %s, line %d expected start of word at 4, got %d",
+            __FILE__, __LINE__, startOfSecondWord);
+    }
+    delete bi;
+}
+
+//----------------------------------------------------------------------------
+//
+// generalIteratorTest      Given a break iterator and a set of test data,
+//                          Run the tests and report the results.
+//
+//----------------------------------------------------------------------------
+void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td)
+{
+    // Point the iterator at the test text, then run each style of traversal over it.
+    bi.setText(td.fDataToBreak);
+
+    // Sequential traversal in each direction.
+    testFirstAndNext(bi, td);
+    testLastAndPrevious(bi, td);
+
+    // Random-access queries.
+    testFollowing(bi, td);
+    testPreceding(bi, td);
+    testIsBoundary(bi, td);
+
+    // next(n) and clone/comparison consistency.
+    doMultipleSelectionTest(bi, td);
+}
+
+
+//
+//   testFirstAndNext.   Run the iterator forwards in the obvious first(), next()
+//                       kind of loop.
+//
+void RBBITest::testFirstAndNext(RuleBasedBreakIterator& bi, BITestData &td)
+{
+    UErrorCode  status = U_ZERO_ERROR;
+    int32_t     p;
+    int32_t     lastP = -1;
+    int32_t     tag;
+
+    logln("Test first and next");
+    bi.setText(td.fDataToBreak);
+    td.clearResults();
+
+    for (p=bi.first(); p!=RuleBasedBreakIterator::DONE; p=bi.next()) {
+        td.fActualBreakPositions.addElement(p, status);  // Save result.
+        tag = bi.getRuleStatus();
+        td.fActualTags.addElement(tag, status);
+        if (p <= lastP) {
+            // If the iterator is not making forward progress, stop.
+            //  No need to raise an error here, it'll be detected in the normal check of results.
+            break;
+        }
+        lastP = p;
+    }
+    td.checkResults("testFirstAndNext", this);
+}
+
+
+//
+//  TestLastAndPrevious.   Run the iterator backwards, starting with last().
+//
+void  RBBITest::testLastAndPrevious(RuleBasedBreakIterator& bi,  BITestData &td)
+{
+    UErrorCode  status = U_ZERO_ERROR;
+    int32_t     p;
+    int32_t     lastP  = 0x7ffffffe;
+    int32_t     tag;
+
+    logln("Test last and previous");
+    bi.setText(td.fDataToBreak);
+    td.clearResults();
+
+    for (p=bi.last(); p!=RuleBasedBreakIterator::DONE; p=bi.previous()) {
+        // Save break position.  Insert it at start of vector of results, shoving
+        //    already-saved results further towards the end.
+        td.fActualBreakPositions.insertElementAt(p, 0, status);
+        // bi.previous();   // TODO:  Why does this fix things up????
+        // bi.next();
+        tag = bi.getRuleStatus();
+        td.fActualTags.insertElementAt(tag, 0, status);
+        if (p >= lastP) {
+            // If the iterator is not making progress, stop.
+            //  No need to raise an error here, it'll be detected in the normal check of results.
+            break;
+        }
+        lastP = p;
+    }
+    td.checkResults("testLastAndPrevious", this);
+}
+
+
+void RBBITest::testFollowing(RuleBasedBreakIterator& bi, BITestData &td)
+{
+    UErrorCode  status = U_ZERO_ERROR;
+    int32_t     p;
+    int32_t     tag;
+    int32_t     lastP  = -2;     // A value that will never be returned as a break position.
+                                 //   cannot be -1; that is returned for DONE.
+    int         i;
+
+    logln("testFollowing():");
+    bi.setText(td.fDataToBreak);
+    td.clearResults();
+
+    // Save the starting point, since we won't get that out of following.
+    p = bi.first();
+    td.fActualBreakPositions.addElement(p, status);  // Save result.
+    tag = bi.getRuleStatus();
+    td.fActualTags.addElement(tag, status);
+
+    for (i = 0; i <= td.fDataToBreak.length()+1; i++) {
+        p = bi.following(i);
+        if (p != lastP) {
+            if (p == RuleBasedBreakIterator::DONE) {
+                break;
+            }
+            // We've reached a new break position.  Save it.
+            td.fActualBreakPositions.addElement(p, status);  // Save result.
+            tag = bi.getRuleStatus();
+            td.fActualTags.addElement(tag, status);
+            lastP = p;
+        }
+    }
+    // The loop normally exits by means of the break in the middle.
+    // Make sure that the index was at the correct position for the break iterator to have
+    //   returned DONE.
+    if (i != td.fDataToBreak.length()) {
+        errln("testFollowing():  iterator returned DONE prematurely.");
+    }
+
+    // Full check of all results.
+    td.checkResults("testFollowing", this);
+}
+
+
+
+void RBBITest::testPreceding(RuleBasedBreakIterator& bi,  BITestData &td) {
+    UErrorCode  status = U_ZERO_ERROR;
+    int32_t     p;
+    int32_t     tag;
+    int32_t     lastP  = 0x7ffffffe;
+    int         i;
+
+    logln("testPreceding():");
+    bi.setText(td.fDataToBreak);
+    td.clearResults();
+
+    p = bi.last();
+    td.fActualBreakPositions.addElement(p, status);
+    tag = bi.getRuleStatus();
+    td.fActualTags.addElement(tag, status);
+
+    for (i = td.fDataToBreak.length(); i>=-1; i--) {
+        p = bi.preceding(i);
+        if (p != lastP) {
+            if (p == RuleBasedBreakIterator::DONE) {
+                break;
+            }
+            // We've reached a new break position.  Save it.
+            td.fActualBreakPositions.insertElementAt(p, 0, status);
+            lastP = p;
+            tag = bi.getRuleStatus();
+            td.fActualTags.insertElementAt(tag, 0, status);
+        }
+    }
+    // The loop normally exits by means of the break in the middle.
+    // Make sure that the index was at the correct position for the break iterator to have
+    //   returned DONE.
+    if (i != 0) {
+        errln("testPreceding():  iterator returned DONE prematurely.");
+    }
+
+    // Full check of all results.
+    td.checkResults("testPreceding", this);
+}
+
+
+
+void RBBITest::testIsBoundary(RuleBasedBreakIterator& bi,  BITestData &td) {
+    UErrorCode  status = U_ZERO_ERROR;
+    int         i;
+    int32_t     tag;
+
+    logln("testIsBoundary():");
+    bi.setText(td.fDataToBreak);
+    td.clearResults();
+
+    for (i = 0; i <= td.fDataToBreak.length(); i++) {
+        if (bi.isBoundary(i)) {
+            td.fActualBreakPositions.addElement(i, status);  // Save result.
+            tag = bi.getRuleStatus();
+            td.fActualTags.addElement(tag, status);
+        }
+    }
+    td.checkResults("testIsBoundary: ", this);
+}
+
+
+
+void RBBITest::doMultipleSelectionTest(RuleBasedBreakIterator& iterator, BITestData &td)
+{
+    iterator.setText(td.fDataToBreak);
+
+    RuleBasedBreakIterator* testIterator =(RuleBasedBreakIterator*)iterator.clone();
+    int32_t offset = iterator.first();
+    int32_t testOffset;
+    int32_t count = 0;
+
+    logln("doMultipleSelectionTest text of length: %d", td.fDataToBreak.length());
+
+    if (*testIterator != iterator)
+        errln("clone() or operator!= failed: two clones compared unequal");
+
+    do {
+        testOffset = testIterator->first();
+        testOffset = testIterator->next(count);
+        if (offset != testOffset)
+            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
+
+        if (offset != RuleBasedBreakIterator::DONE) {
+            count++;
+            offset = iterator.next();
+
+            if (offset != RuleBasedBreakIterator::DONE && *testIterator == iterator) {
+                errln("operator== failed: Two unequal iterators compared equal. count=%d offset=%d", count, offset);
+                if (count > 10000 || offset == -1) {
+                    errln("operator== failed too many times. Stopping test.");
+                    if (offset == -1) {
+                        errln("Does (RuleBasedBreakIterator::DONE == -1)?");
+                    }
+                    return;
+                }
+            }
+        }
+    } while (offset != RuleBasedBreakIterator::DONE);
+
+    // now do it backwards...
+    offset = iterator.last();
+    count = 0;
+
+    do {
+        testOffset = testIterator->last();
+        testOffset = testIterator->next(count);   // next() with a negative arg is same as previous
+        if (offset != testOffset)
+            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
+
+        if (offset != RuleBasedBreakIterator::DONE) {
+            count--;
+            offset = iterator.previous();
+        }
+    } while (offset != RuleBasedBreakIterator::DONE);
+
+    delete testIterator;
+}
+
+
+//---------------------------------------------
+//
+//     other tests
+//
+//---------------------------------------------
+void RBBITest::TestEmptyString()
+{
+    UnicodeString text = "";
+    UErrorCode status = U_ZERO_ERROR;
+
+    BITestData x(status);
+    ADD_DATACHUNK(x, "", 0, status);           // Break at start of data
+    RuleBasedBreakIterator* bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
+    if (U_FAILURE(status))
+    {
+        errcheckln(status, "Failed to create the BreakIterator for default locale in TestEmptyString. - %s", u_errorName(status));
+        return;
+    }
+    generalIteratorTest(*bi, x);
+    delete bi;
+}
+
+// Check that BreakIterator::getAvailableLocales() reports at least one locale and
+// that every entry of the returned array can be read.
+void RBBITest::TestGetAvailableLocales()
+{
+    int32_t numLocales = 0;
+    const Locale* locales = BreakIterator::getAvailableLocales(numLocales);
+
+    if (numLocales == 0) {
+        dataerrln("getAvailableLocales() returned an empty list!");
+    }
+
+    // Touch each entry so that bad memory would show up as a failure here.
+    int32_t idx = 0;
+    while (idx < numLocales) {
+        logln(locales[idx].getName());
+        ++idx;
+    }
+}
+
+//Testing the BreakIterator::getDisplayName() function
+void RBBITest::TestGetDisplayName()
+{
+    UnicodeString   result;
+
+    BreakIterator::getDisplayName(Locale::getUS(), result);
+    if (Locale::getDefault() == Locale::getUS() && result != "English (United States)")
+        dataerrln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \""
+                + result);
+
+    BreakIterator::getDisplayName(Locale::getFrance(), Locale::getUS(), result);
+    if (result != "French (France)")
+        dataerrln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \""
+                + result);
+}
+/**
+ * Test End Behaviour
+ * @bug 4068137
+ */
+void RBBITest::TestEndBehaviour()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString testString("boo.");
+    BreakIterator *wb = BreakIterator::createWordInstance(Locale::getDefault(), status);
+    if (U_FAILURE(status))
+    {
+        errcheckln(status, "Failed to create the BreakIterator for default locale in TestEndBehaviour. - %s", u_errorName(status));
+        return;
+    }
+    wb->setText(testString);
+
+    if (wb->first() != 0)
+        errln("Didn't get break at beginning of string.");
+    if (wb->next() != 3)
+        errln("Didn't get break before period in \"boo.\"");
+    if (wb->current() != 4 && wb->next() != 4)
+        errln("Didn't get break at end of string.");
+    delete wb;
+}
+/*
+ * @bug 4153072
+ */
+void RBBITest::TestBug4153072() {
+    UErrorCode status = U_ZERO_ERROR;
+    BreakIterator *iter = BreakIterator::createWordInstance(Locale::getDefault(), status);
+    if (U_FAILURE(status))
+    {
+        errcheckln(status, "Failed to create the BreakIterator for default locale in TestBug4153072 - %s", u_errorName(status));
+        return;
+    }
+    UnicodeString str("...Hello, World!...");
+    int32_t begin = 3;
+    int32_t end = str.length() - 3;
+    UBool onBoundary;
+
+    StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin);
+    iter->adoptText(textIterator);
+    int index;
+    // Note: with the switch to UText, there is no way to restrict the
+    //       iteration range to begin at an index other than zero.
+    //       String character iterators created with a non-zero bound are
+    //         treated by RBBI as being empty.
+    for (index = -1; index < begin + 1; ++index) {
+        onBoundary = iter->isBoundary(index);
+        if (index == 0?  !onBoundary : onBoundary) {
+            errln((UnicodeString)"Didn't handle isBoundary correctly with offset = " + index +
+                            " and begin index = " + begin);
+        }
+    }
+    delete iter;
+}
+
+
+//
+// Test for problem reported by Ashok Matoria on 9 July 2007
+//    One.<kSoftHyphen><kSpace>Two.
+//
+//    Sentence break at start (0) and then on calling next() it breaks at
+//   'T' of "Two". Now, at this point if I do next() and
+//    then previous(), it breaks at <kSOftHyphen> instead of 'T' of "Two".
+//
+void RBBITest::TestBug5775() {
+    UErrorCode status = U_ZERO_ERROR;
+    BreakIterator *bi = BreakIterator::createSentenceInstance(Locale::getEnglish(), status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+// The status is checked before the pointer so that a missing-data failure is reported as such.
+    TEST_ASSERT(bi != NULL);
+    if (bi == NULL) {
+        return;
+    }
+    
+    UnicodeString s("One.\\u00ad Two.", -1, US_INV);
+    //               01234      56789      (offsets once \u00ad has been unescaped to one character)
+    s = s.unescape();
+    bi->setText(s);
+    int pos = bi->next();      // First sentence boundary after the start.
+    TEST_ASSERT(pos == 6);     // Offset 6 is the 'T' of "Two".
+    pos = bi->next();
+    TEST_ASSERT(pos == 10);    // Offset 10 is the end of the text.
+    pos = bi->previous();
+    TEST_ASSERT(pos == 6);     // Stepping back must return to 'T', not to the soft hyphen at offset 4.
+    delete bi;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//   RBBITest::Extended    Run  RBBI Tests from an external test data file
+//
+//------------------------------------------------------------------------------
+
+struct TestParams {                      // One test case handed from TestExtended() to executeTest().
+    BreakIterator   *bi;                 // Iterator under test; executeTest() does nothing when this is NULL.
+    UnicodeString    dataToBreak;        // Text given to bi->setText().
+    UVector32       *expectedBreaks;     // Indexed by offset in dataToBreak: 0 = no break expected, -1 = break with tag 0, other = break with that tag.
+    UVector32       *srcLine;            // Indexed by offset in dataToBreak: value reported as the test file line in error messages.
+    UVector32       *srcCol;             // Indexed by offset in dataToBreak: value reported as the test file column in error messages.
+};
+
+void RBBITest::executeTest(TestParams *t) {
+    int32_t    bp;
+    int32_t    prevBP;
+    int32_t    i;
+
+    if (t->bi == NULL) {
+        return;
+    }
+
+    t->bi->setText(t->dataToBreak);
+    //
+    //  Run the iterator forward
+    //
+    prevBP = -1;
+    for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {
+        if (prevBP ==  bp) {
+            // Fail for lack of forward progress.
+            errln("Forward Iteration, no forward progress.  Break Pos=%4d  File line,col=%4d,%4d",
+                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
+            break;
+        }
+
+        // Check that there were we didn't miss an expected break between the last one
+        //  and this one.
+        for (i=prevBP+1; i<bp; i++) {
+            if (t->expectedBreaks->elementAti(i) != 0) {
+                int expected[] = {0, i};
+                printStringBreaks(t->dataToBreak, expected, 2);
+                errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
+                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
+            }
+        }
+
+        // Check that the break we did find was expected
+        if (t->expectedBreaks->elementAti(bp) == 0) {
+            int expected[] = {0, bp};
+            printStringBreaks(t->dataToBreak, expected, 2);
+            errln("Forward Iteration, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",
+                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
+        } else {
+            // The break was expected.
+            //   Check that the {nnn} tag value is correct.
+            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);
+            if (expectedTagVal == -1) {
+                expectedTagVal = 0;
+            }
+            int32_t line = t->srcLine->elementAti(bp);
+            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
+            if (rs != expectedTagVal) {
+                errln("Incorrect status for forward break.  Pos=%4d  File line,col= %4d,%4d.\n"
+                      "          Actual, Expected status = %4d, %4d",
+                    bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);
+            }
+        }
+
+
+        prevBP = bp;
+    }
+
+    // Verify that there were no missed expected breaks after the last one found
+    for (i=prevBP+1; i<t->expectedBreaks->size(); i++) {
+        if (t->expectedBreaks->elementAti(i) != 0) {
+            errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
+                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
+        }
+    }
+
+    //
+    //  Run the iterator backwards, verify that the same breaks are found.
+    //
+    prevBP = t->dataToBreak.length()+2;  // start with a phony value for the last break pos seen.
+    for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {
+        if (prevBP ==  bp) {
+            // Fail for lack of progress.
+            errln("Reverse Iteration, no progress.  Break Pos=%4d  File line,col=%4d,%4d",
+                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
+            break;
+        }
+
+        // Check that there were we didn't miss an expected break between the last one
+        //  and this one.  (UVector returns zeros for index out of bounds.)
+        for (i=prevBP-1; i>bp; i--) {
+            if (t->expectedBreaks->elementAti(i) != 0) {
+                errln("Reverse Itertion, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
+                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
+            }
+        }
+
+        // Check that the break we did find was expected
+        if (t->expectedBreaks->elementAti(bp) == 0) {
+            errln("Reverse Itertion, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",
+                   bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
+        } else {
+            // The break was expected.
+            //   Check that the {nnn} tag value is correct.
+            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);
+            if (expectedTagVal == -1) {
+                expectedTagVal = 0;
+            }
+            int line = t->srcLine->elementAti(bp);
+            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
+            if (rs != expectedTagVal) {
+                errln("Incorrect status for reverse break.  Pos=%4d  File line,col= %4d,%4d.\n"
+                      "          Actual, Expected status = %4d, %4d",
+                    bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);
+            }
+        }
+
+        prevBP = bp;
+    }
+
+    // Verify that there were no missed breaks prior to the last one found
+    for (i=prevBP-1; i>=0; i--) {
+        if (t->expectedBreaks->elementAti(i) != 0) {
+            errln("Forward Itertion, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
+                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
+        }
+    }
+
+    // Check isBoundary()
+    for (i=0; i<t->expectedBreaks->size(); i++) {
+        UBool boundaryExpected = (t->expectedBreaks->elementAti(i) != 0);
+        UBool boundaryFound    = t->bi->isBoundary(i);
+        if (boundaryExpected != boundaryFound) {
+            errln("isBoundary(%d) incorrect. File line,col= %4d,%4d\n"
+                  "        Expected, Actual= %s, %s",
+                  i, t->srcLine->elementAti(i), t->srcCol->elementAti(i),
+                  boundaryExpected ? "true":"false", boundaryFound? "true" : "false");
+        }
+    }
+
+    // Check following()
+    for (i=0; i<t->expectedBreaks->size(); i++) {
+        int32_t actualBreak = t->bi->following(i);
+        int32_t expectedBreak = BreakIterator::DONE;
+        for (int32_t j=i+1; j < t->expectedBreaks->size(); j++) {
+            if (t->expectedBreaks->elementAti(j) != 0) {
+                expectedBreak = j;
+                break;
+            }
+        }
+        if (expectedBreak != actualBreak) {
+            errln("following(%d) incorrect. File line,col= %4d,%4d\n"
+                  "        Expected, Actual= %d, %d",
+                  i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
+        }
+    }
+
+    // Check preceding()
+    for (i=t->expectedBreaks->size(); i>=0; i--) {
+        int32_t actualBreak = t->bi->preceding(i);
+        int32_t expectedBreak = BreakIterator::DONE;
+
+        for (int32_t j=i-1; j >= 0; j--) {
+            if (t->expectedBreaks->elementAti(j) != 0) {
+                expectedBreak = j;
+                break;
+            }
+        }
+        if (expectedBreak != actualBreak) {
+            errln("preceding(%d) incorrect. File line,col= %4d,%4d\n"
+                  "        Expected, Actual= %d, %d",
+                  i, t->srcLine->elementAti(i), t->srcCol->elementAti(i), expectedBreak, actualBreak);
+        }
+    }
+}
+
+
+void RBBITest::TestExtended() {
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    UErrorCode      status  = U_ZERO_ERROR;
+    Locale          locale("");
+
+    UnicodeString       rules;
+    TestParams          tp;
+    tp.bi             = NULL;
+    tp.expectedBreaks = new UVector32(status);
+    tp.srcLine        = new UVector32(status);
+    tp.srcCol         = new UVector32(status);
+
+    RegexMatcher      localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));
+    }
+
+
+    //
+    //  Open and read the test data file.
+    //
+    const char *testDataDirectory = IntlTest::getSourceTestData(status);
+    char testFileName[1000];
+    if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
+        errln("Can't open test data.  Path too long.");
+        return;
+    }
+    strcpy(testFileName, testDataDirectory);
+    strcat(testFileName, "rbbitst.txt");
+
+    int    len;
+    UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
+    if (U_FAILURE(status)) {
+        return; /* something went wrong, error already output */
+    }
+
+
+
+
+    //
+    //  Put the test data into a UnicodeString
+    //
+    UnicodeString testString(FALSE, testFile, len);
+
+    enum EParseState{
+        PARSE_COMMENT,
+        PARSE_TAG,
+        PARSE_DATA,
+        PARSE_NUM
+    }
+    parseState = PARSE_TAG;
+
+    EParseState savedState = PARSE_TAG;
+
+    static const UChar CH_LF        = 0x0a;
+    static const UChar CH_CR        = 0x0d;
+    static const UChar CH_HASH      = 0x23;
+    /*static const UChar CH_PERIOD    = 0x2e;*/
+    static const UChar CH_LT        = 0x3c;
+    static const UChar CH_GT        = 0x3e;
+    static const UChar CH_BACKSLASH = 0x5c;
+    static const UChar CH_BULLET    = 0x2022;
+
+    int32_t    lineNum  = 1;
+    int32_t    colStart = 0;
+    int32_t    column   = 0;
+    int32_t    charIdx  = 0;
+
+    int32_t    tagValue = 0;       // The numeric value of a <nnn> tag.
+
+    for (charIdx = 0; charIdx < len; ) {
+        status = U_ZERO_ERROR;
+        UChar  c = testString.charAt(charIdx);
+        charIdx++;
+        if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) {
+            // treat CRLF as a unit
+            c = CH_LF;
+            charIdx++;
+        }
+        if (c == CH_LF || c == CH_CR) {
+            lineNum++;
+            colStart = charIdx;
+        }
+        column = charIdx - colStart + 1;
+
+        switch (parseState) {
+        case PARSE_COMMENT:
+            if (c == 0x0a || c == 0x0d) {
+                parseState = savedState;
+            }
+            break;
+
+        case PARSE_TAG:
+            {
+            if (c == CH_HASH) {
+                parseState = PARSE_COMMENT;
+                savedState = PARSE_TAG;
+                break;
+            }
+            if (u_isUWhiteSpace(c)) {
+                break;
+            }
+            if (testString.compare(charIdx-1, 6, "<word>") == 0) {
+                delete tp.bi;
+                tp.bi = BreakIterator::createWordInstance(locale,  status);
+                charIdx += 5;
+                break;
+            }
+            if (testString.compare(charIdx-1, 6, "<char>") == 0) {
+                delete tp.bi;
+                tp.bi = BreakIterator::createCharacterInstance(locale,  status);
+                charIdx += 5;
+                break;
+            }
+            if (testString.compare(charIdx-1, 6, "<line>") == 0) {
+                delete tp.bi;
+                tp.bi = BreakIterator::createLineInstance(locale,  status);
+                charIdx += 5;
+                break;
+            }
+            if (testString.compare(charIdx-1, 6, "<sent>") == 0) {
+                delete tp.bi;
+                tp.bi = NULL;
+                tp.bi = BreakIterator::createSentenceInstance(locale,  status);
+                charIdx += 5;
+                break;
+            }
+            if (testString.compare(charIdx-1, 7, "<title>") == 0) {
+                delete tp.bi;
+                tp.bi = BreakIterator::createTitleInstance(locale,  status);
+                charIdx += 6;
+                break;
+            }
+
+            // <locale  loc_name>
+            localeMatcher.reset(testString);
+            if (localeMatcher.lookingAt(charIdx-1, status)) {
+                UnicodeString localeName = localeMatcher.group(1, status);
+                char localeName8[100];
+                localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0);
+                locale = Locale::createFromName(localeName8);
+                charIdx += localeMatcher.group(0, status).length() - 1;
+                TEST_ASSERT_SUCCESS(status);
+                break;
+            }
+            if (testString.compare(charIdx-1, 6, "<data>") == 0) {
+                parseState = PARSE_DATA;
+                charIdx += 5;
+                tp.dataToBreak = "";
+                tp.expectedBreaks->removeAllElements();
+                tp.srcCol ->removeAllElements();
+                tp.srcLine->removeAllElements();
+                break;
+            }
+
+            errln("line %d: Tag expected in test file.", lineNum);
+            parseState = PARSE_COMMENT;
+            savedState = PARSE_DATA;
+            goto end_test; // Stop the test.
+            }
+            break;
+
+        case PARSE_DATA:
+            if (c == CH_BULLET) {
+                int32_t  breakIdx = tp.dataToBreak.length();
+                tp.expectedBreaks->setSize(breakIdx+1);
+                tp.expectedBreaks->setElementAt(-1, breakIdx);
+                tp.srcLine->setSize(breakIdx+1);
+                tp.srcLine->setElementAt(lineNum, breakIdx);
+                tp.srcCol ->setSize(breakIdx+1);
+                tp.srcCol ->setElementAt(column, breakIdx);
+                break;
+            }
+
+            if (testString.compare(charIdx-1, 7, "</data>") == 0) {
+                // Add final entry to mappings from break location to source file position.
+                //  Need one extra because last break position returned is after the
+                //    last char in the data, not at the last char.
+                tp.srcLine->addElement(lineNum, status);
+                tp.srcCol ->addElement(column, status);
+
+                parseState = PARSE_TAG;
+                charIdx += 6;
+
+                // RUN THE TEST!
+                executeTest(&tp);
+                break;
+            }
+
+            if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
+                // Named character, e.g. \N{COMBINING GRAVE ACCENT}
+                // Get the code point from the name and insert it into the test data.
+                //   (Damn, no API takes names in Unicode  !!!
+                //    we've got to take it back to char *)
+                int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);
+                int32_t nameLength = nameEndIdx - (charIdx+2);
+                char charNameBuf[200];
+                UChar32 theChar = -1;
+                if (nameEndIdx != -1) {
+                    UErrorCode status = U_ZERO_ERROR;
+                    testString.extract(charIdx+2, nameLength, charNameBuf, sizeof(charNameBuf));
+                    charNameBuf[sizeof(charNameBuf)-1] = 0;
+                    theChar = u_charFromName(U_UNICODE_CHAR_NAME, charNameBuf, &status);
+                    if (U_FAILURE(status)) {
+                        theChar = -1;
+                    }
+                }
+                if (theChar == -1) {
+                    errln("Error in named character in test file at line %d, col %d",
+                        lineNum, column);
+                } else {
+                    // Named code point was recognized.  Insert it
+                    //   into the test data.
+                    tp.dataToBreak.append(theChar);
+                    while (tp.dataToBreak.length() > tp.srcLine->size()) {
+                        tp.srcLine->addElement(lineNum, status);
+                        tp.srcCol ->addElement(column, status);
+                    }
+                }
+                if (nameEndIdx > charIdx) {
+                    charIdx = nameEndIdx+1;
+
+                }
+                break;
+            }
+
+
+
+
+            if (testString.compare(charIdx-1, 2, "<>") == 0) {
+                charIdx++;
+                int32_t  breakIdx = tp.dataToBreak.length();
+                tp.expectedBreaks->setSize(breakIdx+1);
+                tp.expectedBreaks->setElementAt(-1, breakIdx);
+                tp.srcLine->setSize(breakIdx+1);
+                tp.srcLine->setElementAt(lineNum, breakIdx);
+                tp.srcCol ->setSize(breakIdx+1);
+                tp.srcCol ->setElementAt(column, breakIdx);
+                break;
+            }
+
+            if (c == CH_LT) {
+                tagValue   = 0;
+                parseState = PARSE_NUM;
+                break;
+            }
+
+            if (c == CH_HASH && column==3) {   // TODO:  why is column off so far?
+                parseState = PARSE_COMMENT;
+                savedState = PARSE_DATA;
+                break;
+            }
+
+            if (c == CH_BACKSLASH) {
+                // Check for \ at end of line, a line continuation.
+                //     Advance over (discard) the newline
+                UChar32 cp = testString.char32At(charIdx);
+                if (cp == CH_CR && charIdx<len && testString.charAt(charIdx+1) == CH_LF) {
+                    // We have a CR LF
+                    //  Need an extra increment of the input ptr to move over both of them
+                    charIdx++;
+                }
+                if (cp == CH_LF || cp == CH_CR) {
+                    lineNum++;
+                    colStart = charIdx;
+                    charIdx++;
+                    break;
+                }
+
+                // Let unescape handle the back slash.
+                cp = testString.unescapeAt(charIdx);
+                if (cp != -1) {
+                    // Escape sequence was recognized.  Insert the char
+                    //   into the test data.
+                    tp.dataToBreak.append(cp);
+                    while (tp.dataToBreak.length() > tp.srcLine->size()) {
+                        tp.srcLine->addElement(lineNum, status);
+                        tp.srcCol ->addElement(column, status);
+                    }
+                    break;
+                }
+
+
+                // Not a recognized backslash escape sequence.
+                // Take the next char as a literal.
+                //  TODO:  Should this be an error?
+                c = testString.charAt(charIdx);
+                charIdx = testString.moveIndex32(charIdx, 1);
+            }
+
+            // Normal, non-escaped data char.
+            tp.dataToBreak.append(c);
+
+            // Save the mapping from offset in the data to line/column numbers in
+            //   the original input file.  Will be used for better error messages only.
+            //   If there's an expected break before this char, the slot in the mapping
+            //     vector will already be set for this char; don't overwrite it.
+            if (tp.dataToBreak.length() > tp.srcLine->size()) {
+                tp.srcLine->addElement(lineNum, status);
+                tp.srcCol ->addElement(column, status);
+            }
+            break;
+
+
+        case PARSE_NUM:
+            // We are parsing an expected numeric tag value, like <1234>,
+            //   within a chunk of data.
+            if (u_isUWhiteSpace(c)) {
+                break;
+            }
+
+            if (c == CH_GT) {
+                // Finished the number.  Add the info to the expected break data,
+                //   and switch parse state back to doing plain data.
+                parseState = PARSE_DATA;
+                if (tagValue == 0) {
+                    tagValue = -1;
+                }
+                int32_t  breakIdx = tp.dataToBreak.length();
+                tp.expectedBreaks->setSize(breakIdx+1);
+                tp.expectedBreaks->setElementAt(tagValue, breakIdx);
+                tp.srcLine->setSize(breakIdx+1);
+                tp.srcLine->setElementAt(lineNum, breakIdx);
+                tp.srcCol ->setSize(breakIdx+1);
+                tp.srcCol ->setElementAt(column, breakIdx);
+                break;
+            }
+
+            if (u_isdigit(c)) {
+                tagValue = tagValue*10 + u_charDigitValue(c);
+                break;
+            }
+
+            errln("Syntax Error in test file at line %d, col %d",
+                lineNum, column);
+            parseState = PARSE_COMMENT;
+            goto end_test; // Stop the test
+            break;
+        }
+
+
+        if (U_FAILURE(status)) {
+            dataerrln("ICU Error %s while parsing test file at line %d.",
+                u_errorName(status), lineNum);
+            status = U_ZERO_ERROR;
+            goto end_test; // Stop the test
+        }
+
+    }
+
+end_test:
+    delete tp.bi;
+    delete tp.expectedBreaks;
+    delete tp.srcLine;
+    delete tp.srcCol;
+    delete [] testFile;
+#endif
+}
+
+
+//-------------------------------------------------------------------------------
+//
+//  TestDictRules   create a break iterator from source rules that includes a
+//                  dictionary range.   Regression for bug #7130.  Source rules
+//                  do not declare a break iterator type (word, line, sentence, etc.),
+//                  but the dictionary code, without a type, would loop.
+//
+//-------------------------------------------------------------------------------
+void RBBITest::TestDictRules() {
+    // Rules with a $dictionary range but no declared break iterator type.
+    const char *rules =  "$dictionary = [a-z]; \n"
+                         "!!forward; \n"
+                         "$dictionary $dictionary; \n"
+                         "!!reverse; \n"
+                         "$dictionary $dictionary; \n";
+    const char *text = "aa";
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+
+    RuleBasedBreakIterator bi(rules, parseError, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Error creating RuleBasedBreakIterator: %s", u_errorName(status));
+        return;
+    }
+
+    UnicodeString utext = text;
+    bi.setText(utext);
+
+    // Count the boundaries reported before DONE.  The cap of 10 keeps the test
+    //   from spinning forever if the iterator never reports DONE.
+    int32_t loops = 0;
+    while (loops < 10 && bi.next() != RuleBasedBreakIterator::DONE) {
+        loops++;
+    }
+    TEST_ASSERT(loops == 1);
+}
+
+
+
+//-------------------------------------------------------------------------------
+//
+//    ReadAndConvertFile   Read a text data file, convert it to UChars, and
+//    return the data in one big UChar * buffer, which the caller must delete.
+//
+//    parameters:
+//          fileName:   the path of the file to read.  It is passed to fopen() unchanged,
+//                      so callers must include the test data directory in the path.
+//          ulen        an out parameter, receives the actual length (in UChars) of the file data.
+//          encoding    The file encoding.  If the file contains a BOM, that will override the encoding
+//                      specified here.  The BOM, if it exists, will be stripped from the returned data.
+//                      Pass NULL for the system default encoding.
+//          status
+//    returns:
+//                      The file data, converted to UChar.
+//                      The caller must delete this when done with
+//                           delete [] theBuffer;
+//
+//    TODO:  This is a clone of RegexTest::ReadAndConvertFile.
+//           Move this function to some common place.
+//
+//--------------------------------------------------------------------------------
+UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
+    UChar       *retPtr  = NULL;
+    char        *fileBuf = NULL;
+    UConverter* conv     = NULL;
+    FILE        *f       = NULL;
+    UBool        readFailed = FALSE;   // TRUE once a file size / read problem has been reported.
+
+    ulen = 0;
+    if (U_FAILURE(status)) {
+        return retPtr;
+    }
+
+    //
+    //  Open the file.
+    //
+    f = fopen(fileName, "rb");
+    if (f == 0) {
+        dataerrln("Error opening test data file %s\n", fileName);
+        status = U_FILE_ACCESS_ERROR;
+        return NULL;
+    }
+    //
+    //  Read it in
+    //
+    int   fileSize;
+    int   amt_read;
+
+    fseek( f, 0, SEEK_END);
+    fileSize = (int)ftell(f);
+    fseek(f, 0, SEEK_SET);
+    if (fileSize <= 0) {
+        // ftell() failed (-1) or the file is empty.  Check before allocating so that
+        //   a bad size never reaches new[], and report the failure through status so
+        //   that callers which only test status do not carry on with a NULL buffer.
+        errln("Error reading test data file.");
+        status = U_FILE_ACCESS_ERROR;
+        readFailed = TRUE;
+        goto cleanUpAndReturn;
+    }
+    fileBuf = new char[fileSize];
+    amt_read = (int)fread(fileBuf, 1, fileSize, f);
+    if (amt_read != fileSize) {
+        errln("Error reading test data file.");
+        status = U_FILE_ACCESS_ERROR;
+        readFailed = TRUE;
+        goto cleanUpAndReturn;
+    }
+
+    //
+    // Look for a Unicode Signature (BOM) on the data just read
+    //
+    //   (These are declared without initializers because the gotos above jump past them.)
+    int32_t        signatureLength;
+    const char *   fileBufC;
+    const char*    bomEncoding;
+
+    fileBufC = fileBuf;
+    bomEncoding = ucnv_detectUnicodeSignature(
+        fileBuf, fileSize, &signatureLength, &status);
+    if(bomEncoding!=NULL ){
+        // A BOM overrides the caller's encoding and is not part of the returned data.
+        fileBufC  += signatureLength;
+        fileSize  -= signatureLength;
+        encoding = bomEncoding;
+    }
+
+    //
+    // Open a converter to take the rule file to UTF-16
+    //
+    conv = ucnv_open(encoding, &status);
+    if (U_FAILURE(status)) {
+        goto cleanUpAndReturn;
+    }
+
+    //
+    // Convert the rules to UChar.
+    //  Preflight first to determine required buffer size.
+    //
+    ulen = ucnv_toUChars(conv,
+        NULL,           //  dest,
+        0,              //  destCapacity,
+        fileBufC,
+        fileSize,
+        &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR) {
+        // Buffer Overflow is expected from the preflight operation.
+        status = U_ZERO_ERROR;
+
+        // One extra UChar leaves room for the terminating NUL.
+        retPtr = new UChar[ulen+1];
+        ucnv_toUChars(conv,
+            retPtr,       //  dest,
+            ulen+1,
+            fileBufC,
+            fileSize,
+            &status);
+    }
+
+cleanUpAndReturn:
+    fclose(f);
+    delete []fileBuf;
+    ucnv_close(conv);
+    if (U_FAILURE(status)) {
+        if (!readFailed) {
+            // Size / read failures were already reported where they were detected.
+            errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
+        }
+        delete []retPtr;
+        retPtr = 0;
+        ulen   = 0;
+    }
+    return retPtr;
+}
+
+
+
+//--------------------------------------------------------------------------------------------
+//
+//   Run tests from each of the boundary test data files distributed by the Unicode Consortium
+//
+//-------------------------------------------------------------------------------------------
+void RBBITest::TestUnicodeFiles() {
+    UErrorCode               status  = U_ZERO_ERROR;
+    const Locale            &english = Locale::getEnglish();
+    RuleBasedBreakIterator  *iter    = NULL;
+
+    // status is deliberately shared by all four stanzas, so a failure creating one
+    //   iterator is also seen by the stanzas that follow it.
+
+    // Grapheme cluster (character) boundaries.
+    iter = (RuleBasedBreakIterator *)BreakIterator::createCharacterInstance(english, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (!U_FAILURE(status)) {
+        runUnicodeTestData("GraphemeBreakTest.txt", iter);
+    }
+    delete iter;
+
+    // Word boundaries.
+    iter = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(english, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (!U_FAILURE(status)) {
+        runUnicodeTestData("WordBreakTest.txt", iter);
+    }
+    delete iter;
+
+    // Sentence boundaries.
+    iter = (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(english, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (!U_FAILURE(status)) {
+        runUnicodeTestData("SentenceBreakTest.txt", iter);
+    }
+    delete iter;
+
+    // Line break opportunities.
+    iter = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(english, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (!U_FAILURE(status)) {
+        runUnicodeTestData("LineBreakTest.txt", iter);
+    }
+    delete iter;
+}
+
+
+//--------------------------------------------------------------------------------------------
+//
+//   Run tests from one of the boundary test data files distributed by the Unicode Consortium
+//
+//-------------------------------------------------------------------------------------------
+// Parameters:
+//     fileName   name of the data file, relative to the source test data directory.
+//     bi         the break iterator to be checked against the data in the file.
+// Does nothing when regular expressions are configured out of the build.
+void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270
+    UBool isTicket7270Fixed = isICUVersionAtLeast(52, 1);
+    UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");
+    UErrorCode  status = U_ZERO_ERROR;
+
+    //
+    //  Open and read the test data file, put it into a UnicodeString.
+    //
+    const char *testDataDirectory = IntlTest::getSourceTestData(status);
+    char testFileName[1000];
+    // The directory and the file name are concatenated below, so the two together
+    //   (plus the terminating NUL) must fit in the buffer.
+    if (testDataDirectory == NULL ||
+        strlen(testDataDirectory) + strlen(fileName) >= sizeof(testFileName)) {
+        dataerrln("Can't open test data.  Path too long.");
+        return;
+    }
+    strcpy(testFileName, testDataDirectory);
+    strcat(testFileName, fileName);
+
+    logln("Opening data file %s\n", fileName);
+
+    int    len;
+    UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
+    if (status != U_FILE_ACCESS_ERROR) {
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(testFile != NULL);
+    }
+    if (U_FAILURE(status) || testFile == NULL) {
+        return; /* something went wrong, error already output */
+    }
+    // Read-only alias of the buffer; testFile itself is still owned (and deleted) here.
+    UnicodeString testFileAsString(TRUE, testFile, len);
+
+    //
+    //  Parse the test data file using a regular expression.
+    //  Each kind of token is recognized in its own capture group; what type of item was scanned
+    //     is identified by which group had a match.
+    //
+    //    Capture Group #                   1          2            3            4           5
+    //    Parses this item:               divide       x      hex digits   comment \n  unrecognized \n
+    //
+    UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV);
+    RegexMatcher    tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status);
+    UnicodeString   testString;
+    UVector32       breakPositions(status);
+    int             lineNumber = 1;
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        // Don't leak the file buffer when the matcher or vector could not be set up.
+        delete [] testFile;
+        return;
+    }
+
+    //
+    //  Scan through each test case, building up the string to be broken in testString,
+    //   and the positions that should be boundaries in the breakPositions vector.
+    //
+    int spin = 0;
+    while (tokenMatcher.find()) {
+       if(tokenMatcher.hitEnd()) {
+          /* Shouldnt Happen(TM).  This means we didn't find the symbols we were looking for.
+             This occurred when the text file was corrupt (wasn't marked as UTF-8)
+             and caused an infinite loop here on EBCDIC systems!
+          */
+          fprintf(stderr,"FAIL: hit end of file %s for the %8dth time- corrupt data file?\r", fileName, ++spin);
+          //      return;
+       }
+        if (tokenMatcher.start(1, status) >= 0) {
+            // Scanned a divide sign, indicating a break position in the test data.
+            if (testString.length()>0) {
+                breakPositions.addElement(testString.length(), status);
+            }
+        }
+        else if (tokenMatcher.start(2, status) >= 0) {
+            // Scanned an 'x', meaning no break at this position in the test data
+            //   Nothing to be done here.
+            }
+        else if (tokenMatcher.start(3, status) >= 0) {
+            // Scanned Hex digits.  Convert them to binary, append to the character data string.
+            const UnicodeString &hexNumber = tokenMatcher.group(3, status);
+            int length = hexNumber.length();
+            if (length<=8) {
+                char buf[10];
+                hexNumber.extract (0, length, buf, sizeof(buf), US_INV);
+                // Parse as unsigned so that values such as FFFFFFFF cannot wrap to a
+                //   negative UChar32 and slip past the range check below.
+                unsigned long value = strtoul(buf, NULL, 16);
+                if (value<=0x10ffff) {
+                    testString.append((UChar32)value);
+                } else {
+                    errln("Error: Unicode Character value out of range. \'%s\', line %d.\n",
+                       fileName, lineNumber);
+                }
+            } else {
+                errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n",
+                       fileName, lineNumber);
+             }
+        }
+        else if (tokenMatcher.start(4, status) >= 0) {
+            // Scanned to end of a line, possibly skipping over a comment in the process.
+            //   If the line from the file contained test data, run the test now.
+            //
+            if (testString.length() > 0) {
+// TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data.
+//             Rule 8 
+//                ZW SP* <break>
+//             is not yet implemented.
+if (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber || 
+                                            5202 == lineNumber ||
+                                            5214 == lineNumber ||
+                                            5246 == lineNumber ||
+                                            5298 == lineNumber ||
+                                            5302 == lineNumber ))) {
+                checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);
+}
+            }
+
+            // Clear out this test case.
+            //    The string and breakPositions vector will be refilled as the next
+            //       test case is parsed.
+            testString.remove();
+            breakPositions.removeAllElements();
+            lineNumber++;
+        } else {
+            // Scanner catchall.  Something unrecognized appeared on the line.
+            char token[16];
+            UnicodeString uToken = tokenMatcher.group(0, status);
+            uToken.extract(0, uToken.length(), token, (uint32_t)sizeof(token));
+            token[sizeof(token)-1] = 0;
+            errln("Syntax error in test data file \'%s\', line %d.  Scanning \"%s\"\n", fileName, lineNumber, token);
+
+            // Clean up, in preparation for continuing with the next line.
+            testString.remove();
+            breakPositions.removeAllElements();
+            lineNumber++;
+        }
+        TEST_ASSERT_SUCCESS(status);
+        if (U_FAILURE(status)) {
+            break;
+        }
+    }
+
+    delete [] testFile;
+ #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
+}
+
+//--------------------------------------------------------------------------------------------
+//
+//   checkUnicodeTestCase()   Run one test case from one of the Unicode Consortium
+//                            test data files.  Do only a simple, forward-only check -
+//                            this test is mostly to check that ICU and the Unicode
+//                            data agree with each other.
+//
+//--------------------------------------------------------------------------------------------
+void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber,
+                         const UnicodeString &testString,   // Text data to be broken
+                         UVector32 *breakPositions,         // Positions where breaks should be found.
+                         RuleBasedBreakIterator *bi) {
+    bi->setText(testString);
+    bi->first();    // Position at the start of the text; this boundary is not itself checked.
+
+    // Walk the iterator's boundaries and the expected positions in step.
+    //   Only the first mismatch is reported, after which the check stops.
+    int32_t expectedIdx = 0;
+    int32_t actualBreak = bi->next();
+    for (; actualBreak != BreakIterator::DONE; actualBreak = bi->next(), expectedIdx++) {
+        if (expectedIdx >= breakPositions->size()) {
+            // The iterator produced more boundaries than the data file lists.
+            errln("Test file \"%s\", line %d, unexpected break found at position %d",
+                testFileName, lineNumber, actualBreak);
+            return;
+        }
+
+        int32_t wantedBreak = breakPositions->elementAti(expectedIdx);
+        if (actualBreak == wantedBreak) {
+            continue;
+        }
+        if (actualBreak < wantedBreak) {
+            errln("Test file \"%s\", line %d, unexpected break found at position %d",
+                testFileName, lineNumber, actualBreak);
+        } else {
+            errln("Test file \"%s\", line %d, failed to find expected break at position %d",
+                testFileName, lineNumber, wantedBreak);
+        }
+        return;
+    }
+
+    // The iterator is exhausted; any expected position left over was missed.
+    if (expectedIdx < breakPositions->size()) {
+        errln("Test file \"%s\", line %d, failed to find expected break at position %d",
+            testFileName, lineNumber, breakPositions->elementAti(expectedIdx));
+    }
+}
+
+
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+//---------------------------------------------------------------------------------------
+//
+//   class RBBIMonkeyKind
+//
+//      Monkey Test for Break Iteration
+//      Abstract interface class.   Concrete derived classes independently
+//      implement the break rules for different iterator types.
+//
+//      The Monkey Test itself doesn't know which type of break iterator it is
+//      testing, but works purely in terms of the interface defined here.
+//
+//---------------------------------------------------------------------------------------
+class RBBIMonkeyKind {
+public:
+    // The character classes used by this kind of iterator, as a UVector
+    //   of UnicodeSets.
+    virtual  UVector  *charClasses() = 0;
+
+    // Supply the text that later calls to next() will work on.
+    virtual  void      setText(const UnicodeString &s) = 0;
+
+    // Find the break position following i, where i is the previous break position
+    //   (or zero to start).  Returns -1 once the end of the string has been reached.
+    virtual  int32_t   next(int32_t i) = 0;
+
+    virtual ~RBBIMonkeyKind();
+
+    // Starts out as U_ZERO_ERROR.  Derived classes store a construction failure here.
+    UErrorCode       deferredStatus;
+
+protected:
+    // Abstract interface: only derived classes may construct it.
+    RBBIMonkeyKind();
+};
+
+RBBIMonkeyKind::RBBIMonkeyKind()
+    : deferredStatus(U_ZERO_ERROR) {
+}
+
+RBBIMonkeyKind::~RBBIMonkeyKind() {
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+//   Random Numbers.  Similar to standard lib rand() and srand()
+//                    Not using library to
+//                      1.  Get same results on all platforms.
+//                      2.  Get access to current seed, to more easily reproduce failures.
+//
+//---------------------------------------------------------------------------------------
+// Current generator state; readable and settable so that a failing run can be reproduced.
+static uint32_t m_seed = 1;
+
+// Advance the generator one step and return the next value, in the range 0..32767.
+static uint32_t m_rand()
+{
+    const uint32_t kMultiplier = 1103515245;
+    const uint32_t kIncrement  = 12345;
+
+    // The arithmetic is unsigned 32 bit and wraps modulo 2^32.
+    m_seed = kMultiplier * m_seed + kIncrement;
+
+    // Bits 16..30 of the state: the same as (m_seed/65536) % 32768.
+    return (m_seed >> 16) & 0x7fff;
+}
+
+
+//------------------------------------------------------------------------------------------
+//
+//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
+//                             of RBBIMonkeyKind.
+//
+//------------------------------------------------------------------------------------------
+class RBBICharMonkey: public RBBIMonkeyKind {
+public:
+    // Builds all of the character sets below.  A failure while doing so is recorded
+    //   in deferredStatus rather than reported directly.
+    RBBICharMonkey();
+    virtual          ~RBBICharMonkey();
+
+    // Implementations of the RBBIMonkeyKind interface.
+    virtual  UVector *charClasses();
+    virtual  void     setText(const UnicodeString &s);
+    virtual  int32_t  next(int32_t i);
+private:
+    // Vector holding the sets added by the constructor: CRLF, Control, Extend,
+    //   Regional Indicator, Prepend (only when not empty), SpacingMark, Hangul and Any.
+    UVector   *fSets;
+
+    // Sets created by the constructor.  Unless noted otherwise, each one is the set of
+    //   characters with the named Grapheme_Cluster_Break property value.
+    UnicodeSet  *fCRLFSet;                // [\r\n]
+    UnicodeSet  *fControlSet;             // Control
+    UnicodeSet  *fExtendSet;              // Extend
+    UnicodeSet  *fRegionalIndicatorSet;   // Regional_Indicator
+    UnicodeSet  *fPrependSet;             // Prepend
+    UnicodeSet  *fSpacingSet;             // SpacingMark
+    UnicodeSet  *fLSet;                   // L
+    UnicodeSet  *fVSet;                   // V
+    UnicodeSet  *fTSet;                   // T
+    UnicodeSet  *fLVSet;                  // LV
+    UnicodeSet  *fLVTSet;                 // LVT
+    UnicodeSet  *fHangulSet;              // Union of the L, V, T, LV and LVT sets
+    UnicodeSet  *fAnySet;                 // All code points, 0 through 0x10ffff
+
+    // The text most recently passed to setText().  Points at the caller's string;
+    //   no copy is made.  NULL until setText() has been called.
+    const UnicodeString *fText;
+};
+
+
+RBBICharMonkey::RBBICharMonkey() {
+    UErrorCode  status = U_ZERO_ERROR;
+
+    // No text until setText() is called.
+    fText = NULL;
+
+    // One set per Grapheme_Cluster_Break property value used by the rules, plus CR/LF.
+    //   All of the pattern-based sets share the one status variable.
+    fCRLFSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
+    fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status);
+    fExtendSet  = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status);
+    fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]"), status);
+    fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
+    fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
+    fLSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
+    fVSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status);
+    fTSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status);
+    fLVSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status);
+    fLVTSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status);
+
+    // Hangul is the union of the five Hangul syllable-type sets.
+    fHangulSet  = new UnicodeSet();
+    fHangulSet->addAll(*fLSet)
+               .addAll(*fVSet)
+               .addAll(*fTSet)
+               .addAll(*fLVSet)
+               .addAll(*fLVTSet);
+
+    // Every code point.
+    fAnySet     = new UnicodeSet(0, 0x10ffff);
+
+    // Collect the sets into fSets.  The individual L, V, T, LV and LVT sets are
+    //   represented only through fHangulSet, and Prepend is left out when it is empty.
+    fSets       = new UVector(status);
+    fSets->addElement(fCRLFSet,    status);
+    fSets->addElement(fControlSet, status);
+    fSets->addElement(fExtendSet,  status);
+    fSets->addElement(fRegionalIndicatorSet, status);
+    if (!fPrependSet->isEmpty()) {
+        fSets->addElement(fPrependSet, status);
+    }
+    fSets->addElement(fSpacingSet, status);
+    fSets->addElement(fHangulSet,  status);
+    fSets->addElement(fAnySet,     status);
+
+    // A constructor cannot return an error, so keep it where next() will check it.
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
+}
+
+
+// Set the text that subsequent calls to next() will examine.
+//   Only the address of s is stored, no copy is made, so the caller must keep s
+//   alive for as long as this object may be asked for break positions in it.
+void RBBICharMonkey::setText(const UnicodeString &s) {
+    fText = &s;
+}
+
+
+
+// Finds the next grapheme cluster boundary by applying the GB rules listed
+// below, one at a time and in order.
+//   prevPos  index of the previous boundary in the text given to setText().
+//   returns  index of the following boundary, or -1 when prevPos is at or past
+//            the end of the text or deferredStatus holds a failure code.
+int32_t RBBICharMonkey::next(int32_t prevPos) {
+    int    p0, p1, p2, p3;    // Indices of the significant code points around the
+                              //   break position being tested.  The candidate break
+                              //   location is before p2.
+
+    int     breakPos = -1;
+
+    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
+    
+    // A failure recorded in deferredStatus means the sets may be unusable.
+    if (U_FAILURE(deferredStatus)) {
+        return -1;
+    }
+
+    // Previous break at end of string.  return DONE.
+    if (prevPos >= fText->length()) {
+        return -1;
+    }
+    p0 = p1 = p2 = p3 = prevPos;
+    c3 =  fText->char32At(prevPos);
+    c0 = c1 = c2 = 0;
+
+    // Loop runs once per "significant" character position in the input text.
+    // In each pass, "continue" means "no break before p2, keep scanning" and
+    // "break" means "boundary found before p2".
+    for (;;) {
+        // Move all of the positions forward in the input string.
+        // (p0/c0 are kept up to date but none of the rules below reads them.)
+        p0 = p1;  c0 = c1;
+        p1 = p2;  c1 = c2;
+        p2 = p3;  c2 = c3;
+
+        // Advance p3 by one code point
+        p3 = fText->moveIndex32(p3, 1);
+        c3 = fText->char32At(p3);
+
+        if (p1 == p2) {
+            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
+            continue;
+        }
+        if (p2 == fText->length()) {
+            // Reached end of string.  Always a break position.
+            break;
+        }
+
+        // Rule  GB3   CR x LF
+        //     No Extend or Format characters may appear between the CR and LF,
+        //     which requires the additional check for p2 immediately following p1.
+        //
+        if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
+            continue;
+        }
+
+        // Rule (GB4).   ( Control | CR | LF ) <break>
+        if (fControlSet->contains(c1) ||
+            c1 == 0x0D ||
+            c1 == 0x0A)  {
+            break;
+        }
+
+        // Rule (GB5)    <break>  ( Control | CR | LF )
+        //
+        if (fControlSet->contains(c2) ||
+            c2 == 0x0D ||
+            c2 == 0x0A)  {
+            break;
+        }
+
+
+        // Rule (GB6)  L x ( L | V | LV | LVT )
+        if (fLSet->contains(c1) &&
+               (fLSet->contains(c2)  ||
+                fVSet->contains(c2)  ||
+                fLVSet->contains(c2) ||
+                fLVTSet->contains(c2))) {
+            continue;
+        }
+
+        // Rule (GB7)    ( LV | V )  x  ( V | T )
+        if ((fLVSet->contains(c1) || fVSet->contains(c1)) &&
+            (fVSet->contains(c2) || fTSet->contains(c2)))  {
+            continue;
+        }
+
+        // Rule (GB8)    ( LVT | T)  x T
+        if ((fLVTSet->contains(c1) || fTSet->contains(c1)) &&
+            fTSet->contains(c2))  {
+            continue;
+        }
+
+        // Just adding the extra Apple rule here does not work; behavior depends on arbitrary context
+
+        // Rule (GB8a)    Regional_Indicator x Regional_Indicator
+        if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule (GB9)    x  Extend
+        if (fExtendSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (GB9a)   x  SpacingMark
+        if (fSpacingSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule (GB9b)   Prepend x
+        if (fPrependSet->contains(c1)) {
+            continue;
+        }
+
+        // Rule (GB10)  Any  <break>  Any
+        break;
+    }
+
+    // The boundary lies immediately before the code point at p2.
+    breakPos = p2;
+    return breakPos;
+}
+
+
+
+// Accessor for the vector of character-class sets assembled by the
+// constructor.  The object's own fSets pointer is returned, not a copy.
+UVector  *RBBICharMonkey::charClasses() {
+    UVector *classList = fSets;
+    return classList;
+}
+
+
+// Deletes the class-list vector and then each UnicodeSet held by this object.
+RBBICharMonkey::~RBBICharMonkey() {
+    delete fSets;
+
+    // Deleted in the order listed.
+    UnicodeSet *const ownedSets[] = {
+        fCRLFSet, fControlSet, fExtendSet, fRegionalIndicatorSet,
+        fPrependSet, fSpacingSet,
+        fLSet, fVSet, fTSet, fLVSet, fLVTSet,
+        fHangulSet, fAnySet
+    };
+    const int32_t ownedCount = (int32_t)(sizeof(ownedSets) / sizeof(ownedSets[0]));
+    for (int32_t idx = 0; idx < ownedCount; ++idx) {
+        delete ownedSets[idx];
+    }
+}
+
+//------------------------------------------------------------------------------------------
+//
+//   class RBBIWordMonkey      Word Break specific implementation
+//                             of RBBIMonkeyKind.
+//
+//------------------------------------------------------------------------------------------
+class RBBIWordMonkey: public RBBIMonkeyKind {
+public:
+    // Builds the character-class sets; a failure is recorded in deferredStatus.
+    RBBIWordMonkey();
+    virtual          ~RBBIWordMonkey();
+    // Returns fSets, the vector of character-class sets filled by the constructor.
+    virtual  UVector *charClasses();
+    // Stores a pointer to s as the text examined by next(); s is not copied.
+    virtual  void     setText(const UnicodeString &s);
+    // Returns the word boundary following index i, or -1 at end of text or
+    // when deferredStatus holds a failure code.
+    virtual int32_t   next(int32_t i);
+private:
+    UVector      *fSets;             // Character classes registered by the constructor.
+
+    // One set per Word_Break property value, built from \p{Word_Break = ...}
+    // patterns in the constructor (fOtherSet and fDictionaryCjkSet excepted).
+    UnicodeSet  *fCRSet;
+    UnicodeSet  *fLFSet;
+    UnicodeSet  *fNewlineSet;
+    UnicodeSet  *fKatakanaSet;
+    UnicodeSet  *fALetterSet;        // ALetter with fDictionaryCjkSet removed.
+    // TODO(jungshik): Do we still need this change? 
+    // UnicodeSet  *fALetterSet; // matches ALetterPlus in word.txt
+    UnicodeSet  *fMidNumLetSet;
+    UnicodeSet  *fMidLetterSet;
+    UnicodeSet  *fMidNumSet;
+    UnicodeSet  *fNumericSet;
+    UnicodeSet  *fFormatSet;
+    UnicodeSet  *fOtherSet;          // Complement of the classes removed from it in the constructor.
+    UnicodeSet  *fExtendSet;
+    UnicodeSet  *fExtendNumLetSet;
+    UnicodeSet  *fRegionalIndicatorSet;
+    UnicodeSet  *fDictionaryCjkSet;  // Hangul syllables, Han, Hiragana and Katakana.
+
+    // NOTE(review): the constructor does not create an object for fMatcher, and
+    // setText(), next(), charClasses() and the destructor do not reference it.
+    RegexMatcher  *fMatcher;
+
+    const UnicodeString  *fText;     // Text set by setText(); not owned.
+};
+
+
+RBBIWordMonkey::RBBIWordMonkey()
+{
+    UErrorCode  status = U_ZERO_ERROR;
+
+    fSets            = new UVector(status);
+
+    fCRSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"),           status);
+    fLFSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"),           status);
+    fNewlineSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"),      status);
+    fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:][:Katakana:]]", status);
+    // Exclude Hangul syllables from ALetterSet during testing.
+    // Leave CJK dictionary characters out from the monkey tests!
+#if 0 
+    fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}"
+                                      "[\\p{Line_Break = Complex_Context}"
+                                      "-\\p{Grapheme_Cluster_Break = Extend}"
+                                      "-\\p{Grapheme_Cluster_Break = Control}"
+                                      "]]",
+                                      status);
+#endif
+    fALetterSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);
+    fALetterSet->removeAll(*fDictionaryCjkSet);
+    fKatakanaSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"),     status);
+    fMidNumLetSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"),    status);
+    fMidLetterSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"),    status);
+    fMidNumSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"),       status);
+    // TODO: this set used to contain [\\uff10-\\uff19] (fullwidth digits), but this breaks the test
+    // we should figure out why
+    fNumericSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"),      status);
+    fFormatSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"),       status);
+    fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);
+    fExtendSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"),       status);
+    fRegionalIndicatorSet =  new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Regional_Indicator}]"), status);
+
+    fOtherSet        = new UnicodeSet();
+    if(U_FAILURE(status)) {
+      deferredStatus = status;
+      return;
+    }
+
+    fOtherSet->complement();
+    fOtherSet->removeAll(*fCRSet);
+    fOtherSet->removeAll(*fLFSet);
+    fOtherSet->removeAll(*fNewlineSet);
+    fOtherSet->removeAll(*fKatakanaSet);
+    fOtherSet->removeAll(*fALetterSet);
+    fOtherSet->removeAll(*fMidLetterSet);
+    fOtherSet->removeAll(*fMidNumSet);
+    fOtherSet->removeAll(*fNumericSet);
+    fOtherSet->removeAll(*fExtendNumLetSet);
+    fOtherSet->removeAll(*fFormatSet);
+    fOtherSet->removeAll(*fExtendSet);
+    fOtherSet->removeAll(*fRegionalIndicatorSet);
+    // Inhibit dictionary characters from being tested at all.
+    fOtherSet->removeAll(*fDictionaryCjkSet);
+    fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
+
+    fSets->addElement(fCRSet,        status);
+    fSets->addElement(fLFSet,        status);
+    fSets->addElement(fNewlineSet,   status);
+    fSets->addElement(fALetterSet,   status);
+    //fSets->addElement(fKatakanaSet,  status); //TODO: work out how to test katakana
+    fSets->addElement(fMidLetterSet, status);
+    fSets->addElement(fMidNumLetSet, status);
+    fSets->addElement(fMidNumSet,    status);
+    fSets->addElement(fNumericSet,   status);
+    fSets->addElement(fFormatSet,    status);
+    fSets->addElement(fExtendSet,    status);
+    fSets->addElement(fOtherSet,     status);
+    fSets->addElement(fExtendNumLetSet, status);
+    fSets->addElement(fRegionalIndicatorSet, status);
+
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
+}
+
+// Points this object at the string that next() will scan.  The string is
+// referenced through its address, not copied.
+void RBBIWordMonkey::setText(const UnicodeString &s) {
+    const UnicodeString *incoming = &s;
+    this->fText = incoming;
+}
+
+
+// Finds the next word boundary by applying the numbered word-break rules
+// below, one at a time and in order.
+//   prevPos  index of the previous boundary in the text given to setText().
+//   returns  index of the following boundary, or -1 when prevPos is at or past
+//            the end of the text or deferredStatus holds a failure code.
+int32_t RBBIWordMonkey::next(int32_t prevPos) {
+    int    p0, p1, p2, p3;    // Indices of the significant code points around the
+                              //   break position being tested.  The candidate break
+                              //   location is before p2.
+
+    int     breakPos = -1;
+
+    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
+    
+    // A failure recorded in deferredStatus means the sets may be unusable.
+    if (U_FAILURE(deferredStatus)) {
+        return -1;
+    }
+
+    // Prev break at end of string.  return DONE.
+    if (prevPos >= fText->length()) {
+        return -1;
+    }
+    p0 = p1 = p2 = p3 = prevPos;
+    c3 =  fText->char32At(prevPos);
+    c0 = c1 = c2 = 0;
+
+    // Loop runs once per "significant" character position in the input text.
+    // In each pass, "continue" means "no break before p2, keep scanning" and
+    // "break" means "boundary found before p2".
+    for (;;) {
+        // Move all of the positions forward in the input string.
+        p0 = p1;  c0 = c1;
+        p1 = p2;  c1 = c2;
+        p2 = p3;  c2 = c3;
+
+        // Advance p3 by    X(Extend | Format)*   Rule 4
+        //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
+        do {
+            p3 = fText->moveIndex32(p3, 1);
+            c3 = fText->char32At(p3);
+            if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
+               break;
+            };
+        }
+        while (fFormatSet->contains(c3) || fExtendSet->contains(c3));
+
+
+        if (p1 == p2) {
+            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
+            continue;
+        }
+        if (p2 == fText->length()) {
+            // Reached end of string.  Always a break position.
+            break;
+        }
+
+        // Rule  (3)   CR x LF
+        //     No Extend or Format characters may appear between the CR and LF.
+        //     No explicit adjacency check is made here: the advance loop above
+        //     stops after a single code point whenever c2 is CR, LF or Newline,
+        //     so nothing is skipped between a CR and the code point after it.
+        //
+        if (c1==0x0D && c2==0x0A) {
+            continue;
+        }
+        
+        // Rule (3a)  Break before and after newlines (including CR and LF)
+        //
+        if (fCRSet->contains(c1) || fLFSet->contains(c1) || fNewlineSet->contains(c1)) {
+            break;
+        };
+        if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
+            break;
+        };
+
+        // Rule (5).   ALetter x ALetter
+        if (fALetterSet->contains(c1) &&
+            fALetterSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (6)  ALetter  x  (MidLetter | MidNumLet) ALetter
+        //
+        if ( fALetterSet->contains(c1)   &&
+             (fMidLetterSet->contains(c2) || fMidNumLetSet->contains(c2)) &&
+             fALetterSet->contains(c3)) {
+            continue;
+        }
+
+
+        // Rule (7)  ALetter (MidLetter | MidNumLet)  x  ALetter
+        if (fALetterSet->contains(c0) &&
+            (fMidLetterSet->contains(c1) ||  fMidNumLetSet->contains(c1)) &&
+            fALetterSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule (8)    Numeric x Numeric
+        if (fNumericSet->contains(c1) &&
+            fNumericSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (9)    ALetter x Numeric
+        if (fALetterSet->contains(c1) &&
+            fNumericSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (10)    Numeric x ALetter
+        if (fNumericSet->contains(c1) &&
+            fALetterSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (11)   Numeric (MidNum | MidNumLet)  x  Numeric
+        if (fNumericSet->contains(c0) &&
+            (fMidNumSet->contains(c1) || fMidNumLetSet->contains(c1))  &&
+            fNumericSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule (12)  Numeric x (MidNum | MidNumLet) Numeric
+        if (fNumericSet->contains(c1) &&
+            (fMidNumSet->contains(c2) || fMidNumLetSet->contains(c2))  &&
+            fNumericSet->contains(c3)) {
+            continue;
+        }
+
+        // Rule (13)  Katakana x Katakana
+        if (fKatakanaSet->contains(c1) &&
+            fKatakanaSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule 13a   (ALetter | Numeric | Katakana | ExtendNumLet)  x  ExtendNumLet
+        if ((fALetterSet->contains(c1) || fNumericSet->contains(c1) ||
+             fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) &&
+             fExtendNumLetSet->contains(c2)) {
+                continue;
+        }
+
+        // Rule 13b   ExtendNumLet  x  (ALetter | Numeric | Katakana)
+        if (fExtendNumLetSet->contains(c1) &&
+                (fALetterSet->contains(c2) || fNumericSet->contains(c2) ||
+                fKatakanaSet->contains(c2)))  {
+                continue;
+        }
+
+        // Rule 13c   Regional_Indicator  x  Regional_Indicator
+        if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule 14.  Break found here.
+        break;
+    }
+
+    // The boundary lies immediately before the code point at p2.
+    breakPos = p2;
+    return breakPos;
+}
+
+
+// Accessor for the vector of character-class sets registered by the
+// constructor.  The object's own fSets pointer is returned, not a copy.
+UVector  *RBBIWordMonkey::charClasses() {
+    UVector *classList = fSets;
+    return classList;
+}
+
+
+// Deletes the class-list vector and then each UnicodeSet held by this object.
+RBBIWordMonkey::~RBBIWordMonkey() {
+    delete fSets;
+
+    // Deleted in the order listed.
+    UnicodeSet *const ownedSets[] = {
+        fCRSet, fLFSet, fNewlineSet,
+        fKatakanaSet, fALetterSet,
+        fMidNumLetSet, fMidLetterSet, fMidNumSet, fNumericSet,
+        fFormatSet, fExtendSet, fExtendNumLetSet,
+        fRegionalIndicatorSet, fDictionaryCjkSet, fOtherSet
+    };
+    const int32_t ownedCount = (int32_t)(sizeof(ownedSets) / sizeof(ownedSets[0]));
+    for (int32_t idx = 0; idx < ownedCount; ++idx) {
+        delete ownedSets[idx];
+    }
+}
+
+
+
+
+//------------------------------------------------------------------------------------------
+//
+//   class RBBISentMonkey      Sentence Break specific implementation
+//                             of RBBIMonkeyKind.
+//
+//------------------------------------------------------------------------------------------
+class RBBISentMonkey: public RBBIMonkeyKind {
+public:
+    // Builds the character-class sets; a failure is recorded in deferredStatus.
+    RBBISentMonkey();
+    virtual          ~RBBISentMonkey();
+    // Returns fSets, the vector of character-class sets filled by the constructor.
+    virtual  UVector *charClasses();
+    // Stores a pointer to s as the text examined by next(); s is not copied.
+    virtual  void     setText(const UnicodeString &s);
+    // Returns the sentence boundary following index i, or -1 at end of text or
+    // when deferredStatus holds a failure code.
+    virtual int32_t   next(int32_t i);
+private:
+    // Step to the previous / next index, skipping Extend and Format characters.
+    int               moveBack(int posFrom);
+    int               moveForward(int posFrom);
+    // Code point at pos, or -1 when pos lies outside the text.
+    UChar32           cAt(int pos);
+
+    UVector      *fSets;             // Character classes registered by the constructor.
+
+    // One set per Sentence_Break property value, built from
+    // \p{Sentence_Break = ...} patterns in the constructor (fOtherSet excepted).
+    UnicodeSet  *fSepSet;            // Sep plus U+000A and U+000D.
+    UnicodeSet  *fFormatSet;
+    UnicodeSet  *fSpSet;
+    UnicodeSet  *fLowerSet;
+    UnicodeSet  *fUpperSet;
+    UnicodeSet  *fOLetterSet;
+    UnicodeSet  *fNumericSet;
+    UnicodeSet  *fATermSet;
+    UnicodeSet  *fSContinueSet;
+    UnicodeSet  *fSTermSet;
+    UnicodeSet  *fCloseSet;
+    UnicodeSet  *fOtherSet;          // Complement of all the other sets.
+    UnicodeSet  *fExtendSet;
+
+    const UnicodeString  *fText;     // Text set by setText(); not owned.
+
+};
+
+RBBISentMonkey::RBBISentMonkey()
+{
+    UErrorCode  status = U_ZERO_ERROR;
+
+    fSets            = new UVector(status);
+
+    //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
+    //                       set and made into character classes of their own.  For the monkey impl,
+    //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
+    fSepSet          = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"),     status);
+    fFormatSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"),    status);
+    fSpSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"),        status);
+    fLowerSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"),     status);
+    fUpperSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"),     status);
+    fOLetterSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"),   status);
+    fNumericSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"),   status);
+    fATermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"),     status);
+    fSContinueSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status);
+    fSTermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"),     status);
+    fCloseSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"),     status);
+    fExtendSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"),    status);
+    fOtherSet        = new UnicodeSet();
+
+    if(U_FAILURE(status)) {
+      deferredStatus = status;
+      return;
+    }
+
+    fOtherSet->complement();
+    fOtherSet->removeAll(*fSepSet);
+    fOtherSet->removeAll(*fFormatSet);
+    fOtherSet->removeAll(*fSpSet);
+    fOtherSet->removeAll(*fLowerSet);
+    fOtherSet->removeAll(*fUpperSet);
+    fOtherSet->removeAll(*fOLetterSet);
+    fOtherSet->removeAll(*fNumericSet);
+    fOtherSet->removeAll(*fATermSet);
+    fOtherSet->removeAll(*fSContinueSet);
+    fOtherSet->removeAll(*fSTermSet);
+    fOtherSet->removeAll(*fCloseSet);
+    fOtherSet->removeAll(*fExtendSet);
+
+    fSets->addElement(fSepSet,       status);
+    fSets->addElement(fFormatSet,    status);
+    fSets->addElement(fSpSet,        status);
+    fSets->addElement(fLowerSet,     status);
+    fSets->addElement(fUpperSet,     status);
+    fSets->addElement(fOLetterSet,   status);
+    fSets->addElement(fNumericSet,   status);
+    fSets->addElement(fATermSet,     status);
+    fSets->addElement(fSContinueSet, status);
+    fSets->addElement(fSTermSet,     status);
+    fSets->addElement(fCloseSet,     status);
+    fSets->addElement(fOtherSet,     status);
+    fSets->addElement(fExtendSet,    status);
+
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
+}
+
+
+
+// Points this object at the string that next() and the helper functions will
+// scan.  The string is referenced through its address, not copied.
+void RBBISentMonkey::setText(const UnicodeString &s) {
+    const UnicodeString *incoming = &s;
+    this->fText = incoming;
+}
+
+// Accessor for the vector of character-class sets registered by the
+// constructor.  The object's own fSets pointer is returned, not a copy.
+UVector  *RBBISentMonkey::charClasses() {
+    UVector *classList = fSets;
+    return classList;
+}
+
+
+//  moveBack()   Step backwards from index i to the preceding "significant"
+//               code point, passing over any run of ($Extend | $Format)
+//               characters.  Returns -1 when i is at or before the start of
+//               the text; otherwise the index where the scan stopped.  The
+//               scan also stops on reaching index 0.
+//
+int RBBISentMonkey::moveBack(int i) {
+    if (i <= 0) {
+        return -1;
+    }
+    int32_t   idx = i;
+    for (;;) {
+        idx = fText->moveIndex32(idx, -1);
+        if (idx <= 0) {
+            break;      // At the start of the text; stop whatever is there.
+        }
+        const UChar32 ch = fText->char32At(idx);
+        if (!fFormatSet->contains(ch) && !fExtendSet->contains(ch)) {
+            break;      // Landed on a significant code point.
+        }
+    }
+    return idx;
+}
+
+
+//  moveForward()  Step forwards from index i by one code point, then keep
+//                 going past any ($Extend | $Format) characters that follow.
+//                 Returns the text length when i is already at or past the end.
+int RBBISentMonkey::moveForward(int i) {
+    const int32_t textLen = fText->length();
+    if (i >= textLen) {
+        return textLen;
+    }
+    int32_t   idx = i;
+    for (;;) {
+        idx = fText->moveIndex32(idx, 1);
+        const UChar32 ch = cAt(idx);    // -1 past the end, which is in neither set
+        if (!(fFormatSet->contains(ch) || fExtendSet->contains(ch))) {
+            break;
+        }
+    }
+    return idx;
+}
+
+//  cAt()   Bounds-checked character access: the code point at pos, or -1 when
+//          pos is negative or at/after the end of the text.
+UChar32 RBBISentMonkey::cAt(int pos) {
+    if (0 <= pos && pos < fText->length()) {
+        return fText->char32At(pos);
+    }
+    return -1;
+}
+
+// Finds the next sentence boundary by applying the numbered sentence-break
+// rules below, one at a time and in order.
+//   prevPos  index of the previous boundary in the text given to setText().
+//   returns  index of the following boundary, or -1 when prevPos is at or past
+//            the end of the text or deferredStatus holds a failure code.
+int32_t RBBISentMonkey::next(int32_t prevPos) {
+    int    p0, p1, p2, p3;    // Indices of the significant code points around the
+                              //   break position being tested.  The candidate break
+                              //   location is before p2.
+
+    int     breakPos = -1;
+
+    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
+    UChar32 c;                // Scratch code point used by the look-behind / look-ahead scans.
+
+    // A failure recorded in deferredStatus means the sets may be unusable.
+    if (U_FAILURE(deferredStatus)) {
+        return -1;
+    }
+
+    // Prev break at end of string.  return DONE.
+    if (prevPos >= fText->length()) {
+        return -1;
+    }
+    p0 = p1 = p2 = p3 = prevPos;
+    c3 =  fText->char32At(prevPos);
+    c0 = c1 = c2 = 0;
+
+    // Loop runs once per "significant" character position in the input text.
+    // In each pass, "continue" means "no break before p2, keep scanning" and
+    // "break" means "boundary found".
+    for (;;) {
+        // Move all of the positions forward in the input string.
+        p0 = p1;  c0 = c1;
+        p1 = p2;  c1 = c2;
+        p2 = p3;  c2 = c3;
+
+        // Advance p3 by    X(Extend | Format)*   Rule 4
+        p3 = moveForward(p3);
+        c3 = cAt(p3);
+
+        // Rule (3)  CR x LF
+        if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
+            continue;
+        }
+
+        // Rule (4).   Sep  <break>
+        if (fSepSet->contains(c1)) {
+            p2 = p1+1;   // Separators don't combine with Extend or Format.
+            break;
+        }
+
+        if (p2 >= fText->length()) {
+            // Reached end of string.  Always a break position.
+            break;
+        }
+
+        if (p2 == prevPos) {
+            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
+            continue;
+        }
+
+        // Rule (6).   ATerm x Numeric
+        if (fATermSet->contains(c1) &&  fNumericSet->contains(c2))  {
+            continue;
+        }
+
+        // Rule (7).  Upper ATerm  x  Upper
+        if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) {
+            continue;
+        }
+
+        // Rule (8)  ATerm Close* Sp*  x  (not (OLetter | Upper | Lower | Sep | STerm | ATerm))* Lower
+        //           Note:  STerm | ATerm are added to the negated part of the expression by a
+        //                  note to the Unicode 5.0 documents.
+        //           Walk back from p1 over Sp* then Close*; if that lands on an ATerm, scan
+        //           forward from p2 to the first character of the negated set (or end of text)
+        //           and check whether it is Lower.
+        int p8 = p1;
+        while (fSpSet->contains(cAt(p8))) {
+            p8 = moveBack(p8);
+        }
+        while (fCloseSet->contains(cAt(p8))) {
+            p8 = moveBack(p8);
+        }
+        if (fATermSet->contains(cAt(p8))) {
+            p8=p2;
+            for (;;) {
+                c = cAt(p8);
+                if (c==-1 || fOLetterSet->contains(c) || fUpperSet->contains(c) ||
+                    fLowerSet->contains(c) || fSepSet->contains(c) ||
+                    fATermSet->contains(c) || fSTermSet->contains(c))  {
+                    break;
+                }
+                p8 = moveForward(p8);
+            }
+            if (fLowerSet->contains(cAt(p8))) {
+                continue;
+            }
+        }
+
+        // Rule 8a   (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm);
+        if (fSContinueSet->contains(c2) || fSTermSet->contains(c2) || fATermSet->contains(c2)) {
+            p8 = p1;
+            while (fSpSet->contains(cAt(p8))) {
+                p8 = moveBack(p8);
+            }
+            while (fCloseSet->contains(cAt(p8))) {
+                p8 = moveBack(p8);
+            }
+            c = cAt(p8);
+            if (fSTermSet->contains(c) || fATermSet->contains(c)) {
+                continue;
+            }
+        }
+
+        // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
+        int p9 = p1;
+        while (fCloseSet->contains(cAt(p9))) {
+            p9 = moveBack(p9);
+        }
+        c = cAt(p9);
+        if ((fSTermSet->contains(c) || fATermSet->contains(c))) {
+            if (fCloseSet->contains(c2) || fSpSet->contains(c2) || fSepSet->contains(c2)) {
+                continue;
+            }
+        }
+
+        // Rule (10)  (STerm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
+        int p10 = p1;
+        while (fSpSet->contains(cAt(p10))) {
+            p10 = moveBack(p10);
+        }
+        while (fCloseSet->contains(cAt(p10))) {
+            p10 = moveBack(p10);
+        }
+        if (fSTermSet->contains(cAt(p10)) || fATermSet->contains(cAt(p10))) {
+            if (fSpSet->contains(c2) || fSepSet->contains(c2)) {
+                continue;
+            }
+        }
+
+        // Rule (11)  (STerm | ATerm) Close* Sp* (Sep | CR | LF)?  <break>
+        int p11 = p1;
+        if (fSepSet->contains(cAt(p11))) {
+            p11 = moveBack(p11);
+        }
+        while (fSpSet->contains(cAt(p11))) {
+            p11 = moveBack(p11);
+        }
+        while (fCloseSet->contains(cAt(p11))) {
+            p11 = moveBack(p11);
+        }
+        if (fSTermSet->contains(cAt(p11)) || fATermSet->contains(cAt(p11))) {
+            break;
+        }
+
+        //  Rule (12)  Any x Any
+        continue;
+    }
+    // p2 is the boundary (Rule 4 may have reset it to p1+1 above).
+    breakPos = p2;
+    return breakPos;
+}
+
+// Deletes the class-list vector and then each UnicodeSet held by this object.
+RBBISentMonkey::~RBBISentMonkey() {
+    delete fSets;
+
+    // Deleted in the order listed.
+    UnicodeSet *const ownedSets[] = {
+        fSepSet, fFormatSet, fSpSet,
+        fLowerSet, fUpperSet, fOLetterSet, fNumericSet,
+        fATermSet, fSContinueSet, fSTermSet, fCloseSet,
+        fOtherSet, fExtendSet
+    };
+    const int32_t ownedCount = (int32_t)(sizeof(ownedSets) / sizeof(ownedSets[0]));
+    for (int32_t idx = 0; idx < ownedCount; ++idx) {
+        delete ownedSets[idx];
+    }
+}
+
+
+
+//-------------------------------------------------------------------------------------------
+//
+//  RBBILineMonkey
+//
+//-------------------------------------------------------------------------------------------
+
+class RBBILineMonkey: public RBBIMonkeyKind {
+public:
+    // Builds the character-class sets, the number regex and the character
+    // break iterator; a failure is recorded in deferredStatus.
+    RBBILineMonkey();
+    virtual          ~RBBILineMonkey();
+    // Returns fSets, the vector of character-class sets filled by the constructor.
+    virtual  UVector *charClasses();
+    // Sets the text on this object, on fCharBI and on fNumberMatcher.
+    virtual  void     setText(const UnicodeString &s);
+    virtual  int32_t  next(int32_t i);
+    // Line Break TR rules 9 and 10 (combining-mark handling); see the comment
+    // on the function definition.
+    virtual  void     rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar);
+private:
+    UVector      *fSets;      // Character classes registered by the constructor.
+
+    // One set per Line_Break property value; each member is named after the
+    // two-letter property value alias used to build it in the constructor.
+    UnicodeSet  *fBK;
+    UnicodeSet  *fCR;
+    UnicodeSet  *fLF;
+    UnicodeSet  *fCM;
+    UnicodeSet  *fNL;
+    UnicodeSet  *fSG;         // Built from the surrogate range [\ud800-\udfff].
+    UnicodeSet  *fWJ;
+    UnicodeSet  *fZW;
+    UnicodeSet  *fGL;
+    UnicodeSet  *fCB;
+    UnicodeSet  *fSP;
+    UnicodeSet  *fB2;
+    UnicodeSet  *fBA;
+    UnicodeSet  *fBB;
+    UnicodeSet  *fHY;
+    UnicodeSet  *fH2;
+    UnicodeSet  *fH3;
+    UnicodeSet  *fCL;
+    UnicodeSet  *fCP;
+    UnicodeSet  *fEX;
+    UnicodeSet  *fIN;
+    UnicodeSet  *fJL;
+    UnicodeSet  *fJV;
+    UnicodeSet  *fJT;
+    UnicodeSet  *fNS;         // The constructor also folds fCJ into this set.
+    UnicodeSet  *fOP;
+    UnicodeSet  *fQU;
+    UnicodeSet  *fIS;
+    UnicodeSet  *fNU;
+    UnicodeSet  *fPO;
+    UnicodeSet  *fPR;
+    UnicodeSet  *fSY;
+    UnicodeSet  *fAI;
+    UnicodeSet  *fAL;         // The constructor also folds fXX, fAI, fSA and fSG into this set.
+    UnicodeSet  *fCJ;
+    UnicodeSet  *fHL;
+    UnicodeSet  *fID;
+    UnicodeSet  *fRI;
+    UnicodeSet  *fSA;
+    UnicodeSet  *fXX;
+
+    BreakIterator  *fCharBI;  // Character break iterator created by the constructor.
+
+    const UnicodeString  *fText;   // Text set by setText(); not owned.
+    // NOTE(review): the constructor does not create an object for
+    //               fOrigPositions or fLB11Matcher.
+    int32_t              *fOrigPositions;
+
+    RegexMatcher         *fNumberMatcher;  // Matches the number pattern built in the constructor.
+    RegexMatcher         *fLB11Matcher;
+};
+
+
+RBBILineMonkey::RBBILineMonkey()
+{
+    UErrorCode  status = U_ZERO_ERROR;
+
+    fSets  = new UVector(status);
+
+    fBK    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status);
+    fCR    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status);
+    fLF    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status);
+    fCM    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status);
+    fNL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status);
+    fWJ    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status);
+    fZW    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status);
+    fGL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status);
+    fCB    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status);
+    fSP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status);
+    fB2    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status);
+    fBA    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status);
+    fBB    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status);
+    fHY    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status);
+    fH2    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status);
+    fH3    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status);
+    fCL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status);
+    fCP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CP}]"), status);
+    fEX    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status);
+    fIN    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status);
+    fJL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status);
+    fJV    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status);
+    fJT    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status);
+    fNS    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status);
+    fOP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status);
+    fQU    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status);
+    fIS    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status);
+    fNU    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status);
+    fPO    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status);
+    fPR    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status);
+    fSY    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status);
+    fAI    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status);
+    fAL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status);
+    fCJ    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CJ}]"), status);
+    fHL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HL}]"), status);
+    fID    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status);
+    fRI    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=RI}]"), status);
+    fSA    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status);
+    fSG    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
+    fXX    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
+
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+        fCharBI = NULL;
+        fNumberMatcher = NULL;
+        return;
+    }
+
+    fAL->addAll(*fXX);     // Default behavior for XX is identical to AL
+    fAL->addAll(*fAI);     // Default behavior for AI is identical to AL
+    fAL->addAll(*fSA);     // Default behavior for SA is XX, which defaults to AL
+    fAL->addAll(*fSG);     // Default behavior for SG is identical to AL.
+
+    fNS->addAll(*fCJ);     // Default behavior for CJ is identical to NS.
+
+    fSets->addElement(fBK, status);
+    fSets->addElement(fCR, status);
+    fSets->addElement(fLF, status);
+    fSets->addElement(fCM, status);
+    fSets->addElement(fNL, status);
+    fSets->addElement(fWJ, status);
+    fSets->addElement(fZW, status);
+    fSets->addElement(fGL, status);
+    fSets->addElement(fCB, status);
+    fSets->addElement(fSP, status);
+    fSets->addElement(fB2, status);
+    fSets->addElement(fBA, status);
+    fSets->addElement(fBB, status);
+    fSets->addElement(fHY, status);
+    fSets->addElement(fH2, status);
+    fSets->addElement(fH3, status);
+    fSets->addElement(fCL, status);
+    fSets->addElement(fCP, status);
+    fSets->addElement(fEX, status);
+    fSets->addElement(fIN, status);
+    fSets->addElement(fJL, status);
+    fSets->addElement(fJT, status);
+    fSets->addElement(fJV, status);
+    fSets->addElement(fNS, status);
+    fSets->addElement(fOP, status);
+    fSets->addElement(fQU, status);
+    fSets->addElement(fIS, status);
+    fSets->addElement(fNU, status);
+    fSets->addElement(fPO, status);
+    fSets->addElement(fPR, status);
+    fSets->addElement(fSY, status);
+    fSets->addElement(fAI, status);
+    fSets->addElement(fAL, status);
+    fSets->addElement(fHL, status);
+    fSets->addElement(fID, status);
+    fSets->addElement(fWJ, status);
+    fSets->addElement(fRI, status);
+    fSets->addElement(fSA, status);
+    fSets->addElement(fSG, status);
+
+    const char *rules = 
+            "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
+            "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
+            "\\p{Line_Break=NU}\\p{Line_Break=CM}*"
+            "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
+            "((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?"
+            "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?";
+
+    fNumberMatcher = new RegexMatcher(
+        UnicodeString(rules, -1, US_INV), 0, status);
+
+    fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
+
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
+}
+
+
+//
+//  setText
+//     Point this monkey at the text to be analyzed by subsequent calls to next().
+//     Only the address of s is stored in fText; the string itself is not copied here.
+//
+//     The constructor leaves fCharBI and/or fNumberMatcher NULL when its set-up fails
+//     (the failure is recorded in deferredStatus, and next() then returns -1), so
+//     both helpers are checked before being used.
+//
+void RBBILineMonkey::setText(const UnicodeString &s) {
+    fText       = &s;
+    if (fCharBI != NULL) {
+        fCharBI->setText(s);
+    }
+    if (fNumberMatcher != NULL) {
+        fNumberMatcher->reset(s);
+    }
+}
+
+//
+//  rule9Adjust
+//     Line Break TR rules 9 and 10 implementation.
+//     This deals with combining marks and other sequences that
+//     must be treated as if they were something other than what they actually are.
+//
+//     This is factored out into a separate function because it must be applied twice for
+//     each potential break, once to the chars before the position being checked, then
+//     again to the text following the possible break.
+//
+void RBBILineMonkey::rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar) {
+    if (pos == -1) {
+        // Invalid initial position.  Happens during the warmup iteration of the
+        //   main loop in next().
+        return;
+    }
+
+    int32_t  nPos = *nextPos;
+
+    // LB 9  Keep combining sequences together.
+    //  advance over any CM class chars.  Note that Line Break CM is different
+    //  from the normal Grapheme Extend property.
+    if (!(fSP->contains(*posChar) || fBK->contains(*posChar) || *posChar==0x0d ||
+          *posChar==0x0a ||fNL->contains(*posChar) || fZW->contains(*posChar))) {
+        for (;;) {
+            *nextChar = fText->char32At(nPos);
+            if (!fCM->contains(*nextChar)) {
+                break;
+            }
+            nPos = fText->moveIndex32(nPos, 1);
+        }
+    }
+
+
+    // LB 9 Treat X CM* as if it were x.
+    //       No explicit action required.
+
+    // LB 10  Treat any remaining combining mark as AL
+    if (fCM->contains(*posChar)) {
+        *posChar = 0x41;   // thisChar = 'A';
+    }
+
+    // Push the updated nextPos and nextChar back to our caller.
+    // This only makes a difference if posChar got bigger by consuming a
+    // combining sequence.
+    *nextPos  = nPos;
+    *nextChar = fText->char32At(nPos);
+}
+
+
+
+//
+//  next
+//     Reference ("monkey") implementation of line break detection.
+//     Starting at startPos in the text given to setText(), examine successive
+//     positions, applying the numbered LB rules below in order, and return the
+//     index of the first position at which a break is allowed.
+//
+//     Returns -1 if construction failed (deferredStatus) or if startPos is at or
+//     beyond the end of the text.  When no earlier break is found, the loop stops
+//     at the end of the text (rule LB2) and that position is returned.
+//
+//     Inside the main loop, "continue" means "no break between prevChar and
+//     thisChar, move on to the next position" and "break" means "a break is
+//     allowed immediately before pos".
+//
+int32_t RBBILineMonkey::next(int32_t startPos) {
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t    pos;       //  Index of the char following a potential break position
+    UChar32    thisChar;  //  Character at above position "pos"
+
+    int32_t    prevPos;   //  Index of the char preceding a potential break position
+    UChar32    prevChar;  //  Character at above position.  Note that prevChar
+                          //   and thisChar may not be adjacent because combining
+                          //   characters between them will be ignored.
+
+    int32_t    prevPosX2; //  Second previous character.  Wider context for LB21a.
+    UChar32    prevCharX2;
+
+    int32_t    nextPos;   //  Index of the next character following pos.
+                          //     Usually skips over combining marks.
+    int32_t    nextCPPos; //  Index of the code point following "pos."
+                          //     May point to a combining mark.
+    int32_t    tPos;      //  temp value.
+    UChar32    c;
+
+    // The constructor records any set-up failure in deferredStatus; nothing can
+    //   be tested in that case.
+    if (U_FAILURE(deferredStatus)) {
+        return -1;
+    }
+
+    if (startPos >= fText->length()) {
+        return -1;
+    }
+
+
+    // Initial values for loop.  Loop will run the first time without finding breaks,
+    //                           while the invalid values shift out and the "this" and
+    //                           "prev" positions are filled in with good values.
+    pos      = prevPos   = prevPosX2  = -1;    // Invalid value, serves as flag for initial loop iteration.
+    thisChar = prevChar  = prevCharX2 = 0;
+    nextPos  = nextCPPos = startPos;
+
+
+    // Loop runs once per position in the test text, until a break position
+    //  is found.
+    for (;;) {
+        // Shift the three-character window (X2, prev, this) forward by one position.
+        prevPosX2 = prevPos;
+        prevCharX2 = prevChar;
+
+        prevPos   = pos;
+        prevChar  = thisChar;
+
+        pos       = nextPos;
+        thisChar  = fText->char32At(pos);
+
+        nextCPPos = fText->moveIndex32(pos, 1);
+        nextPos   = nextCPPos;
+
+        // Rule LB2 - Break at end of text.
+        if (pos >= fText->length()) {
+            break;
+        }
+
+        // Rule LB 9 - adjust for combining sequences.
+        //             We do this one out-of-order because the adjustment does not change anything
+        //             that would match rules LB 3 - LB 6, but after the adjustment, LB 3-6 do need to
+        //             be applied.
+        rule9Adjust(prevPos, &prevChar, &pos,     &thisChar);
+        nextCPPos = nextPos = fText->moveIndex32(pos, 1);
+        c = fText->char32At(nextPos);
+        rule9Adjust(pos,     &thisChar, &nextPos, &c);
+
+        // If the loop is still warming up - if we haven't shifted the initial
+        //   -1 positions out of prevPos yet - loop back to advance the
+        //    position in the input without any further looking for breaks.
+        if (prevPos == -1) {
+            continue;
+        }
+
+        // LB 4  Always break after hard line breaks,
+        if (fBK->contains(prevChar)) {
+            break;
+        }
+
+        // LB 5  Break after CR, LF, NL, but not inside CR LF
+        if (prevChar == 0x0d && thisChar == 0x0a) {
+            continue;
+        }
+        if (prevChar == 0x0d ||
+            prevChar == 0x0a ||
+            prevChar == 0x85)  {
+            break;
+        }
+
+        // LB 6  Don't break before hard line breaks
+        if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 ||
+            fBK->contains(thisChar)) {
+                continue;
+        }
+
+
+        // LB 7  Don't break before spaces or zero-width space.
+        if (fSP->contains(thisChar)) {
+            continue;
+        }
+
+        if (fZW->contains(thisChar)) {
+            continue;
+        }
+
+        // LB 8  Break after zero width space
+        if (fZW->contains(prevChar)) {
+            break;
+        }
+
+        // LB 9, 10  Already done, at top of loop.
+        //
+
+
+        // LB 11  Do not break before or after WORD JOINER and related characters.
+        //    x  WJ
+        //    WJ  x
+        //
+        if (fWJ->contains(thisChar) || fWJ->contains(prevChar)) {
+            continue;
+        }
+
+        // LB 12
+        //    GL  x
+        if (fGL->contains(prevChar)) {
+            continue;
+        }
+        
+        // LB 12a
+        //    [^SP BA HY] x GL
+        if (!(fSP->contains(prevChar) ||
+              fBA->contains(prevChar) ||
+              fHY->contains(prevChar)     ) && fGL->contains(thisChar)) {
+            continue;
+        }
+
+
+
+        // LB 13  Don't break before closings.
+        //        NU x CL,  NU x CP  and NU x IS are not matched here so that they will
+        //        fall into LB 17 and the more general number regular expression.
+        //
+        if ((!fNU->contains(prevChar) && fCL->contains(thisChar)) ||
+            (!fNU->contains(prevChar) && fCP->contains(thisChar)) ||
+                                         fEX->contains(thisChar)  ||
+            (!fNU->contains(prevChar) && fIS->contains(thisChar)) ||
+            (!fNU->contains(prevChar) && fSY->contains(thisChar)))    {
+            continue;
+        }
+
+        // LB 14 Don't break after OP SP*
+        //       Scan backwards, checking for this sequence.
+        //       The OP char could include combining marks, so we actually check for
+        //           OP CM* SP*
+        //       Another Twist: The Rule 67 fixes may have changed a SP CM
+        //       sequence into a ID char, so before scanning back through spaces,
+        //       verify that prevChar is indeed a space.  The prevChar variable
+        //       may differ from fText[prevPos]
+        tPos = prevPos;
+        if (fSP->contains(prevChar)) {
+            while (tPos > 0 && fSP->contains(fText->char32At(tPos))) {
+                tPos=fText->moveIndex32(tPos, -1);
+            }
+        }
+        while (tPos > 0 && fCM->contains(fText->char32At(tPos))) {
+            tPos=fText->moveIndex32(tPos, -1);
+        }
+        if (fOP->contains(fText->char32At(tPos))) {
+            continue;
+        }
+
+
+        // LB 15    QU SP* x OP
+        if (fOP->contains(thisChar)) {
+            // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
+            // Note: this local tPos shadows the function-level tPos declared above.
+            int tPos = prevPos;
+            while (tPos>0 && fSP->contains(fText->char32At(tPos))) {
+                tPos = fText->moveIndex32(tPos, -1);
+            }
+            while (tPos>0 && fCM->contains(fText->char32At(tPos))) {
+                tPos = fText->moveIndex32(tPos, -1);
+            }
+            if (fQU->contains(fText->char32At(tPos))) {
+                continue;
+            }
+        }
+
+
+
+        // LB 16   (CL | CP) SP* x NS
+        //    Scan backwards for SP* CM* (CL | CP)
+        if (fNS->contains(thisChar)) {
+            // Note: this local tPos shadows the function-level tPos declared above.
+            int tPos = prevPos;
+            while (tPos>0 && fSP->contains(fText->char32At(tPos))) {
+                tPos = fText->moveIndex32(tPos, -1);
+            }
+            while (tPos>0 && fCM->contains(fText->char32At(tPos))) {
+                tPos = fText->moveIndex32(tPos, -1);
+            }
+            if (fCL->contains(fText->char32At(tPos)) || fCP->contains(fText->char32At(tPos))) {
+                continue;
+            }
+        }
+
+
+        // LB 17        B2 SP* x B2
+        if (fB2->contains(thisChar)) {
+            //  Scan backwards, checking for the B2 CM* SP* sequence.
+            tPos = prevPos;
+            if (fSP->contains(prevChar)) {
+                while (tPos > 0 && fSP->contains(fText->char32At(tPos))) {
+                    tPos=fText->moveIndex32(tPos, -1);
+                }
+            }
+            while (tPos > 0 && fCM->contains(fText->char32At(tPos))) {
+                tPos=fText->moveIndex32(tPos, -1);
+            }
+            if (fB2->contains(fText->char32At(tPos))) {
+                continue;
+            }
+        }
+
+
+        // LB 18    break after space
+        if (fSP->contains(prevChar)) {
+            break;
+        }
+
+        // LB 19
+        //    x   QU
+        //    QU  x
+        if (fQU->contains(thisChar) || fQU->contains(prevChar)) {
+            continue;
+        }
+
+        // LB 20  Break around a CB
+        if (fCB->contains(thisChar) || fCB->contains(prevChar)) {
+            break;
+        }
+
+        // LB 21
+        if (fBA->contains(thisChar) ||
+            fHY->contains(thisChar) ||
+            fNS->contains(thisChar) ||
+            fBB->contains(prevChar) )   {
+            continue;
+        }
+
+        // LB 21a
+        //   HL (HY | BA) x
+        if (fHL->contains(prevCharX2) && 
+                (fHY->contains(prevChar) || fBA->contains(prevChar))) {
+            continue;
+        }
+
+        // LB 21b - Added for Apple 13927604
+        if (fSY->contains(prevChar) && fHL->contains(thisChar)) {
+            continue;
+        }
+
+        // LB 22
+        if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||
+            (fHL->contains(prevChar) && fIN->contains(thisChar)) ||
+            (fID->contains(prevChar) && fIN->contains(thisChar)) ||
+            (fIN->contains(prevChar) && fIN->contains(thisChar)) ||
+            (fNU->contains(prevChar) && fIN->contains(thisChar)) )   {
+            continue;
+        }
+
+
+        // LB 23    ID x PO
+        //          AL x NU
+        //          HL x NU
+        //          NU x AL
+        if ((fID->contains(prevChar) && fPO->contains(thisChar)) ||
+            (fAL->contains(prevChar) && fNU->contains(thisChar)) ||
+            (fHL->contains(prevChar) && fNU->contains(thisChar)) ||
+            (fNU->contains(prevChar) && fAL->contains(thisChar)) ||
+            (fNU->contains(prevChar) && fHL->contains(thisChar)) )   {
+            continue;
+        }
+
+        // LB 24  Do not break between prefix and letters or ideographs.
+        //        PR x ID
+        //        PR x (AL | HL)
+        //        PO x (AL | HL)
+        if ((fPR->contains(prevChar) && fID->contains(thisChar)) ||
+            (fPR->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar))) ||
+            (fPO->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar))))  {
+            continue;
+        }
+
+
+
+        // LB 25    Numbers
+        //          The number pattern built in the constructor is anchored at prevPos.
+        //          Note that status is only examined when lookingAt() reports a match.
+        if (fNumberMatcher->lookingAt(prevPos, status)) {
+            if (U_FAILURE(status)) {
+                break;
+            }
+            // Matched a number.  But could have been just a single digit, which would
+            //    not represent a "no break here" between prevChar and thisChar
+            int32_t numEndIdx = fNumberMatcher->end(status);  // idx of first char following num
+            if (numEndIdx > pos) {
+                // Number match includes at least our two chars being checked
+                if (numEndIdx > nextPos) {
+                    // Number match includes additional chars.  Update pos and nextPos
+                    //   so that next loop iteration will continue at the end of the number,
+                    //   checking for breaks between last char in number & whatever follows.
+                    pos = nextPos = numEndIdx;
+                    do {
+                        pos = fText->moveIndex32(pos, -1);
+                        thisChar = fText->char32At(pos);
+                    } while (fCM->contains(thisChar));
+                }
+                continue;
+            }
+        }
+
+
+        // LB 26 Do not break a Korean syllable.
+        if (fJL->contains(prevChar) && (fJL->contains(thisChar) ||
+                                        fJV->contains(thisChar) ||
+                                        fH2->contains(thisChar) ||
+                                        fH3->contains(thisChar))) {
+                                            continue;
+                                        }
+
+        if ((fJV->contains(prevChar) || fH2->contains(prevChar))  &&
+            (fJV->contains(thisChar) || fJT->contains(thisChar))) {
+                continue;
+        }
+
+        if ((fJT->contains(prevChar) || fH3->contains(prevChar)) &&
+            fJT->contains(thisChar)) {
+                continue;
+        }
+
+        // LB 27 Treat a Korean Syllable Block the same as ID.
+        if ((fJL->contains(prevChar) || fJV->contains(prevChar) ||
+            fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) &&
+            fIN->contains(thisChar)) {
+                continue;
+            }
+        if ((fJL->contains(prevChar) || fJV->contains(prevChar) ||
+            fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) &&
+            fPO->contains(thisChar)) {
+                continue;
+            }
+        if (fPR->contains(prevChar) && (fJL->contains(thisChar) || fJV->contains(thisChar) ||
+            fJT->contains(thisChar) || fH2->contains(thisChar) || fH3->contains(thisChar))) {
+                continue;
+            }
+
+
+
+        // LB 28  Do not break between alphabetics ("at").
+        if ((fAL->contains(prevChar) || fHL->contains(prevChar)) && (fAL->contains(thisChar) || fHL->contains(thisChar))) {
+            continue;
+        }
+
+        // LB 29  Do not break between numeric punctuation and alphabetics ("e.g.").
+        if (fIS->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar))) {
+            continue;
+        }
+
+        // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
+        //          (AL | NU) x OP
+        //          CP x (AL | NU)
+        if ((fAL->contains(prevChar) || fHL->contains(prevChar) || fNU->contains(prevChar)) && fOP->contains(thisChar)) {
+            continue;
+        }
+        if (fCP->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar) || fNU->contains(thisChar))) {
+            continue;
+        }
+
+        // LB30a  Do not break between regional indicators.
+        //        RI x RI
+        if (fRI->contains(prevChar) && fRI->contains(thisChar)) {
+            continue;
+        }
+
+        // LB 31    Break everywhere else
+        break;
+
+    }
+
+    return pos;
+}
+
+
+//
+//  charClasses
+//     Return the vector of line break character class sets (fSets) that the
+//     constructor fills in.  The pointer refers to this object's own member;
+//     the destructor deletes it.
+//
+UVector  *RBBILineMonkey::charClasses() {
+    return fSets;
+}
+
+
+RBBILineMonkey::~RBBILineMonkey() {
+    delete fSets;
+
+    delete fBK;
+    delete fCR;
+    delete fLF;
+    delete fCM;
+    delete fNL;
+    delete fWJ;
+    delete fZW;
+    delete fGL;
+    delete fCB;
+    delete fSP;
+    delete fB2;
+    delete fBA;
+    delete fBB;
+    delete fHY;
+    delete fH2;
+    delete fH3;
+    delete fCL;
+    delete fCP;
+    delete fEX;
+    delete fIN;
+    delete fJL;
+    delete fJV;
+    delete fJT;
+    delete fNS;
+    delete fOP;
+    delete fQU;
+    delete fIS;
+    delete fNU;
+    delete fPO;
+    delete fPR;
+    delete fSY;
+    delete fAI;
+    delete fAL;
+    delete fCJ;
+    delete fHL;
+    delete fID;
+    delete fRI;
+    delete fSA;
+    delete fSG;
+    delete fXX;
+
+    delete fCharBI;
+    delete fNumberMatcher;
+}
+
+
+//-------------------------------------------------------------------------------------------
+//
+//   TestMonkey
+//
+//     params
+//       seed=nnnnn        Random number starting seed.
+//                         Setting the seed allows errors to be reproduced.
+//       loop=nnn          Looping count.  Controls running time.
+//                         -1:  run forever.
+//                          0 or greater:  run length.
+//
+//       type = char | word | line | sent | title
+//
+//-------------------------------------------------------------------------------------------
+
+static int32_t  getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
+    int32_t val = defaultVal;
+    name.append(" *= *(-?\\d+)");
+    UErrorCode status = U_ZERO_ERROR;
+    RegexMatcher m(name, params, 0, status);
+    if (m.find()) {
+        // The param exists.  Convert the string to an int.
+        char valString[100];
+        int32_t paramLength = m.end(1, status) - m.start(1, status);
+        if (paramLength >= (int32_t)(sizeof(valString)-1)) {
+            paramLength = (int32_t)(sizeof(valString)-2);
+        }
+        params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
+        val = strtol(valString,  NULL, 10);
+
+        // Delete this parameter from the params string.
+        m.reset();
+        params = m.replaceFirst("", status);
+    }
+    U_ASSERT(U_SUCCESS(status));
+    return val;
+}
+#endif
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+static void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr,
+                                    BreakIterator *bi,
+                                    int expected[],
+                                    int expectedcount)
+{
+    int count = 0;
+    int i = 0;
+    int forward[50];
+    bi->setText(ustr);
+    for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
+        forward[count] = i;
+        if (count < expectedcount && expected[count] != i) {
+            test->errln("break forward test failed: expected %d but got %d",
+                        expected[count], i);
+            break;
+        }
+        count ++;
+    }
+    if (count != expectedcount) {
+        printStringBreaks(ustr, expected, expectedcount);
+        test->errln("break forward test failed: missed %d match",
+                    expectedcount - count);
+        return;
+    }
+    // testing boundaries
+    for (i = 1; i < expectedcount; i ++) {
+        int j = expected[i - 1];
+        if (!bi->isBoundary(j)) {
+            printStringBreaks(ustr, expected, expectedcount);
+            test->errln("isBoundary() failed.  Expected boundary at position %d", j);
+            return;
+        }
+        for (j = expected[i - 1] + 1; j < expected[i]; j ++) {
+            if (bi->isBoundary(j)) {
+                printStringBreaks(ustr, expected, expectedcount);
+                test->errln("isBoundary() failed.  Not expecting boundary at position %d", j);
+                return;
+            }
+        }
+    }
+
+    for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
+        count --;
+        if (forward[count] != i) {
+            printStringBreaks(ustr, expected, expectedcount);
+            test->errln("happy break test previous() failed: expected %d but got %d",
+                        forward[count], i);
+            break;
+        }
+    }
+    if (count != 0) {
+        printStringBreaks(ustr, expected, expectedcount);
+        test->errln("break test previous() failed: missed a match");
+        return;
+    }
+
+    // testing preceding
+    for (i = 0; i < expectedcount - 1; i ++) {
+        // int j = expected[i] + 1;
+        int j = ustr.moveIndex32(expected[i], 1);
+        for (; j <= expected[i + 1]; j ++) {
+            if (bi->preceding(j) != expected[i]) {
+                printStringBreaks(ustr, expected, expectedcount);
+                test->errln("preceding(): Not expecting boundary at position %d", j);
+                return;
+            }
+        }
+    }
+}
+#endif
+
+void RBBITest::TestWordBreaks(void)
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+    Locale        locale("en");
+    UErrorCode    status = U_ZERO_ERROR;
+    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
+    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
+    // Replaced any C+J characters in a row with a random sequence of characters
+    // of the same length to make our C+J segmentation not get in the way.
+    static const char *strlist[] =
+    {
+    "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",
+    "\\U000e0037\\u2666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u091c\\U000e0040\\u003b",
+    "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a",
+    "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622",
+    "\\uac00\\u3588\\u009c\\u0953\\u194b",
+    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
+    "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",
+    "\\u2f1f\\u1634\\u05f8\\u0944\\u04f2\\u0cdf\\u1f9c\\u05f4\\u002e",
+    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
+    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",
+    "\\u2027\\U000e0067\\u0a47\\u00b7",
+    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",
+    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",
+    "\\u0589\\U000e006e\\u0a42\\U000104a5",
+    "\\u0f66\\u2523\\u003a\\u0cae\\U000e0047\\u003a",
+    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",
+    "\\u0027\\u11af\\U000e0057\\u0602",
+    "\\U0001d7f2\\U000e007\\u0004\\u0589",
+    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",
+    "\\U0001d7f2\\U000e007d\\u0004\\u0589",
+    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",
+    "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",
+    "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",
+    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",
+    "\\u0233\\U000e0020\\u0a69\\u0d6a",
+    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",
+    "\\u18f4\\U000e0049\\u20e7\\u2027",
+    "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",
+    "\\ua183\\u102d\\u0bec\\u003a",
+    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",
+    "\\u003a\\u0e57\\u0fad\\u002e",
+    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",
+    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",
+    "\\U000e005d\\u2044\\u0731\\u0650\\u0061",
+    "\\u003a\\u0664\\u00b7\\u1fba",
+    "\\u003b\\u0027\\u00b7\\u47a3",
+    "\\u2027\\U000e0067\\u0a42\\u00b7\\u4edf\\uc26c\\u003a\\u4186\\u041b",
+    "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673",
+    "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c",
+    };
+    int loop;
+    if (U_FAILURE(status)) {
+        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
+        return;
+    }
+    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
+        // printf("looping %d\n", loop);
+        UnicodeString ustr = CharsToUnicodeString(strlist[loop]);
+        // RBBICharMonkey monkey;
+        RBBIWordMonkey monkey;
+
+        int expected[50];
+        int expectedcount = 0;
+
+        monkey.setText(ustr);
+        int i;
+        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
+            expected[expectedcount ++] = i;
+        }
+
+        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
+    }
+    delete bi;
+#endif
+}
+
+void RBBITest::TestWordBoundary(void)
+{
+    // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data>
+    Locale        locale("en");
+    UErrorCode    status = U_ZERO_ERROR;
+    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
+    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
+    UChar         str[50];
+    static const char *strlist[] =
+    {
+    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
+    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
+    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",
+    "\\u2027\\U000e0067\\u0a47\\u00b7",
+    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",
+    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",
+    "\\u0589\\U000e006e\\u0a42\\U000104a5",
+    "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",
+    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",
+    "\\u0027\\u11af\\U000e0057\\u0602",
+    "\\U0001d7f2\\U000e007\\u0004\\u0589",
+    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",
+    "\\U0001d7f2\\U000e007d\\u0004\\u0589",
+    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",
+    "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",
+    "\\U000e0065\\u302c\\u09ee\\U000e0068",
+    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",
+    "\\u0233\\U000e0020\\u0a69\\u0d6a",
+    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",
+    "\\u58f4\\U000e0049\\u20e7\\u2027",
+    "\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",
+    "\\ua183\\u102d\\u0bec\\u003a",
+    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",
+    "\\u003a\\u0e57\\u0fad\\u002e",
+    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",
+    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",
+    "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019",
+    "\\u003a\\u0664\\u00b7\\u1fba",
+    "\\u003b\\u0027\\u00b7\\u47a3",
+    };
+    int loop;
+    if (U_FAILURE(status)) {
+        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
+        return;
+    }
+    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
+        // printf("looping %d\n", loop);
+        u_unescape(strlist[loop], str, 20);
+        UnicodeString ustr(str);
+        int forward[50];
+        int count = 0;
+
+        bi->setText(ustr);
+        int prev = 0;
+        int i;
+        for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
+            forward[count ++] = i;
+            if (i > prev) {
+                int j;
+                for (j = prev + 1; j < i; j ++) {
+                    if (bi->isBoundary(j)) {
+                        printStringBreaks(ustr, forward, count);
+                        errln("happy boundary test failed: expected %d not a boundary",
+                               j);
+                        return;
+                    }
+                }
+            }
+            if (!bi->isBoundary(i)) {
+                printStringBreaks(ustr, forward, count);
+                errln("happy boundary test failed: expected %d a boundary",
+                       i);
+                return;
+            }
+            prev = i;
+        }
+    }
+    delete bi;
+}
+
+void RBBITest::TestLineBreaks(void)
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    Locale        locale("en");
+    UErrorCode    status = U_ZERO_ERROR;
+    BreakIterator *bi = BreakIterator::createLineInstance(locale, status);
+    const int32_t  STRSIZE = 50;
+    UChar         str[STRSIZE];
+    static const char *strlist[] =
+    {
+     "\\u300f\\ufdfc\\ub798\\u2011\\u2011\\u0020\\u0b43\\u002d\\ubeec\\ufffc",
+     "\\u24ba\\u2060\\u3405\\ub290\\u000d\\U000e0032\\ufe35\\u00a0\\u0361\\"
+             "U000112ed\\u0f0c\\u000a\\u308e\\ua875\\u0085\\u114d",
+     "\\ufffc\\u3063\\u2e08\\u30e3\\u000d\\u002d\\u0ed8\\u002f\\U00011a57\\"
+             "u2014\\U000e0105\\u118c\\u000a\\u07f8",
+     "\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f",
+     "\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
+     "\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4",
+     "\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
+     "\\u002d\\uff1b\\u02c8\\u2029\\ufeff\\u0f22\\u2044\\ufe09\\u003a\\u096d\\u2009\\u000a\\u06f7\\u02cc\\u1019\\u2060",
+     "\\u1781\\u0b68\\u0f0c\\u3010\\u0085\\U00011f7a\\u0020\\u0dd6\\u200b\\U000e007a\\u000a\\u2060\\u2026\\u002f\\u2026\\u24dc\\u101e\\u2014\\u2007\\u30a5",
+     "\\u2770\\u0020\\U000e010f\\u0020\\u2060\\u000a\\u02cc\\u0bcc\\u060d\\u30e7\\u0f3b\\u002f",
+     "\\ufeff\\u0028\\u003b\\U00012fec\\u2010\\u0020\\u0004\\u200b\\u0020\\u275c\\u002f\\u17b1",
+     "\\u20a9\\u2014\\u00a2\\u31f1\\u002f\\u0020\\u05b8\\u200b\\u0cc2\\u003b\\u060d\\u02c8\\ua4e8\\u002f\\u17d5",
+     "\\u002d\\u136f\\uff63\\u0084\\ua933\\u2028\\u002d\\u431b\\u200b\\u20b0",
+     "\\uade3\\u11d6\\u000a\\U0001107d\\u203a\\u201d\\ub070\\u000d\\u2024\\ufffc",
+     "\\uff5b\\u101c\\u1806\\u002f\\u2213\\uff5f",
+     "\\u2014\\u0a83\\ufdfc\\u003f\\u00a0\\u0020\\u000a\\u2991\\U0001d179\\u0020\\u201d\\U000125f6\\u0a67\\u20a7\\ufeff\\u043f",
+     "\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b",
+     "\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085",
+     "\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac",
+     "\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9",
+     "\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025",
+     "\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763",
+     "\\u002f\\u2563\\u202f\\u0085\\u17d5\\u200b\\u0020\\U000e0043\\u2014\\u058a\\u3d0a\\ufe57\\u2035\\u2028\\u2029",
+     "\\u20ae\\U0001d169\\u9293\\uff1f\\uff1f\\u0021\\u2012\\u2039\\u0085\\u02cc\\u00a2\\u0020\\U000e01ab\\u3085\\u0f3a\\u1806\\u0f0c\\u1945\\u000a\\U0001d7e7",
+     "\\uffe6\\u00a0\\u200b\\u0085\\u2116\\u255b\\U0001d7f7\\u178c\\ufffc",
+     "\\u02cc\\ufe6a\\u00a0\\u0021\\u002d\\u7490\\uec2e\\u200b\\u000a",
+     "\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014\\u8945",
+     "\\u7490\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014",
+     "\\u0020\\u2028\\u2014\\u8945\\u002c\\u005b",
+     "\\u000a\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0",
+     "\\u2473\\u0e9d\\u0020\\u0085\\u000a\\ufe3c\\u201c\\u000d\\u2025",
+     "\\U0001d16e\\ufffc\\u2025\\u0021\\u002d",
+     "\\ufffc\\u301b\\u0fa5\\U000e0103\\u2060\\u208e\\u17d5\\u034f\\u1009\\u003a\\u180e\\u2009\\u3111",
+     "\\u2014\\u0020\\u000a\\u17c5\\u24fc",
+     "\\ufffc\\u0020\\u2116\\uff6c\\u200b\\u0ac3\\U0001028f",
+     "\\uaeb0\\u0344\\u0085\\ufffc\\u073b\\u2010",
+     "\\ufeff\\u0589\\u0085\\u0eb8\\u30fd\\u002f\\u003a\\u2014\\ufe43",
+     "\\u09cc\\u256a\\u276d\\u002d\\u3085\\u000d\\u0e05\\u2028\\u0fbb",
+     "\\u2034\\u00bb\\u0ae6\\u300c\\u0020\\u31f8\\ufffc",
+     "\\u2116\\u0ed2\\uff64\\u02cd\\u2001\\u2060",
+         "\\u809d\\u2e02\\u0f0a\\uc48f\\u2540\\u000d\\u0cef\\u003a\\u0e4d"
+         "\\U000e0172\\U000e005c\\u17cf\\U00010ca6\\ufeff\\uf621\\u06f3\\uffe5"
+         "\\u0ea2\\ufeff\\udcea\\u3085\\ua874\\u000a\\u0020\\u000b\\u200b",
+     "\\ufe10\\u2060\\u1a5a\\u2060\\u17e4\\ufffc\\ubbe1\\ufe15\\u0020\\u00a0",
+         "\\u2060\\u2213\\u200b\\u2019\\uc2dc\\uff6a\\u1736\\u0085\\udb07",
+    };
+    int loop;
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
+        // printf("looping %d\n", loop);
+        int32_t t = u_unescape(strlist[loop], str, STRSIZE);
+        if (t >= STRSIZE) {
+            TEST_ASSERT(FALSE);
+            continue;
+        }
+
+
+        UnicodeString ustr(str);
+        RBBILineMonkey monkey;
+        if (U_FAILURE(monkey.deferredStatus)) {
+            continue;
+        }
+
+        const int EXPECTEDSIZE = 50;
+        int expected[EXPECTEDSIZE];
+        int expectedcount = 0;
+
+        monkey.setText(ustr);
+        int i;
+        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
+            if (expectedcount >= EXPECTEDSIZE) {
+                TEST_ASSERT(expectedcount < EXPECTEDSIZE);
+                return;
+            }
+            expected[expectedcount ++] = i;
+        }
+
+        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
+    }
+    delete bi;
+#endif
+}
+
+void RBBITest::TestSentBreaks(void)
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    Locale        locale("en");
+    UErrorCode    status = U_ZERO_ERROR;
+    BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);
+    UChar         str[200];
+    static const char *strlist[] =
+    {
+     "Now\ris\nthe\r\ntime\n\rfor\r\r",
+     "This\n",
+     "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.",
+     "\"Sentence ending with a quote.\" Bye.",
+     "  (This is it).  Testing the sentence iterator. \"This isn't it.\"",
+     "Hi! This is a simple sample sentence. (This is it.) This is a simple sample sentence. \"This isn't it.\"",
+     "Hi! This is a simple sample sentence. It does not have to make any sense as you can see. ",
+     "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ",
+     "Che la dritta via aveo smarrita. He said, that I said, that you said!! ",
+     "Don't rock the boat.\\u2029Because I am the daddy, that is why. Not on my time (el timo.)!",
+     "\\U0001040a\\u203a\\u1217\\u2b23\\u000d\\uff3b\\u03dd\\uff57\\u0a69\\u104a\\ufe56\\ufe52"
+             "\\u3016\\U000e002f\\U000e0077\\u0662\\u1680\\u2984\\U000e006a\\u002e\\ua6ab\\u104a"
+             "\\u002e\\u019b\\u2005\\u002e\\u0477\\u0438\\u0085\\u0441\\u002e\\u5f61\\u202f"
+             "\\U0001019f\\uff08\\u27e8\\u055c\\u0352",
+     "\\u1f3e\\u004d\\u000a\\ua3e4\\U000e0023\\uff63\\u0c52\\u276d\\U0001d5de\\U0001d171"
+             "\\u0e38\\u17e5\\U00012fe6\\u0fa9\\u267f\\u1da3\\u0046\\u03ed\\udc72\\u0030"
+             "\\U0001d688\\u0b6d\\u0085\\u0c67\\u1f94\\u0c6c\\u9cb2\\u202a\\u180e\\u000b"
+             "\\u002e\\U000e005e\\u035b\\u061f\\u02c1\\U000e0025\\u0357\\u0969\\u202b"
+             "\\U000130c5\\u0486\\U000e0123\\u2019\\u01bc\\u2006\\u11ad\\u180e\\u2e05"
+             "\\u10b7\\u013e\\u000a\\u002e\\U00013ea4"
+    };
+    int loop;
+    if (U_FAILURE(status)) {
+        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
+        return;
+    }
+    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
+        u_unescape(strlist[loop], str, (int32_t)(sizeof(str) / sizeof(str[0])));
+        UnicodeString ustr(str);
+
+        RBBISentMonkey monkey;
+        if (U_FAILURE(monkey.deferredStatus)) {
+            continue;
+        }
+
+        const int EXPECTEDSIZE = 50;
+        int expected[EXPECTEDSIZE];
+        int expectedcount = 0;
+
+        monkey.setText(ustr);
+        int i;
+        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
+            if (expectedcount >= EXPECTEDSIZE) {
+                TEST_ASSERT(expectedcount < EXPECTEDSIZE);
+                return;
+            }
+            expected[expectedcount ++] = i;
+        }
+
+        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
+    }
+    delete bi;
+#endif
+}
+
+void RBBITest::TestMonkey(char *params) {
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+    UErrorCode     status    = U_ZERO_ERROR;
+    int32_t        loopCount = 500;
+    int32_t        seed      = 1;
+    UnicodeString  breakType = "all";
+    Locale         locale("en");
+    UBool          useUText  = FALSE;
+
+    if (quick == FALSE) {
+        loopCount = 10000;
+    }
+
+    if (params) {
+        UnicodeString p(params);
+        loopCount = getIntParam("loop", p, loopCount);
+        seed      = getIntParam("seed", p, seed);
+
+        RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);
+        if (m.find()) {
+            breakType = m.group(1, status);
+            m.reset();
+            p = m.replaceFirst("", status);
+        }
+
+        RegexMatcher u(" *utext", p, 0, status);
+        if (u.find()) {
+            useUText = TRUE;
+            u.reset();
+            p = u.replaceFirst("", status);
+        }
+
+
+        // m.reset(p);
+        if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) {
+            // Each option is stripped out of the option string as it is processed.
+            // All options have been checked.  The option string should have been completely emptied..
+            char buf[100];
+            p.extract(buf, sizeof(buf), NULL, status);
+            buf[sizeof(buf)-1] = 0;
+            errln("Unrecognized or extra parameter:  %s\n", buf);
+            return;
+        }
+
+    }
+
+    if (breakType == "char" || breakType == "all") {
+        RBBICharMonkey  m;
+        BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
+        if (U_SUCCESS(status)) {
+            RunMonkey(bi, m, "char", seed, loopCount, useUText);
+            if (breakType == "all" && useUText==FALSE) {
+                // Also run a quick test with UText when "all" is specified
+                RunMonkey(bi, m, "char", seed, loopCount, TRUE);
+            }
+        }
+        else {
+            errcheckln(status, "Creation of character break iterator failed %s", u_errorName(status));
+        }
+        delete bi;
+    }
+
+    if (breakType == "word" || breakType == "all") {
+        logln("Word Break Monkey Test");
+        RBBIWordMonkey  m;
+        BreakIterator  *bi = BreakIterator::createWordInstance(locale, status);
+        if (U_SUCCESS(status)) {
+            RunMonkey(bi, m, "word", seed, loopCount, useUText);
+        }
+        else {
+            errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status));
+        }
+        delete bi;
+    }
+
+    if (breakType == "line" || breakType == "all") {
+        logln("Line Break Monkey Test");
+        RBBILineMonkey  m;
+        BreakIterator  *bi = BreakIterator::createLineInstance(locale, status);
+        if (loopCount >= 10) {
+            loopCount = loopCount / 5;   // Line break runs slower than the others.
+        }
+        if (U_SUCCESS(status)) {
+            RunMonkey(bi, m, "line", seed, loopCount, useUText);
+        }
+        else {
+            errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
+        }
+        delete bi;
+    }
+
+    if (breakType == "sent" || breakType == "all"  ) {
+        logln("Sentence Break Monkey Test");
+        RBBISentMonkey  m;
+        BreakIterator  *bi = BreakIterator::createSentenceInstance(locale, status);
+        if (loopCount >= 10) {
+            loopCount = loopCount / 10;   // Sentence runs slower than the other break types
+        }
+        if (U_SUCCESS(status)) {
+            RunMonkey(bi, m, "sentence", seed, loopCount, useUText);
+        }
+        else {
+            errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
+        }
+        delete bi;
+    }
+
+#endif
+}
+
+//
+//  Run a RBBI monkey test.  Common routine, for all break iterator types.
+//    Parameters:
+//       bi      - the break iterator to use
+//       mk      - MonkeyKind, abstraction for obtaining expected results
+//       name    - Name of test (char, word, etc.) for use in error messages
+//       seed    - Seed for starting random number generator (parameter from user)
+//       numIterations
+//
+void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t  seed,
+                         int32_t numIterations, UBool useUText) {
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+    const int32_t    TESTSTRINGLEN = 500;
+    UnicodeString    testText;
+    int32_t          numCharClasses;
+    UVector          *chClasses;
+    int              expected[TESTSTRINGLEN*2 + 1];
+    int              expectedCount = 0;
+    char             expectedBreaks[TESTSTRINGLEN*2 + 1];
+    char             forwardBreaks[TESTSTRINGLEN*2 + 1];
+    char             reverseBreaks[TESTSTRINGLEN*2+1];
+    char             isBoundaryBreaks[TESTSTRINGLEN*2+1];
+    char             followingBreaks[TESTSTRINGLEN*2+1];
+    char             precedingBreaks[TESTSTRINGLEN*2+1];
+    int              i;
+    int              loopCount = 0;
+
+    m_seed = seed;
+
+    numCharClasses = mk.charClasses()->size();
+    chClasses      = mk.charClasses();
+
+    // Check for errors that occured during the construction of the MonkeyKind object.
+    //  Can't report them where they occured because errln() is a method coming from intlTest,
+    //  and is not visible outside of RBBITest :-(
+    if (U_FAILURE(mk.deferredStatus)) {
+        errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus));
+        return;
+    }
+
+    // Verify that the character classes all have at least one member.
+    for (i=0; i<numCharClasses; i++) {
+        UnicodeSet *s = (UnicodeSet *)chClasses->elementAt(i);
+        if (s == NULL || s->size() == 0) {
+            errln("Character Class #%d is null or of zero size.", i);
+            return;
+        }
+    }
+
+    while (loopCount < numIterations || numIterations == -1) {
+        if (numIterations == -1 && loopCount % 10 == 0) {
+            // If test is running in an infinite loop, display a periodic tic so
+            //   we can tell that it is making progress.
+            fprintf(stderr, ".");
+        }
+        // Save current random number seed, so that we can recreate the random numbers
+        //   for this loop iteration in event of an error.
+        seed = m_seed;
+
+        // Populate a test string with data.
+        testText.truncate(0);
+        for (i=0; i<TESTSTRINGLEN; i++) {
+            int32_t  aClassNum = m_rand() % numCharClasses;
+            UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAt(aClassNum);
+            int32_t   charIdx = m_rand() % classSet->size();
+            UChar32   c = classSet->charAt(charIdx);
+            if (c < 0) {   // TODO:  deal with sets containing strings.
+                errln("c < 0");
+                break;
+            }
+            testText.append(c);
+        }
+
+        // Calculate the expected results for this test string.
+        mk.setText(testText);
+        memset(expectedBreaks, 0, sizeof(expectedBreaks));
+        expectedBreaks[0] = 1;
+        int32_t breakPos = 0;
+        expectedCount = 0;
+        for (;;) {
+            breakPos = mk.next(breakPos);
+            if (breakPos == -1) {
+                break;
+            }
+            if (breakPos > testText.length()) {
+                errln("breakPos > testText.length()");
+            }
+            expectedBreaks[breakPos] = 1;
+            U_ASSERT(expectedCount<testText.length());
+            expected[expectedCount ++] = breakPos;
+        }
+
+        // Find the break positions using forward iteration
+        memset(forwardBreaks, 0, sizeof(forwardBreaks));
+        if (useUText) {
+            UErrorCode status = U_ZERO_ERROR;
+            UText *testUText = utext_openReplaceable(NULL, &testText, &status);
+            // testUText = utext_openUnicodeString(testUText, &testText, &status);
+            bi->setText(testUText, status);
+            TEST_ASSERT_SUCCESS(status);
+            utext_close(testUText);   // The break iterator does a shallow clone of the UText
+                                      //  This UText can be closed immediately, so long as the
+                                      //  testText string continues to exist.
+        } else {
+            bi->setText(testText);
+        }
+
+        for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) {
+            if (i < 0 || i > testText.length()) {
+                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);
+                break;
+            }
+            forwardBreaks[i] = 1;
+        }
+
+        // Find the break positions using reverse iteration
+        memset(reverseBreaks, 0, sizeof(reverseBreaks));
+        for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) {
+            if (i < 0 || i > testText.length()) {
+                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);
+                break;
+            }
+            reverseBreaks[i] = 1;
+        }
+
+        // Find the break positions using isBoundary() tests.
+        memset(isBoundaryBreaks, 0, sizeof(isBoundaryBreaks));
+        U_ASSERT((int32_t)sizeof(isBoundaryBreaks) > testText.length());
+        for (i=0; i<=testText.length(); i++) {
+            isBoundaryBreaks[i] = bi->isBoundary(i);
+        }
+
+
+        // Find the break positions using the following() function.
+        // printf(".");
+        memset(followingBreaks, 0, sizeof(followingBreaks));
+        int32_t   lastBreakPos = 0;
+        followingBreaks[0] = 1;
+        for (i=0; i<testText.length(); i++) {
+            breakPos = bi->following(i);
+            if (breakPos <= i ||
+                breakPos < lastBreakPos ||
+                breakPos > testText.length() ||
+                (breakPos > lastBreakPos && lastBreakPos > i)) {
+                UChar32 brkChar = testText.char32At(lastBreakPos);
+                if ((strcmp(name, "char") != 0 && strcmp(name, "word") != 0) || brkChar < 0x1F1E6 || brkChar > 0x1F1FF) { // Apple, skip RI char/word break monkey tests
+                errln("%s break monkey test: "
+                    "Out of range value returned by BreakIterator::following().\n"
+                        "Random seed=%d  index=%d; following returned %d;  lastbreak=%d",
+                         name, seed, i, breakPos, lastBreakPos);
+                }
+                break;
+            }
+            followingBreaks[breakPos] = 1;
+            lastBreakPos = breakPos;
+        }
+
+        // Find the break positions using the preceding() function.
+        memset(precedingBreaks, 0, sizeof(precedingBreaks));
+        lastBreakPos = testText.length();
+        precedingBreaks[testText.length()] = 1;
+        for (i=testText.length(); i>0; i--) {
+            breakPos = bi->preceding(i);
+            if (breakPos >= i ||
+                breakPos > lastBreakPos ||
+                (breakPos < 0 && testText.getChar32Start(i)>0) ||
+                (breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i)) ) {
+                UChar32 brkChar = testText.char32At(breakPos);
+                if ((strcmp(name, "char") != 0 && strcmp(name, "word") != 0) || brkChar < 0x1F1E6 || brkChar > 0x1F1FF) { // Apple, skip RI char/word break monkey tests
+                errln("%s break monkey test: "
+                    "Out of range value returned by BreakIterator::preceding().\n"
+                    "index=%d;  prev returned %d; lastBreak=%d" ,
+                    name,  i, breakPos, lastBreakPos);
+                if (breakPos >= 0 && breakPos < (int32_t)sizeof(precedingBreaks)) {
+                    precedingBreaks[i] = 2;   // Forces an error.
+                }
+                }
+            } else {
+                if (breakPos >= 0) {
+                    precedingBreaks[breakPos] = 1;
+                } 
+                lastBreakPos = breakPos;
+            }
+        }
+
+        // Compare the expected and actual results.
+        for (i=0; i<=testText.length(); i++) {
+            const char *errorType = NULL;
+            if  (forwardBreaks[i] != expectedBreaks[i]) {
+                errorType = "next()";
+            } else if (reverseBreaks[i] != forwardBreaks[i]) {
+                errorType = "previous()";
+            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
+                errorType = "isBoundary()";
+            } else if (followingBreaks[i] != expectedBreaks[i]) {
+                errorType = "following()";
+            } else if (precedingBreaks[i] != expectedBreaks[i]) {
+                errorType = "preceding()";
+            }
+
+
+            if (errorType != NULL) {
+                // Format a range of the test text that includes the failure as
+                //  a data item that can be included in the rbbi test data file.
+
+                // Start of the range is the last point where expected and actual results
+                //   both agreed that there was a break position.
+                int startContext = i;
+                int32_t count = 0;
+                for (;;) {
+                    if (startContext==0) { break; }
+                    startContext --;
+                    if (expectedBreaks[startContext] != 0) {
+                        if (count == 2) break;
+                        count ++;
+                    }
+                }
+
+                // End of range is two expected breaks past the start position.
+                int endContext = i + 1;
+                int ci;
+                for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
+                    for (;;) {
+                        if (endContext >= testText.length()) {break;}
+                        if (expectedBreaks[endContext-1] != 0) {
+                            if (count == 0) break;
+                            count --;
+                        }
+                        endContext ++;
+                    }
+                }
+
+                // Format looks like   "<data>\\\uabcd\uabcd\\\U0001abcd...</data>"
+                UnicodeString errorText = "<data>";
+                /***if (strcmp(errorType, "next()") == 0) {
+                    startContext = 0;
+                    endContext = testText.length();
+
+                    printStringBreaks(testText, expected, expectedCount);
+                }***/
+
+                for (ci=startContext; ci<endContext;) {
+                    UnicodeString hexChars("0123456789abcdef");
+                    UChar32  c;
+                    int      bn;
+                    c = testText.char32At(ci);
+                    if (ci == i) {
+                        // This is the location of the error.
+                        errorText.append("<?>");
+                    } else if (expectedBreaks[ci] != 0) {
+                        // This a non-error expected break position.
+                        errorText.append("\\");
+                    }
+                    if (c < 0x10000) {
+                        errorText.append("\\u");
+                        for (bn=12; bn>=0; bn-=4) {
+                            errorText.append(hexChars.charAt((c>>bn)&0xf));
+                        }
+                    } else {
+                        errorText.append("\\U");
+                        for (bn=28; bn>=0; bn-=4) {
+                            errorText.append(hexChars.charAt((c>>bn)&0xf));
+                        }
+                    }
+                    ci = testText.moveIndex32(ci, 1);
+                }
+                errorText.append("\\");
+                errorText.append("</data>\n");
+
+                // Output the error
+                char  charErrorTxt[500];
+                UErrorCode status = U_ZERO_ERROR;
+                errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status);
+                charErrorTxt[sizeof(charErrorTxt)-1] = 0;
+                const char *badLocale = bi->getLocaleID(ULOC_ACTUAL_LOCALE, status);
+                
+                UChar32 brkChar = testText.char32At(i);
+                if ((strcmp(name, "char") != 0 && strcmp(name, "word") != 0) || brkChar < 0x1F1E6 || brkChar > 0x1F1FF) { // Apple, skip RI char/word break monkey tests
+                errln("%s break monkey test error [%s].  %s. Operation = %s; Random seed = %d;  buf Idx = %d\n%s",
+                    name, badLocale, (expectedBreaks[i]? "break expected but not found" : "break found but not expected"),
+                    errorType, seed, i, charErrorTxt);
+                }
+                break;
+            }
+        }
+
+        loopCount++;
+    }
+#endif
+}
+
+
+//  Bug 5532.  UTF-8 based UText fails in dictionary code.
+//             This test checks the initial patch,
+//             which is to just keep it from crashing.  Correct word boundaries
+//             await a proper fix to the dictionary code.
+//
+void RBBITest::TestBug5532(void)  {
+   // Text includes a mixture of Thai and Latin.
+   const unsigned char utf8Data[] = {
+           0xE0u, 0xB8u, 0x82u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0xA2u, 0xE0u,
+           0xB9u, 0x80u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 0xA3u, 0xE0u, 0xB8u, 
+           0xB7u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 0xB8u, 0xADu, 0xE0u, 0xB8u, 0x87u,
+           0xE0u, 0xB9u, 0x80u, 0xE0u, 0xB8u, 0xA5u, 0xE0u, 0xB9u, 0x88u, 0xE0u,
+           0xB8u, 0x99u, 0xE0u, 0xB8u, 0x8Bu, 0xE0u, 0xB8u, 0xB5u, 0xE0u, 0xB8u,
+           0x94u, 0xE0u, 0xB8u, 0xB5u, 0x20u, 0x73u, 0x69u, 0x6Du, 0x20u, 0x61u, 
+           0x75u, 0x64u, 0x69u, 0x6Fu, 0x2Fu, 0x20u, 0x4Du, 0x4Fu, 0x4Fu, 0x4Eu, 
+           0x20u, 0x65u, 0x63u, 0x6Cu, 0x69u, 0x70u, 0x73u, 0x65u, 0x20u, 0xE0u, 
+           0xB8u, 0xA3u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 
+           0xB2u, 0x20u, 0x34u, 0x37u, 0x30u, 0x30u, 0x20u, 0xE0u, 0xB8u, 0xA2u, 
+           0xE0u, 0xB8u, 0xB9u, 0xE0u, 0xB9u, 0x82u, 0xE0u, 0xB8u, 0xA3u, 0x00};
+
+    UErrorCode status = U_ZERO_ERROR;
+    UText utext=UTEXT_INITIALIZER;
+    utext_openUTF8(&utext, (const char *)utf8Data, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    BreakIterator *bi = BreakIterator::createWordInstance(Locale("th"), status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_SUCCESS(status)) {
+        bi->setText(&utext, status);
+        TEST_ASSERT_SUCCESS(status);
+
+        int32_t breakCount = 0;
+        int32_t previousBreak = -1;
+        for (bi->first(); bi->next() != BreakIterator::DONE; breakCount++) {
+            // For now, just make sure that the break iterator doesn't hang.
+            TEST_ASSERT(previousBreak < bi->current());
+            previousBreak = bi->current();
+        }
+        TEST_ASSERT(breakCount > 0);
+    }
+    delete bi;
+    utext_close(&utext);
+}
+
+
+void RBBITest::TestBug9983(void)  {
+    UnicodeString text = UnicodeString("\\u002A"  // * Other
+                                       "\\uFF65"  //   Other
+                                       "\\u309C"  //   Katakana
+                                       "\\uFF9F"  //   Extend
+                                       "\\uFF65"  //   Other
+                                       "\\u0020"  //   Other
+                                       "\\u0000").unescape();
+
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<RuleBasedBreakIterator> brkiter(static_cast<RuleBasedBreakIterator *>(
+        BreakIterator::createWordInstance(Locale::getRoot(), status)));
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    brkiter->setText(text);
+    int32_t offset, rstatus;
+    brkiter->last();
+    int32_t iterationCount = 0;
+    while ( (offset = brkiter->previous()) != UBRK_DONE ) {
+        iterationCount++;
+        rstatus = brkiter->getRuleStatus();
+        // printf(" %d(%d)", offset, rstatus);
+        if (iterationCount >= 10) {
+           break; 
+        }
+    }
+    TEST_ASSERT(iterationCount == 6);
+}
+
+
+//
+//  TestDebug    -  A placeholder test kept for debugging purposes.
+//                  Fragments of other tests can be pasted here and invoked on their own
+//                  for tracing, without a lot of unwanted extra work happening.
+//                  The whole body is disabled with #if 0, so the test does nothing as built.
+void RBBITest::TestDebug(void) {
+#if 0   // Scratch code only; change to 1 locally to enable it.
+    UErrorCode   status = U_ZERO_ERROR;
+    int pos = 0;
+    int ruleStatus = 0;
+
+    RuleBasedBreakIterator* bi =
+       // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
+       // (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::Locale("th"), status);
+       (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getDefault(), status);
+    UnicodeString s("\\u2008\\u002e\\udc6a\\u37cd\\u71d0\\u2048\\U000e006a\\u002e\\u0046\\ufd3f\\u000a\\u002e");
+    // UnicodeString s("Aaa.  Bcd");
+    s = s.unescape();
+    bi->setText(s);
+    UBool r = bi->isBoundary(8);
+    printf("%s", r?"true":"false");
+    return;   // Everything below this line is unreachable while this return is present.
+    pos = bi->last();
+    do {
+        // ruleStatus = bi->getRuleStatus();
+        printf("%d\t%d\n", pos, ruleStatus);
+        pos = bi->previous();
+    } while (pos != BreakIterator::DONE);
+#endif
+}
+
+void RBBITest::TestProperties() {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode);
+    if (!prependSet.isEmpty()) {
+        errln(
+            "[:GCB=Prepend:] is not empty any more. "
+            "Uncomment relevant lines in source/data/brkitr/char.txt and "
+            "change this test to the opposite condition.");
+    }
+}
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */