ICU-62123.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / regextst.cpp
diff --git a/icuSources/test/intltest/regextst.cpp b/icuSources/test/intltest/regextst.cpp
index 1e197a90e45b89608bf8ed8f045b9c3929be2e3a..4b0a2f43fca2891bb94cb5d8007a4abd12b3aae0 100644
--- a/icuSources/test/intltest/regextst.cpp
+++ b/icuSources/test/intltest/regextst.cpp
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 2002-2012, International Business Machines Corporation and
+ * Copyright (c) 2002-2016, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  
@@ -23,17 +25,26 @@
  #include "intltest.h"
  #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "unicode/localpointer.h"
  #include "unicode/regex.h"
  #include "unicode/uchar.h"
  #include "unicode/ucnv.h"
  #include "unicode/uniset.h"
+#include "unicode/uregex.h"
+#include "unicode/usetiter.h"
  #include "unicode/ustring.h"
+#include "unicode/utext.h"
+#include "unicode/utf16.h"
+#include "cstr.h"
  #include "regextst.h"
+#include "regexcmp.h"
  #include "uvector.h"
  #include "util.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
+#include "cmemory.h"
  #include "cstring.h"
  #include "uinvchar.h"
  
@@ -58,90 +69,48 @@ RegexTest::~RegexTest()
  void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
  {
      if (exec) logln("TestSuite RegexTest: ");
-    switch (index) {
-
-        case 0: name = "Basic";
-            if (exec) Basic();
-            break;
-        case 1: name = "API_Match";
-            if (exec) API_Match();
-            break;
-        case 2: name = "API_Replace";
-            if (exec) API_Replace();
-            break;
-        case 3: name = "API_Pattern";
-            if (exec) API_Pattern();
-            break;
-        case 4:
+    TESTCASE_AUTO_BEGIN;
+    TESTCASE_AUTO(Basic);
+    TESTCASE_AUTO(API_Match);
+    TESTCASE_AUTO(API_Replace);
+    TESTCASE_AUTO(API_Pattern);
  #if !UCONFIG_NO_FILE_IO
-            name = "Extended";
-            if (exec) Extended();
-#else
-            name = "skip";
+    TESTCASE_AUTO(Extended);
  #endif
-            break;
-        case 5: name = "Errors";
-            if (exec) Errors();
-            break;
-        case 6: name = "PerlTests";
-            if (exec) PerlTests();
-            break;
-        case 7: name = "Callbacks";
-            if (exec) Callbacks();
-            break;
-        case 8: name = "FindProgressCallbacks";
-            if (exec) FindProgressCallbacks();
-            break;
-        case 9: name = "Bug 6149";
-             if (exec) Bug6149();
-             break;
-        case 10: name = "UTextBasic";
-          if (exec) UTextBasic();
-          break;
-        case 11: name = "API_Match_UTF8";
-          if (exec) API_Match_UTF8();
-          break;
-        case 12: name = "API_Replace_UTF8";
-          if (exec) API_Replace_UTF8();
-          break;
-        case 13: name = "API_Pattern_UTF8";
-          if (exec) API_Pattern_UTF8();
-          break;
-        case 14: name = "PerlTestsUTF8";
-          if (exec) PerlTestsUTF8();
-          break;
-        case 15: name = "PreAllocatedUTextCAPI";
-          if (exec) PreAllocatedUTextCAPI();
-          break;
-        case 16: name = "Bug 7651";
-             if (exec) Bug7651();
-             break;
-        case 17: name = "Bug 7740";
-            if (exec) Bug7740();
-            break;
-        case 18: name = "Bug 8479";
-            if (exec) Bug8479();
-            break;
-        case 19: name = "Bug 7029";
-            if (exec) Bug7029();
-            break;
-        case 20: name = "CheckInvBufSize";
-            if (exec) CheckInvBufSize();
-            break;
-        case 21: name = "Bug 9283";
-            if (exec) Bug9283();
-            break;
-
-        default: name = "";
-            break; //needed to end loop
-    }
+    TESTCASE_AUTO(Errors);
+    TESTCASE_AUTO(PerlTests);
+    TESTCASE_AUTO(Callbacks);
+    TESTCASE_AUTO(FindProgressCallbacks);
+    TESTCASE_AUTO(Bug6149);
+    TESTCASE_AUTO(UTextBasic);
+    TESTCASE_AUTO(API_Match_UTF8);
+    TESTCASE_AUTO(API_Replace_UTF8);
+    TESTCASE_AUTO(API_Pattern_UTF8);
+    TESTCASE_AUTO(PerlTestsUTF8);
+    TESTCASE_AUTO(PreAllocatedUTextCAPI);
+    TESTCASE_AUTO(Bug7651);
+    TESTCASE_AUTO(Bug7740);
+    TESTCASE_AUTO(Bug8479);
+    TESTCASE_AUTO(Bug7029);
+    TESTCASE_AUTO(CheckInvBufSize);
+    TESTCASE_AUTO(Bug9283);
+    TESTCASE_AUTO(Bug10459);
+    TESTCASE_AUTO(TestCaseInsensitiveStarters);
+    TESTCASE_AUTO(TestBug11049);
+    TESTCASE_AUTO(TestBug11371);
+    TESTCASE_AUTO(TestBug11480);
+    TESTCASE_AUTO(NamedCapture);
+    TESTCASE_AUTO(NamedCaptureLimits);
+    TESTCASE_AUTO(TestBug12884);
+    TESTCASE_AUTO(TestBug13631);
+    TESTCASE_AUTO(TestBug13632);
+    TESTCASE_AUTO_END;
  }
  
  
-
  /**
   * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
- * into ASCII. 
+ * into ASCII.
   * @see utext_openUTF8
   */
  static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
@@ -207,8 +176,7 @@ const char* RegexTest::extractToAssertBuf(const UnicodeString& message) {
    return ASSERT_BUF;
  }
  
-
-#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}
+#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,UPRV_LENGTHOF(buf),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}
  
  #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure.  status=%s", \
                                                                __FILE__, __LINE__, u_errorName(status)); return;}}
@@ -225,7 +193,12 @@ if (status!=errcode) {dataerrln("RegexTest failure at line %d.  Expected status=
  #define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
      errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}
  
-#define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToAssertBuf(ustr),inv);};}
+// expected: const char * , restricted to invariant characters.
+// actual: const UnicodeString &
+#define REGEX_ASSERT_UNISTR(expected, actual) { \
+    if (UnicodeString(expected, -1, US_INV) != (actual)) { \
+        errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n",  \
+                __FILE__, __LINE__, expected, extractToAssertBuf(actual));};}
  
  
  static UBool testUTextEqual(UText *uta, UText *utb) {
@@ -263,8 +236,8 @@ void RegexTest::assertUText(const char *expected, UText *actual, const char *fil
      if (!testUTextEqual(&expectedText, actual)) {
          char buf[201 /*21*/];
          char expectedBuf[201];
-        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
-        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
+        utextToPrintable(buf, UPRV_LENGTHOF(buf), actual);
+        utextToPrintable(expectedBuf, UPRV_LENGTHOF(expectedBuf), &expectedText);
          errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
      }
      utext_close(&expectedText);
@@ -285,19 +258,19 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
      if (!testUTextEqual(&expectedText, actual)) {
          char buf[201 /*21*/];
          char expectedBuf[201];
-        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
-        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
+        utextToPrintable(buf, UPRV_LENGTHOF(buf), actual);
+        utextToPrintable(expectedBuf, UPRV_LENGTHOF(expectedBuf), &expectedText);
          errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
      }
      utext_close(&expectedText);
  }
  
  /**
- * Assumes utf-8 input 
+ * Assumes utf-8 input
   */
  #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__)
  /**
- * Assumes Invariant input 
+ * Assumes Invariant input
   */
  #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
  
@@ -305,11 +278,11 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
   * This buffer ( inv_buf ) is used to hold the UTF-8 strings
   * passed into utext_openUTF8. An error will be given if
   * INV_BUFSIZ is too small.  It's only used on EBCDIC systems.
- */ 
+ */
  
  #define INV_BUFSIZ 2048 /* increase this if too small */
  
-static int32_t inv_next=0;
+static int64_t inv_next=0;
  
  #if U_CHARSET_FAMILY!=U_ASCII_FAMILY
  static char inv_buf[INV_BUFSIZ];
@@ -373,7 +346,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
              line, u_errorName(status));
          return FALSE;
      }
-    if (line==376) { RegexPatternDump(REPattern);}
+    if (line==376) { REPattern->dumpPattern();}
  
      UnicodeString inputString(inputText);
      UnicodeString unEscapedInput = inputString.unescape();
@@ -409,7 +382,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
      }
  
      if (retVal == FALSE) {
-        RegexPatternDump(REPattern);
+        REPattern->dumpPattern();
      }
  
      delete REPattern;
@@ -436,12 +409,12 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
              line, u_errorName(status));
          return FALSE;
      }
-    
+
      UnicodeString inputString(text, -1, US_INV);
      UnicodeString unEscapedInput = inputString.unescape();
      LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
      ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
-    
+
      inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
      if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
          // UTF-8 does not allow unpaired surrogates, so this could actually happen
@@ -452,7 +425,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
      textChars = new char[inputUTF8Length+1];
      unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
      utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
-    
+
      REMatcher = &REPattern->matcher(status)->reset(&inputText);
      if (U_FAILURE(status)) {
          errln("RegexTest failure in REPattern::matcher() at line %d (UTF8).  Status = %s\n",
@@ -485,7 +458,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
      }
  
      if (retVal == FALSE) {
-        RegexPatternDump(REPattern);
+        REPattern->dumpPattern();
      }
  
      delete REPattern;
@@ -551,7 +524,7 @@ void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
              }
          }
      }
-    
+
      delete callerPattern;
      utext_close(&patternText);
  }
@@ -578,7 +551,7 @@ void RegexTest::Basic() {
          UErrorCode  status = U_ZERO_ERROR;
          RegexPattern *pattern;
          pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
-        RegexPatternDump(pattern);
+        pattern->dumpPattern();
          RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
          UBool result = m->find();
          printf("result = %d\n", result);
@@ -726,18 +699,18 @@ void RegexTest::UTextBasic() {
      utext_openUTF8(&pattern, str_abc, -1, &status);
      RegexMatcher matcher(&pattern, 0, status);
      REGEX_CHECK_STATUS;
-    
+
      UText input = UTEXT_INITIALIZER;
      utext_openUTF8(&input, str_abc, -1, &status);
      REGEX_CHECK_STATUS;
      matcher.reset(&input);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
-    
+
      matcher.reset(matcher.inputText());
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
-    
+
      utext_close(&pattern);
      utext_close(&input);
  }
@@ -1114,7 +1087,7 @@ void RegexTest::API_Match() {
          delete m;
          delete p;
      }
-    
+
      //
      // Regions
      //
@@ -1127,34 +1100,34 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m.regionEnd() == testString.length());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-        
+
          m.region(2,4, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.matches(status));
          REGEX_ASSERT(m.start(status)==2);
          REGEX_ASSERT(m.end(status)==4);
          REGEX_CHECK_STATUS;
-        
+
          m.reset();
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == testString.length());
-        
+
          UnicodeString shorterString("short");
          m.reset(shorterString);
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == shorterString.length());
-        
+
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
-        
+
          REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-    
+
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
          REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
@@ -1165,9 +1138,9 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
-        
+
      }
-    
+
      //
      // hitEnd() and requireEnd()
      //
@@ -1179,7 +1152,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m1.hitEnd() == TRUE);
          REGEX_ASSERT(m1.requireEnd() == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          status = U_ZERO_ERROR;
          RegexMatcher m2("a*", testString, 0, status);
          REGEX_ASSERT(m2.lookingAt(status) == TRUE);
@@ -1217,7 +1190,7 @@ void RegexTest::API_Match() {
  #endif
  
      //
-    //  Time Outs.  
+    //  Time Outs.
      //       Note:  These tests will need to be changed when the regexp engine is
      //              able to detect and cut short the exponential time behavior on
      //              this type of match.
@@ -1245,22 +1218,22 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
          REGEX_CHECK_STATUS;
      }
-    
+
      //
      //  Stack Limits
      //
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeString testString(1000000, 0x41, 1000000);  // Length 1,000,000, filled with 'A'
-        
+
          // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
          //   of the '+', and makes the stack frames larger.
          RegexMatcher matcher("(A)+A$", testString, 0, status);
-        
+
          // With the default stack, this match should fail to run
          REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
          REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
-        
+
          // With unlimited stack, it should run
          status = U_ZERO_ERROR;
          matcher.setStackLimit(0, status);
@@ -1276,7 +1249,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
          REGEX_ASSERT(matcher.getStackLimit() == 10000);
      }
-        
+
          // A pattern that doesn't save state should work with
          //   a minimal sized stack
      {
@@ -1289,7 +1262,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(matcher.matches(status) == TRUE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(matcher.getStackLimit() == 30);
-        
+
          // Negative stack sizes should fail
          status = U_ZERO_ERROR;
          matcher.setStackLimit(1000, status);
@@ -1298,7 +1271,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
          REGEX_ASSERT(matcher.getStackLimit() == 1000);
      }
-    
+
  
  }
  
@@ -1409,8 +1382,8 @@ void RegexTest::API_Replace() {
      REGEX_ASSERT(dest == "The value of $1 is bc.defg");
  
      dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);
-    REGEX_CHECK_STATUS;
-    REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
+    REGEX_ASSERT(U_FAILURE(status));
+    status = U_ZERO_ERROR;
  
      UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
      replacement = replacement.unescape();
@@ -1847,7 +1820,7 @@ void RegexTest::API_Match_UTF8() {
          regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
          REGEX_VERBOSE_TEXT(&input2);
          utext_openUChars(&empty, NULL, 0, &status);
-        
+
          int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
          int32_t input2Len = strlen("not abc");
  
@@ -1957,7 +1930,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete m1;
          delete pat2;
-        
+
          utext_close(&re);
          utext_close(&input1);
          utext_close(&input2);
@@ -1978,10 +1951,10 @@ void RegexTest::API_Match_UTF8() {
          UText               re=UTEXT_INITIALIZER;
          const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
          utext_openUTF8(&re, str_01234567_pat, -1, &status);
-        
+
          RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
          REGEX_CHECK_STATUS;
-        
+
          UText input = UTEXT_INITIALIZER;
          const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
          utext_openUTF8(&input, str_0123456789, -1, &status);
@@ -2016,13 +1989,13 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
  
          matcher->lookingAt(status);
-        
+
          UnicodeString dest;
          UText destText = UTEXT_INITIALIZER;
          utext_openUnicodeString(&destText, &dest, &status);
          UText *result;
          //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
-        //     Test shallow-clone API
+        //  Test shallow-clone API
          int64_t   group_len;
          result = matcher->group((UText *)NULL, group_len, status);
          REGEX_CHECK_STATUS;
@@ -2035,54 +2008,79 @@ void RegexTest::API_Match_UTF8() {
          //  destText is now immutable, reopen it
          utext_close(&destText);
          utext_openUnicodeString(&destText, &dest, &status);
-        
-        result = matcher->group(0, NULL, status);
+
+        int64_t length;
+        result = matcher->group(0, NULL, length, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
          utext_close(result);
-        result = matcher->group(0, &destText, status);
+        result = matcher->group(0, &destText, length, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
-        REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
-        
-        result = matcher->group(1, NULL, status);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 0);
+        REGEX_ASSERT(length == 10);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
+
+        // Capture Group 1 == "234567"
+        result = matcher->group(1, NULL, length, status);
          REGEX_CHECK_STATUS;
-        const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
-        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 2);
+        REGEX_ASSERT(length == 6);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
          utext_close(result);
-        result = matcher->group(1, &destText, status);
+
+        result = matcher->group(1, &destText, length, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
-        REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
-        
-        result = matcher->group(2, NULL, status);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 2);
+        REGEX_ASSERT(length == 6);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
+        utext_close(result);
+
+        // Capture Group 2 == "45"
+        result = matcher->group(2, NULL, length, status);
          REGEX_CHECK_STATUS;
-        const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
-        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 4);
+        REGEX_ASSERT(length == 2);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
          utext_close(result);
-        result = matcher->group(2, &destText, status);
+
+        result = matcher->group(2, &destText, length, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
-        REGEX_ASSERT_UTEXT_UTF8(str_45, result);
-        
-        result = matcher->group(3, NULL, status);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 4);
+        REGEX_ASSERT(length == 2);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
+        utext_close(result);
+
+        // Capture Group 3 == "89"
+        result = matcher->group(3, NULL, length, status);
          REGEX_CHECK_STATUS;
-        const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
-        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 8);
+        REGEX_ASSERT(length == 2);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
          utext_close(result);
-        result = matcher->group(3, &destText, status);
+
+        result = matcher->group(3, &destText, length, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
-        REGEX_ASSERT_UTEXT_UTF8(str_89, result);
+        REGEX_ASSERT(utext_getNativeIndex(result) == 8);
+        REGEX_ASSERT(length == 2);
+        REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);
+        utext_close(result);
  
+        // Capture Group number out of range.
+        status = U_ZERO_ERROR;
          REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
+        status = U_ZERO_ERROR;
          REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
+        status = U_ZERO_ERROR;
          matcher->reset();
          REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&destText);
          utext_close(&input);
          utext_close(&re);
@@ -2143,7 +2141,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&input);
          utext_close(&re);
      }
@@ -2161,7 +2159,7 @@ void RegexTest::API_Match_UTF8() {
          utext_openUTF8(&re, str_Gabcabc, -1, &status);
  
          RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
-        
+
          REGEX_CHECK_STATUS;
          UText input = UTEXT_INITIALIZER;
          const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
@@ -2183,7 +2181,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&input);
          utext_close(&re);
      }
@@ -2223,7 +2221,7 @@ void RegexTest::API_Match_UTF8() {
              REGEX_ASSERT(m.end(status) == i);
          }
          REGEX_ASSERT(i==20);
-        
+
          utext_close(&s);
      }
      {
@@ -2245,7 +2243,7 @@ void RegexTest::API_Match_UTF8() {
              REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
          }
          REGEX_ASSERT(i==5);
-        
+
          utext_close(&s);
      }
  
@@ -2273,7 +2271,7 @@ void RegexTest::API_Match_UTF8() {
          delete m;
          delete p;
      }
-    
+
      //
      // Regions
      //
@@ -2285,42 +2283,42 @@ void RegexTest::API_Match_UTF8() {
          REGEX_VERBOSE_TEXT(&testPattern);
          regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
          REGEX_VERBOSE_TEXT(&testText);
-        
+
          RegexMatcher m(&testPattern, &testText, 0, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-        
+
          m.region(2,4, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.matches(status));
          REGEX_ASSERT(m.start(status)==2);
          REGEX_ASSERT(m.end(status)==4);
          REGEX_CHECK_STATUS;
-        
+
          m.reset();
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
-        
+
          regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
          REGEX_VERBOSE_TEXT(&testText);
          m.reset(&testText);
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
-        
+
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
-        
+
          REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-    
+
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
          REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
@@ -2331,11 +2329,11 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
-        
+
          utext_close(&testText);
          utext_close(&testPattern);
      }
-    
+
      //
      // hitEnd() and requireEnd()
      //
@@ -2347,13 +2345,13 @@ void RegexTest::API_Match_UTF8() {
          const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
          utext_openUTF8(&testPattern, str_, -1, &status);
          utext_openUTF8(&testText, str_aabb, -1, &status);
-        
+
          RegexMatcher m1(&testPattern, &testText,  0, status);
          REGEX_ASSERT(m1.lookingAt(status) == TRUE);
          REGEX_ASSERT(m1.hitEnd() == TRUE);
          REGEX_ASSERT(m1.requireEnd() == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          status = U_ZERO_ERROR;
          const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
          utext_openUTF8(&testPattern, str_a, -1, &status);
@@ -2371,7 +2369,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT(m3.hitEnd() == TRUE);
          REGEX_ASSERT(m3.requireEnd() == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          utext_close(&testText);
          utext_close(&testPattern);
      }
@@ -2397,7 +2395,7 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_VERBOSE_TEXT(&re);
      RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
      REGEX_CHECK_STATUS;
-    
+
      char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
      //             012345678901234567
      UText dataText = UTEXT_INITIALIZER;
@@ -2413,9 +2411,9 @@ void RegexTest::API_Replace_UTF8() {
      UText destText = UTEXT_INITIALIZER;
      utext_openUnicodeString(&destText, &dest, &status);
      UText *result;
-    
+
      UText replText = UTEXT_INITIALIZER;
-    
+
      const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
      utext_openUTF8(&replText, str_yz, -1, &status);
      REGEX_VERBOSE_TEXT(&replText);
@@ -2447,7 +2445,7 @@ void RegexTest::API_Replace_UTF8() {
      const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
      utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
      matcher->reset(&dataText);
-    
+
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
@@ -2472,7 +2470,7 @@ void RegexTest::API_Replace_UTF8() {
      //
      utext_openUTF8(&dataText, NULL, 0, &status);
      matcher->reset(&dataText);
-    
+
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8("", result);
@@ -2496,7 +2494,7 @@ void RegexTest::API_Replace_UTF8() {
      //
      utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
      matcher->reset(&dataText);
-    
+
      utext_openUTF8(&replText, NULL, 0, &status);
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
@@ -2560,7 +2558,7 @@ void RegexTest::API_Replace_UTF8() {
      utext_openUTF8(&dataText, str_abcdefg, -1, &status);
      RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
      REGEX_CHECK_STATUS;
-    
+
      const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
      utext_openUTF8(&replText, str_11, -1, &status);
      result = matcher2->replaceFirst(&replText, NULL, status);
@@ -2573,8 +2571,8 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_CHECK_STATUS;
      REGEX_ASSERT(result == &destText);
      REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
-   
-    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ 
+
+    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
      utext_openUTF8(&replText, str_v, -1, &status);
      REGEX_VERBOSE_TEXT(&replText);
      result = matcher2->replaceFirst(&replText, NULL, status);
@@ -2587,8 +2585,10 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_CHECK_STATUS;
      REGEX_ASSERT(result == &destText);
      REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
-    
-    const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
+
+    const char str_byitselfnogroupnumber[] = { 0x5c, 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c,
+               0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62,
+               0x65, 0x72, 0x20, 0x5c, 0x24, 0x5c, 0x24, 0x5c, 0x24, 0x00 }; /* \$ by itself, no group number \$\$\$ */
      utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
      result = matcher2->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
@@ -2609,7 +2609,7 @@ void RegexTest::API_Replace_UTF8() {
      supplDigitChars[24] = 0x9F;
      supplDigitChars[25] = 0x8F;
      utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
-    
+
      result = matcher2->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
@@ -2639,7 +2639,7 @@ void RegexTest::API_Replace_UTF8() {
          utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
          utext_openUTF8(&replText, str_u0043, -1, &status);
          matcher->reset(&dataText);
-        
+
          result = matcher->replaceAll(&replText, NULL, status);
          REGEX_CHECK_STATUS;
          const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
@@ -2659,7 +2659,7 @@ void RegexTest::API_Replace_UTF8() {
          matcher->reset(&dataText);
  
          unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
-        //                          0123456789     
+        //                          0123456789
          expected[2] = 0xF0;
          expected[3] = 0x90;
          expected[4] = 0x80;
@@ -2687,10 +2687,10 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
          utext_openUTF8(&re, str_ssee, -1, &status);
          utext_openUTF8(&dataText, str_blah, -1, &status);
          utext_openUTF8(&replText, str_ooh, -1, &status);
-        
+
          RegexMatcher m(&re, 0, status);
          REGEX_CHECK_STATUS;
-        
+
          UnicodeString result;
          UText resultText = UTEXT_INITIALIZER;
          utext_openUnicodeString(&resultText, &result, &status);
@@ -2731,7 +2731,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
          m.appendTail(&resultText, status);
          const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
          REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
-        
+
          utext_close(&resultText);
      }
  
@@ -2739,7 +2739,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
      delete pat2;
      delete matcher;
      delete pat;
-    
+
      utext_close(&dataText);
      utext_close(&replText);
      utext_close(&destText);
@@ -2764,7 +2764,7 @@ void RegexTest::API_Pattern_UTF8() {
      UText         re2 = UTEXT_INITIALIZER;
      UErrorCode    status = U_ZERO_ERROR;
      UParseError   pe;
-    
+
      const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
      const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
      utext_openUTF8(&re1, str_abcalmz, -1, &status);
@@ -2813,7 +2813,7 @@ void RegexTest::API_Pattern_UTF8() {
      delete pat1a;
      delete pat1;
      delete pat2;
-    
+
      utext_close(&re1);
      utext_close(&re2);
  
@@ -2827,13 +2827,13 @@ void RegexTest::API_Pattern_UTF8() {
          UText          pattern    = UTEXT_INITIALIZER;
          const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
          utext_openUTF8(&pattern, str_pL, -1, &status);
-        
+
          RegexPattern  *pSource    = RegexPattern::compile(&pattern, 0, status);
          RegexPattern  *pClone     = pSource->clone();
          delete         pSource;
          RegexMatcher  *mFromClone = pClone->matcher(status);
          REGEX_CHECK_STATUS;
-        
+
          UText          input      = UTEXT_INITIALIZER;
          const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
          utext_openUTF8(&input, str_HelloWorld, -1, &status);
@@ -2845,7 +2845,7 @@ void RegexTest::API_Pattern_UTF8() {
          REGEX_ASSERT(mFromClone->find() == FALSE);
          delete mFromClone;
          delete pClone;
-        
+
          utext_close(&input);
          utext_close(&pattern);
      }
@@ -2857,7 +2857,7 @@ void RegexTest::API_Pattern_UTF8() {
          UErrorCode status  = U_ZERO_ERROR;
          UText      pattern = UTEXT_INITIALIZER;
          UText      input   = UTEXT_INITIALIZER;
-        
+
          const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
          utext_openUTF8(&input, str_randominput, -1, &status);
  
@@ -2865,17 +2865,17 @@ void RegexTest::API_Pattern_UTF8() {
          utext_openUTF8(&pattern, str_dotstar, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
          utext_openUTF8(&pattern, str_abc, -1, &status);
          REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
          utext_openUTF8(&pattern, str_nput, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          utext_openUTF8(&pattern, str_randominput, -1, &status);
          REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
          REGEX_CHECK_STATUS;
@@ -2884,13 +2884,13 @@ void RegexTest::API_Pattern_UTF8() {
          utext_openUTF8(&pattern, str_u, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          utext_openUTF8(&input, str_abc, -1, &status);
          utext_openUTF8(&pattern, str_abc, -1, &status);
          status = U_INDEX_OUTOFBOUNDS_ERROR;
          REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
          REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
-        
+
          utext_close(&input);
          utext_close(&pattern);
      }
@@ -3054,6 +3054,37 @@ void RegexTest::API_Pattern_UTF8() {
      delete pat1;
  
  
+    //
+    // split of a UText based string, with library allocating output UTexts.
+    //
+    {
+        status = U_ZERO_ERROR;
+        RegexMatcher matcher(UnicodeString("(:)"), 0, status);
+        UnicodeString stringToSplit("first:second:third");
+        UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &status);
+        REGEX_CHECK_STATUS;
+
+        UText *splits[10] = {NULL};
+        int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(splits), status);
+        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(numFields == 5);
+        REGEX_ASSERT_UTEXT_INVARIANT("first", splits[0]);
+        REGEX_ASSERT_UTEXT_INVARIANT(":", splits[1]);
+        REGEX_ASSERT_UTEXT_INVARIANT("second", splits[2]);
+        REGEX_ASSERT_UTEXT_INVARIANT(":", splits[3]);
+        REGEX_ASSERT_UTEXT_INVARIANT("third", splits[4]);
+        REGEX_ASSERT(splits[5] == NULL);
+
+        for (int i=0; i<UPRV_LENGTHOF(splits); i++) {
+            if (splits[i]) {
+                utext_close(splits[i]);
+                splits[i] = NULL;
+            }
+        }
+        utext_close(textToSplit);
+    }
+
+
      //
      // RegexPattern::pattern() and patternText()
      //
@@ -3065,7 +3096,7 @@ void RegexTest::API_Pattern_UTF8() {
      regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);
      pat1 = RegexPattern::compile(&re1, pe, status);
      REGEX_CHECK_STATUS;
-    REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");
+    REGEX_ASSERT_UNISTR("(Hello, world)*", pat1->pattern());
      REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));
      delete pat1;
  
@@ -3131,7 +3162,7 @@ void RegexTest::Extended() {
      UnicodeString   matchString;   // The marked up string to be used as input
  
      if (U_FAILURE(status)){
-        dataerrln("Construct RegexMatcher() error.");
+        dataerrln("Construct RegexMatcher() error - %s", u_errorName(status));
          delete [] testData;
          return;
      }
@@ -3281,7 +3312,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
                             int32_t line) {
      UnicodeString       unEscapedInput;
      UnicodeString       deTaggedInput;
-    
+
      int32_t             patternUTF8Length,      inputUTF8Length;
      char                *patternChars  = NULL, *inputChars = NULL;
      UText               patternText    = UTEXT_INITIALIZER;
@@ -3308,7 +3339,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      int32_t             regionEnd        = -1;
      int32_t             regionStartUTF8  = -1;
      int32_t             regionEndUTF8    = -1;
-    
+
  
      //
      //  Compile the caller's pattern
@@ -3326,7 +3357,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      if (flags.indexOf((UChar)0x6d) >= 0)  { // 'm' flag
          bflags |= UREGEX_MULTILINE;
      }
-    
+
      if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
          bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
      }
@@ -3362,16 +3393,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,
  
      UTF8Converter = ucnv_open("UTF8", &status);
      ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
-    
+
      patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
      status = U_ZERO_ERROR; // buffer overflow
      patternChars = new char[patternUTF8Length+1];
      pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
      utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
-    
+
      if (status == U_ZERO_ERROR) {
          UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
-        
+
          if (status != U_ZERO_ERROR) {
  #if UCONFIG_NO_BREAK_ITERATION==1
              // 'v' test flag means that the test pattern should not compile if ICU was configured
@@ -3393,7 +3424,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              }
          }
      }
-    
+
      if (UTF8Pattern == NULL) {
          // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
          logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
@@ -3401,7 +3432,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      }
  
      if (flags.indexOf((UChar)0x64) >= 0) {  // 'd' flag
-        RegexPatternDump(callerPattern);
+        callerPattern->dumpPattern();
      }
  
      if (flags.indexOf((UChar)0x45) >= 0) {  // 'E' flag
@@ -3423,7 +3454,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              numFinds = i;
          }
      }
-    
+
      // 'M' flag.  Use matches() instead of find()
      if (flags.indexOf((UChar)0x4d) >= 0) {
          useMatchesFunc = TRUE;
@@ -3478,7 +3509,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
          matcher->setTrace(TRUE);
      }
-    
+
      if (UTF8Pattern != NULL) {
          inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
          status = U_ZERO_ERROR; // buffer overflow
@@ -3490,10 +3521,10 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
              REGEX_CHECK_STATUS_L(line);
          }
-        
+
          if (UTF8Matcher == NULL) {
              // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
-          logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
+            logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
              status = U_ZERO_ERROR;
          }
      }
@@ -3502,9 +3533,12 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      //  Generate native indices for UTF8 versions of region and capture group info
      //
      if (UTF8Matcher != NULL) {
+        if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
+            UTF8Matcher->setTrace(TRUE);
+        }
          if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
          if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
-       
+
          //  Fill out the native index UVector info.
          //  Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
          for (i=0; i<groupStarts.size(); i++) {
@@ -3519,7 +3553,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
                  }
                  setInt(groupStartsUTF8, startUTF8, i);
              }
-            
+
              int32_t  end = groupEnds.elementAti(i);
              //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
              if (end >= 0) {
@@ -3554,8 +3588,8 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              UTF8Matcher->useTransparentBounds(TRUE);
          }
      }
-    
-    
+
+
  
      //
      // Do a find on the de-tagged input using the caller's pattern
@@ -3581,6 +3615,12 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          }
      }
      matcher->setTrace(FALSE);
+    if (UTF8Matcher) {
+        UTF8Matcher->setTrace(FALSE);
+    }
+    if (U_FAILURE(status)) {   // report any ICU error left in status, tagged with the test-file line
+        errln("Error at line %d. ICU ErrorCode is %s", line, u_errorName(status));
+    }
  
      //
      // Match up the groups from the find() with the groups from the tags
@@ -3599,16 +3639,17 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          failed = TRUE;
          goto cleanupAndReturn;
      }
+    if (isMatch && groupStarts.size() == 0) {
+        errln("Error at line %d: No match expected, but one found at position %d.", line, matcher->start(status));
+        failed = TRUE;
+    }
+    if (UTF8Matcher && isUTF8Match && groupStarts.size() == 0) {
+        errln("Error at line %d: No match expected, but one found at position %d (UTF-8).", line, UTF8Matcher->start(status));
+        failed = TRUE;
+    }
  
      if (flags.indexOf((UChar)0x47 /*G*/) >= 0) {
          // Only check for match / no match.  Don't check capture groups.
-        if (isMatch && groupStarts.size() == 0) {
-            errln("Error at line %d:  No match expected, but one found.", line);
-            failed = TRUE;
-        } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) {
-            errln("Error at line %d:  No match expected, but one found. (UTF8)", line);
-            failed = TRUE;
-        }
          goto cleanupAndReturn;
      }
  
@@ -3627,7 +3668,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              failed = TRUE;
              goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
          }
-        
+
          int32_t  expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
          int32_t  expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
          if (matcher->end(i, status) != expectedEnd) {
@@ -3664,7 +3705,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
          matcher->requireEnd() == FALSE) {
          errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
@@ -3674,7 +3715,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
          matcher->hitEnd() == TRUE) {
          errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
@@ -3684,7 +3725,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
          matcher->hitEnd() == FALSE) {
          errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
@@ -3708,7 +3749,7 @@ cleanupAndReturn:
      delete UTF8Pattern;
      delete matcher;
      delete callerPattern;
-    
+
      utext_close(&inputText);
      delete[] inputChars;
      utext_close(&patternText);
@@ -3767,7 +3808,7 @@ void RegexTest::Errors() {
      REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);
      REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);
      REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);
-    REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG);        // Overflows int during scan
+    REGEX_ERR("abc{5,50000000000}", 1, 16, U_REGEX_NUMBER_TOO_BIG);        // Overflows int during scan
      REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG);          // Overflows regex binary format
      REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);
  
@@ -3784,7 +3825,7 @@ void RegexTest::Errors() {
  
  
  //-------------------------------------------------------------------------------
-//      
+//
  //  Read a text data file, convert it to UChars, and return the data
  //    in one big UChar * buffer, which the caller must delete.
  //
@@ -4127,7 +4168,7 @@ void RegexTest::PerlTests() {
                  lineNum, expected?"":"no ", found?"":"no " );
              continue;
          }
-        
+
          // Don't try to check expected results if there is no match.
          //   (Some have stuff in the expected fields)
          if (!found) {
@@ -4425,7 +4466,7 @@ void RegexTest::PerlTestsUTF8() {
          if (flagStr.indexOf(UChar_x) != -1) {
              flags |= UREGEX_COMMENTS;
          }
-        
+
          //
          // Put the pattern in a UTF-8 UText
          //
@@ -4522,7 +4563,7 @@ void RegexTest::PerlTestsUTF8() {
                  lineNum, expected?"":"no ", found?"":"no " );
              continue;
          }
-        
+
          // Don't try to check expected results if there is no match.
          //   (Some have stuff in the expected fields)
          if (!found) {
@@ -4665,10 +4706,10 @@ void RegexTest::PerlTestsUTF8() {
  
      delete fieldPat;
      delete [] testData;
-    
+
      utext_close(&patternText);
      utext_close(&inputText);
-    
+
      delete [] patternChars;
      delete [] inputChars;
  
@@ -4682,13 +4723,14 @@ void RegexTest::PerlTestsUTF8() {
  //
  //  Bug6149   Verify limits to heap expansion for backtrack stack.
  //             Use this pattern,
-//                 "(a?){1,}"
-//             The zero-length match will repeat forever.
-//                (That this goes into a loop is another bug)
+//                 "(a?){1,8000000}"
+//             Note: the upper bound was originally unbounded, but that case now has loop-breaking enabled.
+//                   This test is likely to be fragile, as further optimizations stop
+//                   more cases of pointless looping in the match engine.
  //
  //---------------------------------------------------------------
  void RegexTest::Bug6149() {
-    UnicodeString pattern("(a?){1,}");
+    UnicodeString pattern("(a?){1,8000000}");
      UnicodeString s("xyz");
      uint32_t flags = 0;
      UErrorCode status = U_ZERO_ERROR;
@@ -4731,12 +4773,12 @@ U_CDECL_END
  void RegexTest::Callbacks() {
     {
          // Getter returns NULLs if no callback has been set
-        
+
          //   The variables that the getter will fill in.
          //   Init to non-null values so that the action of the getter can be seen.
          const void          *returnedContext = &returnedContext;
          URegexMatchCallback *returnedFn = &testCallBackFn;
-        
+
          UErrorCode status = U_ZERO_ERROR;
          RegexMatcher matcher("x", 0, status);
          REGEX_CHECK_STATUS;
@@ -4745,7 +4787,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(returnedFn == NULL);
          REGEX_ASSERT(returnedContext == NULL);
      }
-    
+
     {
          // Set and Get work
          callBackContext cbInfo = {this, 0, 0, 0};
@@ -4760,7 +4802,7 @@ void RegexTest::Callbacks() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(returnedFn == testCallBackFn);
          REGEX_ASSERT(returnedContext == &cbInfo);
-        
+
          // A short-running match shouldn't invoke the callback
          status = U_ZERO_ERROR;
          cbInfo.reset(1);
@@ -4769,7 +4811,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(matcher.matches(status));
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls == 0);
-        
+
          // A medium-length match that runs long enough to invoke the
          //   callback, but not so long that the callback aborts it.
          status = U_ZERO_ERROR;
@@ -4779,7 +4821,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(matcher.matches(status)==FALSE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls > 0);
-        
+
          // A longer running match that the callback function will abort.
          status = U_ZERO_ERROR;
          cbInfo.reset(4);
@@ -4788,8 +4830,17 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(matcher.matches(status)==FALSE);
          REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          REGEX_ASSERT(cbInfo.numCalls == 4);
+
+        // A longer running find that the callback function will abort.
+        status = U_ZERO_ERROR;
+        cbInfo.reset(4);
+        s = "aaaaaaaaaaaaaaaaaaaaaaab";
+        matcher.reset(s);
+        REGEX_ASSERT(matcher.find(status)==FALSE);
+        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
+        REGEX_ASSERT(cbInfo.numCalls == 4);
      }
- 
+
  
  }
  
@@ -4809,6 +4860,9 @@ struct progressCallBackContext {
      void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
  };
  
+// call-back function for find().
+// Return TRUE to continue the find().
+// Return FALSE to stop the find().
  U_CDECL_BEGIN
  static UBool U_CALLCONV
  testProgressCallBackFn(const void *context, int64_t matchIndex) {
@@ -4823,12 +4877,12 @@ U_CDECL_END
  void RegexTest::FindProgressCallbacks() {
     {
          // Getter returns NULLs if no callback has been set
-        
+
          //   The variables that the getter will fill in.
          //   Init to non-null values so that the action of the getter can be seen.
          const void                  *returnedContext = &returnedContext;
          URegexFindProgressCallback  *returnedFn = &testProgressCallBackFn;
-        
+
          UErrorCode status = U_ZERO_ERROR;
          RegexMatcher matcher("x", 0, status);
          REGEX_CHECK_STATUS;
@@ -4837,14 +4891,14 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(returnedFn == NULL);
          REGEX_ASSERT(returnedContext == NULL);
      }
-    
+
     {
          // Set and Get work
          progressCallBackContext cbInfo = {this, 0, 0, 0};
          const void                  *returnedContext;
          URegexFindProgressCallback  *returnedFn;
          UErrorCode status = U_ZERO_ERROR;
-        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
+        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);
          REGEX_CHECK_STATUS;
          matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
          REGEX_CHECK_STATUS;
@@ -4852,11 +4906,11 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(returnedFn == testProgressCallBackFn);
          REGEX_ASSERT(returnedContext == &cbInfo);
-        
-        // A short-running match should NOT invoke the callback.
+
+        // A find that matches on the initial position does NOT invoke the callback.
          status = U_ZERO_ERROR;
          cbInfo.reset(100);
-        UnicodeString s = "abxxx";
+        UnicodeString s = "aaxxx";
          matcher.reset(s);
  #if 0
          matcher.setTrace(TRUE);
@@ -4864,8 +4918,9 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(matcher.find(0, status));
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls == 0);
-        
-        // A medium running match that causes matcher.find() to invoke our callback for each index.
+
+        // A medium running find() that causes matcher.find() to invoke our callback for each index,
+        //   but not so many times that we interrupt the operation.
          status = U_ZERO_ERROR;
          s = "aaaaaaaaaaaaaaaaaaab";
          cbInfo.reset(s.length()); //  Some upper limit for number of calls that is greater than size of our input string
@@ -4873,31 +4928,30 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
-        
+
          // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
          status = U_ZERO_ERROR;
          UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
          cbInfo.reset(s1.length() - 5); //  Bail early somewhere near the end of input string
          matcher.reset(s1);
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
-        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
  
-#if 0
          // Now a match that will succeed, but after an interruption
          status = U_ZERO_ERROR;
          UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
          cbInfo.reset(s2.length() - 10); //  Bail early somewhere near the end of input string
          matcher.reset(s2);
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
-        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          // Now retry the match from where left off
          cbInfo.maxCalls = 100; //  No callback limit
+        status = U_ZERO_ERROR;
          REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
          REGEX_CHECK_STATUS;
-#endif
      }
- 
+
  
  }
  
@@ -4916,7 +4970,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
      UText                patternText = UTEXT_INITIALIZER;
      UnicodeString        buffer;
      UText                bufferText = UTEXT_INITIALIZER;
-    
+
      utext_openUnicodeString(&bufferText, &buffer, &status);
  
      /*
@@ -4933,7 +4987,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
          u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
          utext_openUChars(&text2, text2Chars, -1, &status);
-        
+
          regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
          re = uregex_openUText(&patternText, 0, NULL, &status);
  
@@ -4945,7 +4999,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          utext_setNativeIndex(resultText, 0);
          utext_setNativeIndex(&text1, 0);
          REGEX_ASSERT(testUTextEqual(resultText, &text1));
-        
+
          resultText = uregex_getUText(re, &bufferText, &status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(resultText == &bufferText);
@@ -4961,7 +5015,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          utext_setNativeIndex(resultText, 0);
          utext_setNativeIndex(&text2, 0);
          REGEX_ASSERT(testUTextEqual(resultText, &text2));
-        
+
          uregex_close(re);
          utext_close(&text1);
          utext_close(&text2);
@@ -4974,7 +5028,11 @@ void RegexTest::PreAllocatedUTextCAPI () {
          UChar    text1[80];
          UText   *actual;
          UBool    result;
-        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
+        int64_t  length = 0;
+
+        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
+        //                  012345678901234567890123456789012345678901234567
+        //                  0         1         2         3         4
  
          status = U_ZERO_ERROR;
          re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@@ -4984,30 +5042,33 @@ void RegexTest::PreAllocatedUTextCAPI () {
          result = uregex_find(re, 0, &status);
          REGEX_ASSERT(result==TRUE);
  
-        /*  Capture Group 0, the full match.  Should succeed.  */
+        /*  Capture Group 0, the full match.  Should succeed. "abc interior def" */
          status = U_ZERO_ERROR;
-        actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);
+        actual = uregex_groupUText(re, 0, &bufferText, &length, &status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(actual == &bufferText);
-        REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);
+        REGEX_ASSERT(utext_getNativeIndex(actual) == 6);
+        REGEX_ASSERT(length == 16);
+        REGEX_ASSERT(utext_nativeLength(actual) == 47);
  
-        /*  Capture group #1.  Should succeed. */
+        /*  Capture group #1.  Should succeed, matching " interior ". */
          status = U_ZERO_ERROR;
-        actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);
+        actual = uregex_groupUText(re, 1, &bufferText, &length, &status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(actual == &bufferText);
-        REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);
+        REGEX_ASSERT(utext_getNativeIndex(actual) == 9);   // position of " interior "
+        REGEX_ASSERT(length == 10);
+        REGEX_ASSERT(utext_nativeLength(actual) == 47);
  
          /*  Capture group out of range.  Error. */
          status = U_ZERO_ERROR;
-        actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
+        actual = uregex_groupUText(re, 2, &bufferText, &length, &status);
          REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
          REGEX_ASSERT(actual == &bufferText);
-
          uregex_close(re);
  
      }
-    
+
      /*
       *  replaceFirst()
       */
@@ -5016,10 +5077,12 @@ void RegexTest::PreAllocatedUTextCAPI () {
          UChar    text2[80];
          UText    replText = UTEXT_INITIALIZER;
          UText   *result;
-        
          status = U_ZERO_ERROR;
-        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
+        utext_openUnicodeString(&bufferText, &buffer, &status);
+
+        status = U_ZERO_ERROR;
+        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2)/2);
          regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
  
          re = uregex_openC("x(.*?)x", 0, NULL, &status);
@@ -5027,7 +5090,9 @@ void RegexTest::PreAllocatedUTextCAPI () {
  
          /*  Normal case, with match */
          uregex_setText(re, text1, -1, &status);
+        REGEX_CHECK_STATUS;
          utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
+        REGEX_CHECK_STATUS;
          result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &bufferText);
@@ -5040,10 +5105,10 @@ void RegexTest::PreAllocatedUTextCAPI () {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &bufferText);
          REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
-        
+
          /* Unicode escapes */
          uregex_setText(re, text1, -1, &status);
-        regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
+        regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042\\$\\a", -1, &status);
          utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
          result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
          REGEX_CHECK_STATUS;
@@ -5097,11 +5162,281 @@ void RegexTest::PreAllocatedUTextCAPI () {
       *  splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
       *   so we don't need to test it here.
       */
-    
+
      utext_close(&bufferText);
      utext_close(&patternText);
  }
  
+
+//--------------------------------------------------------------
+//
+//  NamedCapture   Check basic named capture group functionality
+//
+//--------------------------------------------------------------
+void RegexTest::NamedCapture() {
+    UErrorCode status = U_ZERO_ERROR;
+    RegexPattern *pat = RegexPattern::compile(UnicodeString(
+            "abc()()(?<three>xyz)(de)(?<five>hmm)(?<six>oh)f\\k<five>"), 0, status);
+    REGEX_CHECK_STATUS;
+    int32_t group = pat->groupNumberFromName("five", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(5 == group);
+    group = pat->groupNumberFromName("three", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(3 == group);
+
+    status = U_ZERO_ERROR;
+    group = pat->groupNumberFromName(UnicodeString("six"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(6 == group);
+
+    status = U_ZERO_ERROR;
+    group = pat->groupNumberFromName(UnicodeString("nosuch"), status);
+    U_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+
+    // After copying a pattern, named capture should still work in the copy.
+    RegexPattern *copiedPat = new RegexPattern(*pat);
+    REGEX_ASSERT(*copiedPat == *pat);
+    delete pat; pat = NULL;  // Delete original, copy should have no references back to it.
+
+    group = copiedPat->groupNumberFromName("five", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(5 == group);
+    group = copiedPat->groupNumberFromName("three", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(3 == group);
+    delete copiedPat;
+
+    // ReplaceAll with named capture group.
+    status = U_ZERO_ERROR;
+    UnicodeString text("Substitution of <<quotes>> for <<double brackets>>");
+    RegexMatcher *m = new RegexMatcher(UnicodeString("<<(?<mid>.+?)>>"), text, 0, status);
+    REGEX_CHECK_STATUS;
+    // m.pattern().dumpPattern();
+    UnicodeString replacedText = m->replaceAll("'${mid}'", status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("Substitution of 'quotes' for 'double brackets'") == replacedText);
+    delete m;
+
+    // ReplaceAll, allowed capture group numbers.
+    text = UnicodeString("abcmxyz");
+    m = new RegexMatcher(UnicodeString("..(?<one>m)(.)(.)"), text, 0, status);
+    REGEX_CHECK_STATUS;
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$0>"), status);   // group 0, full match, is allowed.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$1>"), status);      // group 1 by number.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<${one}>"), status);   // group 1 by name.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$2>"), status);   // group 2.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<x>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$3>"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<y>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$4>"), status);
+    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$04>"), status);      // group 0, leading 0,
+    REGEX_CHECK_STATUS;                                                 //    trailing out-of-range 4 passes through.
+    REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$000016>"), status);  // Consume leading zeroes. Don't consume digits
+    REGEX_CHECK_STATUS;                                                 //   that push group num out of range.
+    REGEX_ASSERT(UnicodeString("a<m6>z") == replacedText);              //   This is group 1.
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<$3$2$1${one}>"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<yxmm>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("$3$2$1${one}"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("ayxmmz") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<${noSuchName}>"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<${invalid-name}>"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("<${one"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText  = m->replaceAll(UnicodeString("$not a capture group"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    delete m;
+
+    // Repeat the above replaceAll() tests using the plain C API, which
+    //  has a separate implementation internally.
+    //  TODO: factor out the test data.
+
+    status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC("..(?<one>m)(.)(.)", 0, NULL, &status);
+    REGEX_CHECK_STATUS;
+    text = UnicodeString("abcmxyz");
+    uregex_setText(re, text.getBuffer(), text.length(), &status);
+    REGEX_CHECK_STATUS;
+
+    UChar resultBuf[100];
+    int32_t resultLength;
+    UnicodeString repl;
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$0>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$1>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${one}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$2>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<x>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$3>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<y>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$4>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$04>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$000016>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m6>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$3$2$1${one}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<yxmm>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("$3$2$1${one}");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("ayxmmz") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${noSuchName}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${invalid-name}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${one");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("$not a capture group");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    uregex_close(re);
+}
+
+//--------------------------------------------------------------
+//
+//  NamedCaptureLimits   Patterns with huge numbers of named capture groups.
+//                       The point is not so much what the exact limit is,
+//                       but that a largish number doesn't hit bad non-linear performance,
+//                       and that exceeding the limit fails cleanly (U_REGEX_PATTERN_TOO_BIG).
+//
+//--------------------------------------------------------------
+void RegexTest::NamedCaptureLimits() {
+    if (quick) {                           // Test is skipped when the "quick" flag is set.
+        logln("Skipping test. Runs in exhuastive mode only.");
+        return;
+    }
+    const int32_t goodLimit = 1000000;     // Pattern w this many groups builds successfully.
+    const int32_t failLimit = 10000000;    // Pattern exceeds internal limits, fails to compile.
+    char nnbuf[100];                       // Scratch text for one "(?<nnN>)" group or one "nnN" name.
+    UnicodeString pattern;
+    int32_t nn;
+
+    for (nn=1; nn<goodLimit; nn++) {       // Build "(?<nn1>)(?<nn2>)...", one empty named group per nn.
+        sprintf(nnbuf, "(?<nn%d>)", nn);
+        pattern.append(UnicodeString(nnbuf, -1, US_INV));
+    }
+    UErrorCode status = U_ZERO_ERROR;
+    RegexPattern *pat = RegexPattern::compile(pattern, 0, status);
+    REGEX_CHECK_STATUS;
+    for (nn=1; nn<goodLimit; nn++) {       // Every name "nnN" must map back to group number N.
+        sprintf(nnbuf, "nn%d", nn);
+        int32_t groupNum = pat->groupNumberFromName(nnbuf, -1, status);
+        REGEX_ASSERT(nn == groupNum);
+        if (nn != groupNum) {              // Stop at the first mismatch instead of checking the remaining names.
+            break;
+        }
+    }
+    delete pat;
+
+    pattern.remove();                      // Start over with an empty pattern string.
+    for (nn=1; nn<failLimit; nn++) {       // Same construction, but with failLimit-1 groups.
+        sprintf(nnbuf, "(?<nn%d>)", nn);
+        pattern.append(UnicodeString(nnbuf, -1, US_INV));
+    }
+    status = U_ZERO_ERROR;
+    pat = RegexPattern::compile(pattern, 0, status);
+    REGEX_ASSERT(status == U_REGEX_PATTERN_TOO_BIG);   // The oversized pattern must be rejected with this specific error.
+    delete pat;
+}
+
+
  //--------------------------------------------------------------
  //
  //  Bug7651   Regex pattern that exceeds default operator stack depth in matcher.
@@ -5172,7 +5507,7 @@ void RegexTest::Bug8479() {
          delete pMatcher;
      }
  }
-     
+
  
  // Bug 7029
  void RegexTest::Bug7029() {
@@ -5190,16 +5525,17 @@ void RegexTest::Bug7029() {
  
  // Bug 9283
  //   This test is checking for the existance of any supplemental characters that case-fold
-//   to a bmp character.  
+//   to a bmp character.
  //
-//   At the time of this writing there are none. If any should appear in a subsequent release 
-//   of Unicode, the code in regular expressions compilation that determines the longest 
-//   posssible match for a literal string  will need to be enhanced.  
+//   At the time of this writing there are none. If any should appear in a subsequent release
+//   of Unicode, the code in regular expressions compilation that determines the longest
+//   possible match for a literal string  will need to be enhanced.
  //
  //   See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
  //   for details on what to do in case of a failure of this test.
  //
  void RegexTest::Bug9283() {
+#if !UCONFIG_NO_NORMALIZATION
      UErrorCode status = U_ZERO_ERROR;
      UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);
      REGEX_CHECK_STATUS;
@@ -5213,6 +5549,7 @@ void RegexTest::Bug9283() {
          UnicodeString cf = UnicodeString(c).foldCase();
          REGEX_ASSERT(cf.length() >= 2);
      }
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
  }
  
  
@@ -5225,5 +5562,293 @@ void RegexTest::CheckInvBufSize() {
    }
  }
  
-#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
  
+void RegexTest::Bug10459() {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString patternString("(txt)");
+    UnicodeString txtString("txt");
+
+    UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status);   // UText over the pattern string.
+    REGEX_CHECK_STATUS;
+    UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status);       // UText over the subject text.
+    REGEX_CHECK_STATUS;
+
+    URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);  // Regex created from UText, the case that crashed.
+    REGEX_CHECK_STATUS;
+
+    uregex_setUText(icu_re, utext_txt, &status);
+    REGEX_CHECK_STATUS;
+
+    // Bug 10459: calling uregex_group() before doing a matching operation
+    //   was causing a segfault, but only for regular expressions created from UText.
+    //   It should instead set status to U_REGEX_INVALID_STATE and return a length of 0.
+
+    UChar buf[100];
+    int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status);     // No find()/matches() has been done yet.
+    REGEX_ASSERT(status == U_REGEX_INVALID_STATE);
+    REGEX_ASSERT(len == 0);
+
+    uregex_close(icu_re);
+    utext_close(utext_pat);
+    utext_close(utext_txt);
+}
+
+void RegexTest::TestCaseInsensitiveStarters() {
+    // Test that the data used by RegexCompile::findCaseInsensitiveStarters() hasn't
+    //  become stale because of new Unicode characters.
+    // If it is stale, rerun the generation tool
+    //    svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing
+    // and replace the embedded data in i18n/regexcmp.cpp
+
+    for (UChar32 cp=0; cp<=0x10ffff; cp++) {
+        if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) {
+            continue;
+        }
+        UnicodeSet s(cp, cp);
+        s.closeOver(USET_CASE_INSENSITIVE);
+        UnicodeSetIterator setIter(s);
+        while (setIter.next()) {
+            if (!setIter.isString()) {
+                continue;
+            }
+            const UnicodeString &str = setIter.getString();
+            UChar32 firstChar = str.char32At(0);
+            UnicodeSet starters;
+            RegexCompile::findCaseInsensitiveStarters(firstChar, &starters);
+            if (!starters.contains(cp)) {
+                errln("CaseInsensitiveStarters for \\u%x is missing character \\u%x.", cp, firstChar);
+                return;
+            }
+        }
+    }
+}
+
+
+void RegexTest::TestBug11049() {
+    // Original bug report: pattern with match start consisting of one of several individual characters,
+    //  and the text being matched ending with a supplementary character. find() would read past the
+    //  end of the input text when searching for potential match starting points.
+
+    // To see the problem, the text must exactly fill an allocated buffer, so that valgrind will
+    // detect the bad read. TestCase11049() does that copying and runs each case.
+
+    TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);   // Text ends with a surrogate pair; no match expected.
+    TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);    // Match at the very last character.
+
+    // Test again with a pattern starting with a single character,
+    // which takes a different code path than starting with an OR expression,
+    // but with similar logic.
+    TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);       // Text ends with a surrogate pair; no match expected.
+    TestCase11049("C", "string matches at end C", TRUE, __LINE__);        // Match at the very last character.
+}
+
+// Run a single test case from TestBug11049(). Internal function.
+void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber) {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString patternString = UnicodeString(pattern).unescape();
+    LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
+
+    UnicodeString dataString = UnicodeString(data).unescape();
+    UChar *exactBuffer = new UChar[dataString.length()];
+    dataString.extract(exactBuffer, dataString.length(), status);
+    UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status);
+
+    LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status));
+    REGEX_CHECK_STATUS;
+    matcher->reset(ut);
+    UBool result = matcher->find();
+    if (result != expectMatch) {
+        errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
+              __FILE__, lineNumber, expectMatch, result, pattern, data);
+    }
+
+    // Rerun test with UTF-8 input text. Won't see buffer overreads, but could see
+    //   off-by-one on find() with match at the last code point.
+    //   Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8
+    //   because string.unescape() will only shrink it.
+    char * utf8Buffer = new char[uprv_strlen(data)+1];
+    u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), dataString.length(), &status);
+    REGEX_CHECK_STATUS;
+    ut = utext_openUTF8(ut, utf8Buffer, -1, &status);
+    REGEX_CHECK_STATUS;
+    matcher->reset(ut);
+    result = matcher->find();
+    if (result != expectMatch) {
+        errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
+              __FILE__, lineNumber, expectMatch, result, pattern, data);
+    }
+    delete [] utf8Buffer;
+
+    utext_close(ut);
+    delete [] exactBuffer;
+}
+
+
+void RegexTest::TestBug11371() {
+    if (quick) {                                               // Test is skipped when the "quick" flag is set.
+        logln("Skipping test. Runs in exhuastive mode only.");
+        return;
+    }
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString patternString;
+
+    for (int i=0; i<8000000; i++) {                            // Case 1: eight million empty capture groups.
+        patternString.append(UnicodeString("()"));
+    }
+    LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
+    if (status != U_REGEX_PATTERN_TOO_BIG) {                   // Compile is expected to fail with exactly this error.
+        errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
+              __FILE__, __LINE__, u_errorName(status));
+    }
+
+    status = U_ZERO_ERROR;
+    patternString = "(";                                       // Case 2: "(" + twenty million "A++" + "){0}B++".
+    for (int i=0; i<20000000; i++) {
+        patternString.append(UnicodeString("A++"));
+    }
+    patternString.append(UnicodeString("){0}B++"));
+    LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
+    if (status != U_REGEX_PATTERN_TOO_BIG) {
+        errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
+              __FILE__, __LINE__, u_errorName(status));
+    }
+
+    // Case 3: pattern with too much string data, such that string indexes overflow operand data field size
+    // in compiled instruction.
+    status = U_ZERO_ERROR;
+    patternString = "";
+    while (patternString.length() < 0x00ffffff) {              // Grow the literal text to at least 0x00ffffff UChars.
+        patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
+    }
+    patternString.append(UnicodeString("X? trailing string"));
+    LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
+    if (status != U_REGEX_PATTERN_TOO_BIG) {
+        errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
+              __FILE__, __LINE__, u_errorName(status));
+    }
+}
+
+void RegexTest::TestBug11480() {
+    // C API, get capture group of a group that does not participate in the match.
+    //        (Returns a zero length string, with nul termination,
+    //         indistinguishable from a group with a zero length match.)
+
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC("(A)|(B)", 0, NULL, &status);
+    REGEX_CHECK_STATUS;
+    UnicodeString text = UNICODE_STRING_SIMPLE("A");
+    uregex_setText(re, text.getBuffer(), text.length(), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(uregex_lookingAt(re, 0, &status));
+    UChar buf[10] = {(UChar)13, (UChar)13, (UChar)13, (UChar)13};
+    int32_t length = uregex_group(re, 2, buf+1, UPRV_LENGTHOF(buf)-1, &status);
+    REGEX_ASSERT(length == 0);
+    REGEX_ASSERT(buf[0] == 13);
+    REGEX_ASSERT(buf[1] == 0);
+    REGEX_ASSERT(buf[2] == 13);
+    uregex_close(re);
+
+    // UText C++ API, length of match is 0 for non-participating matches.
+    UText ut = UTEXT_INITIALIZER;
+    utext_openUnicodeString(&ut, &text, &status);
+    RegexMatcher matcher(UnicodeString("(A)|(B)"), 0, status);
+    REGEX_CHECK_STATUS;
+    matcher.reset(&ut);
+    REGEX_ASSERT(matcher.lookingAt(0, status));
+
+    // UText C++ API, Capture group 1 matches "A", position 0, length 1.
+    int64_t groupLen = -666;
+    UText group = UTEXT_INITIALIZER;
+    matcher.group(1, &group, groupLen, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(groupLen == 1);
+    REGEX_ASSERT(utext_getNativeIndex(&group) == 0);
+
+    // Capture group 2, the (B), does not participate in the match.
+    matcher.group(2, &group, groupLen, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(groupLen == 0);
+    REGEX_ASSERT(matcher.start(2, status) == -1);
+    REGEX_CHECK_STATUS;
+}
+
+void RegexTest::TestBug12884() {
+    // Bug 12884: setTimeLimit() was not effective for empty sub-patterns with large {minimum counts}
+    UnicodeString pattern(u"(((((((){120}){11}){11}){11}){80}){11}){4}");
+    UnicodeString text(u"hello");
+    UErrorCode status = U_ZERO_ERROR;
+    RegexMatcher m(pattern, text, 0, status);
+    REGEX_CHECK_STATUS;
+    m.setTimeLimit(5, status);                    // With a limit set, find() is expected to stop with a time-out.
+    m.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    // Non-greedy loops. They take a different code path during matching.
+    UnicodeString ngPattern(u"(((((((){120}?){11}?){11}?){11}?){80}?){11}?){4}?");
+    status = U_ZERO_ERROR;
+    RegexMatcher ngM(ngPattern, text, 0, status);
+    REGEX_CHECK_STATUS;
+    ngM.setTimeLimit(5, status);
+    ngM.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    // UText, wrapping non-UTF-16 text, also takes a different execution path.
+    const char *text8 = u8"¿Qué es Unicode?  Unicode proporciona un número único para cada"
+                          "carácter, sin importar la plataforma, sin importar el programa,"
+                          "sin importar el idioma.";
+    status = U_ZERO_ERROR;
+    LocalUTextPointer ut(utext_openUTF8(NULL, text8, -1, &status));   // Closed automatically when ut goes out of scope.
+    REGEX_CHECK_STATUS;
+    m.reset(ut.getAlias());                       // Greedy matcher again, now over the UTF-8 UText.
+    m.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    status = U_ZERO_ERROR;
+    ngM.reset(ut.getAlias());                     // Non-greedy matcher over the same UTF-8 UText.
+    ngM.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+}
+
+// Bug 13631. A find() of a pattern with a zero length look-behind assertion
+//            can cause a read past the end of the input text.
+//            The failure is seen when running this test with Clang's Address Sanitizer.
+
+void RegexTest::TestBug13631() {
+    const UChar *pats[] = { u"(?<!^)",            // Negative look-behind on start of input.
+                            u"(?<=^)",            // Positive look-behind on start of input.
+                            nullptr               // End-of-list marker for the loop below.
+                          };
+    for (const UChar **pat=pats; *pat; ++pat) {
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString upat(*pat);
+        RegexMatcher matcher(upat, 0, status);
+        const UChar s =u'a';                      // Input is a single UChar with nothing after it.
+        UText *ut = utext_openUChars(nullptr, &s, 1, &status);
+        REGEX_CHECK_STATUS;
+        matcher.reset(ut);
+        while (matcher.find()) {                  // Run find() to exhaustion; the test makes no assertions on the results.
+        }
+        utext_close(ut);
+    }
+}
+
+// Bug 13632 Out of bounds memory reference if a replacement string ends with a '$',
+//           where a following group specification would be expected.
+//           Failure shows when running the test under Clang's Address Sanitizer.
+
+void RegexTest::TestBug13632() {
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC(" ", 0, nullptr, &status);   // Pattern is a single space.
+    const char16_t *sourceString = u"Hello, world.";
+    uregex_setText(re, sourceString, u_strlen(sourceString), &status);
+
+    const int32_t destCap = 20;
+    char16_t dest[destCap] = {};
+    const char16_t replacement[] = {u'x', u'$'};    // Not nul terminated string.
+    uregex_replaceAll(re, replacement, 2, dest, destCap, &status);     // Explicit length 2, so '$' is the last character.
+
+    assertEquals("", U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);      // The dangling '$' must be reported as an error.
+    uregex_close(re);
+}
+
+#endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */