ICU-66108.tar.gz

[apple/icu.git] / icuSources / test / intltest / regextst.cpp
diff --git a/icuSources/test/intltest/regextst.cpp b/icuSources/test/intltest/regextst.cpp

index c488e0e25dbe7f8a28c6be150270f606cbf7854f..efec8fdfadc6921fea6e3f90c3e0498a88a0652f 100644 (file)
--- a/icuSources/test/intltest/regextst.cpp
+++ b/icuSources/test/intltest/regextst.cpp
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 2002-2015, International Business Machines Corporation and
+ * Copyright (c) 2002-2016, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  
@@ -23,6 +25,10 @@
  #include "intltest.h"
  #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
  #include "unicode/localpointer.h"
  #include "unicode/regex.h"
  #include "unicode/uchar.h"
@@ -31,13 +37,13 @@
  #include "unicode/uregex.h"
  #include "unicode/usetiter.h"
  #include "unicode/ustring.h"
+#include "unicode/utext.h"
+#include "unicode/utf16.h"
+#include "cstr.h"
  #include "regextst.h"
  #include "regexcmp.h"
  #include "uvector.h"
  #include "util.h"
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
  #include "cmemory.h"
  #include "cstring.h"
  #include "uinvchar.h"
@@ -63,107 +69,47 @@ RegexTest::~RegexTest()
  void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
  {
      if (exec) logln("TestSuite RegexTest: ");
-    switch (index) {
-
-        case 0: name = "Basic";
-            if (exec) Basic();
-            break;
-        case 1: name = "API_Match";
-            if (exec) API_Match();
-            break;
-        case 2: name = "API_Replace";
-            if (exec) API_Replace();
-            break;
-        case 3: name = "API_Pattern";
-            if (exec) API_Pattern();
-            break;
-        case 4:
+    TESTCASE_AUTO_BEGIN;  // NOTE(review): TESTCASE_AUTO* are defined outside this file; presumably they take over the index -> name / exec dispatch of the switch removed here - confirm
+    TESTCASE_AUTO(Basic);
+    TESTCASE_AUTO(API_Match);
+    TESTCASE_AUTO(API_Replace);
+    TESTCASE_AUTO(API_Pattern);
  #if !UCONFIG_NO_FILE_IO
-            name = "Extended";
-            if (exec) Extended();
-#else
-            name = "skip";
+    TESTCASE_AUTO(Extended);  // listed only when file I/O is available; the old switch reported "skip" at this index instead
  #endif
-            break;
-        case 5: name = "Errors";
-            if (exec) Errors();
-            break;
-        case 6: name = "PerlTests";
-            if (exec) PerlTests();
-            break;
-        case 7: name = "Callbacks";
-            if (exec) Callbacks();
-            break;
-        case 8: name = "FindProgressCallbacks";
-            if (exec) FindProgressCallbacks();
-            break;
-        case 9: name = "Bug 6149";
-             if (exec) Bug6149();
-             break;
-        case 10: name = "UTextBasic";
-          if (exec) UTextBasic();
-          break;
-        case 11: name = "API_Match_UTF8";
-          if (exec) API_Match_UTF8();
-          break;
-        case 12: name = "API_Replace_UTF8";
-          if (exec) API_Replace_UTF8();
-          break;
-        case 13: name = "API_Pattern_UTF8";
-          if (exec) API_Pattern_UTF8();
-          break;
-        case 14: name = "PerlTestsUTF8";
-          if (exec) PerlTestsUTF8();
-          break;
-        case 15: name = "PreAllocatedUTextCAPI";
-          if (exec) PreAllocatedUTextCAPI();
-          break;
-        case 16: name = "Bug 7651";
-             if (exec) Bug7651();
-             break;
-        case 17: name = "Bug 7740";
-            if (exec) Bug7740();
-            break;
-        case 18: name = "Bug 8479";
-            if (exec) Bug8479();
-            break;
-        case 19: name = "Bug 7029";
-            if (exec) Bug7029();
-            break;
-        case 20: name = "CheckInvBufSize";
-            if (exec) CheckInvBufSize();
-            break;
-        case 21: name = "Bug 9283";
-            if (exec) Bug9283();
-            break;
-        case 22: name = "Bug10459";
-            if (exec) Bug10459();
-            break;
-        case 23: name = "TestCaseInsensitiveStarters";
-            if (exec) TestCaseInsensitiveStarters();
-            break;
-        case 24: name = "TestBug11049";
-            if (exec) TestBug11049();
-            break;
-        case 25: name = "TestBug11371";
-            if (exec) TestBug11371();
-            break;
-        case 26: name = "TestBug11480";
-            if (exec) TestBug11480();
-            break;
-        case 27: name = "NamedCapture";
-            if (exec) NamedCapture();
-            break;
-        case 28: name = "NamedCaptureLimits";
-            if (exec) NamedCaptureLimits();
-            break;
-        default: name = "";
-            break; //needed to end loop
-    }
+    TESTCASE_AUTO(Errors);
+    TESTCASE_AUTO(PerlTests);
+    TESTCASE_AUTO(Callbacks);
+    TESTCASE_AUTO(FindProgressCallbacks);
+    TESTCASE_AUTO(Bug6149);  // NOTE(review): the old switch named this "Bug 6149" (with a space), likewise 7651/7740/8479/7029/9283; confirm the reported test names may change
+    TESTCASE_AUTO(UTextBasic);
+    TESTCASE_AUTO(API_Match_UTF8);
+    TESTCASE_AUTO(API_Replace_UTF8);
+    TESTCASE_AUTO(API_Pattern_UTF8);
+    TESTCASE_AUTO(PerlTestsUTF8);
+    TESTCASE_AUTO(PreAllocatedUTextCAPI);
+    TESTCASE_AUTO(Bug7651);
+    TESTCASE_AUTO(Bug7740);
+    TESTCASE_AUTO(Bug8479);
+    TESTCASE_AUTO(Bug7029);
+    TESTCASE_AUTO(CheckInvBufSize);
+    TESTCASE_AUTO(Bug9283);
+    TESTCASE_AUTO(Bug10459);
+    TESTCASE_AUTO(TestCaseInsensitiveStarters);
+    TESTCASE_AUTO(TestBug11049);
+    TESTCASE_AUTO(TestBug11371);
+    TESTCASE_AUTO(TestBug11480);
+    TESTCASE_AUTO(NamedCapture);
+    TESTCASE_AUTO(NamedCaptureLimits);
+    TESTCASE_AUTO(TestBug12884);  // this and the four entries below had no case in the old switch
+    TESTCASE_AUTO(TestBug13631);
+    TESTCASE_AUTO(TestBug13632);
+    TESTCASE_AUTO(TestBug20359);
+    TESTCASE_AUTO(TestBug20863);
+    TESTCASE_AUTO_END;
  }
  
  
-
  /**
   * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
   * into ASCII.
@@ -232,29 +178,56 @@ const char* RegexTest::extractToAssertBuf(const UnicodeString& message) {
    return ASSERT_BUF;
  }
  
-#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}
-
-#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure.  status=%s", \
-                                                              __FILE__, __LINE__, u_errorName(status)); return;}}
-
-#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};}
-
-#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\
-if (status!=errcode) {dataerrln("RegexTest failure at line %d.  Expected status=%s, got %s", \
-    __LINE__, u_errorName(errcode), u_errorName(status));};}
-
-#define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \
-    "RegexTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); }}
-
-#define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \
-    errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}
+#define REGEX_VERBOSE_TEXT(text) UPRV_BLOCK_MACRO_BEGIN { \
+    char buf[200]; \
+    utextToPrintable(buf,UPRV_LENGTHOF(buf),text); \
+    logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf); \
+} UPRV_BLOCK_MACRO_END
+// Needs a variable named `status` in scope; on failure reports via dataerrln() and returns from the (void) caller.
+#define REGEX_CHECK_STATUS UPRV_BLOCK_MACRO_BEGIN { \
+    if (U_FAILURE(status)) { \
+        dataerrln("%s:%d: RegexTest failure.  status=%s", \
+                  __FILE__, __LINE__, u_errorName(status)); \
+        return; \
+    } \
+} UPRV_BLOCK_MACRO_END
+// Reports via errln() when expr compares equal to FALSE; does not return, so the caller keeps running.
+#define REGEX_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
+    if ((expr)==FALSE) { \
+        errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr); \
+    } \
+} UPRV_BLOCK_MACRO_END
+// Declares its own local `status`, evaluates expr, then reports if status != errcode; only meaningful when expr passes `status`.
+#define REGEX_ASSERT_FAIL(expr, errcode) UPRV_BLOCK_MACRO_BEGIN { \
+    UErrorCode status=U_ZERO_ERROR; \
+    (expr); \
+    if (status!=errcode) { \
+        dataerrln("RegexTest failure at line %d.  Expected status=%s, got %s", \
+                  __LINE__, u_errorName(errcode), u_errorName(status)); \
+    } \
+} UPRV_BLOCK_MACRO_END
+// Variant of the status check that also prints the caller-supplied line; reports via errln() and does NOT return.
+#define REGEX_CHECK_STATUS_L(line) UPRV_BLOCK_MACRO_BEGIN { \
+    if (U_FAILURE(status)) { \
+        errln("RegexTest failure at line %d, from %d.  status=%d\n",__LINE__, (line), status); \
+    } \
+} UPRV_BLOCK_MACRO_END
+// Reports the caller-supplied line via errln() when expr compares equal to FALSE, then returns from the (void) caller.
+#define REGEX_ASSERT_L(expr, line) UPRV_BLOCK_MACRO_BEGIN { \
+    if ((expr)==FALSE) { \
+        errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); \
+        return; \
+    } \
+} UPRV_BLOCK_MACRO_END
  
  // expected: const char * , restricted to invariant characters.
  // actual: const UnicodeString &
-#define REGEX_ASSERT_UNISTR(expected, actual) { \
+#define REGEX_ASSERT_UNISTR(expected, actual) UPRV_BLOCK_MACRO_BEGIN { \
      if (UnicodeString(expected, -1, US_INV) != (actual)) { \
          errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n",  \
-                __FILE__, __LINE__, expected, extractToAssertBuf(actual));};}
+              __FILE__, __LINE__, expected, extractToAssertBuf(actual)); \
+    } \
+} UPRV_BLOCK_MACRO_END
  
  
  static UBool testUTextEqual(UText *uta, UText *utb) {
@@ -292,8 +265,8 @@ void RegexTest::assertUText(const char *expected, UText *actual, const char *fil
      if (!testUTextEqual(&expectedText, actual)) {
          char buf[201 /*21*/];
          char expectedBuf[201];
-        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
-        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
+        utextToPrintable(buf, UPRV_LENGTHOF(buf), actual);
+        utextToPrintable(expectedBuf, UPRV_LENGTHOF(expectedBuf), &expectedText);
          errln("%s:%d: assertUText: Failure: expected \"%s\" (%d chars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
      }
      utext_close(&expectedText);
@@ -314,8 +287,8 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
      if (!testUTextEqual(&expectedText, actual)) {
          char buf[201 /*21*/];
          char expectedBuf[201];
-        utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
-        utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]), &expectedText);
+        utextToPrintable(buf, UPRV_LENGTHOF(buf), actual);
+        utextToPrintable(expectedBuf, UPRV_LENGTHOF(expectedBuf), &expectedText);
          errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars), got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expectedText), buf, (int)utext_nativeLength(actual));
      }
      utext_close(&expectedText);
@@ -384,7 +357,10 @@ static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t
  //
  //---------------------------------------------------------------------------
  
-#define REGEX_TESTLM(pat, text, looking, match) {doRegexLMTest(pat, text, looking, match, __LINE__);doRegexLMTestUTF8(pat, text, looking, match, __LINE__);}
+#define REGEX_TESTLM(pat, text, looking, match) UPRV_BLOCK_MACRO_BEGIN { /* runs doRegexLMTest and doRegexLMTestUTF8 with the same arguments, tagged with the call site's line */ \
+    doRegexLMTest(pat, text, looking, match, __LINE__); \
+    doRegexLMTestUTF8(pat, text, looking, match, __LINE__); \
+} UPRV_BLOCK_MACRO_END
  
  UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
      const UnicodeString pattern(pat, -1, US_INV);
@@ -536,7 +512,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
  //          REGEX_ERR("pattern",   expected error line, column, expected status);
  //
  //---------------------------------------------------------------------------
-#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__);
+#define REGEX_ERR(pat, line, col, status) regex_err(pat, line, col, status, __LINE__)
  
  void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
                            UErrorCode expectedStatus, int32_t line) {
@@ -1877,8 +1853,8 @@ void RegexTest::API_Match_UTF8() {
          REGEX_VERBOSE_TEXT(&input2);
          utext_openUChars(&empty, NULL, 0, &status);
  
-        int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
-        int32_t input2Len = strlen("not abc");
+        int32_t input1Len = static_cast<int32_t>(strlen("abcdef this is a test")); /* TODO: why not nativelen (input1) ? */
+        int32_t input2Len = static_cast<int32_t>(strlen("not abc"));
  
  
          //
@@ -3550,11 +3526,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          }
      }
      parseMatcher->appendTail(deTaggedInput);
-    REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
+
+    if (groupStarts.size() != groupEnds.size()) {
+        errln("Error at line %d: mismatched <n> group tags in expected results.", line);
+        failed = true;
+        goto cleanupAndReturn;
+    }
      if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
-      errln("mismatched <r> tags");
-      failed = TRUE;
-      goto cleanupAndReturn;
+        errln("mismatched <r> tags");
+        failed = TRUE;
+        goto cleanupAndReturn;
      }
  
      //
@@ -3580,7 +3561,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
  
          if (UTF8Matcher == NULL) {
              // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
-          logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
+            logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
              status = U_ZERO_ERROR;
          }
      }
@@ -3589,6 +3570,9 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      //  Generate native indices for UTF8 versions of region and capture group info
      //
      if (UTF8Matcher != NULL) {
+        if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
+            UTF8Matcher->setTrace(TRUE);
+        }
          if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
          if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
  
@@ -3668,6 +3652,9 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          }
      }
      matcher->setTrace(FALSE);
+    if (UTF8Matcher) {
+        UTF8Matcher->setTrace(FALSE);
+    }
      if (U_FAILURE(status)) {
          errln("Error at line %d. ICU ErrorCode is %s", u_errorName(status));
      }
@@ -3689,16 +3676,17 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          failed = TRUE;
          goto cleanupAndReturn;
      }
+    if (isMatch && groupStarts.size() == 0) {
+        errln("Error at line %d: No match expected, but one found at position %d.", line, matcher->start(status));
+        failed = TRUE;
+    }
+    if (UTF8Matcher && isUTF8Match && groupStarts.size() == 0) {
+        errln("Error at line %d: No match expected, but one found at position %d (UTF-8).", line, UTF8Matcher->start(status));
+        failed = TRUE;
+    }
  
      if (flags.indexOf((UChar)0x47 /*G*/) >= 0) {
          // Only check for match / no match.  Don't check capture groups.
-        if (isMatch && groupStarts.size() == 0) {
-            errln("Error at line %d:  No match expected, but one found.", line);
-            failed = TRUE;
-        } else if (UTF8Matcher != NULL && isUTF8Match && groupStarts.size() == 0) {
-            errln("Error at line %d:  No match expected, but one found. (UTF8)", line);
-            failed = TRUE;
-        }
          goto cleanupAndReturn;
      }
  
@@ -3910,7 +3898,7 @@ UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
      fileSize = ftell(f);
      fileBuf = new char[fileSize];
      fseek(f, 0, SEEK_SET);
-    amt_read = fread(fileBuf, 1, fileSize, f);
+    amt_read = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
      if (amt_read != fileSize || fileSize <= 0) {
          errln("Error reading test data file.");
          goto cleanUpAndReturn;
@@ -3976,7 +3964,7 @@ cleanUpAndReturn:
          delete []retPtr;
          retPtr = 0;
          ulen   = 0;
-    };
+    }
      return retPtr;
  }
  
@@ -4803,7 +4791,7 @@ struct callBackContext {
      int32_t          maxCalls;
      int32_t          numCalls;
      int32_t          lastSteps;
-    void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;};
+    void reset(int32_t max) {maxCalls=max; numCalls=0; lastSteps=0;}
  };
  
  U_CDECL_BEGIN
@@ -4906,7 +4894,7 @@ struct progressCallBackContext {
      int64_t          lastIndex;
      int32_t          maxCalls;
      int32_t          numCalls;
-    void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
+    void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}
  };
  
  // call-back function for find().
@@ -5716,7 +5704,7 @@ void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expec
      //   Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8
      //   because string.unescape() will only shrink it.
      char * utf8Buffer = new char[uprv_strlen(data)+1];
-    u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), dataString.length(), &status);
+    u_strToUTF8(utf8Buffer, static_cast<int32_t>(uprv_strlen(data)+1), NULL, dataString.getBuffer(), dataString.length(), &status);
      REGEX_CHECK_STATUS;
      ut = utext_openUTF8(ut, utf8Buffer, -1, &status);
      REGEX_CHECK_STATUS;
@@ -5780,7 +5768,7 @@ void RegexTest::TestBug11371() {
  void RegexTest::TestBug11480() {
      // C API, get capture group of a group that does not participate in the match.
      //        (Returns a zero length string, with nul termination,
-    //         indistinguishable from a group with a zero lenght match.)
+    //         indistinguishable from a group with a zero length match.)
  
      UErrorCode status = U_ZERO_ERROR;
      URegularExpression *re = uregex_openC("(A)|(B)", 0, NULL, &status);
@@ -5796,6 +5784,224 @@ void RegexTest::TestBug11480() {
      REGEX_ASSERT(buf[1] == 0);
      REGEX_ASSERT(buf[2] == 13);
      uregex_close(re);
+
+    // UText C++ API, length of match is 0 for non-participating matches.
+    UText ut = UTEXT_INITIALIZER;
+    utext_openUnicodeString(&ut, &text, &status);
+    RegexMatcher matcher(UnicodeString("(A)|(B)"), 0, status);
+    REGEX_CHECK_STATUS;
+    matcher.reset(&ut);
+    REGEX_ASSERT(matcher.lookingAt(0, status));
+
+    // UText C++ API, Capture group 1 matches "A", position 0, length 1.
+    int64_t groupLen = -666;
+    UText group = UTEXT_INITIALIZER;
+    matcher.group(1, &group, groupLen, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(groupLen == 1);
+    REGEX_ASSERT(utext_getNativeIndex(&group) == 0);
+
+    // Capture group 2, the (B), does not participate in the match.
+    matcher.group(2, &group, groupLen, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(groupLen == 0);
+    REGEX_ASSERT(matcher.start(2, status) == -1);
+    REGEX_CHECK_STATUS;
+}
+
+void RegexTest::TestBug12884() {
+    // setTimeLimit() was not effective for empty sub-patterns with large {minimum counts}.
+    // Each section below sets a time limit, runs find(), and expects U_REGEX_TIME_OUT.
+    UnicodeString pattern(u"(((((((){120}){11}){11}){11}){80}){11}){4}");
+    UnicodeString text(u"hello");
+    UErrorCode status = U_ZERO_ERROR;
+    RegexMatcher m(pattern, text, 0, status);
+    REGEX_CHECK_STATUS;
+    m.setTimeLimit(5, status);  // NOTE(review): units are not visible here - presumably milliseconds, confirm against the API docs
+    m.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    // Non-greedy loops. They take a different code path during matching.
+    UnicodeString ngPattern(u"(((((((){120}?){11}?){11}?){11}?){80}?){11}?){4}?");
+    status = U_ZERO_ERROR;
+    RegexMatcher ngM(ngPattern, text, 0, status);
+    REGEX_CHECK_STATUS;
+    ngM.setTimeLimit(5, status);
+    ngM.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    // UText, wrapping non-UTF-16 text, also takes a different execution path.
+    const char *text8 = reinterpret_cast<const char*>(u8"¿Qué es Unicode?  Unicode proporciona un número único para cada"
+                          "carácter, sin importar la plataforma, sin importar el programa,"
+                          "sin importar el idioma.");
+    status = U_ZERO_ERROR;
+    LocalUTextPointer ut(utext_openUTF8(NULL, text8, -1, &status));
+    REGEX_CHECK_STATUS;
+    m.reset(ut.getAlias());  // re-run the greedy matcher on the UTF-8 UText; presumably the limit set above still applies
+    m.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+
+    status = U_ZERO_ERROR;
+    ngM.reset(ut.getAlias());  // same check for the non-greedy matcher
+    ngM.find(status);
+    REGEX_ASSERT(status == U_REGEX_TIME_OUT);
+}
+
+// Bug 13631. A find() of a pattern with a zero length look-behind assertion
+//            can cause a read past the end of the input text.
+//            The failure is seen when running this test with Clang's Address Sanitizer.
+
+void RegexTest::TestBug13631() {
+    const UChar *pats[] = { u"(?<!^)",
+                            u"(?<=^)",
+                            nullptr  // terminator for the loop below
+                          };
+    for (const UChar **pat=pats; *pat; ++pat) {
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString upat(*pat);
+        RegexMatcher matcher(upat, 0, status);
+        const UChar s =u'a';  // a single code unit, passed below with explicit length 1
+        UText *ut = utext_openUChars(nullptr, &s, 1, &status);
+        REGEX_CHECK_STATUS;
+        matcher.reset(ut);
+        while (matcher.find()) {  // no assertions: the loop only has to walk every match without faulting
+        }
+        utext_close(ut);
+    }
+}
+
+// Bug 13632 Out of bounds memory reference if a replacement string ends with a '$',
+//           where a following group specification would be expected.
+//           Failure shows when running the test under Clang's Address Sanitizer.
+
+void RegexTest::TestBug13632() {
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC(" ", 0, nullptr, &status);
+    const char16_t *sourceString = u"Hello, world.";
+    uregex_setText(re, sourceString, u_strlen(sourceString), &status);
+
+    const int32_t destCap = 20;
+    char16_t dest[destCap] = {};
+    const char16_t replacement[] = {u'x', u'$'};    // Not nul terminated string.
+    uregex_replaceAll(re, replacement, 2, dest, destCap, &status);
+
+    assertEquals("", U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);
+    uregex_close(re);
+}
+
+void RegexTest::TestBug20359() {
+    // The bug was a stack overflow while parsing a pattern with a huge number of adjacent \Q\E
+    // pairs (enter and exit pattern literal quote mode). The logic was correct;
+    // the implementation was changed to loop instead of recursing.
+
+    UnicodeString pattern;
+    for (int i=0; i<50000; ++i) {  // 50000 empty quoted sections in a row
+        pattern += u"\\Q\\E";
+    }
+    pattern += u"x";  // the only literal the pattern actually has to match
+
+    UErrorCode status = U_ZERO_ERROR;
+    LocalURegularExpressionPointer re(uregex_open(pattern.getBuffer(), pattern.length(),
+                                       0, nullptr, &status));
+    assertSuccess(WHERE, status);
+
+    // We have passed the point where the bug crashed. The following is a small sanity
+    // check that the pattern works, that all the \Q\E\Q\E... didn't cause other problems.
+
+    uregex_setText(re.getAlias(), u"abcxyz", -1, &status);  // presumably -1 means NUL-terminated - confirm against the API docs
+    assertSuccess(WHERE, status);
+    assertTrue(WHERE, uregex_find(re.getAlias(), 0, &status));
+    assertEquals(WHERE, 3, uregex_start(re.getAlias(), 0, &status));  // 'x' sits at index 3 of "abcxyz"
+    assertSuccess(WHERE, status);
+}
+
+
+void RegexTest::TestBug20863() {
+    // Test that patterns with a large number of named capture groups work correctly.
+    //
+    // The ticket was not for a bug per se, but to reduce memory usage by using lazy
+    // construction of the map from capture names to numbers, and decreasing the
+    // default size of the map.
+
+    constexpr int GROUP_COUNT = 2000;
+    std::vector<UnicodeString> groupNames;
+    for (int32_t i=0; i<GROUP_COUNT; ++i) {
+        UnicodeString name;
+        name.append(u"name");
+        name.append(Int64ToUnicodeString(i));
+        groupNames.push_back(name);
+    }
+
+    UnicodeString patternString;
+    for (UnicodeString name: groupNames) {
+        patternString.append(u"(?<");
+        patternString.append(name);
+        patternString.append(u">.)");
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError pe;
+    LocalPointer<RegexPattern> pattern(RegexPattern::compile(patternString, pe, status), status);
+    if (!assertSuccess(WHERE, status)) {
+        return;
+    }
+
+    for (int32_t i=0; i<GROUP_COUNT; ++i) {
+        int32_t group = pattern->groupNumberFromName(groupNames[i], status);
+        if (!assertSuccess(WHERE, status)) {
+            return;
+        }
+        assertEquals(WHERE, i+1, group);
+        // Note: group 0 is the overall match; group 1 is the first separate capture group.
+    }
+
+    // Verify that assignment of patterns with various combinations of named capture work.
+    // Lazy creation of the internal named capture map changed the implementation logic here.
+    {
+        LocalPointer<RegexPattern> pat1(RegexPattern::compile(u"abc", pe, status), status);
+        LocalPointer<RegexPattern> pat2(RegexPattern::compile(u"a(?<name>b)c", pe, status), status);
+        assertSuccess(WHERE, status);
+        assertFalse(WHERE, *pat1 == *pat2);
+        *pat1 = *pat2;
+        assertTrue(WHERE, *pat1 == *pat2);
+        assertEquals(WHERE, 1, pat1->groupNumberFromName(u"name", status));
+        assertEquals(WHERE, 1, pat2->groupNumberFromName(u"name", status));
+        assertSuccess(WHERE, status);
+    }
+
+    {
+        LocalPointer<RegexPattern> pat1(RegexPattern::compile(u"abc", pe, status), status);
+        LocalPointer<RegexPattern> pat2(RegexPattern::compile(u"a(?<name>b)c", pe, status), status);
+        assertSuccess(WHERE, status);
+        assertFalse(WHERE, *pat1 == *pat2);
+        *pat2 = *pat1;
+        assertTrue(WHERE, *pat1 == *pat2);
+        assertEquals(WHERE, 0, pat1->groupNumberFromName(u"name", status));
+        assertEquals(WHERE, U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);
+        status = U_ZERO_ERROR;
+        assertEquals(WHERE, 0, pat2->groupNumberFromName(u"name", status));
+        assertEquals(WHERE, U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);
+        status = U_ZERO_ERROR;
+    }
+
+    {
+        LocalPointer<RegexPattern> pat1(RegexPattern::compile(u"a(?<name1>b)c", pe, status), status);
+        LocalPointer<RegexPattern> pat2(RegexPattern::compile(u"a(?<name2>b)c", pe, status), status);
+        assertSuccess(WHERE, status);
+        assertFalse(WHERE, *pat1 == *pat2);
+        *pat2 = *pat1;
+        assertTrue(WHERE, *pat1 == *pat2);
+        assertEquals(WHERE, 1, pat1->groupNumberFromName(u"name1", status));
+        assertSuccess(WHERE, status);
+        assertEquals(WHERE, 1, pat2->groupNumberFromName(u"name1", status));
+        assertSuccess(WHERE, status);
+        assertEquals(WHERE, 0, pat1->groupNumberFromName(u"name2", status));
+        assertEquals(WHERE, U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);
+        status = U_ZERO_ERROR;
+        assertEquals(WHERE, 0, pat2->groupNumberFromName(u"name2", status));
+        assertEquals(WHERE, U_REGEX_INVALID_CAPTURE_GROUP_NAME, status);
+        status = U_ZERO_ERROR;
+    }
+
  }