]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/uregex.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uregex.cpp
index 9aab85e5df780257504a48c6347c3825bd82fd6d..57c2febe9d02bcb7f52e368897be6cd369886d7a 100644 (file)
@@ -1,9 +1,11 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
-*   Copyright (C) 2004-2005, International Business Machines
+*   Copyright (C) 2004-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
-*   file name:  regex.cpp
+*   file name:  uregex.cpp
 */
 
 #include "unicode/utypes.h"
 #include "unicode/ustring.h"
 #include "unicode/uchar.h"
 #include "unicode/uobject.h"
-#include "umutex.h"
-#include "uassert.h"
+#include "unicode/utf16.h"
 #include "cmemory.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "umutex.h"
+#include "uvectr32.h"
+
+#include "regextxt.h"
+
+U_NAMESPACE_BEGIN
+
+#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)   // (len)-(idx), or 0 when that difference is not positive
 
-struct URegularExpression: public UMemory {
+struct RegularExpression: public UMemory {   // Implementation object behind the opaque URegularExpression handle.
 public:
-    URegularExpression();
-    ~URegularExpression();
+    RegularExpression();
+    ~RegularExpression();
     int32_t           fMagic;
     RegexPattern     *fPat;
-    int32_t          *fPatRefCount;
+    u_atomic_int32_t *fPatRefCount;  // Counter shared with clones; the destructor that drops it to 0 frees fPat and fPatString.
     UChar            *fPatString;
     int32_t           fPatStringLen;
     RegexMatcher     *fMatcher;
     const UChar      *fText;         // Text from setText()
     int32_t           fTextLength;   // Length provided by user with setText(), which
                                      //  may be -1.
-
-    UnicodeString     fTextString;   // The setText(text) is wrapped into a UnicodeString.
-                                     // TODO: regexp engine should not depend on UnicodeString.
+    UBool             fOwnsText;     // When TRUE, a non-NULL fText is released with uprv_free() by this object.
 };
 
 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
 
-U_NAMESPACE_USE
-
-URegularExpression::URegularExpression() {
+RegularExpression::RegularExpression() {
     fMagic        = REXP_MAGIC;
     fPat          = NULL;
     fPatRefCount  = NULL;
@@ -51,34 +58,42 @@ URegularExpression::URegularExpression() {
     fMatcher      = NULL;
     fText         = NULL;
     fTextLength   = 0;
+    fOwnsText     = FALSE;
 }
 
-URegularExpression::~URegularExpression() {
+RegularExpression::~RegularExpression() {   // Frees the matcher, the shared pattern (last owner only) and any owned text.
     delete fMatcher;
     fMatcher = NULL;
     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
         delete fPat;
         uprv_free(fPatString);
-        uprv_free(fPatRefCount);
+        uprv_free((void *)fPatRefCount);   // the counter itself goes away together with the pattern
+    }
+    if (fOwnsText && fText!=NULL) {   // text installed by uregex_setText() has fOwnsText == FALSE and is not freed
+        uprv_free((void *)fText);     // cast removes const for the free call
     }
     fMagic = 0;
 }
 
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
 //----------------------------------------------------------------------------------------
 //
 //   validateRE    Do boilerplate style checks on API function parameters.
 //                 Return TRUE if they look OK.
 //----------------------------------------------------------------------------------------
-static UBool validateRE(const URegularExpression *re, UErrorCode *status, UBool requiresText = TRUE) {
+static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     if (U_FAILURE(*status)) {
         return FALSE;
     }
     if (re == NULL || re->fMagic != REXP_MAGIC) {
-        // U_ASSERT(FALSE);
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return FALSE;
     }
-    if (requiresText && re->fText == NULL) {
+    // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
+    if (requiresText && re->fText == NULL && !re->fOwnsText) {
         *status = U_REGEX_INVALID_STATE;
         return FALSE;
     }
@@ -109,13 +124,13 @@ uregex_open( const  UChar          *pattern,
         actualPatLen = u_strlen(pattern);
     }
 
-    URegularExpression *re     = new URegularExpression;
-    int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
+    RegularExpression  *re     = new RegularExpression;
+    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
     if (re == NULL || refC == NULL || patBuf == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         delete re;
-        uprv_free(refC);
+        uprv_free((void *)refC);
         uprv_free(patBuf);
         return NULL;
     }
@@ -124,23 +139,110 @@ uregex_open( const  UChar          *pattern,
 
     //
     // Make a copy of the pattern string, so we can return it later if asked.
-    //    For compiling the pattern, we will use a read-only-aliased UnicodeString
-    //    of this local copy, to avoid making even more copies.
+    //    For compiling the pattern, we will use a UText wrapper around
+    //    this local copy, to avoid making even more copies.
     //
     re->fPatString    = patBuf;
     re->fPatStringLen = patternLength;
     u_memcpy(patBuf, pattern, actualPatLen);
     patBuf[actualPatLen] = 0;
-    UnicodeString  patString(patternLength==-1, patBuf, patternLength);                 
+
+    UText patText = UTEXT_INITIALIZER;
+    utext_openUChars(&patText, patBuf, patternLength, status);
+
+    //
+    // Compile the pattern
+    //
+    if (pe != NULL) {
+        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
+    } else {
+        re->fPat = RegexPattern::compile(&patText, flags, *status);
+    }
+    utext_close(&patText);
+
+    if (U_FAILURE(*status)) {
+        goto ErrorExit;
+    }
+
+    //
+    // Create the matcher object
+    //
+    re->fMatcher = re->fPat->matcher(*status);
+    if (U_SUCCESS(*status)) {
+        return (URegularExpression*)re;
+    }
+
+ErrorExit:
+    delete re;
+    return NULL;
+
+}
+
+//----------------------------------------------------------------------------------------
+//
+//    uregex_openUText
+//
+//----------------------------------------------------------------------------------------
+U_CAPI URegularExpression *  U_EXPORT2
+uregex_openUText(UText          *pattern,
+                 uint32_t        flags,
+                 UParseError    *pe,
+                 UErrorCode     *status) {
+
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (pattern == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    int64_t patternNativeLength = utext_nativeLength(pattern);
+
+    if (patternNativeLength == 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    RegularExpression *re     = new RegularExpression;
+
+    UErrorCode lengthStatus = U_ZERO_ERROR;
+    int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
+
+    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
+    UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
+    if (re == NULL || refC == NULL || patBuf == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        delete re;
+        uprv_free((void *)refC);
+        uprv_free(patBuf);
+        return NULL;
+    }
+    re->fPatRefCount = refC;
+    *re->fPatRefCount = 1;
+
+    //
+    // Make a copy of the pattern string, so we can return it later if asked.
+    //    For compiling the pattern, we will use a read-only UText wrapper
+    //    around this local copy, to avoid making even more copies.
+    //
+    re->fPatString    = patBuf;
+    re->fPatStringLen = pattern16Length;
+    utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
+
+    UText patText = UTEXT_INITIALIZER;
+    utext_openUChars(&patText, patBuf, pattern16Length, status);
 
     //
     // Compile the pattern
     //
     if (pe != NULL) {
-        re->fPat = RegexPattern::compile(patString, flags, *pe, *status);
+        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
     } else {
-        re->fPat = RegexPattern::compile(patString, flags, *status);
+        re->fPat = RegexPattern::compile(&patText, flags, *status);
     }
+    utext_close(&patText);
+
     if (U_FAILURE(*status)) {
         goto ErrorExit;
     }
@@ -150,7 +252,7 @@ uregex_open( const  UChar          *pattern,
     //
     re->fMatcher = re->fPat->matcher(*status);
     if (U_SUCCESS(*status)) {
-        return re;
+        return (URegularExpression*)re;
     }
 
 ErrorExit:
@@ -165,9 +267,10 @@ ErrorExit:
 //
 //----------------------------------------------------------------------------------------
 U_CAPI void  U_EXPORT2
-uregex_close(URegularExpression  *re) {
+uregex_close(URegularExpression  *re2) {
+    RegularExpression *re = (RegularExpression*)re2;
     UErrorCode  status = U_ZERO_ERROR;
-    if (validateRE(re, &status, FALSE) == FALSE) {
+    if (validateRE(re, FALSE, &status) == FALSE) {
         return;
     }
     delete re;
@@ -179,13 +282,14 @@ uregex_close(URegularExpression  *re) {
 //    uregex_clone
 //
 //----------------------------------------------------------------------------------------
-U_CAPI URegularExpression * U_EXPORT2 
-uregex_clone(const URegularExpression *source, UErrorCode *status)  {
-    if (validateRE(source, status, FALSE) == FALSE) {
+U_CAPI URegularExpression * U_EXPORT2
+uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
+    RegularExpression *source = (RegularExpression*)source2;
+    if (validateRE(source, FALSE, status) == FALSE) {
         return NULL;
     }
 
-    URegularExpression *clone = new URegularExpression;
+    RegularExpression *clone = new RegularExpression;
     if (clone == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
@@ -196,19 +300,15 @@ uregex_clone(const URegularExpression *source, UErrorCode *status)  {
         delete clone;
         return NULL;
     }
-    if (clone == NULL) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
 
     clone->fPat          = source->fPat;
-    clone->fPatRefCount  = source->fPatRefCount; 
+    clone->fPatRefCount  = source->fPatRefCount;
     clone->fPatString    = source->fPatString;
     clone->fPatStringLen = source->fPatStringLen;
     umtx_atomic_inc(source->fPatRefCount);
     // Note:  fText is not cloned.
 
-    return clone;
+    return (URegularExpression*)clone;
 }
 
 
@@ -219,12 +319,13 @@ uregex_clone(const URegularExpression *source, UErrorCode *status)  {
 //    uregex_pattern
 //
 //------------------------------------------------------------------------------
-U_CAPI const UChar * U_EXPORT2 
-uregex_pattern(const  URegularExpression *regexp,
-               int32_t            *patLength,
-               UErrorCode         *status)  {
-    
-    if (validateRE(regexp, status, FALSE) == FALSE) {
+U_CAPI const UChar * U_EXPORT2
+uregex_pattern(const  URegularExpression *regexp2,
+                      int32_t            *patLength,
+                      UErrorCode         *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+
+    if (validateRE(regexp, FALSE, status) == FALSE) {
         return NULL;
     }
     if (patLength != NULL) {
@@ -234,14 +335,28 @@ uregex_pattern(const  URegularExpression *regexp,
 }
 
 
+//------------------------------------------------------------------------------
+//
+//    uregex_patternUText    Return the pattern source as a UText, obtained from
+//                           RegexPattern::patternText().  NULL if validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UText * U_EXPORT2
+uregex_patternUText(const URegularExpression *regexp2, UErrorCode *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    // Reject NULL or stale handles before touching fPat, as uregex_pattern() does.
+    return validateRE(regexp, FALSE, status) ? regexp->fPat->patternText(*status) : NULL;
+}
+
+
 //------------------------------------------------------------------------------
 //
 //    uregex_flags
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_flags(const URegularExpression *regexp, UErrorCode *status)  {
-    if (validateRE(regexp, status, FALSE) == FALSE) {
+U_CAPI int32_t U_EXPORT2
+uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status) == FALSE) {
         return 0;
     }
     int32_t flags = regexp->fPat->flags();
@@ -254,24 +369,61 @@ uregex_flags(const URegularExpression *regexp, UErrorCode *status)  {
 //    uregex_setText
 //
 //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
-uregex_setText(URegularExpression *regexp,
+U_CAPI void U_EXPORT2
+uregex_setText(URegularExpression *regexp2,
                const UChar        *text,
                int32_t             textLength,
                UErrorCode         *status)  {
-    if (validateRE(regexp, status, FALSE) == FALSE) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status) == FALSE) {   // text is not required yet: this call supplies it
         return;
     }
     if (text == NULL || textLength < -1) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
+
+    if (regexp->fOwnsText && regexp->fText != NULL) {   // release a UChar copy made earlier by uregex_getText()
+        uprv_free((void *)regexp->fText);
+    }
+
     regexp->fText       = text;
     regexp->fTextLength = textLength;
-    UBool isTerminated  = (textLength == -1);
+    regexp->fOwnsText   = FALSE;   // the caller's buffer is only referenced, never freed here
 
-    regexp->fTextString.setTo(isTerminated, text, textLength);
-    regexp->fMatcher->reset(regexp->fTextString);
+    UText input = UTEXT_INITIALIZER;   // stack UText, closed again below
+    utext_openUChars(&input, text, textLength, status);
+    regexp->fMatcher->reset(&input);
+    utext_close(&input); // reset() made a shallow clone, so we don't need this copy
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setUText    Make a UText the matcher's input.  Any cached UChar
+//                       view of the previous input is discarded.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setUText(URegularExpression *regexp2,
+                UText              *text,
+                UErrorCode         *status) {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, FALSE, status)) {
+        return;
+    }
+    if (text == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Release a UChar copy left behind by an earlier uregex_getText().
+    const UBool ownedCopy = re->fOwnsText && re->fText != NULL;
+    if (ownedCopy) { uprv_free((void *)re->fText); }
+    // The UChar view is only built on request, so start with none.
+    re->fOwnsText   = TRUE;
+    re->fTextLength = -1;
+    re->fText       = NULL;
+    re->fMatcher->reset(text);
 }
 
 
@@ -281,201 +433,712 @@ uregex_setText(URegularExpression *regexp,
 //    uregex_getText
 //
 //------------------------------------------------------------------------------
-U_CAPI const UChar * U_EXPORT2 
-uregex_getText(URegularExpression *regexp,
+U_CAPI const UChar * U_EXPORT2
+uregex_getText(URegularExpression *regexp2,
                int32_t            *textLength,
                UErrorCode         *status)  {
-    if (validateRE(regexp, status, FALSE) == FALSE) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status) == FALSE) {
         return NULL;
     }
+
+    if (regexp->fText == NULL) {
+        // No UChar view cached yet: build one from the matcher's UText.
+        UText *inputText = regexp->fMatcher->inputText();
+        int64_t inputNativeLength = utext_nativeLength(inputText);
+        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
+            regexp->fText = inputText->chunkContents;
+            regexp->fTextLength = (int32_t)inputNativeLength;
+            regexp->fOwnsText = FALSE; // because the UText owns it
+        } else {
+            UErrorCode lengthStatus = U_ZERO_ERROR;   // preflight only; its buffer overflow error is expected and ignored
+            int32_t length16 = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus);
+            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(length16+1));
+            if (inputChars == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return NULL; }   // cached fields stay untouched
+            utext_extract(inputText, 0, inputNativeLength, inputChars, length16+1, status);
+            regexp->fText = inputChars; regexp->fTextLength = length16;   // publish pointer and length together
+            regexp->fOwnsText = TRUE; // should already be set but just in case
+        }
+    }
+
     if (textLength != NULL) {
         *textLength = regexp->fTextLength;
     }
-    return regexp->fText;
+    return regexp->fText;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_getUText    Fetch the matcher's input through RegexMatcher::getInput().
+//                       dest comes back unchanged when validateRE() rejects the call.
+//------------------------------------------------------------------------------
+U_CAPI UText * U_EXPORT2
+uregex_getUText(URegularExpression *regexp2,
+                UText *dest, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    UText *result = dest;
+    if (validateRE(re, FALSE, status)) {
+        result = re->fMatcher->getInput(dest, *status);
+    }
+    return result;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_refreshUText    Hand text to RegexMatcher::refreshInputText().
+//                           Nothing happens when validateRE() rejects the call.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_refreshUText(URegularExpression *regexp2,
+                    UText              *text,
+                    UErrorCode         *status) {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    const UBool usable = validateRE(re, FALSE, status);
+    if (usable) {
+        re->fMatcher->refreshInputText(text, *status);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_matches    32-bit index variant: widens startIndex and calls
+//                      uregex_matches64().
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_matches(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status)  {
+    // int32_t -> int64_t is lossless, so a -1 index stays -1.
+    const int64_t wideStart = startIndex;
+    return uregex_matches64(regexp2, wideStart, status);
+}
+
+// startIndex == -1 selects RegexMatcher::matches(status); any other value is
+// forwarded to matches(startIndex, status).  FALSE if validateRE() fails.
+U_CAPI UBool U_EXPORT2
+uregex_matches64(URegularExpression *regexp2,
+                 int64_t            startIndex,
+                 UErrorCode        *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, TRUE, status)) {
+        return FALSE;
+    }
+    RegexMatcher *matcher = re->fMatcher;
+    if (startIndex == -1) {
+        return matcher->matches(*status);
+    }
+    return matcher->matches(startIndex, *status);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_lookingAt    32-bit index variant: widens startIndex and calls
+//                        uregex_lookingAt64().
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_lookingAt(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status)  {
+    // int32_t -> int64_t is lossless, so a -1 index stays -1.
+    const int64_t wideStart = startIndex;
+    return uregex_lookingAt64(regexp2, wideStart, status);
+}
+
+// startIndex == -1 selects RegexMatcher::lookingAt(status); any other value is
+// forwarded to lookingAt(startIndex, status).  FALSE if validateRE() fails.
+U_CAPI UBool U_EXPORT2
+uregex_lookingAt64(URegularExpression *regexp2,
+                   int64_t             startIndex,
+                   UErrorCode         *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, TRUE, status)) {
+        return FALSE;
+    }
+    RegexMatcher *matcher = re->fMatcher;
+    if (startIndex == -1) {
+        return matcher->lookingAt(*status);
+    }
+    return matcher->lookingAt(startIndex, *status);
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_find    32-bit index variant: widens startIndex and calls
+//                   uregex_find64().
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_find(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status)  {
+    // int32_t -> int64_t is lossless, so a -1 index stays -1.
+    const int64_t wideStart = startIndex;
+    return uregex_find64(regexp2, wideStart, status);
+}
+
+// startIndex == -1: resetPreserveRegion() and then find(status); otherwise the
+// index is handed to RegexMatcher::find(startIndex, status).
+U_CAPI UBool U_EXPORT2
+uregex_find64(URegularExpression *regexp2,
+              int64_t             startIndex,
+              UErrorCode         *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, TRUE, status)) {
+        return FALSE;
+    }
+    RegexMatcher *matcher = re->fMatcher;
+    if (startIndex != -1) {
+        return matcher->find(startIndex, *status);
+    }
+    matcher->resetPreserveRegion();
+    return matcher->find(*status);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_findNext    Call RegexMatcher::find(status) on the matcher as it
+//                       stands.  FALSE if validateRE() rejects the call.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_findNext(URegularExpression *regexp2,
+                UErrorCode         *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    UBool found = FALSE;
+    if (validateRE(re, TRUE, status)) {
+        found = re->fMatcher->find(*status);
+    }
+    return found;
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_groupCount    Report RegexMatcher::groupCount().
+//                         0 if validateRE() rejects the call.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_groupCount(URegularExpression *regexp2,
+                  UErrorCode         *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    int32_t count = 0;
+    if (validateRE(re, FALSE, status)) {
+        count = re->fMatcher->groupCount();
+    }
+    return count;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_groupNumberFromName    Look the name up with
+//                 RegexPattern::groupNumberFromName(); 0 if validateRE() fails.
+//------------------------------------------------------------------------------
+int32_t
+uregex_groupNumberFromName(URegularExpression *regexp2,
+                           const UChar        *groupName,
+                           int32_t             nameLength,
+                           UErrorCode          *status) {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, FALSE, status)) {
+        return 0;
+    }
+    const UnicodeString name(groupName, nameLength);
+    return re->fPat->groupNumberFromName(name, *status);
+}
+
+// Same lookup as uregex_groupNumberFromName(), for a name given as char *.
+int32_t
+uregex_groupNumberFromCName(URegularExpression *regexp2, const char *groupName,
+                            int32_t nameLength, UErrorCode *status) {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    int32_t groupNumber = 0;
+    if (validateRE(re, FALSE, status)) {
+        groupNumber = re->fPat->groupNumberFromName(groupName, nameLength, *status);
+    }
+    return groupNumber;
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_group    Copy capture group groupNum into dest and return its length.
+//                    0 if validateRE(), the buffer check or the group lookup fails.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_group(URegularExpression *regexp2,
+             int32_t             groupNum,
+             UChar              *dest,
+             int32_t             destCapacity,
+             UErrorCode          *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, TRUE, status) == FALSE) {
+        return 0;
+    }
+    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {   // a NULL dest is only allowed with zero capacity
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if (destCapacity == 0 || regexp->fText != NULL) {
+        // If preflighting or if we already have the text as UChars,
+        // this is a little cheaper than extracting from the UText
+
+        //
+        // Pick up the range of characters from the matcher
+        //
+        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
+        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
+        if (U_FAILURE(*status)) {
+            return 0;
+        }
+
+        //
+        // Trim length based on buffer capacity
+        //
+        int32_t fullLength = endIx - startIx;
+        int32_t copyLength = fullLength;
+        if (copyLength < destCapacity) {
+            dest[copyLength] = 0;                       // room left over: NUL-terminate
+        } else if (copyLength == destCapacity) {
+            *status = U_STRING_NOT_TERMINATED_WARNING;  // exact fit: copied without a terminator
+        } else {
+            copyLength = destCapacity;                  // truncated copy; fullLength is still returned
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+
+        //
+        // Copy capture group to user's buffer
+        //
+        if (copyLength > 0) {
+            u_memcpy(dest, &regexp->fText[startIx], copyLength);
+        }
+        return fullLength;
+    } else {
+        int64_t  start = regexp->fMatcher->start64(groupNum, *status);
+        int64_t  limit = regexp->fMatcher->end64(groupNum, *status);
+        if (U_FAILURE(*status)) {
+            return 0;
+        }
+        // Note edge cases:
+        //   Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
+        //   Zero Length Match: start == end.
+        int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
+        return length;
+    }
+
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_groupUText    Return capture group groupNum via RegexMatcher::group().
+//                         If validateRE() fails: dest, or a new empty UText if dest is NULL.
+//------------------------------------------------------------------------------
+U_CAPI UText * U_EXPORT2
+uregex_groupUText(URegularExpression *regexp2,
+                  int32_t             groupNum,
+                  UText              *dest,
+                  int64_t            *groupLength,
+                  UErrorCode         *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    int64_t ignoredLength = 0;   // receives the length when the caller passes groupLength == NULL
+    if (validateRE(regexp, TRUE, status) == FALSE) {
+        UErrorCode emptyTextStatus = U_ZERO_ERROR;
+        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
+    }
+    return regexp->fMatcher->group(groupNum, dest, groupLength ? *groupLength : ignoredLength, *status);
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_start    32-bit variant: calls uregex_start64() and narrows the
+//                    result to int32_t.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_start(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status)  {
+    // Narrowing cast: the 64-bit position is converted to int32_t.
+    const int64_t start64 = uregex_start64(regexp2, groupNum, status);
+    return (int32_t)start64;
+}
+
+// Start of capture group groupNum as reported by RegexMatcher::start64();
+// 0 when validateRE() rejects the call.
+U_CAPI int64_t U_EXPORT2
+uregex_start64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    int64_t position = 0;
+    if (validateRE(re, TRUE, status)) {
+        position = re->fMatcher->start64(groupNum, *status);
+    }
+    return position;
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_end    32-bit variant: calls uregex_end64() and narrows the
+//                  result to int32_t.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_end(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status)  {
+    // Narrowing cast: the 64-bit position is converted to int32_t.
+    const int64_t end64 = uregex_end64(regexp2, groupNum, status);
+    return (int32_t)end64;
+}
+
+// End of capture group groupNum as reported by RegexMatcher::end64();
+// 0 when validateRE() rejects the call.
+U_CAPI int64_t U_EXPORT2
+uregex_end64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    int64_t position = 0;
+    if (validateRE(re, TRUE, status)) {
+        position = re->fMatcher->end64(groupNum, *status);
+    }
+    return position;
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_reset    32-bit index variant: widens index and calls
+//                    uregex_reset64().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_reset(URegularExpression *regexp2, int32_t index, UErrorCode *status)  {
+    // int32_t -> int64_t cannot lose information.
+    const int64_t wideIndex = index;
+    uregex_reset64(regexp2, wideIndex, status);
+}
+
+// Forwards to RegexMatcher::reset(index, status) once validateRE() accepts the handle.
+U_CAPI void U_EXPORT2
+uregex_reset64(URegularExpression    *regexp2,
+               int64_t               index,
+               UErrorCode            *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (validateRE(re, TRUE, status)) {
+        re->fMatcher->reset(index, *status);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setRegion    32-bit variant: widens both bounds and calls
+//                        uregex_setRegion64().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setRegion(URegularExpression *regexp2, int32_t regionStart,
+                 int32_t regionLimit, UErrorCode *status)  {
+    const int64_t wideStart = regionStart;
+    const int64_t wideLimit = regionLimit;
+    uregex_setRegion64(regexp2, wideStart, wideLimit, status);
+}
+
+// Passes the bounds to RegexMatcher::region(start, limit, status).
+U_CAPI void U_EXPORT2
+uregex_setRegion64(URegularExpression   *regexp2,
+                   int64_t               regionStart,
+                   int64_t               regionLimit,
+                   UErrorCode           *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (validateRE(re, TRUE, status)) {
+        re->fMatcher->region(regionStart, regionLimit, *status);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setRegionAndStart    Pass both bounds and startIndex to
+//                 RegexMatcher::region(start, limit, startIndex, status).
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setRegionAndStart(URegularExpression   *regexp2,
+                         int64_t               regionStart,
+                         int64_t               regionLimit,
+                         int64_t               startIndex,
+                         UErrorCode           *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    const UBool usable = validateRE(re, TRUE, status);
+    if (usable) {
+        re->fMatcher->region(regionStart, regionLimit, startIndex, *status);
+    }
+}
+
+//------------------------------------------------------------------------------
+//
+//    uregex_regionStart    32-bit variant: calls uregex_regionStart64() and
+//                          narrows the result to int32_t.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_regionStart(const URegularExpression *regexp2, UErrorCode *status)  {
+    const int64_t start64 = uregex_regionStart64(regexp2, status);
+    return (int32_t)start64;
+}
+
+// Reports RegexMatcher::regionStart(); 0 if validateRE() rejects the call.
+U_CAPI int64_t U_EXPORT2
+uregex_regionStart64(const URegularExpression *regexp2, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, TRUE, status)) {
+        return 0;
+    }
+    return re->fMatcher->regionStart();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_regionEnd    32-bit variant: calls uregex_regionEnd64() and
+//                        narrows the result to int32_t.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_regionEnd(const URegularExpression *regexp2, UErrorCode *status)  {
+    const int64_t end64 = uregex_regionEnd64(regexp2, status);
+    return (int32_t)end64;
+}
+
+// Reports RegexMatcher::regionEnd(); 0 if validateRE() rejects the call.
+U_CAPI int64_t U_EXPORT2
+uregex_regionEnd64(const URegularExpression *regexp2, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    if (!validateRE(re, TRUE, status)) {
+        return 0;
+    }
+    return re->fMatcher->regionEnd();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hasTransparentBounds    Report RegexMatcher::hasTransparentBounds().
+//                                   FALSE if validateRE() rejects the call.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hasTransparentBounds(const URegularExpression *regexp2, UErrorCode *status)  {
+    RegularExpression *re = (RegularExpression*)regexp2;
+    UBool transparent = FALSE;
+    if (validateRE(re, FALSE, status)) {
+        transparent = re->fMatcher->hasTransparentBounds();
+    }
+    return transparent;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_useTransparentBounds
+//        Passes b to the wrapped matcher's useTransparentBounds(); no-op if validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_useTransparentBounds(URegularExpression    *regexp2,
+                            UBool                  b,
+                            UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, FALSE, status) == FALSE) {
+        return;                                                // handle rejected: matcher is left untouched
+    }
+    regexp->fMatcher->useTransparentBounds(b);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hasAnchoringBounds
+//        Returns the wrapped matcher's hasAnchoringBounds(); FALSE if validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
+                                 UErrorCode           *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, FALSE, status) == FALSE) {
+        return FALSE;                                          // handle rejected: report FALSE
+    }
+    return regexp->fMatcher->hasAnchoringBounds();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_useAnchoringBounds
+//        Passes b to the wrapped matcher's useAnchoringBounds(); no-op if validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_useAnchoringBounds(URegularExpression    *regexp2,
+                          UBool                  b,
+                          UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, FALSE, status) == FALSE) {
+        return;                                                // handle rejected: matcher is left untouched
+    }
+    regexp->fMatcher->useAnchoringBounds(b);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hitEnd
+//        Returns the wrapped matcher's hitEnd(); FALSE if validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hitEnd(const  URegularExpression   *regexp2,
+                     UErrorCode           *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, TRUE, status) == FALSE) {           // TRUE: presumably "input text must be set" -- confirm in validateRE()
+        return FALSE;                                          // handle rejected: report FALSE
+    }
+    return regexp->fMatcher->hitEnd();
 }
 
 
 //------------------------------------------------------------------------------
 //
-//    uregex_matches
+//    uregex_requireEnd
 //
 //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
-uregex_matches(URegularExpression *regexp,
-                int32_t            startIndex,
-                UErrorCode        *status)  {
-    if (validateRE(regexp, status) == FALSE) {
+U_CAPI UBool U_EXPORT2
+uregex_requireEnd(const  URegularExpression   *regexp2,
+                         UErrorCode           *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, TRUE, status) == FALSE) {
         return FALSE;
     }
-    UBool result = regexp->fMatcher->matches(startIndex, *status);
-    return result;
+    return regexp->fMatcher->requireEnd();
 }
 
 
-
 //------------------------------------------------------------------------------
 //
-//    uregex_lookingAt
+//    uregex_setTimeLimit
 //
 //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
-uregex_lookingAt(URegularExpression *regexp,
-                 int32_t             startIndex,
-                 UErrorCode         *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
+U_CAPI void U_EXPORT2
+uregex_setTimeLimit(URegularExpression   *regexp2,
+                    int32_t               limit,
+                    UErrorCode           *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status)) {
+        regexp->fMatcher->setTimeLimit(limit, *status);
     }
-    UBool result = regexp->fMatcher->lookingAt(startIndex, *status);
-    return result;
 }
 
 
 
 //------------------------------------------------------------------------------
 //
-//    uregex_find
+//    uregex_getTimeLimit
 //
 //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
-uregex_find(URegularExpression *regexp,
-            int32_t             startIndex, 
-            UErrorCode         *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
-    }
-    UBool result = regexp->fMatcher->find(startIndex, *status);
-    return result;
+U_CAPI int32_t U_EXPORT2                                       // reads back the matcher's time limit
+uregex_getTimeLimit(const  URegularExpression   *regexp2,
+                           UErrorCode           *status) {
+    int32_t retVal = 0;                                        // result when validateRE() rejects the handle
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, FALSE, status)) {
+        retVal = regexp->fMatcher->getTimeLimit();             // value comes straight from the wrapped RegexMatcher
+    }
+    return retVal;
 }
 
+
+
 //------------------------------------------------------------------------------
 //
-//    uregex_findNext
+//    uregex_setStackLimit
 //
 //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
-uregex_findNext(URegularExpression *regexp,
-                UErrorCode         *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
+U_CAPI void U_EXPORT2
+uregex_setStackLimit(URegularExpression   *regexp2,
+                     int32_t               limit,
+                     UErrorCode           *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status)) {
+        regexp->fMatcher->setStackLimit(limit, *status);
     }
-    UBool result = regexp->fMatcher->find();
-    return result;
 }
 
+
+
 //------------------------------------------------------------------------------
 //
-//    uregex_groupCount
+//    uregex_getStackLimit
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_groupCount(URegularExpression *regexp,
-                  UErrorCode         *status)  {
-    if (validateRE(regexp, status, FALSE) == FALSE) {
-        return 0;
-    }
-    int32_t  result = regexp->fMatcher->groupCount();
-    return result;
+U_CAPI int32_t U_EXPORT2                                       // reads back the matcher's stack limit
+uregex_getStackLimit(const  URegularExpression   *regexp2,
+                            UErrorCode           *status) {
+    int32_t retVal = 0;                                        // result when validateRE() rejects the handle
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, FALSE, status)) {
+        retVal = regexp->fMatcher->getStackLimit();            // value comes straight from the wrapped RegexMatcher
+    }
+    return retVal;
 }
 
 
 //------------------------------------------------------------------------------
 //
-//    uregex_group
+//    uregex_setMatchCallback
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_group(URegularExpression *regexp,
-             int32_t             groupNum,
-             UChar              *dest,
-             int32_t             destCapacity,
-             UErrorCode          *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return 0;
-    }
-    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    //
-    // Pick up the range of characters from the matcher
-    //
-    int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
-    int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    //
-    // Trim length based on buffer capacity
-    // 
-    int32_t fullLength = endIx - startIx;
-    int32_t copyLength = fullLength;
-    if (copyLength < destCapacity) {
-        dest[copyLength] = 0;
-    } else  if (copyLength == destCapacity) {
-        *status = U_STRING_NOT_TERMINATED_WARNING;
-    } else {
-        copyLength = destCapacity;
-        *status = U_BUFFER_OVERFLOW_ERROR;
-    }
-
-    //
-    // Copy capture group to user's buffer
-    //
-    if (copyLength > 0) {
-        u_memcpy(dest, &regexp->fText[startIx], copyLength);
+U_CAPI void U_EXPORT2
+uregex_setMatchCallback(URegularExpression      *regexp2,
+                        URegexMatchCallback     *callback,
+                        const void              *context,
+                        UErrorCode              *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status)) {
+        regexp->fMatcher->setMatchCallback(callback, context, *status);
     }
-    return fullLength;
 }
 
 
 //------------------------------------------------------------------------------
 //
-//    uregex_start
+//    uregex_getMatchCallback
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_start(URegularExpression *regexp,
-             int32_t             groupNum,
-             UErrorCode          *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return 0;
-    }
-    int32_t result = regexp->fMatcher->start(groupNum, *status);
-    return result;
+U_CAPI void U_EXPORT2                                          // forwards to the matcher's getMatchCallback()
+uregex_getMatchCallback(const URegularExpression    *regexp2,
+                        URegexMatchCallback        **callback,
+                        const void                 **context,
+                        UErrorCode                  *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+     if (validateRE(regexp, FALSE, status)) {
+         regexp->fMatcher->getMatchCallback(*callback, *context, *status);   // NOTE(review): callback/context are dereferenced with no NULL check
+     }
 }
 
 
 //------------------------------------------------------------------------------
 //
-//    uregex_end
+//    uregex_setFindProgressCallback
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_end(URegularExpression   *regexp,
-           int32_t               groupNum,
-           UErrorCode           *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return 0;
+U_CAPI void U_EXPORT2
+uregex_setFindProgressCallback(URegularExpression              *regexp2,
+                                URegexFindProgressCallback      *callback,
+                                const void                      *context,
+                                UErrorCode                      *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, FALSE, status)) {
+        regexp->fMatcher->setFindProgressCallback(callback, context, *status);
     }
-    int32_t result = regexp->fMatcher->end(groupNum, *status);
-    return result;
 }
 
+
 //------------------------------------------------------------------------------
 //
-//    uregex_reset
+//    uregex_getFindProgressCallback
 //
 //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
-uregex_reset(URegularExpression    *regexp,
-             int32_t               index,
-             UErrorCode            *status)  {
-    if (validateRE(regexp, status) == FALSE) {
-        return;
-    }
-    regexp->fMatcher->reset(index, *status);
+U_CAPI void U_EXPORT2                                          // forwards to the matcher's getFindProgressCallback()
+uregex_getFindProgressCallback(const URegularExpression          *regexp2,
+                                URegexFindProgressCallback        **callback,
+                                const void                        **context,
+                                UErrorCode                        *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+     if (validateRE(regexp, FALSE, status)) {
+         regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);   // NOTE(review): callback/context are dereferenced with no NULL check
+     }
 }
 
 
@@ -484,52 +1147,93 @@ uregex_reset(URegularExpression    *regexp,
 //    uregex_replaceAll
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_replaceAll(URegularExpression    *regexp,
+U_CAPI int32_t U_EXPORT2
+uregex_replaceAll(URegularExpression    *regexp2,
                   const UChar           *replacementText,
                   int32_t                replacementLength,
                   UChar                 *destBuf,
                   int32_t                destCapacity,
                   UErrorCode            *status)  {
-    if (validateRE(regexp, status) == FALSE) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, TRUE, status) == FALSE) {
         return 0;
     }
     if (replacementText == NULL || replacementLength < -1 ||
-        destBuf == NULL && destCapacity > 0 ||
+        (destBuf == NULL && destCapacity > 0) ||
         destCapacity < 0) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
 
     int32_t   len = 0;
-    uregex_reset(regexp, 0, status);
-    while (uregex_findNext(regexp, status)) {
-        len += uregex_appendReplacement(regexp, replacementText, replacementLength, 
+
+    uregex_reset(regexp2, 0, status);
+
+    // Note: Separate error code variables for findNext() and appendReplacement()
+    //       are used so that destination buffer overflow errors
+    //       in appendReplacement won't stop findNext() from working.
+    //       appendReplacement() and appendTail() special case incoming buffer
+    //       overflow errors, continuing to return the correct length.
+    UErrorCode  findStatus = *status;
+    while (uregex_findNext(regexp2, &findStatus)) {
+        len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
                                         &destBuf, &destCapacity, status);
     }
-    len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
+    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
+
+    if (U_FAILURE(findStatus)) {
+        // If anything went wrong with the findNext(), make that error trump
+        //   whatever may have happened with the append() operations.
+        //   Errors in findNext() are not expected.
+        *status = findStatus;
+    }
 
     return len;
 }
 
 
+//------------------------------------------------------------------------------
+//
+//    uregex_replaceAllUText
+//        UText form: validates the arguments, then delegates to the matcher's replaceAll().
+//------------------------------------------------------------------------------
+U_CAPI UText * U_EXPORT2
+uregex_replaceAllUText(URegularExpression    *regexp2,
+                       UText                 *replacementText,
+                       UText                 *dest,
+                       UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, TRUE, status) == FALSE) {
+        return 0;                                              // null UText* when the handle is rejected
+    }
+    if (replacementText == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;                                              // null UText* for a missing replacement text
+    }
+
+    dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);   // the matcher's return value is what the caller gets back
+    return dest;
+}
+
+
 //------------------------------------------------------------------------------
 //
 //    uregex_replaceFirst
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_replaceFirst(URegularExpression  *regexp,
+U_CAPI int32_t U_EXPORT2
+uregex_replaceFirst(URegularExpression  *regexp2,
                     const UChar         *replacementText,
                     int32_t              replacementLength,
                     UChar               *destBuf,
                     int32_t              destCapacity,
                     UErrorCode          *status)  {
-    if (validateRE(regexp, status) == FALSE) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, TRUE, status) == FALSE) {
         return 0;
     }
     if (replacementText == NULL || replacementLength < -1 ||
-        destBuf == NULL && destCapacity > 0 ||
+        (destBuf == NULL && destCapacity > 0) ||
         destCapacity < 0) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
@@ -537,13 +1241,13 @@ uregex_replaceFirst(URegularExpression  *regexp,
 
     int32_t   len = 0;
     UBool     findSucceeded;
-    uregex_reset(regexp, 0, status);
-    findSucceeded = uregex_find(regexp, 0, status);
+    uregex_reset(regexp2, 0, status);
+    findSucceeded = uregex_find(regexp2, 0, status);
     if (findSucceeded) {
-        len = uregex_appendReplacement(regexp, replacementText, replacementLength, 
+        len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
                                        &destBuf, &destCapacity, status);
     }
-    len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
+    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
 
     return len;
 }
@@ -551,48 +1255,70 @@ uregex_replaceFirst(URegularExpression  *regexp,
 
 //------------------------------------------------------------------------------
 //
-//    uregex_appendReplacement
+//    uregex_replaceFirstUText
 //
 //------------------------------------------------------------------------------
+U_CAPI UText * U_EXPORT2                                       // UText form: validates, then delegates to the matcher's replaceFirst()
+uregex_replaceFirstUText(URegularExpression  *regexp2,
+                         UText                 *replacementText,
+                         UText                 *dest,
+                         UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;   // opaque C handle -> implementation struct
+    if (validateRE(regexp, TRUE, status) == FALSE) {
+        return 0;                                              // null UText* when the handle is rejected
+    }
+    if (replacementText == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;                                              // null UText* for a missing replacement text
+    }
 
+    dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);   // the matcher's return value is what the caller gets back
+    return dest;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_appendReplacement
+//
+//------------------------------------------------------------------------------
 
+U_NAMESPACE_BEGIN
 //
 //  Dummy class, because these functions need to be friends of class RegexMatcher,
 //               and stand-alone C functions don't work as friends
 //
-U_NAMESPACE_BEGIN
 class RegexCImpl {
  public:
-   inline static  int32_t appendReplacement(URegularExpression    *regexp,
+   inline static  int32_t appendReplacement(RegularExpression    *regexp,
                       const UChar           *replacementText,
                       int32_t                replacementLength,
                       UChar                **destBuf,
                       int32_t               *destCapacity,
                       UErrorCode            *status);
 
-   inline static int32_t appendTail(URegularExpression    *regexp,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status);
+   inline static int32_t appendTail(RegularExpression    *regexp,
+        UChar                **destBuf,
+        int32_t               *destCapacity,
+        UErrorCode            *status);
+
+    inline static int32_t split(RegularExpression    *regexp,
+        UChar                 *destBuf,
+        int32_t                destCapacity,
+        int32_t               *requiredCapacity,
+        UChar                 *destFields[],
+        int32_t                destFieldsCapacity,
+        UErrorCode            *status);
 };
-U_NAMESPACE_END
 
+U_NAMESPACE_END
 
-//
-//  Call-back function for u_unescapeAt(), used when we encounter
-//    \uxxxx or \Uxxxxxxxxx escapes in the replacement text.
-//
-U_CDECL_BEGIN
-static UChar U_CALLCONV
-unescape_charAt(int32_t offset, void *context) {
-    UChar c16 = ((UChar *)context)[offset];
-    return c16;
-}
-U_CDECL_END
 
 
 static const UChar BACKSLASH  = 0x5c;
 static const UChar DOLLARSIGN = 0x24;
+static const UChar LEFTBRACKET = 0x7b;
+static const UChar RIGHTBRACKET = 0x7d;
 
 //
 //  Move a character to an output buffer, with bounds checking on the index.
@@ -610,18 +1336,18 @@ static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
 //
 //  appendReplacement, the actual implementation.
 //
-int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
-                  const UChar           *replacementText,
-                  int32_t                replacementLength,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status)  {
+int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
+                                      const UChar           *replacementText,
+                                      int32_t                replacementLength,
+                                      UChar                **destBuf,
+                                      int32_t               *destCapacity,
+                                      UErrorCode            *status)  {
 
     // If we come in with a buffer overflow error, don't suppress the operation.
     //  A series of appendReplacements, appendTail need to correctly preflight
     //  the buffer size when an overflow happens somewhere in the middle.
     UBool pendingBufferOverflow = FALSE;
-    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity == 0) {
+    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
         pendingBufferOverflow = TRUE;
         *status = U_ZERO_ERROR;
     }
@@ -629,12 +1355,12 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
     //
     // Validate all paramters
     //
-    if (validateRE(regexp, status) == FALSE) {
+    if (validateRE(regexp, TRUE, status) == FALSE) {
         return 0;
     }
     if (replacementText == NULL || replacementLength < -1 ||
-        destCapacity == NULL || destBuf == NULL || 
-        *destBuf == NULL && *destCapacity > 0 ||
+        destCapacity == NULL || destBuf == NULL ||
+        (*destBuf == NULL && *destCapacity > 0) ||
         *destCapacity < 0) {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
@@ -650,7 +1376,7 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
     int32_t   capacity         = *destCapacity;
     int32_t   destIdx          =  0;
     int32_t   i;
-    
+
     // If it wasn't supplied by the caller,  get the length of the replacement text.
     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
     //          the fly and avoid this step.
@@ -659,19 +1385,37 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
     }
 
     // Copy input string from the end of previous match to start of current match
-    for (i=m->fLastMatchEnd; i<m->fMatchStart; i++) {
-        appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
+    if (regexp->fText != NULL) {
+        int32_t matchStart;
+        int32_t lastMatchEnd;
+        if (UTEXT_USES_U16(m->fInputText)) {
+            lastMatchEnd = (int32_t)m->fLastMatchEnd;
+            matchStart = (int32_t)m->fMatchStart;
+        } else {
+            // !!!: Would like a better way to do this!
+            UErrorCode tempStatus = U_ZERO_ERROR;
+            lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &tempStatus);
+            tempStatus = U_ZERO_ERROR;
+            matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &tempStatus);
+        }
+        for (i=lastMatchEnd; i<matchStart; i++) {
+            appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
+        }
+    } else {
+        UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
+        destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
+                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
+                                 &possibleOverflowError);
     }
-
-    
+    U_ASSERT(destIdx >= 0);
 
     // scan the replacement text, looking for substitutions ($n) and \escapes.
     int32_t  replIdx = 0;
-    while (replIdx < replacementLength) {
+    while (replIdx < replacementLength && U_SUCCESS(*status)) {
         UChar  c = replacementText[replIdx];
         replIdx++;
         if (c != DOLLARSIGN && c != BACKSLASH) {
-            // Common case, no substitution, no escaping, 
+            // Common case, no substitution, no escaping,
             //  just copy the char to the dest buf.
             appendToBuf(c, &destIdx, dest, capacity);
             continue;
@@ -690,9 +1434,9 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
 
             if (c==0x55/*U*/ || c==0x75/*u*/) {
                 // We have a \udddd or \Udddddddd escape sequence.
-                UChar32 escapedChar = 
-                    u_unescapeAt(unescape_charAt,
-                       &replIdx,                   // Index is updated by unescapeAt 
+                UChar32 escapedChar =
+                    u_unescapeAt(uregex_ucstr_unescape_charAt,
+                       &replIdx,                   // Index is updated by unescapeAt
                        replacementLength,          // Length of replacement text
                        (void *)replacementText);
 
@@ -716,58 +1460,86 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
             continue;
         }
 
+        // We've got a $.  Pick up the following capture group name or number.
+        // For numbers, consume only digits that produce a valid capture group for the pattern.
 
-
-        // We've got a $.  Pick up a capture group number if one follows.
-        // Consume at most the number of digits necessary for the largest capture
-        // number that is valid for this pattern.
-
-        int32_t numDigits = 0;
         int32_t groupNum  = 0;
-        UChar32 digitC;
-        for (;;) {
-            if (replIdx >= replacementLength) {
-                break;
-            }
-            U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
-            if (u_isdigit(digitC) == FALSE) {
-                break;
-            }
+        U_ASSERT(c == DOLLARSIGN);
+        UChar32 c32 = -1;
+        if (replIdx < replacementLength) {
+            U16_GET(replacementText, 0, replIdx, replacementLength, c32);
+        }
+        if (u_isdigit(c32)) {
+            int32_t numDigits = 0;
+            int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
+            for (;;) {
+                if (replIdx >= replacementLength) {
+                    break;
+                }
+                U16_GET(replacementText, 0, replIdx, replacementLength, c32);
+                if (u_isdigit(c32) == FALSE) {
+                    break;
+                }
 
+                int32_t digitVal = u_charDigitValue(c32);
+                if (groupNum * 10 + digitVal <= numCaptureGroups) {
+                    groupNum = groupNum * 10 + digitVal;
+                    U16_FWD_1(replacementText, replIdx, replacementLength);
+                    numDigits++;
+                } else {
+                    if (numDigits == 0) {
+                        *status = U_INDEX_OUTOFBOUNDS_ERROR;
+                    }
+                    break;
+                }
+            }
+        } else if (c32 == LEFTBRACKET) {
+            // Scan for Named Capture Group, ${name}.
+            UnicodeString groupName;
             U16_FWD_1(replacementText, replIdx, replacementLength);
-            groupNum=groupNum*10 + u_charDigitValue(digitC);
-            numDigits++;
-            if (numDigits >= m->fPattern->fMaxCaptureDigits) {
-                break;
+            while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) { 
+                if (replIdx >= replacementLength) {
+                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
+                    break;
+                }
+                U16_NEXT(replacementText, replIdx, replacementLength, c32);
+                if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z
+                        (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z
+                        (c32 >= 0x31 && c32 <= 0x39)) {       // 1..9 -- NOTE(review): '0' (0x30) is excluded; probably meant 0x30..0x39
+                    groupName.append(c32);
+                } else if (c32 == RIGHTBRACKET) {
+                    groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName);
+                    if (groupNum == 0) {
+                        // Name not defined by pattern.
+                        *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
+                    }
+                } else {
+                    // Character was something other than a name char or a closing '}'
+                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
+                }
             }
+        } else {
+            // $ not followed by {name} or digits.
+            *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
         }
 
 
-        if (numDigits == 0) {
-            // The $ didn't introduce a group number at all.
-            // Treat it as just part of the substitution text.
-            appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
-            continue;
-        }
-
         // Finally, append the capture group data to the destination.
-        int32_t  capacityRemaining = capacity - destIdx;
-        if (capacityRemaining < 0) {
-            capacityRemaining = 0;
-        }
-        destIdx += uregex_group(regexp, groupNum, dest+destIdx, capacityRemaining, status);
-        if (*status == U_BUFFER_OVERFLOW_ERROR) {
-            // Ignore buffer overflow when extracting the group.  We need to
-            //   continue on to get full size of the untruncated result.  We will
-            //   raise our own buffer overflow error at the end.
-            *status = U_ZERO_ERROR;
+        if (U_SUCCESS(*status)) {
+            destIdx += uregex_group((URegularExpression*)regexp, groupNum,
+                                    dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
+            if (*status == U_BUFFER_OVERFLOW_ERROR) {
+                // Ignore buffer overflow when extracting the group.  We need to
+                //   continue on to get full size of the untruncated result.  We will
+                //   raise our own buffer overflow error at the end.
+                *status = U_ZERO_ERROR;
+            }
         }
 
         if (U_FAILURE(*status)) {
-            // Can fail if group number is out of range.
+            // bad group number or name.
             break;
         }
-
     }
 
     //
@@ -776,12 +1548,14 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
     //
     if (destIdx < capacity) {
         dest[destIdx] = 0;
-    } else if (destIdx == *destCapacity) {
-        *status = U_STRING_NOT_TERMINATED_WARNING;
-    } else {
-        *status = U_BUFFER_OVERFLOW_ERROR;
+    } else if (U_SUCCESS(*status)) {
+        if (destIdx == *destCapacity) {
+            *status = U_STRING_NOT_TERMINATED_WARNING;
+        } else {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
     }
-    
+
     //
     // Return an updated dest buffer and capacity to the caller.
     //
@@ -806,91 +1580,127 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
 }
 
 //
-//   appendReplacement   the acutal API function,
+//   appendReplacement   the actual API function,
 //
-U_CAPI int32_t U_EXPORT2 
-uregex_appendReplacement(URegularExpression    *regexp,
-                  const UChar           *replacementText,
-                  int32_t                replacementLength,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status) {
+U_CAPI int32_t U_EXPORT2
+uregex_appendReplacement(URegularExpression    *regexp2,
+                         const UChar           *replacementText,
+                         int32_t                replacementLength,
+                         UChar                **destBuf,
+                         int32_t               *destCapacity,
+                         UErrorCode            *status) {
+    // Convert the public URegularExpression handle to the internal RegularExpression, then delegate.
+    RegularExpression *regexp = (RegularExpression*)regexp2;
     return RegexCImpl::appendReplacement(
         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
 }
 
+//   uregex_appendReplacementUText   UText flavor of appendReplacement; forwards straight to
+//       RegexMatcher::appendReplacement(dest, replText, status) on the wrapped matcher.
+//       NOTE(review): regexp2 is dereferenced without a validateRE() check - confirm intended.
+U_CAPI void U_EXPORT2
+uregex_appendReplacementUText(URegularExpression    *regexp2,
+                              UText                 *replText,
+                              UText                 *dest,
+                              UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    regexp->fMatcher->appendReplacement(dest, replText, *status);
+}
+
 
 //------------------------------------------------------------------------------
 //
 //    uregex_appendTail
 //
 //------------------------------------------------------------------------------
-int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status)  {
+int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
+                               UChar                **destBuf,
+                               int32_t               *destCapacity,
+                               UErrorCode            *status)
+{
 
     // If we come in with a buffer overflow error, don't suppress the operation.
     //  A series of appendReplacements, appendTail need to correctly preflight
     //  the buffer size when an overflow happens somewhere in the middle.
     UBool pendingBufferOverflow = FALSE;
-    if (*status == U_BUFFER_OVERFLOW_ERROR && *destCapacity == 0) {
+    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
         pendingBufferOverflow = TRUE;
         *status = U_ZERO_ERROR;
     }
 
-    if (validateRE(regexp, status) == FALSE) {
+    if (validateRE(regexp, TRUE, status) == FALSE) {
         return 0;
     }
-    if (destCapacity == NULL || destBuf == NULL || 
-        *destBuf == NULL && *destCapacity > 0 ||
-        *destCapacity < 0) {
+
+    if (destCapacity == NULL || destBuf == NULL ||
+        (*destBuf == NULL && *destCapacity > 0) ||
+        *destCapacity < 0)
+    {
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-    
-    RegexMatcher *m = regexp->fMatcher;
 
-    int32_t  srcIdx;
-    if (m->fMatch) {
-        // The most recent call to find() succeeded.  
-        srcIdx = m->fMatchEnd;
-    } else {
-        // The last call to find() on this matcher failed().
-        //   Look back to the end of the last find() that succeeded for src index.
-        srcIdx = m->fLastMatchEnd;
-        if (srcIdx == -1)  {
-            // There has been no successful match with this matcher.
-            //   We want to copy the whole string.
-            srcIdx = 0;
-        }
-    }
+    RegexMatcher *m = regexp->fMatcher;
 
     int32_t  destIdx     = 0;
     int32_t  destCap     = *destCapacity;
     UChar    *dest       = *destBuf;
 
-    for (;;) {
-        if (srcIdx == regexp->fTextLength) {
-            break;
+    if (regexp->fText != NULL) {
+        int32_t srcIdx;
+        int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
+        if (nativeIdx == -1) {
+            srcIdx = 0;
+        } else if (UTEXT_USES_U16(m->fInputText)) {
+            srcIdx = (int32_t)nativeIdx;
+        } else {
+            UErrorCode newStatus = U_ZERO_ERROR;
+            srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &newStatus);
         }
-        UChar c = regexp->fText[srcIdx];
-        if (c == 0 && regexp->fTextLength == -1) {
-            break;
+
+        for (;;) {
+            U_ASSERT(destIdx >= 0);
+
+            if (srcIdx == regexp->fTextLength) {
+                break;
+            }
+            UChar c = regexp->fText[srcIdx];
+            if (c == 0 && regexp->fTextLength == -1) {
+                regexp->fTextLength = srcIdx;
+                break;
+            }
+
+            if (destIdx < destCap) {
+                dest[destIdx] = c;
+            } else {
+                // We've overflowed the dest buffer.
+                //  If the total input string length is known, we can
+                //    compute the total buffer size needed without scanning through the string.
+                if (regexp->fTextLength > 0) {
+                    destIdx += (regexp->fTextLength - srcIdx);
+                    break;
+                }
+            }
+            srcIdx++;
+            destIdx++;
         }
-        if (destIdx < destCap) {
-            dest[destIdx] = c;
+    } else {
+        int64_t  srcIdx;
+        if (m->fMatch) {
+            // The most recent call to find() succeeded.
+            srcIdx = m->fMatchEnd;
         } else {
-            // We've overflowed the dest buffer.
-            //  If the total input string length is known, we can
-            //    compute the total buffer size needed without scanning through the string.
-            if (regexp->fTextLength > 0) {
-                destIdx += (regexp->fTextLength - srcIdx);
-                break;
+            // The last call to find() on this matcher failed.
+            //   Look back to the end of the last find() that succeeded for src index.
+            srcIdx = m->fLastMatchEnd;
+            if (srcIdx == -1)  {
+                // There has been no successful match with this matcher.
+                //   We want to copy the whole string.
+                srcIdx = 0;
             }
         }
-        srcIdx++;
-        destIdx++;
+
+        destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
     }
 
     //
@@ -912,7 +1722,7 @@ int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
     if (destIdx < destCap) {
         *destBuf      += destIdx;
         *destCapacity -= destIdx;
-    } else {
+    } else if (*destBuf != NULL) {
         *destBuf      += destCap;
         *destCapacity  = 0;
     }
@@ -925,15 +1735,31 @@ int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
 }
 
 
-U_CAPI int32_t U_EXPORT2 
-uregex_appendTail(URegularExpression    *regexp,
+//
+//   uregex_appendTail   the public C API entry point; casts the handle to the internal
+//                       RegularExpression and returns the result of RegexCImpl::appendTail().
+U_CAPI int32_t U_EXPORT2
+uregex_appendTail(URegularExpression    *regexp2,
                   UChar                **destBuf,
                   int32_t               *destCapacity,
                   UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
 }
 
 
+//   uregex_appendTailUText   UText flavor of appendTail; returns whatever
+//       RegexMatcher::appendTail(dest, status) returns for the wrapped matcher.
+//       NOTE(review): regexp2 is dereferenced without a validateRE() check - confirm intended.
+U_CAPI UText * U_EXPORT2
+uregex_appendTailUText(URegularExpression    *regexp2,
+                       UText                 *dest,
+                       UErrorCode            *status)  {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    return regexp->fMatcher->appendTail(dest, *status);
+}
+
+
 //------------------------------------------------------------------------------
 //
 //    copyString     Internal utility to copy a string to an output buffer,
@@ -942,6 +1768,7 @@ uregex_appendTail(URegularExpression    *regexp,
 //                   and the NUL is counted in the output size.
 //
 //------------------------------------------------------------------------------
+#if 0
 static void copyString(UChar        *destBuffer,    //  Destination buffer.
                        int32_t       destCapacity,  //  Total capacity of dest buffer
                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
@@ -969,82 +1796,75 @@ static void copyString(UChar        *destBuffer,    //  Destination buffer.
     di++;
     *destIndex = di;
 }
-
+#endif
 
 //------------------------------------------------------------------------------
 //
 //    uregex_split
 //
 //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
-uregex_split(   URegularExpression      *regexp,
-                  UChar                 *destBuf,
-                  int32_t                destCapacity,
-                  int32_t               *requiredCapacity,
-                  UChar                 *destFields[],
-                  int32_t                destFieldsCapacity,
-                  UErrorCode            *status) {
-    if (validateRE(regexp, status) == FALSE) {
-        return 0;
-    }
-    if (destBuf == NULL && destCapacity > 0 ||
-        destCapacity < 0 ||
-        destFields == NULL ||
-        destFieldsCapacity < 1 ) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
+int32_t RegexCImpl::split(RegularExpression     *regexp,
+                          UChar                 *destBuf,
+                          int32_t                destCapacity,
+                          int32_t               *requiredCapacity,
+                          UChar                 *destFields[],
+                          int32_t                destFieldsCapacity,
+                          UErrorCode            *status) {
     //
     // Reset for the input text
     //
     regexp->fMatcher->reset();
-    int32_t   inputLen = regexp->fTextString.length();
-    int32_t   nextOutputStringStart = 0;
+    UText *inputText = regexp->fMatcher->fInputText;
+    int64_t   nextOutputStringStart = 0;
+    int64_t   inputLen = regexp->fMatcher->fInputLength;
     if (inputLen == 0) {
         return 0;
     }
 
-
     //
     // Loop through the input text, searching for the delimiter pattern
     //
     int32_t   i;             // Index of the field being processed.
     int32_t   destIdx = 0;   // Next available position in destBuf;
     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
+    UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
     for (i=0; ; i++) {
         if (i>=destFieldsCapacity-1) {
-            // There are one or zero output string left.
+            // There are one or zero output strings left.
             // Fill the last output string with whatever is left from the input, then exit the loop.
             //  ( i will be == destFieldsCapacity if we filled the output array while processing
             //    capture groups of the delimiter expression, in which case we will discard the
             //    last capture group saved in favor of the unprocessed remainder of the
             //    input string.)
-            int32_t remainingLength = inputLen-nextOutputStringStart;
-            if (remainingLength > 0) {
-            }
-            if (i >= destFieldsCapacity) {
-                // No fields are left.  Recycle the last one for holding the trailing part of
-                //   the input string.
-                i = destFieldsCapacity-1;
-                destIdx = (int32_t)(destFields[i] - destFields[0]);
+            if (inputLen > nextOutputStringStart) {
+                if (i != destFieldsCapacity-1) {
+                    // No fields are left.  Recycle the last one for holding the trailing part of
+                    //   the input string.
+                    i = destFieldsCapacity-1;
+                    destIdx = (int32_t)(destFields[i] - destFields[0]);
+                }
+
+                destFields[i] = &destBuf[destIdx];
+                destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
+                                             &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
             }
-            
-            destFields[i] = &destBuf[destIdx];
-            copyString(destBuf, destCapacity, &destIdx, 
-                &regexp->fText[nextOutputStringStart], remainingLength);
             break;
         }
-        
+
         if (regexp->fMatcher->find()) {
             // We found another delimiter.  Move everything from where we started looking
             //  up until the start of the delimiter into the next output string.
-            int32_t fieldLen = regexp->fMatcher->start(*status) - nextOutputStringStart;
             destFields[i] = &destBuf[destIdx];
-            copyString(destBuf, destCapacity, &destIdx, 
-                &regexp->fText[nextOutputStringStart], fieldLen);
-            nextOutputStringStart =  regexp->fMatcher->end(*status);
-            
+
+            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
+                                         &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
+            if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
+                tStatus = U_ZERO_ERROR;
+            } else {
+                *status = tStatus;
+            }
+            nextOutputStringStart = regexp->fMatcher->fMatchEnd;
+
             // If the delimiter pattern has capturing parentheses, the captured
             //  text goes out into the next n destination strings.
             int32_t groupNum;
@@ -1054,22 +1874,37 @@ uregex_split(   URegularExpression      *regexp,
                     break;
                 }
                 i++;
-                
+
                 // Set up to extract the capture group contents into the dest buffer.
-                UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow
-                                                      //  error while extracting this group.
-                int32_t remainingCapacity = destCapacity - destIdx;
-                if (remainingCapacity < 0) {
-                    remainingCapacity = 0;
-                }
                 destFields[i] = &destBuf[destIdx];
-                int32_t t = uregex_group(regexp, groupNum, destFields[i], remainingCapacity, &tStatus);
+                tStatus = U_ZERO_ERROR;
+                int32_t t = uregex_group((URegularExpression*)regexp,
+                                         groupNum,
+                                         destFields[i],
+                                         REMAINING_CAPACITY(destIdx, destCapacity),
+                                         &tStatus);
                 destIdx += t + 1;    // Record the space used in the output string buffer.
                                      //  +1 for the NUL that terminates the string.
+                if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
+                    tStatus = U_ZERO_ERROR;
+                } else {
+                    *status = tStatus;
+                }
             }
 
             if (nextOutputStringStart == inputLen) {
-                // The delimiter was at the end of the string.  We're done.
+                // The delimiter was at the end of the string.
+                // Output an empty string, and then we are done.
+                if (destIdx < destCapacity) {
+                    destBuf[destIdx] = 0;
+                }
+                if (i < destFieldsCapacity-1) {
+                   ++i;
+                }
+                if (destIdx < destCapacity) {
+                    destFields[i] = destBuf + destIdx;
+                }
+                ++destIdx;
                 break;
             }
 
@@ -1079,8 +1914,8 @@ uregex_split(   URegularExpression      *regexp,
             // We ran off the end of the input while looking for the next delimiter.
             // All the remaining text goes into the current output string.
             destFields[i] = &destBuf[destIdx];
-            copyString(destBuf, destCapacity, &destIdx, 
-                         &regexp->fText[nextOutputStringStart], inputLen-nextOutputStringStart);
+            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
+                                         &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
             break;
         }
     }
@@ -1100,6 +1935,45 @@ uregex_split(   URegularExpression      *regexp,
     return i+1;
 }
 
+//   uregex_split   The actual API function.  Runs validateRE() on the handle, then sets
+//       U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL destBuf with destCapacity > 0, a negative
+//       destCapacity, a NULL destFields, or destFieldsCapacity < 1; else defers to RegexCImpl::split().
+U_CAPI int32_t U_EXPORT2
+uregex_split(URegularExpression      *regexp2,
+             UChar                   *destBuf,
+             int32_t                  destCapacity,
+             int32_t                 *requiredCapacity,
+             UChar                   *destFields[],
+             int32_t                  destFieldsCapacity,
+             UErrorCode              *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    if (validateRE(regexp, TRUE, status) == FALSE) {
+        return 0;
+    }
+    if ((destBuf == NULL && destCapacity > 0) ||
+        destCapacity < 0 ||
+        destFields == NULL ||
+        destFieldsCapacity < 1 ) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
+}
+
+
+//   uregex_splitUText   UText flavor of split; calls RegexMatcher::split() on the wrapped
+//       matcher, passing the matcher's own inputText() as the text to be split.
+//       NOTE(review): regexp2 is dereferenced without a validateRE() check - confirm intended.
+U_CAPI int32_t U_EXPORT2
+uregex_splitUText(URegularExpression    *regexp2,
+                  UText                 *destFields[],
+                  int32_t                destFieldsCapacity,
+                  UErrorCode            *status) {
+    RegularExpression *regexp = (RegularExpression*)regexp2;
+    return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
+}
+
 
 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS