]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/uregex.cpp
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / i18n / uregex.cpp
index c249d676dd3b390f949f64c60046f1cfe128e454..48a1f3ddec5e00b8a82bcd3d020b41f0f0d15398 100644 (file)
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-*   Copyright (C) 1996-2004, International Business Machines
+*   Copyright (C) 2004-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   file name:  regex.cpp
@@ -20,6 +20,8 @@
 #include "uassert.h"
 #include "cmemory.h"
 
+U_NAMESPACE_USE
+
 struct URegularExpression: public UMemory {
 public:
     URegularExpression();
@@ -40,8 +42,6 @@ public:
 
 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
 
-U_NAMESPACE_USE
-
 URegularExpression::URegularExpression() {
     fMagic        = REXP_MAGIC;
     fPat          = NULL;
@@ -74,7 +74,6 @@ static UBool validateRE(const URegularExpression *re, UErrorCode *status, UBool
         return FALSE;
     }
     if (re == NULL || re->fMagic != REXP_MAGIC) {
-        // U_ASSERT(FALSE);
         *status = U_ILLEGAL_ARGUMENT_ERROR;
         return FALSE;
     }
@@ -159,32 +158,6 @@ ErrorExit:
 
 }
 
-
-
-
-//----------------------------------------------------------------------------------------
-//
-//    uregex_openC
-//
-//----------------------------------------------------------------------------------------
-U_CAPI URegularExpression * U_EXPORT2
-uregex_openC( const char           *pattern,
-                    uint32_t        flags,
-                    UParseError    *pe,
-                    UErrorCode     *status) {
-    if (U_FAILURE(*status)) {
-        return NULL;
-    }
-    if (pattern == NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
-    }
-
-    UnicodeString patString(pattern);
-    URegularExpression *re = uregex_open(patString.getBuffer(), patString.length(), flags, pe, status);
-    return re;
-}
-
 //----------------------------------------------------------------------------------------
 //
 //    uregex_close
@@ -222,10 +195,6 @@ uregex_clone(const URegularExpression *source, UErrorCode *status)  {
         delete clone;
         return NULL;
     }
-    if (clone == NULL) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
 
     clone->fPat          = source->fPat;
     clone->fPatRefCount  = source->fPatRefCount; 
@@ -235,16 +204,16 @@ uregex_clone(const URegularExpression *source, UErrorCode *status)  {
     // Note:  fText is not cloned.
 
     return clone;
-};
+}
 
 
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_pattern
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI const UChar * U_EXPORT2 
 uregex_pattern(const  URegularExpression *regexp,
                int32_t            *patLength,
@@ -257,14 +226,14 @@ uregex_pattern(const  URegularExpression *regexp,
         *patLength = regexp->fPatStringLen;
     }
     return regexp->fPatString;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_flags
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_flags(const URegularExpression *regexp, UErrorCode *status)  {
     if (validateRE(regexp, status, FALSE) == FALSE) {
@@ -272,14 +241,14 @@ uregex_flags(const URegularExpression *regexp, UErrorCode *status)  {
     }
     int32_t flags = regexp->fPat->flags();
     return flags;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_setText
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI void U_EXPORT2 
 uregex_setText(URegularExpression *regexp,
                const UChar        *text,
@@ -298,15 +267,15 @@ uregex_setText(URegularExpression *regexp,
 
     regexp->fTextString.setTo(isTerminated, text, textLength);
     regexp->fMatcher->reset(regexp->fTextString);
-};
+}
 
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_getText
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI const UChar * U_EXPORT2 
 uregex_getText(URegularExpression *regexp,
                int32_t            *textLength,
@@ -318,66 +287,82 @@ uregex_getText(URegularExpression *regexp,
         *textLength = regexp->fTextLength;
     }
     return regexp->fText;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_matches
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI UBool U_EXPORT2 
 uregex_matches(URegularExpression *regexp,
                 int32_t            startIndex,
                 UErrorCode        *status)  {
+    UBool result = FALSE;    // value returned when validateRE() fails
     if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
+        return result;
+    }
+    if (startIndex == -1) {    // -1 selects the matches() overload that takes no start index
+        result = regexp->fMatcher->matches(*status);
+    } else {
+        result = regexp->fMatcher->matches(startIndex, *status);
     }
-    UBool result = regexp->fMatcher->matches(startIndex, *status);
     return result;
-};
+}
 
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_lookingAt
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI UBool U_EXPORT2 
 uregex_lookingAt(URegularExpression *regexp,
                  int32_t             startIndex,
                  UErrorCode         *status)  {
+    UBool result = FALSE;    // value returned when validateRE() fails
     if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
+        return result;
+    }
+    if (startIndex == -1) {    // -1 selects the lookingAt() overload that takes no start index
+        result = regexp->fMatcher->lookingAt(*status);
+    } else {
+        result = regexp->fMatcher->lookingAt(startIndex, *status);
     }
-    UBool result = regexp->fMatcher->lookingAt(startIndex, *status);
     return result;
-};
+}
 
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_find
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI UBool U_EXPORT2 
 uregex_find(URegularExpression *regexp,
             int32_t             startIndex, 
             UErrorCode         *status)  {
+    UBool result = FALSE;    // value returned when validateRE() fails
     if (validateRE(regexp, status) == FALSE) {
-        return FALSE;
+        return result;
+    }
+    if (startIndex == -1) {    // -1: reset via resetPreserveRegion(), then use the no-argument find()
+        regexp->fMatcher->resetPreserveRegion();
+        result = regexp->fMatcher->find();    // this overload is not given *status
+    } else {
+        result = regexp->fMatcher->find(startIndex, *status);
     }
-    UBool result = regexp->fMatcher->find(startIndex, *status);
     return result;
-};
+}
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_findNext
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI UBool U_EXPORT2 
 uregex_findNext(URegularExpression *regexp,
                 UErrorCode         *status)  {
@@ -386,13 +371,13 @@ uregex_findNext(URegularExpression *regexp,
     }
     UBool result = regexp->fMatcher->find();
     return result;
-};
+}
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_groupCount
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_groupCount(URegularExpression *regexp,
                   UErrorCode         *status)  {
@@ -401,14 +386,14 @@ uregex_groupCount(URegularExpression *regexp,
     }
     int32_t  result = regexp->fMatcher->groupCount();
     return result;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_group
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_group(URegularExpression *regexp,
              int32_t             groupNum,
@@ -453,14 +438,14 @@ uregex_group(URegularExpression *regexp,
         u_memcpy(dest, &regexp->fText[startIx], copyLength);
     }
     return fullLength;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_start
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_start(URegularExpression *regexp,
              int32_t             groupNum,
@@ -470,14 +455,14 @@ uregex_start(URegularExpression *regexp,
     }
     int32_t result = regexp->fMatcher->start(groupNum, *status);
     return result;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_end
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_end(URegularExpression   *regexp,
            int32_t               groupNum,
@@ -487,13 +472,13 @@ uregex_end(URegularExpression   *regexp,
     }
     int32_t result = regexp->fMatcher->end(groupNum, *status);
     return result;
-};
+}
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_reset
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI void U_EXPORT2 
 uregex_reset(URegularExpression    *regexp,
              int32_t               index,
@@ -502,17 +487,253 @@ uregex_reset(URegularExpression    *regexp,
         return;
     }
     regexp->fMatcher->reset(index, *status);
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+//    uregex_setRegion
+//        Forwards regionStart/regionLimit to RegexMatcher::region() on fMatcher.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_setRegion(URegularExpression   *regexp,
+                 int32_t               regionStart,
+                 int32_t               regionLimit,
+                 UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return;    // validation failed; region() is not called
+    }
+    regexp->fMatcher->region(regionStart, regionLimit, *status);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_regionStart
+//        Returns fMatcher->regionStart(), or 0 when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2 
+uregex_regionStart(const  URegularExpression   *regexp,
+                          UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return 0;
+    }
+    return regexp->fMatcher->regionStart();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_regionEnd
+//        Returns fMatcher->regionEnd(), or 0 when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2 
+uregex_regionEnd(const  URegularExpression   *regexp,
+                        UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return 0;
+    }
+    return regexp->fMatcher->regionEnd();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hasTransparentBounds
+//        Returns fMatcher->hasTransparentBounds(), or FALSE when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2 
+uregex_hasTransparentBounds(const  URegularExpression   *regexp,
+                                   UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return FALSE;
+    }
+    return regexp->fMatcher->hasTransparentBounds();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_useTransparentBounds
+//        Passes the flag b to fMatcher->useTransparentBounds().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_useTransparentBounds(URegularExpression    *regexp,
+             UBool                 b,
+             UErrorCode            *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return;    // validation failed; the matcher is not modified here
+    }
+    regexp->fMatcher->useTransparentBounds(b);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hasAnchoringBounds
+//        Returns fMatcher->hasAnchoringBounds(), or FALSE when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2 
+uregex_hasAnchoringBounds(const  URegularExpression   *regexp,
+                                   UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return FALSE;
+    }
+    return regexp->fMatcher->hasAnchoringBounds();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_useAnchoringBounds
+//        Passes the flag b to fMatcher->useAnchoringBounds().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_useAnchoringBounds(URegularExpression    *regexp,
+             UBool                 b,
+             UErrorCode            *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return;    // validation failed; the matcher is not modified here
+    }
+    regexp->fMatcher->useAnchoringBounds(b);
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_hitEnd
+//        Returns fMatcher->hitEnd(), or FALSE when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2 
+uregex_hitEnd(const  URegularExpression   *regexp,
+                     UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return FALSE;
+    }
+    return regexp->fMatcher->hitEnd();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_requireEnd
+//        Returns fMatcher->requireEnd(), or FALSE when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2 
+uregex_requireEnd(const  URegularExpression   *regexp,
+                         UErrorCode           *status)  {
+    if (validateRE(regexp, status) == FALSE) {
+        return FALSE;
+    }
+    return regexp->fMatcher->requireEnd();
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setTimeLimit
+//        Passes limit to fMatcher->setTimeLimit(); skipped when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_setTimeLimit(URegularExpression   *regexp,
+                    int32_t               limit,
+                    UErrorCode           *status) {
+    if (validateRE(regexp, status)) {
+        regexp->fMatcher->setTimeLimit(limit, *status);
+    }
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_getTimeLimit
+//        Returns fMatcher->getTimeLimit(), or 0 when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2 
+uregex_getTimeLimit(const  URegularExpression   *regexp,
+                           UErrorCode           *status) {
+    int32_t retVal = 0;    // result when validation fails
+    if (validateRE(regexp, status)) {
+        retVal = regexp->fMatcher->getTimeLimit();
+    }
+    return retVal;
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setStackLimit
+//        Passes limit to fMatcher->setStackLimit(); skipped when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_setStackLimit(URegularExpression   *regexp,
+                    int32_t               limit,
+                    UErrorCode           *status) {
+    if (validateRE(regexp, status)) {
+        regexp->fMatcher->setStackLimit(limit, *status);
+    }
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_getStackLimit
+//        Returns fMatcher->getStackLimit(), or 0 when validateRE() fails.
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2 
+uregex_getStackLimit(const  URegularExpression   *regexp,
+                           UErrorCode           *status) {
+    int32_t retVal = 0;    // result when validation fails
+    if (validateRE(regexp, status)) {
+        retVal = regexp->fMatcher->getStackLimit();
+    }
+    return retVal;
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_setMatchCallback
+//        Passes callback and context to fMatcher->setMatchCallback().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setMatchCallback(URegularExpression      *regexp,
+                        URegexMatchCallback     *callback,
+                        const void              *context,
+                        UErrorCode              *status) {
+    if (validateRE(regexp, status)) {
+      regexp->fMatcher->setMatchCallback(callback, context, *status);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+//    uregex_getMatchCallback
+//        Hands *callback and *context to fMatcher->getMatchCallback().
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2 
+uregex_getMatchCallback(const URegularExpression    *regexp,
+                        URegexMatchCallback        **callback,
+                        const void                 **context,
+                        UErrorCode                  *status) {
+     if (validateRE(regexp, status)) {
+         // NOTE(review): callback and context are dereferenced below with no NULL check.
+         regexp->fMatcher->getMatchCallback(*callback, *context, *status);
+     }
+}
+
+
+//------------------------------------------------------------------------------
 //
 //    uregex_replaceAll
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_replaceAll(URegularExpression    *regexp,
-                  UChar                 *replacementText,
+                  const UChar           *replacementText,
                   int32_t                replacementLength,
                   UChar                 *destBuf,
                   int32_t                destCapacity,
@@ -536,17 +757,17 @@ uregex_replaceAll(URegularExpression    *regexp,
     len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
 
     return len;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_replaceFirst
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_replaceFirst(URegularExpression  *regexp,
-                    UChar               *replacementText,
+                    const UChar         *replacementText,
                     int32_t              replacementLength,
                     UChar               *destBuf,
                     int32_t              destCapacity,
@@ -572,14 +793,14 @@ uregex_replaceFirst(URegularExpression  *regexp,
     len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
 
     return len;
-};
+}
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_appendReplacement
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 
 
 //
@@ -590,7 +811,7 @@ U_NAMESPACE_BEGIN
 class RegexCImpl {
  public:
    inline static  int32_t appendReplacement(URegularExpression    *regexp,
-                      UChar                 *replacementText,
+                      const UChar           *replacementText,
                       int32_t                replacementLength,
                       UChar                **destBuf,
                       int32_t               *destCapacity,
@@ -637,7 +858,7 @@ static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCap
 //  appendReplacement, the actual implementation.
 //
 int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
-                  UChar                 *replacementText,
+                  const UChar           *replacementText,
                   int32_t                replacementLength,
                   UChar                **destBuf,
                   int32_t               *destCapacity,
@@ -720,7 +941,7 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
                     u_unescapeAt(unescape_charAt,
                        &replIdx,                   // Index is updated by unescapeAt 
                        replacementLength,          // Length of replacement text
-                       replacementText);
+                       (void *)replacementText);
 
                 if (escapedChar != (UChar32)0xFFFFFFFF) {
                     if (escapedChar <= 0xffff) {
@@ -836,7 +1057,7 @@ int32_t RegexCImpl::appendReplacement(URegularExpression    *regexp,
 //
 U_CAPI int32_t U_EXPORT2 
 uregex_appendReplacement(URegularExpression    *regexp,
-                  UChar                 *replacementText,
+                  const UChar           *replacementText,
                   int32_t                replacementLength,
                   UChar                **destBuf,
                   int32_t               *destCapacity,
@@ -846,16 +1067,25 @@ uregex_appendReplacement(URegularExpression    *regexp,
 }
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_appendTail
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
                   UChar                **destBuf,
                   int32_t               *destCapacity,
-                  UErrorCode            *status)  {
+                  UErrorCode            *status)
+{
 
+    if (destCapacity == NULL || destBuf == NULL || 
+        *destBuf == NULL && *destCapacity > 0 ||
+        *destCapacity < 0)
+    {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    
     // If we come in with a buffer overflow error, don't suppress the operation.
     //  A series of appendReplacements, appendTail need to correctly preflight
     //  the buffer size when an overflow happens somewhere in the middle.
@@ -868,13 +1098,6 @@ int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
     if (validateRE(regexp, status) == FALSE) {
         return 0;
     }
-    if (destCapacity == NULL || destBuf == NULL || 
-        *destBuf == NULL && *destCapacity > 0 ||
-        *destCapacity < 0) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    
     RegexMatcher *m = regexp->fMatcher;
 
     int32_t  srcIdx;
@@ -948,7 +1171,7 @@ int32_t RegexCImpl::appendTail(URegularExpression    *regexp,
     }
 
     return destIdx;
-};
+}
 
 
 U_CAPI int32_t U_EXPORT2 
@@ -960,14 +1183,14 @@ uregex_appendTail(URegularExpression    *regexp,
 }
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    copyString     Internal utility to copy a string to an output buffer,
 //                   while managing buffer overflow and preflight size
 //                   computation.  NUL termination is added to destination,
 //                   and the NUL is counted in the output size.
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 static void copyString(UChar        *destBuffer,    //  Destination buffer.
                        int32_t       destCapacity,  //  Total capacity of dest buffer
                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
@@ -989,16 +1212,19 @@ static void copyString(UChar        *destBuffer,    //  Destination buffer.
             break;
         }
     }
-    destBuffer[di++] = 0;
+    if (di<destCapacity) {
+        destBuffer[di] = 0;
+    }
+    di++;
     *destIndex = di;
 }
 
 
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //    uregex_split
 //
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 U_CAPI int32_t U_EXPORT2 
 uregex_split(   URegularExpression      *regexp,
                   UChar                 *destBuf,
@@ -1117,15 +1343,12 @@ uregex_split(   URegularExpression      *regexp,
     if (requiredCapacity != NULL) {
         *requiredCapacity = destIdx;
     }
-    if (*requiredCapacity > destCapacity) {
+    if (destIdx > destCapacity) {
         *status = U_BUFFER_OVERFLOW_ERROR;
     }
     return i+1;
 }
 
 
-
-
-
-
 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
+