/*
*******************************************************************************
-* Copyright (C) 1996-2004, International Business Machines
+* Copyright (C) 2004-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: regex.cpp
#include "uassert.h"
#include "cmemory.h"
+U_NAMESPACE_USE
+
struct URegularExpression: public UMemory {
public:
URegularExpression();
static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
-U_NAMESPACE_USE
-
URegularExpression::URegularExpression() {
fMagic = REXP_MAGIC;
fPat = NULL;
return FALSE;
}
if (re == NULL || re->fMagic != REXP_MAGIC) {
- // U_ASSERT(FALSE);
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
}
-
-
-
-//----------------------------------------------------------------------------------------
-//
-// uregex_openC
-//
-//----------------------------------------------------------------------------------------
-U_CAPI URegularExpression * U_EXPORT2
-uregex_openC( const char *pattern,
- uint32_t flags,
- UParseError *pe,
- UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return NULL;
- }
- if (pattern == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
-
- UnicodeString patString(pattern);
- URegularExpression *re = uregex_open(patString.getBuffer(), patString.length(), flags, pe, status);
- return re;
-}
-
//----------------------------------------------------------------------------------------
//
// uregex_close
delete clone;
return NULL;
}
- if (clone == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
clone->fPat = source->fPat;
clone->fPatRefCount = source->fPatRefCount;
// Note: fText is not cloned.
return clone;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_pattern
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression *regexp,
int32_t *patLength,
*patLength = regexp->fPatStringLen;
}
return regexp->fPatString;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_flags
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression *regexp, UErrorCode *status) {
if (validateRE(regexp, status, FALSE) == FALSE) {
}
int32_t flags = regexp->fPat->flags();
return flags;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_setText
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI void U_EXPORT2
uregex_setText(URegularExpression *regexp,
const UChar *text,
regexp->fTextString.setTo(isTerminated, text, textLength);
regexp->fMatcher->reset(regexp->fTextString);
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_getText
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression *regexp,
int32_t *textLength,
*textLength = regexp->fTextLength;
}
return regexp->fText;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_matches
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression *regexp,
int32_t startIndex,
UErrorCode *status) {
+ UBool result = FALSE;  // value returned when validation fails
if (validateRE(regexp, status) == FALSE) {
- return FALSE;
+ return result;
+ }
+ if (startIndex == -1) {  // -1 selects the matcher overload that takes no start index
+ result = regexp->fMatcher->matches(*status);
+ } else {  // any other value is forwarded to the matcher as the start index
+ result = regexp->fMatcher->matches(startIndex, *status);
}
- UBool result = regexp->fMatcher->matches(startIndex, *status);
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_lookingAt
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression *regexp,
int32_t startIndex,
UErrorCode *status) {
+ UBool result = FALSE;  // value returned when validation fails
if (validateRE(regexp, status) == FALSE) {
- return FALSE;
+ return result;
+ }
+ if (startIndex == -1) {  // -1 selects the matcher overload that takes no start index
+ result = regexp->fMatcher->lookingAt(*status);
+ } else {  // any other value is forwarded to the matcher as the start index
+ result = regexp->fMatcher->lookingAt(startIndex, *status);
}
- UBool result = regexp->fMatcher->lookingAt(startIndex, *status);
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_find
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression *regexp,
int32_t startIndex,
UErrorCode *status) {
+ UBool result = FALSE;  // value returned when validation fails
if (validateRE(regexp, status) == FALSE) {
- return FALSE;
+ return result;
+ }
+ if (startIndex == -1) {  // -1: call resetPreserveRegion(), then the no-argument find()
+ regexp->fMatcher->resetPreserveRegion();
+ result = regexp->fMatcher->find();  // NOTE(review): this overload is not given *status
+ } else {  // any other value is forwarded to the matcher as the start index
+ result = regexp->fMatcher->find(startIndex, *status);
}
- UBool result = regexp->fMatcher->find(startIndex, *status);
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_findNext
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression *regexp,
UErrorCode *status) {
}
UBool result = regexp->fMatcher->find();
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_groupCount
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression *regexp,
UErrorCode *status) {
}
int32_t result = regexp->fMatcher->groupCount();
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_group
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression *regexp,
int32_t groupNum,
u_memcpy(dest, &regexp->fText[startIx], copyLength);
}
return fullLength;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_start
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression *regexp,
int32_t groupNum,
}
int32_t result = regexp->fMatcher->start(groupNum, *status);
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_end
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression *regexp,
int32_t groupNum,
}
int32_t result = regexp->fMatcher->end(groupNum, *status);
return result;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_reset
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI void U_EXPORT2
uregex_reset(URegularExpression *regexp,
int32_t index,
return;
}
regexp->fMatcher->reset(index, *status);
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// uregex_setRegion
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setRegion(URegularExpression *regexp,
+                 int32_t             regionStart,
+                 int32_t             regionLimit,
+                 UErrorCode         *status) {
+    // Forward the region bounds to the matcher only for a handle that
+    // passes validation; otherwise do nothing.
+    if (validateRE(regexp, status)) {
+        regexp->fMatcher->region(regionStart, regionLimit, *status);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_regionStart
+//
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_regionStart(const URegularExpression *regexp,
+                   UErrorCode               *status) {
+    // Report the matcher's region start; 0 when validation fails.
+    int32_t start = 0;
+    if (validateRE(regexp, status)) {
+        start = regexp->fMatcher->regionStart();
+    }
+    return start;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_regionEnd
+//
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_regionEnd(const URegularExpression *regexp,
+                 UErrorCode               *status) {
+    // Report the matcher's region end; 0 when validation fails.
+    int32_t end = 0;
+    if (validateRE(regexp, status)) {
+        end = regexp->fMatcher->regionEnd();
+    }
+    return end;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_hasTransparentBounds
+//
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hasTransparentBounds(const URegularExpression *regexp,
+                            UErrorCode               *status) {
+    // Query the matcher's transparent-bounds flag; FALSE when validation fails.
+    UBool transparent = FALSE;
+    if (validateRE(regexp, status)) {
+        transparent = regexp->fMatcher->hasTransparentBounds();
+    }
+    return transparent;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_useTransparentBounds
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_useTransparentBounds(URegularExpression *regexp,
+                            UBool               b,
+                            UErrorCode         *status) {
+    // Pass the flag through to the matcher once the handle is validated.
+    if (validateRE(regexp, status)) {
+        regexp->fMatcher->useTransparentBounds(b);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_hasAnchoringBounds
+//
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hasAnchoringBounds(const URegularExpression *regexp,
+                          UErrorCode               *status) {
+    // Query the matcher's anchoring-bounds flag; FALSE when validation fails.
+    UBool anchoring = FALSE;
+    if (validateRE(regexp, status)) {
+        anchoring = regexp->fMatcher->hasAnchoringBounds();
+    }
+    return anchoring;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_useAnchoringBounds
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_useAnchoringBounds(URegularExpression *regexp,
+                          UBool               b,
+                          UErrorCode         *status) {
+    // Pass the flag through to the matcher once the handle is validated.
+    if (validateRE(regexp, status)) {
+        regexp->fMatcher->useAnchoringBounds(b);
+    }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_hitEnd
+//
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_hitEnd(const URegularExpression *regexp,
+              UErrorCode               *status) {
+    // Return the matcher's hitEnd() result; FALSE when validation fails.
+    UBool hit = FALSE;
+    if (validateRE(regexp, status)) {
+        hit = regexp->fMatcher->hitEnd();
+    }
+    return hit;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_requireEnd
+//
+//------------------------------------------------------------------------------
+U_CAPI UBool U_EXPORT2
+uregex_requireEnd(const URegularExpression *regexp,
+                  UErrorCode               *status) {
+    // Return the matcher's requireEnd() result; FALSE when validation fails.
+    UBool required = FALSE;
+    if (validateRE(regexp, status)) {
+        required = regexp->fMatcher->requireEnd();
+    }
+    return required;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_setTimeLimit
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setTimeLimit(URegularExpression *regexp,
+                    int32_t             limit,
+                    UErrorCode         *status) {
+    // Nothing to do for a handle that fails validation.
+    if (validateRE(regexp, status) == FALSE) {
+        return;
+    }
+    regexp->fMatcher->setTimeLimit(limit, *status);
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_getTimeLimit
+//
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_getTimeLimit(const URegularExpression *regexp,
+                    UErrorCode               *status) {
+    // Return the matcher's time limit; 0 when validation fails.
+    if (validateRE(regexp, status) == FALSE) {
+        return 0;
+    }
+    return regexp->fMatcher->getTimeLimit();
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_setStackLimit
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setStackLimit(URegularExpression *regexp,
+                     int32_t             limit,
+                     UErrorCode         *status) {
+    // Nothing to do for a handle that fails validation.
+    if (validateRE(regexp, status) == FALSE) {
+        return;
+    }
+    regexp->fMatcher->setStackLimit(limit, *status);
+}
+
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_getStackLimit
+//
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_getStackLimit(const URegularExpression *regexp,
+                     UErrorCode               *status) {
+    // Return the matcher's stack limit; 0 when validation fails.
+    if (validateRE(regexp, status) == FALSE) {
+        return 0;
+    }
+    return regexp->fMatcher->getStackLimit();
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_setMatchCallback
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_setMatchCallback(URegularExpression  *regexp,
+                        URegexMatchCallback *callback,
+                        const void          *context,
+                        UErrorCode          *status) {
+    // Nothing to do for a handle that fails validation.
+    if (validateRE(regexp, status) == FALSE) {
+        return;
+    }
+    // Hand the callback and its context pointer to the matcher.
+    regexp->fMatcher->setMatchCallback(callback, context, *status);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uregex_getMatchCallback
+//
+//------------------------------------------------------------------------------
+U_CAPI void U_EXPORT2
+uregex_getMatchCallback(const URegularExpression *regexp,
+                        URegexMatchCallback     **callback,
+                        const void              **context,
+                        UErrorCode               *status) {
+    // Fetch the match callback and its context pointer from the matcher.
+    //   callback, context: out-parameters; both are dereferenced to receive
+    //                      the values, so neither may be NULL.
+    //   status:            set to U_ILLEGAL_ARGUMENT_ERROR when either
+    //                      out-parameter is NULL.
+    if (validateRE(regexp, status) == FALSE) {
+        return;
+    }
+    // Reject NULL out-parameters instead of dereferencing them below.
+    if (callback == NULL || context == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    regexp->fMatcher->getMatchCallback(*callback, *context, *status);
+}
+
+
+//------------------------------------------------------------------------------
//
// uregex_replaceAll
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression *regexp,
- UChar *replacementText,
+ const UChar *replacementText,
int32_t replacementLength,
UChar *destBuf,
int32_t destCapacity,
len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
return len;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_replaceFirst
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression *regexp,
- UChar *replacementText,
+ const UChar *replacementText,
int32_t replacementLength,
UChar *destBuf,
int32_t destCapacity,
len += uregex_appendTail(regexp, &destBuf, &destCapacity, status);
return len;
-};
+}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_appendReplacement
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
class RegexCImpl {
public:
inline static int32_t appendReplacement(URegularExpression *regexp,
- UChar *replacementText,
+ const UChar *replacementText,
int32_t replacementLength,
UChar **destBuf,
int32_t *destCapacity,
// appendReplacement, the actual implementation.
//
int32_t RegexCImpl::appendReplacement(URegularExpression *regexp,
- UChar *replacementText,
+ const UChar *replacementText,
int32_t replacementLength,
UChar **destBuf,
int32_t *destCapacity,
u_unescapeAt(unescape_charAt,
&replIdx, // Index is updated by unescapeAt
replacementLength, // Length of replacement text
- replacementText);
+ (void *)replacementText);
if (escapedChar != (UChar32)0xFFFFFFFF) {
if (escapedChar <= 0xffff) {
//
U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression *regexp,
- UChar *replacementText,
+ const UChar *replacementText,
int32_t replacementLength,
UChar **destBuf,
int32_t *destCapacity,
}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_appendTail
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
int32_t RegexCImpl::appendTail(URegularExpression *regexp,
UChar **destBuf,
int32_t *destCapacity,
- UErrorCode *status) {
+ UErrorCode *status)
+{
+ if (destCapacity == NULL || destBuf == NULL ||
+ *destBuf == NULL && *destCapacity > 0 ||
+ *destCapacity < 0)
+ {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
// If we come in with a buffer overflow error, don't suppress the operation.
// A series of appendReplacements, appendTail need to correctly preflight
// the buffer size when an overflow happens somewhere in the middle.
if (validateRE(regexp, status) == FALSE) {
return 0;
}
- if (destCapacity == NULL || destBuf == NULL ||
- *destBuf == NULL && *destCapacity > 0 ||
- *destCapacity < 0) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
RegexMatcher *m = regexp->fMatcher;
int32_t srcIdx;
}
return destIdx;
-};
+}
U_CAPI int32_t U_EXPORT2
}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// copyString Internal utility to copy a string to an output buffer,
// while managing buffer overflow and preflight size
// computation. NUL termination is added to destination,
// and the NUL is counted in the output size.
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
static void copyString(UChar *destBuffer, // Destination buffer.
int32_t destCapacity, // Total capacity of dest buffer
int32_t *destIndex, // Index into dest buffer. Updated on return.
break;
}
}
- destBuffer[di++] = 0;
+ if (di<destCapacity) {
+ destBuffer[di] = 0;
+ }
+ di++;
*destIndex = di;
}
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// uregex_split
//
-//----------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uregex_split( URegularExpression *regexp,
UChar *destBuf,
if (requiredCapacity != NULL) {
*requiredCapacity = destIdx;
}
- if (*requiredCapacity > destCapacity) {
+ if (destIdx > destCapacity) {
*status = U_BUFFER_OVERFLOW_ERROR;
}
return i+1;
}
-
-
-
-
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
+