+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**************************************************************************
-* Copyright (C) 2002-2015 International Business Machines Corporation *
-* and others. All rights reserved. *
+* Copyright (C) 2002-2016 International Business Machines Corporation
+* and others. All rights reserved.
**************************************************************************
*/
//
#include "unicode/utf16.h"
#include "uassert.h"
#include "cmemory.h"
+#include "cstr.h"
#include "uvector.h"
#include "uvectr32.h"
#include "uvectr64.h"
// #include <malloc.h> // Needed for heapcheck testing
+
U_NAMESPACE_BEGIN
// Default limit for the size of the back track stack, to avoid system
fInput = NULL;
fInputLength = 0;
fInputUniStrMaybeMutable = FALSE;
-
- if (U_FAILURE(status)) {
- fDeferredStatus = status;
- }
}
//
return;
}
- if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) {
+ if (fPattern->fDataSize > UPRV_LENGTHOF(fSmallData)) {
fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t));
if (fData == NULL) {
status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
}
}
-
+
} else if (u_isdigit(nextChar)) {
// $n Scan for a capture group number
int32_t numCaptureGroups = fPattern->fGroupMap->size();
break;
}
(void)UTEXT_NEXT32(replacement);
- groupNum=groupNum*10 + nextDigitVal;
+ groupNum=groupNum*10 + nextDigitVal;
++numDigits;
}
} else {
if (findProgressInterrupt(startPos, status))
return FALSE;
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_START:
// Matches are only possible at the start of the input string
return FALSE;
}
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_STRING:
case START_CHAR:
if (fMatch) {
return TRUE;
}
- UTEXT_SETNATIVEINDEX(fInputText, pos);
+ UTEXT_SETNATIVEINDEX(fInputText, startPos);
}
if (startPos > testStartLimit) {
fMatch = FALSE;
return FALSE;
}
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_LINE:
{
- UChar32 c;
+ UChar32 ch;
if (startPos == fAnchorStart) {
MatchAt(startPos, FALSE, status);
if (U_FAILURE(status)) {
return TRUE;
}
UTEXT_SETNATIVEINDEX(fInputText, startPos);
- c = UTEXT_NEXT32(fInputText);
+ ch = UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText);
} else {
UTEXT_SETNATIVEINDEX(fInputText, startPos);
- c = UTEXT_PREVIOUS32(fInputText);
+ ch = UTEXT_PREVIOUS32(fInputText);
UTEXT_SETNATIVEINDEX(fInputText, startPos);
}
if (fPattern->fFlags & UREGEX_UNIX_LINES) {
for (;;) {
- if (c == 0x0a) {
+ if (ch == 0x0a) {
MatchAt(startPos, FALSE, status);
if (U_FAILURE(status)) {
return FALSE;
fHitEnd = TRUE;
return FALSE;
}
- c = UTEXT_NEXT32(fInputText);
+ ch = UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText);
// Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop
}
} else {
for (;;) {
- if (isLineTerminator(c)) {
- if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
+ if (isLineTerminator(ch)) {
+ if (ch == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
(void)UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText);
}
fHitEnd = TRUE;
return FALSE;
}
- c = UTEXT_NEXT32(fInputText);
+ ch = UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText);
// Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop
}
default:
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
}
- U_ASSERT(FALSE);
- return FALSE;
+ UPRV_UNREACHABLE;
}
if (findProgressInterrupt(startPos, status))
return FALSE;
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_START:
// Matches are only possible at the start of the input string
return FALSE;
}
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_STRING:
case START_CHAR:
return FALSE;
}
}
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
case START_LINE:
{
- UChar32 c;
+ UChar32 ch;
if (startPos == fAnchorStart) {
MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(status)) {
if (fMatch) {
return TRUE;
}
+ // In bug 31063104 which has a zero-length text buffer we get here with
+ // inputBuf=NULL, startPos=fActiveLimit=0 (and fMatch F) which violates the
+ // requirement for U16_FWD_1 (utf16.h) that startPos < fActiveLimit. Having
+ // inputBuf=NULL (chunkContexts NULL) is probably due to an error in the
+ // CFStringUText functions. Nevertheless, to be defensive, add test below.
+ if (startPos >= testLen) {
+ fHitEnd = TRUE;
+ return FALSE;
+ }
U16_FWD_1(inputBuf, startPos, fActiveLimit);
}
if (fPattern->fFlags & UREGEX_UNIX_LINES) {
for (;;) {
- c = inputBuf[startPos-1];
- if (c == 0x0a) {
+ ch = inputBuf[startPos-1];
+ if (ch == 0x0a) {
MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(status)) {
return FALSE;
}
} else {
for (;;) {
- c = inputBuf[startPos-1];
- if (isLineTerminator(c)) {
- if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
+ ch = inputBuf[startPos-1];
+ if (isLineTerminator(ch)) {
+ if (ch == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
startPos++;
}
MatchChunkAt(startPos, FALSE, status);
}
default:
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
}
- U_ASSERT(FALSE);
- return FALSE;
+ UPRV_UNREACHABLE;
}
break;
}
i++;
- dest[i] = utext_extract_replace(fInputText, dest[i],
+ dest[i] = utext_extract_replace(fInputText, dest[i],
start64(groupNum, status), end64(groupNum, status), &status);
}
if (dest[i] == NULL) {
dest[i] = utext_openUChars(NULL, NULL, 0, &status);
} else {
- static UChar emptyString[] = {(UChar)0};
+ static const UChar emptyString[] = {(UChar)0};
utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status);
}
}
fStack->removeAllElements();
REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus);
+ if(U_FAILURE(fDeferredStatus)) {
+ return NULL;
+ }
+
int32_t i;
for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) {
iFrame->fExtra[i] = -1;
//
//--------------------------------------------------------------------------------
inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return fp;
+ }
// push storage for a new frame.
int64_t *newFP = fStack->reserveBlock(fFrameSize, status);
- if (newFP == NULL) {
+ if (U_FAILURE(status)) {
// Failure on attempted stack expansion.
// Stack function set some other error code, change it to a more
// specific one for regular expressions.
return (REStackFrame *)newFP;
}
+#if defined(REGEX_DEBUG)  // NOTE(review): the calls to this helper in this file sit under REGEX_RUN_DEBUG; presumably that macro is only defined together with REGEX_DEBUG -- confirm.
+namespace {
+UnicodeString StringFromUText(UText *ut) {  // Debug-only helper: returns the entire content of ut as a UnicodeString, for trace printing.
+ UnicodeString result;
+ for (UChar32 c = utext_next32From(ut, 0); c != U_SENTINEL; c = UTEXT_NEXT32(ut)) {  // Walk from native index 0 until U_SENTINEL; note that this moves ut's iteration position.
+ result.append(c);  // Accumulate one code point per iteration.
+ }
+ return result;
+}
+}  // namespace
+#endif // REGEX_DEBUG
+
//--------------------------------------------------------------------------------
//
int32_t opValue; // and the operand value.
#ifdef REGEX_RUN_DEBUG
- if (fTraceDebug)
- {
+ if (fTraceDebug) {
printf("MatchAt(startIdx=%ld)\n", startIdx);
- printf("Original Pattern: ");
- UChar32 c = utext_next32From(fPattern->fPattern, 0);
- while (c != U_SENTINEL) {
- if (c<32 || c>256) {
- c = '.';
- }
- printf("%c", c);
-
- c = UTEXT_NEXT32(fPattern->fPattern);
- }
- printf("\n");
- printf("Input String: ");
- c = utext_next32From(fInputText, 0);
- while (c != U_SENTINEL) {
- if (c<32 || c>256) {
- c = '.';
- }
- printf("%c", c);
-
- c = UTEXT_NEXT32(fInputText);
- }
- printf("\n");
- printf("\n");
+ printf("Original Pattern: \"%s\"\n", CStr(StringFromUText(fPattern->fPattern))());
+ printf("Input String: \"%s\"\n\n", CStr(StringFromUText(fInputText))());
}
#endif
int64_t *pat = fPattern->fCompiledPat->getBuffer();
const UChar *litText = fPattern->fLiteralText.getBuffer();
- UVector *sets = fPattern->fSets;
+ UVector *fSets = fPattern->fSets;
fFrameSize = fPattern->fFrameSize;
REStackFrame *fp = resetStack();
+ if (U_FAILURE(fDeferredStatus)) {
+ status = fDeferredStatus;
+ return;
+ }
fp->fPatIdx = 0;
fp->fInputIdx = startIdx;
// There is input left. Pick up one char and test it for set membership.
UChar32 c = UTEXT_NEXT32(fInputText);
- U_ASSERT(opValue > 0 && opValue < sets->size());
+ U_ASSERT(opValue > 0 && opValue < fSets->size());
if (c<256) {
Regex8BitSet *s8 = &fPattern->fSets8[opValue];
if (s8->contains(c)) {
break;
}
} else {
- UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
+ UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue);
if (s->contains(c)) {
// The character is in the set. A Match.
fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
}
}
fp = StateSave(fp, fp->fPatIdx, status);
+ } else {
+ // Increment time-out counter. (StateSave() does it if count >= minCount)
+ fTickCounter--;
+ if (fTickCounter <= 0) {
+ IncrementTime(status); // Re-initializes fTickCounter
+ }
}
+
fp->fPatIdx = opValue + 4; // Loop back.
}
break;
// We haven't met the minimum number of matches yet.
// Loop back for another one.
fp->fPatIdx = opValue + 4; // Loop back.
+ // Increment time-out counter. (StateSave() does it if count >= minCount)
+ fTickCounter--;
+ if (fTickCounter <= 0) {
+ IncrementTime(status); // Re-initializes fTickCounter
+ }
} else {
// We do have the minimum number of matches.
if (newFP == (int64_t *)fp) {
break;
}
- int32_t i;
- for (i=0; i<fFrameSize; i++) {
- newFP[i] = ((int64_t *)fp)[i];
+ int32_t j;
+ for (j=0; j<fFrameSize; j++) {
+ newFP[j] = ((int64_t *)fp)[j];
}
fp = (REStackFrame *)newFP;
fStack->setSize(newStackSize);
// This makes the capture groups from within the look-ahead
// expression available.
int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
- int32_t i;
- for (i=0; i<fFrameSize; i++) {
- newFP[i] = ((int64_t *)fp)[i];
+ int32_t j;
+ for (j=0; j<fFrameSize; j++) {
+ newFP[j] = ((int64_t *)fp)[j];
}
fp = (REStackFrame *)newFP;
fStack->setSize(newStackSize);
// of this op in the pattern.
int32_t minML = (int32_t)pat[fp->fPatIdx++];
int32_t maxML = (int32_t)pat[fp->fPatIdx++];
+ if (!UTEXT_USES_U16(fInputText)) {
+ // utf-8 fix to maximum match length. The pattern compiler assumes utf-16.
+ // The max length need not be exact; it just needs to be >= actual maximum.
+ maxML *= 3;
+ }
U_ASSERT(minML <= maxML);
U_ASSERT(minML >= 0);
// Fetch (from data) the last input index where a match was attempted.
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
- int64_t *lbStartIdx = &fData[opValue+2];
- if (*lbStartIdx < 0) {
+ int64_t &lbStartIdx = fData[opValue+2];
+ if (lbStartIdx < 0) {
// First time through loop.
- *lbStartIdx = fp->fInputIdx - minML;
+ lbStartIdx = fp->fInputIdx - minML;
+ if (lbStartIdx > 0) {
+ // move index to a code point boundary, if it's not on one already.
+ UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx);
+ lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
+ }
} else {
// 2nd through nth time through the loop.
// Back up start position for match by one.
- if (*lbStartIdx == 0) {
- (*lbStartIdx)--;
+ if (lbStartIdx == 0) {
+ (lbStartIdx)--;
} else {
- UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
+ UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx);
(void)UTEXT_PREVIOUS32(fInputText);
- *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
+ lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
}
}
- if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
+ if (lbStartIdx < 0 || lbStartIdx < fp->fInputIdx - maxML) {
// We have tried all potential match starting points without
// getting a match. Backtrack out, and out of the
// Look Behind altogether.
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
// (successful match will fall off the end of the loop.)
fp = StateSave(fp, fp->fPatIdx-3, status);
- fp->fInputIdx = *lbStartIdx;
+ fp->fInputIdx = lbStartIdx;
}
break;
// Fetch the extra parameters of this op.
int32_t minML = (int32_t)pat[fp->fPatIdx++];
int32_t maxML = (int32_t)pat[fp->fPatIdx++];
+ if (!UTEXT_USES_U16(fInputText)) {
+ // utf-8 fix to maximum match length. The pattern compiler assumes utf-16.
+ // The max length need not be exact; it just needs to be >= actual maximum.
+ maxML *= 3;
+ }
int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
continueLoc = URX_VAL(continueLoc);
U_ASSERT(minML <= maxML);
// Fetch (from data) the last input index where a match was attempted.
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
- int64_t *lbStartIdx = &fData[opValue+2];
- if (*lbStartIdx < 0) {
+ int64_t &lbStartIdx = fData[opValue+2];
+ if (lbStartIdx < 0) {
// First time through loop.
- *lbStartIdx = fp->fInputIdx - minML;
+ lbStartIdx = fp->fInputIdx - minML;
+ if (lbStartIdx > 0) {
+ // move index to a code point boundary, if it's not on one already.
+ UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx);
+ lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
+ }
} else {
// 2nd through nth time through the loop.
// Back up start position for match by one.
- if (*lbStartIdx == 0) {
- (*lbStartIdx)--;
+ if (lbStartIdx == 0) {
+ (lbStartIdx)--;
} else {
- UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
+ UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx);
(void)UTEXT_PREVIOUS32(fInputText);
- *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
+ lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
}
}
- if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
+ if (lbStartIdx < 0 || lbStartIdx < fp->fInputIdx - maxML) {
// We have tried all potential match starting points without
// getting a match, which means that the negative lookbehind as
// a whole has succeeded. Jump forward to the continue location
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
// (successful match will cause a FAIL out of the loop altogether.)
fp = StateSave(fp, fp->fPatIdx-4, status);
- fp->fInputIdx = *lbStartIdx;
+ fp->fInputIdx = lbStartIdx;
}
break;
// This op scans through all matching input.
// The following LOOP_C op emulates stack unwinding if the following pattern fails.
{
- U_ASSERT(opValue > 0 && opValue < sets->size());
+ U_ASSERT(opValue > 0 && opValue < fSets->size());
Regex8BitSet *s8 = &fPattern->fSets8[opValue];
- UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
+ UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue);
// Loop through input, until either the input is exhausted or
// we reach a character that is not a member of the set.
default:
// Trouble. The compiled pattern contains an entry with an
// unrecognized type tag.
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
}
if (U_FAILURE(status)) {
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) {
printf("MatchAt(startIdx=%d)\n", startIdx);
- printf("Original Pattern: ");
- UChar32 c = utext_next32From(fPattern->fPattern, 0);
- while (c != U_SENTINEL) {
- if (c<32 || c>256) {
- c = '.';
- }
- printf("%c", c);
-
- c = UTEXT_NEXT32(fPattern->fPattern);
- }
- printf("\n");
- printf("Input String: ");
- c = utext_next32From(fInputText, 0);
- while (c != U_SENTINEL) {
- if (c<32 || c>256) {
- c = '.';
- }
- printf("%c", c);
-
- c = UTEXT_NEXT32(fInputText);
- }
- printf("\n");
- printf("\n");
+ printf("Original Pattern: \"%s\"\n", CStr(StringFromUText(fPattern->fPattern))());
+ printf("Input String: \"%s\"\n\n", CStr(StringFromUText(fInputText))());
}
#endif
int64_t *pat = fPattern->fCompiledPat->getBuffer();
const UChar *litText = fPattern->fLiteralText.getBuffer();
- UVector *sets = fPattern->fSets;
+ UVector *fSets = fPattern->fSets;
const UChar *inputBuf = fInputText->chunkContents;
fFrameSize = fPattern->fFrameSize;
REStackFrame *fp = resetStack();
+ if (U_FAILURE(fDeferredStatus)) {
+ status = fDeferredStatus;
+ return;
+ }
fp->fPatIdx = 0;
fp->fInputIdx = startIdx;
break;
}
- U_ASSERT(opValue > 0 && opValue < sets->size());
+ U_ASSERT(opValue > 0 && opValue < fSets->size());
// There is input left. Pick up one char and test it for set membership.
UChar32 c;
break;
}
} else {
- UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
+ UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue);
if (s->contains(c)) {
// The character is in the set. A Match.
break;
}
}
fp = StateSave(fp, fp->fPatIdx, status);
+ } else {
+ // Increment time-out counter. (StateSave() does it if count >= minCount)
+ fTickCounter--;
+ if (fTickCounter <= 0) {
+ IncrementTime(status); // Re-initializes fTickCounter
+ }
}
fp->fPatIdx = opValue + 4; // Loop back.
}
// We haven't met the minimum number of matches yet.
// Loop back for another one.
fp->fPatIdx = opValue + 4; // Loop back.
+ fTickCounter--;
+ if (fTickCounter <= 0) {
+ IncrementTime(status); // Re-initializes fTickCounter
+ }
} else {
// We do have the minimum number of matches.
if (newFP == (int64_t *)fp) {
break;
}
- int32_t i;
- for (i=0; i<fFrameSize; i++) {
- newFP[i] = ((int64_t *)fp)[i];
+ int32_t j;
+ for (j=0; j<fFrameSize; j++) {
+ newFP[j] = ((int64_t *)fp)[j];
}
fp = (REStackFrame *)newFP;
fStack->setSize(newStackSize);
break;
}
}
+ if (success && groupStartIdx < groupEndIdx && U16_IS_LEAD(inputBuf[groupEndIdx-1]) &&
+ inputIndex < fActiveLimit && U16_IS_TRAIL(inputBuf[inputIndex])) {
+ // Capture group ended with an unpaired lead surrogate.
+ // A back reference is not permitted to match only the lead of a surrogate pair.
+ success = FALSE;
+ }
if (success) {
fp->fInputIdx = inputIndex;
} else {
// This makes the capture groups from within the look-ahead
// expression available.
int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
- int32_t i;
- for (i=0; i<fFrameSize; i++) {
- newFP[i] = ((int64_t *)fp)[i];
+ int32_t j;
+ for (j=0; j<fFrameSize; j++) {
+ newFP[j] = ((int64_t *)fp)[j];
}
fp = (REStackFrame *)newFP;
fStack->setSize(newStackSize);
// Fetch (from data) the last input index where a match was attempted.
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
- int64_t *lbStartIdx = &fData[opValue+2];
- if (*lbStartIdx < 0) {
+ int64_t &lbStartIdx = fData[opValue+2];
+ if (lbStartIdx < 0) {
// First time through loop.
- *lbStartIdx = fp->fInputIdx - minML;
+ lbStartIdx = fp->fInputIdx - minML;
+ if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
+ U16_SET_CP_START(inputBuf, 0, lbStartIdx);
+ }
} else {
// 2nd through nth time through the loop.
// Back up start position for match by one.
- if (*lbStartIdx == 0) {
- (*lbStartIdx)--;
+ if (lbStartIdx == 0) {
+ lbStartIdx--;
} else {
- U16_BACK_1(inputBuf, 0, *lbStartIdx);
+ U16_BACK_1(inputBuf, 0, lbStartIdx);
}
}
- if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
+ if (lbStartIdx < 0 || lbStartIdx < fp->fInputIdx - maxML) {
// We have tried all potential match starting points without
// getting a match. Backtrack out, and out of the
// Look Behind altogether.
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
// (successful match will fall off the end of the loop.)
fp = StateSave(fp, fp->fPatIdx-3, status);
- fp->fInputIdx = *lbStartIdx;
+ fp->fInputIdx = lbStartIdx;
}
break;
// Fetch (from data) the last input index where a match was attempted.
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
- int64_t *lbStartIdx = &fData[opValue+2];
- if (*lbStartIdx < 0) {
+ int64_t &lbStartIdx = fData[opValue+2];
+ if (lbStartIdx < 0) {
// First time through loop.
- *lbStartIdx = fp->fInputIdx - minML;
+ lbStartIdx = fp->fInputIdx - minML;
+ if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
+ U16_SET_CP_START(inputBuf, 0, lbStartIdx);
+ }
} else {
// 2nd through nth time through the loop.
// Back up start position for match by one.
- if (*lbStartIdx == 0) {
- (*lbStartIdx)--; // Because U16_BACK is unsafe starting at 0.
+ if (lbStartIdx == 0) {
+ lbStartIdx--; // Because U16_BACK is unsafe starting at 0.
} else {
- U16_BACK_1(inputBuf, 0, *lbStartIdx);
+ U16_BACK_1(inputBuf, 0, lbStartIdx);
}
}
- if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
+ if (lbStartIdx < 0 || lbStartIdx < fp->fInputIdx - maxML) {
// We have tried all potential match starting points without
// getting a match, which means that the negative lookbehind as
// a whole has succeeded. Jump forward to the continue location
// Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
// (successful match will cause a FAIL out of the loop altogether.)
fp = StateSave(fp, fp->fPatIdx-4, status);
- fp->fInputIdx = *lbStartIdx;
+ fp->fInputIdx = lbStartIdx;
}
break;
// This op scans through all matching input.
// The following LOOP_C op emulates stack unwinding if the following pattern fails.
{
- U_ASSERT(opValue > 0 && opValue < sets->size());
+ U_ASSERT(opValue > 0 && opValue < fSets->size());
Regex8BitSet *s8 = &fPattern->fSets8[opValue];
- UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
+ UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue);
// Loop through input, until either the input is exhausted or
// we reach a character that is not a member of the set.
default:
// Trouble. The compiled pattern contains an entry with an
// unrecognized type tag.
- U_ASSERT(FALSE);
+ UPRV_UNREACHABLE;
}
if (U_FAILURE(status)) {
U_NAMESPACE_END
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
+