X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/f3c0d7a59d99c2a94c6b8822291f0e42be3773c9..c5116b9f5a666b9d59f443b3770acd6ef64dc6c3:/icuSources/i18n/rematch.cpp diff --git a/icuSources/i18n/rematch.cpp b/icuSources/i18n/rematch.cpp index a570b74f..968fc47d 100644 --- a/icuSources/i18n/rematch.cpp +++ b/icuSources/i18n/rematch.cpp @@ -438,7 +438,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest, status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; } } - + } else if (u_isdigit(nextChar)) { // $n Scan for a capture group number int32_t numCaptureGroups = fPattern->fGroupMap->size(); @@ -459,7 +459,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest, break; } (void)UTEXT_NEXT32(replacement); - groupNum=groupNum*10 + nextDigitVal; + groupNum=groupNum*10 + nextDigitVal; ++numDigits; } } else { @@ -717,7 +717,7 @@ UBool RegexMatcher::find(UErrorCode &status) { if (findProgressInterrupt(startPos, status)) return FALSE; } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_START: // Matches are only possible at the start of the input string @@ -765,7 +765,7 @@ UBool RegexMatcher::find(UErrorCode &status) { return FALSE; } } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_STRING: case START_CHAR: @@ -797,11 +797,11 @@ UBool RegexMatcher::find(UErrorCode &status) { return FALSE; } } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_LINE: { - UChar32 c; + UChar32 ch; if (startPos == fAnchorStart) { MatchAt(startPos, FALSE, status); if (U_FAILURE(status)) { @@ -811,17 +811,17 @@ UBool RegexMatcher::find(UErrorCode &status) { return TRUE; } UTEXT_SETNATIVEINDEX(fInputText, startPos); - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); } else { UTEXT_SETNATIVEINDEX(fInputText, startPos); - c = UTEXT_PREVIOUS32(fInputText); + ch = UTEXT_PREVIOUS32(fInputText); UTEXT_SETNATIVEINDEX(fInputText, startPos); } if (fPattern->fFlags & UREGEX_UNIX_LINES) { for (;;) { - if (c == 0x0a) { + if (ch == 0x0a) { MatchAt(startPos, FALSE, status); if (U_FAILURE(status)) { return FALSE; @@ -836,7 +836,7 @@ UBool RegexMatcher::find(UErrorCode &status) { fHitEnd = TRUE; return FALSE; } - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop @@ -846,8 +846,8 @@ UBool RegexMatcher::find(UErrorCode &status) { } } else { for (;;) { - if (isLineTerminator(c)) { - if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { + if (isLineTerminator(ch)) { + if (ch == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { (void)UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); } @@ -865,7 +865,7 @@ UBool RegexMatcher::find(UErrorCode &status) { fHitEnd = TRUE; return FALSE; } - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop @@ -877,11 +877,10 @@ UBool RegexMatcher::find(UErrorCode &status) { } default: - U_ASSERT(FALSE); + UPRV_UNREACHABLE; } - U_ASSERT(FALSE); - return FALSE; + UPRV_UNREACHABLE; } @@ -992,7 +991,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { if (findProgressInterrupt(startPos, status)) return FALSE; } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_START: // Matches are only possible at the start of the input string @@ -1034,7 +1033,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { return FALSE; } } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_STRING: case START_CHAR: @@ -1063,11 +1062,11 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { return FALSE; } } - U_ASSERT(FALSE); + UPRV_UNREACHABLE; case START_LINE: { - UChar32 c; + UChar32 ch; if (startPos == fAnchorStart) { MatchChunkAt(startPos, FALSE, status); if (U_FAILURE(status)) { @@ -1090,8 +1089,8 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { if (fPattern->fFlags & UREGEX_UNIX_LINES) { for (;;) { - c = inputBuf[startPos-1]; - if (c == 0x0a) { + ch = inputBuf[startPos-1]; + if (ch == 0x0a) { MatchChunkAt(startPos, FALSE, status); if (U_FAILURE(status)) { return FALSE; @@ -1114,9 +1113,9 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { } } else { for (;;) { - c = inputBuf[startPos-1]; - if (isLineTerminator(c)) { - if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { + ch = inputBuf[startPos-1]; + if (isLineTerminator(ch)) { + if (ch == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { startPos++; } MatchChunkAt(startPos, FALSE, status); @@ -1143,11 +1142,10 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { } default: - U_ASSERT(FALSE); + UPRV_UNREACHABLE; } - U_ASSERT(FALSE); - return FALSE; + UPRV_UNREACHABLE; } @@ -2196,7 +2194,7 @@ int32_t RegexMatcher::split(UText *input, break; } i++; - dest[i] = utext_extract_replace(fInputText, dest[i], + dest[i] = utext_extract_replace(fInputText, dest[i], start64(groupNum, status), end64(groupNum, status), &status); } @@ -2209,7 +2207,7 @@ int32_t RegexMatcher::split(UText *input, if (dest[i] == NULL) { dest[i] = utext_openUChars(NULL, NULL, 0, &status); } else { - static UChar emptyString[] = {(UChar)0}; + static const UChar emptyString[] = {(UChar)0}; utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status); } } @@ -2783,7 +2781,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { int64_t *pat = fPattern->fCompiledPat->getBuffer(); const UChar *litText = fPattern->fLiteralText.getBuffer(); - UVector *sets = fPattern->fSets; + UVector *fSets = fPattern->fSets; fFrameSize = fPattern->fFrameSize; REStackFrame *fp = resetStack(); @@ -3385,7 +3383,7 @@ GC_Done: // There is input left. Pick up one char and test it for set membership. UChar32 c = UTEXT_NEXT32(fInputText); - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); if (c<256) { Regex8BitSet *s8 = &fPattern->fSets8[opValue]; if (s8->contains(c)) { @@ -3393,7 +3391,7 @@ GC_Done: break; } } else { - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); if (s->contains(c)) { // The character is in the set. A Match. fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); @@ -3680,9 +3678,9 @@ GC_Done: if (newFP == (int64_t *)fp) { break; } - int32_t i; - for (i=0; isetSize(newStackSize); @@ -3839,9 +3837,9 @@ GC_Done: // This makes the capture groups from within the look-ahead // expression available. int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; - int32_t i; - for (i=0; isetSize(newStackSize); @@ -4132,9 +4130,9 @@ GC_Done: // This op scans through all matching input. // The following LOOP_C op emulates stack unwinding if the following pattern fails. { - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); Regex8BitSet *s8 = &fPattern->fSets8[opValue]; - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); // Loop through input, until either the input is exhausted or // we reach a character that is not a member of the set. @@ -4287,7 +4285,7 @@ GC_Done: default: // Trouble. The compiled pattern contains an entry with an // unrecognized type tag. - U_ASSERT(FALSE); + UPRV_UNREACHABLE; } if (U_FAILURE(status)) { @@ -4359,7 +4357,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu int64_t *pat = fPattern->fCompiledPat->getBuffer(); const UChar *litText = fPattern->fLiteralText.getBuffer(); - UVector *sets = fPattern->fSets; + UVector *fSets = fPattern->fSets; const UChar *inputBuf = fInputText->chunkContents; @@ -4937,7 +4935,7 @@ GC_Done: break; } - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); // There is input left. Pick up one char and test it for set membership. UChar32 c; @@ -4949,7 +4947,7 @@ GC_Done: break; } } else { - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); if (s->contains(c)) { // The character is in the set. A Match. break; @@ -5223,9 +5221,9 @@ GC_Done: if (newFP == (int64_t *)fp) { break; } - int32_t i; - for (i=0; isetSize(newStackSize); @@ -5370,9 +5368,9 @@ GC_Done: // This makes the capture groups from within the look-ahead // expression available. int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; - int32_t i; - for (i=0; isetSize(newStackSize); @@ -5478,7 +5476,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { @@ -5555,7 +5553,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { @@ -5632,9 +5630,9 @@ GC_Done: // This op scans through all matching input. // The following LOOP_C op emulates stack unwinding if the following pattern fails. { - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); Regex8BitSet *s8 = &fPattern->fSets8[opValue]; - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); // Loop through input, until either the input is exhausted or // we reach a character that is not a member of the set. @@ -5787,7 +5785,7 @@ GC_Done: default: // Trouble. The compiled pattern contains an entry with an // unrecognized type tag. - U_ASSERT(FALSE); + UPRV_UNREACHABLE; } if (U_FAILURE(status)) { @@ -5827,3 +5825,4 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) U_NAMESPACE_END #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS +