/*
**********************************************************************
-* Copyright (C) 2001-2003 IBM and others. All rights reserved.
+* Copyright (C) 2001-2004 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 07/02/2001 synwee Creation.
#include "ucol_imp.h"
#include "usrchimp.h"
#include "cmemory.h"
+#include "ucln_in.h"
// internal definition ---------------------------------------------------
inline void setColEIterOffset(UCollationElements *elems,
int32_t offset)
{
- collIterate *ci = &(elems->iteratordata_);
- ci->pos = ci->string + offset;
- ci->CEpos = ci->toReturn = ci->CEs;
- if (ci->flags & UCOL_ITER_INNORMBUF) {
- ci->flags = ci->origFlags;
- }
- ci->fcdPosition = NULL;
+ collIterate *ci = &(elems->iteratordata_);
+ ci->pos = ci->string + offset;
+ ci->CEpos = ci->toReturn = ci->CEs;
+ if (ci->flags & UCOL_ITER_INNORMBUF) {
+ ci->flags = ci->origFlags;
+ }
+ ci->fcdPosition = NULL;
}
/**
return UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_;
}
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+usearch_cleanup(void) {
+ FCD_ = NULL;
+ return TRUE;
+}
+U_CDECL_END
+
/**
* Initializing the fcd tables.
* Internal method, status assumed to be a success.
{
if (FCD_ == NULL) {
FCD_ = unorm_getFCDTrie(status);
+ ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
}
}
* @return the modified collation element
*/
static
-inline uint32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
+inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
{
// note for tertiary we can't use the collator->tertiaryMask, that
// is a preprocessed mask that takes into account case options. since
* @return new destination array, destination if there was no new allocation
*/
static
-inline uint32_t * addTouint32_tArray(uint32_t *destination,
- uint32_t offset,
- uint32_t *destinationlength,
- uint32_t value,
- uint32_t increments,
- UErrorCode *status)
+inline int32_t * addTouint32_tArray(int32_t *destination,
+ uint32_t offset,
+ uint32_t *destinationlength,
+ uint32_t value,
+ uint32_t increments,
+ UErrorCode *status)
{
uint32_t newlength = *destinationlength;
if (offset + 1 == newlength) {
newlength += increments;
- uint32_t *temp = (uint32_t *)allocateMemory(
- sizeof(uint32_t) * newlength, status);
+ int32_t *temp = (int32_t *)allocateMemory(
+ sizeof(int32_t) * newlength, status);
if (U_FAILURE(*status)) {
return NULL;
}
- uprv_memcpy(temp, destination, sizeof(uint32_t) * offset);
+ uprv_memcpy(temp, destination, sizeof(int32_t) * offset);
*destinationlength = newlength;
destination = temp;
}
{
UPattern *pattern = &(strsrch->pattern);
uint32_t cetablesize = INITIAL_ARRAY_SIZE_;
- uint32_t *cetable = pattern->CEBuffer;
+ int32_t *cetable = pattern->CEBuffer;
uint32_t patternlength = pattern->textLength;
UCollationElements *coleiter = strsrch->utilIter;
uint16_t offset = 0;
uint16_t result = 0;
- uint32_t ce;
+ int32_t ce;
while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER &&
U_SUCCESS(*status)) {
uint32_t newce = getCE(strsrch, ce);
if (newce) {
- uint32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize,
+ int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize,
newce,
patternlength - ucol_getOffset(coleiter) + 1,
status);
*/
static
inline void setShiftTable(int16_t shift[], int16_t backshift[],
- uint32_t *cetable, int32_t cesize,
+ int32_t *cetable, int32_t cesize,
int16_t expansionsize,
int16_t defaultforward,
int16_t defaultbackward)
int32_t cesize = pattern->CELength;
int16_t minlength = cesize > expandlength
- ? (int16_t)cesize - expandlength : 1;
+ ? (int16_t)cesize - expandlength : 1;
pattern->defaultShiftSize = minlength;
setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
cesize, expandlength, minlength, minlength);
ucol_setText(coleiter, text, end - start, &status);
for (int32_t count = 0; count < strsrch->pattern.CELength;
count ++) {
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
if (ce == UCOL_IGNORABLE) {
count --;
continue;
return FALSE;
}
}
- uint32_t nextce = ucol_next(coleiter, &status);
+ int32_t nextce = ucol_next(coleiter, &status);
while (ucol_getOffset(coleiter) == (end - start)
&& getCE(strsrch, nextce) == UCOL_IGNORABLE) {
nextce = ucol_next(coleiter, &status);
inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
int32_t textoffset)
{
- int32_t textlength = strsrch->search->textLength;
+ int32_t textlength = strsrch->search->textLength;
if (strsrch->pattern.hasSuffixAccents &&
textoffset < textlength) {
int32_t temp = textoffset;
static
inline int32_t shiftForward(UStringSearch *strsrch,
int32_t textoffset,
- uint32_t ce,
+ int32_t ce,
int32_t patternceindex)
{
- UPattern *pattern = &(strsrch->pattern);
+ UPattern *pattern = &(strsrch->pattern);
if (ce != UCOL_NULLORDER) {
int32_t shift = pattern->shift[hash(ce)];
// this is to adjust for characters in the middle of the
UErrorCode status = U_ZERO_ERROR;
// we have been iterating forwards previously
uint32_t ignorable = TRUE;
- uint32_t firstce = strsrch->pattern.CE[0];
+ int32_t firstce = strsrch->pattern.CE[0];
- setColEIterOffset(coleiter, start);
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
- if (U_FAILURE(status)) {
+ setColEIterOffset(coleiter, start);
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+ if (U_FAILURE(status)) {
return TRUE;
}
while (ce != firstce) {
return TRUE;
}
}
- if (!ignorable && inNormBuf(coleiter)) {
+ if (!ignorable && inNormBuf(coleiter)) {
// within normalization buffer, discontiguous handled here
- return TRUE;
+ return TRUE;
}
- // within text
+ // within text
int32_t temp = start;
- // original code
- // accent = (getFCD(strsrch->search->text, &temp,
+ // original code
+ // accent = (getFCD(strsrch->search->text, &temp,
// strsrch->search->textLength)
- // >> SECOND_LAST_BYTE_SHIFT_);
- // however this code does not work well with VC7 .net in release mode.
- // maybe the inlines for getFCD combined with shifting has bugs in
- // VC7. anyways this is a work around.
- UBool accent = getFCD(strsrch->search->text, &temp,
+ // >> SECOND_LAST_BYTE_SHIFT_);
+ // however this code does not work well with VC7 .net in release mode.
+ // maybe the inlines for getFCD combined with shifting has bugs in
+ // VC7. anyways this is a work around.
+ UBool accent = getFCD(strsrch->search->text, &temp,
strsrch->search->textLength) > 0xFF;
if (!accent) {
- return checkExtraMatchAccents(strsrch, start, end, &status);
+ return checkExtraMatchAccents(strsrch, start, end, &status);
}
- if (!ignorable) {
+ if (!ignorable) {
return TRUE;
}
if (start > 0) {
int32_t textlength = strsrch->search->textLength;
UTF_BACK_1(text, 0, temp);
if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
- uint32_t firstce = strsrch->pattern.CE[0];
+ int32_t firstce = strsrch->pattern.CE[0];
UCollationElements *coleiter = strsrch->textIter;
UErrorCode status = U_ZERO_ERROR;
setColEIterOffset(coleiter, start);
}
count ++;
}
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
if (U_FAILURE(status)) {
return TRUE;
}
else {
result = start >= lastmatchindex;
}
- if (!strsrch->search->isOverlap) {
+ if (!result && !strsrch->search->isOverlap) {
if (strsrch->search->isForwardSearching) {
result = start < lastmatchindex + strsrch->search->matchedLength;
}
const UChar *text = strsrch->search->text;
// This part checks if either ends of the match contains potential
// contraction. If so we'll have to iterate through them
- // The start contraction needs to be checked since ucol_previous dumps
- // all characters till the first safe character into the buffer.
- // *start + 1 is used to test for the unsafe characters instead of *start
- // because ucol_prev takes all unsafe characters till the first safe
- // character ie *start. so by testing *start + 1, we can estimate if
- // excess prefix characters has been included in the potential search
- // results.
+ // The start contraction needs to be checked since ucol_previous dumps
+ // all characters till the first safe character into the buffer.
+ // *start + 1 is used to test for the unsafe characters instead of *start
+ // because ucol_prev takes all unsafe characters till the first safe
+ // character ie *start. so by testing *start + 1, we can estimate if
+ // excess prefix characters has been included in the potential search
+ // results.
if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) ||
(*start + 1 < textlength
&& ucol_unsafeCP(text[*start + 1], collator))) {
// we are only looking for acute and ring \u030A and \u0301, we'll
// have to skip the first ce in the expansion buffer.
ucol_next(coleiter, status);
- if (U_FAILURE(*status)) {
- return FALSE;
- }
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
if (ucol_getOffset(coleiter) != temp) {
*start = temp;
temp = ucol_getOffset(coleiter);
expansion --;
}
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
int32_t count = 0;
while (count < patterncelength) {
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, status));
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
if (ce == UCOL_IGNORABLE) {
continue;
}
UCollationElements *coleiter = strsrch->textIter;
int32_t start = getColElemIterOffset(coleiter, FALSE);
- if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
- return FALSE;
+ if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
+ return FALSE;
}
// this totally matches, however we need to check if it is repeating
hasAccentsBeforeMatch(strsrch, start, *textoffset) ||
!checkIdentical(strsrch, start, *textoffset) ||
hasAccentsAfterMatch(strsrch, start, *textoffset)) {
-
- (*textoffset) ++;
+
+ (*textoffset) ++;
*textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);
- return FALSE;
+ return FALSE;
}
// totally match, we will get rid of the ending ignorables.
strsrch->search->matchedIndex = start;
strsrch->search->matchedLength = *textoffset - start;
- return TRUE;
+ return TRUE;
}
/**
UCollationElements *coleiter)
{
int patternceindex = strsrch->pattern.CELength;
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
UErrorCode status = U_ZERO_ERROR;
while (patternceindex > 0) {
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
if (ce == UCOL_IGNORABLE) {
continue;
}
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t accentsize = getUnblockedAccentIndex(accents,
accentsindex);
- int32_t count = (2 << (accentsize - 1)) - 2;
+ int32_t count = (2 << (accentsize - 1)) - 1;
UChar buffer[INITIAL_ARRAY_SIZE_];
UCollationElements *coleiter = strsrch->utilIter;
while (U_SUCCESS(*status) && count > 0) {
ucol_setText(coleiter, safetext, safetextlength, status);
// status checked in loop below
- uint32_t *ce = strsrch->pattern.CE;
- uint32_t celength = strsrch->pattern.CELength;
+ int32_t *ce = strsrch->pattern.CE;
+ int32_t celength = strsrch->pattern.CELength;
int ceindex = celength - 1;
UBool isSafe = TRUE; // indication flag for position in safe zone
while (ceindex >= 0) {
- uint32_t textce = ucol_previous(coleiter, status);
+ int32_t textce = ucol_previous(coleiter, status);
if (U_FAILURE(*status)) {
if (isSafe) {
cleanUpSafeText(strsrch, safetext, safebuffer);
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
- // 2 power n - 1 minus the full set of accents
- int32_t count = (2 << (size - 1)) - 2;
+ // 2 power n - 1 plus the full set of accents
+ int32_t count = (2 << (size - 1)) - 1;
while (U_SUCCESS(*status) && count > 0) {
UChar *rearrange = strsrch->canonicalSuffixAccents;
// copy the base characters
const UChar *text = strsrch->search->text;
// This part checks if either ends of the match contains potential
// contraction. If so we'll have to iterate through them
- if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) ||
+ if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) ||
(*start + 1 < textlength
&& ucol_unsafeCP(text[*start + 1], collator))) {
int32_t expansion = getExpansionPrefix(coleiter);
// we are only looking for acute and ring \u030A and \u0301, we'll
// have to skip the first ce in the expansion buffer.
ucol_next(coleiter, status);
- if (U_FAILURE(*status)) {
- return FALSE;
- }
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
if (ucol_getOffset(coleiter) != temp) {
*start = temp;
temp = ucol_getOffset(coleiter);
expansion --;
}
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
int32_t count = 0;
int32_t textlength = strsrch->search->textLength;
while (count < patterncelength) {
- uint32_t ce = getCE(strsrch, ucol_next(coleiter, status));
+ int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
// status checked below, note that if status is a failure
// ucol_next returns UCOL_NULLORDER
if (ce == UCOL_IGNORABLE) {
// accents may have extra starting ces, this occurs when a
// pure accent pattern is matched without rearrangement
// text \u0325\u0300 and looking for \u0300
- uint32_t expected = patternce[0];
+ int32_t expected = patternce[0];
if (getFCD(text, start, textlength) & LAST_BYTE_MASK_) {
ce = getCE(strsrch, ucol_next(coleiter, status));
while (U_SUCCESS(*status) && ce != expected &&
static
inline int32_t reverseShift(UStringSearch *strsrch,
int32_t textoffset,
- uint32_t ce,
+ int32_t ce,
int32_t patternceindex)
{
if (strsrch->search->isOverlap) {
const UChar *text = strsrch->search->text;
// This part checks if either if the start of the match contains potential
// contraction. If so we'll have to iterate through them
- // Since we used ucol_next while previously looking for the potential
- // match, this guarantees that our end will not be a partial contraction,
- // or a partial supplementary character.
+ // Since we used ucol_next while previously looking for the potential
+ // match, this guarantees that our end will not be a partial contraction,
+ // or a partial supplementary character.
if (*start < textlength && ucol_unsafeCP(text[*start], collator)) {
int32_t expansion = getExpansionSuffix(coleiter);
UBool expandflag = expansion > 0;
// we are only looking for A ring A\u030A, we'll have to skip the
// last ce in the expansion buffer
ucol_previous(coleiter, status);
- if (U_FAILURE(*status)) {
- return FALSE;
- }
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
if (ucol_getOffset(coleiter) != temp) {
*end = temp;
temp = ucol_getOffset(coleiter);
expansion --;
}
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
int32_t count = patterncelength;
while (count > 0) {
- uint32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
+ int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
// status checked below, note that if status is a failure
// ucol_previous returns UCOL_NULLORDER
if (ce == UCOL_IGNORABLE) {
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t accentsize = getUnblockedAccentIndex(accents,
accentsindex);
- int32_t count = (2 << (accentsize - 1)) - 2;
+ int32_t count = (2 << (accentsize - 1)) - 1;
UChar buffer[INITIAL_ARRAY_SIZE_];
UCollationElements *coleiter = strsrch->utilIter;
while (U_SUCCESS(*status) && count > 0) {
ucol_setText(coleiter, safetext, safetextlength, status);
// status checked in loop below
- uint32_t *ce = strsrch->pattern.CE;
+ int32_t *ce = strsrch->pattern.CE;
int32_t celength = strsrch->pattern.CELength;
int ceindex = 0;
UBool isSafe = TRUE; // safe zone indication flag for position
int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
while (ceindex < celength) {
- uint32_t textce = ucol_next(coleiter, status);
+ int32_t textce = ucol_next(coleiter, status);
if (U_FAILURE(*status)) {
if (isSafe) {
cleanUpSafeText(strsrch, safetext, safebuffer);
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
- // 2 power n - 1 minus the full set of accents
- int32_t count = (2 << (size - 1)) - 2;
+ // 2 power n - 1 plus the full set of accents
+ int32_t count = (2 << (size - 1)) - 1;
while (U_SUCCESS(*status) && count > 0) {
UChar *rearrange = strsrch->canonicalPrefixAccents;
// copy the base characters
int32_t temp = *end;
const UCollator *collator = strsrch->collator;
const UChar *text = strsrch->search->text;
- // This part checks if either if the start of the match contains potential
+ // This part checks if either if the start of the match contains potential
// contraction. If so we'll have to iterate through them
- // Since we used ucol_next while previously looking for the potential
- // match, this guarantees that our end will not be a partial contraction,
- // or a partial supplementary character.
+ // Since we used ucol_next while previously looking for the potential
+ // match, this guarantees that our end will not be a partial contraction,
+ // or a partial supplementary character.
if (*start < textlength && ucol_unsafeCP(text[*start], collator)) {
int32_t expansion = getExpansionSuffix(coleiter);
UBool expandflag = expansion > 0;
// we are only looking for A ring A\u030A, we'll have to skip the
// last ce in the expansion buffer
ucol_previous(coleiter, status);
- if (U_FAILURE(*status)) {
- return FALSE;
- }
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
if (ucol_getOffset(coleiter) != temp) {
*end = temp;
temp = ucol_getOffset(coleiter);
expansion --;
}
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
int32_t count = patterncelength;
while (count > 0) {
- uint32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
+ int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
// status checked below, note that if status is a failure
// ucol_previous returns UCOL_NULLORDER
if (ce == UCOL_IGNORABLE) {
ce != patternce[patterncelength - 1]) {
// accents may have extra starting ces, this occurs when a
// pure accent pattern is matched without rearrangement
- uint32_t expected = patternce[patterncelength - 1];
+ int32_t expected = patternce[patterncelength - 1];
UTF_BACK_1(text, 0, *end);
if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) {
ce = getCE(strsrch, ucol_previous(coleiter, status));
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
+ // string search does not really work when numeric collation is turned on
+ if(ucol_getAttribute(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) {
+ *status = U_UNSUPPORTED_ERROR;
+ }
+
if (U_SUCCESS(*status)) {
initializeFCD(status);
if (U_FAILURE(*status)) {
strsrch->search->matchedLength = 0;
strsrch->search->reset = TRUE;
#if !UCONFIG_NO_BREAK_ITERATION
- if (strsrch->search->breakIter != NULL) {
- ubrk_setText(strsrch->search->breakIter, text,
- textlength, status);
- }
+ if (strsrch->search->breakIter != NULL) {
+ ubrk_setText(strsrch->search->breakIter, text,
+ textlength, status);
+ }
#endif
}
}
uprv_init_collIterate(collator, strsrch->search->text,
strsrch->search->textLength,
&(strsrch->textIter->iteratordata_));
- strsrch->utilIter->iteratordata_.coll = collator;
+ strsrch->utilIter->iteratordata_.coll = collator;
}
}
}
UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
- int32_t offset = usearch_getOffset(strsrch);
- USearch *search = strsrch->search;
- search->reset = FALSE;
- int32_t textlength = search->textLength;
- int32_t matchedindex = search->matchedIndex;
+ // note offset is either equivalent to the start of the previous match
+ // or is set by the user
+ int32_t offset = usearch_getOffset(strsrch);
+ USearch *search = strsrch->search;
+ search->reset = FALSE;
+ int32_t textlength = search->textLength;
if (search->isForwardSearching) {
- if (offset == textlength || matchedindex == textlength ||
- (!search->isOverlap &&
+ if (offset == textlength
+ || (!search->isOverlap &&
(offset + strsrch->pattern.defaultShiftSize > textlength ||
- (matchedindex != USEARCH_DONE &&
- matchedindex + search->matchedLength >= textlength)))) {
+ (search->matchedIndex != USEARCH_DONE &&
+ offset + search->matchedLength >= textlength)))) {
// not enough characters to match
setMatchNotFound(strsrch);
return USEARCH_DONE;
// string. the iterator would have been set to offset 0 if a
// match is not found.
search->isForwardSearching = TRUE;
- if (matchedindex != USEARCH_DONE) {
+ if (search->matchedIndex != USEARCH_DONE) {
// there's no need to set the collation element iterator
// the next call to next will set the offset.
- return matchedindex;
+ return search->matchedIndex;
}
}
if (U_SUCCESS(*status)) {
if (strsrch->pattern.CELength == 0) {
- if (matchedindex == USEARCH_DONE) {
+ if (search->matchedIndex == USEARCH_DONE) {
search->matchedIndex = offset;
}
else { // moves by codepoints
}
}
else {
- if (search->matchedLength > 0) {
- // if matchlength is 0 we are at the start of the iteration
- int offset = ucol_getOffset(strsrch->textIter);
- if (search->isOverlap) {
- ucol_setOffset(strsrch->textIter, offset + 1, status);
- }
- else {
- ucol_setOffset(strsrch->textIter,
- offset + search->matchedLength, status);
- }
- }
- if (search->isCanonicalMatch) {
- // can't use exact here since extra accents are allowed.
- usearch_handleNextCanonical(strsrch, status);
- }
- else {
- usearch_handleNextExact(strsrch, status);
- }
- }
-
+ if (search->matchedLength > 0) {
+ // if matchlength is 0 we are at the start of the iteration
+ if (search->isOverlap) {
+ ucol_setOffset(strsrch->textIter, offset + 1, status);
+ }
+ else {
+ ucol_setOffset(strsrch->textIter,
+ offset + search->matchedLength, status);
+ }
+ }
+ else {
+ // for boundary check purposes. this will ensure that the
+ // next match will not preceed the current offset
+ // note search->matchedIndex will always be set to something
+ // in the code
+ search->matchedIndex = offset - 1;
+ }
+
+ if (search->isCanonicalMatch) {
+ // can't use exact here since extra accents are allowed.
+ usearch_handleNextCanonical(strsrch, status);
+ }
+ else {
+ usearch_handleNextExact(strsrch, status);
+ }
+ }
+
if (U_FAILURE(*status)) {
return USEARCH_DONE;
}
-
+
return search->matchedIndex;
}
}
return FALSE;
}
- UCollationElements *coleiter = strsrch->textIter;
+ UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
int32_t textoffset = ucol_getOffset(coleiter);
- // status used in setting coleiter offset, since offset is checked in
- // shiftForward before setting the coleiter offset, status never
- // a failure
+ // status used in setting coleiter offset, since offset is checked in
+ // shiftForward before setting the coleiter offset, status never
+ // a failure
textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
patterncelength);
while (textoffset <= textlength)
{
uint32_t patternceindex = patterncelength - 1;
- uint32_t targetce;
+ int32_t targetce;
UBool found = FALSE;
- uint32_t lastce = UCOL_NULLORDER;
-
- setColEIterOffset(coleiter, textoffset);
-
+ int32_t lastce = UCOL_NULLORDER;
+
+ setColEIterOffset(coleiter, textoffset);
+
while (TRUE) {
// finding the last pattern ce match, imagine composite characters
// for example: search for pattern A in text \u00C0
}
if (!found) {
- if (U_FAILURE(*status)) {
- break;
- }
- textoffset = shiftForward(strsrch, textoffset, targetce,
+ if (U_FAILURE(*status)) {
+ break;
+ }
+ textoffset = shiftForward(strsrch, textoffset, lastce,
patternceindex);
// status checked at loop.
patternceindex = patterncelength;
continue;
}
-
- if (checkNextExactMatch(strsrch, &textoffset, status)) {
+
+ if (checkNextExactMatch(strsrch, &textoffset, status)) {
// status checked in ucol_setOffset
- setColEIterOffset(coleiter, strsrch->search->matchedIndex);
- return TRUE;
+ setColEIterOffset(coleiter, strsrch->search->matchedIndex);
+ return TRUE;
}
}
setMatchNotFound(strsrch);
- return FALSE;
+ return FALSE;
}
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
- int32_t textoffset = ucol_getOffset(coleiter);
+ int32_t textoffset = ucol_getOffset(coleiter);
UBool hasPatternAccents =
strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
while (textoffset <= textlength)
{
int32_t patternceindex = patterncelength - 1;
- uint32_t targetce;
+ int32_t targetce;
UBool found = FALSE;
- uint32_t lastce = UCOL_NULLORDER;
-
- setColEIterOffset(coleiter, textoffset);
-
- while (TRUE) {
+ int32_t lastce = UCOL_NULLORDER;
+
+ setColEIterOffset(coleiter, textoffset);
+
+ for (;;) {
// finding the last pattern ce match, imagine composite characters
// for example: search for pattern A in text \u00C0
// we'll have to skip \u0300 the grave first before we get to A
break;
}
}
- targetce = lastce;
while (found && patternceindex > 0) {
targetce = ucol_previous(coleiter, status);
if (hasPatternAccents && !found) {
strsrch->canonicalPrefixAccents[0] = 0;
strsrch->canonicalSuffixAccents[0] = 0;
- if (U_FAILURE(*status)) {
- break;
- }
+ if (U_FAILURE(*status)) {
+ break;
+ }
found = doNextCanonicalMatch(strsrch, textoffset, status);
}
if (!found) {
- if (U_FAILURE(*status)) {
- break;
- }
- textoffset = shiftForward(strsrch, textoffset, targetce,
+ if (U_FAILURE(*status)) {
+ break;
+ }
+ textoffset = shiftForward(strsrch, textoffset, lastce,
patternceindex);
// status checked at loop
patternceindex = patterncelength;
}
UCollationElements *coleiter = strsrch->textIter;
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
- int32_t textoffset = ucol_getOffset(coleiter);
+ int32_t textoffset = ucol_getOffset(coleiter);
// shifting it check for setting offset
// if setOffset is called previously or there was no previous match, we
while (textoffset >= 0)
{
int32_t patternceindex = 1;
- uint32_t targetce;
+ int32_t targetce;
UBool found = FALSE;
- uint32_t firstce = UCOL_NULLORDER;
+ int32_t firstce = UCOL_NULLORDER;
- // if status is a failure, ucol_setOffset does nothing
+ // if status is a failure, ucol_setOffset does nothing
setColEIterOffset(coleiter, textoffset);
-
- while (TRUE) {
+
+ for (;;) {
// finding the first pattern ce match, imagine composite
// characters. for example: search for pattern \u0300 in text
// \u00C0, we'll have to skip A first before we get to
}
if (!found) {
- if (U_FAILURE(*status)) {
- break;
- }
+ if (U_FAILURE(*status)) {
+ break;
+ }
textoffset = reverseShift(strsrch, textoffset, targetce,
patternceindex);
patternceindex = 0;
}
UCollationElements *coleiter = strsrch->textIter;
- uint32_t *patternce = strsrch->pattern.CE;
+ int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
- int32_t textoffset = ucol_getOffset(coleiter);
+ int32_t textoffset = ucol_getOffset(coleiter);
UBool hasPatternAccents =
strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
while (textoffset >= 0)
{
int32_t patternceindex = 1;
- uint32_t targetce;
+ int32_t targetce;
UBool found = FALSE;
- uint32_t firstce = UCOL_NULLORDER;
+ int32_t firstce = UCOL_NULLORDER;
setColEIterOffset(coleiter, textoffset);
while (TRUE) {
if (hasPatternAccents && !found) {
strsrch->canonicalPrefixAccents[0] = 0;
strsrch->canonicalSuffixAccents[0] = 0;
- if (U_FAILURE(*status)) {
+ if (U_FAILURE(*status)) {
break;
}
found = doPreviousCanonicalMatch(strsrch, textoffset, status);
}
if (!found) {
- if (U_FAILURE(*status)) {
+ if (U_FAILURE(*status)) {
break;
}
textoffset = reverseShift(strsrch, textoffset, targetce,