/*
**********************************************************************
-* Copyright (C) 2001-2011 IBM and others. All rights reserved.
+* Copyright (C) 2001-2015 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 07/02/2001 synwee Creation.
#include "unicode/uchar.h"
#include "unicode/utf16.h"
#include "normalizer2impl.h"
-#include "ucol_imp.h"
#include "usrchimp.h"
#include "cmemory.h"
#include "ucln_in.h"
// (and if we decide to turn this on again there are several new TODOs that will need to be addressed)
#define BOYER_MOORE 0
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
// internal definition ---------------------------------------------------
#define LAST_BYTE_MASK_ 0xFF
// Repositions the collation element iterator `elems` at `offset`.
//
// Patch note: the removed ('-') body reset individual collIterate fields
// (pos, CEpos/toReturn, flags, fcdPosition, offset buffer bookkeeping)
// directly through elems->iteratordata_. The added ('+') body delegates
// to ucol_setOffset() instead, so this helper no longer touches iterator
// internals.
//
// @param elems  collation element iterator to reposition
// @param offset offset handed unchanged to ucol_setOffset()
inline void setColEIterOffset(UCollationElements *elems,
int32_t offset)
{
- collIterate *ci = &(elems->iteratordata_);
- ci->pos = ci->string + offset;
- ci->CEpos = ci->toReturn = ci->extendCEs ? ci->extendCEs : ci->CEs;
- if (ci->flags & UCOL_ITER_INNORMBUF) {
- ci->flags = ci->origFlags;
- }
- ci->fcdPosition = NULL;
-
- ci->offsetReturn = NULL;
- ci->offsetStore = ci->offsetBuffer;
- ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
+ // Note: Not "fast" any more after the 2013 collation rewrite.
+ // We do not want to expose more internals than necessary.
// The status is a local: this helper returns void, so an error reported
// by ucol_setOffset() is not propagated to the caller.
+ UErrorCode status = U_ZERO_ERROR;
+ ucol_setOffset(elems, offset, &status);
}
/**
{
UPattern *pattern = &(strsrch->pattern);
uint32_t cetablesize = INITIAL_ARRAY_SIZE_;
- int32_t *cetable = pattern->CEBuffer;
+ int32_t *cetable = pattern->cesBuffer;
uint32_t patternlength = pattern->textLength;
UCollationElements *coleiter = strsrch->utilIter;
strsrch->utilIter = coleiter;
}
else {
- uprv_init_collIterate(strsrch->collator, pattern->text,
- pattern->textLength,
- &coleiter->iteratordata_,
- status);
+ ucol_setText(coleiter, pattern->text, pattern->textLength, status);
}
if(U_FAILURE(*status)) {
return 0;
}
- if (pattern->CE != cetable && pattern->CE) {
- uprv_free(pattern->CE);
+ if (pattern->ces != cetable && pattern->ces) {
+ uprv_free(pattern->ces);
}
uint16_t offset = 0;
return 0;
}
offset ++;
- if (cetable != temp && cetable != pattern->CEBuffer) {
+ if (cetable != temp && cetable != pattern->cesBuffer) {
uprv_free(cetable);
}
cetable = temp;
}
cetable[offset] = 0;
- pattern->CE = cetable;
- pattern->CELength = offset;
+ pattern->ces = cetable;
+ pattern->cesLength = offset;
return result;
}
{
UPattern *pattern = &(strsrch->pattern);
uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
- int64_t *pcetable = pattern->PCEBuffer;
+ int64_t *pcetable = pattern->pcesBuffer;
uint32_t patternlength = pattern->textLength;
UCollationElements *coleiter = strsrch->utilIter;
// returned.
strsrch->utilIter = coleiter;
} else {
- uprv_init_collIterate(strsrch->collator, pattern->text,
- pattern->textLength,
- &coleiter->iteratordata_,
- status);
+ ucol_setText(coleiter, pattern->text, pattern->textLength, status);
}
if(U_FAILURE(*status)) {
return 0;
}
- if (pattern->PCE != pcetable && pattern->PCE != NULL) {
- uprv_free(pattern->PCE);
+ if (pattern->pces != pcetable && pattern->pces != NULL) {
+ uprv_free(pattern->pces);
}
uint16_t offset = 0;
uint16_t result = 0;
int64_t pce;
- uprv_init_pce(coleiter);
+ icu::UCollationPCE iter(coleiter);
// ** Should processed CEs be signed or unsigned?
// ** (the rest of the code in this file seems to play fast-and-loose with
// ** whether a CE is signed or unsigned. For example, look at routine above this one.)
- while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
+ while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
U_SUCCESS(*status)) {
int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
pce,
offset += 1;
- if (pcetable != temp && pcetable != pattern->PCEBuffer) {
+ if (pcetable != temp && pcetable != pattern->pcesBuffer) {
uprv_free(pcetable);
}
}
pcetable[offset] = 0;
- pattern->PCE = pcetable;
- pattern->PCELength = offset;
+ pattern->pces = pcetable;
+ pattern->pcesLength = offset;
return result;
}
static
inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
{
+ if (U_FAILURE(*status)) { return 0; }
UPattern *pattern = &(strsrch->pattern);
const UChar *patterntext = pattern->text;
int32_t length = pattern->textLength;
}
// ** HACK **
- if (strsrch->pattern.PCE != NULL) {
- if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
- uprv_free(strsrch->pattern.PCE);
+ if (strsrch->pattern.pces != NULL) {
+ if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
+ uprv_free(strsrch->pattern.pces);
}
- strsrch->pattern.PCE = NULL;
+ strsrch->pattern.pces = NULL;
}
// since initializePattern is an internal method status is a success.
inline void initialize(UStringSearch *strsrch, UErrorCode *status)
{
int16_t expandlength = initializePattern(strsrch, status);
- if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
+ if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
UPattern *pattern = &strsrch->pattern;
- int32_t cesize = pattern->CELength;
+ int32_t cesize = pattern->cesLength;
int16_t minlength = cesize > expandlength
? (int16_t)cesize - expandlength : 1;
pattern->defaultShiftSize = minlength;
- setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
+ setShiftTable(pattern->shift, pattern->backShift, pattern->ces,
cesize, expandlength, minlength, minlength);
return;
}
start;
UErrorCode status = U_ZERO_ERROR;
ucol_setText(coleiter, text, end - start, &status);
- for (int32_t count = 0; count < strsrch->pattern.CELength;
+ for (int32_t count = 0; count < strsrch->pattern.cesLength;
count ++) {
int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
if (ce == UCOL_IGNORABLE) {
count --;
continue;
}
- if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
+ if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
return FALSE;
}
}
int32_t shift = pattern->shift[hash(ce)];
// this is to adjust for characters in the middle of the
// substring for matching that failed.
- int32_t adjust = pattern->CELength - patternceindex;
+ int32_t adjust = pattern->cesLength - patternceindex;
if (adjust > 1 && shift >= adjust) {
shift -= adjust - 1;
}
UCollationElements *coleiter = strsrch->utilIter;
ucol_setText(coleiter, norm, size, status);
- uint32_t firstce = strsrch->pattern.CE[0];
+ uint32_t firstce = strsrch->pattern.ces[0];
UBool ignorable = TRUE;
uint32_t ce = UCOL_IGNORABLE;
while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) {
UErrorCode status = U_ZERO_ERROR;
// we have been iterating forwards previously
uint32_t ignorable = TRUE;
- int32_t firstce = strsrch->pattern.CE[0];
+ int32_t firstce = strsrch->pattern.ces[0];
setColEIterOffset(coleiter, start);
int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
int32_t textlength = strsrch->search->textLength;
U16_BACK_1(text, 0, temp);
if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
- int32_t firstce = strsrch->pattern.CE[0];
+ int32_t firstce = strsrch->pattern.ces[0];
UCollationElements *coleiter = strsrch->textIter;
UErrorCode status = U_ZERO_ERROR;
int32_t ce;
}
}
int32_t count = 1;
- while (count < strsrch->pattern.CELength) {
+ while (count < strsrch->pattern.cesLength) {
if (getCE(strsrch, ucol_next(coleiter, &status))
== UCOL_IGNORABLE) {
// Thai can give an ignorable here.
expansion --;
}
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t count = 0;
while (count < patterncelength) {
int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
inline UBool checkCollationMatch(const UStringSearch *strsrch,
UCollationElements *coleiter)
{
- int patternceindex = strsrch->pattern.CELength;
- int32_t *patternce = strsrch->pattern.CE;
+ int patternceindex = strsrch->pattern.cesLength;
+ int32_t *patternce = strsrch->pattern.ces;
UErrorCode status = U_ZERO_ERROR;
while (patternceindex > 0) {
int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
ucol_setText(coleiter, safetext, safetextlength, status);
// status checked in loop below
- int32_t *ce = strsrch->pattern.CE;
- int32_t celength = strsrch->pattern.CELength;
+ int32_t *ce = strsrch->pattern.ces;
+ int32_t celength = strsrch->pattern.cesLength;
int ceindex = celength - 1;
UBool isSafe = TRUE; // indication flag for position in safe zone
expansion --;
}
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t count = 0;
int32_t textlength = strsrch->search->textLength;
while (count < patterncelength) {
expansion --;
}
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t count = patterncelength;
while (count > 0) {
int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
ucol_setText(coleiter, safetext, safetextlength, status);
// status checked in loop below
- int32_t *ce = strsrch->pattern.CE;
- int32_t celength = strsrch->pattern.CELength;
+ int32_t *ce = strsrch->pattern.ces;
+ int32_t celength = strsrch->pattern.cesLength;
int ceindex = 0;
UBool isSafe = TRUE; // safe zone indication flag for position
int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
expansion --;
}
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t count = patterncelength;
while (count > 0) {
int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
UCOL_SHIFTED;
result->variableTop = ucol_getVariableTop(collator, status);
- result->nfd = Normalizer2Factory::getNFDInstance(*status);
+ result->nfd = Normalizer2::getNFDInstance(*status);
if (U_FAILURE(*status)) {
uprv_free(result);
result->pattern.text = pattern;
result->pattern.textLength = patternlength;
- result->pattern.CE = NULL;
- result->pattern.PCE = NULL;
+ result->pattern.ces = NULL;
+ result->pattern.pces = NULL;
result->search->breakIter = breakiter;
#if !UCONFIG_NO_BREAK_ITERATION
result->utilIter = NULL;
result->textIter = ucol_openElements(collator, text,
textlength, status);
+ result->textProcessedIter = NULL;
if (U_FAILURE(*status)) {
usearch_close(result);
return NULL;
U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
{
if (strsrch) {
- if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
- strsrch->pattern.CE) {
- uprv_free(strsrch->pattern.CE);
+ if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
+ strsrch->pattern.ces) {
+ uprv_free(strsrch->pattern.ces);
}
- if (strsrch->pattern.PCE != NULL &&
- strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
- uprv_free(strsrch->pattern.PCE);
+ if (strsrch->pattern.pces != NULL &&
+ strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
+ uprv_free(strsrch->pattern.pces);
}
+ delete strsrch->textProcessedIter;
ucol_closeElements(strsrch->textIter);
ucol_closeElements(strsrch->utilIter);
}
}
+namespace {
+
// Makes strsrch->textProcessedIter ready for use on strsrch->textIter.
//
// On first use (textProcessedIter == NULL) a new icu::UCollationPCE is
// allocated over strsrch->textIter; on later calls the existing object is
// re-initialized with init(strsrch->textIter).
//
// @param strsrch string search whose textProcessedIter is created or reset
// @param status  in/out error code; nothing is done if it already holds a
//                failure. Set to U_MEMORY_ALLOCATION_ERROR when the
//                allocation yields NULL.
// @return TRUE when textProcessedIter is usable, FALSE on incoming failure
//         or allocation failure.
//
// NOTE(review): the NULL check after `new` presumes the class's operator
// new returns NULL on failure instead of throwing -- confirm against the
// UCollationPCE declaration.
+UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
+ if (U_FAILURE(*status)) { return FALSE; }
+ if (strsrch->textProcessedIter == NULL) {
+ strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
+ if (strsrch->textProcessedIter == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ } else {
+ strsrch->textProcessedIter->init(strsrch->textIter);
+ }
+ return TRUE;
+}
+
+}
+
// set and get methods --------------------------------------------------
U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
}
if (strsrch) {
+ delete strsrch->textProcessedIter;
+ strsrch->textProcessedIter = NULL;
+ ucol_closeElements(strsrch->textIter);
+ ucol_closeElements(strsrch->utilIter);
+ strsrch->textIter = strsrch->utilIter = NULL;
if (strsrch->ownCollator && (strsrch->collator != collator)) {
ucol_close((UCollator *)strsrch->collator);
strsrch->ownCollator = FALSE;
UCOL_SHIFTED;
// if status is a failure, ucol_getVariableTop returns 0
strsrch->variableTop = ucol_getVariableTop(collator, status);
- if (U_SUCCESS(*status)) {
- initialize(strsrch, status);
- if (U_SUCCESS(*status)) {
- /* free offset buffer to avoid memory leak before initializing. */
- ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
- uprv_init_collIterate(collator, strsrch->search->text,
- strsrch->search->textLength,
- &(strsrch->textIter->iteratordata_),
- status);
- strsrch->utilIter->iteratordata_.coll = collator;
- }
- }
+ strsrch->textIter = ucol_openElements(collator,
+ strsrch->search->text,
+ strsrch->search->textLength,
+ status);
+ strsrch->utilIter = ucol_openElements(
+ collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
+ // initialize() _after_ setting the iterators for the new collator.
+ initialize(strsrch, status);
}
// **** are these calls needed?
// **** we call uprv_init_pce in initializePatternPCETable
- // **** and the CEBuffer constructor...
+ // **** and the CEIBuffer constructor...
#if 0
uprv_init_pce(strsrch->textIter);
uprv_init_pce(strsrch->utilIter);
}
if (U_SUCCESS(*status)) {
- if (strsrch->pattern.CELength == 0) {
+ if (strsrch->pattern.cesLength == 0) {
if (search->matchedIndex == USEARCH_DONE) {
search->matchedIndex = offset;
}
}
if (U_SUCCESS(*status)) {
- if (strsrch->pattern.CELength == 0) {
+ if (strsrch->pattern.cesLength == 0) {
search->matchedIndex =
(matchedindex == USEARCH_DONE ? offset : matchedindex);
if (search->matchedIndex == 0) {
if (!sameCollAttribute) {
initialize(strsrch, &status);
}
- /* free offset buffer to avoid memory leak before initializing. */
- ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
- uprv_init_collIterate(strsrch->collator, strsrch->search->text,
+ ucol_setText(strsrch->textIter, strsrch->search->text,
strsrch->search->textLength,
- &(strsrch->textIter->iteratordata_),
&status);
strsrch->search->matchedLength = 0;
strsrch->search->matchedIndex = USEARCH_DONE;
U_NAMESPACE_BEGIN
-
+namespace {
//
-// CEBuffer A circular buffer of CEs from the text being searched.
+// CEIBuffer A circular buffer of CEs-with-index from the text being searched.
//
#define DEFAULT_CEBUFFER_SIZE 96
#define CEBUFFER_EXTRA 32
#define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8
#define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3
#define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) || (c >= 0x3131 && c <= 0x314E) || (c >= 0x3165 && c <= 0x3186))
-struct CEBuffer {
+struct CEIBuffer {
CEI defBuf[DEFAULT_CEBUFFER_SIZE];
CEI *buf;
int32_t bufSize;
- CEBuffer(UStringSearch *ss, UErrorCode *status);
- ~CEBuffer();
+ CEIBuffer(UStringSearch *ss, UErrorCode *status);
+ ~CEIBuffer();
const CEI *get(int32_t index);
const CEI *getPrevious(int32_t index);
};
-CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
+CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) {
buf = defBuf;
strSearch = ss;
- bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA;
+ bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA;
if (ss->search->elementComparisonType != 0) {
const UChar * patText = ss->pattern.text;
if (patText) {
firstIx = 0;
limitIx = 0;
- uprv_init_pce(ceIter);
+ if (!initTextProcessedIter(ss, status)) { return; }
if (bufSize>DEFAULT_CEBUFFER_SIZE) {
buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
// TODO: add a reset or init function so that allocated
// buffers can be retained & reused.
-CEBuffer::~CEBuffer() {
+CEIBuffer::~CEIBuffer() {
if (buf != defBuf) {
uprv_free(buf);
}
// where n is the largest index to have been fetched by some previous call to this function.
// The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
//
-const CEI *CEBuffer::get(int32_t index) {
+const CEI *CEIBuffer::get(int32_t index) {
int i = index % bufSize;
if (index>=firstIx && index<limitIx) {
UErrorCode status = U_ZERO_ERROR;
- buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+ buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
return &buf[i];
}
// where n is the largest index to have been fetched by some previous call to this function.
// The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
//
-const CEI *CEBuffer::getPrevious(int32_t index) {
+const CEI *CEIBuffer::getPrevious(int32_t index) {
int i = index % bufSize;
if (index>=firstIx && index<limitIx) {
UErrorCode status = U_ZERO_ERROR;
- buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+ buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
return &buf[i];
}
+}
+
U_NAMESPACE_END
#ifdef USEARCH_DEBUG
if (getenv("USEARCH_DEBUG") != NULL) {
printf("Pattern CEs\n");
- for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
- printf(" %8x", strsrch->pattern.CE[ii]);
+ for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
+ printf(" %8x", strsrch->pattern.ces[ii]);
}
printf("\n");
}
// Input parameter sanity check.
// TODO: should input indices clip to the text length
// in the same way that UText does.
- if(strsrch->pattern.CELength == 0 ||
+ if(strsrch->pattern.cesLength == 0 ||
startIdx < 0 ||
startIdx > strsrch->search->textLength ||
- strsrch->pattern.CE == NULL) {
+ strsrch->pattern.ces == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
- if (strsrch->pattern.PCE == NULL) {
+ if (strsrch->pattern.pces == NULL) {
initializePatternPCETable(strsrch, status);
}
ucol_setOffset(strsrch->textIter, startIdx, status);
- CEBuffer ceb(strsrch, status);
+ CEIBuffer ceb(strsrch, status);
int32_t targetIx = 0;
break;
}
- for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
- patCE = strsrch->pattern.PCE[patIx];
+ for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
+ patCE = strsrch->pattern.pces[patIx];
targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
// Compare CE from target string with CE from the pattern.
// Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
}
}
}
- targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far
+ targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
// No match at this targetIx. Try again at the next.
}
#endif
- // If advancing to the end of a combining sequence in character indexing space
- // advanced us beyond the end of the match in CE space, reject this match.
- if (mLimit > maxLimit) {
- found = FALSE;
- }
+ // If default breakIter is being used, and next collation element belonging to this
+ // combining sequence has non-zero primary weight and corresponds to a separate
+ // character following the one at end of the current match, then do NOT require
+ // that match end position be on a breakIter boundary, or that end of the
+ // combining sequence not extend beyond the match in CE space. Only do those
+ // tests if the conditions above are not met. Added this to make prefix search
+ // work in Indic scripts per <rdar://problem/18063262>.
+ UBool doLimitTests = !(strsrch->search->breakIter == NULL &&
+ nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
+ nextCEI->lowIndex >= lastCEI->highIndex && nextCEI->highIndex > nextCEI->lowIndex);
+
+ if (doLimitTests) { // <rdar://problem/18063262>
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = FALSE;
+ }
- if (!isBreakBoundary(strsrch, mLimit)) {
- found = FALSE;
+ if (!isBreakBoundary(strsrch, mLimit)) {
+ found = FALSE;
+ }
}
if (! checkIdentical(strsrch, mStart, mLimit)) {
#ifdef USEARCH_DEBUG
if (getenv("USEARCH_DEBUG") != NULL) {
printf("Pattern CEs\n");
- for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
- printf(" %8x", strsrch->pattern.CE[ii]);
+ for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
+ printf(" %8x", strsrch->pattern.ces[ii]);
}
printf("\n");
}
// Input parameter sanity check.
// TODO: should input indices clip to the text length
// in the same way that UText does.
- if(strsrch->pattern.CELength == 0 ||
+ if(strsrch->pattern.cesLength == 0 ||
startIdx < 0 ||
startIdx > strsrch->search->textLength ||
- strsrch->pattern.CE == NULL) {
+ strsrch->pattern.ces == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
- if (strsrch->pattern.PCE == NULL) {
+ if (strsrch->pattern.pces == NULL) {
initializePatternPCETable(strsrch, status);
}
- CEBuffer ceb(strsrch, status);
+ CEIBuffer ceb(strsrch, status);
int32_t targetIx = 0;
/*
// Inner loop checks for a match beginning at each
// position from the outer loop.
int32_t targetIxOffset = 0;
- for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
- int64_t patCE = strsrch->pattern.PCE[patIx];
+ for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
+ int64_t patCE = strsrch->pattern.pces[patIx];
- targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
+ targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
// Compare CE from target string with CE from the pattern.
// Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
// which will fail the compare, below.
// There still is a chance of match failure if the CE range does not correspond to
// an acceptable character range.
//
- const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
+ const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
mStart = firstCEI->lowIndex;
// Check for the start of the match being within a combining sequence.
mLimit = maxLimit = nextCEI->lowIndex;
// Advance the match end position to the first acceptable match boundary.
- // This advances the index over any combining charcters.
+ // This advances the index over any combining characters.
if (minLimit < maxLimit) {
int32_t nba = nextBoundaryAfter(strsrch, minLimit);
}
}
- // If advancing to the end of a combining sequence in character indexing space
- // advanced us beyond the end of the match in CE space, reject this match.
- if (mLimit > maxLimit) {
- found = FALSE;
- }
+ // If default breakIter is being used, and next collation element belonging to this
+ // combining sequence has non-zero primary weight and corresponds to a separate
+ // character following the one at end of the current match, then do NOT require
+ // that match end position be on a breakIter boundary, or that end of the
+ // combining sequence not extend beyond the match in CE space. Only do those
+ // tests if the conditions above are not met. Added this to make prefix search
+ // work in Indic scripts per <rdar://problem/18063262>.
+ UBool doLimitTests = !(strsrch->search->breakIter == NULL &&
+ nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
+ nextCEI->lowIndex >= lastCEI->highIndex && nextCEI->highIndex > nextCEI->lowIndex);
+
+ if (doLimitTests) { // <rdar://problem/18063262>
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = FALSE;
+ }
- // Make sure the end of the match is on a break boundary
- if (!isBreakBoundary(strsrch, mLimit)) {
- found = FALSE;
+ // Make sure the end of the match is on a break boundary
+ if (!isBreakBoundary(strsrch, mLimit)) {
+ found = FALSE;
+ }
}
} else {
#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t textoffset = ucol_getOffset(coleiter);
// status used in setting coleiter offset, since offset is checked in
#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t textoffset = ucol_getOffset(coleiter);
UBool hasPatternAccents =
strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t textoffset = ucol_getOffset(coleiter);
// shifting it check for setting offset
} else {
// move the start position at the end of possible match
initializePatternPCETable(strsrch, status);
- for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
- int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
+ if (!initTextProcessedIter(strsrch, status)) {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+ for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
+ int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
if (pce == UCOL_PROCESSED_NULLORDER) {
// at the end of the text
break;
#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
- int32_t *patternce = strsrch->pattern.CE;
- int32_t patterncelength = strsrch->pattern.CELength;
+ int32_t *patternce = strsrch->pattern.ces;
+ int32_t patterncelength = strsrch->pattern.cesLength;
int32_t textoffset = ucol_getOffset(coleiter);
UBool hasPatternAccents =
strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
} else {
// move the start position at the end of possible match
initializePatternPCETable(strsrch, status);
- for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
- int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
+ if (!initTextProcessedIter(strsrch, status)) {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+ for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
+ int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
if (pce == UCOL_PROCESSED_NULLORDER) {
// at the end of the text
break;