/*
**********************************************************************
-* Copyright (C) 2001-2003 IBM and others. All rights reserved.
+* Copyright (C) 2001-2014 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 03/22/2000 helena Creation.
#include "unicode/utypes.h"
-#if !UCONFIG_NO_COLLATION
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
#include "unicode/stsearch.h"
#include "usrchimp.h"
U_NAMESPACE_BEGIN
-const char StringSearch::fgClassID=0;
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
// public constructors and destructors -----------------------------------
-StringSearch::StringSearch(const UnicodeString &pattern,
+StringSearch::StringSearch(const UnicodeString &pattern,
const UnicodeString &text,
- const Locale &locale,
+ const Locale &locale,
BreakIterator *breakiter,
UErrorCode &status) :
- SearchIterator(text, breakiter),
- m_collator_(),
+ SearchIterator(text, breakiter),
m_pattern_(pattern)
{
if (U_FAILURE(status)) {
return;
}
- m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
- m_text_.getBuffer(), m_text_.length(),
- locale.getName(), (UBreakIterator *)breakiter,
+ m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+ m_text_.getBuffer(), m_text_.length(),
+ locale.getName(), (UBreakIterator *)breakiter,
&status);
uprv_free(m_search_);
m_search_ = NULL;
- // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
- // wrapper around the internal collator and rules, which (here) are
- // owned by this stringsearch object. this means 1) it's destructor
- // _should not_ delete the ucollator or rules, and 2) changes made
- // to the exposed collator (setStrength etc) _should_ modify the
- // ucollator. thus the collator is not a copy-on-write alias, and it
- // needs to distinguish itself not merely from 'stand alone' colators
- // but also from copy-on-write ones. it needs additional state, which
- // setUCollator should set.
-
if (U_SUCCESS(status)) {
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
// m_search_ has been created by the base SearchIterator class
m_search_ = m_strsrch_->search;
}
}
-StringSearch::StringSearch(const UnicodeString &pattern,
+StringSearch::StringSearch(const UnicodeString &pattern,
const UnicodeString &text,
- RuleBasedCollator *coll,
+ RuleBasedCollator *coll,
BreakIterator *breakiter,
UErrorCode &status) :
- SearchIterator(text, breakiter),
- m_collator_(),
+ SearchIterator(text, breakiter),
m_pattern_(pattern)
{
if (U_FAILURE(status)) {
m_strsrch_ = NULL;
return;
}
- m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
- m_pattern_.length(),
- m_text_.getBuffer(),
- m_text_.length(), coll->ucollator,
- (UBreakIterator *)breakiter,
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+ m_pattern_.length(),
+ m_text_.getBuffer(),
+ m_text_.length(), coll->toUCollator(),
+ (UBreakIterator *)breakiter,
&status);
uprv_free(m_search_);
m_search_ = NULL;
if (U_SUCCESS(status)) {
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
// m_search_ has been created by the base SearchIterator class
m_search_ = m_strsrch_->search;
}
}
-StringSearch::StringSearch(const UnicodeString &pattern,
+StringSearch::StringSearch(const UnicodeString &pattern,
CharacterIterator &text,
- const Locale &locale,
+ const Locale &locale,
BreakIterator *breakiter,
UErrorCode &status) :
- SearchIterator(text, breakiter),
- m_collator_(),
+ SearchIterator(text, breakiter),
m_pattern_(pattern)
{
if (U_FAILURE(status)) {
m_strsrch_ = NULL;
return;
}
- m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
- m_text_.getBuffer(), m_text_.length(),
- locale.getName(), (UBreakIterator *)breakiter,
+ m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+ m_text_.getBuffer(), m_text_.length(),
+ locale.getName(), (UBreakIterator *)breakiter,
&status);
uprv_free(m_search_);
m_search_ = NULL;
if (U_SUCCESS(status)) {
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
// m_search_ has been created by the base SearchIterator class
m_search_ = m_strsrch_->search;
}
StringSearch::StringSearch(const UnicodeString &pattern,
CharacterIterator &text,
- RuleBasedCollator *coll,
+ RuleBasedCollator *coll,
BreakIterator *breakiter,
UErrorCode &status) :
- SearchIterator(text, breakiter),
- m_collator_(),
+ SearchIterator(text, breakiter),
m_pattern_(pattern)
{
if (U_FAILURE(status)) {
m_strsrch_ = NULL;
return;
}
- m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
- m_pattern_.length(),
- m_text_.getBuffer(),
- m_text_.length(), coll->ucollator,
- (UBreakIterator *)breakiter,
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+ m_pattern_.length(),
+ m_text_.getBuffer(),
+ m_text_.length(), coll->toUCollator(),
+ (UBreakIterator *)breakiter,
&status);
uprv_free(m_search_);
m_search_ = NULL;
if (U_SUCCESS(status)) {
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
// m_search_ has been created by the base SearchIterator class
m_search_ = m_strsrch_->search;
}
StringSearch::StringSearch(const StringSearch &that) :
SearchIterator(that.m_text_, that.m_breakiterator_),
- m_collator_(),
m_pattern_(that.m_pattern_)
{
UErrorCode status = U_ZERO_ERROR;
+
+ // Free m_search_ from the superclass
+ uprv_free(m_search_);
+ m_search_ = NULL;
+
if (that.m_strsrch_ == NULL) {
+ // This was not a good copy
m_strsrch_ = NULL;
- status = U_ILLEGAL_ARGUMENT_ERROR;
}
else {
- m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
- m_pattern_.length(),
- m_text_.getBuffer(),
- m_text_.length(),
- that.m_strsrch_->collator,
- (UBreakIterator *)that.m_breakiterator_,
+ // Make a deep copy
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+ m_pattern_.length(),
+ m_text_.getBuffer(),
+ m_text_.length(),
+ that.m_strsrch_->collator,
+ (UBreakIterator *)that.m_breakiterator_,
&status);
- }
- uprv_free(m_search_);
- m_search_ = NULL;
-
- if (U_SUCCESS(status)) {
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
- // m_search_ has been created by the base SearchIterator class
- m_search_ = m_strsrch_->search;
- m_breakiterator_ = that.m_breakiterator_;
+ if (U_SUCCESS(status)) {
+ // m_search_ has been created by the base SearchIterator class
+ m_search_ = m_strsrch_->search;
+ }
}
}
}
}
+StringSearch *
+StringSearch::clone() const {
+ return new StringSearch(*this);
+}
+
// operator overloading ---------------------------------------------
StringSearch & StringSearch::operator=(const StringSearch &that)
{
m_pattern_ = that.m_pattern_;
// all m_search_ in the parent class is linked up with m_strsrch_
usearch_close(m_strsrch_);
- m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
- m_pattern_.length(),
- m_text_.getBuffer(),
- m_text_.length(),
- that.m_strsrch_->collator,
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+ m_pattern_.length(),
+ m_text_.getBuffer(),
+ m_text_.length(),
+ that.m_strsrch_->collator,
NULL, &status);
- int32_t length;
- const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
- m_collation_rules_.setTo(rules, length);
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
- m_search_ = m_strsrch_->search;
+ // Check null pointer
+ if (m_strsrch_ != NULL) {
+ m_search_ = m_strsrch_->search;
+ }
}
return *this;
}
usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
}
}
-
+
void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
{
if (U_SUCCESS(status)) {
RuleBasedCollator * StringSearch::getCollator() const
{
- return (RuleBasedCollator *)&m_collator_;
+ // Note the const_cast. It would be cleaner if this const method returned a const collator.
+ return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
}
-
+
void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
{
if (U_SUCCESS(status)) {
- usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
- m_collation_rules_.setTo(coll->getRules());
- m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
- &m_collation_rules_);
+ usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
}
}
-
-void StringSearch::setPattern(const UnicodeString &pattern,
+
+void StringSearch::setPattern(const UnicodeString &pattern,
UErrorCode &status)
{
if (U_SUCCESS(status)) {
&status);
}
}
-
+
const UnicodeString & StringSearch::getPattern() const
{
return m_pattern_;
SearchIterator * StringSearch::safeClone(void) const
{
UErrorCode status = U_ZERO_ERROR;
- StringSearch *result = new StringSearch(m_pattern_, m_text_,
- (RuleBasedCollator *)&m_collator_,
+ StringSearch *result = new StringSearch(m_pattern_, m_text_,
+ getCollator(),
m_breakiterator_,
status);
/* test for NULL */
}
return result;
}
-
+
// protected method -------------------------------------------------
int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
{
// values passed here are already in the pre-shift position
if (U_SUCCESS(status)) {
- if (m_strsrch_->pattern.CELength == 0) {
- m_search_->matchedIndex =
- m_search_->matchedIndex == USEARCH_DONE ?
+ if (m_strsrch_->pattern.cesLength == 0) {
+ m_search_->matchedIndex =
+ m_search_->matchedIndex == USEARCH_DONE ?
getOffset() : m_search_->matchedIndex + 1;
m_search_->matchedLength = 0;
- ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
&status);
if (m_search_->matchedIndex == m_search_->textLength) {
m_search_->matchedIndex = USEARCH_DONE;
}
}
else {
- // looking at usearch.cpp, this part is shifted out to
+ // looking at usearch.cpp, this part is shifted out to
// StringSearch instead of SearchIterator because m_strsrch_ is
// not accessible in SearchIterator
- if (position + m_strsrch_->pattern.defaultShiftSize
- > m_search_->textLength) {
+#if 0
+ if (position + m_strsrch_->pattern.defaultShiftSize
+ > m_search_->textLength) {
setMatchNotFound();
return USEARCH_DONE;
}
- ucol_setOffset(m_strsrch_->textIter, position, &status);
- while (TRUE) {
+#endif
+ if (m_search_->matchedLength <= 0) {
+ // The direction-flipping issue has already been handled
+ // in next().
+ // For boundary-check purposes, this ensures that the
+ // next match will not precede the current offset.
+ // Note: search->matchedIndex will always be set to something
+ // in the code.
+ m_search_->matchedIndex = position - 1;
+ }
+
+ ucol_setOffset(m_strsrch_->textIter, position, &status);
+
+#if 0
+ for (;;) {
if (m_search_->isCanonicalMatch) {
// can't use exact here since extra accents are allowed.
usearch_handleNextCanonical(m_strsrch_, &status);
}
if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
- ||
+ ||
m_search_->matchedIndex == USEARCH_DONE ||
(m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
- m_breakiterator_->isBoundary(m_search_->matchedIndex +
+ m_breakiterator_->isBoundary(m_search_->matchedIndex +
m_search_->matchedLength))
#endif
) {
- if (m_search_->matchedIndex == USEARCH_DONE) {
- ucol_setOffset(m_strsrch_->textIter,
- m_search_->textLength, &status);
- }
- else {
- ucol_setOffset(m_strsrch_->textIter,
- m_search_->matchedIndex, &status);
- }
+ if (m_search_->matchedIndex == USEARCH_DONE) {
+ ucol_setOffset(m_strsrch_->textIter,
+ m_search_->textLength, &status);
+ }
+ else {
+ ucol_setOffset(m_strsrch_->textIter,
+ m_search_->matchedIndex, &status);
+ }
return m_search_->matchedIndex;
}
}
+#else
+ // if m_strsrch_->breakIter is always the same as m_breakiterator_
+ // then we don't need to check the match boundaries here because
+ // usearch_handleNextXXX will already have done it.
+ if (m_search_->isCanonicalMatch) {
+ // *could* actually use exact here 'cause no extra accents allowed...
+ usearch_handleNextCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handleNextExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ if (m_search_->matchedIndex == USEARCH_DONE) {
+ ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
+ } else {
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
+ }
+
+ return m_search_->matchedIndex;
+#endif
}
}
return USEARCH_DONE;
{
// values passed here are already in the pre-shift position
if (U_SUCCESS(status)) {
- if (m_strsrch_->pattern.CELength == 0) {
- m_search_->matchedIndex =
- (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
+ if (m_strsrch_->pattern.cesLength == 0) {
+ m_search_->matchedIndex =
+ (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
m_search_->matchedIndex);
if (m_search_->matchedIndex == 0) {
setMatchNotFound();
}
else {
m_search_->matchedIndex --;
- ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
&status);
m_search_->matchedLength = 0;
}
}
else {
- // looking at usearch.cpp, this part is shifted out to
+ // looking at usearch.cpp, this part is shifted out to
// StringSearch instead of SearchIterator because m_strsrch_ is
// not accessible in SearchIterator
- if (!m_search_->isOverlap &&
+#if 0
+ if (!m_search_->isOverlap &&
position - m_strsrch_->pattern.defaultShiftSize < 0) {
setMatchNotFound();
return USEARCH_DONE;
}
- while (TRUE) {
+
+ for (;;) {
if (m_search_->isCanonicalMatch) {
// can't use exact here since extra accents are allowed.
usearch_handlePreviousCanonical(m_strsrch_, &status);
}
if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
- ||
+ ||
m_search_->matchedIndex == USEARCH_DONE ||
(m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
- m_breakiterator_->isBoundary(m_search_->matchedIndex +
+ m_breakiterator_->isBoundary(m_search_->matchedIndex +
m_search_->matchedLength))
#endif
) {
return m_search_->matchedIndex;
}
}
+#else
+ ucol_setOffset(m_strsrch_->textIter, position, &status);
+
+ if (m_search_->isCanonicalMatch) {
+ // *could* use exact match here since extra accents *not* allowed!
+ usearch_handlePreviousCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handlePreviousExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ return m_search_->matchedIndex;
+#endif
}
-
+
return m_search_->matchedIndex;
}
return USEARCH_DONE;