ICU-64243.0.1.tar.gz

[apple/icu.git] / icuSources / i18n / stsearch.cpp
diff --git a/icuSources/i18n/stsearch.cpp b/icuSources/i18n/stsearch.cpp

index a19e93943846b6325c082f35778e3e82a8487f3a..c5565677f9524b0872dd0ad687ce1db504cf2689 100644 (file)
--- a/icuSources/i18n/stsearch.cpp
+++ b/icuSources/i18n/stsearch.cpp
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  **********************************************************************
-*   Copyright (C) 2001-2003 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
  **********************************************************************
  *   Date        Name        Description
  *  03/22/2000   helena      Creation.
@@ -9,7 +11,7 @@
  
  #include "unicode/utypes.h"
  
-#if !UCONFIG_NO_COLLATION
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
  
  #include "unicode/stsearch.h"
  #include "usrchimp.h"
@@ -17,17 +19,16 @@
  
  U_NAMESPACE_BEGIN
  
-const char StringSearch::fgClassID=0;
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
  
  // public constructors and destructors -----------------------------------
  
-StringSearch::StringSearch(const UnicodeString &pattern, 
+StringSearch::StringSearch(const UnicodeString &pattern,
                             const UnicodeString &text,
-                           const Locale        &locale,       
+                           const Locale        &locale,
                                   BreakIterator *breakiter,
                                   UErrorCode    &status) :
-                           SearchIterator(text, breakiter), 
-                           m_collator_(),
+                           SearchIterator(text, breakiter),
                             m_pattern_(pattern)
  {
      if (U_FAILURE(status)) {
@@ -35,41 +36,25 @@ StringSearch::StringSearch(const UnicodeString &pattern,
          return;
      }
  
-    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
-                              m_text_.getBuffer(), m_text_.length(), 
-                              locale.getName(), (UBreakIterator *)breakiter, 
+    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+                              m_text_.getBuffer(), m_text_.length(),
+                              locale.getName(), (UBreakIterator *)breakiter,
                                &status);
      uprv_free(m_search_);
      m_search_ = NULL;
  
-       // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
-       // wrapper around the internal collator and rules, which (here) are
-       // owned by this stringsearch object.  this means 1) it's destructor
-       // _should not_ delete the ucollator or rules, and 2) changes made
-       // to the exposed collator (setStrength etc) _should_ modify the 
-       // ucollator.  thus the collator is not a copy-on-write alias, and it
-       // needs to distinguish itself not merely from 'stand alone' colators
-       // but also from copy-on-write ones.  it needs additional state, which
-       // setUCollator should set.
-
      if (U_SUCCESS(status)) {
-              int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
          // m_search_ has been created by the base SearchIterator class
          m_search_        = m_strsrch_->search;
      }
  }
  
-StringSearch::StringSearch(const UnicodeString     &pattern, 
+StringSearch::StringSearch(const UnicodeString     &pattern,
                             const UnicodeString     &text,
-                                 RuleBasedCollator *coll,       
+                                 RuleBasedCollator *coll,
                                   BreakIterator     *breakiter,
                                   UErrorCode        &status) :
-                           SearchIterator(text, breakiter), 
-                           m_collator_(),
+                           SearchIterator(text, breakiter),
                             m_pattern_(pattern)
  {
      if (U_FAILURE(status)) {
@@ -81,52 +66,41 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
          m_strsrch_ = NULL;
          return;
      }
-    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
-                                          m_pattern_.length(), 
-                                          m_text_.getBuffer(), 
-                                          m_text_.length(), coll->ucollator, 
-                                          (UBreakIterator *)breakiter, 
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                          m_pattern_.length(),
+                                          m_text_.getBuffer(),
+                                          m_text_.length(), coll->toUCollator(),
+                                          (UBreakIterator *)breakiter,
                                            &status);
      uprv_free(m_search_);
      m_search_ = NULL;
  
      if (U_SUCCESS(status)) {
-              int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
          // m_search_ has been created by the base SearchIterator class
          m_search_ = m_strsrch_->search;
      }
  }
  
-StringSearch::StringSearch(const UnicodeString     &pattern, 
+StringSearch::StringSearch(const UnicodeString     &pattern,
                                   CharacterIterator &text,
-                           const Locale            &locale, 
+                           const Locale            &locale,
                                   BreakIterator     *breakiter,
                                   UErrorCode        &status) :
-                           SearchIterator(text, breakiter), 
-                           m_collator_(),
+                           SearchIterator(text, breakiter),
                             m_pattern_(pattern)
  {
      if (U_FAILURE(status)) {
          m_strsrch_ = NULL;
          return;
      }
-    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
-                              m_text_.getBuffer(), m_text_.length(), 
-                              locale.getName(), (UBreakIterator *)breakiter, 
+    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+                              m_text_.getBuffer(), m_text_.length(),
+                              locale.getName(), (UBreakIterator *)breakiter,
                                &status);
      uprv_free(m_search_);
      m_search_ = NULL;
  
      if (U_SUCCESS(status)) {
-              int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
          // m_search_ has been created by the base SearchIterator class
          m_search_ = m_strsrch_->search;
      }
@@ -134,11 +108,10 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
  
  StringSearch::StringSearch(const UnicodeString     &pattern,
                                   CharacterIterator &text,
-                                 RuleBasedCollator *coll, 
+                                 RuleBasedCollator *coll,
                                   BreakIterator     *breakiter,
                                   UErrorCode        &status) :
-                           SearchIterator(text, breakiter), 
-                           m_collator_(),
+                           SearchIterator(text, breakiter),
                             m_pattern_(pattern)
  {
      if (U_FAILURE(status)) {
@@ -150,21 +123,16 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
          m_strsrch_ = NULL;
          return;
      }
-    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
-                                          m_pattern_.length(), 
-                                          m_text_.getBuffer(), 
-                                          m_text_.length(), coll->ucollator, 
-                                          (UBreakIterator *)breakiter, 
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                          m_pattern_.length(),
+                                          m_text_.getBuffer(),
+                                          m_text_.length(), coll->toUCollator(),
+                                          (UBreakIterator *)breakiter,
                                            &status);
      uprv_free(m_search_);
      m_search_ = NULL;
  
      if (U_SUCCESS(status)) {
-              int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
          // m_search_ has been created by the base SearchIterator class
          m_search_ = m_strsrch_->search;
      }
@@ -172,35 +140,31 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
  
  StringSearch::StringSearch(const StringSearch &that) :
                         SearchIterator(that.m_text_, that.m_breakiterator_),
-                       m_collator_(),
                         m_pattern_(that.m_pattern_)
  {
      UErrorCode status = U_ZERO_ERROR;
+
+    // Free m_search_ from the superclass
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
      if (that.m_strsrch_ == NULL) {
+        // This was not a good copy
          m_strsrch_ = NULL;
-        status     = U_ILLEGAL_ARGUMENT_ERROR;
      }
      else {
-        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
-                                              m_pattern_.length(), 
-                                              m_text_.getBuffer(), 
-                                              m_text_.length(), 
-                                              that.m_strsrch_->collator, 
-                                     (UBreakIterator *)that.m_breakiterator_, 
+        // Make a deep copy
+        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                              m_pattern_.length(),
+                                              m_text_.getBuffer(),
+                                              m_text_.length(),
+                                              that.m_strsrch_->collator,
+                                             (UBreakIterator *)that.m_breakiterator_,
                                                &status);
-    }
-    uprv_free(m_search_);
-    m_search_ = NULL;
-
-    if (U_SUCCESS(status)) {
-        int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
-        // m_search_ has been created by the base SearchIterator class
-        m_search_        = m_strsrch_->search;
-        m_breakiterator_ = that.m_breakiterator_;
+        if (U_SUCCESS(status)) {
+            // m_search_ has been created by the base SearchIterator class
+            m_search_        = m_strsrch_->search;
+        }
      }
  }
  
@@ -212,6 +176,11 @@ StringSearch::~StringSearch()
      }
  }
  
+StringSearch *
+StringSearch::clone() const {
+    return new StringSearch(*this);
+}
+
  // operator overloading ---------------------------------------------
  StringSearch & StringSearch::operator=(const StringSearch &that)
  {
@@ -222,18 +191,16 @@ StringSearch & StringSearch::operator=(const StringSearch &that)
          m_pattern_       = that.m_pattern_;
          // all m_search_ in the parent class is linked up with m_strsrch_
          usearch_close(m_strsrch_);
-        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
-                                              m_pattern_.length(), 
-                                              m_text_.getBuffer(), 
-                                              m_text_.length(), 
-                                              that.m_strsrch_->collator, 
+        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                              m_pattern_.length(),
+                                              m_text_.getBuffer(),
+                                              m_text_.length(),
+                                              that.m_strsrch_->collator,
                                                NULL, &status);
-        int32_t  length;
-        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
-        m_collation_rules_.setTo(rules, length);
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
-                                 &m_collation_rules_);
-        m_search_ = m_strsrch_->search;
+        // Check null pointer
+        if (m_strsrch_ != NULL) {
+            m_search_ = m_strsrch_->search;
+        }
      }
      return *this;
  }
@@ -271,7 +238,7 @@ void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
          usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
      }
  }
-    
+
  void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
  {
      if (U_SUCCESS(status)) {
@@ -282,20 +249,18 @@ void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
  
  RuleBasedCollator * StringSearch::getCollator() const
  {
-    return (RuleBasedCollator *)&m_collator_;
+    // Note the const_cast. It would be cleaner if this const method returned a const collator.
+    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
  }
-    
+
  void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
  {
      if (U_SUCCESS(status)) {
-        usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
-        m_collation_rules_.setTo(coll->getRules());
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator, 
-                                 &m_collation_rules_);
+        usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
      }
  }
-    
-void StringSearch::setPattern(const UnicodeString &pattern, 
+
+void StringSearch::setPattern(const UnicodeString &pattern,
                                      UErrorCode    &status)
  {
      if (U_SUCCESS(status)) {
@@ -304,7 +269,7 @@ void StringSearch::setPattern(const UnicodeString &pattern,
                             &status);
      }
  }
-    
+
  const UnicodeString & StringSearch::getPattern() const
  {
      return m_pattern_;
@@ -320,8 +285,8 @@ void StringSearch::reset()
  SearchIterator * StringSearch::safeClone(void) const
  {
      UErrorCode status = U_ZERO_ERROR;
-    StringSearch *result = new StringSearch(m_pattern_, m_text_, 
-                                            (RuleBasedCollator *)&m_collator_, 
+    StringSearch *result = new StringSearch(m_pattern_, m_text_,
+                                            getCollator(),
                                              m_breakiterator_,
                                              status);
      /* test for NULL */
@@ -337,35 +302,49 @@ SearchIterator * StringSearch::safeClone(void) const
      }
      return result;
  }
-    
+
  // protected method -------------------------------------------------
  
  int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
  {
      // values passed here are already in the pre-shift position
      if (U_SUCCESS(status)) {
-        if (m_strsrch_->pattern.CELength == 0) {
-            m_search_->matchedIndex = 
-                                    m_search_->matchedIndex == USEARCH_DONE ? 
+        if (m_strsrch_->pattern.cesLength == 0) {
+            m_search_->matchedIndex =
+                                    m_search_->matchedIndex == USEARCH_DONE ?
                                      getOffset() : m_search_->matchedIndex + 1;
              m_search_->matchedLength = 0;
-            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 
+            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                             &status);
              if (m_search_->matchedIndex == m_search_->textLength) {
                  m_search_->matchedIndex = USEARCH_DONE;
              }
          }
          else {
-            // looking at usearch.cpp, this part is shifted out to 
+            // looking at usearch.cpp, this part is shifted out to
              // StringSearch instead of SearchIterator because m_strsrch_ is
              // not accessible in SearchIterator
-            if (position + m_strsrch_->pattern.defaultShiftSize 
-                               > m_search_->textLength) {
+#if 0
+            if (position + m_strsrch_->pattern.defaultShiftSize
+                > m_search_->textLength) {
                  setMatchNotFound();
                  return USEARCH_DONE;
              }
-                       ucol_setOffset(m_strsrch_->textIter, position, &status);
-            while (TRUE) {
+#endif
+            if (m_search_->matchedLength <= 0) {
+                // the flipping direction issue has already been handled
+                // in next()
+                // for boundary check purposes. this will ensure that the
+                // next match will not precede the current offset
+                // note search->matchedIndex will always be set to something
+                // in the code
+                m_search_->matchedIndex = position - 1;
+            }
+
+            ucol_setOffset(m_strsrch_->textIter, position, &status);
+            
+#if 0
+            for (;;) {
                  if (m_search_->isCanonicalMatch) {
                      // can't use exact here since extra accents are allowed.
                      usearch_handleNextCanonical(m_strsrch_, &status);
@@ -378,24 +357,47 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
                  }
                  if (m_breakiterator_ == NULL
  #if !UCONFIG_NO_BREAK_ITERATION
-                    || 
+                    ||
                      m_search_->matchedIndex == USEARCH_DONE ||
                      (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
-                     m_breakiterator_->isBoundary(m_search_->matchedIndex + 
+                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                                    m_search_->matchedLength))
  #endif
                  ) {
-                                       if (m_search_->matchedIndex == USEARCH_DONE) {
-                                               ucol_setOffset(m_strsrch_->textIter, 
-                                                                  m_search_->textLength, &status);
-                                       }
-                                       else {
-                                               ucol_setOffset(m_strsrch_->textIter, 
-                                                                  m_search_->matchedIndex, &status);
-                                       }
+                    if (m_search_->matchedIndex == USEARCH_DONE) {
+                        ucol_setOffset(m_strsrch_->textIter,
+                                       m_search_->textLength, &status);
+                    }
+                    else {
+                        ucol_setOffset(m_strsrch_->textIter,
+                                       m_search_->matchedIndex, &status);
+                    }
                      return m_search_->matchedIndex;
                  }
              }
+#else
+            // if m_strsrch_->breakIter is always the same as m_breakiterator_
+            // then we don't need to check the match boundaries here because
+            // usearch_handleNextXXX will already have done it.
+            if (m_search_->isCanonicalMatch) {
+               // *could* actually use exact here 'cause no extra accents allowed...
+               usearch_handleNextCanonical(m_strsrch_, &status);
+            } else {
+               usearch_handleNextExact(m_strsrch_, &status);
+            }
+            
+            if (U_FAILURE(status)) {
+               return USEARCH_DONE;
+            }
+            
+            if (m_search_->matchedIndex == USEARCH_DONE) {
+               ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
+            } else {
+               ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
+            }
+            
+            return m_search_->matchedIndex;
+#endif
          }
      }
      return USEARCH_DONE;
@@ -405,30 +407,32 @@ int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
  {
      // values passed here are already in the pre-shift position
      if (U_SUCCESS(status)) {
-        if (m_strsrch_->pattern.CELength == 0) {
-            m_search_->matchedIndex = 
-                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 
+        if (m_strsrch_->pattern.cesLength == 0) {
+            m_search_->matchedIndex =
+                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
                     m_search_->matchedIndex);
              if (m_search_->matchedIndex == 0) {
                  setMatchNotFound();
              }
              else {
                  m_search_->matchedIndex --;
-                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 
+                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                                 &status);
                  m_search_->matchedLength = 0;
              }
          }
          else {
-            // looking at usearch.cpp, this part is shifted out to 
+            // looking at usearch.cpp, this part is shifted out to
              // StringSearch instead of SearchIterator because m_strsrch_ is
              // not accessible in SearchIterator
-            if (!m_search_->isOverlap && 
+#if 0
+            if (!m_search_->isOverlap &&
                  position - m_strsrch_->pattern.defaultShiftSize < 0) {
                  setMatchNotFound();
                  return USEARCH_DONE;
              }
-            while (TRUE) {
+            
+            for (;;) {
                  if (m_search_->isCanonicalMatch) {
                      // can't use exact here since extra accents are allowed.
                      usearch_handlePreviousCanonical(m_strsrch_, &status);
@@ -441,18 +445,34 @@ int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
                  }
                  if (m_breakiterator_ == NULL
  #if !UCONFIG_NO_BREAK_ITERATION
-                    || 
+                    ||
                      m_search_->matchedIndex == USEARCH_DONE ||
                      (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
-                     m_breakiterator_->isBoundary(m_search_->matchedIndex + 
+                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                                    m_search_->matchedLength))
  #endif
                  ) {
                      return m_search_->matchedIndex;
                  }
              }
+#else
+            ucol_setOffset(m_strsrch_->textIter, position, &status);
+            
+            if (m_search_->isCanonicalMatch) {
+               // *could* use exact match here since extra accents *not* allowed!
+               usearch_handlePreviousCanonical(m_strsrch_, &status);
+            } else {
+               usearch_handlePreviousExact(m_strsrch_, &status);
+            }
+            
+            if (U_FAILURE(status)) {
+               return USEARCH_DONE;
+            }
+            
+            return m_search_->matchedIndex;
+#endif
          }
-          
+
          return m_search_->matchedIndex;
      }
      return USEARCH_DONE;