]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/stsearch.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
index 7815f08131d1b805c94b435bdac37059a56bc401..c5565677f9524b0872dd0ad687ce1db504cf2689 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 **********************************************************************
 /*
 **********************************************************************
-*   Copyright (C) 2001-2006 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  03/22/2000   helena      Creation.
 **********************************************************************
 *   Date        Name        Description
 *  03/22/2000   helena      Creation.
@@ -9,7 +11,7 @@
 
 #include "unicode/utypes.h"
 
 
 #include "unicode/utypes.h"
 
-#if !UCONFIG_NO_COLLATION
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
 
 #include "unicode/stsearch.h"
 #include "usrchimp.h"
 
 #include "unicode/stsearch.h"
 #include "usrchimp.h"
@@ -27,7 +29,6 @@ StringSearch::StringSearch(const UnicodeString &pattern,
                                  BreakIterator *breakiter,
                                  UErrorCode    &status) :
                            SearchIterator(text, breakiter),
                                  BreakIterator *breakiter,
                                  UErrorCode    &status) :
                            SearchIterator(text, breakiter),
-                           m_collator_(),
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
@@ -42,19 +43,7 @@ StringSearch::StringSearch(const UnicodeString &pattern,
     uprv_free(m_search_);
     m_search_ = NULL;
 
     uprv_free(m_search_);
     m_search_ = NULL;
 
-    // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
-    // wrapper around the internal collator and rules, which (here) are
-    // owned by this stringsearch object.  this means 1) it's destructor
-    // _should not_ delete the ucollator or rules, and 2) changes made
-    // to the exposed collator (setStrength etc) _should_ modify the
-    // ucollator.  thus the collator is not a copy-on-write alias, and it
-    // needs to distinguish itself not merely from 'stand alone' colators
-    // but also from copy-on-write ones.  it needs additional state, which
-    // setUCollator should set.
-
     if (U_SUCCESS(status)) {
     if (U_SUCCESS(status)) {
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
         // m_search_ has been created by the base SearchIterator class
         m_search_        = m_strsrch_->search;
     }
         // m_search_ has been created by the base SearchIterator class
         m_search_        = m_strsrch_->search;
     }
@@ -66,7 +55,6 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
-                           m_collator_(),
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
@@ -81,15 +69,13 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                           m_pattern_.length(),
                                           m_text_.getBuffer(),
     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                           m_pattern_.length(),
                                           m_text_.getBuffer(),
-                                          m_text_.length(), coll->ucollator,
+                                          m_text_.length(), coll->toUCollator(),
                                           (UBreakIterator *)breakiter,
                                           &status);
     uprv_free(m_search_);
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
                                           (UBreakIterator *)breakiter,
                                           &status);
     uprv_free(m_search_);
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
@@ -101,7 +87,6 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
-                           m_collator_(),
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
@@ -116,8 +101,6 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
@@ -129,7 +112,6 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
                                  BreakIterator     *breakiter,
                                  UErrorCode        &status) :
                            SearchIterator(text, breakiter),
-                           m_collator_(),
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
                            m_pattern_(pattern)
 {
     if (U_FAILURE(status)) {
@@ -144,15 +126,13 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                           m_pattern_.length(),
                                           m_text_.getBuffer(),
     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                           m_pattern_.length(),
                                           m_text_.getBuffer(),
-                                          m_text_.length(), coll->ucollator,
+                                          m_text_.length(), coll->toUCollator(),
                                           (UBreakIterator *)breakiter,
                                           &status);
     uprv_free(m_search_);
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
                                           (UBreakIterator *)breakiter,
                                           &status);
     uprv_free(m_search_);
     m_search_ = NULL;
 
     if (U_SUCCESS(status)) {
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
         // m_search_ has been created by the base SearchIterator class
         m_search_ = m_strsrch_->search;
     }
@@ -160,7 +140,6 @@ StringSearch::StringSearch(const UnicodeString     &pattern,
 
 StringSearch::StringSearch(const StringSearch &that) :
                        SearchIterator(that.m_text_, that.m_breakiterator_),
 
 StringSearch::StringSearch(const StringSearch &that) :
                        SearchIterator(that.m_text_, that.m_breakiterator_),
-                       m_collator_(),
                        m_pattern_(that.m_pattern_)
 {
     UErrorCode status = U_ZERO_ERROR;
                        m_pattern_(that.m_pattern_)
 {
     UErrorCode status = U_ZERO_ERROR;
@@ -183,8 +162,6 @@ StringSearch::StringSearch(const StringSearch &that) :
                                              (UBreakIterator *)that.m_breakiterator_,
                                               &status);
         if (U_SUCCESS(status)) {
                                              (UBreakIterator *)that.m_breakiterator_,
                                               &status);
         if (U_SUCCESS(status)) {
-            // Alias the collator
-            m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
             // m_search_ has been created by the base SearchIterator class
             m_search_        = m_strsrch_->search;
         }
             // m_search_ has been created by the base SearchIterator class
             m_search_        = m_strsrch_->search;
         }
@@ -220,9 +197,10 @@ StringSearch & StringSearch::operator=(const StringSearch &that)
                                               m_text_.length(),
                                               that.m_strsrch_->collator,
                                               NULL, &status);
                                               m_text_.length(),
                                               that.m_strsrch_->collator,
                                               NULL, &status);
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
-        m_search_ = m_strsrch_->search;
+        // Check null pointer
+        if (m_strsrch_ != NULL) {
+            m_search_ = m_strsrch_->search;
+        }
     }
     return *this;
 }
     }
     return *this;
 }
@@ -271,15 +249,14 @@ void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
 
 RuleBasedCollator * StringSearch::getCollator() const
 {
 
 RuleBasedCollator * StringSearch::getCollator() const
 {
-    return (RuleBasedCollator *)&m_collator_;
+    // Note the const_cast. It would be cleaner if this const method returned a const collator.
+    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
 }
 
 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
 {
     if (U_SUCCESS(status)) {
 }
 
 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
 {
     if (U_SUCCESS(status)) {
-        usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
-        // Alias the collator
-        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
+        usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
     }
 }
 
     }
 }
 
@@ -309,7 +286,7 @@ SearchIterator * StringSearch::safeClone(void) const
 {
     UErrorCode status = U_ZERO_ERROR;
     StringSearch *result = new StringSearch(m_pattern_, m_text_,
 {
     UErrorCode status = U_ZERO_ERROR;
     StringSearch *result = new StringSearch(m_pattern_, m_text_,
-                                            (RuleBasedCollator *)&m_collator_,
+                                            getCollator(),
                                             m_breakiterator_,
                                             status);
     /* test for NULL */
                                             m_breakiterator_,
                                             status);
     /* test for NULL */
@@ -332,7 +309,7 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
 {
     // values passed here are already in the pre-shift position
     if (U_SUCCESS(status)) {
 {
     // values passed here are already in the pre-shift position
     if (U_SUCCESS(status)) {
-        if (m_strsrch_->pattern.CELength == 0) {
+        if (m_strsrch_->pattern.cesLength == 0) {
             m_search_->matchedIndex =
                                     m_search_->matchedIndex == USEARCH_DONE ?
                                     getOffset() : m_search_->matchedIndex + 1;
             m_search_->matchedIndex =
                                     m_search_->matchedIndex == USEARCH_DONE ?
                                     getOffset() : m_search_->matchedIndex + 1;
@@ -347,11 +324,13 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
             // looking at usearch.cpp, this part is shifted out to
             // StringSearch instead of SearchIterator because m_strsrch_ is
             // not accessible in SearchIterator
             // looking at usearch.cpp, this part is shifted out to
             // StringSearch instead of SearchIterator because m_strsrch_ is
             // not accessible in SearchIterator
+#if 0
             if (position + m_strsrch_->pattern.defaultShiftSize
                 > m_search_->textLength) {
                 setMatchNotFound();
                 return USEARCH_DONE;
             }
             if (position + m_strsrch_->pattern.defaultShiftSize
                 > m_search_->textLength) {
                 setMatchNotFound();
                 return USEARCH_DONE;
             }
+#endif
             if (m_search_->matchedLength <= 0) {
                 // the flipping direction issue has already been handled
                 // in next()
             if (m_search_->matchedLength <= 0) {
                 // the flipping direction issue has already been handled
                 // in next()
@@ -363,7 +342,9 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
             }
 
             ucol_setOffset(m_strsrch_->textIter, position, &status);
             }
 
             ucol_setOffset(m_strsrch_->textIter, position, &status);
-            while (TRUE) {
+            
+#if 0
+            for (;;) {
                 if (m_search_->isCanonicalMatch) {
                     // can't use exact here since extra accents are allowed.
                     usearch_handleNextCanonical(m_strsrch_, &status);
                 if (m_search_->isCanonicalMatch) {
                     // can't use exact here since extra accents are allowed.
                     usearch_handleNextCanonical(m_strsrch_, &status);
@@ -394,6 +375,29 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
                     return m_search_->matchedIndex;
                 }
             }
                     return m_search_->matchedIndex;
                 }
             }
+#else
+            // if m_strsrch_->breakIter is always the same as m_breakiterator_
+            // then we don't need to check the match boundaries here because
+            // usearch_handleNextXXX will already have done it.
+            if (m_search_->isCanonicalMatch) {
+               // *could* actually use exact here 'cause no extra accents allowed...
+               usearch_handleNextCanonical(m_strsrch_, &status);
+            } else {
+               usearch_handleNextExact(m_strsrch_, &status);
+            }
+            
+            if (U_FAILURE(status)) {
+               return USEARCH_DONE;
+            }
+            
+            if (m_search_->matchedIndex == USEARCH_DONE) {
+               ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
+            } else {
+               ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
+            }
+            
+            return m_search_->matchedIndex;
+#endif
         }
     }
     return USEARCH_DONE;
         }
     }
     return USEARCH_DONE;
@@ -403,7 +407,7 @@ int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
 {
     // values passed here are already in the pre-shift position
     if (U_SUCCESS(status)) {
 {
     // values passed here are already in the pre-shift position
     if (U_SUCCESS(status)) {
-        if (m_strsrch_->pattern.CELength == 0) {
+        if (m_strsrch_->pattern.cesLength == 0) {
             m_search_->matchedIndex =
                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
                    m_search_->matchedIndex);
             m_search_->matchedIndex =
                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
                    m_search_->matchedIndex);
@@ -421,12 +425,14 @@ int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
             // looking at usearch.cpp, this part is shifted out to
             // StringSearch instead of SearchIterator because m_strsrch_ is
             // not accessible in SearchIterator
             // looking at usearch.cpp, this part is shifted out to
             // StringSearch instead of SearchIterator because m_strsrch_ is
             // not accessible in SearchIterator
+#if 0
             if (!m_search_->isOverlap &&
                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
                 setMatchNotFound();
                 return USEARCH_DONE;
             }
             if (!m_search_->isOverlap &&
                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
                 setMatchNotFound();
                 return USEARCH_DONE;
             }
-            while (TRUE) {
+            
+            for (;;) {
                 if (m_search_->isCanonicalMatch) {
                     // can't use exact here since extra accents are allowed.
                     usearch_handlePreviousCanonical(m_strsrch_, &status);
                 if (m_search_->isCanonicalMatch) {
                     // can't use exact here since extra accents are allowed.
                     usearch_handlePreviousCanonical(m_strsrch_, &status);
@@ -449,6 +455,22 @@ int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
                     return m_search_->matchedIndex;
                 }
             }
                     return m_search_->matchedIndex;
                 }
             }
+#else
+            ucol_setOffset(m_strsrch_->textIter, position, &status);
+            
+            if (m_search_->isCanonicalMatch) {
+               // *could* use exact match here since extra accents *not* allowed!
+               usearch_handlePreviousCanonical(m_strsrch_, &status);
+            } else {
+               usearch_handlePreviousExact(m_strsrch_, &status);
+            }
+            
+            if (U_FAILURE(status)) {
+               return USEARCH_DONE;
+            }
+            
+            return m_search_->matchedIndex;
+#endif
         }
 
         return m_search_->matchedIndex;
         }
 
         return m_search_->matchedIndex;