ICU-66108.tar.gz

[apple/icu.git] / icuSources / i18n / coleitr.cpp
diff --git a/icuSources/i18n/coleitr.cpp b/icuSources/i18n/coleitr.cpp

index 7661e854f014bd9cea1daf9fa90980f9ec527ae2..64d3ab4d2bfe734842a6896cc775130648ba156b 100644 (file)
--- a/icuSources/i18n/coleitr.cpp
+++ b/icuSources/i18n/coleitr.cpp
@@ -1,15 +1,15 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  *******************************************************************************
-* Copyright (C) 1996-2003, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
+* Copyright (C) 1996-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
  *******************************************************************************
  */
  
  /*
  * File coleitr.cpp
  *
-* 
-*
  * Created by: Helena Shih
  *
  * Modification History:
@@ -20,19 +20,29 @@
  * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
  * 12/10/99   aliu        Ported Thai collation support from Java.
  * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
-* 02/19/01   swquek      Removed CollationElementsIterator() since it is 
+* 02/19/01   swquek      Removed CollationElementIterator() since it is 
  *                        private constructor and no calls are made to it
+* 2012-2014  markus      Rewritten in C++ again.
  */
  
  #include "unicode/utypes.h"
  
  #if !UCONFIG_NO_COLLATION
  
+#include "unicode/chariter.h"
  #include "unicode/coleitr.h"
+#include "unicode/tblcoll.h"
  #include "unicode/ustring.h"
-#include "ucol_imp.h"
  #include "cmemory.h"
-
+#include "collation.h"
+#include "collationdata.h"
+#include "collationiterator.h"
+#include "collationsets.h"
+#include "collationtailoring.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "utf16collationiterator.h"
+#include "uvectr32.h"
  
  /* Constants --------------------------------------------------------------- */
  
@@ -40,34 +50,50 @@ U_NAMESPACE_BEGIN
  
  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
  
-/* synwee : public can't remove */
-int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
-
  /* CollationElementIterator public constructor/destructor ------------------ */
  
  CollationElementIterator::CollationElementIterator(
                                           const CollationElementIterator& other) 
-                                         : UObject(other), isDataOwned_(TRUE)
-{
-  UErrorCode status = U_ZERO_ERROR;
-  m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 
-                              &status);
-
-  *this = other;
+        : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) {
+    // Start with no iterator, collator or offsets, then let the assignment
+    // operator copy the state of other into this object.
+    *this = other;
  }
  
  CollationElementIterator::~CollationElementIterator()
  {
-  if (isDataOwned_) {
-    ucol_closeElements(m_data_);
-  }
+    delete iter_;
+    delete offsets_;
  }
  
  /* CollationElementIterator public methods --------------------------------- */
  
+namespace {
+
+// Builds the first 32-bit value from a 64-bit CE given as its upper 32 bits (p)
+// and lower 32 bits (lower32): the top 16 bits of p, then bits 24..31 of lower32,
+// then bits 8..15 of lower32.
+uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {
+    const uint32_t high16 = p & 0xffff0000;
+    const uint32_t middle8 = (lower32 >> 16) & 0xff00;
+    const uint32_t low8 = (lower32 >> 8) & 0xff;
+    return high16 | middle8 | low8;
+}
+
+// Builds the second 32-bit value: the low 16 bits of p moved to the top,
+// then bits 16..23 of lower32, then the low 6 bits of lower32.
+uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {
+    const uint32_t high16 = p << 16;
+    const uint32_t middle8 = (lower32 >> 8) & 0xff00;
+    const uint32_t low6 = lower32 & 0x3f;
+    return high16 | middle8 | low6;
+}
+
+// True when the CE has any of the bits set that getSecondHalf() extracts.
+UBool ceNeedsTwoParts(int64_t ce) {
+    const int64_t secondHalfBits = ce & INT64_C(0xffff00ff003f);
+    return secondHalfBits != 0;
+}
+
+}  // namespace
+
  int32_t CollationElementIterator::getOffset() const
  {
-  return ucol_getOffset(m_data_);
+    // During backward iteration with recorded offsets, answer from offsets_;
+    // in every other state ask the underlying iterator.
+    if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {
+        // CollationIterator::previousCE() decrements the CEs length
+        // while it pops CEs from its internal buffer.
+        int32_t i = iter_->getCEsLength();
+        if (otherHalf_ != 0) {
+            // Return the trailing CE offset while we are in the middle of a 64-bit CE.
+            ++i;
+        }
+        U_ASSERT(i < offsets_->size());
+        return offsets_->elementAti(i);
+    }
+    return iter_->getOffset();
  }
  
  /**
@@ -77,13 +103,44 @@ int32_t CollationElementIterator::getOffset() const
  */
  int32_t CollationElementIterator::next(UErrorCode& status)
  {
-  return ucol_next(m_data_, &status);
+    // Returns the next 32-bit collation element, or NULLORDER on failure or
+    // when the underlying iterator reports Collation::NO_CE.
+    // dir_ values as set in this file: 0 after reset()/setText(), 1 after setOffset(),
+    // 2 while iterating forward, -1 while iterating backward.
+    if (U_FAILURE(status)) { return NULLORDER; }
+    if (dir_ > 1) {
+        // Continue forward iteration. Test this first.
+        if (otherHalf_ != 0) {
+            // Hand out the second half saved by the preceding call.
+            uint32_t oh = otherHalf_;
+            otherHalf_ = 0;
+            return oh;
+        }
+    } else if (dir_ == 1) {
+        // next() after setOffset()
+        dir_ = 2;
+    } else if (dir_ == 0) {
+        // The iter_ is already reset to the start of the text.
+        dir_ = 2;
+    } else /* dir_ < 0 */ {
+        // illegal change of direction
+        status = U_INVALID_STATE_ERROR;
+        return NULLORDER;
+    }
+    // No need to keep all CEs in the buffer when we iterate.
+    iter_->clearCEsIfNoneRemaining();
+    int64_t ce = iter_->nextCE(status);
+    if (ce == Collation::NO_CE) { return NULLORDER; }
+    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
+    uint32_t p = (uint32_t)(ce >> 32);
+    uint32_t lower32 = (uint32_t)ce;
+    uint32_t firstHalf = getFirstHalf(p, lower32);
+    uint32_t secondHalf = getSecondHalf(p, lower32);
+    if (secondHalf != 0) {
+        // Saved here and returned by the following next() call.
+        otherHalf_ = secondHalf | 0xc0;  // continuation CE
+    }
+    return firstHalf;
  }
  
  UBool CollationElementIterator::operator!=(
                                    const CollationElementIterator& other) const
  {
-  return !(*this == other);
+    return !(*this == other);
  }
  
  UBool CollationElementIterator::operator==(
@@ -92,59 +149,13 @@ UBool CollationElementIterator::operator==(
      if (this == &that) {
          return TRUE;
      }
-  
-    if (m_data_ == that.m_data_) {
-        return TRUE;
-    }
-
-    // option comparison
-    if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
-    {
-        return FALSE;
-    }
  
-    // the constructor and setText always sets a length
-    // and we only compare the string not the contents of the normalization
-    // buffer
-    int thislength = m_data_->iteratordata_.endp -
-                     m_data_->iteratordata_.string;
-    int thatlength = that.m_data_->iteratordata_.endp -
-                     that.m_data_->iteratordata_.string;
-    
-    if (thislength != thatlength) {
-        return FALSE;
-    }
-
-    if (uprv_memcmp(m_data_->iteratordata_.string, 
-                    that.m_data_->iteratordata_.string, 
-                    thislength * U_SIZEOF_UCHAR) != 0) {
-        return FALSE;
-    }
-    if (getOffset() != that.getOffset()) {
-        return FALSE;
-    }
-
-    // checking normalization buffer
-    if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
-        if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
-            return FALSE;
-        }
-        // both are in the normalization buffer
-        if (m_data_->iteratordata_.pos 
-            - m_data_->iteratordata_.writableBuffer 
-            != that.m_data_->iteratordata_.pos 
-            - that.m_data_->iteratordata_.writableBuffer) {
-            // not in the same position in the normalization buffer
-            return FALSE;
-        }
-    }
-    else if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
-        return FALSE;
-    }
-    // checking ce position
-    return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
-            == (that.m_data_->iteratordata_.CEpos 
-                                        - that.m_data_->iteratordata_.CEs);
+    // Equal when the collators match (same object or equal by value) and the
+    // pending half-CE, the normalized direction, the text and the underlying
+    // iterator all compare equal.
+    return
+        (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) &&
+        otherHalf_ == that.otherHalf_ &&
+        normalizeDir() == that.normalizeDir() &&
+        string_ == that.string_ &&
+        *iter_ == *that.iter_;
  }
  
  /**
@@ -155,7 +166,55 @@ UBool CollationElementIterator::operator==(
  */
  int32_t CollationElementIterator::previous(UErrorCode& status)
  {
-  return ucol_previous(m_data_, &status);
+    // Returns the previous 32-bit collation element, or NULLORDER on failure or
+    // when the underlying iterator reports Collation::NO_CE.
+    if (U_FAILURE(status)) { return NULLORDER; }
+    if (dir_ < 0) {
+        // Continue backwards iteration. Test this first.
+        if (otherHalf_ != 0) {
+            // Hand out the first half saved by the preceding call.
+            uint32_t oh = otherHalf_;
+            otherHalf_ = 0;
+            return oh;
+        }
+    } else if (dir_ == 0) {
+        // Freshly reset: backward iteration starts from the end of the text.
+        iter_->resetToOffset(string_.length());
+        dir_ = -1;
+    } else if (dir_ == 1) {
+        // previous() after setOffset()
+        dir_ = -1;
+    } else /* dir_ > 1 */ {
+        // illegal change of direction
+        status = U_INVALID_STATE_ERROR;
+        return NULLORDER;
+    }
+    // The offsets vector is created on first backward use.
+    if (offsets_ == NULL) {
+        offsets_ = new UVector32(status);
+        if (offsets_ == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return NULLORDER;
+        }
+    }
+    // If we already have expansion CEs, then we also have offsets.
+    // Otherwise remember the trailing offset in case we need to
+    // write offsets for an artificial expansion.
+    int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;
+    int64_t ce = iter_->previousCE(*offsets_, status);
+    if (ce == Collation::NO_CE) { return NULLORDER; }
+    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
+    uint32_t p = (uint32_t)(ce >> 32);
+    uint32_t lower32 = (uint32_t)ce;
+    uint32_t firstHalf = getFirstHalf(p, lower32);
+    uint32_t secondHalf = getSecondHalf(p, lower32);
+    if (secondHalf != 0) {
+        if (offsets_->isEmpty()) {
+            // When we convert a single 64-bit CE into two 32-bit CEs,
+            // we need to make this artificial expansion behave like a normal expansion.
+            // See CollationIterator::previousCE().
+            offsets_->addElement(iter_->getOffset(), status);
+            offsets_->addElement(limitOffset, status);
+        }
+        // Going backwards the second half comes out first; the first half is
+        // saved for the following previous() call.
+        otherHalf_ = firstHalf;
+        return secondHalf | 0xc0;  // continuation CE
+    }
+    return firstHalf;
  }
  
  /**
@@ -163,13 +222,49 @@ int32_t CollationElementIterator::previous(UErrorCode& status)
  */
  void CollationElementIterator::reset()
  {
-  ucol_reset(m_data_);
+    iter_ ->resetToOffset(0);
+    otherHalf_ = 0;
+    dir_ = 0;
  }
  
  void CollationElementIterator::setOffset(int32_t newOffset, 
                                           UErrorCode& status)
  {
-  ucol_setOffset(m_data_, newOffset, &status);
+    // Positions the iterator at newOffset. An offset strictly inside the text
+    // is first adjusted when it falls on a character the collator reports as unsafe.
+    if (U_FAILURE(status)) { return; }
+    if (0 < newOffset && newOffset < string_.length()) {
+        int32_t offset = newOffset;
+        do {
+            UChar c = string_.charAt(offset);
+            // Stop at a safe code unit, or at a lead surrogate whose code point is safe.
+            if (!rbc_->isUnsafe(c) ||
+                    (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {
+                break;
+            }
+            // Back up to before this unsafe character.
+            --offset;
+        } while (offset > 0);
+        if (offset < newOffset) {
+            // We might have backed up more than necessary.
+            // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
+            // but for text "chu" setOffset(2) should remain at 2
+            // although we initially back up to offset 0.
+            // Find the last safe offset no greater than newOffset by iterating forward.
+            int32_t lastSafeOffset = offset;
+            do {
+                iter_->resetToOffset(lastSafeOffset);
+                // Fetch CEs until the iterator's offset moves past lastSafeOffset.
+                do {
+                    iter_->nextCE(status);
+                    if (U_FAILURE(status)) { return; }
+                } while ((offset = iter_->getOffset()) == lastSafeOffset);
+                if (offset <= newOffset) {
+                    lastSafeOffset = offset;
+                }
+            } while (offset < newOffset);
+            newOffset = lastSafeOffset;
+        }
+    }
+    iter_->resetToOffset(newOffset);
+    otherHalf_ = 0;
+    // 1 marks "just positioned"; next() and previous() both accept this state.
+    dir_ = 1;
  }
  
  /**
@@ -178,96 +273,52 @@ void CollationElementIterator::setOffset(int32_t newOffset,
  void CollationElementIterator::setText(const UnicodeString& source,
                                         UErrorCode& status)
  {
-  if (U_FAILURE(status)) {
-    return;
-  }
-
-  int32_t length = source.length();
-  UChar *string = NULL;
-  if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
-    uprv_free(m_data_->iteratordata_.string);
-  }
-  m_data_->isWritable = TRUE;
-  if (length > 0) {
-    string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
-    /* test for NULL */
-    if (string == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
+    if (U_FAILURE(status)) {
          return;
      }
-    u_memcpy(string, source.getBuffer(), length);
-  }
-  else {
-    string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
-    /* test for NULL */
-    if (string == NULL) {
+
+    string_ = source;
+    const UChar *s = string_.getBuffer();
+    CollationIterator *newIter;
+    UBool numeric = rbc_->settings->isNumeric();
+    if (rbc_->settings->dontCheckFCD()) {
+        newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
+    } else {
+        newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
+    }
+    if (newIter == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
          return;
      }
-    *string = 0;
-  }
-  uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
-                   &m_data_->iteratordata_);
-
-  m_data_->reset_   = TRUE;
+    delete iter_;
+    iter_ = newIter;
+    otherHalf_ = 0;
+    dir_ = 0;
  }
  
  // Sets the source to the new character iterator.
  void CollationElementIterator::setText(CharacterIterator& source, 
                                         UErrorCode& status)
  {
-  if (U_FAILURE(status)) 
-    return;
-    
-  int32_t length = source.getLength();
-  UChar *buffer = NULL;
-
-  if (length == 0) {
-    buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
-    /* test for NULL */
-    if (buffer == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
+    if (U_FAILURE(status)) 
          return;
-    }
-    *buffer = 0;
-  }
-  else {
-      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
-      /* test for NULL */
-      if (buffer == NULL) {
-          status = U_MEMORY_ALLOCATION_ERROR;
-          return;
-      }
-      /* 
-      Using this constructor will prevent buffer from being removed when
-      string gets removed
-      */
-      UnicodeString string;
-      source.getText(string);
-      u_memcpy(buffer, string.getBuffer(), length);
-  }
-  
-  if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
-      uprv_free(m_data_->iteratordata_.string);
-  }
-  m_data_->isWritable = TRUE;
-  uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
-                   &m_data_->iteratordata_);
-  m_data_->reset_   = TRUE;
+
+    // Pull the character iterator's text straight into string_, then let the
+    // UnicodeString overload build the collation iterator over it.
+    source.getText(string_);
+    setText(string_, status);
  }
  
  int32_t CollationElementIterator::strengthOrder(int32_t order) const
  {
-  UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
-  // Mask off the unwanted differences.
-  if (s == UCOL_PRIMARY) {
-      order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
-  }
-  else if (s == UCOL_SECONDARY) {
-      order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
-  }
-    
-  return order;
+    UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();
+    // Mask off the unwanted differences.
+    if (s == UCOL_PRIMARY) {
+        order &= 0xffff0000;
+    }
+    else if (s == UCOL_SECONDARY) {
+        order &= 0xffffff00;
+    }
+
+    return order;
  }
  
  /* CollationElementIterator private constructors/destructors --------------- */
@@ -277,47 +328,11 @@ int32_t CollationElementIterator::strengthOrder(int32_t order) const
  * over the source text using the specified collator
  */
  CollationElementIterator::CollationElementIterator(
-                                               const UnicodeString& sourceText,
-                                               const RuleBasedCollator* order,
-                                               UErrorCode& status)
-                                               : isDataOwned_(TRUE)
-{
-  if (U_FAILURE(status)) {
-      return;
-  }
- 
-  int32_t length = sourceText.length();
-  UChar *string = NULL;
-  
-  if (length > 0) {
-      string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
-      /* test for NULL */
-      if (string == NULL) {
-          status = U_MEMORY_ALLOCATION_ERROR;
-          return;
-      }
-      /* 
-      Using this constructor will prevent buffer from being removed when
-      string gets removed
-      */
-      u_memcpy(string, sourceText.getBuffer(), length);
-  }
-  else {
-      string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
-      /* test for NULL */
-      if (string == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-      }
-      *string = 0;
-  }
-  m_data_ = ucol_openElements(order->ucollator, string, length, &status);
-  
-  /* Test for buffer overflows */
-  if (U_FAILURE(status)) {
-    return;
-  }
-  m_data_->isWritable = TRUE;
+                                               const UnicodeString &source,
+                                               const RuleBasedCollator *coll,
+                                               UErrorCode &status)
+        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
+    // setText() copies source and creates iter_. If status is already a failure,
+    // or setText() fails, iter_ stays NULL.
+    setText(source, status);
  }
  
  /** 
@@ -325,170 +340,134 @@ CollationElementIterator::CollationElementIterator(
  * the source text using the specified collator
  */
  CollationElementIterator::CollationElementIterator(
-                                           const CharacterIterator& sourceText,
-                                           const RuleBasedCollator* order,
-                                           UErrorCode& status)
-                                           : isDataOwned_(TRUE)
-{
-  if (U_FAILURE(status))
-    return;
-    
-  // **** should I just drop this test? ****
-  /*
-  if ( sourceText.endIndex() != 0 )
-  {
-    // A CollationElementIterator is really a two-layered beast.
-    // Internally it uses a Normalizer to munge the source text into a form 
-    // where all "composed" Unicode characters (such as \u00FC) are split into a 
-    // normal character and a combining accent character.  
-    // Afterward, CollationElementIterator does its own processing to handle
-    // expanding and contracting collation sequences, ignorables, and so on.
-    
-    Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
-                               ? Normalizer::NO_OP : order->getDecomposition();
-      
-    text = new Normalizer(sourceText, decomp);
-    if (text == NULL)
-      status = U_MEMORY_ALLOCATION_ERROR;    
-  }
-  */
-  int32_t length = sourceText.getLength();
-  UChar *buffer;
-  if (length > 0) {
-      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
-      /* test for NULL */
-      if (buffer == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-      }
-      /* 
-      Using this constructor will prevent buffer from being removed when
-      string gets removed
-      */
-      UnicodeString string(buffer, length, length);
-      ((CharacterIterator &)sourceText).getText(string);
-      const UChar *temp = string.getBuffer();
-      u_memcpy(buffer, temp, length);
-  }
-  else {
-      buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
-      /* test for NULL */
-      if (buffer == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-      }
-      *buffer = 0;
-  }
-  m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
-
-  /* Test for buffer overflows */
-  if (U_FAILURE(status)) {
-    return;
-  }
-  m_data_->isWritable = TRUE;
+                                           const CharacterIterator &source,
+                                           const RuleBasedCollator *coll,
+                                           UErrorCode &status)
+        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
+    // The setText() overload takes a non-const CharacterIterator, hence the cast.
+    // We only call source.getText() which should be const anyway.
+    setText(const_cast<CharacterIterator &>(source), status);
  }
  
-/* CollationElementIterator protected methods ----------------------------- */
+/* CollationElementIterator private methods -------------------------------- */
  
  const CollationElementIterator& CollationElementIterator::operator=(
                                           const CollationElementIterator& other)
  {
-  if (this != &other)
-  {
-      UCollationElements *ucolelem      = this->m_data_;
-      UCollationElements *otherucolelem = other.m_data_;
-      collIterate        *coliter       = &(ucolelem->iteratordata_);
-      collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
-      int                length         = 0;
-      
-      // checking only UCOL_ITER_HASLEN is not enough here as we may be in 
-      // the normalization buffer
-      length = othercoliter->endp - othercoliter->string;
-                                    
-      ucolelem->reset_         = otherucolelem->reset_;
-      ucolelem->isWritable     = TRUE;
-    
-      /* create a duplicate of string */
-      if (length > 0) {
-          coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
-          if(coliter->string != NULL) {
-            uprv_memcpy(coliter->string, othercoliter->string,
-                        length * U_SIZEOF_UCHAR);
-          } else { // Error: couldn't allocate memory. No copying should be done
-            length = 0;
-          }
-      }
-      else {
-          coliter->string = NULL;
-      }
-
-      /* start and end of string */
-      coliter->endp = coliter->string + length;
-
-      /* handle writable buffer here */
-      
-      if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
-          uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
-          if (wlength < coliter->writableBufSize) {
-              uprv_memcpy(coliter->stackWritableBuffer, 
-                        othercoliter->stackWritableBuffer, 
-                        othercoliter->writableBufSize * U_SIZEOF_UCHAR);
-          }
-          else {
-              if (coliter->writableBuffer != coliter->stackWritableBuffer) {
-                  uprv_free(coliter->writableBuffer);
-              }
-              coliter->writableBuffer = (UChar *)uprv_malloc(
-                                         wlength * U_SIZEOF_UCHAR);
-              if(coliter->writableBuffer != NULL) {
-                uprv_memcpy(coliter->writableBuffer, 
-                            othercoliter->writableBuffer,
-                            wlength * U_SIZEOF_UCHAR);
-                coliter->writableBufSize = wlength;
-              } else { // Error: couldn't allocate memory for writableBuffer
-                coliter->writableBufSize = 0;
-              }
-          }
-      }
-
-      /* current position */
-      if (othercoliter->pos >= othercoliter->string && 
-          othercoliter->pos <= othercoliter->endp) {
-          coliter->pos = coliter->string + 
-                        (othercoliter->pos - othercoliter->string);
-      }
-      else {
-        coliter->pos = coliter->writableBuffer + 
-                        (othercoliter->pos - othercoliter->writableBuffer);
-      }
-
-      /* CE buffer */
-      uprv_memcpy(coliter->CEs, othercoliter->CEs, 
-                  UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
-      coliter->toReturn = coliter->CEs + 
-                         (othercoliter->toReturn - othercoliter->CEs);
-      coliter->CEpos    = coliter->CEs + 
-                         (othercoliter->CEpos - othercoliter->CEs);
-    
-      if (othercoliter->fcdPosition != NULL) {
-          coliter->fcdPosition = coliter->string + 
-                                 (othercoliter->fcdPosition 
-                                                    - othercoliter->string);
-      }
-      else {
-          coliter->fcdPosition = NULL;
-      }
-      coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
-      coliter->origFlags   = othercoliter->origFlags;
-      coliter->coll = othercoliter->coll;
-      this->isDataOwned_ = TRUE;
-  }
-
-  return *this;
+    if (this == &other) {
+        return *this;
+    }
+
+    CollationIterator *newIter;
+    const FCDUTF16CollationIterator *otherFCDIter =
+            dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);
+    if(otherFCDIter != NULL) {
+        newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer());
+    } else {
+        const UTF16CollationIterator *otherIter =
+                dynamic_cast<const UTF16CollationIterator *>(other.iter_);
+        if(otherIter != NULL) {
+            newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer());
+        } else {
+            newIter = NULL;
+        }
+    }
+    if(newIter != NULL) {
+        delete iter_;
+        iter_ = newIter;
+        rbc_ = other.rbc_;
+        otherHalf_ = other.otherHalf_;
+        dir_ = other.dir_;
+
+        string_ = other.string_;
+    }
+    if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {
+        UErrorCode errorCode = U_ZERO_ERROR;
+        if(offsets_ == NULL) {
+            offsets_ = new UVector32(other.offsets_->size(), errorCode);
+        }
+        if(offsets_ != NULL) {
+            offsets_->assign(*other.offsets_, errorCode);
+        }
+    }
+    return *this;
+}
+
+namespace {
+
+// CESink that records, per last 32-bit CE "half" of an expansion, the largest
+// number of halves seen for any expansion ending in that half.
+class MaxExpSink : public ContractionsAndExpansions::CESink {
+public:
+    // h is the table to fill (not closed by this class); ec receives uhash errors.
+    MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}
+    virtual ~MaxExpSink();
+    // Single CEs are ignored.
+    virtual void handleCE(int64_t /*ce*/) {}
+    virtual void handleExpansion(const int64_t ces[], int32_t length) {
+        if (length <= 1) {
+            // We do not need to add single CEs into the map.
+            return;
+        }
+        int32_t count = 0;  // number of CE "halves"
+        for (int32_t i = 0; i < length; ++i) {
+            count += ceNeedsTwoParts(ces[i]) ? 2 : 1;
+        }
+        // last "half" of the last CE
+        int64_t ce = ces[length - 1];
+        uint32_t p = (uint32_t)(ce >> 32);
+        uint32_t lower32 = (uint32_t)ce;
+        uint32_t lastHalf = getSecondHalf(p, lower32);
+        if (lastHalf == 0) {
+            // No second half: the first half is the last one.
+            lastHalf = getFirstHalf(p, lower32);
+            U_ASSERT(lastHalf != 0);
+        } else {
+            lastHalf |= 0xc0;  // old-style continuation CE
+        }
+        // Keep the maximum count per last half.
+        if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {
+            uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);
+        }
+    }
+
+private:
+    UHashtable *maxExpansions;
+    UErrorCode &errorCode;
+};
+
+MaxExpSink::~MaxExpSink() {}
+
+}  // namespace
+
+// Builds a hash table of maximum expansion sizes by feeding the expansions
+// found in data through MaxExpSink. Returns NULL if errorCode is or becomes a
+// failure; otherwise returns the table opened here with uhash_open().
+UHashtable *
+CollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return NULL; }
+    UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,
+                                           uhash_compareLong, &errorCode);
+    if (U_FAILURE(errorCode)) { return NULL; }
+    MaxExpSink sink(maxExpansions, errorCode);
+    ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);
+    if (U_FAILURE(errorCode)) {
+        // Do not hand out a partially filled table.
+        uhash_close(maxExpansions);
+        return NULL;
+    }
+    return maxExpansions;
+}
+
+// Looks up order in the maxExpansions table of this iterator's collator tailoring.
+int32_t
+CollationElementIterator::getMaxExpansion(int32_t order) const {
+    return getMaxExpansion(rbc_->tailoring->maxExpansions, order);
+}
+
+int32_t
+CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) {
+    // A zero order always reports 1.
+    if (order == 0) { return 1; }
+    // Prefer a non-zero table entry when a table is available.
+    if (maxExpansions != NULL) {
+        const int32_t stored = uhash_igeti(maxExpansions, order);
+        if (stored != 0) { return stored; }
+    }
+    // No entry: an old-style continuation CE (both 0xc0 bits set) reports 2,
+    // anything else 1.
+    const UBool isContinuation = (order & 0xc0) == 0xc0;
+    return isContinuation ? 2 : 1;
  }
  
  U_NAMESPACE_END
  
  #endif /* #if !UCONFIG_NO_COLLATION */
-
-/* eof */