]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/ucoleitr.cpp
ICU-511.34.tar.gz
[apple/icu.git] / icuSources / i18n / ucoleitr.cpp
index f386fb4ef71869a60616269c4c6aaa2718b35c23..b822f207276a7127f075145043e6f2ba11e0f5fa 100644 (file)
@@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 2001-2003, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *
@@ -20,6 +20,7 @@
 #include "unicode/ucoleitr.h"
 #include "unicode/ustring.h"
 #include "unicode/sortkey.h"
+#include "unicode/uobject.h"
 #include "ucol_imp.h"
 #include "cmemory.h"
 
@@ -27,22 +28,284 @@ U_NAMESPACE_USE
 
 #define BUFFER_LENGTH             100
 
-typedef struct collIterate collIterator;
+#define DEFAULT_BUFFER_SIZE 16
+#define BUFFER_GROW 8
 
-/* public methods ---------------------------------------------------- */
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
 
-/**
-* Since this is going to be deprecated, I'll leave it as it is
-*/
-U_CAPI int32_t U_EXPORT2
-ucol_keyHashCode(const uint8_t *key, 
-                       int32_t  length)
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
+
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+
+#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
+
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+typedef struct icu::collIterate collIterator;
+
+struct RCEI
+{
+    uint32_t ce;
+    int32_t  low;
+    int32_t  high;
+};
+
+U_NAMESPACE_BEGIN
+
+struct RCEBuffer
+{
+    RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
+    RCEI   *buffer;
+    int32_t bufferIndex;
+    int32_t bufferSize;
+
+    RCEBuffer();
+    ~RCEBuffer();
+
+    UBool empty() const;
+    void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
+    const RCEI *get();
+};
+
+RCEBuffer::RCEBuffer()
+{
+    buffer = defaultBuffer;
+    bufferIndex = 0;
+    bufferSize = DEFAULT_BUFFER_SIZE;
+}
+
+RCEBuffer::~RCEBuffer()
+{
+    if (buffer != defaultBuffer) {
+        DELETE_ARRAY(buffer);
+    }
+}
+
+UBool RCEBuffer::empty() const
 {
+    return bufferIndex <= 0;
+}
+
+void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
+{
+    if (bufferIndex >= bufferSize) {
+        RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
+
+        ARRAY_COPY(newBuffer, buffer, bufferSize);
+
+        if (buffer != defaultBuffer) {
+            DELETE_ARRAY(buffer);
+        }
+
+        buffer = newBuffer;
+        bufferSize += BUFFER_GROW;
+    }
+
+    buffer[bufferIndex].ce   = ce;
+    buffer[bufferIndex].low  = ixLow;
+    buffer[bufferIndex].high = ixHigh;
 
-    CollationKey newKey(key, length);
-    return newKey.hashCode();
+    bufferIndex += 1;
 }
 
+const RCEI *RCEBuffer::get()
+{
+    if (bufferIndex > 0) {
+     return &buffer[--bufferIndex];
+    }
+
+    return NULL;
+}
+
+struct PCEI
+{
+    uint64_t ce;
+    int32_t  low;
+    int32_t  high;
+};
+
+struct PCEBuffer
+{
+    PCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
+    PCEI   *buffer;
+    int32_t bufferIndex;
+    int32_t bufferSize;
+
+    PCEBuffer();
+    ~PCEBuffer();
+
+    void  reset();
+    UBool empty() const;
+    void  put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
+    const PCEI *get();
+};
+
+PCEBuffer::PCEBuffer()
+{
+    buffer = defaultBuffer;
+    bufferIndex = 0;
+    bufferSize = DEFAULT_BUFFER_SIZE;
+}
+
+PCEBuffer::~PCEBuffer()
+{
+    if (buffer != defaultBuffer) {
+        DELETE_ARRAY(buffer);
+    }
+}
+
+void PCEBuffer::reset()
+{
+    bufferIndex = 0;
+}
+
+UBool PCEBuffer::empty() const
+{
+    return bufferIndex <= 0;
+}
+
+void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
+{
+    if (bufferIndex >= bufferSize) {
+        PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
+
+        ARRAY_COPY(newBuffer, buffer, bufferSize);
+
+        if (buffer != defaultBuffer) {
+            DELETE_ARRAY(buffer);
+        }
+
+        buffer = newBuffer;
+        bufferSize += BUFFER_GROW;
+    }
+
+    buffer[bufferIndex].ce   = ce;
+    buffer[bufferIndex].low  = ixLow;
+    buffer[bufferIndex].high = ixHigh;
+
+    bufferIndex += 1;
+}
+
+const PCEI *PCEBuffer::get()
+{
+    if (bufferIndex > 0) {
+     return &buffer[--bufferIndex];
+    }
+
+    return NULL;
+}
+
+/*
+ * This inherits from UObject so that
+ * it can be allocated by new and the
+ * constructor for PCEBuffer is called.
+ */
+struct UCollationPCE : public UObject
+{
+    PCEBuffer          pceBuffer;
+    UCollationStrength strength;
+    UBool              toShift;
+    UBool              isShifted;
+    uint32_t           variableTop;
+
+    UCollationPCE(UCollationElements *elems);
+    ~UCollationPCE();
+
+    void init(const UCollator *coll);
+
+    virtual UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
+
+UCollationPCE::UCollationPCE(UCollationElements *elems)
+{
+    init(elems->iteratordata_.coll);
+}
+
+void UCollationPCE::init(const UCollator *coll)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    strength    = ucol_getStrength(coll);
+    toShift     = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
+    isShifted   = FALSE;
+    variableTop = coll->variableTopValue << 16;
+}
+
+UCollationPCE::~UCollationPCE()
+{
+    // nothing to do
+}
+
+
+U_NAMESPACE_END
+
+
+inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
+{
+    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
+
+    // This is clean, but somewhat slow...
+    // We could apply the mask to ce and then
+    // just get all three orders...
+    switch(elems->pce->strength) {
+    default:
+        tertiary = ucol_tertiaryOrder(ce);
+        /* note fall-through */
+
+    case UCOL_SECONDARY:
+        secondary = ucol_secondaryOrder(ce);
+        /* note fall-through */
+
+    case UCOL_PRIMARY:
+        primary = ucol_primaryOrder(ce);
+    }
+
+    // **** This should probably handle continuations too.  ****
+    // **** That means that we need 24 bits for the primary ****
+    // **** instead of the 16 that we're currently using.   ****
+    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
+    // **** Another complication with continuations is that ****
+    // **** the *second* CE is marked as a continuation, so ****
+    // **** we always have to peek ahead to know how long   ****
+    // **** the primary is...                               ****
+    if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
+                || (elems->pce->isShifted && primary == 0)) {
+
+        if (primary == 0) {
+            return UCOL_IGNORABLE;
+        }
+
+        if (elems->pce->strength >= UCOL_QUATERNARY) {
+            quaternary = primary;
+        }
+
+        primary = secondary = tertiary = 0;
+        elems->pce->isShifted = TRUE;
+    } else {
+        if (elems->pce->strength >= UCOL_QUATERNARY) {
+            quaternary = 0xFFFF;
+        }
+
+        elems->pce->isShifted = FALSE;
+    }
+
+    return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
+}
+
+U_CAPI void U_EXPORT2
+uprv_init_pce(const UCollationElements *elems)
+{
+    if (elems->pce != NULL) {
+        elems->pce->init(elems->iteratordata_.coll);
+    }
+}
+
+
+
+/* public methods ---------------------------------------------------- */
 
 U_CAPI UCollationElements* U_EXPORT2
 ucol_openElements(const UCollator  *coll,
@@ -50,97 +313,212 @@ ucol_openElements(const UCollator  *coll,
                         int32_t    textLength,
                         UErrorCode *status)
 {
-  UCollationElements *result;
-
-  if (U_FAILURE(*status)) {
-    return NULL;
-  }
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
 
-  result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
-  /* test for NULL */
-  if (result == NULL) {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-  }
+    UCollationElements *result = new UCollationElements;
+    if (result == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
 
-  result->reset_   = TRUE;
-  result->isWritable = FALSE;
+    result->reset_ = TRUE;
+    result->isWritable = FALSE;
+    result->pce = NULL;
 
-  if (text == NULL) {
-      textLength = 0;
-  }
-  uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
+    if (text == NULL) {
+        textLength = 0;
+    }
+    uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
 
-  return result;
+    return result;
 }
 
+
 U_CAPI void U_EXPORT2
 ucol_closeElements(UCollationElements *elems)
 {
-  collIterate *ci = &elems->iteratordata_;
-  if (ci->writableBuffer != ci->stackWritableBuffer) {
-    uprv_free(ci->writableBuffer);
-  }
-  if (elems->isWritable && elems->iteratordata_.string != NULL)
-  {
-    uprv_free(elems->iteratordata_.string);
-  }
-  uprv_free(elems);
+       if (elems != NULL) {
+         collIterate *ci = &elems->iteratordata_;
+
+         if (ci->extendCEs) {
+                 uprv_free(ci->extendCEs);
+         }
+
+         if (ci->offsetBuffer) {
+                 uprv_free(ci->offsetBuffer);
+         }
+
+         if (elems->isWritable && elems->iteratordata_.string != NULL)
+         {
+               uprv_free((UChar *)elems->iteratordata_.string);
+         }
+
+         if (elems->pce != NULL) {
+                 delete elems->pce;
+         }
+
+         delete elems;
+       }
 }
 
 U_CAPI void U_EXPORT2
 ucol_reset(UCollationElements *elems)
 {
-  collIterate *ci = &(elems->iteratordata_);
-  elems->reset_   = TRUE;
-  ci->pos         = ci->string;
-  if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
-    ci->endp      = ci->string + u_strlen(ci->string);
-  }
-  ci->CEpos       = ci->toReturn = ci->CEs;
-  ci->flags       = UCOL_ITER_HASLEN;
-  if (ci->coll->normalizationMode == UCOL_ON) {
-    ci->flags |= UCOL_ITER_NORM;
-  }
-  
-  if (ci->stackWritableBuffer != ci->writableBuffer) {
-    uprv_free(ci->writableBuffer);
-    ci->writableBuffer = ci->stackWritableBuffer;
-    ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
-  }
-  ci->fcdPosition = NULL;
+    collIterate *ci = &(elems->iteratordata_);
+    elems->reset_   = TRUE;
+    ci->pos         = ci->string;
+    if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
+        ci->endp      = ci->string + u_strlen(ci->string);
+    }
+    ci->CEpos       = ci->toReturn = ci->CEs;
+    ci->flags       = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
+    if (ci->coll->normalizationMode == UCOL_ON) {
+        ci->flags |= UCOL_ITER_NORM;
+    }
+
+    ci->writableBuffer.remove();
+    ci->fcdPosition = NULL;
+
+  //ci->offsetReturn = ci->offsetStore = NULL;
+       ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
+}
+
+U_CAPI void U_EXPORT2
+ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    if (elems == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_next(UCollationElements *elems, 
           UErrorCode         *status)
 {
-  uint32_t result;
-  if (U_FAILURE(*status)) {
-    return UCOL_NULLORDER;
-  }
+    int32_t result;
+    if (U_FAILURE(*status)) {
+        return UCOL_NULLORDER;
+    }
 
-  elems->reset_ = FALSE;
+    elems->reset_ = FALSE;
 
-  result = ucol_getNextCE(elems->iteratordata_.coll, &elems->iteratordata_, 
-                          status);
-  
-  if (result == UCOL_NO_MORE_CES) {
-    result = UCOL_NULLORDER;
-  }
-  return result;
+    result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
+                                     &elems->iteratordata_, 
+                                     status);
+
+    if (result == UCOL_NO_MORE_CES) {
+        result = UCOL_NULLORDER;
+    }
+    return result;
+}
+
+U_CAPI int64_t U_EXPORT2
+ucol_nextProcessed(UCollationElements *elems,
+                   int32_t            *ixLow,
+                   int32_t            *ixHigh,
+                   UErrorCode         *status)
+{
+    const UCollator *coll = elems->iteratordata_.coll;
+    int64_t result = UCOL_IGNORABLE;
+    uint32_t low = 0, high = 0;
+
+    if (U_FAILURE(*status)) {
+        return UCOL_PROCESSED_NULLORDER;
+    }
+
+    if (elems->pce == NULL) {
+        elems->pce = new UCollationPCE(elems);
+    } else {
+        elems->pce->pceBuffer.reset();
+    }
+
+    elems->reset_ = FALSE;
+
+    do {
+        low = ucol_getOffset(elems);
+        uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
+        high = ucol_getOffset(elems);
+
+        if (ce == UCOL_NO_MORE_CES) {
+             result = UCOL_PROCESSED_NULLORDER;
+             break;
+        }
+
+        result = processCE(elems, ce);
+    } while (result == UCOL_IGNORABLE);
+
+    if (ixLow != NULL) {
+        *ixLow = low;
+    }
+
+    if (ixHigh != NULL) {
+        *ixHigh = high;
+    }
+
+    return result;
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_previous(UCollationElements *elems,
               UErrorCode         *status)
 {
-  if(U_FAILURE(*status)) {
-    return UCOL_NULLORDER;
-  }
-  else
-  {
-    uint32_t result;
+    if(U_FAILURE(*status)) {
+        return UCOL_NULLORDER;
+    }
+    else
+    {
+        int32_t result;
+
+        if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
+            if (elems->iteratordata_.endp == NULL) {
+                elems->iteratordata_.endp = elems->iteratordata_.string + 
+                                            u_strlen(elems->iteratordata_.string);
+                elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
+            }
+            elems->iteratordata_.pos = elems->iteratordata_.endp;
+            elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
+        }
+
+        elems->reset_ = FALSE;
+
+        result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
+                                         &(elems->iteratordata_), 
+                                         status);
+
+        if (result == UCOL_NO_MORE_CES) {
+            result = UCOL_NULLORDER;
+        }
+
+        return result;
+    }
+}
+
+U_CAPI int64_t U_EXPORT2
+ucol_previousProcessed(UCollationElements *elems,
+                   int32_t            *ixLow,
+                   int32_t            *ixHigh,
+                   UErrorCode         *status)
+{
+    const UCollator *coll = elems->iteratordata_.coll;
+    int64_t result = UCOL_IGNORABLE;
+ // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
+ // UCollationStrength strength = ucol_getStrength(coll);
+ //  UBool toShift   = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) ==  UCOL_SHIFTED;
+ // uint32_t variableTop = coll->variableTopValue;
+    int32_t  low = 0, high = 0;
+
+    if (U_FAILURE(*status)) {
+        return UCOL_PROCESSED_NULLORDER;
+    }
 
     if (elems->reset_ && 
         (elems->iteratordata_.pos == elems->iteratordata_.string)) {
@@ -149,30 +527,138 @@ ucol_previous(UCollationElements *elems,
                                         u_strlen(elems->iteratordata_.string);
             elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
         }
+
         elems->iteratordata_.pos = elems->iteratordata_.endp;
         elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
     }
 
+    if (elems->pce == NULL) {
+        elems->pce = new UCollationPCE(elems);
+    } else {
+      //elems->pce->pceBuffer.reset();
+    }
+
     elems->reset_ = FALSE;
 
-    result = ucol_getPrevCE(elems->iteratordata_.coll, &(elems->iteratordata_), 
-                            status);
+    while (elems->pce->pceBuffer.empty()) {
+        // buffer raw CEs up to non-ignorable primary
+        RCEBuffer rceb;
+        uint32_t ce;
+        
+        // **** do we need to reset rceb, or will it always be empty at this point ****
+        do {
+            high = ucol_getOffset(elems);
+            ce   = ucol_getPrevCE(coll, &elems->iteratordata_, status);
+            low  = ucol_getOffset(elems);
+
+            if (ce == UCOL_NO_MORE_CES) {
+                if (! rceb.empty()) {
+                    break;
+                }
+
+                goto finish;
+            }
+
+            rceb.put(ce, low, high);
+        } while ((ce & UCOL_PRIMARYMASK) == 0);
+
+        // process the raw CEs
+        while (! rceb.empty()) {
+            const RCEI *rcei = rceb.get();
+
+            result = processCE(elems, rcei->ce);
+
+            if (result != UCOL_IGNORABLE) {
+                elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
+            }
+        }
+    }
 
-    if (result == UCOL_NO_MORE_CES) {
-      result = UCOL_NULLORDER;
+finish:
+    if (elems->pce->pceBuffer.empty()) {
+        // **** Is -1 the right value for ixLow, ixHigh? ****
+       if (ixLow != NULL) {
+               *ixLow = -1;
+       }
+       
+       if (ixHigh != NULL) {
+               *ixHigh = -1
+               ;
+       }
+        return UCOL_PROCESSED_NULLORDER;
     }
 
-    return result;
-  }
+    const PCEI *pcei = elems->pce->pceBuffer.get();
+
+    if (ixLow != NULL) {
+        *ixLow = pcei->low;
+    }
+
+    if (ixHigh != NULL) {
+        *ixHigh = pcei->high;
+    }
+
+    return pcei->ce;
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_getMaxExpansion(const UCollationElements *elems,
                            int32_t            order)
 {
-  uint8_t result;
-  UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
-  return result;
+    uint8_t result;
+
+#if 0
+    UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
+#else
+    const UCollator *coll = elems->iteratordata_.coll;
+    const uint32_t *start;
+    const uint32_t *limit;
+    const uint32_t *mid;
+          uint32_t strengthMask = 0;
+          uint32_t mOrder = (uint32_t) order;
+
+    switch (coll->strength) 
+    {
+    default:
+        strengthMask |= UCOL_TERTIARYORDERMASK;
+        /* fall through */
+
+    case UCOL_SECONDARY:
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+        /* fall through */
+
+    case UCOL_PRIMARY:
+        strengthMask |= UCOL_PRIMARYORDERMASK;
+    }
+
+    mOrder &= strengthMask;
+    start = (coll)->endExpansionCE;
+    limit = (coll)->lastEndExpansionCE;
+
+    while (start < limit - 1) {
+        mid = start + ((limit - start) >> 1);
+        if (mOrder <= (*mid & strengthMask)) {
+          limit = mid;
+        } else {
+          start = mid;
+        }
+    }
+
+    // FIXME: with a masked search, there might be more than one hit,
+    // so we need to look forward and backward from the match to find all
+    // of the hits...
+    if ((*start & strengthMask) == mOrder) {
+        result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
+    } else if ((*limit & strengthMask) == mOrder) {
+         result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
+   } else if ((mOrder & 0xFFFF) == 0x00C0) {
+        result = 2;
+   } else {
+       result = 1;
+   }
+#endif
+
+    return result;
 }
  
 U_CAPI void U_EXPORT2
@@ -181,30 +667,46 @@ ucol_setText(      UCollationElements *elems,
                    int32_t            textLength,
                    UErrorCode         *status)
 {
-  if (U_FAILURE(*status)) {
-    return;
-  }
+    if (U_FAILURE(*status)) {
+        return;
+    }
 
-  if (elems->isWritable && elems->iteratordata_.string != NULL)
-  {
-    uprv_free(elems->iteratordata_.string);
-  }
-  if (text == NULL) {
-      textLength = 0;
-  }
+    if (elems->isWritable && elems->iteratordata_.string != NULL)
+    {
+        uprv_free((UChar *)elems->iteratordata_.string);
+    }
 
-  elems->isWritable = FALSE;
-  uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, 
-                   &elems->iteratordata_);
+    if (text == NULL) {
+        textLength = 0;
+    }
+
+    elems->isWritable = FALSE;
+    
+    /* free offset buffer to avoid memory leak before initializing. */
+    ucol_freeOffsetBuffer(&(elems->iteratordata_));
+    /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
+    if (elems->iteratordata_.extendCEs != NULL) {
+        uprv_free(elems->iteratordata_.extendCEs);
+    }
+    uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, 
+                          &elems->iteratordata_, status);
 
-  elems->reset_   = TRUE;
+    elems->reset_   = TRUE;
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_getOffset(const UCollationElements *elems)
 {
   const collIterate *ci = &(elems->iteratordata_);
+
+  if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
+      return ci->offsetRepeatValue;
+  }
+
+  if (ci->offsetReturn != NULL) {
+      return *ci->offsetReturn;
+  }
+
   // while processing characters in normalization buffer getOffset will 
   // return the next non-normalized character. 
   // should be inline with the old implementation since the old codes uses
@@ -226,44 +728,57 @@ ucol_setOffset(UCollationElements    *elems,
                int32_t           offset,
                UErrorCode            *status)
 {
-  if (U_FAILURE(*status)) {
-    return;
-  }
+    if (U_FAILURE(*status)) {
+        return;
+    }
 
-  // this methods will clean up any use of the writable buffer and points to 
-  // the original string
-  collIterate *ci = &(elems->iteratordata_);
-  ci->pos         = ci->string + offset;
-  ci->CEpos       = ci->toReturn = ci->CEs;
-  if (ci->flags & UCOL_ITER_INNORMBUF) {
-    ci->flags = ci->origFlags;
-  }
-  if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
-      ci->endp  = ci->string + u_strlen(ci->string);
-      ci->flags |= UCOL_ITER_HASLEN;
-  }
-  ci->fcdPosition = NULL;
-  elems->reset_ = FALSE;
+    // this methods will clean up any use of the writable buffer and points to 
+    // the original string
+    collIterate *ci = &(elems->iteratordata_);
+    ci->pos         = ci->string + offset;
+    ci->CEpos       = ci->toReturn = ci->CEs;
+    if (ci->flags & UCOL_ITER_INNORMBUF) {
+        ci->flags = ci->origFlags;
+    }
+    if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
+        ci->endp  = ci->string + u_strlen(ci->string);
+        ci->flags |= UCOL_ITER_HASLEN;
+    }
+    ci->fcdPosition = NULL;
+    elems->reset_ = FALSE;
+
+       ci->offsetReturn = NULL;
+    ci->offsetStore = ci->offsetBuffer;
+       ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_primaryOrder (int32_t order) 
 {
-  order &= UCOL_PRIMARYMASK;
-  return (order >> UCOL_PRIMARYORDERSHIFT);
+    order &= UCOL_PRIMARYMASK;
+    return (order >> UCOL_PRIMARYORDERSHIFT);
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_secondaryOrder (int32_t order) 
 {
-  order &= UCOL_SECONDARYMASK;
-  return (order >> UCOL_SECONDARYORDERSHIFT);
+    order &= UCOL_SECONDARYMASK;
+    return (order >> UCOL_SECONDARYORDERSHIFT);
 }
 
 U_CAPI int32_t U_EXPORT2
 ucol_tertiaryOrder (int32_t order) 
 {
-  return (order & UCOL_TERTIARYMASK);
+    return (order & UCOL_TERTIARYMASK);
+}
+
+
+void ucol_freeOffsetBuffer(collIterate *s) {
+    if (s != NULL && s->offsetBuffer != NULL) {
+        uprv_free(s->offsetBuffer);
+        s->offsetBuffer = NULL;
+        s->offsetBufferSize = 0;
+    }
 }
 
 #endif /* #if !UCONFIG_NO_COLLATION */