ICU-8.11.tar.gz

[apple/icu.git] / icuSources / i18n / ucol.cpp
diff --git a/icuSources/i18n/ucol.cpp b/icuSources/i18n/ucol.cpp

index 5cf8b1558073980a2b7ab49f946d06c8d14db3fd..5f756b04940fa3703ea96fc61288b0d1d1d35098 100644 (file)
--- a/icuSources/i18n/ucol.cpp
+++ b/icuSources/i18n/ucol.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-*   Copyright (C) 1996-2003, International Business Machines
+*   Copyright (C) 1996-2006, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   file name:  ucol.cpp
@@ -21,28 +21,23 @@
  
  #if !UCONFIG_NO_COLLATION
  
-#include "unicode/uloc.h"
-#include "unicode/coll.h"
-#include "unicode/tblcoll.h"
  #include "unicode/coleitr.h"
  #include "unicode/unorm.h"
  #include "unicode/udata.h"
-#include "unicode/uchar.h"
-#include "unicode/caniter.h"
+#include "unicode/ustring.h"
  
-#include "ucol_bld.h"
  #include "ucol_imp.h"
-#include "ucol_tok.h"
  #include "ucol_elm.h"
  #include "bocsu.h"
  
  #include "unormimp.h"
  #include "unorm_it.h"
-#include "uresimp.h"
  #include "umutex.h"
-#include "uhash.h"
-#include "ucln_in.h" 
+#include "cmemory.h"
+#include "ucln_in.h"
  #include "cstring.h"
+#include "utracimp.h"
+#include "putilimp.h"
  
  #ifdef UCOL_DEBUG
  #include <stdio.h>
@@ -60,10 +55,26 @@ U_NAMESPACE_USE
  
  #define ZERO_CC_LIMIT_            0xC0
  
-static UCollator* UCA = NULL;
-static UCAConstants *UCAconsts = NULL; 
+// static UCA. There is only one. Collators don't use it.
+// It is referenced only in ucol_initUCA and ucol_cleanup
+static UCollator* _staticUCA = NULL;
+// static pointer to udata memory. Inited in ucol_initUCA
+// used for cleanup in ucol_cleanup
  static UDataMemory* UCA_DATA_MEM = NULL;
  
+// this is static pointer to the normalizer fcdTrieIndex
+// it is always the same between calls to u_cleanup
+// and therefore writing to it is not synchronized.
+// It is cleaned in ucol_cleanup
+static const uint16_t *fcdTrieIndex=NULL;
+
+// These are values from UCA required for
+// implicit generation and supressing sort key compression
+// they should regularly be in the UCA, but if one
+// is running without UCA, it could be a problem
+static int32_t maxRegularPrimary  = 0xA0;
+static int32_t minImplicitPrimary = 0xE0;
+static int32_t maxImplicitPrimary = 0xE4;
  
  U_CDECL_BEGIN
  static UBool U_CALLCONV
@@ -74,15 +85,15 @@ isAcceptableUCA(void * /*context*/,
      if( pInfo->size>=20 &&
          pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
          pInfo->charsetFamily==U_CHARSET_FAMILY &&
-        pInfo->dataFormat[0]==ucaDataInfo.dataFormat[0] &&   /* dataFormat="UCol" */
-        pInfo->dataFormat[1]==ucaDataInfo.dataFormat[1] &&
-        pInfo->dataFormat[2]==ucaDataInfo.dataFormat[2] &&
-        pInfo->dataFormat[3]==ucaDataInfo.dataFormat[3] &&
-        pInfo->formatVersion[0]==ucaDataInfo.formatVersion[0] &&
-        pInfo->formatVersion[1]>=ucaDataInfo.formatVersion[1]// &&
-        //pInfo->formatVersion[1]==ucaDataInfo.formatVersion[1] &&
-        //pInfo->formatVersion[2]==ucaDataInfo.formatVersion[2] && // Too harsh
-        //pInfo->formatVersion[3]==ucaDataInfo.formatVersion[3] && // Too harsh
+        pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 &&   /* dataFormat="UCol" */
+        pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
+        pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
+        pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
+        pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
+        pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
+        //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
+        //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
+        //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
          ) {
          UVersionInfo UCDVersion;
          u_getUnicodeVersion(UCDVersion);
@@ -127,7 +138,7 @@ inline void  IInit_collIterate(const UCollator *collator, const UChar *sourceStr
      (s)->coll = (collator);
      (s)->fcdPosition = 0;
      if(collator->normalizationMode == UCOL_ON) {
-        (s)->flags |= UCOL_ITER_NORM; 
+        (s)->flags |= UCOL_ITER_NORM;
      }
      if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
        (s)->flags |= UCOL_HIRAGANA_Q;
@@ -158,18 +169,19 @@ inline void backupState(const collIterate *data, collIterateState *backup)
      backup->pos         = data->pos;
      backup->bufferaddress = data->writableBuffer;
      backup->buffersize    = data->writableBufSize;
+    backup->iteratorMove = 0;
+    backup->iteratorIndex = 0;
      if(data->iterator != NULL) {
-      //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT);
-      backup->iteratorIndex = data->iterator->getState(data->iterator);
-      // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE
-      backup->iteratorMove = 0;
-      if(backup->iteratorIndex == UITER_NO_STATE) {
-        while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) {
-          backup->iteratorMove++;
-          data->iterator->move(data->iterator, -1, UITER_CURRENT);
-        }
-        data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
-      }
+        //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT);
+        backup->iteratorIndex = data->iterator->getState(data->iterator);
+        // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE
+        if(backup->iteratorIndex == UITER_NO_STATE) {
+            while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) {
+                backup->iteratorMove++;
+                data->iterator->move(data->iterator, -1, UITER_CURRENT);
+            }
+            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
+        }
      }
  }
  
@@ -184,15 +196,15 @@ static
  inline void loadState(collIterate *data, const collIterateState *backup,
                        UBool        forwards)
  {
-  UErrorCode status = U_ZERO_ERROR;
+    UErrorCode status = U_ZERO_ERROR;
      data->flags       = backup->flags;
      data->origFlags   = backup->origFlags;
      if(data->iterator != NULL) {
-      //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO);
-      data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
-      if(backup->iteratorMove != 0) {
-        data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
-      }
+        //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO);
+        data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
+        if(backup->iteratorMove != 0) {
+            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
+        }
      }
      data->pos         = backup->pos;
      if ((data->flags & UCOL_ITER_INNORMBUF) &&
@@ -328,327 +340,218 @@ inline void freeHeapWritableBuffer(collIterate *data)
  /* Following are the open/close functions                                   */
  /*                                                                          */
  /****************************************************************************/
-static UCollator*
-tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
-  int32_t rulesLen = 0;
-  const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
-  return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
-
-}
  
-
-U_CAPI UCollator*
-ucol_open(const char *loc,
-                 UErrorCode *status)
-{
-  UCollator *result = NULL;
-  if (status && U_SUCCESS(*status)) {
-         result = Collator::createUCollator(loc, status);
-       if (result) {
-         return result;
-       }       
-  }
-  return ucol_open_internal(loc, status);
-}
-
-// API in ucol_imp.h
-
-U_CFUNC UCollator*
-ucol_open_internal(const char *loc,
-                          UErrorCode *status)
+static UCollator*
+ucol_initFromBinary(const uint8_t *bin, int32_t length,
+                const UCollator *base,
+                UCollator *fillIn,
+                UErrorCode *status)
  {
-  ucol_initUCA(status);
-
-  /* New version */
-  if(U_FAILURE(*status)) return 0;
-
-  UCollator *result = NULL;
-  UResourceBundle *b = ures_open(NULL, loc, status);
-  UResourceBundle *collElem = ures_getByKey(b, "CollationElements", NULL, status);
-  UResourceBundle *binary = NULL;
-  UErrorCode binaryStatus = U_ZERO_ERROR;  
-
-  if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
-    *status = U_USING_DEFAULT_WARNING;
-    result = ucol_initCollator(UCA->image, result, status);
-    // if we use UCA, real locale is root
-    result->rb = ures_open(NULL, "", status);
-    result->elements = ures_open(NULL, "", status);
+    UCollator *result = fillIn;
      if(U_FAILURE(*status)) {
-      goto clean;
+        return NULL;
      }
-    ures_close(b);
-    result->hasRealData = FALSE;
-  } else if(U_SUCCESS(*status)) {   
-    binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
-
-    if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
-      binary = NULL;
-      result = tryOpeningFromRules(collElem, status);
-      if(U_FAILURE(*status)) {
-        goto clean;
-      }
-    } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
-      int32_t len = 0;
-      const uint8_t *inData = ures_getBinary(binary, &len, status);
-      UCATableHeader *colData = (UCATableHeader *)inData;
-      if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
-        uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
-        colData->version[0] != UCOL_BUILDER_VERSION) {
-        *status = U_DIFFERENT_UCA_VERSION;
-        result = tryOpeningFromRules(collElem, status);
-      } else {
-        if(U_FAILURE(*status)){
-          goto clean;
+    /*
+    if(base == NULL) {
+        // we don't support null base yet
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    */
+    // We need these and we could be running without UCA
+    uprv_uca_initImplicitConstants(0, 0, status);
+    UCATableHeader *colData = (UCATableHeader *)bin;
+    // do we want version check here? We're trying to figure out whether collators are compatible
+    if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
+        uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) ||
+        colData->version[0] != UCOL_BUILDER_VERSION)
+    {
+        *status = U_COLLATOR_VERSION_MISMATCH;
+        return NULL;
+    }
+    else {
+        if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
+            result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
+            if(U_FAILURE(*status)){
+                return NULL;
+            }
+            result->hasRealData = TRUE;
          }
-        if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
-          result = ucol_initCollator((const UCATableHeader *)inData, result, status);
-          if(U_FAILURE(*status)){
-            goto clean;
-          }
-          result->hasRealData = TRUE;
-        } else {
-          result = ucol_initCollator(UCA->image, result, status);
-          ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
-          if(U_FAILURE(*status)){
-            goto clean;
-          }
-          result->hasRealData = FALSE;
+        else {
+            if(base) {
+                result = ucol_initCollator(base->image, result, base, status);
+                ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status);
+                if(U_FAILURE(*status)){
+                    return NULL;
+                }
+                result->hasRealData = FALSE;
+            }
+            else {
+                *status = U_USELESS_COLLATOR_ERROR;
+                return NULL;
+            }
          }
          result->freeImageOnClose = FALSE;
-      }
      }
-    result->rb = b;
-    result->elements = collElem;
-  } else { /* There is another error, and we're just gonna clean up */
-clean:
-    ures_close(b);
-    ures_close(collElem);
-    ures_close(binary);
-    return NULL;
-  }
-
-  result->validLocale = NULL; // default is to use rb info
-
-  if(loc == NULL) {
-    loc = ures_getLocale(result->rb, status);
-  }
-  result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
-  /* test for NULL */
-  if (result->requestedLocale == NULL) {
-       *status = U_MEMORY_ALLOCATION_ERROR;
-       ures_close(b); // ??? appears needed
-    ures_close(collElem);
-    ures_close(binary); // ??? appears needed
-       return NULL;
-  }
-  uprv_strcpy(result->requestedLocale, loc);
-
-  ures_close(binary);
-  return result;
+    result->validLocale = NULL;
+    result->requestedLocale = NULL;
+    result->rules = NULL;
+    result->rulesLength = 0;
+    result->freeRulesOnClose = FALSE;
+    result->rb = NULL;
+    result->elements = NULL;
+    return result;
  }
  
-U_CAPI void U_EXPORT2
-ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
+U_CAPI UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+                const UCollator *base,
+                UErrorCode *status)
  {
-  if (coll) {
-    if (coll->validLocale) {
-      uprv_free(coll->validLocale);
-       }
-    coll->validLocale = validLocaleToAdopt;
-    if (coll->requestedLocale) { // should always have
-      uprv_free(coll->requestedLocale);
-       }
-    coll->requestedLocale = requestedLocaleToAdopt;
-  }
+    return ucol_initFromBinary(bin, length, base, NULL, status);
  }
  
-U_CAPI void U_EXPORT2
-ucol_close(UCollator *coll)
+U_CAPI UCollator* U_EXPORT2
+ucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status)
  {
-  if(coll != NULL) {
-       // these are always owned by each UCollator struct, 
-       // so we always free them
-    if(coll->validLocale != NULL) {
-      uprv_free(coll->validLocale);
-    }
-    if(coll->requestedLocale != NULL) {
-      uprv_free(coll->requestedLocale);
-    }
-
-    /* Here, it would be advisable to close: */
-    /* - UData for UCA (unless we stuff it in the root resb */
-    /* Again, do we need additional housekeeping... HMMM! */
-    if(coll->freeOnClose == FALSE){
-      return; /* for safeClone, if freeOnClose is FALSE,
-              don't free the other instance data */
-    }
-    if(coll->freeOptionsOnClose != FALSE) {
-      if(coll->options != NULL) {
-        uprv_free(coll->options);
-      }
-    }
-    if(coll->mapping != NULL) {
-        /*ucmpe32_close(coll->mapping);*/
-      uprv_free(coll->mapping);
-    }
-    if(coll->rules != NULL && coll->freeRulesOnClose) {
-      uprv_free((UChar *)coll->rules);
-    }
-    if(coll->rb != NULL) { /* pointing to read-only memory */
-      ures_close(coll->rb);
-    } 
-    if(coll->freeImageOnClose == TRUE) {
-      uprv_free((UCATableHeader *)coll->image);
+    UCollator * localCollator;
+    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
+    char *stackBufferChars = (char *)stackBuffer;
+    int32_t imageSize = 0;
+    int32_t rulesSize = 0;
+    int32_t rulesPadding = 0;
+    uint8_t *image;
+    UChar *rules;
+    UBool colAllocated = FALSE;
+    UBool imageAllocated = FALSE;
+
+    if (status == NULL || U_FAILURE(*status)){
+        return 0;
      }
-    if(coll->elements != NULL) {
-      ures_close(coll->elements);
+    if ((stackBuffer && !pBufferSize) || !coll){
+       *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
      }
-    if(coll->latinOneCEs != NULL) {
-      uprv_free(coll->latinOneCEs);
+    if (coll->rules && coll->freeRulesOnClose) {
+        rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
+        rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
+        bufferSizeNeeded += rulesSize + rulesPadding;
      }
-    uprv_free(coll);
-  }
-}
-
-U_CAPI UCollator* U_EXPORT2
-ucol_openRules( const UChar        *rules,
-                int32_t            rulesLength,
-                UColAttributeValue normalizationMode,
-                UCollationStrength strength,
-                UParseError        *parseError,
-                UErrorCode         *status)
-{
-  uint32_t listLen = 0;
-  UColTokenParser src;
-  UColAttributeValue norm;
-  UParseError tErr;
-  
-  if(status == NULL || U_FAILURE(*status)){
-    return 0;
-  }
-
-  if(rulesLength < -1 || (rules == NULL && rulesLength != 0)) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return 0;
-  }
-
-  if(rulesLength == -1) {
-    rulesLength = u_strlen(rules);
-  }
-
-  if(parseError == NULL){
-    parseError = &tErr;
-  }
-  
-  switch(normalizationMode) {
-  case UCOL_OFF:
-  case UCOL_ON:
-  case UCOL_DEFAULT:
-    norm = normalizationMode;
-    break;
-  default:
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return 0;
-  }
  
-  ucol_initUCA(status);
-
-  if(U_FAILURE(*status)){
-    return NULL;
-  }
-
-  ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
-  listLen = ucol_tok_assembleTokenList(&src,parseError, status);
+    if (stackBuffer && *pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
+        *pBufferSize =  bufferSizeNeeded;
+        return 0;
+    }
  
-  if(U_FAILURE(*status)) {
-    /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
-    /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
-    /* so something might be done here... or on lower level */
-#ifdef UCOL_DEBUG
-    if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
-      fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
-    } else {
-      fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
+    /* Pointers on 64-bit platforms need to be aligned
+     * on a 64-bit boundry in memory.
+     */
+    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
+        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
+        if (*pBufferSize > offsetUp) {
+            *pBufferSize -= offsetUp;
+            stackBufferChars += offsetUp;
+        }
+        else {
+            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
+            *pBufferSize = 1;
+        }
      }
-#endif
-    ucol_tok_closeTokenList(&src);
-    return NULL;
-  }
-  UCollator *result = NULL;
-  UCATableHeader *table = NULL;
+    stackBuffer = (void *)stackBufferChars;
  
-  if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
-    /* also, if we wanted to remove some contractions, we should make a tailoring */
-    table = ucol_assembleTailoringTable(&src, status);
-    if(U_SUCCESS(*status)) {
-      // builder version
-      table->version[0] = UCOL_BUILDER_VERSION;
-      // no tailoring information on this level
-      table->version[1] = table->version[2] = table->version[3] = 0;
-      // set UCD version
-      u_getUnicodeVersion(table->UCDVersion);
-      // set UCA version
-      uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
-      result = ucol_initCollator(table,0,status);
-      result->hasRealData = TRUE;
-      result->freeImageOnClose = TRUE;
-    }
-  } else { /* no rules, but no error either */
-    // must be only options
-    // We will init the collator from UCA   
-    result = ucol_initCollator(UCA->image,0,status);
-    // And set only the options
-    UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
-    /* test for NULL */
-    if (opts == NULL) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        goto cleanup;
+    if (stackBuffer == NULL || *pBufferSize < bufferSizeNeeded) {
+        /* allocate one here...*/
+        stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
+        colAllocated = TRUE;
+        if (U_SUCCESS(*status)) {
+            *status = U_SAFECLONE_ALLOCATED_WARNING;
+        }
+    }
+    localCollator = (UCollator *)stackBufferChars;
+    rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);
+    {
+        UErrorCode tempStatus = U_ZERO_ERROR;
+        imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
+    }
+    if (coll->freeImageOnClose) {
+        image = (uint8_t *)uprv_malloc(imageSize);
+        ucol_cloneBinary(coll, image, imageSize, status);
+        imageAllocated = TRUE;
+    }
+    else {
+        image = (uint8_t *)coll->image;
+    }
+    localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
+    if (U_FAILURE(*status)) {
+        return NULL;
      }
-    uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
-    ucol_setOptionsFromHeader(result, opts, status);
-    result->freeOptionsOnClose = TRUE;
-    result->hasRealData = FALSE;
-    result->freeImageOnClose = FALSE;
-  }
  
-  if(U_SUCCESS(*status)) {
-    UChar *newRules;
-    result->dataInfo.dataVersion[0] = UCOL_BUILDER_VERSION;
-    if(rulesLength > 0) {
-      newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
-      /* test for NULL */
-      if (newRules == NULL) {
-          *status = U_MEMORY_ALLOCATION_ERROR;
-          goto cleanup;
-      }
-      uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
-      newRules[rulesLength]=0;
-      result->rules = newRules;
-      result->rulesLength = rulesLength;
-      result->freeRulesOnClose = TRUE;
+    if (coll->rules) {
+        if (coll->freeRulesOnClose) {
+            localCollator->rules = u_strcpy(rules, coll->rules);
+            //bufferEnd += rulesSize;
+        }
+        else {
+            localCollator->rules = coll->rules;
+        }
+        localCollator->freeRulesOnClose = FALSE;
+        localCollator->rulesLength = coll->rulesLength;
      }
-    result->rb = NULL;
-    result->elements = NULL;
-    result->validLocale = NULL;
-    result->requestedLocale = NULL;
-    ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
-    ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
-  } else {
-cleanup:
-    if(result != NULL) {
-      ucol_close(result);
-    } else {
-      if(table != NULL) {
-        uprv_free(table);
-      }
+
+    int32_t i;
+    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
+        ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
      }
-    result = NULL;
-  }
+    localCollator->requestedLocale = NULL; // zero copies of pointers
+    localCollator->validLocale = NULL;
+    localCollator->rb = NULL;
+    localCollator->elements = NULL;
+    localCollator->freeOnClose = colAllocated;
+    localCollator->freeImageOnClose = imageAllocated;
+    return localCollator;
+}
  
-  ucol_tok_closeTokenList(&src);
+U_CAPI void U_EXPORT2
+ucol_close(UCollator *coll)
+{
+    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
+    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
+    if(coll != NULL) {
+        // these are always owned by each UCollator struct,
+        // so we always free them
+        if(coll->validLocale != NULL) {
+            uprv_free(coll->validLocale);
+        }
+        if(coll->requestedLocale != NULL) {
+            uprv_free(coll->requestedLocale);
+        }
+        if(coll->resCleaner != NULL) {
+            coll->resCleaner(coll);
+        }
+        if(coll->latinOneCEs != NULL) {
+            uprv_free(coll->latinOneCEs);
+        }
+        if(coll->options != NULL && coll->freeOptionsOnClose) {
+            uprv_free(coll->options);
+        }
+        if(coll->rules != NULL && coll->freeRulesOnClose) {
+            uprv_free((UChar *)coll->rules);
+        }
+        if(coll->image != NULL && coll->freeImageOnClose) {
+            uprv_free((UCATableHeader *)coll->image);
+        }
  
-  return result;
+        /* Here, it would be advisable to close: */
+        /* - UData for UCA (unless we stuff it in the root resb */
+        /* Again, do we need additional housekeeping... HMMM! */
+        UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
+        if(coll->freeOnClose){
+            /* for safeClone, if freeOnClose is FALSE,
+            don't free the other instance data */
+            uprv_free(coll);
+        }
+    }
+    UTRACE_EXIT();
  }
  
  /* This one is currently used by genrb & tests. After constructing from rules (tailoring),*/
@@ -677,7 +580,37 @@ ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
          *status = U_MEMORY_ALLOCATION_ERROR;
          return NULL;
      }
-    uprv_memcpy(result, UCA->image, sizeof(UCATableHeader));
+
+    /* build the UCATableHeader with minimal entries */
+    /* do not copy the header from the UCA file because its values are wrong! */
+    /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
+
+    /* reset everything */
+    uprv_memset(result, 0, *length);
+
+    /* set the tailoring-specific values */
+    UCATableHeader *myData = (UCATableHeader *)result;
+    myData->size = *length;
+
+    /* offset for the options, the only part of the data that is present after the header */
+    myData->options = sizeof(UCATableHeader);
+
+    /* need to always set the expansion value for an upper bound of the options */
+    myData->expansion = myData->options + sizeof(UColOptionSet);
+
+    myData->magic = UCOL_HEADER_MAGIC;
+    myData->isBigEndian = U_IS_BIG_ENDIAN;
+    myData->charSetFamily = U_CHARSET_FAMILY;
+
+    /* copy UCA's version; genrb will override all but the builder version with tailoring data */
+    uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
+
+    uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
+    uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
+    uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
+    myData->jamoSpecial = coll->image->jamoSpecial;
+
+    /* copy the collator options */
      uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
    }
    return result;
@@ -711,26 +644,6 @@ void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
      result->options = opts;
  }
  
-#if 0
-// doesn't look like anybody is using this
-void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
-  if(U_FAILURE(*status)) {
-    return;
-  }
-    opts->caseFirst = result->caseFirst;
-    opts->caseLevel = result->caseLevel;
-    opts->frenchCollation = result->frenchCollation;
-    opts->normalizationMode = result->normalizationMode;
-    opts->strength = result->strength;
-    opts->variableTopValue = result->variableTopValue;
-    opts->alternateHandling = result->alternateHandling;
-    opts->hiraganaQ = result->hiraganaQ;
-    opts->numericCollation = result->numericCollation;
-}
-#endif
-
-static const uint16_t *fcdTrieIndex=NULL;
-
  
  /**
  * Approximate determination if a character is at a contraction end.
@@ -741,7 +654,7 @@ static const uint16_t *fcdTrieIndex=NULL;
  */
  static
  inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
-    if (UTF_IS_TRAIL(c)) {
+    if (U16_IS_TRAIL(c)) {
        return TRUE;
      }
  
@@ -767,16 +680,15 @@ inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
  *        in contraction processing.
  */
  static
-inline uint8_t i_getCombiningClass(UChar c, const UCollator *coll) {
+inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
      uint8_t sCC = 0;
-    if (c >= 0x300 && ucol_unsafeCP(c, coll)) {
+    if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
          sCC = u_getCombiningClass(c);
      }
      return sCC;
  }
  
-
-UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UErrorCode *status) {
+UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
      UChar c;
      UCollator *result = fillIn;
      if(U_FAILURE(*status) || image == NULL) {
@@ -795,32 +707,19 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      }
  
      result->image = image;
+    result->mapping.getFoldingOffset = _getFoldingOffset;
      const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
-    /*CompactEIntArray *newUCAmapping = ucmpe32_openFromData(&mapping, status);*/
-    UTrie *newUCAmapping = (UTrie *)uprv_malloc(sizeof(UTrie)); 
-    if(newUCAmapping != NULL) {
-      utrie_unserialize(newUCAmapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
-    } else {
-      *status = U_MEMORY_ALLOCATION_ERROR;
-      if(result->freeOnClose == TRUE) {
-          uprv_free(result);
-          result = NULL;
-      }
-      return result;
-    }
-    if(U_SUCCESS(*status)) {
-        result->mapping = newUCAmapping;
-    } else {
+    utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
+    if(U_FAILURE(*status)) {
          if(result->freeOnClose == TRUE) {
              uprv_free(result);
              result = NULL;
          }
-        uprv_free(newUCAmapping);
          return result;
      }
  
      /*result->latinOneMapping = (uint32_t*)((uint8_t*)result->image+result->image->latinOneMapping);*/
-    result->latinOneMapping = UTRIE_GET32_LATIN1(result->mapping);
+    result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
      result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
      result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
      result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
@@ -849,14 +748,16 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      result->hiraganaQisDefault = TRUE;
      result->numericCollationisDefault = TRUE;
  
-    result->scriptOrder = NULL;
+    /*result->scriptOrder = NULL;*/
  
      result->rules = NULL;
      result->rulesLength = 0;
  
      /* get the version info from UCATableHeader and populate the Collator struct*/
-    result->dataInfo.dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
-    result->dataInfo.dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
+    result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
+    result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
+    result->dataVersion[2] = 0;
+    result->dataVersion[3] = 0;
  
      result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
      result->minUnsafeCP = 0;
@@ -880,9 +781,6 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      result->expansionCESize = (uint8_t*)result->image +
                                                 result->image->expansionCESize;
  
-    if (fcdTrieIndex == NULL) {
-        fcdTrieIndex = unorm_getFCDTrie(status);
-    }
  
      //result->errorCode = *status;
  
@@ -890,6 +788,8 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
  
      result->latinOneRegenTable = FALSE;
      result->latinOneFailed = FALSE;
+    result->UCA = UCA;
+    result->resCleaner = NULL;
  
      ucol_updateInternalState(result, status);
  
@@ -897,67 +797,74 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      return result;
  }
  
-U_CFUNC UBool
-ucol_cleanup(void)
-{
-    if (UCA_DATA_MEM) {
-        udata_close(UCA_DATA_MEM);
-        UCA_DATA_MEM = NULL;
-    }
-    if (UCA) {
-        ucol_close(UCA);
-        UCA = NULL;
-    }
-    return TRUE;
-}
+/* new Mark's code */
  
+/**
+ * For generation of Implicit CEs
+ * @author Davis
+ *
+ * Cleaned up so that changes can be made more easily.
+ * Old values:
+# First Implicit: E26A792D
+# Last Implicit: E3DC70C0
+# First CJK: E0030300
+# Last CJK: E0A9DD00
+# First CJK_A: E0A9DF00
+# Last CJK_A: E0DE3100
+ */
  /* Following is a port of Mark's code for new treatment of implicits.
- * It is positioned here, since ucol_initUCA need to initialize the 
+ * It is positioned here, since ucol_initUCA need to initialize the
   * variables below according to the data in the fractional UCA.
   */
-   
+
  /**
-  * Function used to: 
-  * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
-  * b) bump any non-CJK characters by 10FFFF.
-  * The relevant blocks are:
-  * A: 4E00..9FFF; CJK Unified Ideographs
-  *            F900..FAFF; CJK Compatibility Ideographs
-  * B: 3400..4DBF; CJK Unified Ideographs Extension A
-  *            20000..XX;      CJK Unified Ideographs Extension B (and others later on)
-  * As long as
-  *    no new B characters are allocated between 4E00 and FAFF, and
-  *    no new A characters are outside of this range,
-  * (very high probability) this simple code will work.
-  * The reordered blocks are:
-  * Block1 is CJK
-  * Block2 is CJK_COMPAT_USED
-  * Block3 is CJK_A
-  * Any other CJK gets its normal code point
-  * Any non-CJK gets +10FFFF
-  * When we reorder Block1, we make sure that it is at the very start,
-  * so that it will use a 3-byte form.
-  */
+    * Function used to:
+    * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
+    * b) bump any non-CJK characters by 10FFFF.
+    * The relevant blocks are:
+    * A:    4E00..9FFF; CJK Unified Ideographs
+    *       F900..FAFF; CJK Compatibility Ideographs
+    * B:    3400..4DBF; CJK Unified Ideographs Extension A
+    *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
+    * As long as
+    *   no new B characters are allocated between 4E00 and FAFF, and
+    *   no new A characters are outside of this range,
+    * (very high probability) this simple code will work.
+    * The reordered blocks are:
+    * Block1 is CJK
+    * Block2 is CJK_COMPAT_USED
+    * Block3 is CJK_A
+    * (all contiguous)
+    * Any other CJK gets its normal code point
+    * Any non-CJK gets +10FFFF
+    * When we reorder Block1, we make sure that it is at the very start,
+    * so that it will use a 3-byte form.
+    * Warning: the we only pick up the compatibility characters that are
+    * NOT decomposed, so that block is smaller!
+    */
  
  // CONSTANTS
-static const uint32_t
+static const UChar32
      NON_CJK_OFFSET = 0x110000,
-    BYTES_TO_AVOID = 3,
-    OTHER_COUNT = 256 - BYTES_TO_AVOID,
-    LAST_COUNT = OTHER_COUNT / 2,
-    LAST_COUNT2 = OTHER_COUNT / 21, // room for intervening, without expanding to 5 bytes
-    IMPLICIT_3BYTE_COUNT = 1;
-    
-// These depend on initUCA, and are initialized at that time
-static uint32_t 
-    IMPLICIT_BASE_BYTE = 0,
-    IMPLICIT_LIMIT_BYTE = 0, // leave room for 1 3-byte and 2 4-byte forms
-    
-    IMPLICIT_4BYTE_BOUNDARY = 0,
-    LAST_MULTIPLIER = 0,
-    LAST2_MULTIPLIER = 0,
-    IMPLICIT_BASE_3BYTE = 0,
-    IMPLICIT_BASE_4BYTE = 0;
+    UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
+
+/**
+ * Precomputed by constructor
+ */
+static int32_t
+    final3Multiplier = 0,
+    final4Multiplier = 0,
+    final3Count = 0,
+    final4Count = 0,
+    medialCount = 0,
+    min3Primary = 0,
+    min4Primary = 0,
+    max4Primary = 0,
+    minTrail = 0,
+    maxTrail = 0,
+    max3Trail = 0,
+    max4Trail = 0,
+    min4Boundary = 0;
  
  static const UChar32
      CJK_BASE = 0x4E00,
@@ -969,105 +876,283 @@ static const UChar32
      CJK_B_BASE = 0x20000,
      CJK_B_LIMIT = 0x2A6DF+1;
  
-static inline UChar32 swapCJK(UChar32 cp) {
-       
-       if (cp >= CJK_BASE) {
-               if (cp < CJK_LIMIT)                             return cp - CJK_BASE;
-                       
-               if (cp < CJK_COMPAT_USED_BASE)  return cp + NON_CJK_OFFSET;
-               
-               if (cp < CJK_COMPAT_USED_LIMIT) return cp - CJK_COMPAT_USED_BASE
-                                                                                               + (CJK_LIMIT - CJK_BASE);
-               if (cp < CJK_B_BASE)                            return cp + NON_CJK_OFFSET;
-               
-               if (cp < CJK_B_LIMIT)                   return cp; // non-BMP-CJK
-               
-               return cp + NON_CJK_OFFSET;     // non-CJK
-       }
-       if (cp < CJK_A_BASE)                                    return cp + NON_CJK_OFFSET;
-               
-       if (cp < CJK_A_LIMIT)                           return cp - CJK_A_BASE
-                                                                                               + (CJK_LIMIT - CJK_BASE) 
-                                                                                               + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
-    return cp + NON_CJK_OFFSET; // non-CJK
+static UChar32 swapCJK(UChar32 i) {
+
+    if (i >= CJK_BASE) {
+        if (i < CJK_LIMIT)              return i - CJK_BASE;
+
+        if (i < CJK_COMPAT_USED_BASE)   return i + NON_CJK_OFFSET;
+
+        if (i < CJK_COMPAT_USED_LIMIT)  return i - CJK_COMPAT_USED_BASE
+                                                + (CJK_LIMIT - CJK_BASE);
+        if (i < CJK_B_BASE)             return i + NON_CJK_OFFSET;
+
+        if (i < CJK_B_LIMIT)            return i; // non-BMP-CJK
+
+        return i + NON_CJK_OFFSET;  // non-CJK
+    }
+    if (i < CJK_A_BASE)                 return i + NON_CJK_OFFSET;
+
+    if (i < CJK_A_LIMIT)                return i - CJK_A_BASE
+                                                + (CJK_LIMIT - CJK_BASE)
+                                                + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
+    return i + NON_CJK_OFFSET; // non-CJK
+}
+
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getRawFromCodePoint(UChar32 i) {
+    return swapCJK(i)+1;
+}
+
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getCodePointFromRaw(UChar32 i) {
+    i--;
+    UChar32 result = 0;
+    if(i >= NON_CJK_OFFSET) {
+        result = i - NON_CJK_OFFSET;
+    } else if(i >= CJK_B_BASE) {
+        result = i;
+    } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
+        if(i < CJK_LIMIT - CJK_BASE) {
+            result = i + CJK_BASE;
+        } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
+            result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
+        } else {
+            result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
+        }
+    } else {
+        result = -1;
+    }
+    return result;
  }
-    
  
  // GET IMPLICIT PRIMARY WEIGHTS
  // Return value is left justified primary key
+U_CAPI uint32_t U_EXPORT2
+uprv_uca_getImplicitFromRaw(UChar32 cp) {
+    /*
+    if (cp < 0 || cp > UCOL_MAX_INPUT) {
+        throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
+    }
+    */
+    int32_t last0 = cp - min4Boundary;
+    if (last0 < 0) {
+        int32_t last1 = cp / final3Count;
+        last0 = cp % final3Count;
  
-static inline uint32_t getImplicitPrimary(UChar32 cp) {
+        int32_t last2 = last1 / medialCount;
+        last1 %= medialCount;
  
-    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
-    
-    cp = swapCJK(cp);
-    
-    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
-    
-    // we now have a range of numbers from 0 to 21FFFF.
-      
-    // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
-    // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
-    // we shift so that HAN all has the same first primary, for compression.
-    // for the 4 byte case, we make the gap as large as we can fit.
-    // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
-    // Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
-    
-    int32_t last0 = cp - IMPLICIT_4BYTE_BOUNDARY;
-    if (last0 < 0) {
-        int32_t last1 = cp / LAST_COUNT;
-        last0 = cp % LAST_COUNT;
-        
-        int32_t last2 = last1 / OTHER_COUNT;
-        last1 %= OTHER_COUNT;
+        last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
+        last1 = minTrail + last1; // offset
+        last2 = min3Primary + last2; // offset
          /*
-        if (DEBUG || last2 > 0xFF-BYTES_TO_AVOID) System.out.println("3B: " + Utility.hex(cp) + " => "
-            + Utility.hex(last2) + ", "
-            + Utility.hex(last1) + ", "
-            + Utility.hex(last0) + ", "
-        );
+        if (last2 >= min4Primary) {
+            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
+        }
          */
-        
-        return IMPLICIT_BASE_3BYTE + (last2 << 24) + (last1 << 16) + ((last0*LAST_MULTIPLIER) << 8);
+        return (last2 << 24) + (last1 << 16) + (last0 << 8);
      } else {
-        int32_t last1 = last0 / LAST_COUNT2;
-        last0 %= LAST_COUNT2;
-        
-        int32_t last2 = last1 / OTHER_COUNT;
-        last1 %= OTHER_COUNT;
-        
-        int32_t last3 = last2 / OTHER_COUNT;
-        last2 %= OTHER_COUNT;
+        int32_t last1 = last0 / final4Count;
+        last0 %= final4Count;
+
+        int32_t last2 = last1 / medialCount;
+        last1 %= medialCount;
+
+        int32_t last3 = last2 / medialCount;
+        last2 %= medialCount;
  
+        last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
+        last1 = minTrail + last1; // offset
+        last2 = minTrail + last2; // offset
+        last3 = min4Primary + last3; // offset
          /*
-        if (DEBUG || last3 > 0xFF-BYTES_TO_AVOID) System.out.println("4B: " + Utility.hex(cp) + " => "
-            + Utility.hex(last3) + ", "
-            + Utility.hex(last2) + ", "
-            + Utility.hex(last1) + ", "
-            + Utility.hex(last0 * LAST2_MULTIPLIER) + ", "
-        );
+        if (last3 > max4Primary) {
+            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
+        }
          */
-
-       return IMPLICIT_BASE_4BYTE + (last3 << 24) + (last2 << 16) + (last1 << 8) + (last0 * LAST2_MULTIPLIER);
+        return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
      }
  }
  
+U_CAPI uint32_t U_EXPORT2
+uprv_uca_getImplicitPrimary(UChar32 cp) {
+    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
+
+    cp = swapCJK(cp);
+    cp++;
+    // we now have a range of numbers from 0 to 21FFFF.
+
+    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
+
+    return uprv_uca_getImplicitFromRaw(cp);
+}
+
+/**
+ * Converts implicit CE into raw integer ("code point")
+ * @param implicit
+ * @return -1 if illegal format
+ */
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getRawFromImplicit(uint32_t implicit) {
+    UChar32 result;
+    UChar32 b3 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b2 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b1 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b0 = implicit & 0xFF;
+
+    // simple parameter checks
+    if (b0 < min3Primary || b0 > max4Primary
+      || b1 < minTrail || b1 > maxTrail) return -1;
+    // normal offsets
+    b1 -= minTrail;
+
+    // take care of the final values, and compose
+    if (b0 < min4Primary) {
+        if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1;
+        b2 -= minTrail;
+        UChar32 remainder = b2 % final3Multiplier;
+        if (remainder != 0) return -1;
+        b0 -= min3Primary;
+        b2 /= final3Multiplier;
+        result = ((b0 * medialCount) + b1) * final3Count + b2;
+    } else {
+         if (b2 < minTrail || b2 > maxTrail
+        || b3 < minTrail || b3 > max4Trail) return -1;
+        b2 -= minTrail;
+        b3 -= minTrail;
+        UChar32 remainder = b3 % final4Multiplier;
+        if (remainder != 0) return -1;
+        b3 /= final4Multiplier;
+        b0 -= min4Primary;
+        result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
+    }
+    // final check
+    if (result < 0 || result > UCOL_MAX_INPUT) return -1;
+    return result;
+}
+
+
+static inline int32_t divideAndRoundUp(int a, int b) {
+    return 1 + (a-1)/b;
+}
+
  /* this function is either called from initUCA or from genUCA before
   * doing canonical closure for the UCA.
   */
-U_CAPI void U_EXPORT2 
-uprv_uca_initImplicitConstants(uint32_t baseByte) 
+
+/**
+ * Set up to generate implicits.
+ * @param minPrimary
+ * @param maxPrimary
+ * @param minTrail final byte
+ * @param maxTrail final byte
+ * @param gap3 the gap we leave for tailoring for 3-byte forms
+ * @param gap4 the gap we leave for tailoring for 4-byte forms
+ */
+static void initImplicitConstants(int minPrimary, int maxPrimary,
+                                    int minTrailIn, int maxTrailIn,
+                                    int gap3, int primaries3count,
+                                    UErrorCode *status) {
+    // some simple parameter checks
+    if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+    if (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+    if (primaries3count < 1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+
+    minTrail = minTrailIn;
+    maxTrail = maxTrailIn;
+
+    min3Primary = minPrimary;
+    max4Primary = maxPrimary;
+    // compute constants for use later.
+    // number of values we can use in trailing bytes
+    // leave room for empty values between AND above, e.g. if gap = 2
+    // range 3..7 => +3 -4 -5 -6 -7: so 1 value
+    // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
+    // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
+    final3Multiplier = gap3 + 1;
+    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
+    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
+
+    // medials can use full range
+    medialCount = (maxTrail - minTrail + 1);
+    // find out how many values fit in each form
+    int32_t threeByteCount = medialCount * final3Count;
+    // now determine where the 3/4 boundary is.
+    // we use 3 bytes below the boundary, and 4 above
+    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
+    int32_t primaries4count = primariesAvailable - primaries3count;
+
+
+    int32_t min3ByteCoverage = primaries3count * threeByteCount;
+    min4Primary = minPrimary + primaries3count;
+    min4Boundary = min3ByteCoverage;
+    // Now expand out the multiplier for the 4 bytes, and redo.
+
+    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
+    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
+    //if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
+    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
+    //if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
+    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
+    //if (DEBUG) System.out.println("expandedGap: " + gap4);
+    if (gap4 < 1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    final4Multiplier = gap4 + 1;
+    final4Count = neededPerFinalByte;
+    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
+    /*
+    if (DEBUG) {
+        System.out.println("final4Count: " + final4Count);
+        for (int counter = 0; counter <= final4Count; ++counter) {
+            int value = minTrail + (1 + counter)*final4Multiplier;
+            System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
+        }
+    }
+    */
+}
+
+    /**
+     * Supply parameters for generating implicit CEs
+     */
+U_CAPI void U_EXPORT2
+uprv_uca_initImplicitConstants(int32_t, int32_t, UErrorCode *status) {
+    // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
+    //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
+  initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
+}
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+ucol_cleanup(void)
  {
-  IMPLICIT_BASE_BYTE = baseByte;
-  IMPLICIT_LIMIT_BYTE = IMPLICIT_BASE_BYTE + 4; // leave room for 1 3-byte and 2 4-byte forms
-
-  IMPLICIT_4BYTE_BOUNDARY = IMPLICIT_3BYTE_COUNT * OTHER_COUNT * LAST_COUNT;
-  LAST_MULTIPLIER = OTHER_COUNT / LAST_COUNT;
-  LAST2_MULTIPLIER = OTHER_COUNT / LAST_COUNT2;
-  IMPLICIT_BASE_3BYTE = (IMPLICIT_BASE_BYTE << 24) + 0x030300;
-  IMPLICIT_BASE_4BYTE = ((IMPLICIT_BASE_BYTE + IMPLICIT_3BYTE_COUNT) << 24) + 0x030303;
+    if (UCA_DATA_MEM) {
+        udata_close(UCA_DATA_MEM);
+        UCA_DATA_MEM = NULL;
+    }
+    if (_staticUCA) {
+        ucol_close(_staticUCA);
+        _staticUCA = NULL;
+    }
+    fcdTrieIndex = NULL;
+    return TRUE;
  }
-    
+U_CDECL_END
+
  /* do not close UCA returned by ucol_initUCA! */
  UCollator *
  ucol_initUCA(UErrorCode *status) {
@@ -1075,57 +1160,63 @@ ucol_initUCA(UErrorCode *status) {
          return NULL;
      }
      umtx_lock(NULL);
-    UBool f = (UCA == NULL);
+    UBool f = (_staticUCA == NULL);
      umtx_unlock(NULL);
-    
+
      if(f) {
          UCollator *newUCA = NULL;
          UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
-        
+
          if(U_FAILURE(*status)) {
              if (result) {
                  udata_close(result);
              }
              uprv_free(newUCA);
          }
-        
+
+        // init FCD data
+        if (fcdTrieIndex == NULL) {
+            fcdTrieIndex = unorm_getFCDTrie(status);
+            ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
+        }
+
          if(result != NULL) { /* It looks like sometimes we can fail to find the data file */
-            newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), newUCA, status);
+            newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), newUCA, newUCA, status);
              if(U_SUCCESS(*status)){
                  newUCA->rb = NULL;
-                               newUCA->elements = NULL;
-                               newUCA->validLocale = NULL;
-                               newUCA->requestedLocale = NULL;
-                               newUCA->hasRealData = FALSE; // real data lives in .dat file...
+                newUCA->elements = NULL;
+                newUCA->validLocale = NULL;
+                newUCA->requestedLocale = NULL;
+                newUCA->hasRealData = FALSE; // real data lives in .dat file...
                  newUCA->freeImageOnClose = FALSE;
                  umtx_lock(NULL);
-                if(UCA == NULL) {
-                    UCA = newUCA;
+                if(_staticUCA == NULL) {
+                    _staticUCA = newUCA;
                      UCA_DATA_MEM = result;
                      result = NULL;
                      newUCA = NULL;
                  }
                  umtx_unlock(NULL);
-                
+
                  if(newUCA != NULL) {
                      udata_close(result);
                      uprv_free(newUCA);
                  }
                  else {
-                    ucln_i18n_registerCleanup();
+                    ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
                  }
                  // Initalize variables for implicit generation
-                UCAconsts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
-                uprv_uca_initImplicitConstants(UCAconsts->UCA_PRIMARY_IMPLICIT_MIN);
-                UCA->mapping->getFoldingOffset = _getFoldingOffset;
+                const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)_staticUCA->image + _staticUCA->image->UCAConsts);
+                uprv_uca_initImplicitConstants(UCAconsts->UCA_PRIMARY_IMPLICIT_MIN, UCAconsts->UCA_PRIMARY_IMPLICIT_MAX, status);
+                //_staticUCA->mapping.getFoldingOffset = _getFoldingOffset;
              }else{
                  udata_close(result);
                  uprv_free(newUCA);
-                UCA= NULL;
+                _staticUCA= NULL;
              }
          }
      }
-    return UCA;
+    return _staticUCA;
  }
  
  
@@ -1191,7 +1282,7 @@ inline void normalizeIterator(collIterate *collationSource) {
    UBool wasNormalized = FALSE;
    //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
    uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
-  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 
+  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
      (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
    if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) {
      // reallocate and terminate
@@ -1208,11 +1299,11 @@ inline void normalizeIterator(collIterate *collationSource) {
      status = U_ZERO_ERROR;
      //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
      collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
-    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 
+    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
      (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
    }
    // Terminate the buffer - we already checked that it is big enough
-  collationSource->writableBuffer[normLen] = 0; 
+  collationSource->writableBuffer[normLen] = 0;
    if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
        collationSource->flags |= UCOL_ITER_ALLOCATED;
    }
@@ -1256,8 +1347,8 @@ inline UBool collIterFCD(collIterate *collationSource) {
      /* trie access */
      fcd = unorm_getFCD16(fcdTrieIndex, c);
      if (fcd != 0) {
-        if (UTF_IS_FIRST_SURROGATE(c)) {
-            if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
+        if (U16_IS_LEAD(c)) {
+            if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
                  ++srcP;
                  fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
              } else {
@@ -1277,8 +1368,8 @@ inline UBool collIterFCD(collIterate *collationSource) {
                  c = *srcP++;
                  /* trie access */
                  fcd = unorm_getFCD16(fcdTrieIndex, c);
-                if (fcd != 0 && UTF_IS_FIRST_SURROGATE(c)) {
-                    if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
+                if (fcd != 0 && U16_IS_LEAD(c)) {
+                    if ((endP == NULL || srcP != endP) && U16_IS_TRAIL(c2=*srcP)) {
                          ++srcP;
                          fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
                      } else {
@@ -1311,6 +1402,9 @@ inline UBool collIterFCD(collIterate *collationSource) {
  /*                                                                          */
  /****************************************************************************/
  
+static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
+static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
+
  /* there should be a macro version of this function in the header file */
  /* This is the first function that tries to fetch a collation element  */
  /* If it's not succesfull or it encounters a more difficult situation  */
@@ -1458,27 +1552,30 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
        }
        else
        {
-          order = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
+          order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
            if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
            }
-          if(order == UCOL_NOT_FOUND) {   /* We couldn't find a good CE in the tailoring */
+          if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
              /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
-            order = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
+            order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
  
              if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
-              order = ucol_prv_getSpecialCE(UCA, ch, order, collationSource, status);
+              order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
              }
            }
        }
-    return order; /* return the CE */
+      if(order == UCOL_NOT_FOUND) {
+        order = getImplicit(ch, collationSource);
+      }
+      return order; /* return the CE */
  }
  
  /* ucol_getNextCE, out-of-line version for use from other files.   */
  U_CAPI uint32_t  U_EXPORT2
  ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
      return ucol_IGetNextCE(coll, collationSource, status);
-    }
+}
  
  
  /**
@@ -1565,9 +1662,9 @@ inline UBool collPrevIterFCD(collIterate *data)
  
      /* Get the trailing combining class of the current character. */
      c = *--src;
-    if (!UTF_IS_SURROGATE(c)) {
+    if (!U16_IS_SURROGATE(c)) {
          fcd = unorm_getFCD16(fcdTrieIndex, c);
-    } else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
+    } else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
          --src;
          fcd = unorm_getFCD16(fcdTrieIndex, c2);
          if (fcd != 0) {
@@ -1592,9 +1689,9 @@ inline UBool collPrevIterFCD(collIterate *data)
              }
  
              c = *--src;
-            if (!UTF_IS_SURROGATE(c)) {
+            if (!U16_IS_SURROGATE(c)) {
                  fcd = unorm_getFCD16(fcdTrieIndex, c);
-            } else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
+            } else if (U16_IS_TRAIL(c) && start < src && U16_IS_LEAD(c2 = *(src - 1))) {
                  --src;
                  fcd = unorm_getFCD16(fcdTrieIndex, c2);
                  if (fcd != 0) {
@@ -1627,7 +1724,7 @@ inline UBool collPrevIterFCD(collIterate *data)
   *  Handles both normal and iterative cases.
   *  No error checking - caller beware!
   */
-inline static 
+inline static
  UChar peekCharacter(collIterate *source, int32_t offset) {
    if(source->pos != NULL) {
      return *(source->pos + offset);
@@ -1662,6 +1759,26 @@ inline UBool isAtStartPrevIterate(collIterate *data) {
              *(data->pos - 1) == 0 && data->fcdPosition == NULL);
  }
  
+static
+inline void goBackOne(collIterate *data) {
+# if 0
+  // somehow, it looks like we need to keep iterator synced up
+  // at all times, as above.
+  if(data->pos) {
+    data->pos--;
+  }
+  if(data->iterator) {
+    data->iterator->previous(data->iterator);
+  }
+#endif
+  if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
+    data->iterator->previous(data->iterator);
+  }
+  if(data->pos) {
+    data->pos --;
+  }
+}
+
  /**
  * Inline function that gets a simple CE.
  * So what it does is that it will first check the expansion buffer. If the
@@ -1677,7 +1794,7 @@ static
  inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                                 UErrorCode *status)
  {
-    uint32_t result = UCOL_NULLORDER;
+    uint32_t result = (uint32_t)UCOL_NULLORDER;
      if (data->toReturn > data->CEs) {
          data->toReturn --;
          result = *(data->toReturn);
@@ -1744,19 +1861,19 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                  data->flags &= ~UCOL_WAS_HIRAGANA;
                }
              }
-            
+
              /*
-            * got a character to determine if there's fcd and/or normalization 
+            * got a character to determine if there's fcd and/or normalization
              * stuff to do.
              * if the current character is not fcd.
              * if current character is at the start of the string
              * Trailing combining class == 0.
              * Note if pos is in the writablebuffer, norm is always 0
              */
-            if (ch < ZERO_CC_LIMIT_ || 
+            if (ch < ZERO_CC_LIMIT_ ||
                // this should propel us out of the loop in the iterator case
                  (data->flags & UCOL_ITER_NORM) == 0 ||
-                (data->fcdPosition != NULL && data->fcdPosition <= data->pos) 
+                (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
                  || data->string == data->pos) {
                  break;
              }
@@ -1794,47 +1911,36 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
          contraction
          */
          if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
-            result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
-        }
-        else {
-            if (ch <= 0xFF) {
-              result = coll->latinOneMapping[ch];
-              if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
+          result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
+        } else {
+          if (ch <= 0xFF) {
+            result = coll->latinOneMapping[ch];
+          }
+          else {
+            result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+          }
+          if (result > UCOL_NOT_FOUND) {
+            result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
+          }
+          if (result == UCOL_NOT_FOUND) { // Not found in master list
+            if (!isAtStartPrevIterate(data) &&
+              ucol_contractionEndCP(ch, data->coll)) {
+                result = UCOL_CONTRACTION;
+            } else {
+              if(coll->UCA) {
+                result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
                }
              }
-            else {
-              // TODO: fix me for THAI - I reference *(data->pos-1)
-                if ((data->flags & UCOL_ITER_INNORMBUF) == 0 &&
-                    /*UCOL_ISTHAIBASECONSONANT(ch) &&*/   // This is from the old specs - we now rearrange unconditionally
-                    data->pos > data->string &&
-                    UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1)))
-                    //UCOL_ISTHAIPREVOWEL(*(data->pos -1)))
-                {
-                    result = UCOL_THAI;
-                }
-                else {
-                    /*result = ucmpe32_get(coll->mapping, ch);*/
-                    result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
-                }
-                if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
-                }
-                if (result == UCOL_NOT_FOUND) {
-                  if (!isAtStartPrevIterate(data) &&
-                      ucol_contractionEndCP(ch, data->coll)) {
-                      result = UCOL_CONTRACTION;
-                  }
-                  else {
-                        /*result = ucmpe32_get(UCA->mapping, ch);*/
-                        result = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
-                  }
-
-                  if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(UCA, ch, result, data, status);
-                  }
-                }
+            
+            if (result > UCOL_NOT_FOUND) {
+              if(coll->UCA) {
+                result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
+              }
              }
+          }
+        }
+        if(result == UCOL_NOT_FOUND) {
+          result = getPrevImplicit(ch, data);
          }
      }
      return result;
@@ -2054,10 +2160,10 @@ inline UChar getNextNormalizedChar(collIterate *data)
      }
      else {
          if (innormbuf) {
-          // inside the normalization buffer, but at the end 
-          // (since we encountered zero). This means, in the 
-          // case we're using char iterator, that we need to 
-          // do another round of normalization. 
+          // inside the normalization buffer, but at the end
+          // (since we encountered zero). This means, in the
+          // case we're using char iterator, that we need to
+          // do another round of normalization.
            //if(data->origFlags & UCOL_USE_ITERATOR) {
              // we need to restore original flags,
              // otherwise, we'll lose them
@@ -2133,30 +2239,11 @@ inline UChar getNextNormalizedChar(collIterate *data)
      }
  
      /* points back to the pos in string */
-    return ch;
-}
-
-static 
-inline void goBackOne(collIterate *data) {
-# if 0
-  // somehow, it looks like we need to keep iterator synced up
-  // at all times, as above.
-  if(data->pos) {
-    data->pos--;
-  }
-  if(data->iterator) {
-    data->iterator->previous(data->iterator);
-  }
-#endif
-  if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
-    data->iterator->previous(data->iterator);
-  } 
-  if(data->pos) {
-    data->pos --;
-  }
+    return ch;
  }
  
  
+
  /**
  * Function to copy the buffer into writableBuffer and sets the fcd position to
  * the correct position
@@ -2334,7 +2421,7 @@ inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
    if(isNonChar(cp)) {
      return 0;
    }
-  uint32_t r = getImplicitPrimary(cp);
+  uint32_t r = uprv_uca_getImplicitPrimary(cp);
    *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0;
    return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
  }
@@ -2393,12 +2480,12 @@ inline UChar * insertBufferFront(collIterate *data, UChar *pNull, UChar ch)
  * @param data collation iterator data
  */
  static
-inline void normalizePrevContraction(collIterate *data)
+inline void normalizePrevContraction(collIterate *data, UErrorCode *status)
  {
      UChar      *buffer     = data->writableBuffer;
      uint32_t    buffersize = data->writableBufSize;
      uint32_t    nulltermsize;
-    UErrorCode  status     = U_ZERO_ERROR;
+    UErrorCode  localstatus = U_ZERO_ERROR;
      UChar      *pEnd       = data->pos + 1;         /* End normalize + 1 */
      UChar      *pStart;
      uint32_t    normLen;
@@ -2429,22 +2516,23 @@ inline void normalizePrevContraction(collIterate *data)
      }
  
      normLen = unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0, buffer, 0,
-                              &status);
+                              &localstatus);
  
      if (nulltermsize <= normLen) {
          uint32_t  size = buffersize - nulltermsize + normLen + 1;
          UChar    *temp = (UChar *)uprv_malloc(size * sizeof(UChar));
-        if(temp != NULL) {
-          nulltermsize   = normLen + 1;
-          uprv_memcpy(temp + normLen, buffer,
-                      sizeof(UChar) * (buffersize - nulltermsize));
-          freeHeapWritableBuffer(data);
-          data->writableBuffer = temp;
-          data->writableBufSize = size;
+        if (temp == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return;
          }
+        nulltermsize   = normLen + 1;
+        uprv_memcpy(temp + normLen, buffer,
+                    sizeof(UChar) * (buffersize - nulltermsize));
+        freeHeapWritableBuffer(data);
+        data->writableBuffer = temp;
+        data->writableBufSize = size;
      }
  
-    status = U_ZERO_ERROR;
      /*
      this puts the null termination infront of the normalized string instead
      of the end
@@ -2452,7 +2540,7 @@ inline void normalizePrevContraction(collIterate *data)
      pStartNorm   = buffer + (nulltermsize - normLen);
      *(pStartNorm - 1) = 0;
      unorm_normalize(pStart, pEnd - pStart, UNORM_NFD, 0, pStartNorm, normLen,
-                    &status);
+                    status);
  
      data->pos        = data->writableBuffer + nulltermsize;
      data->origFlags  = data->flags;
@@ -2474,7 +2562,7 @@ inline void normalizePrevContraction(collIterate *data)
  * @return previous character
  */
  static
-inline UChar getPrevNormalizedChar(collIterate *data)
+inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status)
  {
      UChar  prevch;
      UChar  ch;
@@ -2536,7 +2624,7 @@ inline UChar getPrevNormalizedChar(collIterate *data)
          UChar *backuppos = data->pos;
          data->pos = start;
          if (collPrevIterFCD(data)) {
-            normalizePrevContraction(data);
+            normalizePrevContraction(data, status);
              return *(data->pos - 1);
          }
          data->pos = backuppos;
@@ -2583,64 +2671,30 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          UChar trail;
          collIterateState state;
          backupState(source, &state);
-        if (collIter_eos(source) || !(UTF16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
-          // we chould have stepped one char forward and it might have turned that it 
+        if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
+          // we chould have stepped one char forward and it might have turned that it
            // was not a trail surrogate. In that case, we have to backup.
            loadState(source, &state, TRUE);
            return 0;
          } else {
            /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */
-          CE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, CE&0xFFFFFF, trail);
+          CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
            if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.
              // We need to backup
              loadState(source, &state, TRUE);
              return CE;
-          } 
+          }
            // calculate the supplementary code point value, if surrogate was not tailored
            cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
          }
        }
        break;
-    case THAI_TAG:
-      /* Thai/Lao reordering */
-        if  (((source->flags) & UCOL_ITER_INNORMBUF)      /* Already Swapped     ||                 */
-            || (source->iterator && !source->iterator->hasNext(source->iterator))
-            || (source->pos && source->endp == source->pos)                /* At end of string.  No swap possible || */
-            /*|| UCOL_ISTHAIBASECONSONANT(*(source->pos)) == 0*/)  /* next char not Thai base cons.*/ // This is from the old specs - we now rearrange unconditionally
-        {
-            // Treat Thai as a length one expansion */
-            CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
-            CE = *CEOffset++;
-        }
-        else
-        {
-            // Move the prevowel and the following base Consonant into the normalization buffer
-            //   with their order swapped
-
-            source->writableBuffer[0] = peekCharacter(source, 0);
-            source->writableBuffer[1] = peekCharacter(source, -1);
-            source->writableBuffer[2] = 0;
-
-            if(source->pos) {
-              source->fcdPosition       = source->pos+1;   // Indicate where to continue in main input string
-                                                           //   after exhausting the writableBuffer
-            } else if(source->iterator) {
-              source->iterator->next(source->iterator);
-            }
-        source->pos   = source->writableBuffer;
-            source->origFlags         = source->flags;
-            source->flags            |= UCOL_ITER_INNORMBUF;
-            source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
-
-        CE = UCOL_IGNORABLE;
-      }
-      break;
      case SPEC_PROC_TAG:
        {
          // Special processing is getting a CE that is preceded by a certain prefix
          // Currently this is only needed for optimizing Japanese length and iteration marks.
-        // When we encouter a special processing tag, we go backwards and try to see if 
-        // we have a match. 
+        // When we encouter a special processing tag, we go backwards and try to see if
+        // we have a match.
          // Contraction tables are used - so the whole process is not unlike contraction.
          // prefix data is stored backwards in the table.
          const UChar *UCharOffset;
@@ -2653,15 +2707,15 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
  
          for(;;) {
          // This loop will run once per source string character, for as long as we
-        //  are matching a potential contraction sequence                  
+        //  are matching a potential contraction sequence
  
-          // First we position ourselves at the begining of contraction sequence 
+          // First we position ourselves at the begining of contraction sequence
            const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
            if (collIter_bos(source)) {
              CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
              break;
            }
-          schar = getPrevNormalizedChar(source);
+          schar = getPrevNormalizedChar(source, status);
            goBackOne(source);
  
            while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
@@ -2676,42 +2730,6 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
            }
            else
            {
-              // if there is a completely ignorable code point in the middle of 
-              // a prefix, we need to act as if it's not there
-              // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
-              // lone surrogates cannot be set to zero as it would break other processing
-              uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
-              // it's easy for BMP code points
-              if(isZeroCE == 0) {
-                continue;
-              } else if(UTF_IS_TRAIL(schar) || UTF_IS_LEAD(schar)) {
-                // for supplementary code points, we have to check the next one
-                // situations where we are going to ignore
-                // 1. beginning of the string: schar is a lone surrogate
-                // 2. schar is a lone surrogate
-                // 3. schar is a trail surrogate in a valid surrogate sequence
-                //    that is explicitly set to zero.
-                if (!collIter_bos(source)) {
-                  UChar lead;
-                  if(UTF_IS_LEAD(lead = getPrevNormalizedChar(source))) {
-                    isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, lead);
-                    if(getCETag(isZeroCE) == SURROGATE_TAG) {
-                      uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, schar);
-                      if(finalCE == 0) {
-                        // this is a real, assigned completely ignorable code point
-                        goBackOne(source);
-                        continue;
-                      }
-                    }
-                  } else {
-                    // lone surrogate, completely ignorable
-                    continue;
-                  }
-                } else {
-                  // lone surrogate at the beggining, completely ignorable
-                  continue;
-                }
-              }
                // Source string char was not in the table.
                //   We have not found the prefix.
                CE = *(coll->contractionCEs +
@@ -2742,7 +2760,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
        /* This should handle contractions */
        collIterateState state;
        backupState(source, &state);
-      uint32_t firstCE = UCOL_NOT_FOUND;
+      uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;
        const UChar *UCharOffset;
        UChar schar, tchar;
  
@@ -2759,10 +2777,10 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              // So we'll pick whatever we have at the point...
              if (CE == UCOL_NOT_FOUND) {
                  // back up the source over all the chars we scanned going into this contraction.
-                CE = firstCE;  
+                CE = firstCE;
                  loadState(source, &state, TRUE);
                  if(source->origFlags & UCOL_USE_ITERATOR) {
-                  source->flags = source->origFlags;
+                    source->flags = source->origFlags;
                  }
              }
              break;
@@ -2784,49 +2802,29 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          }
          else
          {
-            // if there is a completely ignorable code point in the middle of 
-            // contraction, we need to act as if it's not there
-            uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
-            // it's easy for BMP code points
-            if(isZeroCE == 0) {
-              continue;
-            } else if(UTF_IS_LEAD(schar)) {
-              if(!collIter_eos(source)) {
-                backupState(source, &state);
-                UChar trail = getNextNormalizedChar(source);
-                if(UTF_IS_TRAIL(trail)) { // do stuff with trail
-                  if(getCETag(isZeroCE) == SURROGATE_TAG) {
-                    uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, trail);
-                    if(finalCE == 0) {
-                      continue;
-                    }
-                  }
-                } else {
-                  // broken surrogate sequence, thus completely ignorable
-                  loadState(source, &state, TRUE);
-                  continue;
-                }
-                loadState(source, &state, TRUE);
-              } else { // no  more characters, so broken surrogate pair... 
-                // this contraction will ultimately fail, but not because of us
-                continue; 
-              }
-            } // else if(UTF_IS_LEAD(schar))
-
              // Source string char was not in contraction table.
              //   Unless we have a discontiguous contraction, we have finished
              //   with this contraction.
+            UChar32 miss = schar;
+            if(U16_IS_LEAD(schar)) { // in order to do the proper detection, we
+              // need to see if we're dealing with a supplementary
+              miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
+            }
+
              uint8_t sCC;
-            if (schar < 0x300 ||   
+            if (miss < 0x300 ||
                  maxCC == 0 ||
-                (sCC = i_getCombiningClass(schar, coll)) == 0 ||
-                sCC>maxCC || 
+                (sCC = i_getCombiningClass(miss, coll)) == 0 ||
+                sCC>maxCC ||
                  (allSame != 0 && sCC == maxCC) ||
                  collIter_eos(source)) {
-                    //  Contraction can not be discontiguous.  
-                    goBackOne(source);  // back up the source string by one, 
+                    //  Contraction can not be discontiguous.
+                    goBackOne(source);  // back up the source string by one,
                                          //  because  the character we just looked at was
                                          //  not part of the contraction.   */
+                    if(U_IS_SUPPLEMENTARY(miss)) {
+                      goBackOne(source);
+                    }
                      CE = *(coll->contractionCEs +
                          (ContractionStart - coll->contractionIndex));
              } else {
@@ -2838,9 +2836,13 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
                  /* find the next character if schar is not a base character
                      and we are not yet at the end of the string */
                  tempchar = getNextNormalizedChar(source);
+                // probably need another supplementary thingie here
                  goBackOne(source);
                  if (i_getCombiningClass(tempchar, coll) == 0) {
                      goBackOne(source);
+                    if(U_IS_SUPPLEMENTARY(miss)) {
+                      goBackOne(source);
+                    }
                      /* Spit out the last char of the string, wasn't tasty enough */
                      CE = *(coll->contractionCEs +
                          (ContractionStart - coll->contractionIndex));
@@ -2856,12 +2858,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              /*   scanned through what ultimately proved to not be a contraction.       */
            loadState(source, &state, TRUE);
            CE = firstCE;
-          if(source->origFlags & UCOL_USE_ITERATOR) {
-            source->flags = source->origFlags;
-          }
            break;
          }
-        
+
          if(!isContraction(CE)) {
              // The source string char was in the contraction table, and the corresponding
              //   CE is not a contraction CE.  We completed the contraction, break
@@ -2870,7 +2869,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              //   the contraction.
              break;
          }
-        
+
  
          // The source string char was in the contraction table, and the corresponding
          //   CE is IS  a contraction CE.  We will continue looping to check the source
@@ -2878,7 +2877,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
          if(tempCE != UCOL_NOT_FOUND) {
              // We have scanned a a section of source string for which there is a
-            //  CE from the contraction table.  Remember the CE and scan position, so 
+            //  CE from the contraction table.  Remember the CE and scan position, so
              //  that we can return to this point if further scanning fails to
              //  match a longer contraction sequence.
              firstCE = tempCE;
@@ -2895,9 +2894,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              //loadState(source, &tempState, TRUE);
  
              // The problem is that for incomplete contractions we have to remember the previous
-            // position. Before, the only thing I needed to do was state.pos--; 
+            // position. Before, the only thing I needed to do was state.pos--;
              // After iterator introduction and especially after introduction of normalizing
-            // iterators, it became much more difficult to decrease the saved state. 
+            // iterators, it became much more difficult to decrease the saved state.
              // I'm not yet sure which of the two methods above is faster.
          }
        } // for(;;)
@@ -2930,200 +2929,215 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
        }
        return CE;
        }
-    case DIGIT_TAG:      
+    case DIGIT_TAG:
        {
-      /* 
-        We do a check to see if we want to collate digits as numbers; if so we generate
+      /*
+         We do a check to see if we want to collate digits as numbers; if so we generate
           a custom collation key. Otherwise we pull out the value stored in the expansion table.
        */
        uint32_t size;
        uint32_t i;    /* general counter */
-      
-      if (coll->numericCollation == UCOL_ON){
-               UChar32 char32 = 0;             
-               
-               uint32_t digIndx = 0; 
-               uint32_t endIndex = 0;          
-               uint32_t trailingZeroIndex = 0;
-
-               uint32_t primWeight = 0;
-               
-               uint32_t digVal = 0;            
-               uint8_t collateVal = 0;
-               
-               UBool nonZeroValReached = false;
-
-               uint8_t *numTempBuf;
-               uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
-               uint32_t numTempBufSize = UCOL_MAX_BUFFER;
-               
-               numTempBuf = stackNumTempBuf;
-               /*
-                        We parse the source string until we hit a char that's NOT a digit.
-               Use this u_charDigitValue. This might be slow because we have to 
-               handle surrogates...
-       */
-       
-       if (U16_IS_LEAD(ch)){
-               if (!collIter_eos(source))
-                               char32 = U16_GET_SUPPLEMENTARY(ch, getNextNormalizedChar(source));
-                       else
-                               char32 = ch;
-       }
-               else
-                       char32 = ch;
-               digVal = u_charDigitValue(char32);
-               
-               /* 
-                       We  pad a zero in front of the first element anyways. This takes
-                       care of the (probably) most common case where people are sorting things followed
-                       by a single digit
-               */
-               digIndx++;
-       for(;;){
-       // Make sure we have enough space.
-       if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
-       {
-               numTempBufSize *= 2;
-               if (numTempBuf == stackNumTempBuf){
-                       numTempBuf = (uint8_t *)malloc(sizeof(uint8_t) * numTempBufSize);
-                       memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
-               }else
-                       realloc(numTempBuf, numTempBufSize);
-       }
-       
-                       // Skipping over leading zeroes.        
-               if (digVal != 0 || nonZeroValReached){
-                               if (digVal != 0 && !nonZeroValReached)
-                                       nonZeroValReached = true;
-                               
-                               /*
-                                       We parse the digit string into base 100 numbers (this fits into a byte).
-                                       We only add to the buffer in twos, thus if we are parsing an odd character,
-                                       that serves as the 'tens' digit while the if we are parsing an even one, that 
-                                       is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 
-                                       a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
-                                       overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
-                                       than all the other bytes. 
-                                */
-
-                               if (digIndx % 2 == 1){
-                                       collateVal += (uint8_t)digVal;  
-                                        
-                                       // We don't enter the low-order-digit case unless we've already seen
-                                       // the high order, or for the first digit, which is always non-zero.
-                                       if (collateVal != 0)
-                                               trailingZeroIndex = 0;
-                                               
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                                       collateVal = 0;
-                               }
-                               else{
-                                       // We drop the collation value into the buffer so if we need to do
-                                       // a "front patch" we don't have to check to see if we're hitting the
-                                       // last element.
-                                       collateVal = (uint8_t)(digVal * 10);
-
-                                       // Check for trailing zeroes.
-                                       if (collateVal == 0)
-                                       {
-                                               if (!trailingZeroIndex)
-                                                       trailingZeroIndex = (digIndx/2) + 2;
-                                       }
-                                       else
-                                               trailingZeroIndex = 0;
-
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                               }
-                               digIndx++;
-               }
-               
-               // Get next character.
-               if (!collIter_eos(source)){
-                               ch = getNextNormalizedChar(source);
-                               if (U16_IS_LEAD(ch)){
-                                       if (!collIter_eos(source))
-                                               char32 = U16_GET_SUPPLEMENTARY(ch, getNextNormalizedChar(source));
-                               }
-                               else
-                                       char32 = ch;
-                                       
-                               if ((digVal = u_charDigitValue(char32)) == -1){
-                                       // Resetting position to point to the next unprocessed char. We
-                                       // overshot it when doing our test/set for numbers.
-                                       goBackOne(source);
-                                       if (char32 > 0xFFFF) // For surrogates.
-                                               goBackOne(source);
-                                       break;
-                               }
-                       }else
-                               break;
-               }
-               
-               if (nonZeroValReached == false){
-                       digIndx = 2;
-                       numTempBuf[2] = 6;
-               }
-               
-               endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;                          
-               if (digIndx % 2 != 0){
-                       /* 
-                               We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what
-                               we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward. 
-                               Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
-                               single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
-                       */
-
-                       for(i = 2; i < endIndex; i++){
-                               numTempBuf[i] =         (((((numTempBuf[i] - 6)/2) % 10) * 10) + 
-                                                                       (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
-                       }
-                       --digIndx;
-               }
-               
-               // Subtract one off of the last byte. 
-               numTempBuf[endIndex-1] -= 1;                    
-                               
-               /* 
-                       We want to skip over the first two slots in the buffer. The first slot
-                       is reserved for the header byte 0x1B. The second slot is for the 
-                       sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
-               */ 
-               numTempBuf[0] = 0x1B;
-               numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
-               
-               // Now transfer the collation key to our collIterate struct.
-               // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
-                 size = ((endIndex+1) & ~1)/2;
-                 CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
-                               (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
-                               UCOL_BYTE_COMMON; // Tertiary weight.
-                 i = 2; // Reset the index into the buffer.
-                 while(i < endIndex)
-                 {
-                       primWeight = numTempBuf[i++] << 8;
-                       if ( i < endIndex)
-                               primWeight |= numTempBuf[i++];
-                       *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
-                 }
-                 
-                 if (numTempBuf != stackNumTempBuf)
-                       free(numTempBuf);
+
+      if (source->coll->numericCollation == UCOL_ON){
+        collIterateState digitState = {0,0,0,0,0,0,0,0};
+        UChar32 char32 = 0;
+
+        uint32_t digIndx = 0;
+        uint32_t endIndex = 0;
+        uint32_t trailingZeroIndex = 0;
+
+        uint32_t primWeight = 0;
+
+        int32_t digVal = 0;
+        uint8_t collateVal = 0;
+
+        UBool nonZeroValReached = FALSE;
+
+        uint8_t *numTempBuf;
+        uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
+        uint32_t numTempBufSize = UCOL_MAX_BUFFER;
+
+        numTempBuf = stackNumTempBuf;
+        /*
+             We parse the source string until we hit a char that's NOT a digit.
+            Use this u_charDigitValue. This might be slow because we have to
+            handle surrogates...
+        */
+/*
+        if (U16_IS_LEAD(ch)){
+          if (!collIter_eos(source)) {
+            backupState(source, &digitState);
+            UChar trail = getNextNormalizedChar(source);
+            if(U16_IS_TRAIL(trail)) {
+              char32 = U16_GET_SUPPLEMENTARY(ch, trail);
+            } else {
+              loadState(source, &digitState, TRUE);
+              char32 = ch;
+            }
+          } else {
+            char32 = ch;
+          }
+        } else {
+          char32 = ch;
+        }
+        digVal = u_charDigitValue(char32);
+*/
+        digVal = u_charDigitValue(cp); // if we have arrived here, we have
+        // already processed possible supplementaries that trigered the digit tag -
+        // all supplementaries are marked in the UCA.
+        /*
+            We  pad a zero in front of the first element anyways. This takes
+            care of the (probably) most common case where people are sorting things followed
+            by a single digit
+        */
+        digIndx++;
+        for(;;){
+            // Make sure we have enough space.
+            if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
+            {
+                numTempBufSize *= 2;
+                if (numTempBuf == stackNumTempBuf){
+                    numTempBuf = (uint8_t *)uprv_malloc(sizeof(uint8_t) * numTempBufSize);
+                    uprv_memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
+                } else {
+                    uprv_realloc(numTempBuf, numTempBufSize);
+                }
+            }
+
+            // Skipping over leading zeroes.
+            if (digVal != 0) {
+                nonZeroValReached = TRUE;
+            }
+            if (nonZeroValReached) {
+                /*
+                    We parse the digit string into base 100 numbers (this fits into a byte).
+                    We only add to the buffer in twos, thus if we are parsing an odd character,
+                    that serves as the 'tens' digit while the if we are parsing an even one, that
+                    is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
+                    a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
+                    overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
+                    than all the other bytes.
+                 */
+
+                if (digIndx % 2 == 1){
+                    collateVal += (uint8_t)digVal;
+
+                    // We don't enter the low-order-digit case unless we've already seen
+                    // the high order, or for the first digit, which is always non-zero.
+                    if (collateVal != 0)
+                        trailingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                    collateVal = 0;
+                }
+                else{
+                    // We drop the collation value into the buffer so if we need to do
+                    // a "front patch" we don't have to check to see if we're hitting the
+                    // last element.
+                    collateVal = (uint8_t)(digVal * 10);
+
+                    // Check for trailing zeroes.
+                    if (collateVal == 0)
+                    {
+                        if (!trailingZeroIndex)
+                            trailingZeroIndex = (digIndx/2) + 2;
+                    }
+                    else
+                        trailingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                }
+                digIndx++;
+            }
+
+            // Get next character.
+            if (!collIter_eos(source)){
+                ch = getNextNormalizedChar(source);
+                if (U16_IS_LEAD(ch)){
+                  if (!collIter_eos(source)) {
+                    backupState(source, &digitState);
+                    UChar trail = getNextNormalizedChar(source);
+                    if(U16_IS_TRAIL(trail)) {
+                      char32 = U16_GET_SUPPLEMENTARY(ch, trail);
+                    } else {
+                      loadState(source, &digitState, TRUE);
+                      char32 = ch;
+                    }
+                  }
+                } else {
+                  char32 = ch;
+                }
+
+                if ((digVal = u_charDigitValue(char32)) == -1){
+                    // Resetting position to point to the next unprocessed char. We
+                    // overshot it when doing our test/set for numbers.
+                  if (char32 > 0xFFFF) { // For surrogates.
+                    loadState(source, &digitState, TRUE);
+                    //goBackOne(source);
+                  }
+                  goBackOne(source);
+                  break;
+                }
+            } else {
+              break;
+            }
+        }
+
+        if (nonZeroValReached == FALSE){
+            digIndx = 2;
+            numTempBuf[2] = 6;
+        }
+
+        endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
+        if (digIndx % 2 != 0){
+            /*
+                We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what
+                we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
+                Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
+                single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
+            */
+
+            for(i = 2; i < endIndex; i++){
+                numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10) * 10) +
+                                    (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
+            }
+            --digIndx;
+        }
+
+        // Subtract one off of the last byte.
+        numTempBuf[endIndex-1] -= 1;
+
+        /*
+            We want to skip over the first two slots in the buffer. The first slot
+            is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
+            sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
+        */
+        numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
+        numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
+
+        // Now transfer the collation key to our collIterate struct.
+        // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
+          size = ((endIndex+1) & ~1)/2;
+          CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
+                (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
+                UCOL_BYTE_COMMON; // Tertiary weight.
+          i = 2; // Reset the index into the buffer.
+          while(i < endIndex)
+          {
+            primWeight = numTempBuf[i++] << 8;
+            if ( i < endIndex)
+                primWeight |= numTempBuf[i++];
+            *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
+          }
+
+          if (numTempBuf != stackNumTempBuf)
+            uprv_free(numTempBuf);
+      } else {
+        // no numeric mode, we'll just switch to whatever we stashed and continue
+          CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
+          CE = *CEOffset++;
+          break;
        }
-      else{
-                 CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
-                 size = getExpansionCount(CE);
-                 CE = *CEOffset++;
-                 if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
-                       for(i = 1; i<size; i++) {
-                         *(source->CEpos++) = *CEOffset++;
-                       }
-                 } else { /* else, we do */
-                       while(*CEOffset != 0) {
-                         *(source->CEpos++) = *CEOffset++;
-                       }
-                 }
-         }
        return CE;
        }
      /* various implicits optimization */
@@ -3160,7 +3174,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          const uint32_t
            SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
          //const uint32_t LCount = 19;
-        const uint32_t VCount = 21; 
+        const uint32_t VCount = 21;
          const uint32_t TCount = 28;
          //const uint32_t NCount = VCount * TCount;   // 588
          //const uint32_t SCount = LCount * NCount;   // 11172
@@ -3182,26 +3196,20 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          // return the first CE, but first put the rest into the expansion buffer
          if (!source->coll->image->jamoSpecial) { // FAST PATH
  
-          /**(source->CEpos++) = ucmpe32_get(UCA->mapping, V);*/
-          /**(source->CEpos++) = UTRIE_GET32_FROM_LEAD(UCA->mapping, V);*/
-          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(coll->mapping, V);
+          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
            if (T != TBase) {
-              /**(source->CEpos++) = ucmpe32_get(UCA->mapping, T);*/
-              /**(source->CEpos++) = UTRIE_GET32_FROM_LEAD(UCA->mapping, T);*/
-              *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(coll->mapping, T);
+              *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
            }
  
-          /*return ucmpe32_get(UCA->mapping, L);*/ // return first one
-          /*return UTRIE_GET32_FROM_LEAD(UCA->mapping, L);*/
-          return UTRIE_GET32_FROM_LEAD(coll->mapping, L);
+          return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
  
          } else { // Jamo is Special
-          // Since Hanguls pass the FCD check, it is 
+          // Since Hanguls pass the FCD check, it is
            // guaranteed that we won't be in
            // the normalization buffer if something like this happens
            // However, if we are using a uchar iterator and normalization
            // is ON, the Hangul that lead us here is going to be in that
-          // normalization buffer. Here we want to restore the uchar 
+          // normalization buffer. Here we want to restore the uchar
            // iterator state and pull out of the normalization buffer
            if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
              source->flags = source->origFlags; // restore the iterator
@@ -3249,7 +3257,7 @@ inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
      return 0;
    }
  
-  uint32_t r = getImplicitPrimary(cp);
+  uint32_t r = uprv_uca_getImplicitPrimary(cp);
  
    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
    collationSource->toReturn = collationSource->CEpos;
@@ -3287,56 +3295,12 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
        /* if you have encountered it here, it means that a */
        /* broken sequence was encountered and this is an error */
        return 0;
-    case THAI_TAG:
-      if  ((source->flags & UCOL_ITER_INNORMBUF) || /* Already Swapped || */
-            source->string == source->pos        || /* At start of string.|| */
-            /* previous char not Thai prevowel */
-            /*UCOL_ISTHAIBASECONSONANT(*(source->pos)) == FALSE ||*/ // This is from the old specs - we now rearrange unconditionally
-            UCOL_ISTHAIPREVOWEL(peekCharacter(source, -1)) == FALSE)
-            //UCOL_ISTHAIPREVOWEL(*(source->pos - 1)) == FALSE)
-      {
-          /* Treat Thai as a length one expansion */
-          /* find the offset to expansion table */
-          CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE);
-          CE = *CEOffset ++;
-      }
-      else
-      {
-          /*
-          Move the prevowel and the following base Consonant into the
-          normalization buffer with their order swapped
-          */
-          UChar *tempbuffer = source->writableBuffer +
-                              (source->writableBufSize - 1);
-          *(tempbuffer - 2) = 0;
-          *(tempbuffer - 1) = peekCharacter(source, 0);
-          *(tempbuffer)     = peekCharacter(source, -1);
-
-          /*
-          Indicate where to continue in main input string after exhausting
-          the writableBuffer
-          */
-          if (source->pos - 1 == source->string) {
-              source->fcdPosition = NULL;
-          } else {
-            source->fcdPosition       = source->pos-2;
-          }
-
-          source->pos               = tempbuffer;
-          source->origFlags         = source->flags;
-          source->flags            |= UCOL_ITER_INNORMBUF;
-          source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
-
-          //CE = UCOL_IGNORABLE;
-          return(UCOL_IGNORABLE);
-      }
-      break;
      case SPEC_PROC_TAG:
        {
          // Special processing is getting a CE that is preceded by a certain prefix
          // Currently this is only needed for optimizing Japanese length and iteration marks.
-        // When we encouter a special processing tag, we go backwards and try to see if 
-        // we have a match. 
+        // When we encouter a special processing tag, we go backwards and try to see if
+        // we have a match.
          // Contraction tables are used - so the whole process is not unlike contraction.
          // prefix data is stored backwards in the table.
          const UChar *UCharOffset;
@@ -3345,16 +3309,16 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          backupState(source, &prefixState);
          for(;;) {
          // This loop will run once per source string character, for as long as we
-        //  are matching a potential contraction sequence                  
+        //  are matching a potential contraction sequence
  
-          // First we position ourselves at the begining of contraction sequence 
+          // First we position ourselves at the begining of contraction sequence
            const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  
            if (collIter_bos(source)) {
              CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
              break;
            }
-          schar = getPrevNormalizedChar(source);
+          schar = getPrevNormalizedChar(source, status);
            goBackOne(source);
  
            while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
@@ -3368,16 +3332,16 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                    (UCharOffset - coll->contractionIndex));
            }
            else
-          {             
-              // if there is a completely ignorable code point in the middle of 
+          {
+              // if there is a completely ignorable code point in the middle of
                // a prefix, we need to act as if it's not there
                // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
                // lone surrogates cannot be set to zero as it would break other processing
-              uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
+              uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
                // it's easy for BMP code points
                if(isZeroCE == 0) {
                  continue;
-              } else if(UTF_IS_TRAIL(schar) || UTF_IS_LEAD(schar)) {
+              } else if(U16_IS_TRAIL(schar) || U16_IS_LEAD(schar)) {
                  // for supplementary code points, we have to check the next one
                  // situations where we are going to ignore
                  // 1. beginning of the string: schar is a lone surrogate
@@ -3386,10 +3350,10 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                  //    that is explicitly set to zero.
                  if (!collIter_bos(source)) {
                    UChar lead;
-                  if(UTF_IS_LEAD(lead = getPrevNormalizedChar(source))) {
-                    isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, lead);
+                  if(U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
+                    isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
                      if(getCETag(isZeroCE) == SURROGATE_TAG) {
-                      uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(coll->mapping, isZeroCE&0xFFFFFF, schar);
+                      uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
                        if(finalCE == 0) {
                          // this is a real, assigned completely ignorable code point
                          goBackOne(source);
@@ -3445,11 +3409,11 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          *(UCharOffset --) = 0;
          noChars = 0;
          // have to swap thai characters
-        while (ucol_unsafeCP(schar, coll) || UCOL_ISTHAIBASECONSONANT(schar)) {
+        while (ucol_unsafeCP(schar, coll)) {
              *(UCharOffset) = schar;
              noChars++;
              UCharOffset --;
-            schar = getPrevNormalizedChar(source);
+            schar = getPrevNormalizedChar(source, status);
              goBackOne(source);
              // TODO: when we exhaust the contraction buffer,
              // it needs to get reallocated. The problem is
@@ -3467,11 +3431,11 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                }
                  strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
                                               (newsize + UCOL_MAX_BUFFER));
-                               /* test for NULL */
-                               if (strbuffer == NULL) {
-                                       *status = U_MEMORY_ALLOCATION_ERROR;
-                                       return UCOL_NO_MORE_CES;
-                               }
+                /* test for NULL */
+                if (strbuffer == NULL) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    return UCOL_NO_MORE_CES;
+                }
                  UCharOffset = strbuffer + newsize;
                  uprv_memcpy(UCharOffset, buffer,
                                               UCOL_MAX_BUFFER * sizeof(UChar));
@@ -3488,7 +3452,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          *(UCharOffset) = schar;
          noChars++;
  
-        /* a new collIterate is used to simply things, since using the current
+        /* a new collIterate is used to simplify things, since using the current
          collIterate will mean that the forward and backwards iteration will
          share and change the same buffers. we don't want to get into that. */
          collIterate temp;
@@ -3510,7 +3474,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                  if (strbuffer != buffer) {
                      uprv_free(strbuffer);
                  }
-                return UCOL_NULLORDER;
+                return (uint32_t)UCOL_NULLORDER;
              }
              CE = ucol_IGetNextCE(coll, &temp, status);
          }
@@ -3555,230 +3519,231 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          }
        }
        source->toReturn = source->CEpos - 1;
-      // in case of one element expansion, we 
+      // in case of one element expansion, we
        // want to immediately return CEpos
        if(source->toReturn == source->CEs) {
          source->CEpos = source->CEs;
        }
        return *(source->toReturn);
-     case DIGIT_TAG:      
+     case DIGIT_TAG:
        {
-      /* 
-        We do a check to see if we want to collate digits as numbers; if so we generate
+      /*
+         We do a check to see if we want to collate digits as numbers; if so we generate
           a custom collation key. Otherwise we pull out the value stored in the expansion table.
        */
-      uint32_t size;
+      //uint32_t size;
        uint32_t i;    /* general counter */
-      
-      if (coll->numericCollation == UCOL_ON){
-               UChar32 char32 = 0;             
-               
-               uint32_t digIndx = 0; 
-               uint32_t endIndex = 0;          
-               uint32_t leadingZeroIndex = 0;
-               uint32_t trailingZeroCount = 0;
-
-               uint32_t primWeight = 0;
-               
-               uint32_t digVal = 0;            
-               uint8_t collateVal = 0;
-               
-               UBool nonZeroValReached = false;
-
-               uint8_t *numTempBuf;
-               uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
-               uint32_t numTempBufSize = UCOL_MAX_BUFFER;
-               
-               numTempBuf = stackNumTempBuf;
-               /*
-                        We parse the source string until we hit a char that's NOT a digit.
-               Use this u_charDigitValue. This might be slow because we have to 
-               handle surrogates...
-       */
-       
-       if (U16_IS_TRAIL (ch)){
-               if (!collIter_bos(source)){
-                               char32 = U16_GET_SUPPLEMENTARY(getPrevNormalizedChar(source),ch);
-                               goBackOne(source);
-                       }       
-                       else
-                               char32 = ch;
-       }
-               else
-                       char32 = ch;
-               digVal = u_charDigitValue(char32);
-               
-       for(;;){
-                       // Make sure we have enough space.
-                       if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
-                       {
-                               numTempBufSize *= 2;
-                               if (numTempBuf == stackNumTempBuf){
-                                       numTempBuf = (uint8_t *)malloc(sizeof(uint8_t) * numTempBufSize);
-                                       memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
-                               }else
-                                       realloc(numTempBuf, numTempBufSize);
-                       }
-       
-                       // Skip over trailing zeroes, and keep a count of them.
-                       if (digVal != 0)
-                               nonZeroValReached = true;
-               if (nonZeroValReached){
-                               /*
-                                       We parse the digit string into base 100 numbers (this fits into a byte).
-                                       We only add to the buffer in twos, thus if we are parsing an odd character,
-                                       that serves as the 'tens' digit while the if we are parsing an even one, that 
-                                       is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 
-                                       a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
-                                       overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
-                                       than all the other bytes. 
-                                       
-                                       Since we're doing in this reverse we want to put the first digit encountered into the
-                                       ones place and the second digit encountered into the tens place.
-                                */
-                               
-                               if ((digIndx + trailingZeroCount) % 2 == 1){
-                                       // High-order digit case (tens place)
-                                       collateVal += digVal * 10;
-                                       
-                                       // We cannot set leadingZeroIndex unless it has been set for the
-                                       // low-order digit. Therefore, all we can do for the high-order
-                                       // digit is turn it off, never on.
-                                       // The only time we will have a high digit without a low is for
-                                       // the very first non-zero digit, so no zero check is necessary.
-                                       if (collateVal != 0)
-                                               leadingZeroIndex = 0;
-                                                                                       
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                                       collateVal = 0;
-                               }
-                               else{
-                                       // Low-order digit case (ones place)
-                                       collateVal = digVal;    
-
-                                       // Check for leading zeroes.
-                                       if (collateVal == 0)
-                                       {
-                                               if (!leadingZeroIndex)
-                                                       leadingZeroIndex = (digIndx/2) + 2;
-                                       }
-                                       else
-                                               leadingZeroIndex = 0;
-                                       
-                                       // No need to write to buffer; the case of a last odd digit
-                                       // is handled below.
-                               }
-                       ++digIndx;
-               }
-               else
-                       ++trailingZeroCount;
-               
-               if (!collIter_bos(source)){
-                               ch = getPrevNormalizedChar(source);
-                               goBackOne(source);
-                               if (U16_IS_TRAIL(ch)){
-                                       if (!collIter_bos(source))
-                                       {
-                                               char32 = U16_GET_SUPPLEMENTARY(getPrevNormalizedChar(source),ch);
-                                               goBackOne(source);
-                                       }
-                               }
-                               else
-                                       char32 = ch;
-                                       
-                               if ((digVal = u_charDigitValue(char32)) == -1){
-                                       // Don't need to "reverse" the goBackOne call,
-                                       // as this points to the next position to process..
-                                       if (char32 > 0xFFFF) // For surrogates.
-                                               getNextNormalizedChar(source);
-                                       break;
-                               }
-                       }else
-                               break;
-               }
-
-               if (nonZeroValReached == false){
-                       digIndx = 2;
-                       trailingZeroCount = 0;
-                       numTempBuf[2] = 6;
-               }
-
-               if ((digIndx + trailingZeroCount) % 2 != 0){
-                               numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
-                               digIndx += 1;
-               }
-               
-               endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;                            
-               
-               // Subtract one off of the last byte. Really the first byte here, but it's reversed...
-               numTempBuf[2] -= 1;                     
-                               
-               /* 
-                       We want to skip over the first two slots in the buffer. The first slot
-                       is reserved for the header byte 0x1B. The second slot is for the 
-                       sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
-                       The exponent must be adjusted by the number of leading zeroes, and the number of
-                       trailing zeroes.
-               */ 
-               numTempBuf[0] = 0x1B;
-               uint32_t exponent = (digIndx+trailingZeroCount)/2;
-               if (leadingZeroIndex)
-                       exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
-               numTempBuf[1] = 0x80 + (exponent & 0x7F);
-               
-               // Now transfer the collation key to our collIterate struct.
-               // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
-               //size = ((endIndex+1) & ~1)/2;
-                 *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
-                               (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
-                               UCOL_BYTE_COMMON; // Tertiary weight.
-                 i = endIndex - 1; // Reset the index into the buffer.
-                 while(i >= 2)
-                 {
-                       primWeight = numTempBuf[i--] << 8;
-                       if ( i >= 2)
-                               primWeight |= numTempBuf[i--];
-                       *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
-                 }
-                 if (numTempBuf != stackNumTempBuf)
-                       free(numTempBuf);
-                       
-                 source->toReturn = source->CEpos -1;
-                 return *(source->toReturn);
-      }
-      else{
-               /* find the offset to expansion table */
-                 CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
-                 size     = getExpansionCount(CE);
-                 if (size != 0) {
-                       /*
-                       if there are less than 16 elements in expansion, we don't terminate
-                       */
-                       uint32_t count;
-                       for (count = 0; count < size; count++) {
-                         *(source->CEpos ++) = *CEOffset++;
-                       }
-                 }
-                 else {
-                       /* else, we do */
-                       while (*CEOffset != 0) {
-                         *(source->CEpos ++) = *CEOffset ++;
-                       }
-                 }
-                 source->toReturn = source->CEpos - 1;
-          // in case of one element expansion, we 
-          // want to immediately return CEpos
-          if(source->toReturn == source->CEs) {
-            source->CEpos = source->CEs;
+
+      if (source->coll->numericCollation == UCOL_ON){
+        collIterateState state = {0,0,0,0,0,0,0,0};
+        UChar32 char32 = 0;
+
+        uint32_t digIndx = 0;
+        uint32_t endIndex = 0;
+        uint32_t leadingZeroIndex = 0;
+        uint32_t trailingZeroCount = 0;
+
+        uint32_t primWeight = 0;
+
+        int32_t digVal = 0;
+        uint8_t collateVal = 0;
+
+        UBool nonZeroValReached = FALSE;
+
+        uint8_t *numTempBuf;
+        uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
+        uint32_t numTempBufSize = UCOL_MAX_BUFFER;
+
+        numTempBuf = stackNumTempBuf;
+        /*
+             We parse the source string until we hit a char that's NOT a digit.
+            Use this u_charDigitValue. This might be slow because we have to
+            handle surrogates...
+        */
+
+        if (U16_IS_TRAIL (ch)){
+            if (!collIter_bos(source)){
+              UChar lead = getPrevNormalizedChar(source, status);
+              if(U16_IS_LEAD(lead)) {
+                char32 = U16_GET_SUPPLEMENTARY(lead,ch);
+                goBackOne(source);
+              } else {
+                char32 = ch;
+              }
+            } else {
+                char32 = ch;
+            }
+        } else {
+            char32 = ch;
+        }
+        digVal = u_charDigitValue(char32);
+
+        for(;;){
+        // Make sure we have enough space.
+        if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
+        {
+            numTempBufSize *= 2;
+            if (numTempBuf == stackNumTempBuf){
+                numTempBuf = (uint8_t *)uprv_malloc(sizeof(uint8_t) * numTempBufSize);
+                uprv_memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
+            }else
+                uprv_realloc(numTempBuf, numTempBufSize);
+        }
+
+            // Skip over trailing zeroes, and keep a count of them.
+            if (digVal != 0)
+                    nonZeroValReached = TRUE;
+            if (nonZeroValReached){
+                /*
+                    We parse the digit string into base 100 numbers (this fits into a byte).
+                    We only add to the buffer in twos, thus if we are parsing an odd character,
+                    that serves as the 'tens' digit while the if we are parsing an even one, that
+                    is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
+                    a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
+                    overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
+                    than all the other bytes.
+
+                    Since we're doing in this reverse we want to put the first digit encountered into the
+                    ones place and the second digit encountered into the tens place.
+                 */
+
+                if ((digIndx + trailingZeroCount) % 2 == 1){
+                    // High-order digit case (tens place)
+                    collateVal += (uint8_t)(digVal * 10);
+
+                    // We cannot set leadingZeroIndex unless it has been set for the
+                    // low-order digit. Therefore, all we can do for the high-order
+                    // digit is turn it off, never on.
+                    // The only time we will have a high digit without a low is for
+                    // the very first non-zero digit, so no zero check is necessary.
+                    if (collateVal != 0)
+                        leadingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                    collateVal = 0;
+                }
+                else{
+                    // Low-order digit case (ones place)
+                    collateVal = (uint8_t)digVal;
+
+                    // Check for leading zeroes.
+                    if (collateVal == 0)
+                    {
+                        if (!leadingZeroIndex)
+                            leadingZeroIndex = (digIndx/2) + 2;
+                    }
+                    else
+                        leadingZeroIndex = 0;
+
+                    // No need to write to buffer; the case of a last odd digit
+                    // is handled below.
+                }
+                ++digIndx;
+            }
+            else
+                ++trailingZeroCount;
+
+            if (!collIter_bos(source)){
+                ch = getPrevNormalizedChar(source, status);
+                //goBackOne(source);
+                if (U16_IS_TRAIL(ch)){
+                    backupState(source, &state);
+                    if (!collIter_bos(source))
+                    {
+                        goBackOne(source);
+                        UChar lead = getPrevNormalizedChar(source, status);
+                        if(U16_IS_LEAD(lead)) {
+                          char32 = U16_GET_SUPPLEMENTARY(lead,ch);
+                        } else {
+                          loadState(source, &state, FALSE);
+                          char32 = ch;
+                        }
+                    }
+                }
+                else
+                    char32 = ch;
+
+                if ((digVal = u_charDigitValue(char32)) == -1){
+                  if (char32 > 0xFFFF) {// For surrogates.
+                    loadState(source, &state, FALSE);
+                  }
+                    // Don't need to "reverse" the goBackOne call,
+                    // as this points to the next position to process..
+                    //if (char32 > 0xFFFF) // For surrogates.
+                        //getNextNormalizedChar(source);
+                    break;
+                }
+                goBackOne(source);
+            }else
+                break;
+        }
+
+        if (nonZeroValReached == FALSE){
+            digIndx = 2;
+            trailingZeroCount = 0;
+            numTempBuf[2] = 6;
+        }
+
+        if ((digIndx + trailingZeroCount) % 2 != 0){
+                numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
+            digIndx += 1;       // The implicit leading zero
+            }
+        if (trailingZeroCount % 2 != 0){
+            // We had to consume one trailing zero for the low digit
+            // of the least significant byte
+            digIndx += 1;       // The trailing zero not in the exponent
+            trailingZeroCount -= 1;
+        }
+
+        endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
+
+        // Subtract one off of the last byte. Really the first byte here, but it's reversed...
+        numTempBuf[2] -= 1;
+
+        /*
+            We want to skip over the first two slots in the buffer. The first slot
+            is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
+            sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
+            The exponent must be adjusted by the number of leading zeroes, and the number of
+            trailing zeroes.
+        */
+        numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
+        uint32_t exponent = (digIndx+trailingZeroCount)/2;
+        if (leadingZeroIndex)
+            exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
+        numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
+
+        // Now transfer the collation key to our collIterate struct.
+        // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
+        //size = ((endIndex+1) & ~1)/2;
+          *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
+                (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
+                UCOL_BYTE_COMMON; // Tertiary weight.
+          i = endIndex - 1; // Reset the index into the buffer.
+          while(i >= 2)
+          {
+            primWeight = numTempBuf[i--] << 8;
+            if ( i >= 2)
+                primWeight |= numTempBuf[i--];
+            *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
            }
-                 return *(source->toReturn);
-         }
-      }  
+          if (numTempBuf != stackNumTempBuf)
+            uprv_free(numTempBuf);
+
+          source->toReturn = source->CEpos -1;
+          return *(source->toReturn);
+      }
+      else {
+          CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
+          CE = *(CEOffset++);
+          break;
+      }
+      }
      case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
        {
          const uint32_t
            SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
-        //const uint32_t LCount = 19; 
+        //const uint32_t LCount = 19;
          const uint32_t VCount = 21;
          const uint32_t TCount = 28;
          //const uint32_t NCount = VCount * TCount;   /* 588 */
@@ -3805,27 +3770,21 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          */
          if (!source->coll->image->jamoSpecial)
          {
-          /**(source->CEpos ++) = ucmpe32_get(UCA->mapping, L);*/
-          /**(source->CEpos++) = UTRIE_GET32_FROM_LEAD(UCA->mapping, L);*/
-          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(coll->mapping, L);
-          /**(source->CEpos ++) = ucmpe32_get(UCA->mapping, V);*/
-          /**(source->CEpos++) = UTRIE_GET32_FROM_LEAD(UCA->mapping, V);*/
-          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(coll->mapping, V);
+          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
+          *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
            if (T != TBase)
-            /**(source->CEpos ++) = ucmpe32_get(UCA->mapping, T);*/
-            /**(source->CEpos++) = UTRIE_GET32_FROM_LEAD(UCA->mapping, T);*/
-            *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(coll->mapping, T);
+            *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
  
            source->toReturn = source->CEpos - 1;
            return *(source->toReturn);
          } else {
-          // Since Hanguls pass the FCD check, it is 
+          // Since Hanguls pass the FCD check, it is
            // guaranteed that we won't be in
            // the normalization buffer if something like this happens
            // Move Jamos into normalization buffer
            /*
            Move the Jamos into the
-          normalization buffer 
+          normalization buffer
            */
            UChar *tempbuffer = source->writableBuffer +
                                (source->writableBufSize - 1);
@@ -3878,7 +3837,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
        prevChar = *prev;
  
        /* Handles Han and Supplementary characters here.*/
-      if (UTF_IS_FIRST_SURROGATE(prevChar)) {
+      if (U16_IS_LEAD(prevChar)) {
          cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
          source->pos = prev;
        } else {
@@ -4076,21 +4035,33 @@ ucol_getSortKey(const    UCollator    *coll,
          uint8_t        *result,
          int32_t        resultLength)
  {
+  UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
+  if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+      int32_t actualSrcLen = sourceLength;
+      if (actualSrcLen==-1 && source!=NULL) {
+          actualSrcLen = u_strlen(source);
+      }
+      UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, actualSrcLen);
+  }
+
    UErrorCode status = U_ZERO_ERROR;
+  int32_t keySize   = 0;
  
-  if(source == NULL) {
-    // this is actually an error situation, but we would need to 
-    // have an error code to return it. Until we introduce a new
-    // API, it stays like this
-    return 0;
-  }
-  /* this uses the function pointer that is set in updateinternalstate */
-  /* currently, there are two funcs: */
-  /*ucol_calcSortKey(...);*/
-  /*ucol_calcSortKeySimpleTertiary(...);*/
+  if(source != NULL) {
+      // source == NULL is actually an error situation, but we would need to
+      // have an error code to return it. Until we introduce a new
+      // API, it stays like this
+
+      /* this uses the function pointer that is set in updateinternalstate */
+      /* currently, there are two funcs: */
+      /*ucol_calcSortKey(...);*/
+      /*ucol_calcSortKeySimpleTertiary(...);*/
  
-  int32_t keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);
-  //((UCollator *)coll)->errorCode = status; /*semantically const */
+      keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);
+      //((UCollator *)coll)->errorCode = status; /*semantically const */
+  }
+  UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
+  UTRACE_EXIT_STATUS(status);
    return keySize;
  }
  
@@ -4110,6 +4081,7 @@ ucol_getSortKeyWithAllocation(const UCollator *coll,
  /* or if we run out of space while making a sortkey and want to return ASAP                                   */
  int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
      UErrorCode status = U_ZERO_ERROR;
+    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
      uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
      uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
      uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
@@ -4125,7 +4097,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
      uint8_t *frenchStartPtr = NULL, *frenchEndPtr = NULL;
  
      uint32_t variableTopValue = coll->variableTopValue;
-    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); 
+    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
      if(doHiragana) {
        UCOL_COMMON_BOT4++;
        /* allocate one more space for hiragana */
@@ -4173,7 +4145,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
  
  
            if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
-            || (!notIsContinuation && wasShifted)) 
+            || (!notIsContinuation && wasShifted))
              || (wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */
              /* and other ignorables should be removed if following a shifted code point */
              if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
@@ -4210,7 +4182,8 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
                        leadPrimary = 0;
                    } else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
                        //(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) {
-                      (primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                      //(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                      (primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
                    /* not compressible */
                        leadPrimary = 0;
                        currentSize+=2;
@@ -4271,7 +4244,10 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
                }
              }
  
-            if(doCase) {
+            if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
+                // do the case level if we need to do it. We don't want to calculate
+                // case level for primary ignorables if we have only primary strength and case level
+                // otherwise we would break well formedness of CEs 
                if (caseShift  == 0) {
                  currentSize++;
                  caseShift = UCOL_CASE_SHIFT_START;
@@ -4392,14 +4368,14 @@ inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size
    }
  }
  
-// Packs the secondary buffer when processing French locale. Adds the terminator. 
+// Packs the secondary buffer when processing French locale. Adds the terminator.
  static
  inline uint8_t *packFrench(uint8_t *primaries, uint8_t *primEnd, uint8_t *secondaries, uint32_t *secsize, uint8_t *frenchStartPtr, uint8_t *frenchEndPtr) {
    uint8_t secondary;
    int32_t count2 = 0;
    uint32_t i = 0, size = 0;
    // we use i here since the key size already accounts for terminators, so we'll discard the increment
-  addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR); 
+  addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR);
    /* If there are any unresolved continuation secondaries, reverse them here so that we can reverse the whole secondary thing */
    if(frenchStartPtr != NULL) {
      uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
@@ -4450,6 +4426,8 @@ ucol_calcSortKey(const    UCollator    *coll,
          UBool allocateSKBuffer,
          UErrorCode *status)
  {
+    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
+
      uint32_t i = 0; /* general purpose counter */
  
      /* Stack allocated buffers for buffers we use */
@@ -4488,7 +4466,7 @@ ucol_calcSortKey(const    UCollator    *coll,
      UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED);
      //UBool  qShifted = shifted && (compareQuad == 0);
      UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
-    const uint8_t *scriptOrder = coll->scriptOrder;
+    /*const uint8_t *scriptOrder = coll->scriptOrder;*/
  
      uint32_t variableTopValue = coll->variableTopValue;
      // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
@@ -4556,7 +4534,10 @@ ucol_calcSortKey(const    UCollator    *coll,
        }
        return keyLen;
      }
-    uint8_t *primarySafeEnd = primaries + resultLength - 2;
+    uint8_t *primarySafeEnd = primaries + resultLength - 1;
+    if(strength > UCOL_PRIMARY) {
+        primarySafeEnd--;
+    }
  
      uint32_t minBufferSize = UCOL_MAX_BUFFER;
  
@@ -4614,11 +4595,9 @@ ucol_calcSortKey(const    UCollator    *coll,
              primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
              primary1 = (uint8_t)(order >> 8);
  
-            if(notIsContinuation) {
-              if(scriptOrder != NULL) {
-                primary1 = scriptOrder[primary1];
-              }
-            }
+            /*if(notIsContinuation && scriptOrder != NULL) {
+              primary1 = scriptOrder[primary1];
+            }*/
  
              if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
                || (!notIsContinuation && wasShifted))
@@ -4665,7 +4644,8 @@ ucol_calcSortKey(const    UCollator    *coll,
                          *primaries++ = primary1;
                          leadPrimary = 0;
                      } else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
-                        (primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                        //(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                       (primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
                      /* not compressible */
                          leadPrimary = 0;
                          *primaries++ = primary1;
@@ -4727,7 +4707,10 @@ ucol_calcSortKey(const    UCollator    *coll,
                  }
                }
  
-              if(doCase) {
+              if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
+                // do the case level if we need to do it. We don't want to calculate
+                // case level for primary ignorables if we have only primary strength and case level
+                // otherwise we would break well formedness of CEs 
                  doCaseShift(&cases, caseShift);
                  if(notIsContinuation) {
                    caseBits = (uint8_t)(tertiary & 0xC0);
@@ -4768,9 +4751,10 @@ ucol_calcSortKey(const    UCollator    *coll,
                  if (tertiary == tertiaryCommon && notIsContinuation) {
                    ++count3;
                  } else {
-                  if((tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL)
-                    || (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST)) {
+                  if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
                      tertiary += tertiaryAddition;
+                  } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
+                    tertiary -= tertiaryAddition;
                    }
                    if (count3 > 0) {
                      if ((tertiary > tertiaryCommon)) {
@@ -4824,7 +4808,10 @@ ucol_calcSortKey(const    UCollator    *coll,
                  primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, 2*sks, status);
                  if(U_SUCCESS(*status)) {
                    *result = primStart;
-                  primarySafeEnd = primStart + resultLength - 2;
+                  primarySafeEnd = primStart + resultLength - 1;
+                  if(strength > UCOL_PRIMARY) {
+                      primarySafeEnd--;
+                  }
                  } else {
                    IInit_collIterate(coll, (UChar *)source, len, &s);
                    if(source == normSource) {
@@ -5004,7 +4991,7 @@ ucol_calcSortKey(const    UCollator    *coll,
                if(U_SUCCESS(*status)) {
                  *result = primStart;
                  *(primaries++) = UCOL_LEVELTERMINATOR;
-                u_writeIdenticalLevelRun(s.string, len, primaries);            
+                u_writeIdenticalLevelRun(s.string, len, primaries);
                }
              } else {
                *status = U_BUFFER_OVERFLOW_ERROR;
@@ -5028,11 +5015,11 @@ ucol_calcSortKey(const    UCollator    *coll,
  
      if(allocateSKBuffer == TRUE) {
        *result = (uint8_t*)uprv_malloc(sortKeySize);
-         /* test for NULL */
-         if (*result == NULL) {
-               *status = U_MEMORY_ALLOCATION_ERROR;
-               return sortKeySize;
-         }
+      /* test for NULL */
+      if (*result == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return sortKeySize;
+      }
        uprv_memcpy(*result, primStart, sortKeySize);
        if(primStart != prim) {
          uprv_free(primStart);
@@ -5053,6 +5040,8 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
          UErrorCode *status)
  {
      U_ALIGN_CODE(16);
+
+    //const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
      uint32_t i = 0; /* general purpose counter */
  
      /* Stack allocated buffers for buffers we use */
@@ -5190,8 +5179,9 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
                        *primaries++ = primary1;
                        leadPrimary = 0;
                    } else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
-                      //(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) 
-                      (primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                      //(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24)))
+                      //(primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
+                      (primary1 > maxRegularPrimary && primary1 < minImplicitPrimary)) {
                    /* not compressible */
                        leadPrimary = 0;
                        *primaries++ = primary1;
@@ -5392,11 +5382,11 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
  
      if(allocateSKBuffer == TRUE) {
        *result = (uint8_t*)uprv_malloc(sortKeySize);
-         /* test for NULL */
-         if (*result == NULL) {
-               *status = U_MEMORY_ALLOCATION_ERROR;
-               return sortKeySize;
-         }
+      /* test for NULL */
+      if (*result == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return sortKeySize;
+      }
        uprv_memcpy(*result, primStart, sortKeySize);
        if(primStart != prim) {
          uprv_free(primStart);
@@ -5409,7 +5399,7 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
  static inline
  UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
    UBool notIsContinuation = !isContinuation(CE);
-  uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF); 
+  uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);
    if(LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)
      || (!notIsContinuation && *wasShifted))
      || (*wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */
@@ -5448,8 +5438,8 @@ enum {
      UCOL_PSK_LIMIT
  };
  
-/** collation state enum. *_SHIFT value is how much to shift right 
- *  to get the state piece to the right. *_MASK value should be 
+/** collation state enum. *_SHIFT value is how much to shift right
+ *  to get the state piece to the right. *_MASK value should be
   *  ANDed with the shifted state. This data is stored in state[1]
   *  field.
   */
@@ -5457,7 +5447,7 @@ enum {
      UCOL_PSK_LEVEL_SHIFT = 0,      /** level identificator. stores an enum value from above */
      UCOL_PSK_LEVEL_MASK = 7,       /** three bits */
      UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */
-    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,  
+    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,
      /** can be only 0 or 1, since we get up to two bytes from primary or quaternary
       *  This field is also used to denote that the French secondary level is finished
       */
@@ -5466,37 +5456,40 @@ enum {
      UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already written */
      UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */
      /** When we do French we need to reverse secondary values. However, continuations
-     *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2c3ba 
+     *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2c3ba
       */
-    UCOL_PSK_USED_ELEMENTS_SHIFT = 7,
-    UCOL_PSK_USED_ELEMENTS_MASK = 0x3FF,
-    UCOL_PSK_ITER_SKIP_SHIFT = 17,
-    UCOL_PSK_ITER_SKIP_MASK = 0x7FFF
+    UCOL_PSK_BOCSU_BYTES_SHIFT = 7,
+    UCOL_PSK_BOCSU_BYTES_MASK = 3,
+    UCOL_PSK_CONSUMED_CES_SHIFT = 9,
+    UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF
  };
  
+// macro calculating the number of expansion CEs available
+#define uprv_numAvailableExpCEs(s) (s).CEpos - (s).toReturn
  
-/** main sortkey part procedure. On the first call, 
- *  you should pass in a collator, an iterator, empty state 
+
+/** main sortkey part procedure. On the first call,
+ *  you should pass in a collator, an iterator, empty state
   *  state[0] == state[1] == 0, a buffer to hold results
   *  number of bytes you need and an error code pointer.
   *  Make sure your buffer is big enough to hold the wanted
- *  number of sortkey bytes. I don't check. 
- *  The only meaningful status you can get back is 
- *  U_BUFFER_OVERFLOW_ERROR, which basically means that you 
+ *  number of sortkey bytes. I don't check.
+ *  The only meaningful status you can get back is
+ *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
   *  have been dealt a raw deal and that you probably won't
   *  be able to use partial sortkey generation for this
   *  particular combination of string and collator. This
   *  is highly unlikely, but you should still check the error code.
- *  Any other status means that you're not in a sane situation 
- *  anymore. After the first call, preserve state values and 
+ *  Any other status means that you're not in a sane situation
+ *  anymore. After the first call, preserve state values and
   *  use them on subsequent calls to obtain more bytes of a sortkey.
   *  Use until the number of bytes written is smaller than the requested
   *  number of bytes. Generated sortkey is not compatible with the
   *  one generated by ucol_getSortKey, as we don't do any compression.
   *  However, levels are still terminated by a 1 (one) and the sortkey
   *  is terminated by a 0 (zero). Identical level is the same as in the
- *  regular sortkey - internal bocu-1 implementation is used. 
- *  For curious, although you cannot do much about this, here is 
+ *  regular sortkey - internal bocu-1 implementation is used.
+ *  For curious, although you cannot do much about this, here is
   *  the structure of state words.
   *  state[0] - iterator state. Depends on the iterator implementation,
   *             but allows the iterator to continue where it stopped in
@@ -5511,16 +5504,12 @@ enum {
   *   4       - was shifted. Whether the previous iteration finished in the
   *             shifted state.
   *   5, 6    - French continuation bytes written. See the comment in the enum
- *   7..16   - Used elements. Number of CEs that were already used from the 
- *             expansion buffer or number of bytes from a bocu sequence on 
+ *   7,8     - Bocsu bytes used. Number of bytes from a bocu sequence on
   *             the identical level.
- *  17..31   - iterator skip. Number of move operations iterator needs to 
- *             skip from the current state in order to continue. This is used
- *             only if normalization is turned on, since the normalizing iterator
- *             can return undefined state, which means that it's in the middle 
- *             of normalizing sequence.
+ *   9..31   - CEs consumed. Number of getCE or next32 operations performed 
+ *             since thes last successful update of the iterator state.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  ucol_nextSortKeyPart(const UCollator *coll,
                       UCharIterator *iter,
                       uint32_t state[2],
@@ -5530,6 +5519,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      if(status==NULL || U_FAILURE(*status)) {
          return 0;
      }
+    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
      if( coll==NULL || iter==NULL ||
          state==NULL ||
          count<0 || (count>0 && dest==NULL)
@@ -5537,12 +5527,14 @@ ucol_nextSortKeyPart(const UCollator *coll,
          *status=U_ILLEGAL_ARGUMENT_ERROR;
      }
  
+    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
+                  coll, iter, state[0], state[1], dest, count);
  
      if(count==0) {
          /* nothing to do */
+        UTRACE_EXIT_VALUE(0);
          return 0;
      }
-
      /** Setting up situation according to the state we got from the previous iteration */
      // The state of the iterator from the previous invocation
      uint32_t iterState = state[0];
@@ -5552,19 +5544,19 @@ ucol_nextSortKeyPart(const UCollator *coll,
      int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
      // Have we written only one byte from a two byte primary in the previous iteration?
      // Also on secondary level - have we finished with the French secondary?
-    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK; 
+    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
      // number of bytes in the continuation buffer for French
      int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
-    // Skip the CEs that we got from an extraction
-    // and delivered in the previous call
-    int32_t usedElements = (state[1] >> UCOL_PSK_USED_ELEMENTS_SHIFT) & UCOL_PSK_USED_ELEMENTS_MASK;
-    // Number of times to skip because the iterator returned
-    // UITER_NO_STATE when it was stopped in the last iteration, so we had to save the 
-    // last valid state.
-    int32_t iterSkips = (state[1] >> UCOL_PSK_ITER_SKIP_SHIFT) & UCOL_PSK_ITER_SKIP_MASK;
+    // Number of bytes already written from a bocsu sequence. Since
+    // the longes bocsu sequence is 4 long, this can be up to 3. 
+    int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
+    // Number of elements that need to be consumed in this iteration because
+    // the iterator returned UITER_NO_STATE at the end of the last iteration, 
+    // so we had to save the last valid state.
+    int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
  
      /** values that depend on the collator attributes */
-    // strength of the collator. 
+    // strength of the collator.
      int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
      // maximal level of the partial sortkey. Need to take whether case level is done
      int32_t maxLevel = 0;
@@ -5584,7 +5576,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          }
      }
      // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
-    uint8_t UCOL_HIRAGANA_QUAD = 
+    uint8_t UCOL_HIRAGANA_QUAD =
        (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
      // Boundary value that decides whether a CE is shifted or not
      uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
@@ -5600,7 +5592,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      s.iterator = iter;
      s.flags |= UCOL_USE_ITERATOR;
      // This variable tells us whether we have produced some other levels in this iteration
-    // before we moved to the identical level. In that case, we need to switch the 
+    // before we moved to the identical level. In that case, we need to switch the
      // type of the iterator.
      UBool doingIdenticalFromStart = FALSE;
      // Normalizing iterator
@@ -5616,15 +5608,17 @@ ucol_nextSortKeyPart(const UCollator *coll,
        s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
        s.flags &= ~UCOL_ITER_NORM;
        if(U_FAILURE(*status)) {
+        UTRACE_EXIT_STATUS(*status);
          return 0;
        }
      } else if(level == UCOL_PSK_IDENTICAL) {
-      // for identical level, we need a NFD iterator. We need to instantiate it here, since we 
+      // for identical level, we need a NFD iterator. We need to instantiate it here, since we
        // will be updating the state - and this cannot be done on an ordinary iterator.
        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
        s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
        s.flags &= ~UCOL_ITER_NORM;
        if(U_FAILURE(*status)) {
+        UTRACE_EXIT_STATUS(*status);
          return 0;
        }
        doingIdenticalFromStart = TRUE;
@@ -5649,41 +5643,27 @@ ucol_nextSortKeyPart(const UCollator *coll,
          /* reset to previous state */
        s.iterator->setState(s.iterator, iterState, status);
        if(U_FAILURE(*status)) {
+          UTRACE_EXIT_STATUS(*status);
            return 0;
        }
      }
  
-    // Then, we may have to move more, if the normalizing iterator
-    // was going through a normalizing sequence.
-    if(iterSkips) {
-      // if we are on secondary level AND we do French, we need to go backward instead of forward
-      if(level == UCOL_PSK_SECONDARY && doingFrench) {
-        s.iterator->move(s.iterator, -iterSkips, UITER_CURRENT);
-      } else {
-        s.iterator->move(s.iterator, iterSkips, UITER_CURRENT);
-      }
-    }
  
  
-    // Number of expansion CEs that were already consumed in the
-    // previous iteration for the last code point processed. We
-    // want to clean out the expansion buffer, so that we can 
-    // get correct CEs. This value is persistent over iterations,
-    // since we can have several iterations on the one expansion
-    // buffer.
-    int32_t consumedExpansionCEs = usedElements;
-    // Number of bytes already writted from a bocsu sequence. Since
-    // the longes bocsu sequence is 4 long, this can be up to 3. It
-    // shares the state field with consumedExpansionCEs value, since
-    // they cannot simultanously appear on the same level
-    int32_t bocsuBytesUsed = 0;
-    // Clean out the expansion buffer unless we are on 
-    // identical level. In that case we use this field
-    // to store the number of bytes already written
-    // from the previous bocsu sequence.
-    if(level < UCOL_PSK_IDENTICAL && usedElements != 0) {
-      while(usedElements-->0) {
-        // If we're doing French and we are on the secondary level, 
+    // This variable tells us whether we can attempt to update the state
+    // of iterator. Situations where we don't want to update iterator state
+    // are the existence of expansion CEs that are not yet processed, and
+    // finishing the case level without enough space in the buffer to insert
+    // a level terminator.
+    UBool canUpdateState = TRUE;
+
+    // Consume all the CEs that were consumed at the end of the previous
+    // iteration without updating the iterator state. On identical level,
+    // consume the code points. 
+    int32_t counter = cces;
+    if(level < UCOL_PSK_IDENTICAL) {
+      while(counter-->0) {
+        // If we're doing French and we are on the secondary level,
          // we go backwards.
          if(level == UCOL_PSK_SECONDARY && doingFrench) {
            CE = ucol_IGetPrevCE(coll, &s, status);
@@ -5693,28 +5673,26 @@ ucol_nextSortKeyPart(const UCollator *coll,
          if(CE==UCOL_NO_MORE_CES) {
            /* should not happen */
            *status=U_INTERNAL_PROGRAM_ERROR;
+          UTRACE_EXIT_STATUS(*status);
            return 0;
          }
+        if(uprv_numAvailableExpCEs(s)) {
+          canUpdateState = FALSE;
+        }
        }
      } else {
-      bocsuBytesUsed = usedElements;
+      while(counter-->0) {
+        uiter_next32(s.iterator);
+      }
      }
  
-    // This variable prevents the adjusting of iterator
-    // skip variable when we are the first time on a 
-    // level. I hope there is a better way to do it, but
-    // I could not think of it.
-    UBool firstTimeOnLevel = TRUE;
      // French secondary needs to know whether the iterator state of zero came from previous level OR
      // from a new invocation...
      UBool wasDoingPrimary = FALSE;
-    // Case level is kind of goofy. This variable tells us that 
-    // we are still not done with the case level.
-    UBool dontAdvanceIteratorBecauseWeNeedALevelTerminator = FALSE; 
      // destination buffer byte counter. When this guy
      // gets to count, we're done with the iteration
-    int32_t i = 0; 
-    // used to count the zero bytes written after we 
+    int32_t i = 0;
+    // used to count the zero bytes written after we
      // have finished with the sort key
      int32_t j = 0;
  
@@ -5726,7 +5704,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      // way to exit is a goto saveState, which happens when we have filled
      // out our buffer.
      switch(level) {
-    case UCOL_PSK_PRIMARY:          
+    case UCOL_PSK_PRIMARY:
        wasDoingPrimary = TRUE;
        for(;;) {
            if(i==count) {
@@ -5735,19 +5713,15 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // We should save the state only if we
            // are sure that we are done with the
            // previous iterator state
-          if(consumedExpansionCEs == 0 && byteCountOrFrenchDone == 0) {
+          if(canUpdateState && byteCountOrFrenchDone == 0) {
              newState = s.iterator->getState(s.iterator);
-            if(newState != UITER_NO_STATE) {
+            if(newState != UITER_NO_STATE) {         
                iterState = newState;
-              iterSkips = 0;
-            } else {
-              if(!firstTimeOnLevel && !byteCountOrFrenchDone) {
-                iterSkips++;
-              }
+              cces = 0;
              }
            }
-          firstTimeOnLevel = FALSE;
            CE = ucol_IGetNextCE(coll, &s, status);
+          cces++;
            if(CE==UCOL_NO_MORE_CES) {
                // Add the level separator
                terminatePSKLevel(level, maxLevel, i, dest);
@@ -5755,6 +5729,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
                // Restart the iteration an move to the
                // second level
                s.iterator->move(s.iterator, 0, UITER_START);
+              cces = 0;
                level = UCOL_PSK_SECONDARY;
                break;
            }
@@ -5770,24 +5745,18 @@ ucol_nextSortKeyPart(const UCollator *coll,
                if((CE &=0xff)!=0) {
                    if(i==count) {
                        /* overflow */
-                      byteCountOrFrenchDone=1;
+                      byteCountOrFrenchDone = 1;
+                      cces--;
                        goto saveState;
                    }
                    dest[i++]=(uint8_t)CE;
                }
              }
            }
-          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) { 
-            // s.pos != NULL means there is a normalization buffer in effect
-            // in iterative case, this means that we are doing Thai (maybe discontiguos)
-            consumedExpansionCEs++;
+          if(uprv_numAvailableExpCEs(s)) {
+            canUpdateState = FALSE;
            } else {
-            consumedExpansionCEs = 0;
-          }
-          if(s.pos && *s.pos == 0) { 
-            // maybe it is the end of Thai - we have to have
-            // an extra skip
-            iterSkips++;
+            canUpdateState = TRUE;
            }
        }
        /* fall through to next level */
@@ -5801,26 +5770,23 @@ ucol_nextSortKeyPart(const UCollator *coll,
              // We should save the state only if we
              // are sure that we are done with the
              // previous iterator state
-            if(consumedExpansionCEs == 0) {
+            if(canUpdateState) {
                newState = s.iterator->getState(s.iterator);
                if(newState != UITER_NO_STATE) {
                  iterState = newState;
-                iterSkips = 0;
-              } else {
-                if(!firstTimeOnLevel) {
-                  iterSkips++;
-                }
+                cces = 0;
                }
              }
-            firstTimeOnLevel = FALSE;
              CE = ucol_IGetNextCE(coll, &s, status);
+            cces++;
              if(CE==UCOL_NO_MORE_CES) {
                  // Add the level separator
                  terminatePSKLevel(level, maxLevel, i, dest);
-                byteCountOrFrenchDone=0;
+                byteCountOrFrenchDone = 0;
                  // Restart the iteration an move to the
                  // second level
-                s.iterator->move(s.iterator, 0, UITER_START);               
+                s.iterator->move(s.iterator, 0, UITER_START);
+                cces = 0;
                  level = UCOL_PSK_CASE;
                  break;
              }
@@ -5830,55 +5796,49 @@ ucol_nextSortKeyPart(const UCollator *coll,
                  dest[i++]=(uint8_t)CE;
                }
              }
-            if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
-              consumedExpansionCEs++;
+            if(uprv_numAvailableExpCEs(s)) {
+              canUpdateState = FALSE;
              } else {
-              consumedExpansionCEs = 0;
-            }
-            if(s.pos && *s.pos == 0) { 
-              iterSkips++;
+              canUpdateState = TRUE;
              }
            }
          } else { // French secondary processing
            uint8_t frenchBuff[UCOL_MAX_BUFFER];
            int32_t frenchIndex = 0;
            // Here we are going backwards.
-          // If the iterator is at the beggining, it should be 
-          // moved to end. 
+          // If the iterator is at the beggining, it should be
+          // moved to end.
            if(wasDoingPrimary) {
              s.iterator->move(s.iterator, 0, UITER_LIMIT);
+            cces = 0;
            }
            for(;;) {
              if(i == count) {
                goto saveState;
              }
-            if(consumedExpansionCEs == 0) {
+            if(canUpdateState) {
                newState = s.iterator->getState(s.iterator);
                if(newState != UITER_NO_STATE) {
                  iterState = newState;
-                iterSkips = 0;
-              } else { 
-                if(!firstTimeOnLevel) {
-                  iterSkips++;
-                }
-              }
+                cces = 0;
+              } 
              }
-            firstTimeOnLevel = FALSE;
              CE = ucol_IGetPrevCE(coll, &s, status);
+            cces++;
              if(CE==UCOL_NO_MORE_CES) {
                  // Add the level separator
                  terminatePSKLevel(level, maxLevel, i, dest);
-                byteCountOrFrenchDone=0;
+                byteCountOrFrenchDone = 0;
                  // Restart the iteration an move to the next level
                  s.iterator->move(s.iterator, 0, UITER_START);
                  level = UCOL_PSK_CASE;
                  break;
              }
-            if(isContinuation(CE)) { // if it's a continuation, we want to save it and 
+            if(isContinuation(CE)) { // if it's a continuation, we want to save it and
                // reverse when we get a first non-continuation CE.
                CE >>= 8;
                frenchBuff[frenchIndex++] = (uint8_t)CE;
-            } else if(!isShiftedCE(CE, LVT, &wasShifted)) { 
+            } else if(!isShiftedCE(CE, LVT, &wasShifted)) {
                CE >>= 8; /* get secondary */
                if(!frenchIndex) {
                  if(CE != 0) {
@@ -5894,13 +5854,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
                  }
                }
              }
-            if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
-              consumedExpansionCEs++;
+            if(uprv_numAvailableExpCEs(s)) {
+              canUpdateState = FALSE;
              } else {
-              consumedExpansionCEs = 0;
-            }
-            if(s.pos && *s.pos == 0) {
-              iterSkips++;
+              canUpdateState = TRUE;
              }
            }
          }
@@ -5921,29 +5878,25 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // We should save the state only if we
            // are sure that we are done with the
            // previous iterator state
-          if(consumedExpansionCEs == 0) {
+          if(canUpdateState) {
              newState = s.iterator->getState(s.iterator);
              if(newState != UITER_NO_STATE) {
                iterState = newState;
-              iterSkips = 0;
-            } else {
-              if(!firstTimeOnLevel) {
-                iterSkips++;
-              }
+              cces = 0;
              }
            }
-          firstTimeOnLevel = FALSE;
            CE = ucol_IGetNextCE(coll, &s, status);
+          cces++;
            if(CE==UCOL_NO_MORE_CES) {
              // On the case level we might have an unfinished
              // case byte. Add one if it's started.
              if(caseShift != UCOL_CASE_SHIFT_START) {
                dest[i++] = caseByte;
              }
-            // This is kind of tricky - situation where
-            // we need to keep the iterator in the old 
-            // state, but don't need to bring anything
-            // to the next invocation
+            cces = 0;
+            // We have finished processing CEs on this level.
+            // However, we don't know if we have enough space
+            // to add a case level terminator.
              if(i < count) {
                // Add the level separator
                terminatePSKLevel(level, maxLevel, i, dest);
@@ -5952,16 +5905,19 @@ ucol_nextSortKeyPart(const UCollator *coll,
                s.iterator->move(s.iterator, 0, UITER_START);
                level = UCOL_PSK_TERTIARY;
              } else {
-              dontAdvanceIteratorBecauseWeNeedALevelTerminator = TRUE;
+              canUpdateState = FALSE;
              }
              break;
            }
  
            if(!isShiftedCE(CE, LVT, &wasShifted)) {
-            if(!isContinuation(CE)) {
+            if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) {
+                // do the case level if we need to do it. We don't want to calculate
+                // case level for primary ignorables if we have only primary strength and case level
+                // otherwise we would break well formedness of CEs 
                CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
                caseBits = (uint8_t)(CE & 0xC0);
-              // this copies the case level logic from the 
+              // this copies the case level logic from the
                // sort key generation code
                if(CE != 0) {
                  if(coll->caseFirst == UCOL_UPPER_FIRST) {
@@ -5996,13 +5952,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
              }
            }
            // Not sure this is correct for the case level - revisit
-          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
-            consumedExpansionCEs++;
+          if(uprv_numAvailableExpCEs(s)) {
+            canUpdateState = FALSE;
            } else {
-            consumedExpansionCEs = 0;
-          }
-          if(s.pos && *s.pos == 0) {
-            iterSkips++;
+            canUpdateState = TRUE;
            }
          }
        } else {
@@ -6018,26 +5971,23 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // We should save the state only if we
            // are sure that we are done with the
            // previous iterator state
-          if(consumedExpansionCEs == 0) {
+          if(canUpdateState) {
              newState = s.iterator->getState(s.iterator);
              if(newState != UITER_NO_STATE) {
                iterState = newState;
-              iterSkips = 0;
-            } else {
-              if(!firstTimeOnLevel) {
-                iterSkips++;
-              }
+              cces = 0;
              }
            }
-          firstTimeOnLevel = FALSE;
            CE = ucol_IGetNextCE(coll, &s, status);
+          cces++;
            if(CE==UCOL_NO_MORE_CES) {
                // Add the level separator
                terminatePSKLevel(level, maxLevel, i, dest);
-              byteCountOrFrenchDone=0;
+              byteCountOrFrenchDone = 0;
                // Restart the iteration an move to the
                // second level
                s.iterator->move(s.iterator, 0, UITER_START);
+              cces = 0;
                level = UCOL_PSK_QUATERNARY;
                break;
            }
@@ -6056,13 +6006,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
                dest[i++]=(uint8_t)CE;
              }
            }
-          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
-            consumedExpansionCEs++;
+          if(uprv_numAvailableExpCEs(s)) {
+            canUpdateState = FALSE;
            } else {
-            consumedExpansionCEs = 0;
-          }
-          if(s.pos && *s.pos == 0) {
-            iterSkips++;
+            canUpdateState = TRUE;
            }
          }
        } else {
@@ -6080,27 +6027,24 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // We should save the state only if we
            // are sure that we are done with the
            // previous iterator state
-          if(consumedExpansionCEs == 0) {
+          if(canUpdateState) {
              newState = s.iterator->getState(s.iterator);
              if(newState != UITER_NO_STATE) {
                iterState = newState;
-              iterSkips = 0;
-            } else {
-              if(!firstTimeOnLevel) {
-                iterSkips++;
-              }
+              cces = 0;
              }
            }
-          firstTimeOnLevel = FALSE;
            CE = ucol_IGetNextCE(coll, &s, status);
+          cces++;
            if(CE==UCOL_NO_MORE_CES) {
                // Add the level separator
                terminatePSKLevel(level, maxLevel, i, dest);
-              //dest[i++] = UCOL_LEVELTERMINATOR; 
-              byteCountOrFrenchDone=0;
+              //dest[i++] = UCOL_LEVELTERMINATOR;
+              byteCountOrFrenchDone = 0;
                // Restart the iteration an move to the
                // second level
                s.iterator->move(s.iterator, 0, UITER_START);
+              cces = 0;
                level = UCOL_PSK_QUIN;
                break;
            }
@@ -6115,7 +6059,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
                if((CE &=0xff)!=0) {
                    if(i==count) {
                        /* overflow */
-                      byteCountOrFrenchDone=1;
+                      byteCountOrFrenchDone = 1;
                        goto saveState;
                    }
                    dest[i++]=(uint8_t)CE;
@@ -6131,13 +6075,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
                }
              }
            }
-          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
-            consumedExpansionCEs++;
+          if(uprv_numAvailableExpCEs(s)) {
+            canUpdateState = FALSE;
            } else {
-            consumedExpansionCEs = 0;
-          }
-          if(s.pos && *s.pos == 0) {
-            iterSkips++;
+            canUpdateState = TRUE;
            }
          }
        } else {
@@ -6153,7 +6094,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
        if(strength >= UCOL_IDENTICAL) {
          UChar32 first, second;
          int32_t bocsuBytesWritten = 0;
-        // We always need to do identical on 
+        // We always need to do identical on
          // the NFD form of the string.
          if(normIter == NULL) {
            // we arrived from the level below and
@@ -6161,10 +6102,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // therefore, we need to make a fresh NFD iterator
            normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
            s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
-        } else if(!doingIdenticalFromStart) { 
+        } else if(!doingIdenticalFromStart) {
            // there is an iterator, but we did some other levels.
-          // therefore, we have a FCD iterator - need to make 
-          // a NFD one. 
+          // therefore, we have a FCD iterator - need to make
+          // a NFD one.
            // normIter being at the beginning does not guarantee
            // that the underlying iterator is at the beginning
            iter->move(iter, 0, UITER_START);
@@ -6173,6 +6114,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          // At this point we have a NFD iterator that is positioned
          // in the right place
          if(U_FAILURE(*status)) {
+          UTRACE_EXIT_STATUS(*status);
            return 0;
          }
          first = uiter_previous32(s.iterator);
@@ -6192,7 +6134,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
              goto saveState;
            }
  
-          // On identical level, we will always save 
+          // On identical level, we will always save
            // the state if we reach this point, since
            // we don't depend on getNextCE for content
            // all the content is in our buffer and we
@@ -6201,13 +6143,12 @@ ucol_nextSortKeyPart(const UCollator *coll,
            newState = s.iterator->getState(s.iterator);
            if(newState != UITER_NO_STATE) {
              iterState = newState;
-            iterSkips = 0;
-          } else {
-            iterSkips++;
-          }
+            cces = 0;
+          } 
  
            uint8_t buff[4];
            second = uiter_next32(s.iterator);
+          cces++;
  
            // end condition for identical level
            if(second == U_SENTINEL) {
@@ -6242,46 +6183,33 @@ ucol_nextSortKeyPart(const UCollator *coll,
        break;
      default:
        *status = U_INTERNAL_PROGRAM_ERROR;
+      UTRACE_EXIT_STATUS(*status);
        return 0;
      }
  
  saveState:
      // Now we need to return stuff. First we want to see whether we have
      // done everything for the current state of iterator.
-    if(consumedExpansionCEs || byteCountOrFrenchDone 
-      || dontAdvanceIteratorBecauseWeNeedALevelTerminator) {
-      // Any of above mean that the previous transaction 
-      // wasn't finished and that we should store the 
+    if(byteCountOrFrenchDone
+    || canUpdateState == FALSE
+    || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE) {
+      // Any of above mean that the previous transaction
+      // wasn't finished and that we should store the
        // previous iterator state.
        state[0] = iterState;
      } else {
-      // The transaction is complete. We will continue in 
-      // next iteration.
-      if((newState = s.iterator->getState(s.iterator))!= UITER_NO_STATE) {
+      // The transaction is complete. We will continue in the next iteration.
          state[0] = s.iterator->getState(s.iterator);
-        iterSkips = 0;
-      } else {
-        state[0] = iterState;
-        iterSkips++;
-      }
+        cces = 0;
      }
-    // Store the number of elements processed. On CE levels, this is
-    // the number of expansion CEs processed. On identical level, this
-    // is the number of bocsu bytes written.
-    if(level < UCOL_PSK_IDENTICAL) {
-      if((consumedExpansionCEs & UCOL_PSK_USED_ELEMENTS_MASK) != consumedExpansionCEs) {
-        *status = U_INDEX_OUTOFBOUNDS_ERROR;
-      }
-      state[1] = (consumedExpansionCEs & UCOL_PSK_USED_ELEMENTS_MASK) << UCOL_PSK_USED_ELEMENTS_SHIFT;
-    } else {
-      if((bocsuBytesUsed & UCOL_PSK_USED_ELEMENTS_MASK) != bocsuBytesUsed) {
-        *status = U_INDEX_OUTOFBOUNDS_ERROR;
-      }
-      state[1] = (bocsuBytesUsed & UCOL_PSK_USED_ELEMENTS_MASK) << UCOL_PSK_USED_ELEMENTS_SHIFT;
+    // Store the number of bocsu bytes written.
+    if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
+      *status = U_INDEX_OUTOFBOUNDS_ERROR;
      }
+    state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
  
      // Next we put in the level of comparison
-    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT); 
+    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
  
      // If we are doing French, we need to store whether we have just finished the French level
      if(level == UCOL_PSK_SECONDARY && doingFrench) {
@@ -6294,12 +6222,12 @@ saveState:
      if(wasShifted) {
        state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
      }
-    // Check for iterSkips overflow
-    if((iterSkips & UCOL_PSK_ITER_SKIP_MASK) != iterSkips) {
+    // Check for cces overflow
+    if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
        *status = U_INDEX_OUTOFBOUNDS_ERROR;
      }
-    // Store iterSkips
-    state[1] |= ((iterSkips & UCOL_PSK_ITER_SKIP_MASK) << UCOL_PSK_ITER_SKIP_SHIFT);
+    // Store cces
+    state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
  
      // Check for French overflow
      if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
@@ -6315,13 +6243,16 @@ saveState:
      }
  
      // Return number of meaningful sortkey bytes.
+    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
+                  dest,i, state[0], state[1]);
+    UTRACE_EXIT_VALUE(i);
      return i;
  }
  
  /**
   * Produce a bound for a given sortkey and a number of levels.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  ucol_getBound(const uint8_t       *source,
          int32_t             sourceLength,
          UColBoundMode       boundType,
@@ -6329,7 +6260,7 @@ ucol_getBound(const uint8_t       *source,
          uint8_t             *result,
          int32_t             resultLength,
          UErrorCode          *status) {
-  // consistency checks 
+  // consistency checks
    if(status == NULL || U_FAILURE(*status)) {
      return 0;
    }
@@ -6345,7 +6276,7 @@ ucol_getBound(const uint8_t       *source,
      if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
        noOfLevels--;
      }
-  } while (noOfLevels > 0 
+  } while (noOfLevels > 0
      && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
  
    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
@@ -6356,7 +6287,7 @@ ucol_getBound(const uint8_t       *source,
  
    // READ ME: this code assumes that the values for boundType
    // enum will not changes. They are set so that the enum value
-  // corresponds to the number of extra bytes each bound type 
+  // corresponds to the number of extra bytes each bound type
    // needs.
    if(result != NULL && resultLength >= sourceIndex+boundType) {
      uprv_memcpy(result, source, sourceIndex);
@@ -6381,67 +6312,10 @@ ucol_getBound(const uint8_t       *source,
  
      return sourceIndex;
    } else {
-    return sourceIndex+boundType+1; 
-  }
-}
-
-static
-inline void uprv_appendByteToHexString(char *dst, uint8_t val) {
-  uint32_t len = (uint32_t)uprv_strlen(dst);
-  *(dst+len) = T_CString_itosOffset((val >> 4));
-  *(dst+len+1) = T_CString_itosOffset((val & 0xF));
-  *(dst+len+2) = 0;
-}
-
-/* this function makes a string with representation of a sortkey */
-U_CAPI char* U_EXPORT2 ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
-  int32_t strength = UCOL_PRIMARY;
-  uint32_t res_size = 0;
-  UBool doneCase = FALSE;
-
-  char *current = buffer;
-  const uint8_t *currentSk = sortkey;
-
-  uprv_strcpy(current, "[");
-
-  while(strength <= UCOL_QUATERNARY && strength <= coll->strength) {
-    if(strength > UCOL_PRIMARY) {
-      strcat(current, " . ");
-    }
-    while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
-      uprv_appendByteToHexString(current, *currentSk++);
-      uprv_strcat(current, " ");
-    }
-    if(coll->caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
-        doneCase = TRUE;
-    } else if(coll->caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
-      strength ++;
-    }
-    uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
-    if(strength == UCOL_QUATERNARY && coll->alternateHandling == UCOL_NON_IGNORABLE) {
-      break;
-    }
-  }
-
-  if(coll->strength == UCOL_IDENTICAL) {
-    uprv_strcat(current, " . ");
-    while(*currentSk != 0) {
-      uprv_appendByteToHexString(current, *currentSk++);
-      uprv_strcat(current, " ");
-    }
-
-    uprv_appendByteToHexString(current, *currentSk++);
-  }
-  uprv_strcat(current, "]");
-
-  if(res_size > *len) {
-    return NULL;
+    return sourceIndex+boundType+1;
    }
-
-  return buffer;
  }
  
-
  /****************************************************************************/
  /* Following are the functions that deal with the properties of a collator  */
  /* there are new APIs and some compatibility APIs                           */
@@ -6484,7 +6358,7 @@ ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
      if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary
        coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary
        coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24);
-    } else { // normal case 
+    } else { // normal case
        coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift);
      }
      *secShift -= 8;
@@ -6539,9 +6413,9 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
      if(ch < 0x100) {
        CE = coll->latinOneMapping[ch];
      } else {
-      CE = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
-      if(CE == UCOL_NOT_FOUND) {
-        CE = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
+      CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+      if(CE == UCOL_NOT_FOUND && coll->UCA) {
+        CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
        }
      }
      if(CE < UCOL_NOT_FOUND) {
@@ -6549,8 +6423,9 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
      } else {
        switch (getCETag(CE)) {
        case EXPANSION_TAG:
+      case DIGIT_TAG:
          ucol_setText(it, &ch, 1, status);
-        while((CE = ucol_next(it, status)) != UCOL_NULLORDER) {
+        while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
            if(primShift < 0 || secShift < 0 || terShift < 0) {
              coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
              coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
@@ -6569,13 +6444,13 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
          {
            if((CE & 0x00FFF000) != 0) {
              *status = U_UNSUPPORTED_ERROR;
-            return FALSE;
+            goto cleanup_after_failure;
            }
  
            const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  
            CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
-    
+
            coll->latinOneCEs[ch] = CE;
            coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
            coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
@@ -6585,7 +6460,7 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
            do {
                CE = *(coll->contractionCEs +
                    (UCharOffset - coll->contractionIndex));
-              if(getCETag(CE) == EXPANSION_TAG) {
+              if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
                  uint32_t size;
                  uint32_t i;    /* general counter */
                  uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
@@ -6620,90 +6495,96 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
                  coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
                  coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
                  contractionOffset++;
-              }                
+              }
                UCharOffset++;
                primShift = 24; secShift = 24; terShift = 24;
                if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
                  if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
-                  return FALSE;
+                  goto cleanup_after_failure;
                  }
                }
            } while(*UCharOffset != 0xFFFF);
          }
          break;
        default:
-        coll->latinOneFailed = TRUE;
-        result = FALSE;
-        break;
+        goto cleanup_after_failure;
        }
      }
    }
-  ucol_closeElements(it);
    // compact table
    if(contractionOffset < coll->latinOneTableLen) {
      if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
-      return FALSE;
+      goto cleanup_after_failure;
      }
    }
+  ucol_closeElements(it);
    return result;
+
+cleanup_after_failure:
+  // status should already be set before arriving here.
+  coll->latinOneFailed = TRUE;
+  ucol_closeElements(it);
+  return FALSE;
  }
  
  void ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
-      if(U_SUCCESS(*status)) {
-        if(coll->caseFirst == UCOL_UPPER_FIRST) {
-          coll->caseSwitch = UCOL_CASE_SWITCH;
-        } else {
-          coll->caseSwitch = UCOL_NO_CASE_SWITCH;
-        }
+  if(U_SUCCESS(*status)) {
+    if(coll->caseFirst == UCOL_UPPER_FIRST) {
+      coll->caseSwitch = UCOL_CASE_SWITCH;
+    } else {
+      coll->caseSwitch = UCOL_NO_CASE_SWITCH;
+    }
  
-        if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) {
-          coll->tertiaryMask = UCOL_REMOVE_CASE;
-          coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
-          coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_OFF;
-          coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
-          coll->tertiaryBottom = UCOL_COMMON_BOT3;
-        } else {
-          coll->tertiaryMask = UCOL_KEEP_CASE;
-          coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
-          if(coll->caseFirst == UCOL_UPPER_FIRST) {
-            coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
-            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
-            coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
-          } else {
-            coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
-            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
-            coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
-          }
-        }
+    if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) {
+      coll->tertiaryMask = UCOL_REMOVE_CASE;
+      coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
+      coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_OFF;
+      coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
+      coll->tertiaryBottom = UCOL_COMMON_BOT3;
+    } else {
+      coll->tertiaryMask = UCOL_KEEP_CASE;
+      coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
+      if(coll->caseFirst == UCOL_UPPER_FIRST) {
+        coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
+        coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
+        coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
+      } else {
+        coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
+        coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
+        coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
+      }
+    }
  
-        /* Set the compression values */
-        uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1);
-        coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */
-        coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
+    /* Set the compression values */
+    uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1);
+    coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */
+    coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
  
-        if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
-          && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE) {
-          coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
-        } else {
-          coll->sortKeyGen = ucol_calcSortKey;
-        }
-        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY
-          && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed) {
-          if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
-            if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
-              //fprintf(stderr, "F");
-              coll->latinOneUse = TRUE;
-            } else {
-              coll->latinOneUse = FALSE;
-            }
-          } else { // latin1Table exists and it doesn't need to be regenerated, just use it
-            coll->latinOneUse = TRUE;
-          }
+    if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
+      && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE) {
+      coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
+    } else {
+      coll->sortKeyGen = ucol_calcSortKey;
+    }
+    if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
+      && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed) {
+      if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
+        if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
+          //fprintf(stderr, "F");
+          coll->latinOneUse = TRUE;
          } else {
            coll->latinOneUse = FALSE;
-        }     
+        }
+        if(*status == U_UNSUPPORTED_ERROR) {
+          *status = U_ZERO_ERROR;
+        }
+      } else { // latin1Table exists and it doesn't need to be regenerated, just use it
+        coll->latinOneUse = TRUE;
        }
-
+    } else {
+      coll->latinOneUse = FALSE;
+    }
+  }
  }
  
  U_CAPI uint32_t  U_EXPORT2
@@ -6738,8 +6619,10 @@ ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCod
      *status = U_PRIMARY_TOO_LONG_ERROR;
      return 0;
    }
-
-  coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
+  if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
+    coll->variableTopValueisDefault = FALSE;
+    coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
+  }
  
    return CE & UCOL_PRIMARYMASK;
  }
@@ -6756,7 +6639,11 @@ ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat
    if(U_FAILURE(*status) || coll == NULL) {
      return;
    }
-  coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
+
+  if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
+      coll->variableTopValueisDefault = FALSE;
+      coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
+  }
  }
  /* Attribute setter API */
  U_CAPI void  U_EXPORT2
@@ -6886,7 +6773,7 @@ ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value,
      }
      if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
        coll->latinOneRegenTable = TRUE;
-    } else { 
+    } else {
        coll->latinOneRegenTable = FALSE;
      }
      ucol_updateInternalState(coll, status);
@@ -6899,7 +6786,7 @@ ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
      }
      switch(attr) {
      case UCOL_NUMERIC_COLLATION:
-      return coll->numericCollation;    
+      return coll->numericCollation;
      case UCOL_HIRAGANA_QUATERNARY_MODE:
        return coll->hiraganaQ;
      case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
@@ -6942,144 +6829,6 @@ ucol_getStrength(const UCollator *coll)
  /* there are new APIs and some compatibility APIs                           */
  /****************************************************************************/
  
-U_CAPI UCollator* U_EXPORT2
-ucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize, UErrorCode *status)
-{
-    UCollator * localCollator;
-    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
-    char *stackBufferChars = (char *)stackBuffer;
-
-    if (status == NULL || U_FAILURE(*status)){
-        return 0;
-    }
-    if ((stackBuffer && !pBufferSize) || !coll){
-       *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    /* Pointers on 64-bit platforms need to be aligned
-     * on a 64-bit boundry in memory.
-     */
-    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
-        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
-        *pBufferSize -= offsetUp;
-        stackBufferChars += offsetUp;
-    }
-    stackBuffer = (void *)stackBufferChars;
-
-    if (stackBuffer && *pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
-        *pBufferSize =  bufferSizeNeeded;
-        return 0;
-    }
-    if (!stackBuffer || *pBufferSize < bufferSizeNeeded) {
-        /* allocate one here...*/
-        int32_t length;
-        const UChar * rules = ucol_getRules(coll, &length);
-
-        localCollator = ucol_openRules(rules,
-                                       length,
-                                       ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status),
-                                       ucol_getStrength(coll),
-                                       NULL,
-                                       status);
-        if (U_SUCCESS(*status))
-        {
-            *status = U_SAFECLONE_ALLOCATED_WARNING;
-        }
-    } else {
-        localCollator = (UCollator *)stackBuffer;
-        memcpy(localCollator, coll, sizeof(UCollator));
-        localCollator->freeOnClose = FALSE;
-               localCollator->requestedLocale = NULL; // zero copies of pointers
-               localCollator->validLocale = NULL;
-    }
-    return localCollator;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
-  UErrorCode status = U_ZERO_ERROR;
-  int32_t len = 0;
-  int32_t UCAlen = 0;
-  const UChar* ucaRules = 0;
-  const UChar *rules = ucol_getRules(coll, &len);
-  if(delta == UCOL_FULL_RULES) {
-    /* take the UCA rules and append real rules at the end */
-    /* UCA rules will be probably coming from the root RB */
-    ucaRules = ures_getStringByKey(coll->rb,"%%UCARULES",&UCAlen,&status);
-  }
-  if(U_FAILURE(status)) {
-    return 0;
-  }
-  if(buffer!=0 && bufferLen>0){
-      *buffer=0;
-      if(UCAlen > 0) {
-        u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
-      }
-      if(len > 0 && bufferLen > UCAlen) {
-        u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
-      }
-  }
-  return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
-}
-
-static const UChar _NUL = 0;
-
-U_CAPI const UChar* U_EXPORT2
-ucol_getRules(    const    UCollator       *coll,
-        int32_t            *length)
-{
-  if(coll->rules != NULL) {
-    *length = coll->rulesLength;
-    return coll->rules;
-  } else {
-    UErrorCode status = U_ZERO_ERROR;
-    if(coll->rb != NULL) {
-      UResourceBundle *collElem = ures_getByKey(coll->rb, "CollationElements", NULL, &status);
-      if(U_SUCCESS(status)) {
-        /*Semantic const */
-        ((UCollator *)coll)->rules = ures_getStringByKey(collElem, "Sequence", length, &status);
-        ((UCollator *)coll)->rulesLength = *length;
-        ((UCollator *)coll)->freeRulesOnClose = FALSE;
-        ures_close(collElem);
-        return coll->rules;
-      }
-    }
-    *length = 0;
-    return &_NUL;
-  }
-}
-
-U_CAPI int32_t U_EXPORT2
-ucol_getDisplayName(    const    char        *objLoc,
-            const    char        *dispLoc,
-            UChar             *result,
-            int32_t         resultLength,
-            UErrorCode        *status)
-{
-
-  if(U_FAILURE(*status)) return -1;
-  UnicodeString dst;
-  if(!(result==NULL && resultLength==0)) {
-    // NULL destination for pure preflighting: empty dummy string
-    // otherwise, alias the destination buffer
-    dst.setTo(result, 0, resultLength);
-  }
-  Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
-  return dst.extract(result, resultLength, *status);
-}
-
-U_CAPI const char* U_EXPORT2
-ucol_getAvailable(int32_t index)
-{
-  return uloc_getAvailable(index);
-}
-
-U_CAPI int32_t U_EXPORT2
-ucol_countAvailable()
-{
-  return uloc_countAvailable();
-}
-
  U_CAPI void U_EXPORT2
  ucol_getVersion(const UCollator* coll,
                  UVersionInfo versionInfo)
@@ -7102,7 +6851,11 @@ ucol_getVersion(const UCollator* coll,
      versionInfo[0] = (uint8_t)(cmbVersion>>8);
      versionInfo[1] = (uint8_t)cmbVersion;
      versionInfo[2] = coll->image->version[1];
-    versionInfo[3] = UCA->image->UCAVersion[0];
+    if(coll->UCA) {
+        versionInfo[3] = coll->UCA->image->UCAVersion[0];
+    } else {
+        versionInfo[3] = 0;
+    }
  }
  
  
@@ -7112,17 +6865,15 @@ ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
    uint32_t CE = UCOL_NOT_FOUND;
    const UChar *ContractionStart = NULL;
    if(U_SUCCESS(*status) && coll != NULL) {
-    if(coll == UCA) {
+    if(coll == coll->UCA) {
        return FALSE;
      } else if(u < 0x100) { /* latin-1 */
        CE = coll->latinOneMapping[u];
-      if(CE == UCA->latinOneMapping[u]) {
+      if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
          return FALSE;
        }
      } else { /* regular */
-      /*CE = ucmpe32_get(coll->mapping, u);*/
-      CE = UTRIE_GET32_FROM_LEAD(coll->mapping, u);
-
+      CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
      }
  
      if(isContraction(CE)) {
@@ -7162,13 +6913,13 @@ static
  UCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
  {
  
-  // TODO: When we have an UChar iterator, we need to access the whole string. One 
+  // TODO: When we have an UChar iterator, we need to access the whole string. One
    // useful modification would be a UChar iterator extract API, since reset next next...
    // is not optimal.
    // TODO: Handle long strings. Do the same in compareUsingSortKeys.
  
    // When we arrive here, we can have normal strings or UCharIterators. Currently they are both
-  // of same type, but that doesn't really mean that it will stay that way. 
+  // of same type, but that doesn't really mean that it will stay that way.
  
      // The division for the array length may truncate the array size to
      // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
@@ -7306,7 +7057,7 @@ static
  inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
      (b)->buf = (b)->pos = (b)->localArray;
      (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;
-};
+}
  
  static
  void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
@@ -7335,12 +7086,13 @@ inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci) {
          ucol_CEBuf_Expand(b, ci);
  }
      *(b)->pos++ = ce;
-};
+}
  
  /* This is a trick string compare function that goes in and uses sortkeys to compare */
  /* It is used when compare gets in trouble and needs to bail out                     */
  static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
-                                                  collIterate *tColl)
+                                                  collIterate *tColl,
+                                                  UErrorCode *status)
  {
      uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];
      uint8_t *sourceKeyP = sourceKey;
@@ -7349,31 +7101,32 @@ static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
      const UCollator *coll = sColl->coll;
      UChar *source = NULL;
      UChar *target = NULL;
+    int32_t result = UCOL_EQUAL;
      UChar sStackBuf[256], tStackBuf[256];
      int32_t sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(sColl->endp-sColl->string):-1;
      int32_t targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(tColl->endp-tColl->string):-1;
  
      // TODO: Handle long strings. Do the same in ucol_checkIdent.
      if(sColl->flags & UCOL_USE_ITERATOR) {
-      sColl->iterator->move(sColl->iterator, 0, UITER_START);
-      tColl->iterator->move(tColl->iterator, 0, UITER_START);
-      source = sStackBuf;
-      UChar *sBufp = source;
-      target = tStackBuf;
-      UChar *tBufp = target;
-      while(sColl->iterator->hasNext(sColl->iterator)) {
-        *sBufp++ = (UChar)sColl->iterator->next(sColl->iterator);
-      }
-      while(tColl->iterator->hasNext(tColl->iterator)) {
-        *tBufp++ = (UChar)tColl->iterator->next(tColl->iterator);
-      }
-      sourceLength = sBufp - source;
-      targetLength = tBufp - target;
+        sColl->iterator->move(sColl->iterator, 0, UITER_START);
+        tColl->iterator->move(tColl->iterator, 0, UITER_START);
+        source = sStackBuf;
+        UChar *sBufp = source;
+        target = tStackBuf;
+        UChar *tBufp = target;
+        while(sColl->iterator->hasNext(sColl->iterator)) {
+            *sBufp++ = (UChar)sColl->iterator->next(sColl->iterator);
+        }
+        while(tColl->iterator->hasNext(tColl->iterator)) {
+            *tBufp++ = (UChar)tColl->iterator->next(tColl->iterator);
+        }
+        sourceLength = sBufp - source;
+        targetLength = tBufp - target;
      } else { // no iterators
-      sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(sColl->endp-sColl->string):-1;
-      targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(tColl->endp-tColl->string):-1;
-      source = sColl->string;
-      target = tColl->string;
+        sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(sColl->endp-sColl->string):-1;
+        targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(tColl->endp-tColl->string):-1;
+        source = sColl->string;
+        target = tColl->string;
      }
  
  
@@ -7381,26 +7134,31 @@ static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
      sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
      if(sourceKeyLen > UCOL_MAX_BUFFER) {
          sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t));
-        if(sourceKeyP != NULL) {
-          sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
+        if(sourceKeyP == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup_and_do_compare;
          }
+        sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
      }
  
      targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
      if(targetKeyLen > UCOL_MAX_BUFFER) {
          targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t));
-        if(targetKeyP != NULL) {
-          targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
+        if(targetKeyP == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup_and_do_compare;
          }
+        targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
      }
  
-    int32_t result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP);
+    result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP);
  
-    if(sourceKeyP != sourceKey) {
+cleanup_and_do_compare:
+    if(sourceKeyP != NULL && sourceKeyP != sourceKey) {
          uprv_free(sourceKeyP);
      }
  
-    if(targetKeyP != targetKey) {
+    if(targetKeyP != NULL && targetKeyP != targetKey) {
          uprv_free(targetKeyP);
      }
  
@@ -7414,7 +7172,7 @@ static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
  }
  
  
-static inline UCollationResult 
+static inline UCollationResult
  ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
  //              const UCollator    *coll,
  //              const UChar        *source,
@@ -7443,7 +7201,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
      UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
  
      if(doHiragana && shifted) {
-      return (ucol_compareUsingSortKeys(sColl, tColl));
+      return (ucol_compareUsingSortKeys(sColl, tColl, status));
      }
      uint8_t caseSwitch = coll->caseSwitch;
      uint8_t tertiaryMask = coll->tertiaryMask;
@@ -7492,7 +7250,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
              }
              if(doHiragana && hirResult == UCOL_EQUAL) {
                if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
-                hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA)) 
+                hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
                    ? UCOL_LESS:UCOL_GREATER;
                }
              }
@@ -7513,8 +7271,8 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
            if(sOrder == UCOL_NO_MORE_CES) {
              UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
              break;
-          } else if(sOrder == 0 
-            || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) { 
+          } else if(sOrder == 0
+            || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
              /* UCA amendment - ignore ignorables that follow shifted code points */
              continue;
            } else if(isContinuation(sOrder)) {
@@ -7562,7 +7320,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
              UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
              break;
            } else if(tOrder == 0
-            || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) { 
+            || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
              /* UCA amendment - ignore ignorables that follow shifted code points */
              continue;
            } else if(isContinuation(tOrder)) {
@@ -7608,7 +7366,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
            /*
              if(doHiragana && hirResult == UCOL_EQUAL) {
                if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
-                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA)) 
+                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
                    ? UCOL_LESS:UCOL_GREATER;
                }
              }
@@ -7726,8 +7484,15 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
        for(;;) {
          while((secS & UCOL_REMOVE_CASE) == 0) {
            if(!isContinuation(*sCE++)) {
-            secS =*(sCE-1) & UCOL_TERT_CASE_MASK;
-            secS ^= caseSwitch;
+            secS =*(sCE-1);
+            if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {  
+            // primary ignorables should not be considered on the case level when the strength is primary
+            // otherwise, the CEs stop being well-formed
+              secS &= UCOL_TERT_CASE_MASK;
+              secS ^= caseSwitch;
+            } else {
+              secS = 0;
+            }
            } else {
              secS = 0;
            }
@@ -7735,8 +7500,15 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
  
          while((secT & UCOL_REMOVE_CASE) == 0) {
            if(!isContinuation(*tCE++)) {
-            secT = *(tCE-1) & UCOL_TERT_CASE_MASK;
-            secT ^= caseSwitch;
+            secT = *(tCE-1);
+            if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
+            // primary ignorables should not be considered on the case level when the strength is primary
+            // otherwise, the CEs stop being well-formed
+              secT &= UCOL_TERT_CASE_MASK;
+              secT ^= caseSwitch;
+            } else {
+              secT = 0;
+            }
            } else {
              secT = 0;
            }
@@ -7884,8 +7656,8 @@ commonReturn:
  }
  
  
-static inline uint32_t 
-ucol_getLatinOneContraction(const UCollator *coll, int32_t strength, 
+static inline uint32_t
+ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
                            uint32_t CE, const UChar *s, int32_t *index, int32_t len) {
    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
    int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;
@@ -7921,7 +7693,7 @@ ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
          return UCOL_BAIL_OUT_CE;
        }
        // skip completely ignorables
-      uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
+      uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
        if(isZeroCE == 0) { // we have to ignore completely ignorables
          (*index)++;
          continue;
@@ -7933,22 +7705,22 @@ ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
  }
  
  
-/** 
- * This is a fast strcoll, geared towards text in Latin-1. 
+/**
+ * This is a fast strcoll, geared towards text in Latin-1.
   * It supports contractions of size two, French secondaries
   * and case switching. You can use it with strengths primary
   * to tertiary. It does not support shifted and case level.
   * It relies on the table build by setupLatin1Table. If it
   * doesn't understand something, it will go to the regular
- * strcoll. 
+ * strcoll.
   */
-static inline UCollationResult 
+static inline UCollationResult
  ucol_strcollUseLatin1( const UCollator    *coll,
                const UChar        *source,
                int32_t            sLen,
                const UChar        *target,
                int32_t            tLen,
-              UErrorCode *status) 
+              UErrorCode *status)
  {
      U_ALIGN_CODE(16);
      int32_t strength = coll->strength;
@@ -7957,7 +7729,7 @@ ucol_strcollUseLatin1( const UCollator    *coll,
      UChar sChar = 0, tChar = 0;
      uint32_t sOrder=0, tOrder=0;
  
-    UBool endOfSource = FALSE, endOfTarget = FALSE;
+    UBool endOfSource = FALSE;
  
      uint32_t *elements = coll->latinOneCEs;
  
@@ -8009,10 +7781,10 @@ ucol_strcollUseLatin1( const UCollator    *coll,
          if(tLen==-1) {    // handling zero terminated strings
            tChar=target[tIndex++];
            if(tChar==0) {
-            if(endOfSource) { // this is different than source loop, 
+            if(endOfSource) { // this is different than source loop,
                // as we already know that source loop is done here,
                // so we can either finish the primary loop if both
-              // strings are done or anounce the result if only 
+              // strings are done or anounce the result if only
                // target is done. Same below.
                goto endOfPrimLoop;
              } else {
@@ -8071,22 +7843,22 @@ ucol_strcollUseLatin1( const UCollator    *coll,
          // top bytes match, continue with following bytes
          sOrder<<=8;
          tOrder<<=8;
-      } 
+      }
      }
  
  endOfPrimLoop:
-    // after primary loop, we definitely know the sizes of strings, 
+    // after primary loop, we definitely know the sizes of strings,
      // so we set it and use simpler loop for secondaries and tertiaries
      sLen = sIndex; tLen = tIndex;
      if(strength >= UCOL_SECONDARY) {
        // adjust the table beggining
        elements += coll->latinOneTableLen;
-      endOfSource = FALSE; endOfTarget = FALSE;
+      endOfSource = FALSE;
  
        if(coll->frenchCollation == UCOL_OFF) { // non French
          // This loop is a simplified copy of primary loop
-        // at this point we know that whole strings are latin-1, so we don't 
-        // check for that. We also know that we only have contractions as 
+        // at this point we know that whole strings are latin-1, so we don't
+        // check for that. We also know that we only have contractions as
          // specials.
          sIndex = 0; tIndex = 0;
          for(;;) {
@@ -8134,7 +7906,7 @@ endOfPrimLoop:
              }
              sOrder<<=8;
              tOrder<<=8;
-          } 
+          }
          }
        } else { // French
          if(haveContractions) { // if we have contractions, we have to bail out
@@ -8185,17 +7957,17 @@ endOfPrimLoop:
              }
              sOrder<<=8;
              tOrder<<=8;
-          }       
+          }
          }
        }
-    } 
+    }
  
  endOfSecLoop:
      if(strength >= UCOL_TERTIARY) {
        // tertiary loop is the same as secondary (except no French)
        elements += coll->latinOneTableLen;
        sIndex = 0; tIndex = 0;
-      endOfSource = FALSE; endOfTarget = FALSE;
+      endOfSource = FALSE;
        for(;;) {
          while(sOrder==0) {
            if(sIndex==sLen) {
@@ -8238,9 +8010,9 @@ endOfSecLoop:
            }
            sOrder<<=8;
            tOrder<<=8;
-        } 
+        }
        }
-    } 
+    }
      return UCOL_EQUAL;
  
  returnRegular:
@@ -8249,7 +8021,7 @@ returnRegular:
  
      IInit_collIterate(coll, source, sLen, &sColl);
      IInit_collIterate(coll, target, tLen, &tColl);
-    return ucol_strcollRegular(&sColl, &tColl, status);    
+    return ucol_strcollRegular(&sColl, &tColl, status);
  }
  
  
@@ -8258,11 +8030,20 @@ ucol_strcollIter( const UCollator    *coll,
                   UCharIterator *sIter,
                   UCharIterator *tIter,
                   UErrorCode         *status) {
-  if(!status || U_FAILURE(*status) || sIter == tIter) {
+  if(!status || U_FAILURE(*status)) {
+    return UCOL_EQUAL;
+  }
+
+  UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
+  UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
+
+  if (sIter == tIter) {
+    UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
      return UCOL_EQUAL;
    }
    if(sIter == NULL || tIter == NULL || coll == NULL) {
      *status = U_ILLEGAL_ARGUMENT_ERROR;
+    UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
      return UCOL_EQUAL;
    }
  
@@ -8295,8 +8076,8 @@ ucol_strcollIter( const UCollator    *coll,
    }
  
    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
-  
-  while((sChar = sColl.iterator->next(sColl.iterator)) ==  
+
+  while((sChar = sColl.iterator->next(sColl.iterator)) ==
      (tChar = tColl.iterator->next(tColl.iterator))) {
      if(sChar == U_SENTINEL) {
        result = UCOL_EQUAL;
@@ -8339,6 +8120,7 @@ end_compare:
      unorm_closeIter(tNormIter);
    }
  
+  UTRACE_EXIT_VALUE_STATUS(result, *status)
    return result;
  }
  
@@ -8354,10 +8136,19 @@ ucol_strcoll( const UCollator    *coll,
                const UChar        *target,
                int32_t            targetLength) {
      U_ALIGN_CODE(16);
+
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+      UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+      UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
+      UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
+    }
+
      UErrorCode status = U_ZERO_ERROR;
      if(source == NULL || target == NULL) {
-      // do not crash, but return. Should have 
+      // do not crash, but return. Should have
        // status argument to return error.
+      UTRACE_EXIT_VALUE(UTRACE_UCOL_STRCOLL);
        return UCOL_EQUAL;
      }
        collIterate sColl, tColl;
@@ -8374,6 +8165,7 @@ ucol_strcoll( const UCollator    *coll,
          //    Check for them being the same string, and scan through
          //    any leading equal portion.
          if (source==target) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
  
@@ -8385,6 +8177,7 @@ ucol_strcoll( const UCollator    *coll,
              pTarg++;
          }
          if (*pSrc == 0 && *pTarg == 0) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
          equalLength = pSrc - source;
@@ -8395,6 +8188,7 @@ ucol_strcoll( const UCollator    *coll,
          /* check if source and target are same strings */
  
          if (source==target  && sourceLength==targetLength) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
          const UChar    *pSrcEnd = source + sourceLength;
@@ -8420,6 +8214,7 @@ ucol_strcoll( const UCollator    *coll,
              // If we made it all the way through both strings, we are done.  They are ==
              if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
                  (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))  {  /* and also at end of dest string                  */
+                UTRACE_EXIT_VALUE(UCOL_EQUAL);
                  return UCOL_EQUAL;
              }
      }
@@ -8453,14 +8248,17 @@ ucol_strcoll( const UCollator    *coll,
          }
      }
  
+    UCollationResult  returnVal;
      if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLength > 0 && *target&0xff00)) {
        // Preparing the context objects for iterating over strings
        IInit_collIterate(coll, source, sourceLength, &sColl);
        IInit_collIterate(coll, target, targetLength, &tColl);
-      return ucol_strcollRegular(&sColl, &tColl, &status);
+      returnVal = ucol_strcollRegular(&sColl, &tColl, &status);
      } else {
-      return ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);    
+      returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
      }
+    UTRACE_EXIT_VALUE(returnVal);
+    return returnVal;
  }
  
  /* convenience function for comparing strings */
@@ -8499,192 +8297,80 @@ ucol_equal(        const    UCollator        *coll,
        == UCOL_EQUAL);
  }
  
-/* returns the locale name the collation data comes from */
-U_CAPI const char * U_EXPORT2
-ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
-  const char *result = NULL;
-  if(status == NULL || U_FAILURE(*status)) {
-    return NULL;
-  }
-  switch(type) {
-  case ULOC_ACTUAL_LOCALE:
-    // validLocale is set only if service registration has explicitly set the
-    // requested and valid locales.  if this is the case, the actual locale
-    // is considered to be the valid locale.
-    if (coll->validLocale != NULL) {
-      result = coll->validLocale;
-    } else if(coll->elements != NULL) {
-      result = ures_getLocale(coll->elements, status);
-    }
-    break;
-  case ULOC_VALID_LOCALE:
-    if (coll->validLocale != NULL) {
-      result = coll->validLocale;
-    } else if(coll->rb != NULL) {
-      result = ures_getLocale(coll->rb, status);
-    } 
-    break;
-  case ULOC_REQUESTED_LOCALE:
-    result = coll->requestedLocale;
-    break;
-  default:
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
+U_CAPI void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
+  if(coll && coll->UCA) {
+    uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
    }
-  return result;
  }
  
-U_CAPI USet * U_EXPORT2
-ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) 
+U_CAPI int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+                 uint8_t *buffer, int32_t capacity,
+                 UErrorCode *status)
  {
-  if(status == NULL || U_FAILURE(*status)) {
-    return NULL;
-  }
-  if(coll == NULL) {
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-  }
-  UParseError parseError;
-  UColTokenParser src;
-  int32_t rulesLen = 0;
-  const UChar *rules = ucol_getRules(coll, &rulesLen);
-  const UChar *current = NULL;
-  UBool startOfRules = TRUE;
-  // we internally use the C++ class, for the following reasons:
-  // 1. we need to utilize canonical iterator, which is a C++ only class
-  // 2. canonical iterator returns UnicodeStrings - USet cannot take them
-  // 3. USet is internally really UnicodeSet, C is just a wrapper
-  UnicodeSet *tailored = new UnicodeSet();
-  UnicodeString pattern;
-  CanonicalIterator it("", *status);
-
-
-  // The idea is to tokenize the rule set. For each non-reset token,
-  // we add all the canonicaly equivalent FCD sequences 
-  ucol_tok_initTokenList(&src, rules, rulesLen, UCA, status);
-  while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) {
-    startOfRules = FALSE;
-    if(src.parsedToken.strength != UCOL_TOK_RESET) {
-      const UChar *stuff = src.source+(src.parsedToken.charsOffset);
-      it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
-      pattern = it.next();
-      while(!pattern.isBogus()) {
-        if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
-          tailored->add(pattern);
-        }
-        pattern = it.next();
-      }
+    int32_t length = 0;
+    if(U_FAILURE(*status)) {
+        return length;
      }
-  }
-  ucol_tok_closeTokenList(&src);
-  return (USet *)tailored;
-}
-
-U_CAPI UBool U_EXPORT2
-ucol_equals(const UCollator *source, const UCollator *target) {
-  UErrorCode status = U_ZERO_ERROR;
-  // if pointers are equal, collators are equal
-  if(source == target) {
-    return TRUE;
-  }
-  int32_t i = 0, j = 0;
-  // if any of attributes are different, collators are not equal
-  for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
-    if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
-      return FALSE;
+    if(capacity < 0) {
+      *status = U_ILLEGAL_ARGUMENT_ERROR;
+      return length;
      }
-  }
+    if(coll->hasRealData == TRUE) {
+        length = coll->image->size;
+        if(length <= capacity) {
+            uprv_memcpy(buffer, coll->image, length);
+        } else {
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+    } else {
+        length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
+        if(length <= capacity) {
+            /* build the UCATableHeader with minimal entries */
+            /* do not copy the header from the UCA file because its values are wrong! */
+            /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
  
-  int32_t sourceRulesLen = 0, targetRulesLen = 0;
-  const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
-  const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
+            /* reset everything */
+            uprv_memset(buffer, 0, length);
  
-  if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
-    // all the attributes are equal and the rules are equal - collators are equal
-    return(TRUE);
-  } 
-  // hard part, need to construct tree from rules and see if they yield the same tailoring
-  UBool result = TRUE;
-  UParseError parseError;
-  UColTokenParser sourceParser, targetParser;
-  int32_t sourceListLen = 0, targetListLen = 0;
-  ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, UCA, &status);
-  ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, UCA, &status);
-  sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
-  targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
-
-  if(sourceListLen != targetListLen) {
-    // different number of resets
-    result = FALSE;
-  } else {
-    UColToken *sourceReset = NULL, *targetReset = NULL;
-    UChar *sourceResetString = NULL, *targetResetString = NULL;
-    int32_t sourceStringLen = 0, targetStringLen = 0;
-    for(i = 0; i < sourceListLen; i++) {
-      sourceReset = sourceParser.lh[i].reset;
-      sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
-      sourceStringLen = sourceReset->source >> 24;
-      for(j = 0; j < sourceListLen; j++) {
-        targetReset = targetParser.lh[j].reset;
-        targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
-        targetStringLen = targetReset->source >> 24;
-        if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
-          sourceReset = sourceParser.lh[i].first;
-          targetReset = targetParser.lh[j].first;
-          while(sourceReset != NULL && targetReset != NULL) {
-            sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
-            sourceStringLen = sourceReset->source >> 24;
-            targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
-            targetStringLen = targetReset->source >> 24;
-            if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
-              result = FALSE;
-              goto returnResult;
-            }
-            // probably also need to check the expansions
-            if(sourceReset->expansion) {
-              if(!targetReset->expansion) {
-                result = FALSE;
-                goto returnResult;
-              } else {
-                // compare expansions
-                sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
-                sourceStringLen = sourceReset->expansion >> 24;
-                targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
-                targetStringLen = targetReset->expansion >> 24;
-                if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
-                  result = FALSE;
-                  goto returnResult;
-                }
-              }
-            } else {
-              if(targetReset->expansion) {
-                result = FALSE;
-                goto returnResult;
-              }
-            }
-            sourceReset = sourceReset->next;
-            targetReset = targetReset->next;
-          }
-          if(sourceReset != targetReset) { // at least one is not NULL
-            // there are more tailored elements in one list
-            result = FALSE;
-            goto returnResult;
-          }
+            /* set the tailoring-specific values */
+            UCATableHeader *myData = (UCATableHeader *)buffer;
+            myData->size = length;
  
+            /* offset for the options, the only part of the data that is present after the header */
+            myData->options = sizeof(UCATableHeader);
  
-          break;
+            /* need to always set the expansion value for an upper bound of the options */
+            myData->expansion = myData->options + sizeof(UColOptionSet);
+
+            myData->magic = UCOL_HEADER_MAGIC;
+            myData->isBigEndian = U_IS_BIG_ENDIAN;
+            myData->charSetFamily = U_CHARSET_FAMILY;
+
+            /* copy UCA's version; genrb will override all but the builder version with tailoring data */
+            uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
+
+            uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
+            uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
+            uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
+            myData->jamoSpecial = coll->image->jamoSpecial;
+
+            /* copy the collator options */
+            uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
+        } else {
+            *status = U_BUFFER_OVERFLOW_ERROR;
          }
-      }
-      // couldn't find the reset anchor, so the collators are not equal
-      if(j == sourceListLen) {
-        result = FALSE;
-        goto returnResult;
-      }
      }
-  }
-
-returnResult:
-  ucol_tok_closeTokenList(&sourceParser);
-  ucol_tok_closeTokenList(&targetParser);
-  return result;
+    return length;
+}
  
+U_CAPI void U_EXPORT2
+ucol_forgetUCA(void)
+{
+  _staticUCA = NULL;
+  UCA_DATA_MEM = NULL;
  }
+
  #endif /* #if !UCONFIG_NO_COLLATION */
+