ICU-6.2.4.tar.gz

[apple/icu.git] / icuSources / i18n / ucol.cpp
diff --git a/icuSources/i18n/ucol.cpp b/icuSources/i18n/ucol.cpp

index 5cf8b1558073980a2b7ab49f946d06c8d14db3fd..acc2591d46adf0184f8507751cd26ccccda2c2e7 100644 (file)
--- a/icuSources/i18n/ucol.cpp
+++ b/icuSources/i18n/ucol.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-*   Copyright (C) 1996-2003, International Business Machines
+*   Copyright (C) 1996-2004, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   file name:  ucol.cpp
@@ -17,6 +17,7 @@
  */
  
  #include "unicode/utypes.h"
+#include "ustrenum.h"
  #include "uassert.h"
  
  #if !UCONFIG_NO_COLLATION
@@ -41,8 +42,10 @@
  #include "uresimp.h"
  #include "umutex.h"
  #include "uhash.h"
-#include "ucln_in.h" 
+#include "ucln_in.h"
  #include "cstring.h"
+#include "utracimp.h"
+#include "putilimp.h"
  
  #ifdef UCOL_DEBUG
  #include <stdio.h>
@@ -60,10 +63,18 @@ U_NAMESPACE_USE
  
  #define ZERO_CC_LIMIT_            0xC0
  
-static UCollator* UCA = NULL;
-static UCAConstants *UCAconsts = NULL; 
+// static UCA. There is only one. Collators don't use it.
+// It is referenced only in ucol_initUCA and ucol_cleanup
+static UCollator* _staticUCA = NULL;
+// static pointer to udata memory. Inited in ucol_initUCA
+// used for cleanup in ucol_cleanup
  static UDataMemory* UCA_DATA_MEM = NULL;
  
+// this is static pointer to the normalizer fcdTrieIndex
+// it is always the same between calls to u_cleanup
+// and therefore writing to it is not synchronized.
+// It is cleaned in ucol_cleanup
+static const uint16_t *fcdTrieIndex=NULL;
  
  U_CDECL_BEGIN
  static UBool U_CALLCONV
@@ -74,15 +85,15 @@ isAcceptableUCA(void * /*context*/,
      if( pInfo->size>=20 &&
          pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
          pInfo->charsetFamily==U_CHARSET_FAMILY &&
-        pInfo->dataFormat[0]==ucaDataInfo.dataFormat[0] &&   /* dataFormat="UCol" */
-        pInfo->dataFormat[1]==ucaDataInfo.dataFormat[1] &&
-        pInfo->dataFormat[2]==ucaDataInfo.dataFormat[2] &&
-        pInfo->dataFormat[3]==ucaDataInfo.dataFormat[3] &&
-        pInfo->formatVersion[0]==ucaDataInfo.formatVersion[0] &&
-        pInfo->formatVersion[1]>=ucaDataInfo.formatVersion[1]// &&
-        //pInfo->formatVersion[1]==ucaDataInfo.formatVersion[1] &&
-        //pInfo->formatVersion[2]==ucaDataInfo.formatVersion[2] && // Too harsh
-        //pInfo->formatVersion[3]==ucaDataInfo.formatVersion[3] && // Too harsh
+        pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 &&   /* dataFormat="UCol" */
+        pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 &&
+        pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 &&
+        pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 &&
+        pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 &&
+        pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// &&
+        //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 &&
+        //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh
+        //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh
          ) {
          UVersionInfo UCDVersion;
          u_getUnicodeVersion(UCDVersion);
@@ -127,7 +138,7 @@ inline void  IInit_collIterate(const UCollator *collator, const UChar *sourceStr
      (s)->coll = (collator);
      (s)->fcdPosition = 0;
      if(collator->normalizationMode == UCOL_ON) {
-        (s)->flags |= UCOL_ITER_NORM; 
+        (s)->flags |= UCOL_ITER_NORM;
      }
      if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
        (s)->flags |= UCOL_HIRAGANA_Q;
@@ -339,47 +350,76 @@ tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
  
  U_CAPI UCollator*
  ucol_open(const char *loc,
-                 UErrorCode *status)
+          UErrorCode *status)
  {
+  UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
+  UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
    UCollator *result = NULL;
-  if (status && U_SUCCESS(*status)) {
-         result = Collator::createUCollator(loc, status);
-       if (result) {
-         return result;
-       }       
+
+  u_init(status);
+#if !UCONFIG_NO_SERVICE
+  result = Collator::createUCollator(loc, status);
+  if (result == NULL)
+#endif
+  {
+    result = ucol_open_internal(loc, status);
    }
-  return ucol_open_internal(loc, status);
+  UTRACE_EXIT_PTR_STATUS(result, *status);
+  return result;
  }
  
  // API in ucol_imp.h
  
  U_CFUNC UCollator*
  ucol_open_internal(const char *loc,
-                          UErrorCode *status)
+                   UErrorCode *status)
  {
-  ucol_initUCA(status);
+  const UCollator* UCA = ucol_initUCA(status);
  
    /* New version */
    if(U_FAILURE(*status)) return 0;
  
+
+
    UCollator *result = NULL;
-  UResourceBundle *b = ures_open(NULL, loc, status);
-  UResourceBundle *collElem = ures_getByKey(b, "CollationElements", NULL, status);
+  UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
+
+  /* we try to find stuff from keyword */
+  UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
+  UResourceBundle *collElem = NULL;
+  char keyBuffer[256];
+  // if there is a keyword, we pick it up and try to get elements
+  if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
+    // no keyword. we try to find the default setting, which will give us the keyword value
+    UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, status);
+    if(U_SUCCESS(*status)) {
+      int32_t defaultKeyLen = 0;
+      const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, status);
+      u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
+      keyBuffer[defaultKeyLen] = 0;
+    } else {
+      *status = U_INTERNAL_PROGRAM_ERROR;
+      return NULL;
+    }
+    ures_close(defaultColl);
+  }
+  collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
+
    UResourceBundle *binary = NULL;
-  UErrorCode binaryStatus = U_ZERO_ERROR;  
+  UErrorCode binaryStatus = U_ZERO_ERROR;
  
    if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
      *status = U_USING_DEFAULT_WARNING;
-    result = ucol_initCollator(UCA->image, result, status);
+    result = ucol_initCollator(UCA->image, result, UCA, status);
      // if we use UCA, real locale is root
-    result->rb = ures_open(NULL, "", status);
-    result->elements = ures_open(NULL, "", status);
+    result->rb = ures_open(U_ICUDATA_COLL, "", status);
+    result->elements = ures_open(U_ICUDATA_COLL, "", status);
      if(U_FAILURE(*status)) {
        goto clean;
      }
      ures_close(b);
      result->hasRealData = FALSE;
-  } else if(U_SUCCESS(*status)) {   
+  } else if(U_SUCCESS(*status)) {
      binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
  
      if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
@@ -402,13 +442,13 @@ ucol_open_internal(const char *loc,
            goto clean;
          }
          if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
-          result = ucol_initCollator((const UCATableHeader *)inData, result, status);
+          result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
            if(U_FAILURE(*status)){
              goto clean;
            }
            result->hasRealData = TRUE;
          } else {
-          result = ucol_initCollator(UCA->image, result, status);
+          result = ucol_initCollator(UCA->image, result, UCA, status);
            ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
            if(U_FAILURE(*status)){
              goto clean;
@@ -424,6 +464,7 @@ ucol_open_internal(const char *loc,
  clean:
      ures_close(b);
      ures_close(collElem);
+    ures_close(collations);
      ures_close(binary);
      return NULL;
    }
@@ -436,29 +477,32 @@ clean:
    result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
    /* test for NULL */
    if (result->requestedLocale == NULL) {
-       *status = U_MEMORY_ALLOCATION_ERROR;
-       ures_close(b); // ??? appears needed
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    ures_close(b); // ??? appears needed
      ures_close(collElem);
+    ures_close(collations);
      ures_close(binary); // ??? appears needed
-       return NULL;
+    return NULL;
    }
    uprv_strcpy(result->requestedLocale, loc);
  
    ures_close(binary);
+  ures_close(collations); //??? we have to decide on that. Probably affects something :)
    return result;
  }
  
+
  U_CAPI void U_EXPORT2
  ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
  {
    if (coll) {
      if (coll->validLocale) {
        uprv_free(coll->validLocale);
-       }
+    }
      coll->validLocale = validLocaleToAdopt;
      if (coll->requestedLocale) { // should always have
        uprv_free(coll->requestedLocale);
-       }
+    }
      coll->requestedLocale = requestedLocaleToAdopt;
    }
  }
@@ -466,49 +510,53 @@ ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *val
  U_CAPI void U_EXPORT2
  ucol_close(UCollator *coll)
  {
+  UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
+  UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
    if(coll != NULL) {
-       // these are always owned by each UCollator struct, 
-       // so we always free them
-    if(coll->validLocale != NULL) {
-      uprv_free(coll->validLocale);
-    }
-    if(coll->requestedLocale != NULL) {
-      uprv_free(coll->requestedLocale);
-    }
+      // these are always owned by each UCollator struct,
+      // so we always free them
+      if(coll->validLocale != NULL) {
+          uprv_free(coll->validLocale);
+      }
+      if(coll->requestedLocale != NULL) {
+          uprv_free(coll->requestedLocale);
+      }
  
-    /* Here, it would be advisable to close: */
-    /* - UData for UCA (unless we stuff it in the root resb */
-    /* Again, do we need additional housekeeping... HMMM! */
-    if(coll->freeOnClose == FALSE){
-      return; /* for safeClone, if freeOnClose is FALSE,
-              don't free the other instance data */
-    }
-    if(coll->freeOptionsOnClose != FALSE) {
-      if(coll->options != NULL) {
-        uprv_free(coll->options);
+      /* Here, it would be advisable to close: */
+      /* - UData for UCA (unless we stuff it in the root resb */
+      /* Again, do we need additional housekeeping... HMMM! */
+      UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
+      if(coll->freeOnClose){
+      /* for safeClone, if freeOnClose is FALSE,
+          don't free the other instance data */
+          if(coll->freeOptionsOnClose != FALSE) {
+              if(coll->options != NULL) {
+                  uprv_free(coll->options);
+              }
+          }
+          if(coll->mapping != NULL) {
+              /*ucmpe32_close(coll->mapping);*/
+              uprv_free(coll->mapping);
+          }
+          if(coll->rules != NULL && coll->freeRulesOnClose) {
+              uprv_free((UChar *)coll->rules);
+          }
+          if(coll->rb != NULL) { /* pointing to read-only memory */
+              ures_close(coll->rb);
+          }
+          if(coll->freeImageOnClose == TRUE) {
+              uprv_free((UCATableHeader *)coll->image);
+          }
+          if(coll->elements != NULL) {
+              ures_close(coll->elements);
+          }
+          if(coll->latinOneCEs != NULL) {
+              uprv_free(coll->latinOneCEs);
+          }
+          uprv_free(coll);
        }
-    }
-    if(coll->mapping != NULL) {
-        /*ucmpe32_close(coll->mapping);*/
-      uprv_free(coll->mapping);
-    }
-    if(coll->rules != NULL && coll->freeRulesOnClose) {
-      uprv_free((UChar *)coll->rules);
-    }
-    if(coll->rb != NULL) { /* pointing to read-only memory */
-      ures_close(coll->rb);
-    } 
-    if(coll->freeImageOnClose == TRUE) {
-      uprv_free((UCATableHeader *)coll->image);
-    }
-    if(coll->elements != NULL) {
-      ures_close(coll->elements);
-    }
-    if(coll->latinOneCEs != NULL) {
-      uprv_free(coll->latinOneCEs);
-    }
-    uprv_free(coll);
    }
+  UTRACE_EXIT();
  }
  
  U_CAPI UCollator* U_EXPORT2
@@ -523,11 +571,16 @@ ucol_openRules( const UChar        *rules,
    UColTokenParser src;
    UColAttributeValue norm;
    UParseError tErr;
-  
+
    if(status == NULL || U_FAILURE(*status)){
      return 0;
    }
  
+  u_init(status);
+  if (U_FAILURE(*status)) {
+      return NULL;
+  }
+
    if(rulesLength < -1 || (rules == NULL && rulesLength != 0)) {
      *status = U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
@@ -540,7 +593,7 @@ ucol_openRules( const UChar        *rules,
    if(parseError == NULL){
      parseError = &tErr;
    }
-  
+
    switch(normalizationMode) {
    case UCOL_OFF:
    case UCOL_ON:
@@ -552,7 +605,7 @@ ucol_openRules( const UChar        *rules,
      return 0;
    }
  
-  ucol_initUCA(status);
+  UCollator *UCA = ucol_initUCA(status);
  
    if(U_FAILURE(*status)){
      return NULL;
@@ -590,14 +643,14 @@ ucol_openRules( const UChar        *rules,
        u_getUnicodeVersion(table->UCDVersion);
        // set UCA version
        uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
-      result = ucol_initCollator(table,0,status);
+      result = ucol_initCollator(table, 0, UCA, status);
        result->hasRealData = TRUE;
        result->freeImageOnClose = TRUE;
      }
    } else { /* no rules, but no error either */
      // must be only options
-    // We will init the collator from UCA   
-    result = ucol_initCollator(UCA->image,0,status);
+    // We will init the collator from UCA
+    result = ucol_initCollator(UCA->image, 0, UCA, status);
      // And set only the options
      UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
      /* test for NULL */
@@ -677,7 +730,37 @@ ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status)
          *status = U_MEMORY_ALLOCATION_ERROR;
          return NULL;
      }
-    uprv_memcpy(result, UCA->image, sizeof(UCATableHeader));
+
+    /* build the UCATableHeader with minimal entries */
+    /* do not copy the header from the UCA file because its values are wrong! */
+    /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
+
+    /* reset everything */
+    uprv_memset(result, 0, *length);
+
+    /* set the tailoring-specific values */
+    UCATableHeader *myData = (UCATableHeader *)result;
+    myData->size = *length;
+
+    /* offset for the options, the only part of the data that is present after the header */
+    myData->options = sizeof(UCATableHeader);
+
+    /* need to always set the expansion value for an upper bound of the options */
+    myData->expansion = myData->options + sizeof(UColOptionSet);
+
+    myData->magic = UCOL_HEADER_MAGIC;
+    myData->isBigEndian = U_IS_BIG_ENDIAN;
+    myData->charSetFamily = U_CHARSET_FAMILY;
+
+    /* copy UCA's version; genrb will override all but the builder version with tailoring data */
+    uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
+
+    uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
+    uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
+    uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
+    myData->jamoSpecial = coll->image->jamoSpecial;
+
+    /* copy the collator options */
      uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
    }
    return result;
@@ -729,8 +812,6 @@ void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode
  }
  #endif
  
-static const uint16_t *fcdTrieIndex=NULL;
-
  
  /**
  * Approximate determination if a character is at a contraction end.
@@ -776,7 +857,7 @@ inline uint8_t i_getCombiningClass(UChar c, const UCollator *coll) {
  }
  
  
-UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UErrorCode *status) {
+UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
      UChar c;
      UCollator *result = fillIn;
      if(U_FAILURE(*status) || image == NULL) {
@@ -797,7 +878,7 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      result->image = image;
      const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
      /*CompactEIntArray *newUCAmapping = ucmpe32_openFromData(&mapping, status);*/
-    UTrie *newUCAmapping = (UTrie *)uprv_malloc(sizeof(UTrie)); 
+    UTrie *newUCAmapping = (UTrie *)uprv_malloc(sizeof(UTrie));
      if(newUCAmapping != NULL) {
        utrie_unserialize(newUCAmapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
      } else {
@@ -880,9 +961,6 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      result->expansionCESize = (uint8_t*)result->image +
                                                 result->image->expansionCESize;
  
-    if (fcdTrieIndex == NULL) {
-        fcdTrieIndex = unorm_getFCDTrie(status);
-    }
  
      //result->errorCode = *status;
  
@@ -890,6 +968,7 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
  
      result->latinOneRegenTable = FALSE;
      result->latinOneFailed = FALSE;
+    result->UCA = UCA;
  
      ucol_updateInternalState(result, status);
  
@@ -897,67 +976,74 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
      return result;
  }
  
-U_CFUNC UBool
-ucol_cleanup(void)
-{
-    if (UCA_DATA_MEM) {
-        udata_close(UCA_DATA_MEM);
-        UCA_DATA_MEM = NULL;
-    }
-    if (UCA) {
-        ucol_close(UCA);
-        UCA = NULL;
-    }
-    return TRUE;
-}
+/* new Mark's code */
  
+/**
+ * For generation of Implicit CEs
+ * @author Davis
+ *
+ * Cleaned up so that changes can be made more easily.
+ * Old values:
+# First Implicit: E26A792D
+# Last Implicit: E3DC70C0
+# First CJK: E0030300
+# Last CJK: E0A9DD00
+# First CJK_A: E0A9DF00
+# Last CJK_A: E0DE3100
+ */
  /* Following is a port of Mark's code for new treatment of implicits.
- * It is positioned here, since ucol_initUCA need to initialize the 
+ * It is positioned here, since ucol_initUCA need to initialize the
   * variables below according to the data in the fractional UCA.
   */
-   
+
  /**
-  * Function used to: 
-  * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
-  * b) bump any non-CJK characters by 10FFFF.
-  * The relevant blocks are:
-  * A: 4E00..9FFF; CJK Unified Ideographs
-  *            F900..FAFF; CJK Compatibility Ideographs
-  * B: 3400..4DBF; CJK Unified Ideographs Extension A
-  *            20000..XX;      CJK Unified Ideographs Extension B (and others later on)
-  * As long as
-  *    no new B characters are allocated between 4E00 and FAFF, and
-  *    no new A characters are outside of this range,
-  * (very high probability) this simple code will work.
-  * The reordered blocks are:
-  * Block1 is CJK
-  * Block2 is CJK_COMPAT_USED
-  * Block3 is CJK_A
-  * Any other CJK gets its normal code point
-  * Any non-CJK gets +10FFFF
-  * When we reorder Block1, we make sure that it is at the very start,
-  * so that it will use a 3-byte form.
-  */
+    * Function used to:
+    * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
+    * b) bump any non-CJK characters by 10FFFF.
+    * The relevant blocks are:
+    * A:    4E00..9FFF; CJK Unified Ideographs
+    *       F900..FAFF; CJK Compatibility Ideographs
+    * B:    3400..4DBF; CJK Unified Ideographs Extension A
+    *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
+    * As long as
+    *   no new B characters are allocated between 4E00 and FAFF, and
+    *   no new A characters are outside of this range,
+    * (very high probability) this simple code will work.
+    * The reordered blocks are:
+    * Block1 is CJK
+    * Block2 is CJK_COMPAT_USED
+    * Block3 is CJK_A
+    * (all contiguous)
+    * Any other CJK gets its normal code point
+    * Any non-CJK gets +10FFFF
+    * When we reorder Block1, we make sure that it is at the very start,
+    * so that it will use a 3-byte form.
+    * Warning: the we only pick up the compatibility characters that are
+    * NOT decomposed, so that block is smaller!
+    */
  
  // CONSTANTS
-static const uint32_t
+static const UChar32
      NON_CJK_OFFSET = 0x110000,
-    BYTES_TO_AVOID = 3,
-    OTHER_COUNT = 256 - BYTES_TO_AVOID,
-    LAST_COUNT = OTHER_COUNT / 2,
-    LAST_COUNT2 = OTHER_COUNT / 21, // room for intervening, without expanding to 5 bytes
-    IMPLICIT_3BYTE_COUNT = 1;
-    
-// These depend on initUCA, and are initialized at that time
-static uint32_t 
-    IMPLICIT_BASE_BYTE = 0,
-    IMPLICIT_LIMIT_BYTE = 0, // leave room for 1 3-byte and 2 4-byte forms
-    
-    IMPLICIT_4BYTE_BOUNDARY = 0,
-    LAST_MULTIPLIER = 0,
-    LAST2_MULTIPLIER = 0,
-    IMPLICIT_BASE_3BYTE = 0,
-    IMPLICIT_BASE_4BYTE = 0;
+    UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
+
+/**
+ * Precomputed by constructor
+ */
+static int32_t
+    final3Multiplier = 0,
+    final4Multiplier = 0,
+    final3Count = 0,
+    final4Count = 0,
+    medialCount = 0,
+    min3Primary = 0,
+    min4Primary = 0,
+    max4Primary = 0,
+    minTrail = 0,
+    maxTrail = 0,
+    max3Trail = 0,
+    max4Trail = 0,
+    min4Boundary = 0;
  
  static const UChar32
      CJK_BASE = 0x4E00,
@@ -969,105 +1055,282 @@ static const UChar32
      CJK_B_BASE = 0x20000,
      CJK_B_LIMIT = 0x2A6DF+1;
  
-static inline UChar32 swapCJK(UChar32 cp) {
-       
-       if (cp >= CJK_BASE) {
-               if (cp < CJK_LIMIT)                             return cp - CJK_BASE;
-                       
-               if (cp < CJK_COMPAT_USED_BASE)  return cp + NON_CJK_OFFSET;
-               
-               if (cp < CJK_COMPAT_USED_LIMIT) return cp - CJK_COMPAT_USED_BASE
-                                                                                               + (CJK_LIMIT - CJK_BASE);
-               if (cp < CJK_B_BASE)                            return cp + NON_CJK_OFFSET;
-               
-               if (cp < CJK_B_LIMIT)                   return cp; // non-BMP-CJK
-               
-               return cp + NON_CJK_OFFSET;     // non-CJK
-       }
-       if (cp < CJK_A_BASE)                                    return cp + NON_CJK_OFFSET;
-               
-       if (cp < CJK_A_LIMIT)                           return cp - CJK_A_BASE
-                                                                                               + (CJK_LIMIT - CJK_BASE) 
-                                                                                               + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
-    return cp + NON_CJK_OFFSET; // non-CJK
+static UChar32 swapCJK(UChar32 i) {
+
+    if (i >= CJK_BASE) {
+        if (i < CJK_LIMIT)              return i - CJK_BASE;
+
+        if (i < CJK_COMPAT_USED_BASE)   return i + NON_CJK_OFFSET;
+
+        if (i < CJK_COMPAT_USED_LIMIT)  return i - CJK_COMPAT_USED_BASE
+                                                + (CJK_LIMIT - CJK_BASE);
+        if (i < CJK_B_BASE)             return i + NON_CJK_OFFSET;
+
+        if (i < CJK_B_LIMIT)            return i; // non-BMP-CJK
+
+        return i + NON_CJK_OFFSET;  // non-CJK
+    }
+    if (i < CJK_A_BASE)                 return i + NON_CJK_OFFSET;
+
+    if (i < CJK_A_LIMIT)                return i - CJK_A_BASE
+                                                + (CJK_LIMIT - CJK_BASE)
+                                                + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
+    return i + NON_CJK_OFFSET; // non-CJK
+}
+
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getRawFromCodePoint(UChar32 i) {
+    return swapCJK(i)+1;
+}
+
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getCodePointFromRaw(UChar32 i) {
+    i--;
+    UChar32 result = 0;
+    if(i >= NON_CJK_OFFSET) {
+        result = i - NON_CJK_OFFSET;
+    } else if(i >= CJK_B_BASE) {
+        result = i;
+    } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
+        if(i < CJK_LIMIT - CJK_BASE) {
+            result = i + CJK_BASE;
+        } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
+            result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
+        } else {
+            result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
+        }
+    } else {
+        result = -1;
+    }
+    return result;
  }
-    
  
  // GET IMPLICIT PRIMARY WEIGHTS
  // Return value is left justified primary key
+U_CAPI uint32_t U_EXPORT2
+uprv_uca_getImplicitFromRaw(UChar32 cp) {
+    /*
+    if (cp < 0 || cp > UCOL_MAX_INPUT) {
+        throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
+    }
+    */
+    int32_t last0 = cp - min4Boundary;
+    if (last0 < 0) {
+        int32_t last1 = cp / final3Count;
+        last0 = cp % final3Count;
  
-static inline uint32_t getImplicitPrimary(UChar32 cp) {
+        int32_t last2 = last1 / medialCount;
+        last1 %= medialCount;
  
-    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
-    
-    cp = swapCJK(cp);
-    
-    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
-    
-    // we now have a range of numbers from 0 to 21FFFF.
-      
-    // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
-    // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
-    // we shift so that HAN all has the same first primary, for compression.
-    // for the 4 byte case, we make the gap as large as we can fit.
-    // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
-    // Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
-    
-    int32_t last0 = cp - IMPLICIT_4BYTE_BOUNDARY;
-    if (last0 < 0) {
-        int32_t last1 = cp / LAST_COUNT;
-        last0 = cp % LAST_COUNT;
-        
-        int32_t last2 = last1 / OTHER_COUNT;
-        last1 %= OTHER_COUNT;
+        last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
+        last1 = minTrail + last1; // offset
+        last2 = min3Primary + last2; // offset
          /*
-        if (DEBUG || last2 > 0xFF-BYTES_TO_AVOID) System.out.println("3B: " + Utility.hex(cp) + " => "
-            + Utility.hex(last2) + ", "
-            + Utility.hex(last1) + ", "
-            + Utility.hex(last0) + ", "
-        );
+        if (last2 >= min4Primary) {
+            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
+        }
          */
-        
-        return IMPLICIT_BASE_3BYTE + (last2 << 24) + (last1 << 16) + ((last0*LAST_MULTIPLIER) << 8);
+        return (last2 << 24) + (last1 << 16) + (last0 << 8);
      } else {
-        int32_t last1 = last0 / LAST_COUNT2;
-        last0 %= LAST_COUNT2;
-        
-        int32_t last2 = last1 / OTHER_COUNT;
-        last1 %= OTHER_COUNT;
-        
-        int32_t last3 = last2 / OTHER_COUNT;
-        last2 %= OTHER_COUNT;
+        int32_t last1 = last0 / final4Count;
+        last0 %= final4Count;
  
+        int32_t last2 = last1 / medialCount;
+        last1 %= medialCount;
+
+        int32_t last3 = last2 / medialCount;
+        last2 %= medialCount;
+
+        last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
+        last1 = minTrail + last1; // offset
+        last2 = minTrail + last2; // offset
+        last3 = min4Primary + last3; // offset
          /*
-        if (DEBUG || last3 > 0xFF-BYTES_TO_AVOID) System.out.println("4B: " + Utility.hex(cp) + " => "
-            + Utility.hex(last3) + ", "
-            + Utility.hex(last2) + ", "
-            + Utility.hex(last1) + ", "
-            + Utility.hex(last0 * LAST2_MULTIPLIER) + ", "
-        );
+        if (last3 > max4Primary) {
+            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
+        }
          */
-
-       return IMPLICIT_BASE_4BYTE + (last3 << 24) + (last2 << 16) + (last1 << 8) + (last0 * LAST2_MULTIPLIER);
+        return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
      }
  }
  
+U_CAPI uint32_t U_EXPORT2
+uprv_uca_getImplicitPrimary(UChar32 cp) {
+    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
+
+    cp = swapCJK(cp);
+    cp++;
+    // we now have a range of numbers from 0 to 21FFFF.
+
+    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
+
+    return uprv_uca_getImplicitFromRaw(cp);
+}
+
+/**
+ * Converts implicit CE into raw integer ("code point")
+ * @param implicit
+ * @return -1 if illegal format
+ */
+U_CAPI UChar32 U_EXPORT2
+uprv_uca_getRawFromImplicit(uint32_t implicit) {
+    UChar32 result;
+    UChar32 b3 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b2 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b1 = implicit & 0xFF;
+    implicit >>= 8;
+    UChar32 b0 = implicit & 0xFF;
+
+    // simple parameter checks
+    if (b0 < min3Primary || b0 > max4Primary
+      || b1 < minTrail || b1 > maxTrail) return -1;
+    // normal offsets
+    b1 -= minTrail;
+
+    // take care of the final values, and compose
+    if (b0 < min4Primary) {
+        if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1;
+        b2 -= minTrail;
+        UChar32 remainder = b2 % final3Multiplier;
+        if (remainder != 0) return -1;
+        b0 -= min3Primary;
+        b2 /= final3Multiplier;
+        result = ((b0 * medialCount) + b1) * final3Count + b2;
+    } else {
+         if (b2 < minTrail || b2 > maxTrail
+        || b3 < minTrail || b3 > max4Trail) return -1;
+        b2 -= minTrail;
+        b3 -= minTrail;
+        UChar32 remainder = b3 % final4Multiplier;
+        if (remainder != 0) return -1;
+        b3 /= final4Multiplier;
+        b0 -= min4Primary;
+        result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
+    }
+    // final check
+    if (result < 0 || result > UCOL_MAX_INPUT) return -1;
+    return result;
+}
+
+
+static inline int32_t divideAndRoundUp(int a, int b) {
+    return 1 + (a-1)/b;
+}
+
  /* this function is either called from initUCA or from genUCA before
   * doing canonical closure for the UCA.
   */
-U_CAPI void U_EXPORT2 
-uprv_uca_initImplicitConstants(uint32_t baseByte) 
+
+/**
+ * Set up to generate implicits.
+ * @param minPrimary
+ * @param maxPrimary
+ * @param minTrail final byte
+ * @param maxTrail final byte
+ * @param gap3 the gap we leave for tailoring for 3-byte forms
+ * @param gap4 the gap we leave for tailoring for 4-byte forms
+ */
+static void initImplicitConstants(int minPrimary, int maxPrimary,
+                                    int minTrailIn, int maxTrailIn,
+                                    int gap3, int primaries3count,
+                                    UErrorCode *status) {
+    // some simple parameter checks
+    if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+    if (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+    if (primaries3count < 1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    };
+
+    minTrail = minTrailIn;
+    maxTrail = maxTrailIn;
+
+    min3Primary = minPrimary;
+    max4Primary = maxPrimary;
+    // compute constants for use later.
+    // number of values we can use in trailing bytes
+    // leave room for empty values between AND above, e.g. if gap = 2
+    // range 3..7 => +3 -4 -5 -6 -7: so 1 value
+    // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
+    // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
+    final3Multiplier = gap3 + 1;
+    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
+    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
+
+    // medials can use full range
+    medialCount = (maxTrail - minTrail + 1);
+    // find out how many values fit in each form
+    int32_t threeByteCount = medialCount * final3Count;
+    // now determine where the 3/4 boundary is.
+    // we use 3 bytes below the boundary, and 4 above
+    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
+    int32_t primaries4count = primariesAvailable - primaries3count;
+
+
+    int32_t min3ByteCoverage = primaries3count * threeByteCount;
+    min4Primary = minPrimary + primaries3count;
+    min4Boundary = min3ByteCoverage;
+    // Now expand out the multiplier for the 4 bytes, and redo.
+
+    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
+    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
+    //if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
+    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
+    //if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
+    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
+    //if (DEBUG) System.out.println("expandedGap: " + gap4);
+    if (gap4 < 1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    final4Multiplier = gap4 + 1;
+    final4Count = neededPerFinalByte;
+    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
+    /*
+    if (DEBUG) {
+        System.out.println("final4Count: " + final4Count);
+        for (int counter = 0; counter <= final4Count; ++counter) {
+            int value = minTrail + (1 + counter)*final4Multiplier;
+            System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
+        }
+    }
+    */
+}
+
+    /**
+     * Supply parameters for generating implicit CEs
+     */
+U_CAPI void U_EXPORT2
+uprv_uca_initImplicitConstants(int32_t minPrimary, int32_t maxPrimary, UErrorCode *status) {
+    // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
+    initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
+}
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV
+ucol_cleanup(void)
  {
-  IMPLICIT_BASE_BYTE = baseByte;
-  IMPLICIT_LIMIT_BYTE = IMPLICIT_BASE_BYTE + 4; // leave room for 1 3-byte and 2 4-byte forms
-
-  IMPLICIT_4BYTE_BOUNDARY = IMPLICIT_3BYTE_COUNT * OTHER_COUNT * LAST_COUNT;
-  LAST_MULTIPLIER = OTHER_COUNT / LAST_COUNT;
-  LAST2_MULTIPLIER = OTHER_COUNT / LAST_COUNT2;
-  IMPLICIT_BASE_3BYTE = (IMPLICIT_BASE_BYTE << 24) + 0x030300;
-  IMPLICIT_BASE_4BYTE = ((IMPLICIT_BASE_BYTE + IMPLICIT_3BYTE_COUNT) << 24) + 0x030303;
+    if (UCA_DATA_MEM) {
+        udata_close(UCA_DATA_MEM);
+        UCA_DATA_MEM = NULL;
+    }
+    if (_staticUCA) {
+        ucol_close(_staticUCA);
+        _staticUCA = NULL;
+    }
+    fcdTrieIndex = NULL;
+    return TRUE;
  }
-    
+U_CDECL_END
+
  /* do not close UCA returned by ucol_initUCA! */
  UCollator *
  ucol_initUCA(UErrorCode *status) {
@@ -1075,57 +1338,63 @@ ucol_initUCA(UErrorCode *status) {
          return NULL;
      }
      umtx_lock(NULL);
-    UBool f = (UCA == NULL);
+    UBool f = (_staticUCA == NULL);
      umtx_unlock(NULL);
-    
+
      if(f) {
          UCollator *newUCA = NULL;
          UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status);
-        
+
          if(U_FAILURE(*status)) {
              if (result) {
                  udata_close(result);
              }
              uprv_free(newUCA);
          }
-        
+
+        // init FCD data
+        if (fcdTrieIndex == NULL) {
+            fcdTrieIndex = unorm_getFCDTrie(status);
+            ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
+        }
+
          if(result != NULL) { /* It looks like sometimes we can fail to find the data file */
-            newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), newUCA, status);
+            newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), newUCA, newUCA, status);
              if(U_SUCCESS(*status)){
                  newUCA->rb = NULL;
-                               newUCA->elements = NULL;
-                               newUCA->validLocale = NULL;
-                               newUCA->requestedLocale = NULL;
-                               newUCA->hasRealData = FALSE; // real data lives in .dat file...
+                newUCA->elements = NULL;
+                newUCA->validLocale = NULL;
+                newUCA->requestedLocale = NULL;
+                newUCA->hasRealData = FALSE; // real data lives in .dat file...
                  newUCA->freeImageOnClose = FALSE;
                  umtx_lock(NULL);
-                if(UCA == NULL) {
-                    UCA = newUCA;
+                if(_staticUCA == NULL) {
+                    _staticUCA = newUCA;
                      UCA_DATA_MEM = result;
                      result = NULL;
                      newUCA = NULL;
                  }
                  umtx_unlock(NULL);
-                
+
                  if(newUCA != NULL) {
                      udata_close(result);
                      uprv_free(newUCA);
                  }
                  else {
-                    ucln_i18n_registerCleanup();
+                    ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
                  }
                  // Initalize variables for implicit generation
-                UCAconsts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
-                uprv_uca_initImplicitConstants(UCAconsts->UCA_PRIMARY_IMPLICIT_MIN);
-                UCA->mapping->getFoldingOffset = _getFoldingOffset;
+                const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)_staticUCA->image + _staticUCA->image->UCAConsts);
+                uprv_uca_initImplicitConstants(UCAconsts->UCA_PRIMARY_IMPLICIT_MIN, UCAconsts->UCA_PRIMARY_IMPLICIT_MAX, status);
+                _staticUCA->mapping->getFoldingOffset = _getFoldingOffset;
              }else{
                  udata_close(result);
                  uprv_free(newUCA);
-                UCA= NULL;
+                _staticUCA= NULL;
              }
          }
      }
-    return UCA;
+    return _staticUCA;
  }
  
  
@@ -1191,7 +1460,7 @@ inline void normalizeIterator(collIterate *collationSource) {
    UBool wasNormalized = FALSE;
    //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
    uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
-  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 
+  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
      (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
    if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) {
      // reallocate and terminate
@@ -1208,11 +1477,11 @@ inline void normalizeIterator(collIterate *collationSource) {
      status = U_ZERO_ERROR;
      //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
      collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
-    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer, 
+    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
      (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
    }
    // Terminate the buffer - we already checked that it is big enough
-  collationSource->writableBuffer[normLen] = 0; 
+  collationSource->writableBuffer[normLen] = 0;
    if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
        collationSource->flags |= UCOL_ITER_ALLOCATED;
    }
@@ -1462,12 +1731,12 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
            if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
            }
-          if(order == UCOL_NOT_FOUND) {   /* We couldn't find a good CE in the tailoring */
+          if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
              /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
-            order = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
+            order = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
  
              if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
-              order = ucol_prv_getSpecialCE(UCA, ch, order, collationSource, status);
+              order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
              }
            }
        }
@@ -1478,7 +1747,7 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
  U_CAPI uint32_t  U_EXPORT2
  ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
      return ucol_IGetNextCE(coll, collationSource, status);
-    }
+}
  
  
  /**
@@ -1627,7 +1896,7 @@ inline UBool collPrevIterFCD(collIterate *data)
   *  Handles both normal and iterative cases.
   *  No error checking - caller beware!
   */
-inline static 
+inline static
  UChar peekCharacter(collIterate *source, int32_t offset) {
    if(source->pos != NULL) {
      return *(source->pos + offset);
@@ -1662,6 +1931,26 @@ inline UBool isAtStartPrevIterate(collIterate *data) {
              *(data->pos - 1) == 0 && data->fcdPosition == NULL);
  }
  
+static
+inline void goBackOne(collIterate *data) {
+# if 0
+  // somehow, it looks like we need to keep iterator synced up
+  // at all times, as above.
+  if(data->pos) {
+    data->pos--;
+  }
+  if(data->iterator) {
+    data->iterator->previous(data->iterator);
+  }
+#endif
+  if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
+    data->iterator->previous(data->iterator);
+  }
+  if(data->pos) {
+    data->pos --;
+  }
+}
+
  /**
  * Inline function that gets a simple CE.
  * So what it does is that it will first check the expansion buffer. If the
@@ -1677,7 +1966,7 @@ static
  inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                                 UErrorCode *status)
  {
-    uint32_t result = UCOL_NULLORDER;
+    uint32_t result = (uint32_t)UCOL_NULLORDER;
      if (data->toReturn > data->CEs) {
          data->toReturn --;
          result = *(data->toReturn);
@@ -1744,19 +2033,19 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                  data->flags &= ~UCOL_WAS_HIRAGANA;
                }
              }
-            
+
              /*
-            * got a character to determine if there's fcd and/or normalization 
+            * got a character to determine if there's fcd and/or normalization
              * stuff to do.
              * if the current character is not fcd.
              * if current character is at the start of the string
              * Trailing combining class == 0.
              * Note if pos is in the writablebuffer, norm is always 0
              */
-            if (ch < ZERO_CC_LIMIT_ || 
+            if (ch < ZERO_CC_LIMIT_ ||
                // this should propel us out of the loop in the iterator case
                  (data->flags & UCOL_ITER_NORM) == 0 ||
-                (data->fcdPosition != NULL && data->fcdPosition <= data->pos) 
+                (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
                  || data->string == data->pos) {
                  break;
              }
@@ -1797,29 +2086,54 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
              result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
          }
          else {
-            if (ch <= 0xFF) {
-              result = coll->latinOneMapping[ch];
-              if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
-              }
-            }
-            else {
                // TODO: fix me for THAI - I reference *(data->pos-1)
                  if ((data->flags & UCOL_ITER_INNORMBUF) == 0 &&
                      /*UCOL_ISTHAIBASECONSONANT(ch) &&*/   // This is from the old specs - we now rearrange unconditionally
-                    data->pos > data->string &&
+                    // makes sure that we're not at the beggining of the string
+                    //data->pos > data->string &&
+                    !collIter_bos(data) &&
                      UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1)))
                      //UCOL_ISTHAIPREVOWEL(*(data->pos -1)))
                  {
-                    result = UCOL_THAI;
+                    collIterateState entryState;
+                    backupState(data, &entryState);
+                    // we have to check if the previous character is also Thai
+                    // if not, we can just set the result
+                    goBackOne(data);
+                    if(collIter_bos(data) || !UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1))) {
+                      loadState(data, &entryState, FALSE);
+                      result = UCOL_THAI;
+                    } else { // previous is also reordered
+                      // we need to go back as long as they are being reordered
+                      // count over the range of reorderable characters and see
+                      // if there is an even or odd number of them
+                      // if even, we should not reorder. If odd we should reorder.
+                      int32_t noReordered = 1; // the one we already detected
+                      while(!collIter_bos(data) && UCOL_ISTHAIPREVOWEL(peekCharacter(data, -1))) {
+                        noReordered++;
+                        goBackOne(data);
+                      }
+                      if(noReordered & 1) { // odd number of reorderables
+                        result = UCOL_THAI;
+                      } else {
+                        result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
+                      }
+                      loadState(data, &entryState, FALSE);
+                    }
                  }
+            else if (ch <= 0xFF) {
+              result = coll->latinOneMapping[ch];
+              //if (result > UCOL_NOT_FOUND) {
+                //result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
+              //}
+            }
                  else {
                      /*result = ucmpe32_get(coll->mapping, ch);*/
                      result = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
                  }
-                if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
-                }
+                    if (result > UCOL_NOT_FOUND) {
+                        result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
+                    }
                  if (result == UCOL_NOT_FOUND) {
                    if (!isAtStartPrevIterate(data) &&
                        ucol_contractionEndCP(ch, data->coll)) {
@@ -1827,16 +2141,17 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                    }
                    else {
                          /*result = ucmpe32_get(UCA->mapping, ch);*/
-                        result = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
+                      if(coll->UCA) {
+                        result = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
+                      }
                    }
  
-                  if (result > UCOL_NOT_FOUND) {
-                    result = ucol_prv_getSpecialPrevCE(UCA, ch, result, data, status);
+                  if (result > UCOL_NOT_FOUND && coll->UCA) {
+                    result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
                    }
                  }
              }
          }
-    }
      return result;
  }
  
@@ -2054,10 +2369,10 @@ inline UChar getNextNormalizedChar(collIterate *data)
      }
      else {
          if (innormbuf) {
-          // inside the normalization buffer, but at the end 
-          // (since we encountered zero). This means, in the 
-          // case we're using char iterator, that we need to 
-          // do another round of normalization. 
+          // inside the normalization buffer, but at the end
+          // (since we encountered zero). This means, in the
+          // case we're using char iterator, that we need to
+          // do another round of normalization.
            //if(data->origFlags & UCOL_USE_ITERATOR) {
              // we need to restore original flags,
              // otherwise, we'll lose them
@@ -2136,25 +2451,6 @@ inline UChar getNextNormalizedChar(collIterate *data)
      return ch;
  }
  
-static 
-inline void goBackOne(collIterate *data) {
-# if 0
-  // somehow, it looks like we need to keep iterator synced up
-  // at all times, as above.
-  if(data->pos) {
-    data->pos--;
-  }
-  if(data->iterator) {
-    data->iterator->previous(data->iterator);
-  }
-#endif
-  if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
-    data->iterator->previous(data->iterator);
-  } 
-  if(data->pos) {
-    data->pos --;
-  }
-}
  
  
  /**
@@ -2334,7 +2630,7 @@ inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
    if(isNonChar(cp)) {
      return 0;
    }
-  uint32_t r = getImplicitPrimary(cp);
+  uint32_t r = uprv_uca_getImplicitPrimary(cp);
    *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0;
    return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
  }
@@ -2584,7 +2880,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          collIterateState state;
          backupState(source, &state);
          if (collIter_eos(source) || !(UTF16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
-          // we chould have stepped one char forward and it might have turned that it 
+          // we chould have stepped one char forward and it might have turned that it
            // was not a trail surrogate. In that case, we have to backup.
            loadState(source, &state, TRUE);
            return 0;
@@ -2595,7 +2891,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              // We need to backup
              loadState(source, &state, TRUE);
              return CE;
-          } 
+          }
            // calculate the supplementary code point value, if surrogate was not tailored
            cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
          }
@@ -2604,9 +2900,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
      case THAI_TAG:
        /* Thai/Lao reordering */
          if  (((source->flags) & UCOL_ITER_INNORMBUF)      /* Already Swapped     ||                 */
-            || (source->iterator && !source->iterator->hasNext(source->iterator))
-            || (source->pos && source->endp == source->pos)                /* At end of string.  No swap possible || */
-            /*|| UCOL_ISTHAIBASECONSONANT(*(source->pos)) == 0*/)  /* next char not Thai base cons.*/ // This is from the old specs - we now rearrange unconditionally
+          || collIter_eos(source))                        /* At end of string.  No swap possible    */
          {
              // Treat Thai as a length one expansion */
              CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
@@ -2614,33 +2908,88 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          }
          else
          {
-            // Move the prevowel and the following base Consonant into the normalization buffer
-            //   with their order swapped
-
-            source->writableBuffer[0] = peekCharacter(source, 0);
-            source->writableBuffer[1] = peekCharacter(source, -1);
-            source->writableBuffer[2] = 0;
-
+          // Move the prevowel and the following base Consonant into the normalization buffer
+          //   with their order swapped
+          // Note: this operation might activate the normalization buffer. We have to check for
+          // that and act accordingly.
+          UChar thCh = getNextNormalizedChar(source);
+          UChar32 cp = 0;
+          if(U16_IS_LEAD(thCh)) {
+            if(!collIter_eos(source)) {
+              collIterateState thaiState;
+              backupState(source, &thaiState);
+              UChar trailCh = getNextNormalizedChar(source);
+              if(U16_IS_TRAIL(trailCh)) {
+                cp = U16_GET_SUPPLEMENTARY(thCh, trailCh);
+              } else {
+                loadState(source, &thaiState, TRUE);
+                cp = (UChar32)thCh;
+              }
+            } else {
+              cp = (UChar32)thCh;
+            }
+          } else {
+              cp = (UChar32)thCh;
+          }
+          // Now we have the character that needs to be decomposed
+          // if the normalizing buffer was not used, we can just use our structure and be happy.
+          if((source->flags & UCOL_ITER_INNORMBUF) == 0) {
+            // decompose into writable buffer
+            int32_t decompLen = unorm_getDecomposition(cp, FALSE, &(source->writableBuffer[1]), UCOL_WRITABLE_BUFFER_SIZE-1);
+            if(decompLen < 0) {
+              decompLen = -decompLen;
+            }
+            // reorder Thai and the character after it
+            if(decompLen >= 2 && U16_IS_LEAD(source->writableBuffer[1]) && U16_IS_TRAIL(source->writableBuffer[2])) {
+              source->writableBuffer[0] = source->writableBuffer[1];
+              source->writableBuffer[1] = source->writableBuffer[2];
+              source->writableBuffer[2] = ch;
+            } else {
+              source->writableBuffer[0] = source->writableBuffer[1];
+              source->writableBuffer[1] = ch;
+            }
+            // zero terminate, since normalization buffer is always zero terminated
+            source->writableBuffer[decompLen+1] = 0; // we added the prevowel
              if(source->pos) {
-              source->fcdPosition       = source->pos+1;   // Indicate where to continue in main input string
+              source->fcdPosition       = source->pos;   // Indicate where to continue in main input string
                                                             //   after exhausting the writableBuffer
-            } else if(source->iterator) {
-              source->iterator->next(source->iterator);
              }
-        source->pos   = source->writableBuffer;
+            source->pos   = source->writableBuffer;
              source->origFlags         = source->flags;
              source->flags            |= UCOL_ITER_INNORMBUF;
              source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
-
-        CE = UCOL_IGNORABLE;
+          }
+          else {
+              // stuff is already normalized... what to do here???
+
+              // if we are in the normalization buffer, thCh must be in it
+              // prove by contradiction
+              // if thCh is not in the normalization buffer,
+              // that means that trailCh is the normalization buffer
+              // that means that trailCh is a trail surrogate by the above
+              // bounding if block, this is a contradiction because there
+              // are no characters at the moment that decomposes to an
+              // unmatched surrogate. qed.
+              if (cp >= 0x10000) {
+                  source->writableBuffer[0] = source->writableBuffer[1];
+                  source->writableBuffer[1] = source->writableBuffer[2];
+                  source->writableBuffer[2] = ch;
+              }
+              else {
+                  source->writableBuffer[0] = source->writableBuffer[1];
+                  source->writableBuffer[1] = ch;
+              }
+              source->pos = source->writableBuffer;
+          }
+          CE = UCOL_IGNORABLE;
        }
        break;
      case SPEC_PROC_TAG:
        {
          // Special processing is getting a CE that is preceded by a certain prefix
          // Currently this is only needed for optimizing Japanese length and iteration marks.
-        // When we encouter a special processing tag, we go backwards and try to see if 
-        // we have a match. 
+        // When we encouter a special processing tag, we go backwards and try to see if
+        // we have a match.
          // Contraction tables are used - so the whole process is not unlike contraction.
          // prefix data is stored backwards in the table.
          const UChar *UCharOffset;
@@ -2653,9 +3002,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
  
          for(;;) {
          // This loop will run once per source string character, for as long as we
-        //  are matching a potential contraction sequence                  
+        //  are matching a potential contraction sequence
  
-          // First we position ourselves at the begining of contraction sequence 
+          // First we position ourselves at the begining of contraction sequence
            const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
            if (collIter_bos(source)) {
              CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
@@ -2676,7 +3025,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
            }
            else
            {
-              // if there is a completely ignorable code point in the middle of 
+              // if there is a completely ignorable code point in the middle of
                // a prefix, we need to act as if it's not there
                // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
                // lone surrogates cannot be set to zero as it would break other processing
@@ -2759,10 +3108,10 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              // So we'll pick whatever we have at the point...
              if (CE == UCOL_NOT_FOUND) {
                  // back up the source over all the chars we scanned going into this contraction.
-                CE = firstCE;  
+                CE = firstCE;
                  loadState(source, &state, TRUE);
                  if(source->origFlags & UCOL_USE_ITERATOR) {
-                  source->flags = source->origFlags;
+                    source->flags = source->origFlags;
                  }
              }
              break;
@@ -2784,12 +3133,12 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          }
          else
          {
-            // if there is a completely ignorable code point in the middle of 
+            // if there is a completely ignorable code point in the middle of
              // contraction, we need to act as if it's not there
              uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
              // it's easy for BMP code points
              if(isZeroCE == 0) {
-              continue;
+                continue;
              } else if(UTF_IS_LEAD(schar)) {
                if(!collIter_eos(source)) {
                  backupState(source, &state);
@@ -2807,9 +3156,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
                    continue;
                  }
                  loadState(source, &state, TRUE);
-              } else { // no  more characters, so broken surrogate pair... 
+              } else { // no  more characters, so broken surrogate pair...
                  // this contraction will ultimately fail, but not because of us
-                continue; 
+                continue;
                }
              } // else if(UTF_IS_LEAD(schar))
  
@@ -2817,14 +3166,14 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              //   Unless we have a discontiguous contraction, we have finished
              //   with this contraction.
              uint8_t sCC;
-            if (schar < 0x300 ||   
+            if (schar < 0x300 ||
                  maxCC == 0 ||
                  (sCC = i_getCombiningClass(schar, coll)) == 0 ||
-                sCC>maxCC || 
+                sCC>maxCC ||
                  (allSame != 0 && sCC == maxCC) ||
                  collIter_eos(source)) {
-                    //  Contraction can not be discontiguous.  
-                    goBackOne(source);  // back up the source string by one, 
+                    //  Contraction can not be discontiguous.
+                    goBackOne(source);  // back up the source string by one,
                                          //  because  the character we just looked at was
                                          //  not part of the contraction.   */
                      CE = *(coll->contractionCEs +
@@ -2856,12 +3205,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              /*   scanned through what ultimately proved to not be a contraction.       */
            loadState(source, &state, TRUE);
            CE = firstCE;
-          if(source->origFlags & UCOL_USE_ITERATOR) {
-            source->flags = source->origFlags;
-          }
            break;
          }
-        
+
          if(!isContraction(CE)) {
              // The source string char was in the contraction table, and the corresponding
              //   CE is not a contraction CE.  We completed the contraction, break
@@ -2870,7 +3216,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              //   the contraction.
              break;
          }
-        
+
  
          // The source string char was in the contraction table, and the corresponding
          //   CE is IS  a contraction CE.  We will continue looping to check the source
@@ -2878,7 +3224,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
          if(tempCE != UCOL_NOT_FOUND) {
              // We have scanned a a section of source string for which there is a
-            //  CE from the contraction table.  Remember the CE and scan position, so 
+            //  CE from the contraction table.  Remember the CE and scan position, so
              //  that we can return to this point if further scanning fails to
              //  match a longer contraction sequence.
              firstCE = tempCE;
@@ -2895,9 +3241,9 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
              //loadState(source, &tempState, TRUE);
  
              // The problem is that for incomplete contractions we have to remember the previous
-            // position. Before, the only thing I needed to do was state.pos--; 
+            // position. Before, the only thing I needed to do was state.pos--;
              // After iterator introduction and especially after introduction of normalizing
-            // iterators, it became much more difficult to decrease the saved state. 
+            // iterators, it became much more difficult to decrease the saved state.
              // I'm not yet sure which of the two methods above is faster.
          }
        } // for(;;)
@@ -2923,209 +3269,237 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          for(i = 1; i<size; i++) {
            *(source->CEpos++) = *CEOffset++;
          }
-      } else { /* else, we do */
-        while(*CEOffset != 0) {
-          *(source->CEpos++) = *CEOffset++;
+      } else { /* else, we do */
+        while(*CEOffset != 0) {
+          *(source->CEpos++) = *CEOffset++;
+        }
+      }
+      return CE;
+      }
+    case DIGIT_TAG:
+      {
+      /*
+         We do a check to see if we want to collate digits as numbers; if so we generate
+         a custom collation key. Otherwise we pull out the value stored in the expansion table.
+      */
+      uint32_t size;
+      uint32_t i;    /* general counter */
+      collIterateState digitState;
+
+      if (source->coll->numericCollation == UCOL_ON){
+        UChar32 char32 = 0;
+
+        uint32_t digIndx = 0;
+        uint32_t endIndex = 0;
+        uint32_t trailingZeroIndex = 0;
+
+        uint32_t primWeight = 0;
+
+        int32_t digVal = 0;
+        uint8_t collateVal = 0;
+
+        UBool nonZeroValReached = FALSE;
+
+        uint8_t *numTempBuf;
+        uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
+        uint32_t numTempBufSize = UCOL_MAX_BUFFER;
+
+        numTempBuf = stackNumTempBuf;
+        /*
+             We parse the source string until we hit a char that's NOT a digit.
+            Use this u_charDigitValue. This might be slow because we have to
+            handle surrogates...
+        */
+/*
+        if (U16_IS_LEAD(ch)){
+          if (!collIter_eos(source)) {
+            backupState(source, &digitState);
+            UChar trail = getNextNormalizedChar(source);
+            if(U16_IS_TRAIL(trail)) {
+              char32 = U16_GET_SUPPLEMENTARY(ch, trail);
+            } else {
+              loadState(source, &digitState, TRUE);
+              char32 = ch;
+            }
+          } else {
+            char32 = ch;
+          }
+        } else {
+          char32 = ch;
+        }
+        digVal = u_charDigitValue(char32);
+*/
+        digVal = u_charDigitValue(cp); // if we have arrived here, we have
+        // already processed possible supplementaries that trigered the digit tag -
+        // all supplementaries are marked in the UCA.
+        /*
+            We  pad a zero in front of the first element anyways. This takes
+            care of the (probably) most common case where people are sorting things followed
+            by a single digit
+        */
+        digIndx++;
+        for(;;){
+        // Make sure we have enough space.
+        if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
+        {
+            numTempBufSize *= 2;
+            if (numTempBuf == stackNumTempBuf){
+                numTempBuf = (uint8_t *)uprv_malloc(sizeof(uint8_t) * numTempBufSize);
+                uprv_memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
+            }else
+                uprv_realloc(numTempBuf, numTempBufSize);
+        }
+
+            // Skipping over leading zeroes.
+            if (digVal != 0 || nonZeroValReached){
+                if (digVal != 0 && !nonZeroValReached)
+                    nonZeroValReached = TRUE;
+
+                /*
+                    We parse the digit string into base 100 numbers (this fits into a byte).
+                    We only add to the buffer in twos, thus if we are parsing an odd character,
+                    that serves as the 'tens' digit while the if we are parsing an even one, that
+                    is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
+                    a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
+                    overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
+                    than all the other bytes.
+                 */
+
+                if (digIndx % 2 == 1){
+                    collateVal += (uint8_t)digVal;
+
+                    // We don't enter the low-order-digit case unless we've already seen
+                    // the high order, or for the first digit, which is always non-zero.
+                    if (collateVal != 0)
+                        trailingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                    collateVal = 0;
+                }
+                else{
+                    // We drop the collation value into the buffer so if we need to do
+                    // a "front patch" we don't have to check to see if we're hitting the
+                    // last element.
+                    collateVal = (uint8_t)(digVal * 10);
+
+                    // Check for trailing zeroes.
+                    if (collateVal == 0)
+                    {
+                        if (!trailingZeroIndex)
+                            trailingZeroIndex = (digIndx/2) + 2;
+                    }
+                    else
+                        trailingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                }
+                digIndx++;
+            }
+
+            // Get next character.
+            if (!collIter_eos(source)){
+                ch = getNextNormalizedChar(source);
+                if (U16_IS_LEAD(ch)){
+                  if (!collIter_eos(source)) {
+                    backupState(source, &digitState);
+                    UChar trail = getNextNormalizedChar(source);
+                    if(U16_IS_TRAIL(trail)) {
+                      char32 = U16_GET_SUPPLEMENTARY(ch, trail);
+                    } else {
+                      loadState(source, &digitState, TRUE);
+                      char32 = ch;
+                    }
+                  }
+                } else {
+                  char32 = ch;
+                }
+
+                if ((digVal = u_charDigitValue(char32)) == -1){
+                    // Resetting position to point to the next unprocessed char. We
+                    // overshot it when doing our test/set for numbers.
+                  if (char32 > 0xFFFF) { // For surrogates.
+                    loadState(source, &digitState, TRUE);
+                    //goBackOne(source);
+                  }
+              goBackOne(source);
+                  break;
+                }
+            } else {
+              break;
+            }
+        }
+
+        if (nonZeroValReached == FALSE){
+            digIndx = 2;
+            numTempBuf[2] = 6;
          }
+
+        endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
+        if (digIndx % 2 != 0){
+            /*
+                We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what
+                we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
+                Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
+                single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
+            */
+
+            for(i = 2; i < endIndex; i++){
+                numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10) * 10) +
+                                    (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
+            }
+            --digIndx;
+        }
+
+        // Subtract one off of the last byte.
+        numTempBuf[endIndex-1] -= 1;
+
+        /*
+            We want to skip over the first two slots in the buffer. The first slot
+            is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
+            sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
+        */
+        numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
+        numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
+
+        // Now transfer the collation key to our collIterate struct.
+        // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
+          size = ((endIndex+1) & ~1)/2;
+          CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
+                (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
+                UCOL_BYTE_COMMON; // Tertiary weight.
+          i = 2; // Reset the index into the buffer.
+          while(i < endIndex)
+          {
+            primWeight = numTempBuf[i++] << 8;
+            if ( i < endIndex)
+                primWeight |= numTempBuf[i++];
+            *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
+          }
+
+          if (numTempBuf != stackNumTempBuf)
+            uprv_free(numTempBuf);
+      } else {
+        // no numeric mode, we'll just switch to whatever we stashed and continue
+          CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
+          CE = *CEOffset++;
+          break;
+#if 0
+          CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
+          size = getExpansionCount(CE);
+          CE = *CEOffset++;
+          if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
+            for(i = 1; i<size; i++) {
+              *(source->CEpos++) = *CEOffset++;
+            }
+          } else { /* else, we do */
+            while(*CEOffset != 0) {
+              *(source->CEpos++) = *CEOffset++;
+            }
+          }
+#endif
        }
        return CE;
        }
-    case DIGIT_TAG:      
-      {
-      /* 
-        We do a check to see if we want to collate digits as numbers; if so we generate
-         a custom collation key. Otherwise we pull out the value stored in the expansion table.
-      */
-      uint32_t size;
-      uint32_t i;    /* general counter */
-      
-      if (coll->numericCollation == UCOL_ON){
-               UChar32 char32 = 0;             
-               
-               uint32_t digIndx = 0; 
-               uint32_t endIndex = 0;          
-               uint32_t trailingZeroIndex = 0;
-
-               uint32_t primWeight = 0;
-               
-               uint32_t digVal = 0;            
-               uint8_t collateVal = 0;
-               
-               UBool nonZeroValReached = false;
-
-               uint8_t *numTempBuf;
-               uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
-               uint32_t numTempBufSize = UCOL_MAX_BUFFER;
-               
-               numTempBuf = stackNumTempBuf;
-               /*
-                        We parse the source string until we hit a char that's NOT a digit.
-               Use this u_charDigitValue. This might be slow because we have to 
-               handle surrogates...
-       */
-       
-       if (U16_IS_LEAD(ch)){
-               if (!collIter_eos(source))
-                               char32 = U16_GET_SUPPLEMENTARY(ch, getNextNormalizedChar(source));
-                       else
-                               char32 = ch;
-       }
-               else
-                       char32 = ch;
-               digVal = u_charDigitValue(char32);
-               
-               /* 
-                       We  pad a zero in front of the first element anyways. This takes
-                       care of the (probably) most common case where people are sorting things followed
-                       by a single digit
-               */
-               digIndx++;
-       for(;;){
-       // Make sure we have enough space.
-       if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
-       {
-               numTempBufSize *= 2;
-               if (numTempBuf == stackNumTempBuf){
-                       numTempBuf = (uint8_t *)malloc(sizeof(uint8_t) * numTempBufSize);
-                       memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
-               }else
-                       realloc(numTempBuf, numTempBufSize);
-       }
-       
-                       // Skipping over leading zeroes.        
-               if (digVal != 0 || nonZeroValReached){
-                               if (digVal != 0 && !nonZeroValReached)
-                                       nonZeroValReached = true;
-                               
-                               /*
-                                       We parse the digit string into base 100 numbers (this fits into a byte).
-                                       We only add to the buffer in twos, thus if we are parsing an odd character,
-                                       that serves as the 'tens' digit while the if we are parsing an even one, that 
-                                       is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 
-                                       a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
-                                       overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
-                                       than all the other bytes. 
-                                */
-
-                               if (digIndx % 2 == 1){
-                                       collateVal += (uint8_t)digVal;  
-                                        
-                                       // We don't enter the low-order-digit case unless we've already seen
-                                       // the high order, or for the first digit, which is always non-zero.
-                                       if (collateVal != 0)
-                                               trailingZeroIndex = 0;
-                                               
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                                       collateVal = 0;
-                               }
-                               else{
-                                       // We drop the collation value into the buffer so if we need to do
-                                       // a "front patch" we don't have to check to see if we're hitting the
-                                       // last element.
-                                       collateVal = (uint8_t)(digVal * 10);
-
-                                       // Check for trailing zeroes.
-                                       if (collateVal == 0)
-                                       {
-                                               if (!trailingZeroIndex)
-                                                       trailingZeroIndex = (digIndx/2) + 2;
-                                       }
-                                       else
-                                               trailingZeroIndex = 0;
-
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                               }
-                               digIndx++;
-               }
-               
-               // Get next character.
-               if (!collIter_eos(source)){
-                               ch = getNextNormalizedChar(source);
-                               if (U16_IS_LEAD(ch)){
-                                       if (!collIter_eos(source))
-                                               char32 = U16_GET_SUPPLEMENTARY(ch, getNextNormalizedChar(source));
-                               }
-                               else
-                                       char32 = ch;
-                                       
-                               if ((digVal = u_charDigitValue(char32)) == -1){
-                                       // Resetting position to point to the next unprocessed char. We
-                                       // overshot it when doing our test/set for numbers.
-                                       goBackOne(source);
-                                       if (char32 > 0xFFFF) // For surrogates.
-                                               goBackOne(source);
-                                       break;
-                               }
-                       }else
-                               break;
-               }
-               
-               if (nonZeroValReached == false){
-                       digIndx = 2;
-                       numTempBuf[2] = 6;
-               }
-               
-               endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;                          
-               if (digIndx % 2 != 0){
-                       /* 
-                               We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what
-                               we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward. 
-                               Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
-                               single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
-                       */
-
-                       for(i = 2; i < endIndex; i++){
-                               numTempBuf[i] =         (((((numTempBuf[i] - 6)/2) % 10) * 10) + 
-                                                                       (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
-                       }
-                       --digIndx;
-               }
-               
-               // Subtract one off of the last byte. 
-               numTempBuf[endIndex-1] -= 1;                    
-                               
-               /* 
-                       We want to skip over the first two slots in the buffer. The first slot
-                       is reserved for the header byte 0x1B. The second slot is for the 
-                       sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
-               */ 
-               numTempBuf[0] = 0x1B;
-               numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
-               
-               // Now transfer the collation key to our collIterate struct.
-               // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
-                 size = ((endIndex+1) & ~1)/2;
-                 CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
-                               (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
-                               UCOL_BYTE_COMMON; // Tertiary weight.
-                 i = 2; // Reset the index into the buffer.
-                 while(i < endIndex)
-                 {
-                       primWeight = numTempBuf[i++] << 8;
-                       if ( i < endIndex)
-                               primWeight |= numTempBuf[i++];
-                       *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
-                 }
-                 
-                 if (numTempBuf != stackNumTempBuf)
-                       free(numTempBuf);
-      }
-      else{
-                 CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
-                 size = getExpansionCount(CE);
-                 CE = *CEOffset++;
-                 if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
-                       for(i = 1; i<size; i++) {
-                         *(source->CEpos++) = *CEOffset++;
-                       }
-                 } else { /* else, we do */
-                       while(*CEOffset != 0) {
-                         *(source->CEpos++) = *CEOffset++;
-                       }
-                 }
-         }
-      return CE;
-      }
      /* various implicits optimization */
      // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit
      case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
@@ -3160,7 +3534,7 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
          const uint32_t
            SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
          //const uint32_t LCount = 19;
-        const uint32_t VCount = 21; 
+        const uint32_t VCount = 21;
          const uint32_t TCount = 28;
          //const uint32_t NCount = VCount * TCount;   // 588
          //const uint32_t SCount = LCount * NCount;   // 11172
@@ -3196,12 +3570,12 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
            return UTRIE_GET32_FROM_LEAD(coll->mapping, L);
  
          } else { // Jamo is Special
-          // Since Hanguls pass the FCD check, it is 
+          // Since Hanguls pass the FCD check, it is
            // guaranteed that we won't be in
            // the normalization buffer if something like this happens
            // However, if we are using a uchar iterator and normalization
            // is ON, the Hangul that lead us here is going to be in that
-          // normalization buffer. Here we want to restore the uchar 
+          // normalization buffer. Here we want to restore the uchar
            // iterator state and pull out of the normalization buffer
            if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
              source->flags = source->origFlags; // restore the iterator
@@ -3249,7 +3623,7 @@ inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
      return 0;
    }
  
-  uint32_t r = getImplicitPrimary(cp);
+  uint32_t r = uprv_uca_getImplicitPrimary(cp);
  
    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
    collationSource->toReturn = collationSource->CEpos;
@@ -3306,12 +3680,40 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
            Move the prevowel and the following base Consonant into the
            normalization buffer with their order swapped
            */
+          UChar32 cp = (UChar32)peekCharacter(source, 0);
+          UBool reorder = TRUE;
+
+          int32_t decompLen = unorm_getDecomposition(cp, FALSE, source->writableBuffer, UCOL_WRITABLE_BUFFER_SIZE-1);
+          if(decompLen < 0) {
+            decompLen = -decompLen; // there was no decomposition
+          } else { // we need to check if we will hit a contraction trigger because of decomposition
+            int32_t i = decompLen;
+            for(i = 0; i < decompLen; i++) {
+              if(ucol_contractionEndCP(source->writableBuffer[i], coll)) {
+                reorder = FALSE;
+              }
+            }
+          }
+
+          UChar *tempbuffer = source->writableBuffer +
+                              (source->writableBufSize - 1);
+          uprv_memcpy(tempbuffer-decompLen + 1, source->writableBuffer, sizeof(UChar)*decompLen);
+          if(reorder) {
+            *(tempbuffer - decompLen) = *(tempbuffer - decompLen + 1);
+            *(tempbuffer - decompLen + 1)     = peekCharacter(source, -1);
+          } else {
+            *(tempbuffer - decompLen) = peekCharacter(source, -1);
+          }
+          *(tempbuffer - decompLen - 1) = 0;
+
+
+/*
            UChar *tempbuffer = source->writableBuffer +
                                (source->writableBufSize - 1);
            *(tempbuffer - 2) = 0;
            *(tempbuffer - 1) = peekCharacter(source, 0);
            *(tempbuffer)     = peekCharacter(source, -1);
-
+*/
            /*
            Indicate where to continue in main input string after exhausting
            the writableBuffer
@@ -3322,7 +3724,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
              source->fcdPosition       = source->pos-2;
            }
  
-          source->pos               = tempbuffer;
+          source->pos               = tempbuffer+1; // we're doing predecrement, right?
            source->origFlags         = source->flags;
            source->flags            |= UCOL_ITER_INNORMBUF;
            source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
@@ -3335,8 +3737,8 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
        {
          // Special processing is getting a CE that is preceded by a certain prefix
          // Currently this is only needed for optimizing Japanese length and iteration marks.
-        // When we encouter a special processing tag, we go backwards and try to see if 
-        // we have a match. 
+        // When we encouter a special processing tag, we go backwards and try to see if
+        // we have a match.
          // Contraction tables are used - so the whole process is not unlike contraction.
          // prefix data is stored backwards in the table.
          const UChar *UCharOffset;
@@ -3345,9 +3747,9 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          backupState(source, &prefixState);
          for(;;) {
          // This loop will run once per source string character, for as long as we
-        //  are matching a potential contraction sequence                  
+        //  are matching a potential contraction sequence
  
-          // First we position ourselves at the begining of contraction sequence 
+          // First we position ourselves at the begining of contraction sequence
            const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  
            if (collIter_bos(source)) {
@@ -3368,8 +3770,8 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                    (UCharOffset - coll->contractionIndex));
            }
            else
-          {             
-              // if there is a completely ignorable code point in the middle of 
+          {
+              // if there is a completely ignorable code point in the middle of
                // a prefix, we need to act as if it's not there
                // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
                // lone surrogates cannot be set to zero as it would break other processing
@@ -3445,7 +3847,9 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          *(UCharOffset --) = 0;
          noChars = 0;
          // have to swap thai characters
-        while (ucol_unsafeCP(schar, coll) || UCOL_ISTHAIBASECONSONANT(schar)) {
+        while (ucol_unsafeCP(schar, coll) || UCOL_ISTHAIPREVOWEL(peekCharacter(source, -1))) {
+          // we might have ended here after trying to reorder Thai, but seeing that there are unsafe points
+          // in the backward processing
              *(UCharOffset) = schar;
              noChars++;
              UCharOffset --;
@@ -3467,11 +3871,11 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                }
                  strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
                                               (newsize + UCOL_MAX_BUFFER));
-                               /* test for NULL */
-                               if (strbuffer == NULL) {
-                                       *status = U_MEMORY_ALLOCATION_ERROR;
-                                       return UCOL_NO_MORE_CES;
-                               }
+                /* test for NULL */
+                if (strbuffer == NULL) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    return UCOL_NO_MORE_CES;
+                }
                  UCharOffset = strbuffer + newsize;
                  uprv_memcpy(UCharOffset, buffer,
                                               UCOL_MAX_BUFFER * sizeof(UChar));
@@ -3488,7 +3892,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          *(UCharOffset) = schar;
          noChars++;
  
-        /* a new collIterate is used to simply things, since using the current
+        /* a new collIterate is used to simplify things, since using the current
          collIterate will mean that the forward and backwards iteration will
          share and change the same buffers. we don't want to get into that. */
          collIterate temp;
@@ -3510,7 +3914,7 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                  if (strbuffer != buffer) {
                      uprv_free(strbuffer);
                  }
-                return UCOL_NULLORDER;
+                return (uint32_t)UCOL_NULLORDER;
              }
              CE = ucol_IGetNextCE(coll, &temp, status);
          }
@@ -3555,230 +3959,258 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
          }
        }
        source->toReturn = source->CEpos - 1;
-      // in case of one element expansion, we 
+      // in case of one element expansion, we
        // want to immediately return CEpos
        if(source->toReturn == source->CEs) {
          source->CEpos = source->CEs;
        }
        return *(source->toReturn);
-     case DIGIT_TAG:      
+     case DIGIT_TAG:
        {
-      /* 
-        We do a check to see if we want to collate digits as numbers; if so we generate
+      /*
+         We do a check to see if we want to collate digits as numbers; if so we generate
           a custom collation key. Otherwise we pull out the value stored in the expansion table.
        */
-      uint32_t size;
+      //uint32_t size;
        uint32_t i;    /* general counter */
-      
-      if (coll->numericCollation == UCOL_ON){
-               UChar32 char32 = 0;             
-               
-               uint32_t digIndx = 0; 
-               uint32_t endIndex = 0;          
-               uint32_t leadingZeroIndex = 0;
-               uint32_t trailingZeroCount = 0;
-
-               uint32_t primWeight = 0;
-               
-               uint32_t digVal = 0;            
-               uint8_t collateVal = 0;
-               
-               UBool nonZeroValReached = false;
-
-               uint8_t *numTempBuf;
-               uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
-               uint32_t numTempBufSize = UCOL_MAX_BUFFER;
-               
-               numTempBuf = stackNumTempBuf;
-               /*
-                        We parse the source string until we hit a char that's NOT a digit.
-               Use this u_charDigitValue. This might be slow because we have to 
-               handle surrogates...
-       */
-       
-       if (U16_IS_TRAIL (ch)){
-               if (!collIter_bos(source)){
-                               char32 = U16_GET_SUPPLEMENTARY(getPrevNormalizedChar(source),ch);
-                               goBackOne(source);
-                       }       
-                       else
-                               char32 = ch;
-       }
-               else
-                       char32 = ch;
-               digVal = u_charDigitValue(char32);
-               
-       for(;;){
-                       // Make sure we have enough space.
-                       if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
-                       {
-                               numTempBufSize *= 2;
-                               if (numTempBuf == stackNumTempBuf){
-                                       numTempBuf = (uint8_t *)malloc(sizeof(uint8_t) * numTempBufSize);
-                                       memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
-                               }else
-                                       realloc(numTempBuf, numTempBufSize);
-                       }
-       
-                       // Skip over trailing zeroes, and keep a count of them.
-                       if (digVal != 0)
-                               nonZeroValReached = true;
-               if (nonZeroValReached){
-                               /*
-                                       We parse the digit string into base 100 numbers (this fits into a byte).
-                                       We only add to the buffer in twos, thus if we are parsing an odd character,
-                                       that serves as the 'tens' digit while the if we are parsing an even one, that 
-                                       is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into 
-                                       a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
-                                       overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
-                                       than all the other bytes. 
-                                       
-                                       Since we're doing in this reverse we want to put the first digit encountered into the
-                                       ones place and the second digit encountered into the tens place.
-                                */
-                               
-                               if ((digIndx + trailingZeroCount) % 2 == 1){
-                                       // High-order digit case (tens place)
-                                       collateVal += digVal * 10;
-                                       
-                                       // We cannot set leadingZeroIndex unless it has been set for the
-                                       // low-order digit. Therefore, all we can do for the high-order
-                                       // digit is turn it off, never on.
-                                       // The only time we will have a high digit without a low is for
-                                       // the very first non-zero digit, so no zero check is necessary.
-                                       if (collateVal != 0)
-                                               leadingZeroIndex = 0;
-                                                                                       
-                                       numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
-                                       collateVal = 0;
-                               }
-                               else{
-                                       // Low-order digit case (ones place)
-                                       collateVal = digVal;    
-
-                                       // Check for leading zeroes.
-                                       if (collateVal == 0)
-                                       {
-                                               if (!leadingZeroIndex)
-                                                       leadingZeroIndex = (digIndx/2) + 2;
-                                       }
-                                       else
-                                               leadingZeroIndex = 0;
-                                       
-                                       // No need to write to buffer; the case of a last odd digit
-                                       // is handled below.
-                               }
-                       ++digIndx;
-               }
-               else
-                       ++trailingZeroCount;
-               
-               if (!collIter_bos(source)){
-                               ch = getPrevNormalizedChar(source);
-                               goBackOne(source);
-                               if (U16_IS_TRAIL(ch)){
-                                       if (!collIter_bos(source))
-                                       {
-                                               char32 = U16_GET_SUPPLEMENTARY(getPrevNormalizedChar(source),ch);
-                                               goBackOne(source);
-                                       }
-                               }
-                               else
-                                       char32 = ch;
-                                       
-                               if ((digVal = u_charDigitValue(char32)) == -1){
-                                       // Don't need to "reverse" the goBackOne call,
-                                       // as this points to the next position to process..
-                                       if (char32 > 0xFFFF) // For surrogates.
-                                               getNextNormalizedChar(source);
-                                       break;
-                               }
-                       }else
-                               break;
-               }
-
-               if (nonZeroValReached == false){
-                       digIndx = 2;
-                       trailingZeroCount = 0;
-                       numTempBuf[2] = 6;
-               }
-
-               if ((digIndx + trailingZeroCount) % 2 != 0){
-                               numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
-                               digIndx += 1;
-               }
-               
-               endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;                            
-               
-               // Subtract one off of the last byte. Really the first byte here, but it's reversed...
-               numTempBuf[2] -= 1;                     
-                               
-               /* 
-                       We want to skip over the first two slots in the buffer. The first slot
-                       is reserved for the header byte 0x1B. The second slot is for the 
-                       sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
-                       The exponent must be adjusted by the number of leading zeroes, and the number of
-                       trailing zeroes.
-               */ 
-               numTempBuf[0] = 0x1B;
-               uint32_t exponent = (digIndx+trailingZeroCount)/2;
-               if (leadingZeroIndex)
-                       exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
-               numTempBuf[1] = 0x80 + (exponent & 0x7F);
-               
-               // Now transfer the collation key to our collIterate struct.
-               // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
-               //size = ((endIndex+1) & ~1)/2;
-                 *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
-                               (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
-                               UCOL_BYTE_COMMON; // Tertiary weight.
-                 i = endIndex - 1; // Reset the index into the buffer.
-                 while(i >= 2)
-                 {
-                       primWeight = numTempBuf[i--] << 8;
-                       if ( i >= 2)
-                               primWeight |= numTempBuf[i--];
-                       *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
-                 }
-                 if (numTempBuf != stackNumTempBuf)
-                       free(numTempBuf);
-                       
-                 source->toReturn = source->CEpos -1;
-                 return *(source->toReturn);
+      collIterateState state;
+
+      if (source->coll->numericCollation == UCOL_ON){
+        UChar32 char32 = 0;
+
+        uint32_t digIndx = 0;
+        uint32_t endIndex = 0;
+        uint32_t leadingZeroIndex = 0;
+        uint32_t trailingZeroCount = 0;
+
+        uint32_t primWeight = 0;
+
+        int32_t digVal = 0;
+        uint8_t collateVal = 0;
+
+        UBool nonZeroValReached = FALSE;
+
+        uint8_t *numTempBuf;
+        uint8_t stackNumTempBuf[UCOL_MAX_BUFFER]; // I just need a temporary place to store my generated CEs.
+        uint32_t numTempBufSize = UCOL_MAX_BUFFER;
+
+        numTempBuf = stackNumTempBuf;
+        /*
+             We parse the source string until we hit a char that's NOT a digit.
+            Use this u_charDigitValue. This might be slow because we have to
+            handle surrogates...
+        */
+
+        if (U16_IS_TRAIL (ch)){
+            if (!collIter_bos(source)){
+              UChar lead = getPrevNormalizedChar(source);
+              if(U16_IS_LEAD(lead)) {
+                char32 = U16_GET_SUPPLEMENTARY(lead,ch);
+                goBackOne(source);
+              } else {
+                char32 = ch;
+              }
+            } else {
+                char32 = ch;
+            }
+        } else {
+            char32 = ch;
+        }
+        digVal = u_charDigitValue(char32);
+
+        for(;;){
+        // Make sure we have enough space.
+        if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
+        {
+            numTempBufSize *= 2;
+            if (numTempBuf == stackNumTempBuf){
+                numTempBuf = (uint8_t *)uprv_malloc(sizeof(uint8_t) * numTempBufSize);
+                uprv_memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
+            }else
+                uprv_realloc(numTempBuf, numTempBufSize);
+        }
+
+            // Skip over trailing zeroes, and keep a count of them.
+            if (digVal != 0)
+                    nonZeroValReached = TRUE;
+            if (nonZeroValReached){
+                /*
+                    We parse the digit string into base 100 numbers (this fits into a byte).
+                    We only add to the buffer in twos, thus if we are parsing an odd character,
+                    that serves as the 'tens' digit while the if we are parsing an even one, that
+                    is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
+                    a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
+                    overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
+                    than all the other bytes.
+
+                    Since we're doing in this reverse we want to put the first digit encountered into the
+                    ones place and the second digit encountered into the tens place.
+                 */
+
+                if ((digIndx + trailingZeroCount) % 2 == 1){
+                    // High-order digit case (tens place)
+                    collateVal += (uint8_t)(digVal * 10);
+
+                    // We cannot set leadingZeroIndex unless it has been set for the
+                    // low-order digit. Therefore, all we can do for the high-order
+                    // digit is turn it off, never on.
+                    // The only time we will have a high digit without a low is for
+                    // the very first non-zero digit, so no zero check is necessary.
+                    if (collateVal != 0)
+                        leadingZeroIndex = 0;
+
+                    numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
+                    collateVal = 0;
+                }
+                else{
+                    // Low-order digit case (ones place)
+                    collateVal = (uint8_t)digVal;
+
+                    // Check for leading zeroes.
+                    if (collateVal == 0)
+                    {
+                        if (!leadingZeroIndex)
+                            leadingZeroIndex = (digIndx/2) + 2;
+                    }
+                    else
+                        leadingZeroIndex = 0;
+
+                    // No need to write to buffer; the case of a last odd digit
+                    // is handled below.
+                }
+                ++digIndx;
+            }
+            else
+                ++trailingZeroCount;
+
+            if (!collIter_bos(source)){
+                ch = getPrevNormalizedChar(source);
+                //goBackOne(source);
+                if (U16_IS_TRAIL(ch)){
+                    backupState(source, &state);
+                    if (!collIter_bos(source))
+                    {
+                        goBackOne(source);
+                        UChar lead = getPrevNormalizedChar(source);
+                        if(U16_IS_LEAD(lead)) {
+                          char32 = U16_GET_SUPPLEMENTARY(lead,ch);
+                        } else {
+                          loadState(source, &state, FALSE);
+                          char32 = ch;
+                        }
+                    }
+                }
+                else
+                    char32 = ch;
+
+                if ((digVal = u_charDigitValue(char32)) == -1){
+                  if (char32 > 0xFFFF) {// For surrogates.
+                    loadState(source, &state, FALSE);
+                  }
+                    // Don't need to "reverse" the goBackOne call,
+                    // as this points to the next position to process..
+                    //if (char32 > 0xFFFF) // For surrogates.
+                        //getNextNormalizedChar(source);
+                    break;
+                }
+                goBackOne(source);
+            }else
+                break;
+        }
+
+        if (nonZeroValReached == FALSE){
+            digIndx = 2;
+            trailingZeroCount = 0;
+            numTempBuf[2] = 6;
+        }
+
+        if ((digIndx + trailingZeroCount) % 2 != 0){
+                numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
+            digIndx += 1;       // The implicit leading zero
+            }
+        if (trailingZeroCount % 2 != 0){
+            // We had to consume one trailing zero for the low digit
+            // of the least significant byte
+            digIndx += 1;       // The trailing zero not in the exponent
+            trailingZeroCount -= 1;
+        }
+
+        endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
+
+        // Subtract one off of the last byte. Really the first byte here, but it's reversed...
+        numTempBuf[2] -= 1;
+
+        /*
+            We want to skip over the first two slots in the buffer. The first slot
+            is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
+            sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
+            The exponent must be adjusted by the number of leading zeroes, and the number of
+            trailing zeroes.
+        */
+        numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
+        uint32_t exponent = (digIndx+trailingZeroCount)/2;
+        if (leadingZeroIndex)
+            exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
+        numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
+
+        // Now transfer the collation key to our collIterate struct.
+        // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
+        //size = ((endIndex+1) & ~1)/2;
+          *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
+                (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
+                UCOL_BYTE_COMMON; // Tertiary weight.
+          i = endIndex - 1; // Reset the index into the buffer.
+          while(i >= 2)
+          {
+            primWeight = numTempBuf[i--] << 8;
+            if ( i >= 2)
+                primWeight |= numTempBuf[i--];
+            *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
+          }
+          if (numTempBuf != stackNumTempBuf)
+            uprv_free(numTempBuf);
+
+          source->toReturn = source->CEpos -1;
+          return *(source->toReturn);
        }
-      else{
-               /* find the offset to expansion table */
-                 CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
-                 size     = getExpansionCount(CE);
-                 if (size != 0) {
-                       /*
-                       if there are less than 16 elements in expansion, we don't terminate
-                       */
-                       uint32_t count;
-                       for (count = 0; count < size; count++) {
-                         *(source->CEpos ++) = *CEOffset++;
-                       }
-                 }
-                 else {
-                       /* else, we do */
-                       while (*CEOffset != 0) {
-                         *(source->CEpos ++) = *CEOffset ++;
-                       }
-                 }
-                 source->toReturn = source->CEpos - 1;
-          // in case of one element expansion, we 
+      else {
+          CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
+          CE = *(CEOffset++);
+          break;
+#if 0
+        /* find the offset to expansion table */
+          CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
+          size     = getExpansionCount(CE);
+          if (size != 0) {
+            /*
+            if there are less than 16 elements in expansion, we don't terminate
+            */
+            uint32_t count;
+            for (count = 0; count < size; count++) {
+              *(source->CEpos ++) = *CEOffset++;
+            }
+          }
+          else {
+            /* else, we do */
+            while (*CEOffset != 0) {
+              *(source->CEpos ++) = *CEOffset ++;
+            }
+          }
+          source->toReturn = source->CEpos - 1;
+          // in case of one element expansion, we
            // want to immediately return CEpos
            if(source->toReturn == source->CEs) {
              source->CEpos = source->CEs;
            }
-                 return *(source->toReturn);
-         }
-      }  
+          return *(source->toReturn);
+#endif
+      }
+      }
      case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
        {
          const uint32_t
            SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
-        //const uint32_t LCount = 19; 
+        //const uint32_t LCount = 19;
          const uint32_t VCount = 21;
          const uint32_t TCount = 28;
          //const uint32_t NCount = VCount * TCount;   /* 588 */
@@ -3819,13 +4251,13 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
            source->toReturn = source->CEpos - 1;
            return *(source->toReturn);
          } else {
-          // Since Hanguls pass the FCD check, it is 
+          // Since Hanguls pass the FCD check, it is
            // guaranteed that we won't be in
            // the normalization buffer if something like this happens
            // Move Jamos into normalization buffer
            /*
            Move the Jamos into the
-          normalization buffer 
+          normalization buffer
            */
            UChar *tempbuffer = source->writableBuffer +
                                (source->writableBufSize - 1);
@@ -4076,21 +4508,33 @@ ucol_getSortKey(const    UCollator    *coll,
          uint8_t        *result,
          int32_t        resultLength)
  {
+  UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
+  if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+      int32_t actualSrcLen = sourceLength;
+      if (actualSrcLen==-1 && source!=NULL) {
+          actualSrcLen = u_strlen(source);
+      }
+      UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, actualSrcLen);
+  }
+
    UErrorCode status = U_ZERO_ERROR;
+  int32_t keySize   = 0;
  
-  if(source == NULL) {
-    // this is actually an error situation, but we would need to 
-    // have an error code to return it. Until we introduce a new
-    // API, it stays like this
-    return 0;
-  }
-  /* this uses the function pointer that is set in updateinternalstate */
-  /* currently, there are two funcs: */
-  /*ucol_calcSortKey(...);*/
-  /*ucol_calcSortKeySimpleTertiary(...);*/
+  if(source != NULL) {
+      // source == NULL is actually an error situation, but we would need to
+      // have an error code to return it. Until we introduce a new
+      // API, it stays like this
  
-  int32_t keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);
-  //((UCollator *)coll)->errorCode = status; /*semantically const */
+      /* this uses the function pointer that is set in updateinternalstate */
+      /* currently, there are two funcs: */
+      /*ucol_calcSortKey(...);*/
+      /*ucol_calcSortKeySimpleTertiary(...);*/
+
+      keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLength, FALSE, &status);
+      //((UCollator *)coll)->errorCode = status; /*semantically const */
+  }
+  UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
+  UTRACE_EXIT_STATUS(status);
    return keySize;
  }
  
@@ -4110,6 +4554,7 @@ ucol_getSortKeyWithAllocation(const UCollator *coll,
  /* or if we run out of space while making a sortkey and want to return ASAP                                   */
  int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
      UErrorCode status = U_ZERO_ERROR;
+    const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
      uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
      uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
      uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
@@ -4125,7 +4570,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
      uint8_t *frenchStartPtr = NULL, *frenchEndPtr = NULL;
  
      uint32_t variableTopValue = coll->variableTopValue;
-    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); 
+    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
      if(doHiragana) {
        UCOL_COMMON_BOT4++;
        /* allocate one more space for hiragana */
@@ -4173,7 +4618,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
  
  
            if(shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
-            || (!notIsContinuation && wasShifted)) 
+            || (!notIsContinuation && wasShifted))
              || (wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */
              /* and other ignorables should be removed if following a shifted code point */
              if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
@@ -4392,14 +4837,14 @@ inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size
    }
  }
  
-// Packs the secondary buffer when processing French locale. Adds the terminator. 
+// Packs the secondary buffer when processing French locale. Adds the terminator.
  static
  inline uint8_t *packFrench(uint8_t *primaries, uint8_t *primEnd, uint8_t *secondaries, uint32_t *secsize, uint8_t *frenchStartPtr, uint8_t *frenchEndPtr) {
    uint8_t secondary;
    int32_t count2 = 0;
    uint32_t i = 0, size = 0;
    // we use i here since the key size already accounts for terminators, so we'll discard the increment
-  addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR); 
+  addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR);
    /* If there are any unresolved continuation secondaries, reverse them here so that we can reverse the whole secondary thing */
    if(frenchStartPtr != NULL) {
      uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
@@ -4450,6 +4895,8 @@ ucol_calcSortKey(const    UCollator    *coll,
          UBool allocateSKBuffer,
          UErrorCode *status)
  {
+    const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
+
      uint32_t i = 0; /* general purpose counter */
  
      /* Stack allocated buffers for buffers we use */
@@ -5004,7 +5451,7 @@ ucol_calcSortKey(const    UCollator    *coll,
                if(U_SUCCESS(*status)) {
                  *result = primStart;
                  *(primaries++) = UCOL_LEVELTERMINATOR;
-                u_writeIdenticalLevelRun(s.string, len, primaries);            
+                u_writeIdenticalLevelRun(s.string, len, primaries);
                }
              } else {
                *status = U_BUFFER_OVERFLOW_ERROR;
@@ -5028,11 +5475,11 @@ ucol_calcSortKey(const    UCollator    *coll,
  
      if(allocateSKBuffer == TRUE) {
        *result = (uint8_t*)uprv_malloc(sortKeySize);
-         /* test for NULL */
-         if (*result == NULL) {
-               *status = U_MEMORY_ALLOCATION_ERROR;
-               return sortKeySize;
-         }
+      /* test for NULL */
+      if (*result == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return sortKeySize;
+      }
        uprv_memcpy(*result, primStart, sortKeySize);
        if(primStart != prim) {
          uprv_free(primStart);
@@ -5053,6 +5500,8 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
          UErrorCode *status)
  {
      U_ALIGN_CODE(16);
+
+    const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)coll->UCA->image + coll->image->UCAConsts);
      uint32_t i = 0; /* general purpose counter */
  
      /* Stack allocated buffers for buffers we use */
@@ -5190,7 +5639,7 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
                        *primaries++ = primary1;
                        leadPrimary = 0;
                    } else if(primary1<UCOL_BYTE_FIRST_NON_LATIN_PRIMARY ||
-                      //(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24))) 
+                      //(primary1 > (UCOL_RESET_TOP_VALUE>>24) && primary1 < (UCOL_NEXT_TOP_VALUE>>24)))
                        (primary1 > (*UCAconsts->UCA_LAST_NON_VARIABLE>>24) && primary1 < (*UCAconsts->UCA_FIRST_IMPLICIT>>24))) {
                    /* not compressible */
                        leadPrimary = 0;
@@ -5392,11 +5841,11 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
  
      if(allocateSKBuffer == TRUE) {
        *result = (uint8_t*)uprv_malloc(sortKeySize);
-         /* test for NULL */
-         if (*result == NULL) {
-               *status = U_MEMORY_ALLOCATION_ERROR;
-               return sortKeySize;
-         }
+      /* test for NULL */
+      if (*result == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return sortKeySize;
+      }
        uprv_memcpy(*result, primStart, sortKeySize);
        if(primStart != prim) {
          uprv_free(primStart);
@@ -5409,7 +5858,7 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
  static inline
  UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
    UBool notIsContinuation = !isContinuation(CE);
-  uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF); 
+  uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);
    if(LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)
      || (!notIsContinuation && *wasShifted))
      || (*wasShifted && primary1 == 0)) { /* amendment to the UCA says that primary ignorables */
@@ -5448,8 +5897,8 @@ enum {
      UCOL_PSK_LIMIT
  };
  
-/** collation state enum. *_SHIFT value is how much to shift right 
- *  to get the state piece to the right. *_MASK value should be 
+/** collation state enum. *_SHIFT value is how much to shift right
+ *  to get the state piece to the right. *_MASK value should be
   *  ANDed with the shifted state. This data is stored in state[1]
   *  field.
   */
@@ -5457,7 +5906,7 @@ enum {
      UCOL_PSK_LEVEL_SHIFT = 0,      /** level identificator. stores an enum value from above */
      UCOL_PSK_LEVEL_MASK = 7,       /** three bits */
      UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */
-    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,  
+    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,
      /** can be only 0 or 1, since we get up to two bytes from primary or quaternary
       *  This field is also used to denote that the French secondary level is finished
       */
@@ -5466,7 +5915,7 @@ enum {
      UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already written */
      UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */
      /** When we do French we need to reverse secondary values. However, continuations
-     *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2c3ba 
+     *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2c3ba
       */
      UCOL_PSK_USED_ELEMENTS_SHIFT = 7,
      UCOL_PSK_USED_ELEMENTS_MASK = 0x3FF,
@@ -5475,28 +5924,28 @@ enum {
  };
  
  
-/** main sortkey part procedure. On the first call, 
- *  you should pass in a collator, an iterator, empty state 
+/** main sortkey part procedure. On the first call,
+ *  you should pass in a collator, an iterator, empty state
   *  state[0] == state[1] == 0, a buffer to hold results
   *  number of bytes you need and an error code pointer.
   *  Make sure your buffer is big enough to hold the wanted
- *  number of sortkey bytes. I don't check. 
- *  The only meaningful status you can get back is 
- *  U_BUFFER_OVERFLOW_ERROR, which basically means that you 
+ *  number of sortkey bytes. I don't check.
+ *  The only meaningful status you can get back is
+ *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
   *  have been dealt a raw deal and that you probably won't
   *  be able to use partial sortkey generation for this
   *  particular combination of string and collator. This
   *  is highly unlikely, but you should still check the error code.
- *  Any other status means that you're not in a sane situation 
- *  anymore. After the first call, preserve state values and 
+ *  Any other status means that you're not in a sane situation
+ *  anymore. After the first call, preserve state values and
   *  use them on subsequent calls to obtain more bytes of a sortkey.
   *  Use until the number of bytes written is smaller than the requested
   *  number of bytes. Generated sortkey is not compatible with the
   *  one generated by ucol_getSortKey, as we don't do any compression.
   *  However, levels are still terminated by a 1 (one) and the sortkey
   *  is terminated by a 0 (zero). Identical level is the same as in the
- *  regular sortkey - internal bocu-1 implementation is used. 
- *  For curious, although you cannot do much about this, here is 
+ *  regular sortkey - internal bocu-1 implementation is used.
+ *  For curious, although you cannot do much about this, here is
   *  the structure of state words.
   *  state[0] - iterator state. Depends on the iterator implementation,
   *             but allows the iterator to continue where it stopped in
@@ -5511,16 +5960,16 @@ enum {
   *   4       - was shifted. Whether the previous iteration finished in the
   *             shifted state.
   *   5, 6    - French continuation bytes written. See the comment in the enum
- *   7..16   - Used elements. Number of CEs that were already used from the 
- *             expansion buffer or number of bytes from a bocu sequence on 
+ *   7..16   - Used elements. Number of CEs that were already used from the
+ *             expansion buffer or number of bytes from a bocu sequence on
   *             the identical level.
- *  17..31   - iterator skip. Number of move operations iterator needs to 
+ *  17..31   - iterator skip. Number of move operations iterator needs to
   *             skip from the current state in order to continue. This is used
   *             only if normalization is turned on, since the normalizing iterator
- *             can return undefined state, which means that it's in the middle 
+ *             can return undefined state, which means that it's in the middle
   *             of normalizing sequence.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  ucol_nextSortKeyPart(const UCollator *coll,
                       UCharIterator *iter,
                       uint32_t state[2],
@@ -5530,6 +5979,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      if(status==NULL || U_FAILURE(*status)) {
          return 0;
      }
+    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
      if( coll==NULL || iter==NULL ||
          state==NULL ||
          count<0 || (count>0 && dest==NULL)
@@ -5537,9 +5987,12 @@ ucol_nextSortKeyPart(const UCollator *coll,
          *status=U_ILLEGAL_ARGUMENT_ERROR;
      }
  
+    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
+                  coll, iter, state[0], state[1], dest, count);
  
      if(count==0) {
          /* nothing to do */
+        UTRACE_EXIT_VALUE(0);
          return 0;
      }
  
@@ -5552,19 +6005,19 @@ ucol_nextSortKeyPart(const UCollator *coll,
      int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
      // Have we written only one byte from a two byte primary in the previous iteration?
      // Also on secondary level - have we finished with the French secondary?
-    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK; 
+    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
      // number of bytes in the continuation buffer for French
      int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
      // Skip the CEs that we got from an extraction
      // and delivered in the previous call
      int32_t usedElements = (state[1] >> UCOL_PSK_USED_ELEMENTS_SHIFT) & UCOL_PSK_USED_ELEMENTS_MASK;
      // Number of times to skip because the iterator returned
-    // UITER_NO_STATE when it was stopped in the last iteration, so we had to save the 
+    // UITER_NO_STATE when it was stopped in the last iteration, so we had to save the
      // last valid state.
      int32_t iterSkips = (state[1] >> UCOL_PSK_ITER_SKIP_SHIFT) & UCOL_PSK_ITER_SKIP_MASK;
  
      /** values that depend on the collator attributes */
-    // strength of the collator. 
+    // strength of the collator.
      int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
      // maximal level of the partial sortkey. Need to take whether case level is done
      int32_t maxLevel = 0;
@@ -5584,7 +6037,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          }
      }
      // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
-    uint8_t UCOL_HIRAGANA_QUAD = 
+    uint8_t UCOL_HIRAGANA_QUAD =
        (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
      // Boundary value that decides whether a CE is shifted or not
      uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
@@ -5600,7 +6053,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      s.iterator = iter;
      s.flags |= UCOL_USE_ITERATOR;
      // This variable tells us whether we have produced some other levels in this iteration
-    // before we moved to the identical level. In that case, we need to switch the 
+    // before we moved to the identical level. In that case, we need to switch the
      // type of the iterator.
      UBool doingIdenticalFromStart = FALSE;
      // Normalizing iterator
@@ -5616,15 +6069,17 @@ ucol_nextSortKeyPart(const UCollator *coll,
        s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
        s.flags &= ~UCOL_ITER_NORM;
        if(U_FAILURE(*status)) {
+        UTRACE_EXIT_STATUS(*status);
          return 0;
        }
      } else if(level == UCOL_PSK_IDENTICAL) {
-      // for identical level, we need a NFD iterator. We need to instantiate it here, since we 
+      // for identical level, we need a NFD iterator. We need to instantiate it here, since we
        // will be updating the state - and this cannot be done on an ordinary iterator.
        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
        s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
        s.flags &= ~UCOL_ITER_NORM;
        if(U_FAILURE(*status)) {
+        UTRACE_EXIT_STATUS(*status);
          return 0;
        }
        doingIdenticalFromStart = TRUE;
@@ -5649,6 +6104,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          /* reset to previous state */
        s.iterator->setState(s.iterator, iterState, status);
        if(U_FAILURE(*status)) {
+          UTRACE_EXIT_STATUS(*status);
            return 0;
        }
      }
@@ -5667,7 +6123,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
  
      // Number of expansion CEs that were already consumed in the
      // previous iteration for the last code point processed. We
-    // want to clean out the expansion buffer, so that we can 
+    // want to clean out the expansion buffer, so that we can
      // get correct CEs. This value is persistent over iterations,
      // since we can have several iterations on the one expansion
      // buffer.
@@ -5677,13 +6133,13 @@ ucol_nextSortKeyPart(const UCollator *coll,
      // shares the state field with consumedExpansionCEs value, since
      // they cannot simultanously appear on the same level
      int32_t bocsuBytesUsed = 0;
-    // Clean out the expansion buffer unless we are on 
+    // Clean out the expansion buffer unless we are on
      // identical level. In that case we use this field
      // to store the number of bytes already written
      // from the previous bocsu sequence.
      if(level < UCOL_PSK_IDENTICAL && usedElements != 0) {
        while(usedElements-->0) {
-        // If we're doing French and we are on the secondary level, 
+        // If we're doing French and we are on the secondary level,
          // we go backwards.
          if(level == UCOL_PSK_SECONDARY && doingFrench) {
            CE = ucol_IGetPrevCE(coll, &s, status);
@@ -5693,6 +6149,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          if(CE==UCOL_NO_MORE_CES) {
            /* should not happen */
            *status=U_INTERNAL_PROGRAM_ERROR;
+          UTRACE_EXIT_STATUS(*status);
            return 0;
          }
        }
@@ -5701,20 +6158,20 @@ ucol_nextSortKeyPart(const UCollator *coll,
      }
  
      // This variable prevents the adjusting of iterator
-    // skip variable when we are the first time on a 
+    // skip variable when we are the first time on a
      // level. I hope there is a better way to do it, but
      // I could not think of it.
      UBool firstTimeOnLevel = TRUE;
      // French secondary needs to know whether the iterator state of zero came from previous level OR
      // from a new invocation...
      UBool wasDoingPrimary = FALSE;
-    // Case level is kind of goofy. This variable tells us that 
+    // Case level is kind of goofy. This variable tells us that
      // we are still not done with the case level.
-    UBool dontAdvanceIteratorBecauseWeNeedALevelTerminator = FALSE; 
+    UBool dontAdvanceIteratorBecauseWeNeedALevelTerminator = FALSE;
      // destination buffer byte counter. When this guy
      // gets to count, we're done with the iteration
-    int32_t i = 0; 
-    // used to count the zero bytes written after we 
+    int32_t i = 0;
+    // used to count the zero bytes written after we
      // have finished with the sort key
      int32_t j = 0;
  
@@ -5726,7 +6183,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
      // way to exit is a goto saveState, which happens when we have filled
      // out our buffer.
      switch(level) {
-    case UCOL_PSK_PRIMARY:          
+    case UCOL_PSK_PRIMARY:
        wasDoingPrimary = TRUE;
        for(;;) {
            if(i==count) {
@@ -5777,14 +6234,14 @@ ucol_nextSortKeyPart(const UCollator *coll,
                }
              }
            }
-          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) { 
+          if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
              // s.pos != NULL means there is a normalization buffer in effect
              // in iterative case, this means that we are doing Thai (maybe discontiguos)
              consumedExpansionCEs++;
            } else {
              consumedExpansionCEs = 0;
            }
-          if(s.pos && *s.pos == 0) { 
+          if(s.pos && *s.pos == 0) {
              // maybe it is the end of Thai - we have to have
              // an extra skip
              iterSkips++;
@@ -5820,7 +6277,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
                  byteCountOrFrenchDone=0;
                  // Restart the iteration an move to the
                  // second level
-                s.iterator->move(s.iterator, 0, UITER_START);               
+                s.iterator->move(s.iterator, 0, UITER_START);
                  level = UCOL_PSK_CASE;
                  break;
              }
@@ -5835,7 +6292,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
              } else {
                consumedExpansionCEs = 0;
              }
-            if(s.pos && *s.pos == 0) { 
+            if(s.pos && *s.pos == 0) {
                iterSkips++;
              }
            }
@@ -5843,8 +6300,8 @@ ucol_nextSortKeyPart(const UCollator *coll,
            uint8_t frenchBuff[UCOL_MAX_BUFFER];
            int32_t frenchIndex = 0;
            // Here we are going backwards.
-          // If the iterator is at the beggining, it should be 
-          // moved to end. 
+          // If the iterator is at the beggining, it should be
+          // moved to end.
            if(wasDoingPrimary) {
              s.iterator->move(s.iterator, 0, UITER_LIMIT);
            }
@@ -5857,7 +6314,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
                if(newState != UITER_NO_STATE) {
                  iterState = newState;
                  iterSkips = 0;
-              } else { 
+              } else {
                  if(!firstTimeOnLevel) {
                    iterSkips++;
                  }
@@ -5874,11 +6331,11 @@ ucol_nextSortKeyPart(const UCollator *coll,
                  level = UCOL_PSK_CASE;
                  break;
              }
-            if(isContinuation(CE)) { // if it's a continuation, we want to save it and 
+            if(isContinuation(CE)) { // if it's a continuation, we want to save it and
                // reverse when we get a first non-continuation CE.
                CE >>= 8;
                frenchBuff[frenchIndex++] = (uint8_t)CE;
-            } else if(!isShiftedCE(CE, LVT, &wasShifted)) { 
+            } else if(!isShiftedCE(CE, LVT, &wasShifted)) {
                CE >>= 8; /* get secondary */
                if(!frenchIndex) {
                  if(CE != 0) {
@@ -5941,7 +6398,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
                dest[i++] = caseByte;
              }
              // This is kind of tricky - situation where
-            // we need to keep the iterator in the old 
+            // we need to keep the iterator in the old
              // state, but don't need to bring anything
              // to the next invocation
              if(i < count) {
@@ -5961,7 +6418,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
              if(!isContinuation(CE)) {
                CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
                caseBits = (uint8_t)(CE & 0xC0);
-              // this copies the case level logic from the 
+              // this copies the case level logic from the
                // sort key generation code
                if(CE != 0) {
                  if(coll->caseFirst == UCOL_UPPER_FIRST) {
@@ -6096,7 +6553,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
            if(CE==UCOL_NO_MORE_CES) {
                // Add the level separator
                terminatePSKLevel(level, maxLevel, i, dest);
-              //dest[i++] = UCOL_LEVELTERMINATOR; 
+              //dest[i++] = UCOL_LEVELTERMINATOR;
                byteCountOrFrenchDone=0;
                // Restart the iteration an move to the
                // second level
@@ -6153,7 +6610,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
        if(strength >= UCOL_IDENTICAL) {
          UChar32 first, second;
          int32_t bocsuBytesWritten = 0;
-        // We always need to do identical on 
+        // We always need to do identical on
          // the NFD form of the string.
          if(normIter == NULL) {
            // we arrived from the level below and
@@ -6161,10 +6618,10 @@ ucol_nextSortKeyPart(const UCollator *coll,
            // therefore, we need to make a fresh NFD iterator
            normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
            s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
-        } else if(!doingIdenticalFromStart) { 
+        } else if(!doingIdenticalFromStart) {
            // there is an iterator, but we did some other levels.
-          // therefore, we have a FCD iterator - need to make 
-          // a NFD one. 
+          // therefore, we have a FCD iterator - need to make
+          // a NFD one.
            // normIter being at the beginning does not guarantee
            // that the underlying iterator is at the beginning
            iter->move(iter, 0, UITER_START);
@@ -6173,6 +6630,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
          // At this point we have a NFD iterator that is positioned
          // in the right place
          if(U_FAILURE(*status)) {
+          UTRACE_EXIT_STATUS(*status);
            return 0;
          }
          first = uiter_previous32(s.iterator);
@@ -6192,7 +6650,7 @@ ucol_nextSortKeyPart(const UCollator *coll,
              goto saveState;
            }
  
-          // On identical level, we will always save 
+          // On identical level, we will always save
            // the state if we reach this point, since
            // we don't depend on getNextCE for content
            // all the content is in our buffer and we
@@ -6242,20 +6700,21 @@ ucol_nextSortKeyPart(const UCollator *coll,
        break;
      default:
        *status = U_INTERNAL_PROGRAM_ERROR;
+      UTRACE_EXIT_STATUS(*status);
        return 0;
      }
  
  saveState:
      // Now we need to return stuff. First we want to see whether we have
      // done everything for the current state of iterator.
-    if(consumedExpansionCEs || byteCountOrFrenchDone 
+    if(consumedExpansionCEs || byteCountOrFrenchDone
        || dontAdvanceIteratorBecauseWeNeedALevelTerminator) {
-      // Any of above mean that the previous transaction 
-      // wasn't finished and that we should store the 
+      // Any of above mean that the previous transaction
+      // wasn't finished and that we should store the
        // previous iterator state.
        state[0] = iterState;
      } else {
-      // The transaction is complete. We will continue in 
+      // The transaction is complete. We will continue in
        // next iteration.
        if((newState = s.iterator->getState(s.iterator))!= UITER_NO_STATE) {
          state[0] = s.iterator->getState(s.iterator);
@@ -6281,7 +6740,7 @@ saveState:
      }
  
      // Next we put in the level of comparison
-    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT); 
+    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
  
      // If we are doing French, we need to store whether we have just finished the French level
      if(level == UCOL_PSK_SECONDARY && doingFrench) {
@@ -6315,13 +6774,16 @@ saveState:
      }
  
      // Return number of meaningful sortkey bytes.
+    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
+                  dest,i, state[0], state[1]);
+    UTRACE_EXIT_VALUE(i);
      return i;
  }
  
  /**
   * Produce a bound for a given sortkey and a number of levels.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  ucol_getBound(const uint8_t       *source,
          int32_t             sourceLength,
          UColBoundMode       boundType,
@@ -6329,7 +6791,7 @@ ucol_getBound(const uint8_t       *source,
          uint8_t             *result,
          int32_t             resultLength,
          UErrorCode          *status) {
-  // consistency checks 
+  // consistency checks
    if(status == NULL || U_FAILURE(*status)) {
      return 0;
    }
@@ -6345,7 +6807,7 @@ ucol_getBound(const uint8_t       *source,
      if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
        noOfLevels--;
      }
-  } while (noOfLevels > 0 
+  } while (noOfLevels > 0
      && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
  
    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
@@ -6356,7 +6818,7 @@ ucol_getBound(const uint8_t       *source,
  
    // READ ME: this code assumes that the values for boundType
    // enum will not changes. They are set so that the enum value
-  // corresponds to the number of extra bytes each bound type 
+  // corresponds to the number of extra bytes each bound type
    // needs.
    if(result != NULL && resultLength >= sourceIndex+boundType) {
      uprv_memcpy(result, source, sourceIndex);
@@ -6381,7 +6843,7 @@ ucol_getBound(const uint8_t       *source,
  
      return sourceIndex;
    } else {
-    return sourceIndex+boundType+1; 
+    return sourceIndex+boundType+1;
    }
  }
  
@@ -6406,7 +6868,7 @@ U_CAPI char* U_EXPORT2 ucol_sortKeyToString(const UCollator *coll, const uint8_t
  
    while(strength <= UCOL_QUATERNARY && strength <= coll->strength) {
      if(strength > UCOL_PRIMARY) {
-      strcat(current, " . ");
+      uprv_strcat(current, " . ");
      }
      while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
        uprv_appendByteToHexString(current, *currentSk++);
@@ -6484,7 +6946,7 @@ ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
      if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary
        coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary
        coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24);
-    } else { // normal case 
+    } else { // normal case
        coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secShift);
      }
      *secShift -= 8;
@@ -6540,8 +7002,8 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
        CE = coll->latinOneMapping[ch];
      } else {
        CE = UTRIE_GET32_FROM_LEAD(coll->mapping, ch);
-      if(CE == UCOL_NOT_FOUND) {
-        CE = UTRIE_GET32_FROM_LEAD(UCA->mapping, ch);
+      if(CE == UCOL_NOT_FOUND && coll->UCA) {
+        CE = UTRIE_GET32_FROM_LEAD(coll->UCA->mapping, ch);
        }
      }
      if(CE < UCOL_NOT_FOUND) {
@@ -6549,8 +7011,9 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
      } else {
        switch (getCETag(CE)) {
        case EXPANSION_TAG:
+      case DIGIT_TAG:
          ucol_setText(it, &ch, 1, status);
-        while((CE = ucol_next(it, status)) != UCOL_NULLORDER) {
+        while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
            if(primShift < 0 || secShift < 0 || terShift < 0) {
              coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
              coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
@@ -6569,13 +7032,14 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
          {
            if((CE & 0x00FFF000) != 0) {
              *status = U_UNSUPPORTED_ERROR;
+            coll->latinOneFailed = TRUE;
              return FALSE;
            }
  
            const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  
            CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
-    
+
            coll->latinOneCEs[ch] = CE;
            coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
            coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
@@ -6585,7 +7049,7 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
            do {
                CE = *(coll->contractionCEs +
                    (UCharOffset - coll->contractionIndex));
-              if(getCETag(CE) == EXPANSION_TAG) {
+              if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
                  uint32_t size;
                  uint32_t i;    /* general counter */
                  uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
@@ -6620,11 +7084,12 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
                  coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
                  coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
                  contractionOffset++;
-              }                
+              }
                UCharOffset++;
                primShift = 24; secShift = 24; terShift = 24;
                if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
                  if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
+                  coll->latinOneFailed = TRUE;
                    return FALSE;
                  }
                }
@@ -6642,7 +7107,8 @@ ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
    // compact table
    if(contractionOffset < coll->latinOneTableLen) {
      if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
-      return FALSE;
+        coll->latinOneFailed = TRUE;
+        return FALSE;
      }
    }
    return result;
@@ -6687,7 +7153,7 @@ void ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
          } else {
            coll->sortKeyGen = ucol_calcSortKey;
          }
-        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY
+        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
            && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed) {
            if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
              if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
@@ -6696,12 +7162,15 @@ void ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
              } else {
                coll->latinOneUse = FALSE;
              }
+            if(*status == U_UNSUPPORTED_ERROR) {
+              *status = U_ZERO_ERROR;
+            }
            } else { // latin1Table exists and it doesn't need to be regenerated, just use it
              coll->latinOneUse = TRUE;
            }
          } else {
            coll->latinOneUse = FALSE;
-        }     
+        }
        }
  
  }
@@ -6738,8 +7207,10 @@ ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCod
      *status = U_PRIMARY_TOO_LONG_ERROR;
      return 0;
    }
-
-  coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
+  if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
+    coll->variableTopValueisDefault = FALSE;
+    coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
+  }
  
    return CE & UCOL_PRIMARYMASK;
  }
@@ -6756,7 +7227,11 @@ ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat
    if(U_FAILURE(*status) || coll == NULL) {
      return;
    }
-  coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
+
+  if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
+      coll->variableTopValueisDefault = FALSE;
+      coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
+  }
  }
  /* Attribute setter API */
  U_CAPI void  U_EXPORT2
@@ -6886,7 +7361,7 @@ ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value,
      }
      if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
        coll->latinOneRegenTable = TRUE;
-    } else { 
+    } else {
        coll->latinOneRegenTable = FALSE;
      }
      ucol_updateInternalState(coll, status);
@@ -6899,7 +7374,7 @@ ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
      }
      switch(attr) {
      case UCOL_NUMERIC_COLLATION:
-      return coll->numericCollation;    
+      return coll->numericCollation;
      case UCOL_HIRAGANA_QUATERNARY_MODE:
        return coll->hiraganaQ;
      case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
@@ -6987,10 +7462,10 @@ ucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t * pBufferSize,
          }
      } else {
          localCollator = (UCollator *)stackBuffer;
-        memcpy(localCollator, coll, sizeof(UCollator));
+        uprv_memcpy(localCollator, coll, sizeof(UCollator));
          localCollator->freeOnClose = FALSE;
-               localCollator->requestedLocale = NULL; // zero copies of pointers
-               localCollator->validLocale = NULL;
+        localCollator->requestedLocale = NULL; // zero copies of pointers
+        localCollator->validLocale = NULL;
      }
      return localCollator;
  }
@@ -7006,6 +7481,13 @@ ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int3
      /* take the UCA rules and append real rules at the end */
      /* UCA rules will be probably coming from the root RB */
      ucaRules = ures_getStringByKey(coll->rb,"%%UCARULES",&UCAlen,&status);
+    /*
+    UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
+    UResourceBundle*  uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
+    ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
+    ures_close(uca);
+    ures_close(cresb);
+    */
    }
    if(U_FAILURE(status)) {
      return 0;
@@ -7033,14 +7515,12 @@ ucol_getRules(    const    UCollator       *coll,
      return coll->rules;
    } else {
      UErrorCode status = U_ZERO_ERROR;
-    if(coll->rb != NULL) {
-      UResourceBundle *collElem = ures_getByKey(coll->rb, "CollationElements", NULL, &status);
+    if(coll->elements != NULL) {
        if(U_SUCCESS(status)) {
          /*Semantic const */
-        ((UCollator *)coll)->rules = ures_getStringByKey(collElem, "Sequence", length, &status);
+        ((UCollator *)coll)->rules = ures_getStringByKey(coll->elements, "Sequence", length, &status);
          ((UCollator *)coll)->rulesLength = *length;
          ((UCollator *)coll)->freeRulesOnClose = FALSE;
-        ures_close(collElem);
          return coll->rules;
        }
      }
@@ -7080,6 +7560,61 @@ ucol_countAvailable()
    return uloc_countAvailable();
  }
  
+#if !UCONFIG_NO_SERVICE
+U_CAPI UEnumeration* U_EXPORT2
+ucol_openAvailableLocales(UErrorCode *status) {
+    // This is a wrapper over Collator::getAvailableLocales()
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    StringEnumeration *s = Collator::getAvailableLocales();
+    if (s == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    return uenum_openStringEnumeration(s, status);
+}
+#endif
+
+// Note: KEYWORDS[0] != RESOURCE_NAME - alan
+
+static const char* RESOURCE_NAME = "collations";
+
+static const char* KEYWORDS[] = { "collation" };
+
+#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
+
+U_CAPI UEnumeration* U_EXPORT2
+ucol_getKeywords(UErrorCode *status) {
+    UEnumeration *result = NULL;
+    if (U_SUCCESS(*status)) {
+        return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
+    }
+    return result;
+}
+
+U_CAPI UEnumeration* U_EXPORT2
+ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
+    // hard-coded to accept exactly one collation keyword
+    // modify if additional collation keyword is added later
+    if (U_SUCCESS(*status) &&
+        keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
+}
+
+U_CAPI int32_t U_EXPORT2
+ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
+                             const char* keyword, const char* locale,
+                             UBool* isAvailable, UErrorCode* status) {
+    // N.B.: Resource name is "collations" but keyword is "collation"
+    return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
+                                        "collations", keyword, locale,
+                                        isAvailable, TRUE, status);
+}
+
  U_CAPI void U_EXPORT2
  ucol_getVersion(const UCollator* coll,
                  UVersionInfo versionInfo)
@@ -7102,7 +7637,11 @@ ucol_getVersion(const UCollator* coll,
      versionInfo[0] = (uint8_t)(cmbVersion>>8);
      versionInfo[1] = (uint8_t)cmbVersion;
      versionInfo[2] = coll->image->version[1];
-    versionInfo[3] = UCA->image->UCAVersion[0];
+    if(coll->UCA) {
+        versionInfo[3] = coll->UCA->image->UCAVersion[0];
+    } else {
+        versionInfo[3] = 0;
+    }
  }
  
  
@@ -7112,11 +7651,11 @@ ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
    uint32_t CE = UCOL_NOT_FOUND;
    const UChar *ContractionStart = NULL;
    if(U_SUCCESS(*status) && coll != NULL) {
-    if(coll == UCA) {
+    if(coll == coll->UCA) {
        return FALSE;
      } else if(u < 0x100) { /* latin-1 */
        CE = coll->latinOneMapping[u];
-      if(CE == UCA->latinOneMapping[u]) {
+      if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
          return FALSE;
        }
      } else { /* regular */
@@ -7162,13 +7701,13 @@ static
  UCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
  {
  
-  // TODO: When we have an UChar iterator, we need to access the whole string. One 
+  // TODO: When we have an UChar iterator, we need to access the whole string. One
    // useful modification would be a UChar iterator extract API, since reset next next...
    // is not optimal.
    // TODO: Handle long strings. Do the same in compareUsingSortKeys.
  
    // When we arrive here, we can have normal strings or UCharIterators. Currently they are both
-  // of same type, but that doesn't really mean that it will stay that way. 
+  // of same type, but that doesn't really mean that it will stay that way.
  
      // The division for the array length may truncate the array size to
      // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
@@ -7414,7 +7953,7 @@ static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
  }
  
  
-static inline UCollationResult 
+static inline UCollationResult
  ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
  //              const UCollator    *coll,
  //              const UChar        *source,
@@ -7492,7 +8031,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
              }
              if(doHiragana && hirResult == UCOL_EQUAL) {
                if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
-                hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA)) 
+                hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
                    ? UCOL_LESS:UCOL_GREATER;
                }
              }
@@ -7513,8 +8052,8 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
            if(sOrder == UCOL_NO_MORE_CES) {
              UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
              break;
-          } else if(sOrder == 0 
-            || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) { 
+          } else if(sOrder == 0
+            || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
              /* UCA amendment - ignore ignorables that follow shifted code points */
              continue;
            } else if(isContinuation(sOrder)) {
@@ -7562,7 +8101,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
              UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
              break;
            } else if(tOrder == 0
-            || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) { 
+            || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
              /* UCA amendment - ignore ignorables that follow shifted code points */
              continue;
            } else if(isContinuation(tOrder)) {
@@ -7608,7 +8147,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
            /*
              if(doHiragana && hirResult == UCOL_EQUAL) {
                if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
-                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA)) 
+                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
                    ? UCOL_LESS:UCOL_GREATER;
                }
              }
@@ -7884,8 +8423,8 @@ commonReturn:
  }
  
  
-static inline uint32_t 
-ucol_getLatinOneContraction(const UCollator *coll, int32_t strength, 
+static inline uint32_t
+ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
                            uint32_t CE, const UChar *s, int32_t *index, int32_t len) {
    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
    int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;
@@ -7933,22 +8472,22 @@ ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
  }
  
  
-/** 
- * This is a fast strcoll, geared towards text in Latin-1. 
+/**
+ * This is a fast strcoll, geared towards text in Latin-1.
   * It supports contractions of size two, French secondaries
   * and case switching. You can use it with strengths primary
   * to tertiary. It does not support shifted and case level.
   * It relies on the table build by setupLatin1Table. If it
   * doesn't understand something, it will go to the regular
- * strcoll. 
+ * strcoll.
   */
-static inline UCollationResult 
+static inline UCollationResult
  ucol_strcollUseLatin1( const UCollator    *coll,
                const UChar        *source,
                int32_t            sLen,
                const UChar        *target,
                int32_t            tLen,
-              UErrorCode *status) 
+              UErrorCode *status)
  {
      U_ALIGN_CODE(16);
      int32_t strength = coll->strength;
@@ -8009,10 +8548,10 @@ ucol_strcollUseLatin1( const UCollator    *coll,
          if(tLen==-1) {    // handling zero terminated strings
            tChar=target[tIndex++];
            if(tChar==0) {
-            if(endOfSource) { // this is different than source loop, 
+            if(endOfSource) { // this is different than source loop,
                // as we already know that source loop is done here,
                // so we can either finish the primary loop if both
-              // strings are done or anounce the result if only 
+              // strings are done or anounce the result if only
                // target is done. Same below.
                goto endOfPrimLoop;
              } else {
@@ -8071,11 +8610,11 @@ ucol_strcollUseLatin1( const UCollator    *coll,
          // top bytes match, continue with following bytes
          sOrder<<=8;
          tOrder<<=8;
-      } 
+      }
      }
  
  endOfPrimLoop:
-    // after primary loop, we definitely know the sizes of strings, 
+    // after primary loop, we definitely know the sizes of strings,
      // so we set it and use simpler loop for secondaries and tertiaries
      sLen = sIndex; tLen = tIndex;
      if(strength >= UCOL_SECONDARY) {
@@ -8085,8 +8624,8 @@ endOfPrimLoop:
  
        if(coll->frenchCollation == UCOL_OFF) { // non French
          // This loop is a simplified copy of primary loop
-        // at this point we know that whole strings are latin-1, so we don't 
-        // check for that. We also know that we only have contractions as 
+        // at this point we know that whole strings are latin-1, so we don't
+        // check for that. We also know that we only have contractions as
          // specials.
          sIndex = 0; tIndex = 0;
          for(;;) {
@@ -8134,7 +8673,7 @@ endOfPrimLoop:
              }
              sOrder<<=8;
              tOrder<<=8;
-          } 
+          }
          }
        } else { // French
          if(haveContractions) { // if we have contractions, we have to bail out
@@ -8185,10 +8724,10 @@ endOfPrimLoop:
              }
              sOrder<<=8;
              tOrder<<=8;
-          }       
+          }
          }
        }
-    } 
+    }
  
  endOfSecLoop:
      if(strength >= UCOL_TERTIARY) {
@@ -8238,9 +8777,9 @@ endOfSecLoop:
            }
            sOrder<<=8;
            tOrder<<=8;
-        } 
+        }
        }
-    } 
+    }
      return UCOL_EQUAL;
  
  returnRegular:
@@ -8249,7 +8788,7 @@ returnRegular:
  
      IInit_collIterate(coll, source, sLen, &sColl);
      IInit_collIterate(coll, target, tLen, &tColl);
-    return ucol_strcollRegular(&sColl, &tColl, status);    
+    return ucol_strcollRegular(&sColl, &tColl, status);
  }
  
  
@@ -8258,11 +8797,20 @@ ucol_strcollIter( const UCollator    *coll,
                   UCharIterator *sIter,
                   UCharIterator *tIter,
                   UErrorCode         *status) {
-  if(!status || U_FAILURE(*status) || sIter == tIter) {
+  if(!status || U_FAILURE(*status)) {
+    return UCOL_EQUAL;
+  }
+
+  UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
+  UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
+
+  if (sIter == tIter) {
+    UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
      return UCOL_EQUAL;
    }
    if(sIter == NULL || tIter == NULL || coll == NULL) {
      *status = U_ILLEGAL_ARGUMENT_ERROR;
+    UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
      return UCOL_EQUAL;
    }
  
@@ -8295,9 +8843,12 @@ ucol_strcollIter( const UCollator    *coll,
    }
  
    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
-  
-  while((sChar = sColl.iterator->next(sColl.iterator)) ==  
+
+  while((sChar = sColl.iterator->next(sColl.iterator)) ==
      (tChar = tColl.iterator->next(tColl.iterator))) {
+    if(UCOL_ISTHAIPREVOWEL(sChar)) {
+      break;
+    }
      if(sChar == U_SENTINEL) {
        result = UCOL_EQUAL;
        goto end_compare;
@@ -8339,6 +8890,7 @@ end_compare:
      unorm_closeIter(tNormIter);
    }
  
+  UTRACE_EXIT_VALUE_STATUS(result, *status)
    return result;
  }
  
@@ -8354,10 +8906,19 @@ ucol_strcoll( const UCollator    *coll,
                const UChar        *target,
                int32_t            targetLength) {
      U_ALIGN_CODE(16);
+
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+      UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+      UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
+      UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
+    }
+
      UErrorCode status = U_ZERO_ERROR;
      if(source == NULL || target == NULL) {
-      // do not crash, but return. Should have 
+      // do not crash, but return. Should have
        // status argument to return error.
+      UTRACE_EXIT_VALUE(UTRACE_UCOL_STRCOLL);
        return UCOL_EQUAL;
      }
        collIterate sColl, tColl;
@@ -8374,6 +8935,7 @@ ucol_strcoll( const UCollator    *coll,
          //    Check for them being the same string, and scan through
          //    any leading equal portion.
          if (source==target) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
  
@@ -8381,10 +8943,14 @@ ucol_strcoll( const UCollator    *coll,
              if ( *pSrc != *pTarg || *pSrc == 0) {
                  break;
              }
+            if(UCOL_ISTHAIPREVOWEL(*pSrc)) {
+              break;
+            }
              pSrc++;
              pTarg++;
          }
          if (*pSrc == 0 && *pTarg == 0) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
          equalLength = pSrc - source;
@@ -8395,6 +8961,7 @@ ucol_strcoll( const UCollator    *coll,
          /* check if source and target are same strings */
  
          if (source==target  && sourceLength==targetLength) {
+            UTRACE_EXIT_VALUE(UCOL_EQUAL);
              return UCOL_EQUAL;
          }
          const UChar    *pSrcEnd = source + sourceLength;
@@ -8412,6 +8979,9 @@ ucol_strcoll( const UCollator    *coll,
                  if (*pSrc != *pTarg) {
                      break;
                  }
+                if(UCOL_ISTHAIPREVOWEL(*pSrc)) { // they are the same here, so any will do
+                    break;
+                }
                  pSrc++;
                  pTarg++;
              }
@@ -8420,6 +8990,7 @@ ucol_strcoll( const UCollator    *coll,
              // If we made it all the way through both strings, we are done.  They are ==
              if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
                  (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))  {  /* and also at end of dest string                  */
+                UTRACE_EXIT_VALUE(UCOL_EQUAL);
                  return UCOL_EQUAL;
              }
      }
@@ -8453,14 +9024,17 @@ ucol_strcoll( const UCollator    *coll,
          }
      }
  
+    UCollationResult  returnVal;
      if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLength > 0 && *target&0xff00)) {
        // Preparing the context objects for iterating over strings
        IInit_collIterate(coll, source, sourceLength, &sColl);
        IInit_collIterate(coll, target, targetLength, &tColl);
-      return ucol_strcollRegular(&sColl, &tColl, &status);
+      returnVal = ucol_strcollRegular(&sColl, &tColl, &status);
      } else {
-      return ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);    
+      returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
      }
+    UTRACE_EXIT_VALUE(returnVal);
+    return returnVal;
  }
  
  /* convenience function for comparing strings */
@@ -8502,10 +9076,18 @@ ucol_equal(        const    UCollator        *coll,
  /* returns the locale name the collation data comes from */
  U_CAPI const char * U_EXPORT2
  ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
+  return ucol_getLocaleByType(coll, type, status);
+}
+
+U_CAPI const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
    const char *result = NULL;
    if(status == NULL || U_FAILURE(*status)) {
      return NULL;
    }
+  UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
+  UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
+
    switch(type) {
    case ULOC_ACTUAL_LOCALE:
      // validLocale is set only if service registration has explicitly set the
@@ -8522,7 +9104,7 @@ ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *statu
        result = coll->validLocale;
      } else if(coll->rb != NULL) {
        result = ures_getLocale(coll->rb, status);
-    } 
+    }
      break;
    case ULOC_REQUESTED_LOCALE:
      result = coll->requestedLocale;
@@ -8530,16 +9112,18 @@ ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *statu
    default:
      *status = U_ILLEGAL_ARGUMENT_ERROR;
    }
+  UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
+  UTRACE_EXIT_STATUS(*status);
    return result;
  }
  
  U_CAPI USet * U_EXPORT2
-ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) 
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
  {
    if(status == NULL || U_FAILURE(*status)) {
      return NULL;
    }
-  if(coll == NULL) {
+  if(coll == NULL || coll->UCA == NULL) {
      *status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    UParseError parseError;
@@ -8554,12 +9138,13 @@ ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
    // 3. USet is internally really UnicodeSet, C is just a wrapper
    UnicodeSet *tailored = new UnicodeSet();
    UnicodeString pattern;
-  CanonicalIterator it("", *status);
+  UnicodeString empty;
+  CanonicalIterator it(empty, *status);
  
  
    // The idea is to tokenize the rule set. For each non-reset token,
-  // we add all the canonicaly equivalent FCD sequences 
-  ucol_tok_initTokenList(&src, rules, rulesLen, UCA, status);
+  // we add all the canonicaly equivalent FCD sequences
+  ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
    while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) {
      startOfRules = FALSE;
      if(src.parsedToken.strength != UCOL_TOK_RESET) {
@@ -8600,14 +9185,14 @@ ucol_equals(const UCollator *source, const UCollator *target) {
    if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
      // all the attributes are equal and the rules are equal - collators are equal
      return(TRUE);
-  } 
+  }
    // hard part, need to construct tree from rules and see if they yield the same tailoring
    UBool result = TRUE;
    UParseError parseError;
    UColTokenParser sourceParser, targetParser;
    int32_t sourceListLen = 0, targetListLen = 0;
-  ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, UCA, &status);
-  ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, UCA, &status);
+  ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
+  ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
    sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
    targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
  
@@ -8687,4 +9272,119 @@ returnResult:
    return result;
  
  }
+
+U_CAPI void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
+  if(coll && coll->UCA) {
+    uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
+  }
+}
+
+U_CAPI int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+                 uint8_t *buffer, int32_t capacity,
+                 UErrorCode *status)
+{
+    int32_t length = 0;
+    if(U_FAILURE(*status)) {
+        return length;
+    }
+    if(coll->hasRealData == TRUE) {
+        length = coll->image->size;
+        if(length <= capacity) {
+            uprv_memcpy(buffer, coll->image, length);
+        }
+    } else {
+        length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
+        if(length <= capacity) {
+            /* build the UCATableHeader with minimal entries */
+            /* do not copy the header from the UCA file because its values are wrong! */
+            /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
+
+            /* reset everything */
+            uprv_memset(buffer, 0, length);
+
+            /* set the tailoring-specific values */
+            UCATableHeader *myData = (UCATableHeader *)buffer;
+            myData->size = length;
+
+            /* offset for the options, the only part of the data that is present after the header */
+            myData->options = sizeof(UCATableHeader);
+
+            /* need to always set the expansion value for an upper bound of the options */
+            myData->expansion = myData->options + sizeof(UColOptionSet);
+
+            myData->magic = UCOL_HEADER_MAGIC;
+            myData->isBigEndian = U_IS_BIG_ENDIAN;
+            myData->charSetFamily = U_CHARSET_FAMILY;
+
+            /* copy UCA's version; genrb will override all but the builder version with tailoring data */
+            uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
+
+            uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
+            uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
+            uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
+            myData->jamoSpecial = coll->image->jamoSpecial;
+
+            /* copy the collator options */
+            uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
+        }
+    }
+    return length;
+}
+
+U_CAPI UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+                const UCollator *base,
+                UErrorCode *status)
+{
+    UCollator *result = NULL;
+    if(U_FAILURE(*status)){
+        return NULL;
+    }
+    if(base == NULL) {
+        // we don't support null base yet
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    UCATableHeader *colData = (UCATableHeader *)bin;
+    // do we want version check here? We're trying to figure out whether collators are compatible
+    if(uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
+        uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
+        colData->version[0] != UCOL_BUILDER_VERSION) {
+            *status = U_COLLATOR_VERSION_MISMATCH;
+            return NULL;
+        } else {
+            if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
+                result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
+                if(U_FAILURE(*status)){
+                    return NULL;
+                }
+                result->hasRealData = TRUE;
+            } else {
+                if(base) {
+                    result = ucol_initCollator(base->image, result, base, status);
+                    ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status);
+                    if(U_FAILURE(*status)){
+                        return NULL;
+                    }
+                    result->hasRealData = FALSE;
+                } else {
+                    *status = U_USELESS_COLLATOR_ERROR;
+                    return NULL;
+                }
+            }
+            result->freeImageOnClose = FALSE;
+        }
+        result->validLocale = NULL;
+        result->requestedLocale = NULL;
+        result->rules = NULL;
+        result->rulesLength = 0;
+        result->freeRulesOnClose = FALSE;
+        result->rb = NULL;
+        result->elements = NULL;
+        return result;
+}
+
  #endif /* #if !UCONFIG_NO_COLLATION */
+