]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/tblcoll.cpp
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / i18n / tblcoll.cpp
index f6014bb7783bb9105751489d9a067853f60c5d3f..720107ee032a597492567d5a91e16f83644736ec 100644 (file)
@@ -1,58 +1,58 @@
 /*
-******************************************************************************
-* Copyright (C) {1996-2003}, International Business Machines Corporation and   *
-* others. All Rights Reserved.                                               *
-******************************************************************************
-*/
+ ******************************************************************************
+ * Copyright (C) 1996-2008, International Business Machines Corporation and   *
+ * others. All Rights Reserved.                                               *
+ ******************************************************************************
+ */
 
 /**
-* File tblcoll.cpp
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date        Name        Description
-*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
-*                          constructor which reads RuleBasedCollator object from
-*                          a binary file.  Added writeToFile method which streams
-*                          RuleBasedCollator out to a binary file.  The streamIn
-*                          and streamOut methods use istream and ostream objects
-*                          in binary mode.
-*  2/11/97     aliu        Moved declarations out of for loop initializer.
-*                          Added Mac compatibility #ifdef for ios::nocreate.
-*  2/12/97     aliu        Modified to use TableCollationData sub-object to
-*                          hold invariant data.
-*  2/13/97     aliu        Moved several methods into this class from Collation.
-*                          Added a private RuleBasedCollator(Locale&) constructor,
-*                          to be used by Collator::getInstance().  General
-*                          clean up.  Made use of UErrorCode variables consistent.
-*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
-*                          constructor and getDynamicClassID.
-*  3/5/97      aliu        Changed compaction cycle to improve performance.  We
-*                          use the maximum allowable value which is kBlockCount.
-*                          Modified getRules() to load rules dynamically.  Changed
-*                          constructFromFile() call to accomodate this (added
-*                          parameter to specify whether binary loading is to
-*                          take place).
-* 05/06/97     helena      Added memory allocation error check.
-*  6/20/97     helena      Java class name change.
-*  6/23/97     helena      Adding comments to make code more readable.
-* 09/03/97     helena      Added createCollationKeyValues().
-* 06/26/98     erm         Changes for CollationKeys using byte arrays.
-* 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99     stephen     Removed EDecompositionMode, merged with
-*                          Normalizer::EMode
-* 06/14/99     stephen     Removed kResourceBundleSuffix
-* 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
-*                          files are no longer used.
-* 11/02/99     helena      Collator performance enhancements.  Special case
-*                          for NO_OP situations.
-* 11/17/99     srl         More performance enhancements. Inlined some internal functions.
-* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
-*                          to implementation file.
-* 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
-*/
+ * File tblcoll.cpp
+ *
+ * Created by: Helena Shih
+ *
+ * Modification History:
+ *
+ *  Date        Name        Description
+ *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
+ *                          constructor which reads RuleBasedCollator object from
+ *                          a binary file.  Added writeToFile method which streams
+ *                          RuleBasedCollator out to a binary file.  The streamIn
+ *                          and streamOut methods use istream and ostream objects
+ *                          in binary mode.
+ *  2/11/97     aliu        Moved declarations out of for loop initializer.
+ *                          Added Mac compatibility #ifdef for ios::nocreate.
+ *  2/12/97     aliu        Modified to use TableCollationData sub-object to
+ *                          hold invariant data.
+ *  2/13/97     aliu        Moved several methods into this class from Collation.
+ *                          Added a private RuleBasedCollator(Locale&) constructor,
+ *                          to be used by Collator::getInstance().  General
+ *                          clean up.  Made use of UErrorCode variables consistent.
+ *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
+ *                          constructor and getDynamicClassID.
+ *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
+ *                          use the maximum allowable value which is kBlockCount.
+ *                          Modified getRules() to load rules dynamically.  Changed
+ *                          constructFromFile() call to accommodate this (added
+ *                          parameter to specify whether binary loading is to
+ *                          take place).
+ * 05/06/97     helena      Added memory allocation error check.
+ *  6/20/97     helena      Java class name change.
+ *  6/23/97     helena      Adding comments to make code more readable.
+ * 09/03/97     helena      Added createCollationKeyValues().
+ * 06/26/98     erm         Changes for CollationKeys using byte arrays.
+ * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
+ * 04/23/99     stephen     Removed EDecompositionMode, merged with
+ *                          Normalizer::EMode
+ * 06/14/99     stephen     Removed kResourceBundleSuffix
+ * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
+ *                          files are no longer used.
+ * 11/02/99     helena      Collator performance enhancements.  Special case
+ *                          for NO_OP situations.
+ * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
+ * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
+ *                          to implementation file.
+ * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
+ */
 
 #include "unicode/utypes.h"
 
 
 #include "unicode/tblcoll.h"
 #include "unicode/coleitr.h"
-#include "unicode/resbund.h"
+#include "unicode/ures.h"
 #include "unicode/uset.h"
 #include "ucol_imp.h"
 #include "uresimp.h"
 #include "uhash.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "putilimp.h"
 
 /* public RuleBasedCollator constructor ---------------------------------- */
 
@@ -75,79 +76,78 @@ U_NAMESPACE_BEGIN
 /**
 * Copy constructor, aliasing, not write-through
 */
-RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) 
-   : Collator(that)
-   , dataIsOwned(FALSE)
-   , isWriteThroughAlias(FALSE)
-   , ucollator(that.ucollator)
-   , urulestring(that.urulestring)
+RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
+: Collator(that)
+, dataIsOwned(FALSE)
+, isWriteThroughAlias(FALSE)
+, ucollator(NULL)
 {
+    RuleBasedCollator::operator=(that);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
-                                           UErrorCode& status) :
-                                           dataIsOwned(FALSE)
+                                     UErrorCode& status) :
+dataIsOwned(FALSE)
 {
-  construct(rules,
-            UCOL_DEFAULT_STRENGTH,
-            UCOL_DEFAULT,
-            status);
+    construct(rules,
+        UCOL_DEFAULT_STRENGTH,
+        UCOL_DEFAULT,
+        status);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
-                      ECollationStrength collationStrength,
-                      UErrorCode& status) : dataIsOwned(FALSE)
+                                     ECollationStrength collationStrength,
+                                     UErrorCode& status) : dataIsOwned(FALSE)
 {
-  construct(rules,
-            getUCollationStrength(collationStrength),
-            UCOL_DEFAULT,
-            status);
+    construct(rules,
+        getUCollationStrength(collationStrength),
+        UCOL_DEFAULT,
+        status);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                      UColAttributeValue decompositionMode,
                                      UErrorCode& status) :
-                                     dataIsOwned(FALSE)
+dataIsOwned(FALSE)
 {
-  construct(rules,
-            UCOL_DEFAULT_STRENGTH,
-            decompositionMode,
-            status);
+    construct(rules,
+        UCOL_DEFAULT_STRENGTH,
+        decompositionMode,
+        status);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
-                      ECollationStrength collationStrength,
-                      UColAttributeValue decompositionMode,
-                      UErrorCode& status) : dataIsOwned(FALSE)
+                                     ECollationStrength collationStrength,
+                                     UColAttributeValue decompositionMode,
+                                     UErrorCode& status) : dataIsOwned(FALSE)
 {
-  construct(rules,
-            getUCollationStrength(collationStrength),
-            decompositionMode,
-            status);
+    construct(rules,
+        getUCollationStrength(collationStrength),
+        decompositionMode,
+        status);
+}
+RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, 
+                    const RuleBasedCollator *base, 
+                    UErrorCode &status) :
+dataIsOwned(TRUE),
+isWriteThroughAlias(FALSE)
+{
+  ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
 }
 
 void
-RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
+RuleBasedCollator::setRuleStringFromCollator()
 {
-  urulestring = NULL;
-  if (U_SUCCESS(status))
-  {
     int32_t length;
     const UChar *r = ucol_getRules(ucollator, &length);
-  
-       if (length > 0) {
+
+    if (r && length > 0) {
         // alias the rules string
-        urulestring = new UnicodeString(TRUE, r, length);
+        urulestring.setTo(TRUE, r, length);
     }
     else {
-        urulestring = new UnicodeString();
+        urulestring.truncate(0); // Clear string.
     }
-    /* test for NULL */
-    if (urulestring == 0) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-  }
 }
 
 // not aliasing, not write-through
@@ -157,28 +157,32 @@ RuleBasedCollator::construct(const UnicodeString& rules,
                              UColAttributeValue decompositionMode,
                              UErrorCode& status)
 {
-  urulestring = 0;
-  ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
-                             decompositionMode, collationStrength,
-                             NULL, &status);
+    ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
+        decompositionMode, collationStrength,
+        NULL, &status);
 
-  dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
-  isWriteThroughAlias = FALSE;
+    dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
+    isWriteThroughAlias = FALSE;
 
-  setRuleStringFromCollator(status);
+    if(ucollator == NULL) {
+        if(U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return; // Failure
+    }
+
+    setRuleStringFromCollator();
 }
 
 /* RuleBasedCollator public destructor ----------------------------------- */
 
 RuleBasedCollator::~RuleBasedCollator()
 {
-  if (dataIsOwned)
-  {
-    ucol_close(ucollator);
-    delete urulestring;
-  }
-  ucollator = 0;
-  urulestring = 0;
+    if (dataIsOwned)
+    {
+        ucol_close(ucollator);
+    }
+    ucollator = 0;
 }
 
 /* RuleBaseCollator public methods --------------------------------------- */
@@ -207,44 +211,54 @@ UBool RuleBasedCollator::operator==(const Collator& that) const
   */
 }
 
+UBool RuleBasedCollator::operator!=(const Collator& other) const
+{
+    return !(*this == other);
+}
+
 // aliasing, not write-through
 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
 {
-  if (this != &that)
-  {
-    if (dataIsOwned)
+    if (this != &that)
     {
-      ucol_close(ucollator);
-      ucollator = NULL;
-      delete urulestring;
-    }
+        if (dataIsOwned)
+        {
+            ucol_close(ucollator);
+        }
 
-    dataIsOwned = FALSE;
-       isWriteThroughAlias = FALSE;
-    ucollator = that.ucollator;
-    urulestring = that.urulestring;
-  }
-  return *this;
+        urulestring.truncate(0); // empty the rule string
+        dataIsOwned = TRUE;
+        isWriteThroughAlias = FALSE;
+
+        UErrorCode intStatus = U_ZERO_ERROR;
+        int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
+        ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
+                                        &intStatus);
+        if (U_SUCCESS(intStatus)) {
+            setRuleStringFromCollator();
+        }
+    }
+    return *this;
 }
 
 // aliasing, not write-through
 Collator* RuleBasedCollator::clone() const
 {
-  return new RuleBasedCollator(*this);
+    return new RuleBasedCollator(*this);
 }
 
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                            (const UnicodeString& source) const
 {
-  UErrorCode status = U_ZERO_ERROR;
-  CollationElementIterator *result = new CollationElementIterator(source, this,
-                                                                  status);
-  if (U_FAILURE(status)) {
-    delete result;
-    return NULL;
-  }
+    UErrorCode status = U_ZERO_ERROR;
+    CollationElementIterator *result = new CollationElementIterator(source, this,
+                                                                    status);
+    if (U_FAILURE(status)) {
+        delete result;
+        return NULL;
+    }
 
-  return result;
+    return result;
 }
 
 /**
@@ -255,16 +269,16 @@ CollationElementIterator* RuleBasedCollator::createCollationElementIterator
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                        (const CharacterIterator& source) const
 {
-  UErrorCode status = U_ZERO_ERROR;
-  CollationElementIterator *result = new CollationElementIterator(source, this,
-                                                                  status);
+    UErrorCode status = U_ZERO_ERROR;
+    CollationElementIterator *result = new CollationElementIterator(source, this,
+                                                                    status);
 
-  if (U_FAILURE(status)) {
-    delete result;
-    return NULL;
-  }
+    if (U_FAILURE(status)) {
+        delete result;
+        return NULL;
+    }
 
-  return result;
+    return result;
 }
 
 /**
@@ -276,7 +290,7 @@ CollationElementIterator* RuleBasedCollator::createCollationElementIterator
 */
 const UnicodeString& RuleBasedCollator::getRules() const
 {
-    return (*urulestring);
+    return urulestring;
 }
 
 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
@@ -286,11 +300,11 @@ void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
     if (rulesize > 0) {
         UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
         if(rules != NULL) {
-          ucol_getRulesEx(ucollator, delta, rules, rulesize);
-          buffer.setTo(rules, rulesize);
-          uprv_free(rules);
+            ucol_getRulesEx(ucollator, delta, rules, rulesize);
+            buffer.setTo(rules, rulesize);
+            uprv_free(rules);
         } else { // couldn't allocate 
-          buffer.remove();
+            buffer.remove();
         }
     }
     else {
@@ -301,10 +315,10 @@ void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
 UnicodeSet *
 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
 {
-  if(U_FAILURE(status)) {
-    return NULL;
-  }
-  return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
+    if(U_FAILURE(status)) {
+        return NULL;
+    }
+    return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
 }
 
 
@@ -320,17 +334,17 @@ Collator::EComparisonResult RuleBasedCollator::compare(
                                                const UnicodeString& target,
                                                int32_t length) const
 {
-  UErrorCode status = U_ZERO_ERROR;
-  return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
+    UErrorCode status = U_ZERO_ERROR;
+    return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
 }
 
 UCollationResult RuleBasedCollator::compare(
                                                const UnicodeString& source,
                                                const UnicodeString& target,
-                                               int32_t length, 
+                                               int32_t length,
                                                UErrorCode &status) const
 {
-  return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
+    return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
 }
 
 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
@@ -339,21 +353,21 @@ Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
                                                        int32_t targetLength)
                                                        const
 {
-  return  getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
-                                                     target, targetLength));
+    return  getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
+                                                         target, targetLength));
 }
 
 UCollationResult RuleBasedCollator::compare(const UChar* source,
                                                        int32_t sourceLength,
                                                        const UChar* target,
-                                                       int32_t targetLength, 
+                                                       int32_t targetLength,
                                                        UErrorCode &status) const
 {
-  if(U_SUCCESS(status)) {
-    return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
-  } else {
-    return UCOL_EQUAL;
-  }
+    if(U_SUCCESS(status)) {
+        return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
+    } else {
+        return UCOL_EQUAL;
+    }
 }
 
 /**
@@ -363,21 +377,21 @@ Collator::EComparisonResult RuleBasedCollator::compare(
                                              const UnicodeString& source,
                                              const UnicodeString& target) const
 {
-  return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(), 
-                                                      target.getBuffer(), target.length()));
+    return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
+                                                        target.getBuffer(), target.length()));
 }
 
 UCollationResult RuleBasedCollator::compare(
                                              const UnicodeString& source,
-                                             const UnicodeString& target, 
+                                             const UnicodeString& target,
                                              UErrorCode &status) const
 {
-  if(U_SUCCESS(status)) {
-    return ucol_strcoll(ucollator, source.getBuffer(), source.length(), 
-                       target.getBuffer(), target.length());
-  } else {
-    return UCOL_EQUAL;
-  }
+    if(U_SUCCESS(status)) {
+        return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
+                                       target.getBuffer(), target.length());
+    } else {
+        return UCOL_EQUAL;
+    }
 }
 
 /**
@@ -390,7 +404,7 @@ UCollationResult RuleBasedCollator::compare(
 * character in the source string, convert them to an ASCII representation, and
 * put them into the collation key.  But it's trickier than that. Each
 * collation element in a string has three components: primary ('A' vs 'B'),
-* secondary ('u' vs 'ü'), and tertiary ('A' vs 'a'), and a primary difference
+* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
 * at the end of a string takes precedence over a secondary or tertiary
 * difference earlier in the string.
 *
@@ -402,7 +416,7 @@ UCollationResult RuleBasedCollator::compare(
 * Here's a hypothetical example, with the collation element represented as a
 * three-digit number, one digit for primary, one for secondary, etc.
 *
-* String:              A     a     B    É
+* String:              A     a     B    \u00C9
 * Collation Elements: 101   100   201  511
 * Collation Key:      1125<null>0001<null>1011<null>
 *
@@ -418,7 +432,7 @@ CollationKey& RuleBasedCollator::getCollationKey(
                                                   CollationKey& sortkey,
                                                   UErrorCode& status) const
 {
-  return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
+    return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
 }
 
 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
@@ -426,22 +440,22 @@ CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                     CollationKey& sortkey,
                                                     UErrorCode& status) const
 {
-  if (U_FAILURE(status))
-  {
-    return sortkey.setToBogus();
-  }
+    if (U_FAILURE(status))
+    {
+        return sortkey.setToBogus();
+    }
 
-  if ((!source) || (sourceLen == 0)) {
-    return sortkey.reset();
-  }
+    if ((!source) || (sourceLen == 0)) {
+        return sortkey.reset();
+    }
 
-  uint8_t *result;
-  int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
-                                                    source, sourceLen,
-                                                    &result,
-                                                    &status);
-  sortkey.adopt(result, resultLen);
-  return sortkey;
+    uint8_t *result;
+    int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
+                                                      source, sourceLen,
+                                                      &result,
+                                                      &status);
+    sortkey.adopt(result, resultLen);
+    return sortkey;
 }
 
 /**
@@ -455,48 +469,54 @@ CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
  */
 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
 {
-  uint8_t result;
-  UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
-  return result;
+    uint8_t result;
+    UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
+    return result;
 }
 
 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
                                               UErrorCode &status)
 {
-  return ucol_cloneRuleData(ucollator, &length, &status);
+    return ucol_cloneRuleData(ucollator, &length, &status);
+}
+
+
+int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
+{
+  return ucol_cloneBinary(ucollator, buffer, capacity, &status);
 }
 
 void RuleBasedCollator::setAttribute(UColAttribute attr,
                                      UColAttributeValue value,
                                      UErrorCode &status)
 {
-  if (U_FAILURE(status))
-    return;
-  checkOwned();
-  ucol_setAttribute(ucollator, attr, value, &status);
+    if (U_FAILURE(status))
+        return;
+    checkOwned();
+    ucol_setAttribute(ucollator, attr, value, &status);
 }
 
 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                       UErrorCode &status)
 {
-  if (U_FAILURE(status))
-    return UCOL_DEFAULT;
-  return ucol_getAttribute(ucollator, attr, &status);
+    if (U_FAILURE(status))
+        return UCOL_DEFAULT;
+    return ucol_getAttribute(ucollator, attr, &status);
 }
 
 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
-       checkOwned();
-       return ucol_setVariableTop(ucollator, varTop, len, &status);
+    checkOwned();
+    return ucol_setVariableTop(ucollator, varTop, len, &status);
 }
 
 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
-       checkOwned();
-       return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
+    checkOwned();
+    return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
 }
 
 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
-       checkOwned();
-       ucol_restoreVariableTop(ucollator, varTop, &status);
+    checkOwned();
+    ucol_restoreVariableTop(ucollator, varTop, &status);
 }
 
 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
@@ -505,20 +525,24 @@ uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
 
 Collator* RuleBasedCollator::safeClone(void)
 {
-  UErrorCode intStatus = U_ZERO_ERROR;
-  int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
-  UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, 
-                                   &intStatus);
-  if (U_FAILURE(intStatus)) {
-    return NULL;
-  }
+    UErrorCode intStatus = U_ZERO_ERROR;
+    int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
+    UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
+                                    &intStatus);
+    if (U_FAILURE(intStatus)) {
+        return NULL;
+    }
 
-  UnicodeString *r = new UnicodeString(*urulestring);
-  RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
-  result->dataIsOwned = TRUE;
-  result->isWriteThroughAlias = FALSE;
+    RuleBasedCollator *result = new RuleBasedCollator();
+    // Null pointer check
+    if (result != NULL) {
+           result->ucollator = ucol;
+           result->dataIsOwned = TRUE;
+           result->isWriteThroughAlias = FALSE;
+           setRuleStringFromCollator();
+    }
 
-  return result;
+    return result;
 }
 
 
@@ -526,29 +550,29 @@ int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                          uint8_t *result, int32_t resultLength)
                                          const
 {
-  return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
+    return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
 }
 
 int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                          int32_t sourceLength, uint8_t *result,
                                          int32_t resultLength) const
 {
-  return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
+    return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
 }
 
 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
 {
-  UErrorCode intStatus = U_ZERO_ERROR;
-  return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
-                               &intStatus));
+    UErrorCode intStatus = U_ZERO_ERROR;
+    return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
+                                &intStatus));
 }
 
 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
 {
-  checkOwned();
-  UErrorCode intStatus = U_ZERO_ERROR;
-  UCollationStrength strength = getUCollationStrength(newStrength);
-  ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
+    checkOwned();
+    UErrorCode intStatus = U_ZERO_ERROR;
+    UCollationStrength strength = getUCollationStrength(newStrength);
+    ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
 }
 
 /**
@@ -557,38 +581,38 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
 */
 int32_t RuleBasedCollator::hashCode() const
 {
-  int32_t length;
-  const UChar *rules = ucol_getRules(ucollator, &length);
-  return uhash_hashUCharsN(rules, length);
+    int32_t length;
+    const UChar *rules = ucol_getRules(ucollator, &length);
+    return uhash_hashUCharsN(rules, length);
 }
 
 /**
 * return the locale of this collator
 */
 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
-  const char *result = ucol_getLocale(ucollator, type, &status);
-  if(result == NULL) {
-    Locale res("");
-    res.setToBogus();
-    return res;
-  } else {
-    return Locale(result);
-  }
+    const char *result = ucol_getLocale(ucollator, type, &status);
+    if(result == NULL) {
+        Locale res("");
+        res.setToBogus();
+        return res;
+    } else {
+        return Locale(result);
+    }
 }
 
 void
-RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
-       checkOwned();
-    size_t rlen = uprv_strlen(requestedLocale.getName());
-    char* rloc  = (char *)uprv_malloc((rlen+1)*sizeof(char));
+RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
+    checkOwned();
+    char* rloc  = uprv_strdup(requestedLocale.getName());
     if (rloc) {
-        uprv_strcpy(rloc, requestedLocale.getName());
-        size_t vlen = uprv_strlen(validLocale.getName());
-        char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
+        char* vloc = uprv_strdup(validLocale.getName());
         if (vloc) {
-            uprv_strcpy(vloc, validLocale.getName());
-            ucol_setReqValidLocales(ucollator, rloc, vloc);
-            return;
+            char* aloc = uprv_strdup(actualLocale.getName());
+            if (aloc) {
+                ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
+                return;
+            }
+            uprv_free(vloc);
         }
         uprv_free(rloc);
     }
@@ -596,167 +620,85 @@ RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& valid
 
 // RuleBaseCollatorNew private constructor ----------------------------------
 
-RuleBasedCollator::RuleBasedCollator() 
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0)
+RuleBasedCollator::RuleBasedCollator()
+  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
 }
 
-RuleBasedCollator::RuleBasedCollator(UCollator *collator,
-                                     UnicodeString *rule)
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), urulestring(0) 
-{
-  ucollator = collator;
-  urulestring = rule;
-}
-
 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
-                                           UErrorCode& status) :
-                                    dataIsOwned(FALSE), ucollator(0), urulestring(0)
+                                           UErrorCode& status)
+: dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
-  if (U_FAILURE(status))
-    return;
-
-  /*
-  Try to load, in order:
-   1. The desired locale's collation.
-   2. A fallback of the desired locale.
-   3. The default locale's collation.
-   4. A fallback of the default locale.
-   5. The default collation rules, which contains en_US collation rules.
-
-   To reiterate, we try:
-   Specific:
-    language+country+variant
-    language+country
-    language
-   Default:
-    language+country+variant
-    language+country
-    language
-   Root: (aka DEFAULTRULES)
-   steps 1-5 are handled by resource bundle fallback mechanism.
-   however, in a very unprobable situation that no resource bundle
-   data exists, step 5 is repeated with hardcoded default rules.
-  */
-
-  setUCollator(desiredLocale, status);
+    if (U_FAILURE(status))
+        return;
 
-  if (U_FAILURE(status))
-  {
-    status = U_ZERO_ERROR;
+    /*
+    Try to load, in order:
+     1. The desired locale's collation.
+     2. A fallback of the desired locale.
+     3. The default locale's collation.
+     4. A fallback of the default locale.
+     5. The default collation rules, which contains en_US collation rules.
+
+     To reiterate, we try:
+     Specific:
+      language+country+variant
+      language+country
+      language
+     Default:
+      language+country+variant
+      language+country
+      language
+     Root: (aka DEFAULTRULES)
+     steps 1-5 are handled by resource bundle fallback mechanism.
+     however, in a very improbable situation that no resource bundle
+     data exists, step 5 is repeated with hardcoded default rules.
+    */
+
+    setUCollator(desiredLocale, status);
+
+    if (U_FAILURE(status))
+    {
+        status = U_ZERO_ERROR;
 
-    setUCollator(kRootLocaleName, status);
-    if (status == U_ZERO_ERROR) {
-        status = U_USING_DEFAULT_WARNING;
+        setUCollator(kRootLocaleName, status);
+        if (status == U_ZERO_ERROR) {
+            status = U_USING_DEFAULT_WARNING;
+        }
     }
-  }
 
-  if (U_SUCCESS(status))
-  {
-    int32_t length;
-    const UChar *r = ucol_getRules(ucollator, &length);
-    if (length > 0) {
-        // alias the rules string
-        urulestring = new UnicodeString(TRUE, r, length);
-    }
-    else {
-        urulestring = new UnicodeString();
-    }
-    /* test for NULL */
-    if (urulestring == 0) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
+    if (U_SUCCESS(status))
+    {
+        setRuleStringFromCollator();
     }
-    dataIsOwned = TRUE;
-       isWriteThroughAlias = FALSE;
-  }
-
-  return;
 }
 
 void 
 RuleBasedCollator::setUCollator(const char *locale,
                                 UErrorCode &status)
 {
-  if (U_FAILURE(status))
-    return;
-  if (ucollator && dataIsOwned)
-    ucol_close(ucollator);
-  ucollator = ucol_open_internal(locale, &status);
-  dataIsOwned = TRUE;
-  isWriteThroughAlias = FALSE;
+    if (U_FAILURE(status))
+        return;
+    if (ucollator && dataIsOwned)
+        ucol_close(ucollator);
+    ucollator = ucol_open_internal(locale, &status);
+    dataIsOwned = TRUE;
+    isWriteThroughAlias = FALSE;
 }
 
 
 void
 RuleBasedCollator::checkOwned() {
-       if (!(dataIsOwned || isWriteThroughAlias)) {
-               UErrorCode status = U_ZERO_ERROR;
-               ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
-               setRuleStringFromCollator(status);
-               dataIsOwned = TRUE;
-               isWriteThroughAlias = FALSE;
-       }
+    if (!(dataIsOwned || isWriteThroughAlias)) {
+        UErrorCode status = U_ZERO_ERROR;
+        ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
+        setRuleStringFromCollator();
+        dataIsOwned = TRUE;
+        isWriteThroughAlias = FALSE;
+    }
 }
 
-/* RuleBasedCollator private data members -------------------------------- */
-
-/*
- * TODO:
- * These should probably be enums (<=0xffff) or #defines (>0xffff)
- * for better performance.
- * Include ucol_imp.h and use its constants if possible.
- * Only used in coleitr.h?!
- * Remove from here!
- */
-
-/* need look up in .commit() */
-const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
-/* Expand index follows */
-const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
-/* contract indexes follows */
-const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
-/* unmapped character values */
-const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
-/* primary strength increment */
-const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
-/* secondary strength increment */
-const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
-/* tertiary strength increment */
-const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
-/* mask off anything but primary order */
-const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
-/* mask off anything but secondary order */
-const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
-/* mask off anything but tertiary order */
-const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
-/* mask off ignorable char order */
-const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
-/* use only the primary difference */
-const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
-/* use only the primary and secondary difference */
-const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
-/* primary order shift */
-const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
-/* secondary order shift */
-const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
-/* starting value for collation elements */
-const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
-/* testing mask for primary low element */
-const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
-/* reseting value for secondaries and tertiaries */
-const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
-/* reseting value for tertiaries */
-const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
-
-const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
-
-/* unique file id for parity check */
-const int16_t RuleBasedCollator::FILEID = 0x5443;
-/* binary collation file extension */
-const char RuleBasedCollator::kFilenameSuffix[] = ".col";
-/* class id ? Value is irrelevant */
-const char  RuleBasedCollator::fgClassID = 0;
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
 
 U_NAMESPACE_END