git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/tblcoll.cpp
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / i18n / tblcoll.cpp
index 14f7ad1776f688aeee786cc17de8a9050f433ea5..720107ee032a597492567d5a91e16f83644736ec 100644 (file)
@@ -1,58 +1,58 @@
 /*
-******************************************************************************
-* Copyright (C) 1996-2004, International Business Machines Corporation and   *
-* others. All Rights Reserved.                                               *
-******************************************************************************
-*/
+ ******************************************************************************
+ * Copyright (C) 1996-2008, International Business Machines Corporation and   *
+ * others. All Rights Reserved.                                               *
+ ******************************************************************************
+ */
 
 /**
-* File tblcoll.cpp
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date        Name        Description
-*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
-*                          constructor which reads RuleBasedCollator object from
-*                          a binary file.  Added writeToFile method which streams
-*                          RuleBasedCollator out to a binary file.  The streamIn
-*                          and streamOut methods use istream and ostream objects
-*                          in binary mode.
-*  2/11/97     aliu        Moved declarations out of for loop initializer.
-*                          Added Mac compatibility #ifdef for ios::nocreate.
-*  2/12/97     aliu        Modified to use TableCollationData sub-object to
-*                          hold invariant data.
-*  2/13/97     aliu        Moved several methods into this class from Collation.
-*                          Added a private RuleBasedCollator(Locale&) constructor,
-*                          to be used by Collator::getInstance().  General
-*                          clean up.  Made use of UErrorCode variables consistent.
-*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
-*                          constructor and getDynamicClassID.
-*  3/5/97      aliu        Changed compaction cycle to improve performance.  We
-*                          use the maximum allowable value which is kBlockCount.
-*                          Modified getRules() to load rules dynamically.  Changed
-*                          constructFromFile() call to accomodate this (added
-*                          parameter to specify whether binary loading is to
-*                          take place).
-* 05/06/97     helena      Added memory allocation error check.
-*  6/20/97     helena      Java class name change.
-*  6/23/97     helena      Adding comments to make code more readable.
-* 09/03/97     helena      Added createCollationKeyValues().
-* 06/26/98     erm         Changes for CollationKeys using byte arrays.
-* 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99     stephen     Removed EDecompositionMode, merged with
-*                          Normalizer::EMode
-* 06/14/99     stephen     Removed kResourceBundleSuffix
-* 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
-*                          files are no longer used.
-* 11/02/99     helena      Collator performance enhancements.  Special case
-*                          for NO_OP situations.
-* 11/17/99     srl         More performance enhancements. Inlined some internal functions.
-* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
-*                          to implementation file.
-* 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
-*/
+ * File tblcoll.cpp
+ *
+ * Created by: Helena Shih
+ *
+ * Modification History:
+ *
+ *  Date        Name        Description
+ *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
+ *                          constructor which reads RuleBasedCollator object from
+ *                          a binary file.  Added writeToFile method which streams
+ *                          RuleBasedCollator out to a binary file.  The streamIn
+ *                          and streamOut methods use istream and ostream objects
+ *                          in binary mode.
+ *  2/11/97     aliu        Moved declarations out of for loop initializer.
+ *                          Added Mac compatibility #ifdef for ios::nocreate.
+ *  2/12/97     aliu        Modified to use TableCollationData sub-object to
+ *                          hold invariant data.
+ *  2/13/97     aliu        Moved several methods into this class from Collation.
+ *                          Added a private RuleBasedCollator(Locale&) constructor,
+ *                          to be used by Collator::getInstance().  General
+ *                          clean up.  Made use of UErrorCode variables consistent.
+ *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
+ *                          constructor and getDynamicClassID.
+ *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
+ *                          use the maximum allowable value which is kBlockCount.
+ *                          Modified getRules() to load rules dynamically.  Changed
+ *                          constructFromFile() call to accommodate this (added
+ *                          parameter to specify whether binary loading is to
+ *                          take place).
+ * 05/06/97     helena      Added memory allocation error check.
+ *  6/20/97     helena      Java class name change.
+ *  6/23/97     helena      Adding comments to make code more readable.
+ * 09/03/97     helena      Added createCollationKeyValues().
+ * 06/26/98     erm         Changes for CollationKeys using byte arrays.
+ * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
+ * 04/23/99     stephen     Removed EDecompositionMode, merged with
+ *                          Normalizer::EMode
+ * 06/14/99     stephen     Removed kResourceBundleSuffix
+ * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
+ *                          files are no longer used.
+ * 11/02/99     helena      Collator performance enhancements.  Special case
+ *                          for NO_OP situations.
+ * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
+ * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
+ *                          to implementation file.
+ * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
+ */
 
 #include "unicode/utypes.h"
 
@@ -80,9 +80,9 @@ RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
 : Collator(that)
 , dataIsOwned(FALSE)
 , isWriteThroughAlias(FALSE)
-, ucollator(that.ucollator)
-, urulestring(that.urulestring)
+, ucollator(NULL)
 {
+    RuleBasedCollator::operator=(that);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
@@ -126,28 +126,27 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
         decompositionMode,
         status);
 }
+RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, 
+                    const RuleBasedCollator *base, 
+                    UErrorCode &status) :
+dataIsOwned(TRUE),
+isWriteThroughAlias(FALSE)
+{
+  ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
+}
 
 void
-RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
+RuleBasedCollator::setRuleStringFromCollator()
 {
-    urulestring = NULL;
-    if (U_SUCCESS(status))
-    {
-        int32_t length;
-        const UChar *r = ucol_getRules(ucollator, &length);
+    int32_t length;
+    const UChar *r = ucol_getRules(ucollator, &length);
 
-        if (length > 0) {
-            // alias the rules string
-            urulestring = new UnicodeString(TRUE, r, length);
-        }
-        else {
-            urulestring = new UnicodeString();
-        }
-        /* test for NULL */
-        if (urulestring == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
+    if (r && length > 0) {
+        // alias the rules string
+        urulestring.setTo(TRUE, r, length);
+    }
+    else {
+        urulestring.truncate(0); // Clear string.
     }
 }
 
@@ -158,7 +157,6 @@ RuleBasedCollator::construct(const UnicodeString& rules,
                              UColAttributeValue decompositionMode,
                              UErrorCode& status)
 {
-    urulestring = 0;
     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
         decompositionMode, collationStrength,
         NULL, &status);
@@ -166,7 +164,14 @@ RuleBasedCollator::construct(const UnicodeString& rules,
     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
     isWriteThroughAlias = FALSE;
 
-    setRuleStringFromCollator(status);
+    if(ucollator == NULL) {
+        if(U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return; // Failure
+    }
+
+    setRuleStringFromCollator();
 }
 
 /* RuleBasedCollator public destructor ----------------------------------- */
@@ -176,10 +181,8 @@ RuleBasedCollator::~RuleBasedCollator()
     if (dataIsOwned)
     {
         ucol_close(ucollator);
-        delete urulestring;
     }
     ucollator = 0;
-    urulestring = 0;
 }
 
 /* RuleBaseCollator public methods --------------------------------------- */
@@ -221,14 +224,19 @@ RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
         if (dataIsOwned)
         {
             ucol_close(ucollator);
-            ucollator = NULL;
-            delete urulestring;
         }
 
-        dataIsOwned = FALSE;
+        urulestring.truncate(0); // empty the rule string
+        dataIsOwned = TRUE;
         isWriteThroughAlias = FALSE;
-        ucollator = that.ucollator;
-        urulestring = that.urulestring;
+
+        UErrorCode intStatus = U_ZERO_ERROR;
+        int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
+        ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
+                                        &intStatus);
+        if (U_SUCCESS(intStatus)) {
+            setRuleStringFromCollator();
+        }
     }
     return *this;
 }
@@ -236,7 +244,7 @@ RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
 // aliasing, not write-through
 Collator* RuleBasedCollator::clone() const
 {
-  return new RuleBasedCollator(*this);
+    return new RuleBasedCollator(*this);
 }
 
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
@@ -282,7 +290,7 @@ CollationElementIterator* RuleBasedCollator::createCollationElementIterator
 */
 const UnicodeString& RuleBasedCollator::getRules() const
 {
-    return (*urulestring);
+    return urulestring;
 }
 
 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
@@ -472,6 +480,12 @@ uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
     return ucol_cloneRuleData(ucollator, &length, &status);
 }
 
+
+int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
+{
+  return ucol_cloneBinary(ucollator, buffer, capacity, &status);
+}
+
 void RuleBasedCollator::setAttribute(UColAttribute attr,
                                      UColAttributeValue value,
                                      UErrorCode &status)
@@ -519,10 +533,14 @@ Collator* RuleBasedCollator::safeClone(void)
         return NULL;
     }
 
-    UnicodeString *r = new UnicodeString(*urulestring);
-    RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
-    result->dataIsOwned = TRUE;
-    result->isWriteThroughAlias = FALSE;
+    RuleBasedCollator *result = new RuleBasedCollator();
+    // Null pointer check
+    if (result != NULL) {
+           result->ucollator = ucol;
+           result->dataIsOwned = TRUE;
+           result->isWriteThroughAlias = FALSE;
+           setRuleStringFromCollator();
+    }
 
     return result;
 }
@@ -583,18 +601,18 @@ const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &s
 }
 
 void
-RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
+RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
     checkOwned();
-    size_t rlen = uprv_strlen(requestedLocale.getName());
-    char* rloc  = (char *)uprv_malloc((rlen+1)*sizeof(char));
+    char* rloc  = uprv_strdup(requestedLocale.getName());
     if (rloc) {
-        uprv_strcpy(rloc, requestedLocale.getName());
-        size_t vlen = uprv_strlen(validLocale.getName());
-        char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
+        char* vloc = uprv_strdup(validLocale.getName());
         if (vloc) {
-            uprv_strcpy(vloc, validLocale.getName());
-            ucol_setReqValidLocales(ucollator, rloc, vloc);
-            return;
+            char* aloc = uprv_strdup(actualLocale.getName());
+            if (aloc) {
+                ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
+                return;
+            }
+            uprv_free(vloc);
         }
         uprv_free(rloc);
     }
@@ -603,21 +621,13 @@ RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& valid
 // RuleBaseCollatorNew private constructor ----------------------------------
 
 RuleBasedCollator::RuleBasedCollator()
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0)
-{
-}
-
-RuleBasedCollator::RuleBasedCollator(UCollator *collator,
-                                     UnicodeString *rule)
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), urulestring(0)
+  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
-    ucollator = collator;
-    urulestring = rule;
 }
 
 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
-                                           UErrorCode& status) :
-                                   dataIsOwned(FALSE), ucollator(0), urulestring(0)
+                                           UErrorCode& status)
+: dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
     if (U_FAILURE(status))
         return;
@@ -659,22 +669,7 @@ RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
 
     if (U_SUCCESS(status))
     {
-        int32_t length;
-        const UChar *r = ucol_getRules(ucollator, &length);
-        if (length > 0) {
-            // alias the rules string
-            urulestring = new UnicodeString(TRUE, r, length);
-        }
-        else {
-            urulestring = new UnicodeString();
-        }
-        /* test for NULL */
-        if (urulestring == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
-        dataIsOwned = TRUE;
-        isWriteThroughAlias = FALSE;
+        setRuleStringFromCollator();
     }
 }
 
@@ -697,64 +692,12 @@ RuleBasedCollator::checkOwned() {
     if (!(dataIsOwned || isWriteThroughAlias)) {
         UErrorCode status = U_ZERO_ERROR;
         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
-        setRuleStringFromCollator(status);
+        setRuleStringFromCollator();
         dataIsOwned = TRUE;
         isWriteThroughAlias = FALSE;
     }
 }
 
-/* RuleBasedCollator private data members -------------------------------- */
-
-/*
- * TODO:
- * These should probably be enums (<=0xffff) or #defines (>0xffff)
- * for better performance.
- * Include ucol_imp.h and use its constants if possible.
- * Only used in coleitr.h?!
- * Remove from here!
- */
-
-/* need look up in .commit() */
-const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
-/* Expand index follows */
-const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
-/* contract indexes follows */
-const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
-/* unmapped character values */
-const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
-/* primary strength increment */
-const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
-/* secondary strength increment */
-const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
-/* tertiary strength increment */
-const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
-/* mask off anything but primary order */
-const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
-/* mask off anything but secondary order */
-const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
-/* mask off anything but tertiary order */
-const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
-/* mask off ignorable char order */
-const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
-/* use only the primary difference */
-const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
-/* use only the primary and secondary difference */
-const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
-/* primary order shift */
-const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
-/* secondary order shift */
-const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
-/* starting value for collation elements */
-const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
-/* testing mask for primary low element */
-const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
-/* reseting value for secondaries and tertiaries */
-const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
-/* reseting value for tertiaries */
-const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
-
-const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
-
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
 
 U_NAMESPACE_END