git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/tblcoll.cpp
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / i18n / tblcoll.cpp
index 14f7ad1776f688aeee786cc17de8a9050f433ea5..c99c5d480e50cc762823bbed6fc7dbf442ffc52c 100644 (file)
@@ -1,58 +1,60 @@
 /*
-******************************************************************************
-* Copyright (C) 1996-2004, International Business Machines Corporation and   *
-* others. All Rights Reserved.                                               *
-******************************************************************************
-*/
+ ******************************************************************************
+ * Copyright (C) 1996-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ******************************************************************************
+ */
 
 /**
-* File tblcoll.cpp
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date        Name        Description
-*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
-*                          constructor which reads RuleBasedCollator object from
-*                          a binary file.  Added writeToFile method which streams
-*                          RuleBasedCollator out to a binary file.  The streamIn
-*                          and streamOut methods use istream and ostream objects
-*                          in binary mode.
-*  2/11/97     aliu        Moved declarations out of for loop initializer.
-*                          Added Mac compatibility #ifdef for ios::nocreate.
-*  2/12/97     aliu        Modified to use TableCollationData sub-object to
-*                          hold invariant data.
-*  2/13/97     aliu        Moved several methods into this class from Collation.
-*                          Added a private RuleBasedCollator(Locale&) constructor,
-*                          to be used by Collator::getInstance().  General
-*                          clean up.  Made use of UErrorCode variables consistent.
-*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
-*                          constructor and getDynamicClassID.
-*  3/5/97      aliu        Changed compaction cycle to improve performance.  We
-*                          use the maximum allowable value which is kBlockCount.
-*                          Modified getRules() to load rules dynamically.  Changed
-*                          constructFromFile() call to accomodate this (added
-*                          parameter to specify whether binary loading is to
-*                          take place).
-* 05/06/97     helena      Added memory allocation error check.
-*  6/20/97     helena      Java class name change.
-*  6/23/97     helena      Adding comments to make code more readable.
-* 09/03/97     helena      Added createCollationKeyValues().
-* 06/26/98     erm         Changes for CollationKeys using byte arrays.
-* 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99     stephen     Removed EDecompositionMode, merged with
-*                          Normalizer::EMode
-* 06/14/99     stephen     Removed kResourceBundleSuffix
-* 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
-*                          files are no longer used.
-* 11/02/99     helena      Collator performance enhancements.  Special case
-*                          for NO_OP situations.
-* 11/17/99     srl         More performance enhancements. Inlined some internal functions.
-* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
-*                          to implementation file.
-* 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
-*/
+ * File tblcoll.cpp
+ *
+ * Created by: Helena Shih
+ *
+ * Modification History:
+ *
+ *  Date        Name        Description
+ *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
+ *                          constructor which reads RuleBasedCollator object from
+ *                          a binary file.  Added writeToFile method which streams
+ *                          RuleBasedCollator out to a binary file.  The streamIn
+ *                          and streamOut methods use istream and ostream objects
+ *                          in binary mode.
+ *  2/11/97     aliu        Moved declarations out of for loop initializer.
+ *                          Added Mac compatibility #ifdef for ios::nocreate.
+ *  2/12/97     aliu        Modified to use TableCollationData sub-object to
+ *                          hold invariant data.
+ *  2/13/97     aliu        Moved several methods into this class from Collation.
+ *                          Added a private RuleBasedCollator(Locale&) constructor,
+ *                          to be used by Collator::getInstance().  General
+ *                          clean up.  Made use of UErrorCode variables consistent.
+ *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
+ *                          constructor and getDynamicClassID.
+ *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
+ *                          use the maximum allowable value which is kBlockCount.
+ *                          Modified getRules() to load rules dynamically.  Changed
+ *                          constructFromFile() call to accomodate this (added
+ *                          parameter to specify whether binary loading is to
+ *                          take place).
+ * 05/06/97     helena      Added memory allocation error check.
+ *  6/20/97     helena      Java class name change.
+ *  6/23/97     helena      Adding comments to make code more readable.
+ * 09/03/97     helena      Added createCollationKeyValues().
+ * 06/26/98     erm         Changes for CollationKeys using byte arrays.
+ * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
+ * 04/23/99     stephen     Removed EDecompositionMode, merged with
+ *                          Normalizer::EMode
+ * 06/14/99     stephen     Removed kResourceBundleSuffix
+ * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
+ *                          files are no longer used.
+ * 11/02/99     helena      Collator performance enhancements.  Special case
+ *                          for NO_OP situations.
+ * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
+ * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
+ *                          to implementation file.
+ * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
+ */
+
+#include <typeinfo>  // for 'typeid' to work
 
 #include "unicode/utypes.h"
 
@@ -68,6 +70,7 @@
 #include "cmemory.h"
 #include "cstring.h"
 #include "putilimp.h"
+#include "ustr_imp.h"
 
 /* public RuleBasedCollator constructor ---------------------------------- */
 
@@ -80,9 +83,9 @@ RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
 : Collator(that)
 , dataIsOwned(FALSE)
 , isWriteThroughAlias(FALSE)
-, ucollator(that.ucollator)
-, urulestring(that.urulestring)
+, ucollator(NULL)
 {
+    RuleBasedCollator::operator=(that);
 }
 
 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
@@ -126,28 +129,27 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
         decompositionMode,
         status);
 }
+RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
+                    const RuleBasedCollator *base,
+                    UErrorCode &status) :
+dataIsOwned(TRUE),
+isWriteThroughAlias(FALSE)
+{
+  ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
+}
 
 void
-RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
+RuleBasedCollator::setRuleStringFromCollator()
 {
-    urulestring = NULL;
-    if (U_SUCCESS(status))
-    {
-        int32_t length;
-        const UChar *r = ucol_getRules(ucollator, &length);
+    int32_t length;
+    const UChar *r = ucol_getRules(ucollator, &length);
 
-        if (length > 0) {
-            // alias the rules string
-            urulestring = new UnicodeString(TRUE, r, length);
-        }
-        else {
-            urulestring = new UnicodeString();
-        }
-        /* test for NULL */
-        if (urulestring == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
+    if (r && length > 0) {
+        // alias the rules string
+        urulestring.setTo(TRUE, r, length);
+    }
+    else {
+        urulestring.truncate(0); // Clear string.
     }
 }
 
@@ -158,7 +160,6 @@ RuleBasedCollator::construct(const UnicodeString& rules,
                              UColAttributeValue decompositionMode,
                              UErrorCode& status)
 {
-    urulestring = 0;
     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
         decompositionMode, collationStrength,
         NULL, &status);
@@ -166,7 +167,14 @@ RuleBasedCollator::construct(const UnicodeString& rules,
     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
     isWriteThroughAlias = FALSE;
 
-    setRuleStringFromCollator(status);
+    if(ucollator == NULL) {
+        if(U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return; // Failure
+    }
+
+    setRuleStringFromCollator();
 }
 
 /* RuleBasedCollator public destructor ----------------------------------- */
@@ -176,10 +184,8 @@ RuleBasedCollator::~RuleBasedCollator()
     if (dataIsOwned)
     {
         ucol_close(ucollator);
-        delete urulestring;
     }
     ucollator = 0;
-    urulestring = 0;
 }
 
 /* RuleBaseCollator public methods --------------------------------------- */
@@ -190,7 +196,7 @@ UBool RuleBasedCollator::operator==(const Collator& that) const
   if (Collator::operator==(that))
     return TRUE;
 
-  if (getDynamicClassID() != that.getDynamicClassID())
+  if (typeid(*this) != typeid(that))
     return FALSE;  /* not the same class */
 
   RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
@@ -221,14 +227,19 @@ RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
         if (dataIsOwned)
         {
             ucol_close(ucollator);
-            ucollator = NULL;
-            delete urulestring;
         }
 
-        dataIsOwned = FALSE;
+        urulestring.truncate(0); // empty the rule string
+        dataIsOwned = TRUE;
         isWriteThroughAlias = FALSE;
-        ucollator = that.ucollator;
-        urulestring = that.urulestring;
+
+        UErrorCode intStatus = U_ZERO_ERROR;
+        int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
+        ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
+                                        &intStatus);
+        if (U_SUCCESS(intStatus)) {
+            setRuleStringFromCollator();
+        }
     }
     return *this;
 }
@@ -236,9 +247,10 @@ RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
 // aliasing, not write-through
 Collator* RuleBasedCollator::clone() const
 {
-  return new RuleBasedCollator(*this);
+    return new RuleBasedCollator(*this);
 }
 
+
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                            (const UnicodeString& source) const
 {
@@ -282,7 +294,7 @@ CollationElementIterator* RuleBasedCollator::createCollationElementIterator
 */
 const UnicodeString& RuleBasedCollator::getRules() const
 {
-    return (*urulestring);
+    return urulestring;
 }
 
 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
@@ -295,7 +307,7 @@ void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
             ucol_getRulesEx(ucollator, delta, rules, rulesize);
             buffer.setTo(rules, rulesize);
             uprv_free(rules);
-        } else { // couldn't allocate 
+        } else { // couldn't allocate
             buffer.remove();
         }
     }
@@ -386,6 +398,16 @@ UCollationResult RuleBasedCollator::compare(
     }
 }
 
+UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
+                                            UCharIterator &tIter,
+                                            UErrorCode &status) const {
+    if(U_SUCCESS(status)) {
+        return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
+    } else {
+        return UCOL_EQUAL;
+    }
+}
+
 /**
 * Retrieve a collation key for the specified string. The key can be compared
 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
@@ -432,21 +454,46 @@ CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                     CollationKey& sortkey,
                                                     UErrorCode& status) const
 {
-    if (U_FAILURE(status))
-    {
+    if (U_FAILURE(status)) {
+        return sortkey.setToBogus();
+    }
+    if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
         return sortkey.setToBogus();
     }
 
-    if ((!source) || (sourceLen == 0)) {
+    if (sourceLen < 0) {
+        sourceLen = u_strlen(source);
+    }
+    if (sourceLen == 0) {
         return sortkey.reset();
     }
 
     uint8_t *result;
-    int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
-                                                      source, sourceLen,
-                                                      &result,
-                                                      &status);
-    sortkey.adopt(result, resultLen);
+    int32_t resultCapacity;
+    if (sortkey.fCapacity >= (sourceLen * 3)) {
+        // Try to reuse the CollationKey.fBytes.
+        result = sortkey.fBytes;
+        resultCapacity = sortkey.fCapacity;
+    } else {
+        result = NULL;
+        resultCapacity = 0;
+    }
+    int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
+                                                      result, resultCapacity, &status);
+
+    if (U_SUCCESS(status)) {
+        if (result == sortkey.fBytes) {
+            sortkey.setLength(resultLen);
+        } else {
+            sortkey.adopt(result, resultCapacity, resultLen);
+        }
+    } else {
+        if (result != sortkey.fBytes) {
+            uprv_free(result);
+        }
+        sortkey.setToBogus();
+    }
     return sortkey;
 }
 
@@ -472,6 +519,12 @@ uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
     return ucol_cloneRuleData(ucollator, &length, &status);
 }
 
+
+int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
+{
+  return ucol_cloneBinary(ucollator, buffer, capacity, &status);
+}
+
 void RuleBasedCollator::setAttribute(UColAttribute attr,
                                      UColAttributeValue value,
                                      UErrorCode &status)
@@ -519,10 +572,14 @@ Collator* RuleBasedCollator::safeClone(void)
         return NULL;
     }
 
-    UnicodeString *r = new UnicodeString(*urulestring);
-    RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
-    result->dataIsOwned = TRUE;
-    result->isWriteThroughAlias = FALSE;
+    RuleBasedCollator *result = new RuleBasedCollator();
+    // Null pointer check
+    if (result != NULL) {
+           result->ucollator = ucol;
+           result->dataIsOwned = TRUE;
+           result->isWriteThroughAlias = FALSE;
+           setRuleStringFromCollator();
+    }
 
     return result;
 }
@@ -557,6 +614,29 @@ void RuleBasedCollator::setStrength(ECollationStrength newStrength)
     ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
 }
 
+int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
+                                          int32_t destCapacity,
+                                          UErrorCode& status) const
+{
+    return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
+}
+
+void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
+                                       int32_t reorderCodesLength,
+                                       UErrorCode& status)
+{
+    checkOwned();
+    ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
+}
+
+int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
+                                int32_t* dest,
+                                int32_t destCapacity,
+                                UErrorCode& status)
+{
+    return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
+}
+
 /**
 * Create a hash code for this collation. Just hash the main rule table -- that
 * should be good enough for almost any use.
@@ -565,14 +645,14 @@ int32_t RuleBasedCollator::hashCode() const
 {
     int32_t length;
     const UChar *rules = ucol_getRules(ucollator, &length);
-    return uhash_hashUCharsN(rules, length);
+    return ustr_hashUCharsN(rules, length);
 }
 
 /**
 * return the locale of this collator
 */
 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
-    const char *result = ucol_getLocale(ucollator, type, &status);
+    const char *result = ucol_getLocaleByType(ucollator, type, &status);
     if(result == NULL) {
         Locale res("");
         res.setToBogus();
@@ -583,18 +663,18 @@ const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &s
 }
 
 void
-RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
+RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
     checkOwned();
-    size_t rlen = uprv_strlen(requestedLocale.getName());
-    char* rloc  = (char *)uprv_malloc((rlen+1)*sizeof(char));
+    char* rloc  = uprv_strdup(requestedLocale.getName());
     if (rloc) {
-        uprv_strcpy(rloc, requestedLocale.getName());
-        size_t vlen = uprv_strlen(validLocale.getName());
-        char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
+        char* vloc = uprv_strdup(validLocale.getName());
         if (vloc) {
-            uprv_strcpy(vloc, validLocale.getName());
-            ucol_setReqValidLocales(ucollator, rloc, vloc);
-            return;
+            char* aloc = uprv_strdup(actualLocale.getName());
+            if (aloc) {
+                ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
+                return;
+            }
+            uprv_free(vloc);
         }
         uprv_free(rloc);
     }
@@ -603,21 +683,13 @@ RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& valid
 // RuleBaseCollatorNew private constructor ----------------------------------
 
 RuleBasedCollator::RuleBasedCollator()
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0)
-{
-}
-
-RuleBasedCollator::RuleBasedCollator(UCollator *collator,
-                                     UnicodeString *rule)
-  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), urulestring(0)
+  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
-    ucollator = collator;
-    urulestring = rule;
 }
 
 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
-                                           UErrorCode& status) :
-                                    dataIsOwned(FALSE), ucollator(0), urulestring(0)
+                                           UErrorCode& status)
+: dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
 {
     if (U_FAILURE(status))
         return;
@@ -659,31 +731,17 @@ RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
 
     if (U_SUCCESS(status))
     {
-        int32_t length;
-        const UChar *r = ucol_getRules(ucollator, &length);
-        if (length > 0) {
-            // alias the rules string
-            urulestring = new UnicodeString(TRUE, r, length);
-        }
-        else {
-            urulestring = new UnicodeString();
-        }
-        /* test for NULL */
-        if (urulestring == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
-        dataIsOwned = TRUE;
-        isWriteThroughAlias = FALSE;
+        setRuleStringFromCollator();
     }
 }
 
-void 
+void
 RuleBasedCollator::setUCollator(const char *locale,
                                 UErrorCode &status)
 {
-    if (U_FAILURE(status))
+    if (U_FAILURE(status)) {
         return;
+    }
     if (ucollator && dataIsOwned)
         ucol_close(ucollator);
     ucollator = ucol_open_internal(locale, &status);
@@ -697,63 +755,21 @@ RuleBasedCollator::checkOwned() {
     if (!(dataIsOwned || isWriteThroughAlias)) {
         UErrorCode status = U_ZERO_ERROR;
         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
-        setRuleStringFromCollator(status);
+        setRuleStringFromCollator();
         dataIsOwned = TRUE;
         isWriteThroughAlias = FALSE;
     }
 }
 
-/* RuleBasedCollator private data members -------------------------------- */
 
-/*
- * TODO:
- * These should probably be enums (<=0xffff) or #defines (>0xffff)
- * for better performance.
- * Include ucol_imp.h and use its constants if possible.
- * Only used in coleitr.h?!
- * Remove from here!
- */
+int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
+                                                                      char *buffer,
+                                                                      int32_t capacity,
+                                                                      UErrorCode &status) const {
+  /* simply delegate */
+  return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
+}
 
-/* need look up in .commit() */
-const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
-/* Expand index follows */
-const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
-/* contract indexes follows */
-const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
-/* unmapped character values */
-const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
-/* primary strength increment */
-const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
-/* secondary strength increment */
-const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
-/* tertiary strength increment */
-const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
-/* mask off anything but primary order */
-const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
-/* mask off anything but secondary order */
-const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
-/* mask off anything but tertiary order */
-const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
-/* mask off ignorable char order */
-const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
-/* use only the primary difference */
-const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
-/* use only the primary and secondary difference */
-const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
-/* primary order shift */
-const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
-/* secondary order shift */
-const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
-/* starting value for collation elements */
-const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
-/* testing mask for primary low element */
-const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
-/* reseting value for secondaries and tertiaries */
-const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
-/* reseting value for tertiaries */
-const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
-
-const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
 
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)