/*
-******************************************************************************
-* Copyright (C) 1996-2004, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-******************************************************************************
-*/
+ ******************************************************************************
+ * Copyright (C) 1996-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ******************************************************************************
+ */
/**
-* File tblcoll.cpp
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-* Date Name Description
-* 2/5/97 aliu Added streamIn and streamOut methods. Added
-* constructor which reads RuleBasedCollator object from
-* a binary file. Added writeToFile method which streams
-* RuleBasedCollator out to a binary file. The streamIn
-* and streamOut methods use istream and ostream objects
-* in binary mode.
-* 2/11/97 aliu Moved declarations out of for loop initializer.
-* Added Mac compatibility #ifdef for ios::nocreate.
-* 2/12/97 aliu Modified to use TableCollationData sub-object to
-* hold invariant data.
-* 2/13/97 aliu Moved several methods into this class from Collation.
-* Added a private RuleBasedCollator(Locale&) constructor,
-* to be used by Collator::getInstance(). General
-* clean up. Made use of UErrorCode variables consistent.
-* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
-* constructor and getDynamicClassID.
-* 3/5/97 aliu Changed compaction cycle to improve performance. We
-* use the maximum allowable value which is kBlockCount.
-* Modified getRules() to load rules dynamically. Changed
-* constructFromFile() call to accomodate this (added
-* parameter to specify whether binary loading is to
-* take place).
-* 05/06/97 helena Added memory allocation error check.
-* 6/20/97 helena Java class name change.
-* 6/23/97 helena Adding comments to make code more readable.
-* 09/03/97 helena Added createCollationKeyValues().
-* 06/26/98 erm Changes for CollationKeys using byte arrays.
-* 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99 stephen Removed EDecompositionMode, merged with
-* Normalizer::EMode
-* 06/14/99 stephen Removed kResourceBundleSuffix
-* 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
-* files are no longer used.
-* 11/02/99 helena Collator performance enhancements. Special case
-* for NO_OP situations.
-* 11/17/99 srl More performance enhancements. Inlined some internal functions.
-* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
-* to implementation file.
-* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
-*/
+ * File tblcoll.cpp
+ *
+ * Created by: Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 2/5/97 aliu Added streamIn and streamOut methods. Added
+ * constructor which reads RuleBasedCollator object from
+ * a binary file. Added writeToFile method which streams
+ * RuleBasedCollator out to a binary file. The streamIn
+ * and streamOut methods use istream and ostream objects
+ * in binary mode.
+ * 2/11/97 aliu Moved declarations out of for loop initializer.
+ * Added Mac compatibility #ifdef for ios::nocreate.
+ * 2/12/97 aliu Modified to use TableCollationData sub-object to
+ * hold invariant data.
+ * 2/13/97 aliu Moved several methods into this class from Collation.
+ * Added a private RuleBasedCollator(Locale&) constructor,
+ * to be used by Collator::getInstance(). General
+ * clean up. Made use of UErrorCode variables consistent.
+ * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
+ * constructor and getDynamicClassID.
+ * 3/5/97 aliu Changed compaction cycle to improve performance. We
+ * use the maximum allowable value which is kBlockCount.
+ * Modified getRules() to load rules dynamically. Changed
+ * constructFromFile() call to accommodate this (added
+ * parameter to specify whether binary loading is to
+ * take place).
+ * 05/06/97 helena Added memory allocation error check.
+ * 6/20/97 helena Java class name change.
+ * 6/23/97 helena Adding comments to make code more readable.
+ * 09/03/97 helena Added createCollationKeyValues().
+ * 06/26/98 erm Changes for CollationKeys using byte arrays.
+ * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
+ * 04/23/99 stephen Removed EDecompositionMode, merged with
+ * Normalizer::EMode
+ * 06/14/99 stephen Removed kResourceBundleSuffix
+ * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
+ * files are no longer used.
+ * 11/02/99 helena Collator performance enhancements. Special case
+ * for NO_OP situations.
+ * 11/17/99 srl More performance enhancements. Inlined some internal functions.
+ * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
+ * to implementation file.
+ * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
+ */
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
+#include "ustr_imp.h"
/* public RuleBasedCollator constructor ---------------------------------- */
: Collator(that)
, dataIsOwned(FALSE)
, isWriteThroughAlias(FALSE)
-, ucollator(that.ucollator)
-, urulestring(that.urulestring)
+, ucollator(NULL)
{
+ RuleBasedCollator::operator=(that);
}
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
UErrorCode& status) : dataIsOwned(FALSE)
{
construct(rules,
- getUCollationStrength(collationStrength),
+ (UColAttributeValue)collationStrength,
UCOL_DEFAULT,
status);
}
UErrorCode& status) : dataIsOwned(FALSE)
{
construct(rules,
- getUCollationStrength(collationStrength),
+ (UColAttributeValue)collationStrength,
decompositionMode,
status);
}
+/**
+ * Constructs a collator from a binary collation image by delegating to
+ * ucol_openBinary().
+ *
+ * @param bin    binary image, passed through to ucol_openBinary()
+ * @param length length of bin, passed through to ucol_openBinary()
+ * @param base   collator whose UCollator is handed to ucol_openBinary();
+ *               it is dereferenced here, so NULL is rejected
+ * @param status in/out error code; left untouched if it already signals a
+ *               failure, set to U_ILLEGAL_ARGUMENT_ERROR when base is NULL
+ *
+ * On any failure path ucollator stays NULL.
+ */
+RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
+                                     const RuleBasedCollator *base,
+                                     UErrorCode &status) :
+dataIsOwned(TRUE),
+isWriteThroughAlias(FALSE),
+ucollator(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Guard the dereference below instead of crashing on a NULL base.
+    if (base == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
+}
+/**
+ * Refreshes urulestring from the rules that ucol_getRules() reports for
+ * ucollator. A non-NULL, non-empty result is aliased rather than copied;
+ * in every other case urulestring is emptied.
+ * NOTE(review): ucollator is passed to ucol_getRules() unchecked, so callers
+ * appear to be responsible for ensuring it is non-NULL -- confirm.
+ */
void
-RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
+RuleBasedCollator::setRuleStringFromCollator()
{
- urulestring = NULL;
- if (U_SUCCESS(status))
- {
- int32_t length;
- const UChar *r = ucol_getRules(ucollator, &length);
+ int32_t length;
+ const UChar *r = ucol_getRules(ucollator, &length);
- if (length > 0) {
- // alias the rules string
- urulestring = new UnicodeString(TRUE, r, length);
- }
- else {
- urulestring = new UnicodeString();
- }
- /* test for NULL */
- if (urulestring == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
+ if (r && length > 0) {
+ // alias the rules string
+ urulestring.setTo(TRUE, r, length);
+ }
+ else {
+ urulestring.truncate(0); // Clear string.
}
}
UColAttributeValue decompositionMode,
UErrorCode& status)
{
- urulestring = 0;
ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
decompositionMode, collationStrength,
NULL, &status);
dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
isWriteThroughAlias = FALSE;
- setRuleStringFromCollator(status);
+ if(ucollator == NULL) {
+ if(U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return; // Failure
+ }
+
+ setRuleStringFromCollator();
}
/* RuleBasedCollator public destructor ----------------------------------- */
if (dataIsOwned)
{
ucol_close(ucollator);
- delete urulestring;
}
ucollator = 0;
- urulestring = 0;
}
/* RuleBaseCollator public methods --------------------------------------- */
+/**
+ * Equality test. Returns TRUE immediately when both operands are the same
+ * object, FALSE when Collator::operator== rejects the pair, and otherwise
+ * the result of ucol_equals() on the two underlying UCollators.
+ * NOTE(review): the downcast of "that" presumably relies on
+ * Collator::operator== having verified the dynamic type -- confirm.
+ */
UBool RuleBasedCollator::operator==(const Collator& that) const
{
/* only checks for address equals here */
- if (Collator::operator==(that))
+ if (this == &that) {
return TRUE;
-
- if (getDynamicClassID() != that.getDynamicClassID())
+ }
+ if (!Collator::operator==(that)) {
return FALSE; /* not the same class */
+ }
RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
- // weiv: use C function, commented code below is wrong
return ucol_equals(this->ucollator, thatAlias.ucollator);
- /*
- synwee : orginal code does not check for data compatibility
- */
- /*
- if (ucollator != thatAlias.ucollator)
- return FALSE;
-
- return TRUE;
- */
-}
-
-UBool RuleBasedCollator::operator!=(const Collator& other) const
-{
- return !(*this == other);
}
-// aliasing, not write-through
+/**
+ * Assignment. Replaces this collator's UCollator with a ucol_safeClone() of
+ * that.ucollator, so after a successful assignment this object owns its
+ * data (dataIsOwned == TRUE, isWriteThroughAlias == FALSE) and its rule
+ * string is refreshed from the new UCollator.
+ * Self-assignment is a no-op. If the clone fails, *this is returned
+ * unchanged and the error code is not reported to the caller.
+ */
RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
{
- if (this != &that)
- {
- if (dataIsOwned)
- {
- ucol_close(ucollator);
- ucollator = NULL;
- delete urulestring;
- }
+ if (this == &that) { return *this; }
- dataIsOwned = FALSE;
- isWriteThroughAlias = FALSE;
- ucollator = that.ucollator;
- urulestring = that.urulestring;
+ UErrorCode intStatus = U_ZERO_ERROR;
+ int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
+ UCollator *ucol = ucol_safeClone(that.ucollator, NULL, &buffersize, &intStatus);
+ if (U_FAILURE(intStatus)) { return *this; }
+
+ // Release the previous UCollator only after the clone has succeeded.
+ if (dataIsOwned) {
+ ucol_close(ucollator);
}
+ ucollator = ucol;
+ dataIsOwned = TRUE;
+ isWriteThroughAlias = FALSE;
+ setRuleStringFromCollator();
return *this;
}
-// aliasing, not write-through
+/**
+ * Returns a heap-allocated copy of this collator made with the copy
+ * constructor. Returns NULL when the allocation yields NULL or when the
+ * copy ends up with a NULL ucollator; in the latter case the partially
+ * built copy is deleted first.
+ */
Collator* RuleBasedCollator::clone() const
{
- return new RuleBasedCollator(*this);
+ RuleBasedCollator* coll = new RuleBasedCollator(*this);
+ // There is a small chance that the internal ucol_safeClone() call fails.
+ if (coll != NULL && coll->ucollator == NULL) {
+ delete coll;
+ return NULL;
+ }
+ return coll;
}
+
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
(const UnicodeString& source) const
{
*/
const UnicodeString& RuleBasedCollator::getRules() const
{
- return (*urulestring);
+ return urulestring;
}
void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
ucol_getRulesEx(ucollator, delta, rules, rulesize);
buffer.setTo(rules, rulesize);
uprv_free(rules);
- } else { // couldn't allocate
+ } else { // couldn't allocate
buffer.remove();
}
}
}
}
-Collator::EComparisonResult RuleBasedCollator::compare(
- const UnicodeString& source,
- const UnicodeString& target,
- int32_t length) const
-{
- UErrorCode status = U_ZERO_ERROR;
- return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
-}
-
+/**
+* Compare two strings using this collator
+*/
UCollationResult RuleBasedCollator::compare(
const UnicodeString& source,
const UnicodeString& target,
return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
}
-Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
- int32_t sourceLength,
- const UChar* target,
- int32_t targetLength)
- const
-{
- return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
- target, targetLength));
-}
-
UCollationResult RuleBasedCollator::compare(const UChar* source,
int32_t sourceLength,
const UChar* target,
}
}
-/**
-* Compare two strings using this collator
-*/
-Collator::EComparisonResult RuleBasedCollator::compare(
- const UnicodeString& source,
- const UnicodeString& target) const
-{
- return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
- target.getBuffer(), target.length()));
-}
-
UCollationResult RuleBasedCollator::compare(
const UnicodeString& source,
const UnicodeString& target,
}
}
+/**
+ * Compares the text supplied by two character iterators using this
+ * collator's UCollator.
+ *
+ * @param sIter  iterator over the source text
+ * @param tIter  iterator over the target text
+ * @param status in/out error code, forwarded to ucol_strcollIter()
+ * @return the result of ucol_strcollIter(), or UCOL_EQUAL when status
+ *         already signals a failure on entry
+ */
+UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
+                                            UCharIterator &tIter,
+                                            UErrorCode &status) const {
+    // A pre-existing failure short-circuits the comparison.
+    if (U_FAILURE(status)) {
+        return UCOL_EQUAL;
+    }
+    return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
+}
+
/**
* Retrieve a collation key for the specified string. The key can be compared
* with other collation keys using a bitwise comparison (e.g. memcmp) to find
CollationKey& sortkey,
UErrorCode& status) const
{
- if (U_FAILURE(status))
- {
+ if (U_FAILURE(status)) {
+ return sortkey.setToBogus();
+ }
+ if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
return sortkey.setToBogus();
}
- if ((!source) || (sourceLen == 0)) {
+ if (sourceLen < 0) {
+ sourceLen = u_strlen(source);
+ }
+ if (sourceLen == 0) {
return sortkey.reset();
}
- uint8_t *result;
- int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
- source, sourceLen,
- &result,
- &status);
- sortkey.adopt(result, resultLen);
+ int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status);
+
+ if (U_SUCCESS(status)) {
+ sortkey.setLength(resultLen);
+ } else {
+ sortkey.setToBogus();
+ }
return sortkey;
}
return ucol_cloneRuleData(ucollator, &length, &status);
}
+
+/**
+ * Forwards to ucol_cloneBinary() for this collator's UCollator.
+ *
+ * @param buffer   destination buffer, passed through unchanged
+ * @param capacity size of buffer, passed through unchanged
+ * @param status   in/out error code, passed through by address
+ * @return whatever ucol_cloneBinary() returns
+ */
+int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
+{
+    const int32_t result = ucol_cloneBinary(ucollator, buffer, capacity, &status);
+    return result;
+}
+
void RuleBasedCollator::setAttribute(UColAttribute attr,
UColAttributeValue value,
UErrorCode &status)
}
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
- UErrorCode &status)
+ UErrorCode &status) const
{
if (U_FAILURE(status))
return UCOL_DEFAULT;
return ucol_setVariableTop(ucollator, varTop, len, &status);
}
-uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
+/**
+ * Sets the variable top from a UnicodeString. Calls checkOwned() first, then
+ * forwards the string's buffer and length to ucol_setVariableTop() and
+ * returns that function's result.
+ */
+uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) {
checkOwned();
return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
}
-void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
+/**
+ * Sets the variable top from a 32-bit value. Calls checkOwned() first, then
+ * forwards the value to ucol_restoreVariableTop().
+ */
+void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) {
checkOwned();
ucol_restoreVariableTop(ucollator, varTop, &status);
}
return ucol_getVariableTop(ucollator, &status);
}
-Collator* RuleBasedCollator::safeClone(void)
-{
- UErrorCode intStatus = U_ZERO_ERROR;
- int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
- UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
- &intStatus);
- if (U_FAILURE(intStatus)) {
- return NULL;
- }
-
- UnicodeString *r = new UnicodeString(*urulestring);
- RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
- result->dataIsOwned = TRUE;
- result->isWriteThroughAlias = FALSE;
-
- return result;
-}
-
-
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
uint8_t *result, int32_t resultLength)
const
return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
}
-Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
+/**
+ * Forwards to ucol_getReorderCodes() for this collator's UCollator and
+ * returns its result. Read-only: checkOwned() is not called.
+ */
+int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
+ int32_t destCapacity,
+ UErrorCode& status) const
{
- UErrorCode intStatus = U_ZERO_ERROR;
- return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
- &intStatus));
+ return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
}
-void RuleBasedCollator::setStrength(ECollationStrength newStrength)
+/**
+ * Calls checkOwned() and then forwards the reorder codes to
+ * ucol_setReorderCodes() for this collator's UCollator. Errors are reported
+ * through status only.
+ */
+void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
+ int32_t reorderCodesLength,
+ UErrorCode& status)
{
checkOwned();
- UErrorCode intStatus = U_ZERO_ERROR;
- UCollationStrength strength = getUCollationStrength(newStrength);
- ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
+ ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
+}
+
+/**
+ * Forwards to ucol_getEquivalentReorderCodes() and returns its result.
+ * This collator's UCollator is not consulted.
+ */
+int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
+ int32_t* dest,
+ int32_t destCapacity,
+ UErrorCode& status)
+{
+ return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
}
/**
{
int32_t length;
const UChar *rules = ucol_getRules(ucollator, &length);
- return uhash_hashUCharsN(rules, length);
+ return ustr_hashUCharsN(rules, length);
}
/**
* return the locale of this collator
*/
-const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
- const char *result = ucol_getLocale(ucollator, type, &status);
+Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
+ const char *result = ucol_getLocaleByType(ucollator, type, &status);
if(result == NULL) {
Locale res("");
res.setToBogus();
}
void
-RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
+RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
checkOwned();
- size_t rlen = uprv_strlen(requestedLocale.getName());
- char* rloc = (char *)uprv_malloc((rlen+1)*sizeof(char));
+ char* rloc = uprv_strdup(requestedLocale.getName());
if (rloc) {
- uprv_strcpy(rloc, requestedLocale.getName());
- size_t vlen = uprv_strlen(validLocale.getName());
- char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
+ char* vloc = uprv_strdup(validLocale.getName());
if (vloc) {
- uprv_strcpy(vloc, validLocale.getName());
- ucol_setReqValidLocales(ucollator, rloc, vloc);
- return;
+ char* aloc = uprv_strdup(actualLocale.getName());
+ if (aloc) {
+ ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
+ return;
+ }
+ uprv_free(vloc);
}
uprv_free(rloc);
}
// RuleBaseCollatorNew private constructor ----------------------------------
+/**
+ * Creates an empty collator: no UCollator attached (ucollator == NULL),
+ * nothing owned, and not a write-through alias.
+ */
RuleBasedCollator::RuleBasedCollator()
- : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0)
-{
-}
-
-RuleBasedCollator::RuleBasedCollator(UCollator *collator,
- UnicodeString *rule)
- : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), urulestring(0)
+ : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
{
- ucollator = collator;
- urulestring = rule;
}
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
- UErrorCode& status) :
- dataIsOwned(FALSE), ucollator(0), urulestring(0)
+ UErrorCode& status)
+ : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
{
if (U_FAILURE(status))
return;
if (U_SUCCESS(status))
{
- int32_t length;
- const UChar *r = ucol_getRules(ucollator, &length);
- if (length > 0) {
- // alias the rules string
- urulestring = new UnicodeString(TRUE, r, length);
- }
- else {
- urulestring = new UnicodeString();
- }
- /* test for NULL */
- if (urulestring == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- dataIsOwned = TRUE;
- isWriteThroughAlias = FALSE;
+ setRuleStringFromCollator();
}
}
-void
+void
RuleBasedCollator::setUCollator(const char *locale,
UErrorCode &status)
{
- if (U_FAILURE(status))
+ if (U_FAILURE(status)) {
return;
+ }
if (ucollator && dataIsOwned)
ucol_close(ucollator);
ucollator = ucol_open_internal(locale, &status);
if (!(dataIsOwned || isWriteThroughAlias)) {
UErrorCode status = U_ZERO_ERROR;
ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
- setRuleStringFromCollator(status);
+ setRuleStringFromCollator();
dataIsOwned = TRUE;
isWriteThroughAlias = FALSE;
}
}
-/* RuleBasedCollator private data members -------------------------------- */
-/*
- * TODO:
- * These should probably be enums (<=0xffff) or #defines (>0xffff)
- * for better performance.
- * Include ucol_imp.h and use its constants if possible.
- * Only used in coleitr.h?!
- * Remove from here!
- */
+/**
+ * Forwards to ucol_getShortDefinitionString() for this collator's UCollator.
+ *
+ * @param locale   locale identifier, passed through unchanged
+ * @param buffer   destination buffer, passed through unchanged
+ * @param capacity size of buffer, passed through unchanged
+ * @param status   in/out error code, passed through by address
+ * @return whatever ucol_getShortDefinitionString() returns
+ */
+int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
+                                                            char *buffer,
+                                                            int32_t capacity,
+                                                            UErrorCode &status) const {
+    // Pure delegation: no state of this object other than ucollator is used.
+    const int32_t result =
+        ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
+    return result;
+}
-/* need look up in .commit() */
-const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
-/* Expand index follows */
-const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
-/* contract indexes follows */
-const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
-/* unmapped character values */
-const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
-/* primary strength increment */
-const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
-/* secondary strength increment */
-const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
-/* tertiary strength increment */
-const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
-/* mask off anything but primary order */
-const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
-/* mask off anything but secondary order */
-const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
-/* mask off anything but tertiary order */
-const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
-/* mask off ignorable char order */
-const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
-/* use only the primary difference */
-const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
-/* use only the primary and secondary difference */
-const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
-/* primary order shift */
-const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
-/* secondary order shift */
-const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
-/* starting value for collation elements */
-const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
-/* testing mask for primary low element */
-const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
-/* reseting value for secondaries and tertiaries */
-const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
-/* reseting value for tertiaries */
-const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
-
-const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)