X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..38fbf2fd31f5cd99b500914d6037b1d06b608645:/icuSources/i18n/unicode/tblcoll.h diff --git a/icuSources/i18n/unicode/tblcoll.h b/icuSources/i18n/unicode/tblcoll.h index 9ccad375..d4cf184a 100644 --- a/icuSources/i18n/unicode/tblcoll.h +++ b/icuSources/i18n/unicode/tblcoll.h @@ -1,10 +1,17 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** -* Copyright (C) 1996-2003, International Business Machines Corporation and +* Copyright (C) 1996-2016, International Business Machines Corporation and * others. All Rights Reserved. ****************************************************************************** */ +/** + * \file + * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class. + */ + /** * File tblcoll.h * @@ -49,6 +56,7 @@ * to implementation file. * 01/29/01 synwee Modified into a C++ wrapper which calls C API * (ucol.h) +* 2012-2014 markus Rewritten in C++ again. */ #ifndef TBLCOLL_H @@ -59,12 +67,17 @@ #if !UCONFIG_NO_COLLATION #include "unicode/coll.h" +#include "unicode/locid.h" +#include "unicode/uiter.h" #include "unicode/ucol.h" -#include "unicode/sortkey.h" -#include "unicode/normlzr.h" +#if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN +struct CollationCacheEntry; +struct CollationData; +struct CollationSettings; +struct CollationTailoring; /** * @stable ICU 2.0 */ @@ -73,824 +86,794 @@ class StringSearch; * @stable ICU 2.0 */ class CollationElementIterator; +class CollationKey; +class SortKeyByteSink; +class UnicodeSet; +class UnicodeString; +class UVector64; /** - * The RuleBasedCollator class provides the simple implementation of + * The RuleBasedCollator class provides the implementation of * Collator, using data-driven tables. The user can create a customized * table-based collation. - *

- * Important: The ICU collation service has been reimplemented - * in order to achieve better performance and UCA compliance. - * For details, see the - * - * collation design document. *

- * RuleBasedCollator is a thin C++ wrapper over the C implementation. + * For more information about the collation service see + * the User Guide. *

- * For more information about the collation service see - * the users guide. - *

- * Collation service provides correct sorting orders for most locales supported in ICU. + * Collation service provides correct sorting orders for most locales supported in ICU. * If specific data for a locale is not available, the orders eventually falls back - * to the UCA sort order. + * to the CLDR root sort order. *

* Sort ordering may be customized by providing your own set of rules. For more on - * this subject see the - * Collation customization section of the users guide. + * this subject see the + * Collation Customization section of the User Guide. *

* Note, RuleBasedCollator is not to be subclassed. * @see Collator - * @version 2.0 11/15/2001 */ -class U_I18N_API RuleBasedCollator : public Collator -{ +class U_I18N_API RuleBasedCollator : public Collator { public: - - // constructor ------------------------------------------------------------- - - /** - * RuleBasedCollator constructor. This takes the table rules and builds a - * collation table out of them. Please see RuleBasedCollator class - * description for more details on the collation rule syntax. - * @param rules the collation rules to build the collation table from. - * @param status reporting a success or an error. - * @see Locale - * @stable ICU 2.0 - */ + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); - /** - * RuleBasedCollator constructor. This takes the table rules and builds a - * collation table out of them. Please see RuleBasedCollator class - * description for more details on the collation rule syntax. - * @param rules the collation rules to build the collation table from. - * @param collationStrength default strength for comparison - * @param status reporting a success or an error. - * @see Locale - * @stable ICU 2.0 - */ - RuleBasedCollator(const UnicodeString& rules, + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param collationStrength strength for comparison + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, ECollationStrength collationStrength, UErrorCode& status); - /** - * RuleBasedCollator constructor. This takes the table rules and builds a - * collation table out of them. Please see RuleBasedCollator class - * description for more details on the collation rule syntax. - * @param rules the collation rules to build the collation table from. - * @param decompositionMode the normalisation mode - * @param status reporting a success or an error. - * @see Locale - * @stable ICU 2.0 - */ - RuleBasedCollator(const UnicodeString& rules, + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param decompositionMode the normalisation mode + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, UColAttributeValue decompositionMode, UErrorCode& status); - /** - * RuleBasedCollator constructor. This takes the table rules and builds a - * collation table out of them. Please see RuleBasedCollator class - * description for more details on the collation rule syntax. - * @param rules the collation rules to build the collation table from. - * @param collationStrength default strength for comparison - * @param decompositionMode the normalisation mode - * @param status reporting a success or an error. - * @see Locale - * @stable ICU 2.0 - */ - RuleBasedCollator(const UnicodeString& rules, + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param collationStrength strength for comparison + * @param decompositionMode the normalisation mode + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode& status); - /** - * Copy constructor. - * @param other the RuleBasedCollator object to be copied - * @see Locale - * @stable ICU 2.0 - */ +#ifndef U_HIDE_INTERNAL_API + /** + * TODO: document & propose as public API + * @internal + */ + RuleBasedCollator(const UnicodeString &rules, + UParseError &parseError, UnicodeString &reason, + UErrorCode &errorCode); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Copy constructor. + * @param other the RuleBasedCollator object to be copied + * @stable ICU 2.0 + */ RuleBasedCollator(const RuleBasedCollator& other); - // destructor -------------------------------------------------------------- - /** - * Destructor. - * @stable ICU 2.0 - */ + /** Opens a collator from a collator binary image created using + * cloneBinary. Binary image used in instantiation of the + * collator remains owned by the user and should stay around for + * the lifetime of the collator. The API also takes a base collator + * which must be the root collator. + * @param bin binary image owned by the user and required through the + * lifetime of the collator + * @param length size of the image. If negative, the API will try to + * figure out the length of the image + * @param base Base collator, for lookup of untailored characters. + * Must be the root collator, must not be NULL. + * The base is required to be present through the lifetime of the collator. + * @param status for catching errors + * @return newly created collator + * @see cloneBinary + * @stable ICU 3.4 + */ + RuleBasedCollator(const uint8_t *bin, int32_t length, + const RuleBasedCollator *base, + UErrorCode &status); + + /** + * Destructor. + * @stable ICU 2.0 + */ virtual ~RuleBasedCollator(); - // public methods ---------------------------------------------------------- - - /** - * Assignment operator. - * @param other other RuleBasedCollator object to compare with. - * @stable ICU 2.0 - */ + /** + * Assignment operator. + * @param other other RuleBasedCollator object to copy from. + * @stable ICU 2.0 + */ RuleBasedCollator& operator=(const RuleBasedCollator& other); - /** - * Returns true if argument is the same as this object. - * @param other Collator object to be compared. - * @return true if arguments is the same as this object. - * @stable ICU 2.0 - */ - virtual UBool operator==(const Collator& other) const; - - /** - * Returns true if argument is not the same as this object. - * @param other Collator object to be compared - * @return returns true if argument is not the same as this object. - * @stable ICU 2.0 - */ - virtual UBool operator!=(const Collator& other) const; - - /** - * Makes a deep copy of the object. - * The caller owns the returned object. - * @return the cloned object. - * @stable ICU 2.0 - */ - virtual Collator* clone(void) const; - - /** - * Creates a collation element iterator for the source string. The caller of - * this method is responsible for the memory management of the return - * pointer. - * @param source the string over which the CollationElementIterator will - * iterate. - * @return the collation element iterator of the source string using this as - * the based Collator. - * @stable ICU 2.2 - */ + /** + * Returns true if argument is the same as this object. + * @param other Collator object to be compared. + * @return true if arguments is the same as this object. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Collator& other) const; + + /** + * Makes a copy of this object. + * @return a copy of this object, owned by the caller + * @stable ICU 2.0 + */ + virtual Collator* clone(void) const; + + /** + * Creates a collation element iterator for the source string. The caller of + * this method is responsible for the memory management of the return + * pointer. + * @param source the string over which the CollationElementIterator will + * iterate. + * @return the collation element iterator of the source string using this as + * the based Collator. + * @stable ICU 2.2 + */ virtual CollationElementIterator* createCollationElementIterator( const UnicodeString& source) const; - /** - * Creates a collation element iterator for the source. The caller of this - * method is responsible for the memory management of the returned pointer. - * @param source the CharacterIterator which produces the characters over - * which the CollationElementItgerator will iterate. - * @return the collation element iterator of the source using this as the - * based Collator. - * @stable ICU 2.2 - */ - virtual CollationElementIterator* createCollationElementIterator( - const CharacterIterator& source) const; - - /** - * Compares a range of character data stored in two different strings based - * on the collation rules. Returns information about whether a string is - * less than, greater than or equal to another string in a language. - * This can be overriden in a subclass. - * @param source the source string. - * @param target the target string to be compared with the source string. - * @return the comparison result. GREATER if the source string is greater - * than the target string, LESS if the source is less than the - * target. Otherwise, returns EQUAL. - * @deprecated ICU 2.6 Use overload with UErrorCode& - */ - virtual EComparisonResult compare(const UnicodeString& source, - const UnicodeString& target) const; - - - /** - * The comparison function compares the character data stored in two - * different strings. Returns information about whether a string is less - * than, greater than or equal to another string. - * @param source the source string to be compared with. - * @param target the string that is to be compared with the source string. - * @param status possible error code - * @return Returns an enum value. UCOL_GREATER if source is greater - * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less - * than target - * @draft ICU 2.6 - **/ - virtual UCollationResult compare(const UnicodeString& source, - const UnicodeString& target, - UErrorCode &status) const; - /** - * Compares a range of character data stored in two different strings based - * on the collation rules up to the specified length. Returns information - * about whether a string is less than, greater than or equal to another - * string in a language. This can be overriden in a subclass. - * @param source the source string. - * @param target the target string to be compared with the source string. - * @param length compares up to the specified length - * @return the comparison result. GREATER if the source string is greater - * than the target string, LESS if the source is less than the - * target. Otherwise, returns EQUAL. - * @deprecated ICU 2.6 Use overload with UErrorCode& - */ - virtual EComparisonResult compare(const UnicodeString& source, - const UnicodeString& target, - int32_t length) const; - - /** - * Does the same thing as compare but limits the comparison to a specified - * length - * @param source the source string to be compared with. - * @param target the string that is to be compared with the source string. - * @param length the length the comparison is limited to - * @param status possible error code - * @return Returns an enum value. UCOL_GREATER if source (up to the specified - * length) is greater than target; UCOL_EQUAL if source (up to specified - * length) is equal to target; UCOL_LESS if source (up to the specified - * length) is less than target. - * @draft ICU 2.6 - */ - virtual UCollationResult compare(const UnicodeString& source, - const UnicodeString& target, - int32_t length, - UErrorCode &status) const; - - /** - * The comparison function compares the character data stored in two - * different string arrays. Returns information about whether a string array - * is less than, greater than or equal to another string array. - *

Example of use: - *

-   * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
-   * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
-   * .       UErrorCode status = U_ZERO_ERROR;
-   * .       Collator *myCollation =
-   * .                         Collator::createInstance(Locale::US, status);
-   * .       if (U_FAILURE(status)) return;
-   * .       myCollation->setStrength(Collator::PRIMARY);
-   * .       // result would be Collator::EQUAL ("abc" == "ABC")
-   * .       // (no primary difference between "abc" and "ABC")
-   * .       Collator::EComparisonResult result =
-   * .                             myCollation->compare(abc, 3, ABC, 3);
-   * .       myCollation->setStrength(Collator::TERTIARY);
-   * .       // result would be Collator::LESS ("abc" <<< "ABC")
-   * .       // (with tertiary difference between "abc" and "ABC")
-   * .       result =  myCollation->compare(abc, 3, ABC, 3);
-   * 
- * @param source the source string array to be compared with. - * @param sourceLength the length of the source string array. If this value - * is equal to -1, the string array is null-terminated. - * @param target the string that is to be compared with the source string. - * @param targetLength the length of the target string array. If this value - * is equal to -1, the string array is null-terminated. - * @return Returns a byte value. GREATER if source is greater than target; - * EQUAL if source is equal to target; LESS if source is less than - * target - * @deprecated ICU 2.6 Use overload with UErrorCode& - */ - virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength) - const; - - /** - * The comparison function compares the character data stored in two - * different string arrays. Returns information about whether a string array - * is less than, greater than or equal to another string array. - * @param source the source string array to be compared with. - * @param sourceLength the length of the source string array. If this value - * is equal to -1, the string array is null-terminated. - * @param target the string that is to be compared with the source string. - * @param targetLength the length of the target string array. If this value - * is equal to -1, the string array is null-terminated. - * @param status possible error code - * @return Returns an enum value. UCOL_GREATER if source is greater - * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less - * than target - * @draft ICU 2.6 - */ - virtual UCollationResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength, - UErrorCode &status) const; - - /** - * Transforms a specified region of the string into a series of characters - * that can be compared with CollationKey.compare. Use a CollationKey when - * you need to do repeated comparisions on the same string. For a single - * comparison the compare method will be faster. - * @param source the source string. - * @param key the transformed key of the source string. - * @param status the error code status. - * @return the transformed key. - * @see CollationKey - * @stable ICU 2.0 - */ - virtual CollationKey& getCollationKey(const UnicodeString& source, - CollationKey& key, - UErrorCode& status) const; - - /** - * Transforms a specified region of the string into a series of characters - * that can be compared with CollationKey.compare. Use a CollationKey when - * you need to do repeated comparisions on the same string. For a single - * comparison the compare method will be faster. - * @param source the source string. - * @param sourceLength the length of the source string. - * @param key the transformed key of the source string. - * @param status the error code status. - * @return the transformed key. - * @see CollationKey - * @stable ICU 2.0 - */ - virtual CollationKey& getCollationKey(const UChar *source, - int32_t sourceLength, - CollationKey& key, - UErrorCode& status) const; - - /** - * Generates the hash code for the rule-based collation object. - * @return the hash code. - * @stable ICU 2.0 - */ - virtual int32_t hashCode(void) const; - - /** - * Gets the locale of the Collator - * @param type can be either requested, valid or actual locale. For more - * information see the definition of ULocDataLocaleType in - * uloc.h - * @param status the error code status. - * @return locale where the collation data lives. If the collator - * was instantiated from rules, locale is empty. - * @stable ICU 2.1 - */ - virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; - - /** - * Gets the table-based rules for the collation object. - * @return returns the collation rules that the table collation object was - * created from. - * @stable ICU 2.0 - */ - const UnicodeString& getRules(void) const; - - /** - * Gets the version information for a Collator. - * @param info the version # information, the result will be filled in - * @stable ICU 2.0 - */ - virtual void getVersion(UVersionInfo info) const; - - /** - * Return the maximum length of any expansion sequences that end with the - * specified comparison order. - * @param order a collation order returned by previous or next. - * @return maximum size of the expansion sequences ending with the collation - * element or 1 if collation element does not occur at the end of - * any expansion sequence - * @see CollationElementIterator#getMaxExpansion - * @stable ICU 2.0 - */ - int32_t getMaxExpansion(int32_t order) const; - - /** - * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This - * method is to implement a simple version of RTTI, since not all C++ - * compilers support genuine RTTI. Polymorphic operator==() and clone() - * methods call this method. - * @return The class ID for this object. All objects of a given class have - * the same class ID. Objects of other classes have different class - * IDs. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const; - - /** - * Returns the class ID for this class. This is useful only for comparing to - * a return value from getDynamicClassID(). For example: - *
-   * Base* polymorphic_pointer = createPolymorphicObject();
-   * if (polymorphic_pointer->getDynamicClassID() ==
-   *                                          Derived::getStaticClassID()) ...
-   * 
- * @return The class ID for all objects of this class. - * @stable ICU 2.0 - */ - static UClassID getStaticClassID(void); - - /** - * Returns the binary format of the class's rules. The format is that of - * .col files. - * @param length Returns the length of the data, in bytes - * @param status the error code status. - * @return memory, owned by the caller, of size 'length' bytes. - * @stable ICU 2.2 - */ - uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); - - /** - * Returns current rules. Delta defines whether full rules are returned or - * just the tailoring. - * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. - * @param buffer UnicodeString to store the result rules - * @stable ICU 2.2 - */ - void getRules(UColRuleOption delta, UnicodeString &buffer); - - /** - * Universal attribute setter - * @param attr attribute type - * @param value attribute value - * @param status to indicate whether the operation went on smoothly or there were errors - * @stable ICU 2.2 - */ - virtual void setAttribute(UColAttribute attr, UColAttributeValue value, - UErrorCode &status); - - /** - * Universal attribute getter. - * @param attr attribute type - * @param status to indicate whether the operation went on smoothly or there were errors - * @return attribute value - * @stable ICU 2.2 - */ - virtual UColAttributeValue getAttribute(UColAttribute attr, - UErrorCode &status); - - /** - * Sets the variable top to a collation element value of a string supplied. - * @param varTop one or more (if contraction) UChars to which the variable top should be set - * @param len length of variable top string. If -1 it is considered to be zero terminated. - * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
- * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction
- * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes - * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined - * @stable ICU 2.0 - */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); - - /** - * Sets the variable top to a collation element value of a string supplied. - * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set - * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
- * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction
- * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes - * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined - * @stable ICU 2.0 - */ - virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status); - - /** - * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. - * Lower 16 bits are ignored. - * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop - * @param status error code (not changed by function) - * @stable ICU 2.0 - */ - virtual void setVariableTop(const uint32_t varTop, UErrorCode &status); - - /** - * Gets the variable top value of a Collator. - * Lower 16 bits are undefined and should be ignored. - * @param status error code (not changed by function). If error code is set, the return value is undefined. - * @stable ICU 2.0 - */ - virtual uint32_t getVariableTop(UErrorCode &status) const; - - /** - * Get an UnicodeSet that contains all the characters and sequences tailored in - * this collator. - * @param status error code of the operation - * @return a pointer to a UnicodeSet object containing all the - * code points and sequences that may sort differently than - * in the UCA. The object must be disposed of by using delete - * @draft ICU 2.4 - */ - virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; - - /** - * Thread safe cloning operation. - * @return pointer to the new clone, user should remove it. - * @stable ICU 2.2 - */ - virtual Collator* safeClone(void); - - /** - * Get the sort key as an array of bytes from an UnicodeString. - * @param source string to be processed. - * @param result buffer to store result in. If NULL, number of bytes needed - * will be returned. - * @param resultLength length of the result buffer. If if not enough the - * buffer will be filled to capacity. - * @return Number of bytes needed for storing the sort key - * @stable ICU 2.0 - */ - virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, - int32_t resultLength) const; - - /** - * Get the sort key as an array of bytes from an UChar buffer. - * @param source string to be processed. - * @param sourceLength length of string to be processed. If -1, the string - * is 0 terminated and length will be decided by the function. - * @param result buffer to store result in. If NULL, number of bytes needed - * will be returned. - * @param resultLength length of the result buffer. If if not enough the - * buffer will be filled to capacity. - * @return Number of bytes needed for storing the sort key - * @stable ICU 2.2 - */ - virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, - uint8_t *result, int32_t resultLength) const; - - /** - * Determines the minimum strength that will be use in comparison or - * transformation. - *

E.g. with strength == SECONDARY, the tertiary difference is ignored - *

E.g. with strength == PRIMARY, the secondary and tertiary difference - * are ignored. - * @return the current comparison level. - * @see RuleBasedCollator#setStrength - * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead - */ - virtual ECollationStrength getStrength(void) const; - - /** - * Sets the minimum strength to be used in comparison or transformation. - * @see RuleBasedCollator#getStrength - * @param newStrength the new comparison level. - * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead - */ - virtual void setStrength(ECollationStrength newStrength); - -private: - - // private static constants ----------------------------------------------- - - static const int32_t UNMAPPED; - static const int32_t CHARINDEX; // need look up in .commit() - static const int32_t EXPANDCHARINDEX; // Expand index follows - static const int32_t CONTRACTCHARINDEX; // contract indexes follow - - static const int32_t PRIMARYORDERINCREMENT; - static const int32_t SECONDARYORDERINCREMENT; - static const int32_t TERTIARYORDERINCREMENT; - static const int32_t PRIMARYORDERMASK; - static const int32_t SECONDARYORDERMASK; - static const int32_t TERTIARYORDERMASK; - static const int32_t IGNORABLEMASK; - static const int32_t PRIMARYDIFFERENCEONLY; - static const int32_t SECONDARYDIFFERENCEONLY; - static const int32_t PRIMARYORDERSHIFT; - static const int32_t SECONDARYORDERSHIFT; - - static const int32_t COLELEMENTSTART; - static const int32_t PRIMARYLOWZEROMASK; - static const int32_t RESETSECONDARYTERTIARY; - static const int32_t RESETTERTIARY; - - static const int32_t PRIMIGNORABLE; - - static const int16_t FILEID; - static const char kFilenameSuffix[]; - - // private static variables ----------------------------------------------- - - /** - * static class id - */ - static const char fgClassID; - - // private data members --------------------------------------------------- - - UBool dataIsOwned; - - UBool isWriteThroughAlias; - - /** - * c struct for collation. All initialisation for it has to be done through - * setUCollator(). - */ - UCollator *ucollator; - - /** - * Rule UnicodeString - */ - UnicodeString *urulestring; - - // friend classes -------------------------------------------------------- - - /** - * Used to iterate over collation elements in a character source. - */ - friend class CollationElementIterator; - - /** - * Collator ONLY needs access to RuleBasedCollator(const Locale&, - * UErrorCode&) - */ - friend class Collator; - - /** - * Searching over collation elements in a character source - */ - friend class StringSearch; - - // private constructors -------------------------------------------------- - - /** - * Default constructor - */ - RuleBasedCollator(); - - /** - * Constructor that takes in a UCollator struct - * @param collator UCollator struct - * @param rule the rule for the collator. - */ - RuleBasedCollator(UCollator *collator, UnicodeString *rule); - - /** - * RuleBasedCollator constructor. This constructor takes a locale. The - * only caller of this class should be Collator::createInstance(). If - * createInstance() happens to know that the requested locale's collation is - * implemented as a RuleBasedCollator, it can then call this constructor. - * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID - * COLLATION TABLE. It does this by falling back to defaults. - * @param desiredLocale locale used - * @param status error code status - */ - RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status); - - /** - * common constructor implementation - * - * @param rules the collation rules to build the collation table from. - * @param collationStrength default strength for comparison - * @param decompositionMode the normalisation mode - * @param status reporting a success or an error. - */ - void - construct(const UnicodeString& rules, - UColAttributeValue collationStrength, + /** + * Creates a collation element iterator for the source. The caller of this + * method is responsible for the memory management of the returned pointer. + * @param source the CharacterIterator which produces the characters over + * which the CollationElementItgerator will iterate. + * @return the collation element iterator of the source using this as the + * based Collator. + * @stable ICU 2.2 + */ + virtual CollationElementIterator* createCollationElementIterator( + const CharacterIterator& source) const; + + // Make deprecated versions of Collator::compare() visible. + using Collator::compare; + + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + **/ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + UErrorCode &status) const; + + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source (up to the specified + * length) is greater than target; UCOL_EQUAL if source (up to specified + * length) is equal to target; UCOL_LESS if source (up to the specified + * length) is less than target. + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length, + UErrorCode &status) const; + + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, + UErrorCode &status) const; + + /** + * Compares two strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UCharIterator input. + * @param sIter the first ("source") string iterator + * @param tIter the second ("target") string iterator + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compare(UCharIterator &sIter, + UCharIterator &tIter, + UErrorCode &status) const; + + /** + * Compares two UTF-8 strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UTF-8 input. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @param source the first UTF-8 string + * @param target the second UTF-8 string + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 51 + */ + virtual UCollationResult compareUTF8(const StringPiece &source, + const StringPiece &target, + UErrorCode &status) const; + + /** + * Transforms the string into a series of characters + * that can be compared with CollationKey.compare(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string. + * @param key the transformed key of the source string. + * @param status the error code status. + * @return the transformed key. + * @see CollationKey + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const; + + /** + * Transforms a specified region of the string into a series of characters + * that can be compared with CollationKey.compare. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string. + * @param sourceLength the length of the source string. + * @param key the transformed key of the source string. + * @param status the error code status. + * @return the transformed key. + * @see CollationKey + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const char16_t *source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const; + + /** + * Generates the hash code for the rule-based collation object. + * @return the hash code. + * @stable ICU 2.0 + */ + virtual int32_t hashCode() const; + + /** + * Gets the locale of the Collator + * @param type can be either requested, valid or actual locale. For more + * information see the definition of ULocDataLocaleType in + * uloc.h + * @param status the error code status. + * @return locale where the collation data lives. If the collator + * was instantiated from rules, locale is empty. + * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback + */ + virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + + /** + * Gets the tailoring rules for this collator. + * @return the collation tailoring from which this collator was created + * @stable ICU 2.0 + */ + const UnicodeString& getRules() const; + + /** + * Gets the version information for a Collator. + * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ + virtual void getVersion(UVersionInfo info) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Returns the maximum length of any expansion sequences that end with the + * specified comparison order. + * + * This is specific to the kind of collation element values and sequences + * returned by the CollationElementIterator. + * Call CollationElementIterator::getMaxExpansion() instead. + * + * @param order a collation order returned by CollationElementIterator::previous + * or CollationElementIterator::next. + * @return maximum size of the expansion sequences ending with the collation + * element, or 1 if the collation element does not occur at the end of + * any expansion sequence + * @see CollationElementIterator#getMaxExpansion + * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. + */ + int32_t getMaxExpansion(int32_t order) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * @return The class ID for this object. All objects of a given class have + * the same class ID. Objects of other classes have different class + * IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Returns the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + *

+     * Base* polymorphic_pointer = createPolymorphicObject();
+     * if (polymorphic_pointer->getDynamicClassID() ==
+     *                                          Derived::getStaticClassID()) ...
+     * 
+ * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Do not use this method: The caller and the ICU library might use different heaps. + * Use cloneBinary() instead which writes to caller-provided memory. + * + * Returns a binary format of this collator. + * @param length Returns the length of the data, in bytes + * @param status the error code status. + * @return memory, owned by the caller, of size 'length' bytes. + * @deprecated ICU 52. Use cloneBinary() instead. + */ + uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** Creates a binary image of a collator. This binary image can be stored and + * later used to instantiate a collator using ucol_openBinary. + * This API supports preflighting. + * @param buffer a fill-in buffer to receive the binary image + * @param capacity capacity of the destination buffer + * @param status for catching errors + * @return size of the image + * @see ucol_openBinary + * @stable ICU 3.4 + */ + int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const; + + /** + * Returns current rules. Delta defines whether full rules are returned or + * just the tailoring. + * + * getRules(void) should normally be used instead. + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. + * @param buffer UnicodeString to store the result rules + * @stable ICU 2.2 + * @see UCOL_FULL_RULES + */ + void getRules(UColRuleOption delta, UnicodeString &buffer) const; + + /** + * Universal attribute setter + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or there were errors + * @stable ICU 2.2 + */ + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status); + + /** + * Universal attribute getter. + * @param attr attribute type + * @param status to indicate whether the operation went on smoothly or there were errors + * @return attribute value + * @stable ICU 2.2 + */ + virtual UColAttributeValue getAttribute(UColAttribute attr, + UErrorCode &status) const; + + /** + * Sets the variable top to the top of the specified reordering group. + * The variable top determines the highest-sorting character + * which is affected by UCOL_ALTERNATE_HANDLING. + * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. + * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, + * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; + * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @see getMaxVariable + * @stable ICU 53 + */ + virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); + + /** + * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. + * @return the maximum variable reordering group. + * @see setMaxVariable + * @stable ICU 53 + */ + virtual UColReorderCode getMaxVariable() const; + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
+ * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
+ * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); + + /** + * Sets the variable top to the specified primary weight. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop + * @param status error code + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual void setVariableTop(uint32_t varTop, UErrorCode &status); + + /** + * Gets the variable top value of a Collator. + * @param status error code (not changed by function). If error code is set, the return value is undefined. + * @return the variable top primary weight + * @see getMaxVariable + * @stable ICU 2.0 + */ + virtual uint32_t getVariableTop(UErrorCode &status) const; + + /** + * Get a UnicodeSet that contains all the characters and sequences tailored in + * this collator. + * @param status error code of the operation + * @return a pointer to a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the root collator. The object must be disposed of by using delete + * @stable ICU 2.4 + */ + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; + + /** + * Get the sort key as an array of bytes from a UnicodeString. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If if not enough the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.0 + */ + virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, + int32_t resultLength) const; + + /** + * Get the sort key as an array of bytes from a char16_t buffer. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param sourceLength length of string to be processed. If -1, the string + * is 0 terminated and length will be decided by the function. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If if not enough the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, + uint8_t *result, int32_t resultLength) const; + + /** + * Retrieves the reordering codes for this collator. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the script ordering array. + * @see ucol_setReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see Collator#setReorderCodes + * @stable ICU 4.8 + */ + virtual int32_t getReorderCodes(int32_t *dest, + int32_t destCapacity, + UErrorCode& status) const; + + /** + * Sets the ordering of scripts for this collator. + * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. + * @param reorderCodesLength The length of reorderCodes. + * @param status error code + * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#getEquivalentReorderCodes + * @stable ICU 4.8 + */ + virtual void setReorderCodes(const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode& status) ; + + /** + * Implements ucol_strcollUTF8(). + * @internal + */ + virtual UCollationResult internalCompareUTF8( + const char *left, int32_t leftLength, + const char *right, int32_t rightLength, + UErrorCode &errorCode) const; + + /** Get the short definition string for a collator. This internal API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same attributes using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme + * This function supports preflighting. + * + * This is internal, and intended to be used with delegate converters. + * + * @param locale a locale that will appear as a collators locale in the resulting + * short string definition. If NULL, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @see ucol_getShortDefinitionString + * @internal + */ + virtual int32_t internalGetShortDefinitionString(const char *locale, + char *buffer, + int32_t capacity, + UErrorCode &status) const; + + /** + * Implements ucol_nextSortKeyPart(). + * @internal + */ + virtual int32_t internalNextSortKeyPart( + UCharIterator *iter, uint32_t state[2], + uint8_t *dest, int32_t count, UErrorCode &errorCode) const; + + // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API + /** + * Only for use in ucol_openRules(). + * @internal + */ + RuleBasedCollator(); + +#ifndef U_HIDE_INTERNAL_API + /** + * Implements ucol_getLocaleByType(). + * Needed because the lifetime of the locale ID string must match that of the collator. + * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper. + * @internal + */ + const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const; + + /** + * Implements ucol_getContractionsAndExpansions(). + * Gets this collator's sets of contraction strings and/or + * characters and strings that map to multiple collation elements (expansions). + * If addPrefixes is TRUE, then contractions that are expressed as + * prefix/pre-context rules are included. + * @param contractions if not NULL, the set to hold the contractions + * @param expansions if not NULL, the set to hold the expansions + * @param addPrefixes include prefix contextual mappings + * @param errorCode in/out ICU error code + * @internal + */ + void internalGetContractionsAndExpansions( + UnicodeSet *contractions, UnicodeSet *expansions, + UBool addPrefixes, UErrorCode &errorCode) const; + + /** + * Adds the contractions that start with character c to the set. + * Ignores prefixes. Used by AlphabeticIndex. + * @internal + */ + void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const; + + /** + * Implements from-rule constructors, and ucol_openRules(). + * @internal + */ + void internalBuildTailoring( + const UnicodeString &rules, + int32_t strength, UColAttributeValue decompositionMode, - UErrorCode& status); - - // private methods ------------------------------------------------------- - - /** - * Creates the c struct for ucollator - * @param locale desired locale - * @param status error status - */ - void setUCollator(const Locale& locale, UErrorCode& status); - - /** - * Creates the c struct for ucollator - * @param locale desired locale name - * @param status error status - */ - void setUCollator(const char* locale, UErrorCode& status); - - /** - * Creates the c struct for ucollator. This used internally by StringSearch. - * Hence the responsibility of cleaning up the ucollator is not done by - * this RuleBasedCollator. The isDataOwned flag is set to FALSE. - * @param collator new ucollator data - * @param rules corresponding collation rules - */ - void setUCollator(UCollator *collator, UnicodeString *rules); + UParseError *outParseError, UnicodeString *outReason, + UErrorCode &errorCode); -public: - /** - * Get UCollator data struct. Used only by StringSearch & intltest. - * @return UCollator data struct - * @internal - */ - const UCollator * getUCollator(); + /** @internal */ + static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) { + return dynamic_cast(fromUCollator(uc)); + } + /** @internal */ + static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { + return dynamic_cast(fromUCollator(uc)); + } + + /** + * Appends the CEs for the string to the vector. + * @internal for tests & tools + */ + void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const; +#endif // U_HIDE_INTERNAL_API protected: - /** - * Used internally by registraton to define the requested and valid locales. - * @param requestedLocale the requsted locale - * @param validLocale the valid locale - * @internal - */ - virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale); + /** + * Used internally by registration to define the requested and valid locales. + * @param requestedLocale the requested locale + * @param validLocale the valid locale + * @param actualLocale the actual locale + * @internal + */ + virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); private: + friend class CollationElementIterator; + friend class Collator; - // if not owned and not a write through alias, copy the ucollator - void checkOwned(void); - - // utility to init rule string used by checkOwned and construct - void setRuleStringFromCollator(UErrorCode& status); - - /** - * Converts C's UCollationResult to EComparisonResult - * @param result member of the enum UComparisonResult - * @return EComparisonResult equivalent of UCollationResult - * @deprecated ICU 2.6. We will not need it. - */ - Collator::EComparisonResult getEComparisonResult( - const UCollationResult &result) const; - - /** - * Converts C's UCollationStrength to ECollationStrength - * @param strength member of the enum UCollationStrength - * @return ECollationStrength equivalent of UCollationStrength - */ - Collator::ECollationStrength getECollationStrength( - const UCollationStrength &strength) const; - - /** - * Converts C++'s ECollationStrength to UCollationStrength - * @param strength member of the enum ECollationStrength - * @return UCollationStrength equivalent of ECollationStrength - */ - UCollationStrength getUCollationStrength( - const Collator::ECollationStrength &strength) const; -}; + RuleBasedCollator(const CollationCacheEntry *entry); -// inline method implementation --------------------------------------------- + /** + * Enumeration of attributes that are relevant for short definition strings + * (e.g., ucol_getShortDefinitionString()). + * Effectively extends UColAttribute. + */ + enum Attributes { + ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, + ATTR_LIMIT + }; -inline UClassID -RuleBasedCollator::getStaticClassID(void) -{ return (UClassID)&fgClassID; } + void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); -inline UClassID -RuleBasedCollator::getDynamicClassID(void) const -{ return RuleBasedCollator::getStaticClassID(); } + // Both lengths must be <0 or else both must be >=0. + UCollationResult doCompare(const char16_t *left, int32_t leftLength, + const char16_t *right, int32_t rightLength, + UErrorCode &errorCode) const; + UCollationResult doCompare(const uint8_t *left, int32_t leftLength, + const uint8_t *right, int32_t rightLength, + UErrorCode &errorCode) const; -inline UBool RuleBasedCollator::operator!=(const Collator& other) const -{ - return !(*this == other); -} + void writeSortKey(const char16_t *s, int32_t length, + SortKeyByteSink &sink, UErrorCode &errorCode) const; -inline void RuleBasedCollator::setUCollator(const Locale &locale, - UErrorCode &status) -{ - setUCollator(locale.getName(), status); -} + void writeIdenticalLevel(const char16_t *s, const char16_t *limit, + SortKeyByteSink &sink, UErrorCode &errorCode) const; + const CollationSettings &getDefaultSettings() const; -inline void RuleBasedCollator::setUCollator(UCollator *collator, - UnicodeString *rules) -{ - if (ucollator && dataIsOwned) { - ucol_close(ucollator); - delete urulestring; + void setAttributeDefault(int32_t attribute) { + explicitlySetAttributes &= ~((uint32_t)1 << attribute); + } + void setAttributeExplicitly(int32_t attribute) { + explicitlySetAttributes |= (uint32_t)1 << attribute; + } + UBool attributeHasBeenSetExplicitly(int32_t attribute) const { + // assert(0 <= attribute < ATTR_LIMIT); + return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0); } - ucollator = collator; - urulestring = rules; - dataIsOwned = FALSE; - isWriteThroughAlias = TRUE; -} - -inline const UCollator * RuleBasedCollator::getUCollator() -{ - return ucollator; -} - -inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult( - const UCollationResult &result) const -{ - switch (result) - { - case UCOL_LESS : - return Collator::LESS; - case UCOL_EQUAL : - return Collator::EQUAL; - default : - return Collator::GREATER; - } -} - -inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength( - const UCollationStrength &strength) const -{ - switch (strength) - { - case UCOL_PRIMARY : - return Collator::PRIMARY; - case UCOL_SECONDARY : - return Collator::SECONDARY; - case UCOL_TERTIARY : - return Collator::TERTIARY; - case UCOL_QUATERNARY : - return Collator::QUATERNARY; - default : - return Collator::IDENTICAL; - } -} - -inline UCollationStrength RuleBasedCollator::getUCollationStrength( - const Collator::ECollationStrength &strength) const -{ - switch (strength) - { - case Collator::PRIMARY : - return UCOL_PRIMARY; - case Collator::SECONDARY : - return UCOL_SECONDARY; - case Collator::TERTIARY : - return UCOL_TERTIARY; - case Collator::QUATERNARY : - return UCOL_QUATERNARY; - default : - return UCOL_IDENTICAL; - } -} -U_NAMESPACE_END + /** + * Tests whether a character is "unsafe" for use as a collation starting point. + * + * @param c code point or code unit + * @return TRUE if c is unsafe + * @see CollationElementIterator#setOffset(int) + */ + UBool isUnsafe(UChar32 c) const; -#endif /* #if !UCONFIG_NO_COLLATION */ + static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode); + UBool initMaxExpansions(UErrorCode &errorCode) const; + + void setFastLatinOptions(CollationSettings &ownedSettings) const; + + const CollationData *data; + const CollationSettings *settings; // reference-counted + const CollationTailoring *tailoring; // alias of cacheEntry->tailoring + const CollationCacheEntry *cacheEntry; // reference-counted + Locale validLocale; + uint32_t explicitlySetAttributes; + + UBool actualLocaleIsSameAsValid; +}; + +U_NAMESPACE_END +#endif // U_SHOW_CPLUSPLUS_API -#endif +#endif // !UCONFIG_NO_COLLATION +#endif // TBLCOLL_H