+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
-* Copyright (C) 1996-2014, International Business Machines Corporation and
+* Copyright (C) 1996-2016, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
/**
- * \file
+ * \file
* \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
*/
#include "unicode/uiter.h"
#include "unicode/ucol.h"
+#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
+struct CollationCacheEntry;
struct CollationData;
struct CollationSettings;
struct CollationTailoring;
* Collator, using data-driven tables. The user can create a customized
* table-based collation.
* <p>
- * For more information about the collation service see
+ * For more information about the collation service see
* <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
* <p>
- * Collation service provides correct sorting orders for most locales supported in ICU.
+ * Collation service provides correct sorting orders for most locales supported in ICU.
* If specific data for a locale is not available, the order eventually falls back
- * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
+ * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
* <p>
* Sort ordering may be customized by providing your own set of rules. For more on
* this subject see the <a href="http://userguide.icu-project.org/collation/customization">
* description for more details on the collation rule syntax.
* @param rules the collation rules to build the collation table from.
* @param status reporting a success or an error.
- * @see Locale
* @stable ICU 2.0
*/
RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
* collation table out of them. Please see RuleBasedCollator class
* description for more details on the collation rule syntax.
* @param rules the collation rules to build the collation table from.
- * @param collationStrength default strength for comparison
+ * @param collationStrength strength for comparison
* @param status reporting a success or an error.
- * @see Locale
* @stable ICU 2.0
*/
RuleBasedCollator(const UnicodeString& rules,
* @param rules the collation rules to build the collation table from.
* @param decompositionMode the normalisation mode
* @param status reporting a success or an error.
- * @see Locale
* @stable ICU 2.0
*/
RuleBasedCollator(const UnicodeString& rules,
* collation table out of them. Please see RuleBasedCollator class
* description for more details on the collation rule syntax.
* @param rules the collation rules to build the collation table from.
- * @param collationStrength default strength for comparison
+ * @param collationStrength strength for comparison
* @param decompositionMode the normalisation mode
* @param status reporting a success or an error.
- * @see Locale
* @stable ICU 2.0
*/
RuleBasedCollator(const UnicodeString& rules,
UColAttributeValue decompositionMode,
UErrorCode& status);
-#ifndef U_HIDE_INTERNAL_API
+#ifndef U_HIDE_INTERNAL_API
/**
* TODO: document & propose as public API
* @internal
/**
* Copy constructor.
* @param other the RuleBasedCollator object to be copied
- * @see Locale
* @stable ICU 2.0
*/
RuleBasedCollator(const RuleBasedCollator& other);
/** Opens a collator from a collator binary image created using
- * cloneBinary. Binary image used in instantiation of the
- * collator remains owned by the user and should stay around for
+ * cloneBinary. Binary image used in instantiation of the
+ * collator remains owned by the user and should stay around for
* the lifetime of the collator. The API also takes a base collator
- * which usually should be the root collator.
+ * which must be the root collator.
* @param bin binary image owned by the user and required through the
* lifetime of the collator
* @param length size of the image. If negative, the API will try to
* figure out the length of the image
- * @param base fallback collator, usually root. The base is required to be
- * present through the lifetime of the collator. Currently
- * it cannot be NULL.
+ * @param base Base collator, for lookup of untailored characters.
+ * Must be the root collator, must not be NULL.
+ * The base is required to be present through the lifetime of the collator.
* @param status for catching errors
* @return newly created collator
* @see cloneBinary
* @stable ICU 3.4
*/
- RuleBasedCollator(const uint8_t *bin, int32_t length,
- const RuleBasedCollator *base,
+ RuleBasedCollator(const uint8_t *bin, int32_t length,
+ const RuleBasedCollator *base,
UErrorCode &status);
/**
/**
* The comparison function compares the character data stored in two
- * different strings. Returns information about whether a string is less
+ * different strings. Returns information about whether a string is less
* than, greater than or equal to another string.
* @param source the source string to be compared with.
* @param target the string that is to be compared with the source string.
UErrorCode &status) const;
/**
- * Does the same thing as compare but limits the comparison to a specified
+ * Does the same thing as compare but limits the comparison to a specified
* length
* @param source the source string to be compared with.
* @param target the string that is to be compared with the source string.
* @param length the length the comparison is limited to
* @param status possible error code
- * @return Returns an enum value. UCOL_GREATER if source (up to the specified
- * length) is greater than target; UCOL_EQUAL if source (up to specified
- * length) is equal to target; UCOL_LESS if source (up to the specified
+ * @return Returns an enum value. UCOL_GREATER if source (up to the specified
+ * length) is greater than target; UCOL_EQUAL if source (up to specified
+ * length) is equal to target; UCOL_LESS if source (up to the specified
* length) is less than target.
* @stable ICU 2.6
*/
/**
* The comparison function compares the character data stored in two
- * different string arrays. Returns information about whether a string array
+ * different string arrays. Returns information about whether a string array
* is less than, greater than or equal to another string array.
* @param source the source string array to be compared with.
* @param sourceLength the length of the source string array. If this value
* than target
* @stable ICU 2.6
*/
- virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
- const UChar* target, int32_t targetLength,
+ virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
+ const char16_t* target, int32_t targetLength,
UErrorCode &status) const;
/**
UErrorCode &status) const;
/**
- * Transforms a specified region of the string into a series of characters
- * that can be compared with CollationKey.compare. Use a CollationKey when
- * you need to do repeated comparisions on the same string. For a single
- * comparison the compare method will be faster.
- * @param source the source string.
- * @param key the transformed key of the source string.
- * @param status the error code status.
- * @return the transformed key.
- * @see CollationKey
- * @stable ICU 2.0
- */
+ * Transforms the string into a series of characters
+ * that can be compared with CollationKey.compare().
+ *
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
+ * @param source the source string.
+ * @param key the transformed key of the source string.
+ * @param status the error code status.
+ * @return the transformed key.
+ * @see CollationKey
+ * @stable ICU 2.0
+ */
virtual CollationKey& getCollationKey(const UnicodeString& source,
CollationKey& key,
UErrorCode& status) const;
/**
- * Transforms a specified region of the string into a series of characters
- * that can be compared with CollationKey.compare. Use a CollationKey when
- * you need to do repeated comparisions on the same string. For a single
- * comparison the compare method will be faster.
- * @param source the source string.
- * @param sourceLength the length of the source string.
- * @param key the transformed key of the source string.
- * @param status the error code status.
- * @return the transformed key.
- * @see CollationKey
- * @stable ICU 2.0
- */
- virtual CollationKey& getCollationKey(const UChar *source,
+ * Transforms a specified region of the string into a series of characters
+ * that can be compared with CollationKey.compare().
+ *
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
+ * @param source the source string.
+ * @param sourceLength the length of the source string.
+ * @param key the transformed key of the source string.
+ * @param status the error code status.
+ * @return the transformed key.
+ * @see CollationKey
+ * @stable ICU 2.0
+ */
+ virtual CollationKey& getCollationKey(const char16_t *source,
int32_t sourceLength,
CollationKey& key,
UErrorCode& status) const;
*/
virtual void getVersion(UVersionInfo info) const;
-#ifndef U_HIDE_DEPRECATED_API
+#ifndef U_HIDE_DEPRECATED_API
/**
* Returns the maximum length of any expansion sequences that end with the
* specified comparison order.
*/
static UClassID U_EXPORT2 getStaticClassID(void);
-#ifndef U_HIDE_DEPRECATED_API
+#ifndef U_HIDE_DEPRECATED_API
/**
* Do not use this method: The caller and the ICU library might use different heaps.
* Use cloneBinary() instead which writes to caller-provided memory.
uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
#endif /* U_HIDE_DEPRECATED_API */
- /** Creates a binary image of a collator. This binary image can be stored and
+ /** Creates a binary image of a collator. This binary image can be stored and
* later used to instantiate a collator using ucol_openBinary.
* This API supports preflighting.
* @param buffer a fill-in buffer to receive the binary image
* function chaining. (See User Guide for details.)
* @return *this
* @see getMaxVariable
- * @draft ICU 53
+ * @stable ICU 53
*/
virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
* Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
* @return the maximum variable reordering group.
* @see setMaxVariable
- * @draft ICU 53
+ * @stable ICU 53
*/
virtual UColReorderCode getMaxVariable() const;
* the top of one of the supported reordering groups,
* and it must not be beyond the last of those groups.
* See setMaxVariable().
- * @param varTop one or more (if contraction) UChars to which the variable top should be set
+ * @param varTop one or more (if contraction) char16_t code units to which the variable top should be set
* @param len length of variable top string. If -1 it is considered to be zero terminated.
* @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
* @return variable top primary weight
* @deprecated ICU 53 Call setMaxVariable() instead.
*/
- virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
+ virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status);
/**
* Sets the variable top to the primary weight of the specified string.
* the top of one of the supported reordering groups,
* and it must not be beyond the last of those groups.
* See setMaxVariable().
- * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
+ * @param varTop a UnicodeString of one or more (if contraction) char16_t code units to which the variable top should be set
* @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
* U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
virtual uint32_t getVariableTop(UErrorCode &status) const;
/**
- * Get a UnicodeSet that contains all the characters and sequences tailored in
+ * Get a UnicodeSet that contains all the characters and sequences tailored in
* this collator.
* @param status error code of the operation
- * @return a pointer to a UnicodeSet object containing all the
+ * @return a pointer to a UnicodeSet object containing all the
* code points and sequences that may sort differently than
* in the root collator. The object must be disposed of by using delete
* @stable ICU 2.4
/**
* Get the sort key as an array of bytes from a UnicodeString.
+ *
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
* @param source string to be processed.
* @param result buffer to store result in. If NULL, number of bytes needed
* will be returned.
int32_t resultLength) const;
/**
- * Get the sort key as an array of bytes from a UChar buffer.
+ * Get the sort key as an array of bytes from a char16_t buffer.
+ *
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
* @param source string to be processed.
* @param sourceLength length of string to be processed. If -1, the string
* is 0 terminated and length will be decided by the function.
* @return Number of bytes needed for storing the sort key
* @stable ICU 2.2
*/
- virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
+ virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength,
uint8_t *result, int32_t resultLength) const;
/**
* Retrieves the reordering codes for this collator.
* @param dest The array to fill with the script ordering.
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
- * will only return the length of the result without writing any of the result string (pre-flighting).
+ * will only return the length of the result without writing any codes (pre-flighting).
* @param status A reference to an error code value, which must not indicate
* a failure before the function call.
* @return The length of the script ordering array.
* @see ucol_setReorderCodes
* @see Collator#getEquivalentReorderCodes
* @see Collator#setReorderCodes
- * @stable ICU 4.8
+ * @stable ICU 4.8
*/
virtual int32_t getReorderCodes(int32_t *dest,
int32_t destCapacity,
/**
* Sets the ordering of scripts for this collator.
- * @param reorderCodes An array of script codes in the new order. This can be NULL if the
+ * @param reorderCodes An array of script codes in the new order. This can be NULL if the
* length is also set to 0. An empty array will clear any reordering codes on the collator.
* @param reorderCodesLength The length of reorderCodes.
* @param status error code
+ * @see ucol_setReorderCodes
* @see Collator#getReorderCodes
* @see Collator#getEquivalentReorderCodes
- * @stable ICU 4.8
+ * @stable ICU 4.8
*/
virtual void setReorderCodes(const int32_t* reorderCodes,
int32_t reorderCodesLength,
UErrorCode &errorCode) const;
/** Get the short definition string for a collator. This internal API harvests the collator's
- * locale and the attribute set and produces a string that can be used for opening
+ * locale and the attribute set and produces a string that can be used for opening
* a collator with the same attributes using the ucol_openFromShortString API.
* This string will be normalized.
* The structure and the syntax of the string is defined in the "Naming collators"
- * section of the users guide:
+ * section of the users guide:
* http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
* This function supports preflighting.
- *
+ *
* This is internal, and intended to be used with delegate converters.
*
* @param locale a locale that will appear as a collator's locale in the resulting
- * short string definition. If NULL, the locale will be harvested
+ * short string definition. If NULL, the locale will be harvested
* from the collator.
* @param buffer space to hold the resulting string
* @param capacity capacity of the buffer
UCharIterator *iter, uint32_t state[2],
uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
-#ifndef U_HIDE_INTERNAL_API
+ // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
/**
* Only for use in ucol_openRules().
* @internal
*/
RuleBasedCollator();
+#ifndef U_HIDE_INTERNAL_API
/**
* Implements ucol_getLocaleByType().
* Needed because the lifetime of the locale ID string must match that of the collator.
friend class CollationElementIterator;
friend class Collator;
- RuleBasedCollator(const CollationTailoring *t, const Locale &vl);
+ RuleBasedCollator(const CollationCacheEntry *entry);
/**
* Enumeration of attributes that are relevant for short definition strings
ATTR_LIMIT
};
- void adoptTailoring(CollationTailoring *t);
+ void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
// Both lengths must be <0 or else both must be >=0.
- UCollationResult doCompare(const UChar *left, int32_t leftLength,
- const UChar *right, int32_t rightLength,
+ UCollationResult doCompare(const char16_t *left, int32_t leftLength,
+ const char16_t *right, int32_t rightLength,
UErrorCode &errorCode) const;
UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
const uint8_t *right, int32_t rightLength,
UErrorCode &errorCode) const;
- void writeSortKey(const UChar *s, int32_t length,
+ void writeSortKey(const char16_t *s, int32_t length,
SortKeyByteSink &sink, UErrorCode &errorCode) const;
- void writeIdenticalLevel(const UChar *s, const UChar *limit,
+ void writeIdenticalLevel(const char16_t *s, const char16_t *limit,
SortKeyByteSink &sink, UErrorCode &errorCode) const;
const CollationSettings &getDefaultSettings() const;
*/
UBool isUnsafe(UChar32 c) const;
- static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
+ static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
UBool initMaxExpansions(UErrorCode &errorCode) const;
void setFastLatinOptions(CollationSettings &ownedSettings) const;
const CollationData *data;
const CollationSettings *settings; // reference-counted
- const CollationTailoring *tailoring; // reference-counted
+ const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
+ const CollationCacheEntry *cacheEntry; // reference-counted
Locale validLocale;
uint32_t explicitlySetAttributes;
};
U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
#endif // !UCONFIG_NO_COLLATION
#endif // TBLCOLL_H