X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/i18n/unicode/coll.h diff --git a/icuSources/i18n/unicode/coll.h b/icuSources/i18n/unicode/coll.h index e5039106..f5564c73 100644 --- a/icuSources/i18n/unicode/coll.h +++ b/icuSources/i18n/unicode/coll.h @@ -1,15 +1,17 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** -* Copyright (C) 1996-2014, International Business Machines +* Copyright (C) 1996-2016, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** */ /** - * \file + * \file * \brief C++ API: Collation Service. */ - + /** * File coll.h * @@ -52,11 +54,13 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + #if !UCONFIG_NO_COLLATION #include "unicode/uobject.h" #include "unicode/ucol.h" -#include "unicode/normlzr.h" +#include "unicode/unorm.h" #include "unicode/locid.h" #include "unicode/uniset.h" #include "unicode/umisc.h" @@ -135,20 +139,12 @@ class CollationKey; * \endcode * * \htmlonly\endhtmlonly -*
-* For comparing strings exactly once, the compare
method
-* provides the best performance. When sorting a list of strings however, it
-* is generally necessary to compare each string multiple times. In this case,
-* sort keys provide better performance. The getSortKey
methods
+*
+* The getSortKey
methods
* convert a string to a series of bytes that can be compared bitwise against
* other sort keys using strcmp()
. Sort keys are written as
-* zero-terminated byte strings. They consist of several substrings, one for
-* each collation strength level, that are delimited by 0x01 bytes.
-* If the string code points are appended for UCOL_IDENTICAL, then they are
-* processed for correct code point order comparison and may contain 0x01
-* bytes but not zero bytes.
-*
+* zero-terminated byte strings.
+*
* Another set of APIs returns a CollationKey
object that wraps
* the sort key bytes instead of returning the bytes themselves.
*
typeid(*this) == typeid(other)
.
+ * `typeid(*this) == typeid(other)`.
*
* Subclass implementations should do something like the following:
- * - * if (this == &other) { return TRUE; } - * if (!Collator::operator==(other)) { return FALSE; } // not the same class * - * const MyCollator &o = (const MyCollator&)other; - * (compare this vs. o's subclass fields) - *+ * if (this == &other) { return TRUE; } + * if (!Collator::operator==(other)) { return FALSE; } // not the same class + * + * const MyCollator &o = (const MyCollator&)other; + * (compare this vs. o's subclass fields) + * * @param other Collator object to be compared * @return TRUE if other is the same as this. * @stable ICU 2.0 @@ -268,7 +269,7 @@ public: * @return a copy of this object, owned by the caller * @stable ICU 2.0 */ - virtual Collator* clone(void) const = 0; + virtual Collator* clone() const = 0; /** * Creates the Collator object for the current default locale. @@ -292,10 +293,19 @@ public: static Collator* U_EXPORT2 createInstance(UErrorCode& err); /** - * Gets the table-based collation object for the desired locale. The + * Gets the collation object for the desired locale. The * resource of the desired locale will be loaded. + * * Locale::getRoot() is the base collation table and all other languages are * built on top of it with additional language-specific modifications. + * + * For some languages, multiple collation types are available; + * for example, "de@collation=phonebook". + * Starting with ICU 54, collation attributes can be specified via locale keywords as well, + * in the old locale extension syntax ("el@colCaseFirst=upper") + * or in language tag syntax ("el-u-kf-upper"). + * See User Guide: Collation API. + * * The UErrorCode& err parameter is used to return status information to the user. * To check whether the construction succeeded or not, you should check * the value of U_SUCCESS(err). If you wish more detailed information, you @@ -305,6 +315,7 @@ public: * used. U_USING_DEFAULT_ERROR indicates that the default locale data was * used; neither the requested locale nor any of its fall back locales * could be found. + * * The caller owns the returned object and is responsible for deleting it. * @param loc The locale ID for which to open a collator. * @param err the error code status. @@ -316,6 +327,7 @@ public: */ static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err); +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * The comparison function compares the character data stored in two * different strings. Returns information about whether a string is less @@ -329,6 +341,7 @@ public: */ virtual EComparisonResult compare(const UnicodeString& source, const UnicodeString& target) const; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * The comparison function compares the character data stored in two @@ -346,6 +359,7 @@ public: const UnicodeString& target, UErrorCode &status) const = 0; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Does the same thing as compare but limits the comparison to a specified * length @@ -361,6 +375,7 @@ public: virtual EComparisonResult compare(const UnicodeString& source, const UnicodeString& target, int32_t length) const; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Does the same thing as compare but limits the comparison to a specified @@ -380,14 +395,15 @@ public: int32_t length, UErrorCode &status) const = 0; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * The comparison function compares the character data stored in two * different string arrays. Returns information about whether a string array * is less than, greater than or equal to another string array. *
Example of use: *
- * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" - * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" + * . char16_t ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" + * . char16_t abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" * . UErrorCode status = U_ZERO_ERROR; * . Collator *myCollation = * . Collator::createInstance(Locale::getUS(), status); @@ -413,9 +429,10 @@ public: * target * @deprecated ICU 2.6 use the overload with UErrorCode & */ - virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength) + virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength) const; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * The comparison function compares the character data stored in two @@ -433,8 +450,8 @@ public: * than target * @stable ICU 2.6 */ - virtual UCollationResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength, + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, UErrorCode &status) const = 0; /** @@ -472,11 +489,14 @@ public: /** * Transforms the string into a series of characters that can be compared * with CollationKey::compareTo. It is not possible to restore the original - * string from the chars in the sort key. The generated sort key handles - * only a limited number of ignorable characters. + * string from the chars in the sort key. *Use CollationKey::equals or CollationKey::compare to compare the * generated sort keys. * If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * * @param source the source string to be transformed into a sort key. * @param key the collation key to be filled in * @param status the error code status. @@ -491,11 +511,14 @@ public: /** * Transforms the string into a series of characters that can be compared * with CollationKey::compareTo. It is not possible to restore the original - * string from the chars in the sort key. The generated sort key handles - * only a limited number of ignorable characters. + * string from the chars in the sort key. *
Use CollationKey::equals or CollationKey::compare to compare the * generated sort keys. *
If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * * @param source the source string to be transformed into a sort key. * @param sourceLength length of the collation key * @param key the collation key to be filled in @@ -504,7 +527,7 @@ public: * @see CollationKey#compare * @stable ICU 2.0 */ - virtual CollationKey& getCollationKey(const UChar*source, + virtual CollationKey& getCollationKey(const char16_t*source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const = 0; @@ -514,6 +537,7 @@ public: */ virtual int32_t hashCode(void) const = 0; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Gets the locale of the Collator * @@ -527,6 +551,7 @@ public: * in ICU 3.0. */ virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Convenience method for comparing two strings based on the collation rules. @@ -563,6 +588,7 @@ public: */ UBool equals(const UnicodeString& source, const UnicodeString& target) const; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Determines the minimum strength that will be used in comparison or * transformation. @@ -594,12 +620,13 @@ public: * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead */ virtual void setStrength(ECollationStrength newStrength); +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Retrieves the reordering codes for this collator. * @param dest The array to fill with the script ordering. * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function - * will only return the length of the result without writing any of the result string (pre-flighting). + * will only return the length of the result without writing any codes (pre-flighting). * @param status A reference to an error code value, which must not indicate * a failure before the function call. * @return The length of the script ordering array. @@ -608,7 +635,7 @@ public: * @see Collator#setReorderCodes * @see UScriptCode * @see UColReorderCode - * @stable ICU 4.8 + * @stable ICU 4.8 */ virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, @@ -618,15 +645,16 @@ public: * Sets the ordering of scripts for this collator. * *
The reordering codes are a combination of script codes and reorder codes. - * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * @param reorderCodes An array of script codes in the new order. This can be NULL if the * length is also set to 0. An empty array will clear any reordering codes on the collator. * @param reorderCodesLength The length of reorderCodes. * @param status error code + * @see ucol_setReorderCodes * @see Collator#getReorderCodes * @see Collator#getEquivalentReorderCodes * @see UScriptCode * @see UColReorderCode - * @stable ICU 4.8 + * @stable ICU 4.8 */ virtual void setReorderCodes(const int32_t* reorderCodes, int32_t reorderCodesLength, @@ -635,12 +663,14 @@ public: /** * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder * codes will be grouped and must reorder together. - * @param reorderCode The reorder code to determine equivalence for. + * Beginning with ICU 55, scripts only reorder together if they are primary-equal, + * for example Hiragana and Katakana. + * + * @param reorderCode The reorder code to determine equivalence for. * @param dest The array to fill with the script equivalence reordering codes. - * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the - * function will only return the length of the result without writing any of the result - * string (pre-flighting). - * @param status A reference to an error code value, which must not indicate + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the + * function will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate * a failure before the function call. * @return The length of the of the reordering code equivalence array. * @see ucol_setReorderCodes @@ -648,7 +678,7 @@ public: * @see Collator#setReorderCodes * @see UScriptCode * @see UColReorderCode - * @stable ICU 4.8 + * @stable ICU 4.8 */ static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, int32_t* dest, @@ -656,7 +686,7 @@ public: UErrorCode& status); /** - * Get name of the object for the desired Locale, in the desired langauge + * Get name of the object for the desired Locale, in the desired language * @param objectLocale must be from getAvailableLocales * @param displayLocale specifies the desired locale for output * @param name the fill-in parameter of the return value @@ -669,7 +699,7 @@ public: UnicodeString& name); /** - * Get name of the object for the desired Locale, in the langauge of the + * Get name of the object for the desired Locale, in the language of the * default locale. * @param objectLocale must be from getAvailableLocales * @param name the fill-in parameter of the return value @@ -858,7 +888,6 @@ public: virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const = 0; - /* Cannot use #ifndef U_HIDE_DRAFT_API for the following draft methods since they are virtual */ /** * Sets the variable top to the top of the specified reordering group. * The variable top determines the highest-sorting character @@ -875,7 +904,7 @@ public: * function chaining. (See User Guide for details.) * @return *this * @see getMaxVariable - * @draft ICU 53 + * @stable ICU 53 */ virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); @@ -885,10 +914,11 @@ public: * The base class implementation returns UCOL_REORDER_CODE_PUNCTUATION. * @return the maximum variable reordering group. * @see setMaxVariable - * @draft ICU 53 + * @stable ICU 53 */ virtual UColReorderCode getMaxVariable() const; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Sets the variable top to the primary weight of the specified string. * @@ -896,7 +926,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set * @param len length of variable top string. If -1 it is considered to be zero terminated. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
@@ -905,7 +935,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0; /** * Sets the variable top to the primary weight of the specified string. @@ -914,7 +944,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
* U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -936,6 +966,7 @@ public: * @deprecated ICU 53 Call setMaxVariable() instead. */ virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Gets the variable top value of a Collator. @@ -957,6 +988,7 @@ public: */ virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Same as clone(). * The base class implementation simply calls clone(). @@ -964,12 +996,17 @@ public: * @see clone() * @deprecated ICU 50 no need to have two methods for cloning */ - virtual Collator* safeClone(void) const; + virtual Collator* safeClone() const; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Get the sort key as an array of bytes from a UnicodeString. * Sort key byte arrays are zero-terminated and can be compared using * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * * @param source string to be processed. * @param result buffer to store result in. If NULL, number of bytes needed * will be returned. @@ -983,9 +1020,13 @@ public: int32_t resultLength) const = 0; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * Sort key byte arrays are zero-terminated and can be compared using * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * * @param source string to be processed. * @param sourceLength length of string to be processed. * If -1, the string is 0 terminated and length will be decided by the @@ -997,7 +1038,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength, uint8_t*result, int32_t resultLength) const = 0; /** @@ -1093,18 +1134,18 @@ public: virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); /** Get the short definition string for a collator. This internal API harvests the collator's - * locale and the attribute set and produces a string that can be used for opening + * locale and the attribute set and produces a string that can be used for opening * a collator with the same attributes using the ucol_openFromShortString API. * This string will be normalized. * The structure and the syntax of the string is defined in the "Naming collators" - * section of the users guide: + * section of the users guide: * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme * This function supports preflighting. - * + * * This is internal, and intended to be used with delegate converters. * * @param locale a locale that will appear as a collators locale in the resulting - * short string definition. If NULL, the locale will be harvested + * short string definition. If NULL, the locale will be harvested * from the collator. * @param buffer space to hold the resulting string * @param capacity capacity of the buffer @@ -1248,4 +1289,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_COLLATION */ +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif