/*
*******************************************************************************
-* Copyright (c) 1996-2012, International Business Machines Corporation and others.
+* Copyright (c) 1996-2015, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
* The C API for Collator performs locale-sensitive
* string comparison. You use this service to build
* searching and sorting routines for natural language text.
- * <em>Important: </em>The ICU collation service has been reimplemented
- * in order to achieve better performance and UCA compliance.
- * For details, see the
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * collation design document</a>.
* <p>
* For more information about the collation service see
- * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
+ * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
* <p>
* Collation service provides correct sorting orders for most locales supported in ICU.
* If specific data for a locale is not available, the order eventually falls back
- * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
+ * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
* <p>
* Sort ordering may be customized by providing your own set of rules. For more on
- * this subject see the
- * <a href="http://icu-project.org/userguide/Collate_Customization.html">
- * Collation customization</a> section of the users guide.
+ * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
+ * Collation Customization</a> section of the User Guide.
* <p>
* @see UCollationResult
* @see UNormalizationMode
/**
* UCOL_LESS is returned if source string is compared to be less than target
- * string in the u_strcoll() method.
+ * string in the ucol_strcoll() method.
* UCOL_EQUAL is returned if source string is compared to be equal to target
- * string in the u_strcoll() method.
+ * string in the ucol_strcoll() method.
* UCOL_GREATER is returned if source string is compared to be greater than
- * target string in the u_strcoll() method.
- * @see u_strcoll()
+ * target string in the ucol_strcoll() method.
+ * @see ucol_strcoll()
* <p>
* Possible values for a comparison result
* @stable ICU 2.0
* @stable ICU 2.0
*/
typedef enum {
- /** Attribute for direction of secondary weights - used in French.
+ /** Attribute for direction of secondary weights - used in Canadian French.
* Acceptable values are UCOL_ON, which results in secondary weights
* being considered backwards and UCOL_OFF which treats secondary
- * weights in the order they appear.*/
+ * weights in the order they appear.
+ * @stable ICU 2.0
+ */
UCOL_FRENCH_COLLATION,
/** Attribute for handling variable elements.
* Acceptable values are UCOL_NON_IGNORABLE (default)
* and UCOL_SHIFTED which causes codepoints with primary
* weights that are equal to or below the variable top value
* to be ignored on primary level and moved to the quaternary
- * level.*/
+ * level.
+ * @stable ICU 2.0
+ */
UCOL_ALTERNATE_HANDLING,
/** Controls the ordering of upper and lower case letters.
* Acceptable values are UCOL_OFF (default), which orders
* upper and lower case letters in accordance with their tertiary
* weights, UCOL_UPPER_FIRST which forces upper case letters to
* sort before lower case letters, and UCOL_LOWER_FIRST which does
- * the opposite. */
+ * the opposite.
+ * @stable ICU 2.0
+ */
UCOL_CASE_FIRST,
/** Controls whether an extra case level (positioned before the third
* level) is generated or not. Acceptable values are UCOL_OFF (default),
* when the case level is not generated, and UCOL_ON which causes the case
* level to be generated. Contents of the case level are affected by
* the value of UCOL_CASE_FIRST attribute. A simple way to ignore
* accent differences in a string is to set the strength to UCOL_PRIMARY
- * and enable case level. */
+ * and enable case level.
+ * @stable ICU 2.0
+ */
UCOL_CASE_LEVEL,
/** Controls whether the normalization check and necessary normalizations
* are performed. When set to UCOL_OFF (default) no normalization check
* is performed. The correctness of the result is guaranteed only if the
* input data is in so-called FCD form (see users manual for more info).
* When set to UCOL_ON, an incremental check is performed to see whether
* the input data is in the FCD form. If the data is not in the FCD form,
- * incremental NFD normalization is performed. */
+ * incremental NFD normalization is performed.
+ * @stable ICU 2.0
+ */
UCOL_NORMALIZATION_MODE,
- /** An alias for UCOL_NORMALIZATION_MODE attribute */
+ /** An alias for UCOL_NORMALIZATION_MODE attribute.
+ * @stable ICU 2.0
+ */
UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
/** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
* UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
- * for most locales (except Japanese) is tertiary. Quaternary strength
+ * for most locales (except Japanese) is tertiary.
+ *
+ * Quaternary strength
* is useful when combined with shifted setting for alternate handling
- * attribute and for JIS x 4061 collation, when it is used to distinguish
- * between Katakana and Hiragana (this is achieved by setting the
- * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level
- * is affected only by the number of non ignorable code points in
- * the string. Identical strength is rarely useful, as it amounts
- * to codepoints of the NFD form of the string. */
+ * attribute and for JIS X 4061 collation, when it is used to distinguish
+ * between Katakana and Hiragana.
+ * Otherwise, quaternary level
+ * is affected only by the number of non-ignorable code points in
+ * the string.
+ *
+ * Identical strength is rarely useful, as it amounts
+ * to codepoints of the NFD form of the string.
+ * @stable ICU 2.0
+ */
UCOL_STRENGTH,
+#ifndef U_HIDE_DEPRECATED_API
/** When turned on, this attribute positions Hiragana before all
* non-ignorables on quaternary level. This is a sneaky way to produce JIS
- * sort order */
- UCOL_HIRAGANA_QUATERNARY_MODE,
- /** When turned on, this attribute generates a collation key
- * for the numeric value of substrings of digits.
+ * sort order.
+ *
+ * This attribute was an implementation detail of the CLDR Japanese tailoring.
+ * Since ICU 50, this attribute is not settable any more via API functions.
+ * Since CLDR 25/ICU 53, explicit quaternary relations are used
+ * to achieve the same Japanese sort order.
+ *
+ * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
+ */
+ UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
+#endif /* U_HIDE_DEPRECATED_API */
+ /**
+ * When turned on, this attribute makes
+ * substrings of digits sort according to their numeric values.
+ *
* This is a way to get '100' to sort AFTER '2'. Note that the longest
- * digit substring that can be treated as a single collation element is
+ * digit substring that can be treated as a single unit is
* 254 digits (not counting leading zeros). If a digit substring is
* longer than that, the digits beyond the limit will be treated as a
- * separate digit substring associated with a separate collation element. */
- UCOL_NUMERIC_COLLATION,
+ * separate digit substring.
+ *
+ * A "digit" in this sense is a code point with General_Category=Nd,
+ * which does not include circled numbers, roman numerals, etc.
+ * Only a contiguous digit substring is considered, that is,
+ * non-negative integers without separators.
+ * There is no support for plus/minus signs, decimals, exponents, etc.
+ *
+ * @stable ICU 2.8
+ */
+ UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
+ /**
+ * The number of UColAttribute constants.
+ * @stable ICU 2.0
+ */
UCOL_ATTRIBUTE_COUNT
} UColAttribute;
* @stable ICU 2.0
*/
typedef enum {
- /** Retrieve tailoring only */
+ /**
+ * Retrieves the tailoring rules only.
+ * Same as calling the version of getRules() without UColRuleOption.
+ * @stable ICU 2.0
+ */
UCOL_TAILORING_ONLY,
- /** Retrieve UCA rules and tailoring */
+ /**
+ * Retrieves the "UCA rules" concatenated with the tailoring rules.
+ * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
+ * They are almost never used or useful at runtime and can be removed from the data.
+ * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
+ * @stable ICU 2.0
+ */
UCOL_FULL_RULES
} UColRuleOption ;
/**
* Open a UCollator for comparing strings.
+ *
+ * For some languages, multiple collation types are available;
+ * for example, "de@collation=phonebook".
+ * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
+ * in the old locale extension syntax ("el@colCaseFirst=upper")
+ * or in language tag syntax ("el-u-kf-upper").
+ * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
+ *
* The UCollator pointer is used in all the calls to the Collation
* service. When finished, the collator must be disposed of by calling
* {@link #ucol_close }.
* Special values for locales can be passed in -
* if NULL is passed for the locale, the default locale
* collation rules will be used. If empty string ("") or
- * "root" are passed, UCA rules will be used.
- * @param status A pointer to an UErrorCode to receive any errors
+ * "root" are passed, the root collator will be returned.
+ * @param status A pointer to a UErrorCode to receive any errors
* @return A pointer to a UCollator, or 0 if an error occurred.
* @see ucol_openRules
* @see ucol_safeClone
ucol_open(const char *loc, UErrorCode *status);
/**
- * Produce an UCollator instance according to the rules supplied.
+ * Produce a UCollator instance according to the rules supplied.
* The rules are used to change the default ordering, defined in the
* UCA in a process called tailoring. The resulting UCollator pointer
* can be used in the same way as the one obtained by {@link #ucol_open }.
* @param parseError A pointer to UParseError to receive information about errors
* that occurred during parsing. This argument can currently be set
* to NULL, but at the user's own risk. Please provide a real structure.
- * @param status A pointer to an UErrorCode to receive any errors
+ * @param status A pointer to a UErrorCode to receive any errors
* @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
* of error - please use status argument to check for errors.
* @see ucol_open
UParseError *parseError,
UErrorCode *status);
+#ifndef U_HIDE_DEPRECATED_API
/**
* Open a collator defined by a short form string.
* The structure and the syntax of the string is defined in the "Naming collators"
* section of the users guide:
- * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
+ * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
* Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
* strength will be 3. 3066bis locale overrides individual locale parts.
* The call to this function is equivalent to a call to ucol_open, followed by a
* @see ucol_setVariableTop
* @see ucol_getShortDefinitionString
* @see ucol_normalizeShortDefinitionString
- * @stable ICU 3.0
- *
+ * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
*/
-U_STABLE UCollator* U_EXPORT2
+U_DEPRECATED UCollator* U_EXPORT2
ucol_openFromShortString( const char *definition,
UBool forceDefaults,
UParseError *parseError,
UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DEPRECATED_API
/**
* Get a set containing the contractions defined by the collator. The set includes
- * both the UCA contractions and the contractions defined by the collator. This set
+ * both the root collator's contractions and the contractions defined by the collator. This set
* will contain only strings. If a tailoring explicitly suppresses contractions from
- * the UCA (like Russian), removed contractions will not be in the resulting set.
+ * the root collator (like Russian), removed contractions will not be in the resulting set.
* @param coll collator
* @param conts the set to hold the result. It gets emptied before
* contractions are added.
/**
* Get a set containing the expansions defined by the collator. The set includes
- * both the UCA expansions and the expansions defined by the tailoring
+ * both the root collator's expansions and the expansions defined by the tailoring
* @param coll collator
* @param contractions if not NULL, the set to hold the contractions
* @param expansions if not NULL, the set to hold the expansions
const UChar *target,
int32_t targetLength);
+/**
+* Compare two strings in UTF-8.
+* The strings will be compared using the options already specified.
+* Note: When the input string contains a malformed UTF-8 byte sequence,
+* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
+* @param coll The UCollator containing the comparison rules.
+* @param source The source UTF-8 string.
+* @param sourceLength The length of source, or -1 if null-terminated.
+* @param target The target UTF-8 string.
+* @param targetLength The length of target, or -1 if null-terminated.
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The result of comparing the strings; one of UCOL_EQUAL,
+* UCOL_GREATER, UCOL_LESS
+* @see ucol_greater
+* @see ucol_greaterOrEqual
+* @see ucol_equal
+* @stable ICU 50
+*/
+U_STABLE UCollationResult U_EXPORT2
+ucol_strcollUTF8(
+ const UCollator *coll,
+ const char *source,
+ int32_t sourceLength,
+ const char *target,
+ int32_t targetLength,
+ UErrorCode *status);
+
/**
* Determine if one string is greater than another.
* This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
* @param tIter The target string iterator.
* @return The result of comparing the strings; one of UCOL_EQUAL,
* UCOL_GREATER, UCOL_LESS
- * @param status A pointer to an UErrorCode to receive any errors
+ * @param status A pointer to a UErrorCode to receive any errors
* @see ucol_strcoll
* @stable ICU 2.6
*/
* @param coll The UCollator to query.
* @param dest The array to fill with the script ordering.
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
- * will only return the length of the result without writing any of the result string (pre-flighting).
+ * will only return the length of the result without writing any codes (pre-flighting).
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
* failure before the function call.
* @return The number of reordering codes written to the dest array.
* @see UColReorderCode
* @stable ICU 4.8
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_getReorderCodes(const UCollator* coll,
int32_t* dest,
int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Sets the reordering codes for this collator.
- * Collation reordering allows scripts and some other defined blocks of characters
- * to be moved relative to each other as a block. This reordering is done on top of
+ * Collation reordering allows scripts and some other groups of characters
+ * to be moved relative to each other. This reordering is done on top of
* the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
* at the start and/or the end of the collation order. These groups are specified using
* UScript codes and UColReorderCode entries.
+ *
* <p>By default, reordering codes specified for the start of the order are placed in the
- * order given after a group of "special" non-script blocks. These special groups of characters
+ * order given after several special non-script blocks. These special groups of characters
* are space, punctuation, symbol, currency, and digit. These special groups are represented with
* UColReorderCode entries. Script groups can be intermingled with
- * these special non-script blocks if those special blocks are explicitly specified in the reordering.
+ * these special non-script groups if those special groups are explicitly specified in the reordering.
+ *
* <p>The special code OTHERS stands for any script that is not explicitly
* mentioned in the list of reordering codes given. Anything that is after OTHERS
* will go at the very end of the reordering in the order given.
+ *
* <p>The special reorder code DEFAULT will reset the reordering for this collator
* to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
* was specified when this collator was created from resource data or from rules. The
- * DEFAULT code <b>must</b> be the sole code supplied when it used. If not
- * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set.
+ * DEFAULT code <b>must</b> be the sole code supplied when it is used.
+ * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
+ *
* <p>The special reorder code NONE will remove any reordering for this collator.
* The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
- * NONE code <b>must</b> be the sole code supplied when it used.
+ * NONE code <b>must</b> be the sole code supplied when it is used.
+ *
* @param coll The UCollator to set.
* @param reorderCodes An array of script codes in the new order. This can be NULL if the
* length is also set to 0. An empty array will clear any reordering codes on the collator.
* @see UColReorderCode
* @stable ICU 4.8
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
ucol_setReorderCodes(UCollator* coll,
const int32_t* reorderCodes,
int32_t reorderCodesLength,
/**
* Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
* codes will be grouped and must reorder together.
+ * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
+ * for example Hiragana and Katakana.
+ *
* @param reorderCode The reorder code to determine equivalence for.
* @param dest The array to fill with the script ordering.
* @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
- * will only return the length of the result without writing any of the result string (pre-flighting).
+ * will only return the length of the result without writing any codes (pre-flighting).
* @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
* a failure before the function call.
* @return The number of reordering codes written to the dest array.
* @see UColReorderCode
* @stable ICU 4.8
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_getEquivalentReorderCodes(int32_t reorderCode,
int32_t* dest,
int32_t destCapacity,
* @param dispLoc The locale for display.
* @param result A pointer to a buffer to receive the attribute.
* @param resultLength The maximum size of result.
- * @param status A pointer to an UErrorCode to receive any errors
+ * @param status A pointer to a UErrorCode to receive any errors
* @return The total buffer size needed; if greater than resultLength,
* the output was truncated.
* @stable ICU 2.0
UErrorCode* status);
/**
- * Return the functionally equivalent locale for the given
- * requested locale, with respect to given keyword, for the
- * collation service. If two locales return the same result, then
- * collators instantiated for these locales will behave
- * equivalently. The converse is not always true; two collators
+ * Return the functionally equivalent locale for the specified
+ * input locale, with respect to given keyword, for the
+ * collation service. If two different input locale + keyword
+ * combinations produce the same result locale, then collators
+ * instantiated for these two different input locales will behave
+ * equivalently. The converse is not always true; two collators
* may in fact be equivalent, but return different results, due to
- * internal details. The return result has no other meaning than
+ * internal details. The return result has no other meaning than
* that stated above, and implies nothing as to the relationship
- * between the two locales. This is intended for use by
+ * between the two locales. This is intended for use by
* applications that wish to cache collators, or otherwise reuse
- * collators when possible. The functional equivalent may change
- * over time. For more information, please see the <a
- * href="http://icu-project.org/userguide/locale.html#services">
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the <a
+ * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
* Locales and Services</a> section of the ICU User Guide.
- * @param result fillin for the functionally equivalent locale
+ * @param result fillin for the functionally equivalent result locale
* @param resultCapacity capacity of the fillin buffer
* @param keyword a particular keyword as enumerated by
* ucol_getKeywords.
- * @param locale the requested locale
+ * @param locale the specified input locale
* @param isAvailable if non-NULL, pointer to a fillin parameter that
- * indicates whether the requested locale was 'available' to the
- * collation service. A locale is defined as 'available' if it
+ * on return indicates whether the specified input locale was 'available'
+ * to the collation service. A locale is defined as 'available' if it
* physically exists within the collation locale data.
* @param status pointer to input-output error code
- * @return the actual buffer size needed for the locale. If greater
+ * @return the actual buffer size needed for the locale. If greater
* than resultCapacity, the returned full name will be truncated and
* an error code will be returned.
* @stable ICU 3.0
UBool* isAvailable, UErrorCode* status);
/**
- * Get the collation rules from a UCollator.
+ * Get the collation tailoring rules from a UCollator.
* The rules will follow the rule syntax.
* @param coll The UCollator to query.
* @param length
- * @return The collation rules.
+ * @return The collation tailoring rules.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
ucol_getRules( const UCollator *coll,
int32_t *length);
+#ifndef U_HIDE_DEPRECATED_API
/** Get the short definition string for a collator. This API harvests the collator's
* locale and the attribute set and produces a string that can be used for opening
- * a collator with the same properties using the ucol_openFromShortString API.
+ * a collator with the same attributes using the ucol_openFromShortString API.
* This string will be normalized.
* The structure and the syntax of the string is defined in the "Naming collators"
* section of the users guide:
- * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
+ * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
* This API supports preflighting.
* @param coll a collator
* @param locale a locale that will appear as a collators locale in the resulting
* @return length of the resulting string
* @see ucol_openFromShortString
* @see ucol_normalizeShortDefinitionString
- * @stable ICU 3.0
+ * @deprecated ICU 54
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
const char *locale,
char *buffer,
* @see ucol_openFromShortString
* @see ucol_getShortDefinitionString
*
- * @stable ICU 3.0
+ * @deprecated ICU 54
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
ucol_normalizeShortDefinitionString(const char *source,
char *destination,
int32_t capacity,
UParseError *parseError,
UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Get a sort key for a string from a UCollator.
* Sort keys may be compared using <TT>strcmp</TT>.
*
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
* Like ICU functions that write to an output buffer, the buffer contents
* are undefined if the buffer capacity (resultLength parameter) is too small.
* Unlike ICU functions that write a string to an output buffer,
* to preserve state array between calls and to provide
* the same type of UCharIterator set with the same string.
* The destination buffer provided must be big enough to store
- * the number of requested bytes. Generated sortkey is not
- * compatible with sortkeys generated using ucol_getSortKey
- * API, since we don't do any compression. If uncompressed
- * sortkeys are required, this API can be used.
+ * the number of requested bytes.
+ *
+ * The generated sort key may or may not be compatible with
+ * sort keys generated using ucol_getSortKey().
* @param coll The UCollator containing the collation rules.
* @param iter UCharIterator containing the string we need
* the sort key to be calculated for.
U_STABLE void U_EXPORT2
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
-/**
- * Merge two sort keys. The levels are merged with their corresponding counterparts
+/**
+ * Merges two sort keys. The levels are merged with their corresponding counterparts
* (primaries with primaries, secondaries with secondaries etc.). Between the values
* from the same level a separator is inserted.
- * example (uncompressed):
- * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ *
+ * This is useful, for example, for combining sort keys from first and last names
+ * to sort such pairs.
+ * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
+ *
+ * The recommended way to achieve "merged" sorting is by
+ * concatenating strings with U+FFFE between them.
+ * The concatenation has the same sort order as the merged sort keys,
+ * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
+ * Using strings with U+FFFE may yield shorter sort keys.
+ *
+ * For details about Sort Key Features see
+ * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
+ *
+ * It is possible to merge multiple sort keys by consecutively merging
+ * another one with the intermediate result.
+ *
+ * The length of the merge result is the sum of the lengths of the input sort keys.
+ *
+ * Example (uncompressed):
+ * <pre>191B1D 01 050505 01 910505 00
+ * 1F2123 01 050505 01 910505 00</pre>
* will be merged as
- * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
- * This allows for concatenating of first and last names for sorting, among other things.
- * If the destination buffer is not big enough, the results are undefined.
- * If any of source lengths are zero or any of source pointers are NULL/undefined,
- * result is of size zero.
- * @param src1 pointer to the first sortkey
- * @param src1Length length of the first sortkey
- * @param src2 pointer to the second sortkey
- * @param src2Length length of the second sortkey
- * @param dest buffer to hold the result
- * @param destCapacity size of the buffer for the result
- * @return size of the result. If the buffer is big enough size is always
- * src1Length+src2Length-1
+ * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
+ *
+ * If the destination buffer is not big enough, then its contents are undefined.
+ * If any of the source lengths are zero or any of the source pointers are NULL/undefined,
+ * the result is of size zero.
+ *
+ * @param src1 the first sort key
+ * @param src1Length the length of the first sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param src2 the second sort key
+ * @param src2Length the length of the second sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param dest the buffer where the merged sort key is written,
+ * can be NULL if destCapacity==0
+ * @param destCapacity the number of bytes in the dest buffer
+ * @return the length of the merged sort key, src1Length+src2Length;
+ * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
+ * in which cases the contents of dest are undefined
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
U_STABLE UColAttributeValue U_EXPORT2
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
-/** Variable top
- * is a two byte primary value which causes all the codepoints with primary values that
- * are less or equal than the variable top to be shifted when alternate handling is set
- * to UCOL_SHIFTED.
- * Sets the variable top to a collation element value of a string supplied.
- * @param coll collator which variable top needs to be changed
+/**
+ * Sets the variable top to the top of the specified reordering group.
+ * The variable top determines the highest-sorting character
+ * which is affected by UCOL_ALTERNATE_HANDLING.
+ * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
+ * @param coll the collator
+ * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
+ * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
+ * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @see ucol_getMaxVariable
+ * @stable ICU 53
+ */
+U_STABLE void U_EXPORT2
+ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
+
+/**
+ * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
+ * @param coll the collator
+ * @return the maximum variable reordering group.
+ * @see ucol_setMaxVariable
+ * @stable ICU 53
+ */
+U_STABLE UColReorderCode U_EXPORT2
+ucol_getMaxVariable(const UCollator *coll);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Sets the variable top to the primary weight of the specified string.
+ *
+ * Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See ucol_setMaxVariable().
+ * @param coll the collator
* @param varTop one or more (if contraction) UChars to which the variable top should be set
* @param len length of variable top string. If -1 it is considered to be zero terminated.
- * @param status error code. If error code is set, the return value is undefined.
- * Errors set by this function are: <br>
- * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such
- * a contraction<br>
- * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
- * @return a 32 bit value containing the value of the variable top in upper 16 bits.
- * Lower 16 bits are undefined
+ * @param status error code. If error code is set, the return value is undefined.
+ * Errors set by this function are:<br>
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
+ * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
+ * the last reordering group supported by ucol_setMaxVariable()
+ * @return variable top primary weight
* @see ucol_getVariableTop
* @see ucol_restoreVariableTop
- * @stable ICU 2.0
+ * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
*/
-U_STABLE uint32_t U_EXPORT2
+U_DEPRECATED uint32_t U_EXPORT2
ucol_setVariableTop(UCollator *coll,
const UChar *varTop, int32_t len,
UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Gets the variable top value of a Collator.
- * Lower 16 bits are undefined and should be ignored.
* @param coll collator which variable top needs to be retrieved
* @param status error code (not changed by function). If error code is set,
* the return value is undefined.
- * @return the variable top value of a Collator.
+ * @return the variable top primary weight
+ * @see ucol_getMaxVariable
* @see ucol_setVariableTop
* @see ucol_restoreVariableTop
* @stable ICU 2.0
*/
U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
-/**
- * Sets the variable top to a collation element value supplied. Variable top is
- * set to the upper 16 bits.
- * Lower 16 bits are ignored.
- * @param coll collator which variable top needs to be changed
- * @param varTop CE value, as returned by ucol_setVariableTop or ucol)getVariableTop
- * @param status error code (not changed by function)
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Sets the variable top to the specified primary weight.
+ *
+ * Beginning with ICU 53, the variable top is pinned to
+ * the top of one of the supported reordering groups,
+ * and it must not be beyond the last of those groups.
+ * See ucol_setMaxVariable().
+ * @param coll the collator
+ * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop
+ * @param status error code
* @see ucol_getVariableTop
* @see ucol_setVariableTop
- * @stable ICU 2.0
+ * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
*/
-U_STABLE void U_EXPORT2
+U_DEPRECATED void U_EXPORT2
ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Thread safe cloning operation. The result is a clone of a given collator.
* @param coll collator to be cloned
- * @param stackBuffer user allocated space for the new clone.
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone.
* If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
- * Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
- * This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space.
+ * Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
* If *pBufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If *pBufferSize is not enough for a stack-based safe clone,
int32_t *pBufferSize,
UErrorCode *status);
-/** default memory size for the new clone. It needs to be this large for os/400 large pointers
- * @stable ICU 2.0
+#ifndef U_HIDE_DEPRECATED_API
+
+/** default memory size for the new clone.
+ * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
*/
-#define U_COL_SAFECLONE_BUFFERSIZE 528
+#define U_COL_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Returns current rules. Delta defines whether full rules are returned or just the tailoring.
* Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
* to store rules, will store up to available space.
+ *
+ * ucol_getRules() should normally be used instead.
+ * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
* @param coll collator to get the rules from
* @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
* @param buffer buffer to store the result in. If NULL, you'll get no rules.
- * @param bufferLen lenght of buffer to store rules in. If less then needed you'll get only the part that fits in.
+ * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
* @return current rules
* @stable ICU 2.0
+ * @see UCOL_FULL_RULES
*/
U_STABLE int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
/**
- * Get an Unicode set that contains all the characters and sequences tailored in
+ * Get a Unicode set that contains all the characters and sequences tailored in
* this collator. The result must be disposed of by using uset_close.
* @param coll The UCollator for which we want to get tailored chars
* @param status error code of the operation
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
#ifndef U_HIDE_INTERNAL_API
-/**
- * Universal attribute getter that returns UCOL_DEFAULT if the value is default
- * @param coll collator which attributes are to be changed
- * @param attr attribute type
- * @return attribute value or UCOL_DEFAULT if the value is default
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @see UColAttribute
- * @see UColAttributeValue
- * @see ucol_setAttribute
- * @internal ICU 3.0
- */
-U_INTERNAL UColAttributeValue U_EXPORT2
-ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
-
-/** Check whether two collators are equal. Collators are considered equal if they
- * will sort strings the same. This means that both the current attributes and the
- * rules must be equivalent. Currently used for RuleBasedCollator::operator==.
- * @param source first collator
- * @param target second collator
- * @return TRUE or FALSE
- * @internal ICU 3.0
- */
-U_INTERNAL UBool U_EXPORT2
-ucol_equals(const UCollator *source, const UCollator *target);
-
/** Calculates the set of unsafe code points, given a collator.
* A character is unsafe if you could append any character and cause the ordering to alter significantly.
* Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
USet *unsafe,
UErrorCode *status);
-/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
- * @internal ICU 3.2.1
- */
-U_INTERNAL void U_EXPORT2
-ucol_forgetUCA(void);
-
/** Touches all resources needed for instantiating a collator from a short string definition,
* thus filling up the cache.
* @param definition A short string containing a locale and a set of attributes.
* ucol_cloneBinary. Binary image used in instantiation of the
* collator remains owned by the user and should stay around for
* the lifetime of the collator. The API also takes a base collator
- * which usualy should be UCA.
+ * which must be the root collator.
* @param bin binary image owned by the user and required through the
* lifetime of the collator
* @param length size of the image. If negative, the API will try to
* figure out the length of the image
- * @param base fallback collator, usually UCA. Base is required to be
- * present through the lifetime of the collator. Currently
- * it cannot be NULL.
+ * @param base Base collator, for lookup of untailored characters.
+ * Must be the root collator, must not be NULL.
+ * The base is required to be present through the lifetime of the collator.
* @param status for catching errors
* @return newly created collator
* @see ucol_cloneBinary