/*
*******************************************************************************
-* Copyright (c) 1996-2004, International Business Machines Corporation and others.
+* Copyright (c) 1996-2006, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
* <em>Important: </em>The ICU collation service has been reimplemented
* in order to achieve better performance and UCA compliance.
* For details, see the
- * <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
+ * <a href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
* collation design document</a>.
* <p>
* For more information about the collation service see
- * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Intro.html">the users guide</a>.
+ * <a href="http://icu.sourceforge.net/userguide/Collate_Intro.html">the users guide</a>.
* <p>
* Collation service provides correct sorting orders for most locales supported in ICU.
* If specific data for a locale is not available, the order eventually falls back
* <p>
* Sort ordering may be customized by providing your own set of rules. For more on
* this subject see the
- * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Customization.html">
+ * <a href="http://icu.sourceforge.net/userguide/Collate_Customization.html">
* Collation customization</a> section of the users guide.
* <p>
* @see UCollationResult
* @see UCollationElements
*/
-/** A collation element iterator.
-* For usage in C programs.
-*/
-struct collIterate;
-/** structure representing collation element iterator instance
- * @stable ICU 2.0
- */
-typedef struct collIterate collIterate;
-
/** A collator.
* For usage in C programs.
*/
* Diacritical differences on the same base letter represent a secondary
* difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
* differences. Use this to set the strength of a Collator object.
- * Example of secondary difference, "ä" >> "a".
+ * Example of secondary difference, "&auml;" >> "a".
*
* Uppercase and lowercase versions of the same character represent a
* tertiary difference. Set comparison level to UCOL_TERTIARY to include
*
* Two characters are considered "identical" when they have the same
* unicode spellings. UCOL_IDENTICAL.
- * For example, "ä" == "ä".
+ * For example, "&auml;" == "&auml;".
*
* UCollationStrength is also used to determine the strength of sort keys
* generated from UCollator objects
* @stable ICU 2.0
*/
typedef enum {
- /** Attribute for direction of secondary weights - used in French.\
+ /** Attribute for direction of secondary weights - used in French.
* Acceptable values are UCOL_ON, which results in secondary weights
* being considered backwards and UCOL_OFF which treats secondary
* weights in the order they appear.*/
UCOL_FRENCH_COLLATION,
- /** Attribute for handling variable elements.\
+ /** Attribute for handling variable elements.
* Acceptable values are UCOL_NON_IGNORABLE (default)
* which treats all the codepoints with non-ignorable
* primary weights in the same way,
* to be ignored on primary level and moved to the quaternary
* level.*/
UCOL_ALTERNATE_HANDLING,
- /** Controls the ordering of upper and lower case letters.\
+ /** Controls the ordering of upper and lower case letters.
* Acceptable values are UCOL_OFF (default), which orders
* upper and lower case letters in accordance to their tertiary
* weights, UCOL_UPPER_FIRST which forces upper case letters to
* the opposite. */
UCOL_CASE_FIRST,
/** Controls whether an extra case level (positioned before the third
- * level) is generated or not.\ Acceptable values are UCOL_OFF (default),
+ * level) is generated or not. Acceptable values are UCOL_OFF (default),
* when case level is not generated, and UCOL_ON which causes the case
- * level to be generated.\ Contents of the case level are affected by
- * the value of UCOL_CASE_FIRST attribute.\ A simple way to ignore
+ * level to be generated. Contents of the case level are affected by
+ * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
* accent differences in a string is to set the strength to UCOL_PRIMARY
* and enable case level. */
UCOL_CASE_LEVEL,
/** Controls whether the normalization check and necessary normalizations
- * are performed.\ When set to UCOL_OFF (default) no normalization check
- * is performed.\ The correctness of the result is guaranteed only if the
- * input data is in so-called FCD form (see users manual for more info).\
- * When set to UCOL_ON, an incremental check is performed to see whether the input data
- * is in the FCD form.\ If the data is not in the FCD form, incremental
- * NFD normalization is performed. */
+ * are performed. When set to UCOL_OFF (default) no normalization check
+ * is performed. The correctness of the result is guaranteed only if the
+ * input data is in so-called FCD form (see users manual for more info).
+ * When set to UCOL_ON, an incremental check is performed to see whether
+ * the input data is in the FCD form. If the data is not in the FCD form,
+ * incremental NFD normalization is performed. */
UCOL_NORMALIZATION_MODE,
/** An alias for UCOL_NORMALIZATION_MODE attribute */
UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
- /** The strength attribute.\ Can be either UCOL_PRIMARY, UCOL_SECONDARY,
- * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.\ The usual strength
- * for most locales (except Japanese) is tertiary.\ Quaternary strength
+ /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
+ * for most locales (except Japanese) is tertiary. Quaternary strength
* is useful when combined with shifted setting for alternate handling
* attribute and for JIS x 4061 collation, when it is used to distinguish
* between Katakana and Hiragana (this is achieved by setting the
- * UCOL_HIRAGANA_QUATERNARY mode to on.\ Otherwise, quaternary level
+ * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level
* is affected only by the number of non ignorable code points in
- * the string.\ Identical strength is rarely useful, as it amounts
+ * the string. Identical strength is rarely useful, as it amounts
* to codepoints of the NFD form of the string. */
UCOL_STRENGTH,
- /** when turned on, this attribute
- * positions Hiragana before all
- * non-ignorables on quaternary level
- * This is a sneaky way to produce JIS
- * sort order */
+ /** When turned on, this attribute positions Hiragana before all
+ * non-ignorables on quaternary level. This is a sneaky way to produce JIS
+ * sort order */
UCOL_HIRAGANA_QUATERNARY_MODE,
- /** when turned on, this attribute
- * generates a collation key
- * for the numeric value of substrings
- * of digits. This is a way to get '100'
- * to sort AFTER '2'.*/
+ /** When turned on, this attribute generates a collation key
+ * for the numeric value of substrings of digits.
+ * This is a way to get '100' to sort AFTER '2'. */
UCOL_NUMERIC_COLLATION,
UCOL_ATTRIBUTE_COUNT
} UColAttribute;
* occurred during parsing. This argument can currently be set
* to NULL, but at the user's own risk. Please provide a real structure.
* @param status A pointer to an UErrorCode to receive any errors
- * @return A pointer to a UCollator.\ It is not guaranteed that NULL be returned in case
+ * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
* of error - please use status argument to check for errors.
* @see ucol_open
* @see ucol_safeClone
* Open a collator defined by a short form string.
* The structure and the syntax of the string is defined in the "Naming collators"
* section of the users guide:
- * http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
+ * http://icu.sourceforge.net/userguide/Collate_Concepts.html#Naming_Collators
* Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
* strength will be 3. 3066bis locale overrides individual locale parts.
* The call to this function is equivalent to a call to ucol_open, followed by a
* @see ucol_setVariableTop
* @see ucol_getShortDefinitionString
* @see ucol_normalizeShortDefinitionString
- * @draft ICU 3.0
+ * @stable ICU 3.0
*
*/
-U_CAPI UCollator* U_EXPORT2
+U_STABLE UCollator* U_EXPORT2
ucol_openFromShortString( const char *definition,
UBool forceDefaults,
UParseError *parseError,
* @param status to hold the error code
* @return the size of the contraction set
*
- * @draft ICU 3.0
+ * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
*/
-U_CAPI int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
ucol_getContractions( const UCollator *coll,
USet *conts,
UErrorCode *status);
+/**
+ * Get the sets of contractions and expansions defined by the collator. The sets
+ * include both the UCA entries and the entries defined by the tailoring.
+ * @param coll collator
+ * @param contractions if not NULL, the set to hold the contractions
+ * @param expansions if not NULL, the set to hold the expansions
+ * @param addPrefixes add the prefix contextual elements to contractions
+ * @param status to hold the error code
+ *
+ * @draft ICU 3.4
+ */
+U_DRAFT void U_EXPORT2
+ucol_getContractionsAndExpansions( const UCollator *coll,
+ USet *contractions, USet *expansions,
+ UBool addPrefixes, UErrorCode *status);
/**
* Close a UCollator.
- * Once closed, a UCollator should not be used.\ Every open collator should
- * be closed.\ Otherwise, a memory leak will result.
+ * Once closed, a UCollator should not be used. Every open collator should
+ * be closed. Otherwise, a memory leak will result.
* @param coll The UCollator to close.
* @see ucol_open
* @see ucol_openRules
* @param status input-output error code
* @return a string enumeration over locale strings. The caller is
* responsible for closing the result.
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_DRAFT UEnumeration* U_EXPORT2
+U_STABLE UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode *status);
#endif
* @param status input-output error code
* @return a string enumeration over locale strings. The caller is
* responsible for closing the result.
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_DRAFT UEnumeration* U_EXPORT2
+U_STABLE UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode *status);
/**
* @param status input-output error code
* @return a string enumeration over collation keyword values, or NULL
* upon error. The caller is responsible for closing the result.
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_DRAFT UEnumeration* U_EXPORT2
+U_STABLE UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char *keyword, UErrorCode *status);
/**
* applications that wish to cache collators, or otherwise reuse
* collators when possible. The functional equivalent may change
* over time. For more information, please see the <a
- * href="http://oss.software.ibm.com/icu/userguide/locale.html#services">
+ * href="http://icu.sourceforge.net/userguide/locale.html#services">
* Locales and Services</a> section of the ICU User Guide.
* @param result fillin for the functionally equivalent locale
* @param resultCapacity capacity of the fillin buffer
* @return the actual buffer size needed for the locale. If greater
* than resultCapacity, the returned full name will be truncated and
* an error code will be returned.
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
const char* keyword, const char* locale,
UBool* isAvailable, UErrorCode* status);
* This string will be normalized.
* The structure and the syntax of the string is defined in the "Naming collators"
* section of the users guide:
- * http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
+ * http://icu.sourceforge.net/userguide/Collate_Concepts.html#Naming_Collators
* This API supports preflighting.
* @param coll a collator
* @param locale a locale that will appear as a collators locale in the resulting
* @return length of the resulting string
* @see ucol_openFromShortString
* @see ucol_normalizeShortDefinitionString
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_CAPI int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
const char *locale,
char *buffer,
* @see ucol_openFromShortString
* @see ucol_getShortDefinitionString
*
- * @draft ICU 3.0
+ * @stable ICU 3.0
*/
-U_CAPI int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_normalizeShortDefinitionString(const char *source,
char *destination,
int32_t capacity,
* UCA version number (3.1.1, 4.0).
* @param coll The UCollator to query.
* @param info the version # information, the result will be filled in
- * @draft ICU 2.8
+ * @stable ICU 2.8
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
/**
* @return real locale name from which the collation data comes.
* If the collator was instantiated from rules, returns
* NULL.
- * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ * @stable ICU 2.8
*/
-U_DRAFT const char * U_EXPORT2
+U_STABLE const char * U_EXPORT2
ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
/**
U_STABLE USet * U_EXPORT2
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
+#ifndef U_HIDE_INTERNAL_API
/**
* Returned by ucol_collatorToIdentifier to signify that collator is
* not encodable as an identifier.
* @internal ICU 3.0
*/
#define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000
+#endif /* U_HIDE_INTERNAL_API */
/**
* Get a 31-bit identifier given a collator.
ucol_equals(const UCollator *source, const UCollator *target);
/** Calculates the set of unsafe code points, given a collator.
+ * A character is unsafe if you could append any character and cause the ordering to alter significantly.
+ * Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
+ * Thus if you have a character like a_umlaut, and you add a lower_dot to it,
+ * then it normalizes to a_lower_dot + umlaut, and sorts differently.
* @param coll Collator
* @param unsafe a fill-in set to receive the unsafe points
* @param status for catching errors
USet *unsafe,
UErrorCode *status);
+/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_forgetUCA(void);
+
+/** Touches all resources needed for instantiating a collator from a short string definition,
+ * thus filling up the cache.
+ * @param definition A short string containing a locale and a set of attributes.
+ * Attributes not explicitly mentioned are left at the default
+ * state for a locale.
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ * and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator
+ * default settings will not be applied (for example, setting
+ * French secondary on a French collator would not be executed).
+ * If TRUE, all the settings will be applied regardless of the
+ * collator default value. If the definition
+ * strings are to be cached, should be set to FALSE.
+ * @param status Error code. Apart from regular error conditions connected to
+ * instantiating collators (like out of memory or similar), this
+ * API will return an error if an invalid attribute or attribute/value
+ * combination is specified.
+ * @see ucol_openFromShortString
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_prepareShortStringOpen( const char *definition,
+ UBool forceDefaults,
+ UParseError *parseError,
+ UErrorCode *status);
+
/** Creates a binary image of a collator. This binary image can be stored and
* later used to instantiate a collator using ucol_openBinary.
* This API supports preflighting.
* @param status for catching errors
* @return size of the image
* @see ucol_openBinary
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_cloneBinary(const UCollator *coll,
uint8_t *buffer, int32_t capacity,
UErrorCode *status);
* @param status for catching errors
* @return newly created collator
* @see ucol_cloneBinary
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
-U_DRAFT UCollator* U_EXPORT2
+U_STABLE UCollator* U_EXPORT2
ucol_openBinary(const uint8_t *bin, int32_t length,
const UCollator *base,
UErrorCode *status);