X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/9d88c94317aeac5dd26c1dbe8c2112dbe855d2b5..73c04bcfe1096173b00431f0cdc742894b15eef0:/icuSources/i18n/unicode/ucol.h?ds=sidebyside diff --git a/icuSources/i18n/unicode/ucol.h b/icuSources/i18n/unicode/ucol.h index 7ef0cb62..ed4c13c1 100644 --- a/icuSources/i18n/unicode/ucol.h +++ b/icuSources/i18n/unicode/ucol.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (c) 1996-2004, International Business Machines Corporation and others. +* Copyright (c) 1996-2006, International Business Machines Corporation and others. * All Rights Reserved. ******************************************************************************* */ @@ -29,11 +29,11 @@ * Important: The ICU collation service has been reimplemented * in order to achieve better performance and UCA compliance. * For details, see the - * + * * collation design document. *

* For more information about the collation service see - * the users guide. + * the users guide. *

* Collation service provides correct sorting orders for most locales supported in ICU. * If specific data for a locale is not available, the orders eventually fall back @@ -41,7 +41,7 @@ *

* Sort ordering may be customized by providing your own set of rules. For more on * this subject see the - * + * * Collation customization section of the users guide. *

* @see UCollationResult @@ -50,15 +50,6 @@ * @see UCollationElements */ -/** A collation element iterator. -* For usage in C programs. -*/ -struct collIterate; -/** structure representing collation element iterator instance - * @stable ICU 2.0 - */ -typedef struct collIterate collIterate; - /** A collator. * For usage in C programs. */ @@ -149,7 +140,7 @@ typedef enum { * Diacritical differences on the same base letter represent a secondary * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary * differences. Use this to set the strength of a Collator object. - * Example of secondary difference, "ä" >> "a". + * Example of secondary difference, "ä" >> "a". * * Uppercase and lowercase versions of the same character represents a * tertiary difference. Set comparison level to UCOL_TERTIARY to include @@ -159,7 +150,7 @@ typedef enum { * * Two characters are considered "identical" when they have the same * unicode spellings. UCOL_IDENTICAL. - * For example, "ä" == "ä". + * For example, "ä" == "ä". * * UCollationStrength is also used to determine the strength of sort keys * generated from UCollator objects @@ -173,12 +164,12 @@ typedef UColAttributeValue UCollationStrength; * @stable ICU 2.0 */ typedef enum { - /** Attribute for direction of secondary weights - used in French.\ + /** Attribute for direction of secondary weights - used in French. * Acceptable values are UCOL_ON, which results in secondary weights * being considered backwards and UCOL_OFF which treats secondary * weights in the order they appear.*/ UCOL_FRENCH_COLLATION, - /** Attribute for handling variable elements.\ + /** Attribute for handling variable elements. 
* Acceptable values are UCOL_NON_IGNORABLE (default) * which treats all the codepoints with non-ignorable * primary weights in the same way, @@ -187,7 +178,7 @@ typedef enum { * to be ignored on primary level and moved to the quaternary * level.*/ UCOL_ALTERNATE_HANDLING, - /** Controls the ordering of upper and lower case letters.\ + /** Controls the ordering of upper and lower case letters. * Acceptable values are UCOL_OFF (default), which orders * upper and lower case letters in accordance to their tertiary * weights, UCOL_UPPER_FIRST which forces upper case letters to @@ -195,45 +186,41 @@ typedef enum { * the opposite. */ UCOL_CASE_FIRST, /** Controls whether an extra case level (positioned before the third - * level) is generated or not.\ Acceptable values are UCOL_OFF (default), + * level) is generated or not. Acceptable values are UCOL_OFF (default), * when case level is not generated, and UCOL_ON which causes the case - * level to be generated.\ Contents of the case level are affected by - * the value of UCOL_CASE_FIRST attribute.\ A simple way to ignore + * level to be generated. Contents of the case level are affected by + * the value of UCOL_CASE_FIRST attribute. A simple way to ignore * accent differences in a string is to set the strength to UCOL_PRIMARY * and enable case level. */ UCOL_CASE_LEVEL, /** Controls whether the normalization check and necessary normalizations - * are performed.\ When set to UCOL_OFF (default) no normalization check - * is performed.\ The correctness of the result is guaranteed only if the - * input data is in so-called FCD form (see users manual for more info).\ - * When set to UCOL_ON, an incremental check is performed to see whether the input data - * is in the FCD form.\ If the data is not in the FCD form, incremental - * NFD normalization is performed. */ + * are performed. When set to UCOL_OFF (default) no normalization check + * is performed. 
The correctness of the result is guaranteed only if the + * input data is in so-called FCD form (see users manual for more info). + * When set to UCOL_ON, an incremental check is performed to see whether + * the input data is in the FCD form. If the data is not in the FCD form, + * incremental NFD normalization is performed. */ UCOL_NORMALIZATION_MODE, /** An alias for UCOL_NORMALIZATION_MODE attribute */ UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, - /** The strength attribute.\ Can be either UCOL_PRIMARY, UCOL_SECONDARY, - * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.\ The usual strength - * for most locales (except Japanese) is tertiary.\ Quaternary strength + /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength + * for most locales (except Japanese) is tertiary. Quaternary strength * is useful when combined with shifted setting for alternate handling * attribute and for JIS x 4061 collation, when it is used to distinguish * between Katakana and Hiragana (this is achieved by setting the - * UCOL_HIRAGANA_QUATERNARY mode to on.\ Otherwise, quaternary level + * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level * is affected only by the number of non ignorable code points in - * the string.\ Identical strength is rarely useful, as it amounts + * the string. Identical strength is rarely useful, as it amounts * to codepoints of the NFD form of the string. */ UCOL_STRENGTH, - /** when turned on, this attribute - * positions Hiragana before all - * non-ignorables on quaternary level - * This is a sneaky way to produce JIS - * sort order */ + /** When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level This is a sneaky way to produce JIS + * sort order */ UCOL_HIRAGANA_QUATERNARY_MODE, - /** when turned on, this attribute - * generates a collation key - * for the numeric value of substrings - * of digits. 
This is a way to get '100' - * to sort AFTER '2'.*/ + /** When turned on, this attribute generates a collation key + * for the numeric value of substrings of digits. + * This is a way to get '100' to sort AFTER '2'. */ UCOL_NUMERIC_COLLATION, UCOL_ATTRIBUTE_COUNT } UColAttribute; @@ -286,7 +273,7 @@ ucol_open(const char *loc, UErrorCode *status); * occurred during parsing. This argument can currently be set * to NULL, but at users own risk. Please provide a real structure. * @param status A pointer to an UErrorCode to receive any errors - * @return A pointer to a UCollator.\ It is not guaranteed that NULL be returned in case + * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case * of error - please use status argument to check for errors. * @see ucol_open * @see ucol_safeClone @@ -305,7 +292,7 @@ ucol_openRules( const UChar *rules, * Open a collator defined by a short form string. * The structure and the syntax of the string is defined in the "Naming collators" * section of the users guide: - * http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + * http://icu.sourceforge.net/userguide/Collate_Concepts.html#Naming_Collators * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final * strength will be 3. 3066bis locale overrides individual locale parts. 
* The call to this function is equivalent to a call to ucol_open, followed by a @@ -332,10 +319,10 @@ ucol_openRules( const UChar *rules, * @see ucol_setVariableTop * @see ucol_getShortDefinitionString * @see ucol_normalizeShortDefinitionString - * @draft ICU 3.0 + * @stable ICU 3.0 * */ -U_CAPI UCollator* U_EXPORT2 +U_STABLE UCollator* U_EXPORT2 ucol_openFromShortString( const char *definition, UBool forceDefaults, UParseError *parseError, @@ -352,18 +339,33 @@ ucol_openFromShortString( const char *definition, * @param status to hold the error code * @return the size of the contraction set * - * @draft ICU 3.0 + * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead */ -U_CAPI int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 ucol_getContractions( const UCollator *coll, USet *conts, UErrorCode *status); +/** + * Get a set containing the expansions defined by the collator. The set includes + * both the UCA expansions and the expansions defined by the tailoring + * @param coll collator + * @param contractions if not NULL, the set to hold the contractions + * @param expansions if not NULL, the set to hold the expansions + * @param addPrefixes add the prefix contextual elements to contractions + * @param status to hold the error code + * + * @draft ICU 3.4 + */ +U_DRAFT void U_EXPORT2 +ucol_getContractionsAndExpansions( const UCollator *coll, + USet *contractions, USet *expansions, + UBool addPrefixes, UErrorCode *status); /** * Close a UCollator. - * Once closed, a UCollator should not be used.\ Every open collator should - * be closed.\ Otherwise, a memory leak will result. + * Once closed, a UCollator should not be used. Every open collator should + * be closed. Otherwise, a memory leak will result. * @param coll The UCollator to close. * @see ucol_open * @see ucol_openRules @@ -544,9 +546,9 @@ ucol_countAvailable(void); * @param status input-output error code * @return a string enumeration over locale strings. 
The caller is * responsible for closing the result. - * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_DRAFT UEnumeration* U_EXPORT2 +U_STABLE UEnumeration* U_EXPORT2 ucol_openAvailableLocales(UErrorCode *status); #endif @@ -557,9 +559,9 @@ ucol_openAvailableLocales(UErrorCode *status); * @param status input-output error code * @return a string enumeration over locale strings. The caller is * responsible for closing the result. - * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_DRAFT UEnumeration* U_EXPORT2 +U_STABLE UEnumeration* U_EXPORT2 ucol_getKeywords(UErrorCode *status); /** @@ -571,9 +573,9 @@ ucol_getKeywords(UErrorCode *status); * @param status input-output error code * @return a string enumeration over collation keyword values, or NULL * upon error. The caller is responsible for closing the result. - * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_DRAFT UEnumeration* U_EXPORT2 +U_STABLE UEnumeration* U_EXPORT2 ucol_getKeywordValues(const char *keyword, UErrorCode *status); /** @@ -589,7 +591,7 @@ ucol_getKeywordValues(const char *keyword, UErrorCode *status); * applications who wish to cache collators, or otherwise reuse * collators when possible. The functional equivalent may change * over time. For more information, please see the + * href="http://icu.sourceforge.net/userguide/locale.html#services"> * Locales and Services section of the ICU User Guide. * @param result fillin for the functionally equivalent locale * @param resultCapacity capacity of the fillin buffer @@ -604,9 +606,9 @@ ucol_getKeywordValues(const char *keyword, UErrorCode *status); * @return the actual buffer size needed for the locale. If greater * than resultCapacity, the returned full name will be truncated and * an error code will be returned. 
- * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, const char* keyword, const char* locale, UBool* isAvailable, UErrorCode* status); @@ -629,7 +631,7 @@ ucol_getRules( const UCollator *coll, * This string will be normalized. * The structure and the syntax of the string is defined in the "Naming collators" * section of the users guide: - * http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators + * http://icu.sourceforge.net/userguide/Collate_Concepts.html#Naming_Collators * This API supports preflighting. * @param coll a collator * @param locale a locale that will appear as a collators locale in the resulting @@ -641,9 +643,9 @@ ucol_getRules( const UCollator *coll, * @return length of the resulting string * @see ucol_openFromShortString * @see ucol_normalizeShortDefinitionString - * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_CAPI int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ucol_getShortDefinitionString(const UCollator *coll, const char *locale, char *buffer, @@ -667,10 +669,10 @@ ucol_getShortDefinitionString(const UCollator *coll, * @see ucol_openFromShortString * @see ucol_getShortDefinitionString * - * @draft ICU 3.0 + * @stable ICU 3.0 */ -U_CAPI int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ucol_normalizeShortDefinitionString(const char *source, char *destination, int32_t capacity, @@ -804,9 +806,9 @@ ucol_getVersion(const UCollator* coll, UVersionInfo info); * UCA version number (3.1.1, 4.0). * @param coll The UCollator to query. * @param info the version # information, the result will be filled in - * @draft ICU 2.8 + * @stable ICU 2.8 */ -U_DRAFT void U_EXPORT2 +U_STABLE void U_EXPORT2 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); /** @@ -992,9 +994,9 @@ ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *statu * @return real locale name from which the collation data comes. 
* If the collator was instantiated from rules, returns * NULL. - * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback + * @stable ICU 2.8 */ -U_DRAFT const char * U_EXPORT2 +U_STABLE const char * U_EXPORT2 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); /** @@ -1010,12 +1012,14 @@ ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode U_STABLE USet * U_EXPORT2 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); +#ifndef U_HIDE_INTERNAL_API /** * Returned by ucol_collatorToIdentifier to signify that collator is * not encodable as an identifier. * @internal ICU 3.0 */ #define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000 +#endif /* U_HIDE_INTERNAL_API */ /** * Get a 31-bit identifier given a collator. @@ -1127,6 +1131,10 @@ U_INTERNAL UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target); /** Calculates the set of unsafe code points, given a collator. + * A character is unsafe if you could append any character and cause the ordering to alter significantly. + * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. + * Thus if you have a character like a_umlaut, and you add a lower_dot to it, + * then it normalizes to a_lower_dot + umlaut, and sorts differently. * @param coll Collator * @param unsafe a fill-in set to receive the unsafe points * @param status for catching errors @@ -1138,6 +1146,38 @@ ucol_getUnsafeSet( const UCollator *coll, USet *unsafe, UErrorCode *status); +/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. + * @internal ICU 3.2.1 + */ +U_INTERNAL void U_EXPORT2 +ucol_forgetUCA(void); + +/** Touches all resources needed for instantiating a collator from a short string definition, + * thus filling up the cache. + * @param definition A short string containing a locale and a set of attributes. 
+ * Attributes not explicitly mentioned are left at the default + * state for a locale. + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error. + * @param forceDefaults if FALSE, the settings that are the same as the collator + * default settings will not be applied (for example, setting + * French secondary on a French collator would not be executed). + * If TRUE, all the settings will be applied regardless of the + * collator default value. If the definition + * strings are to be cached, should be set to FALSE. + * @param status Error code. Apart from regular error conditions connected to + * instantiating collators (like out of memory or similar), this + * API will return an error if an invalid attribute or attribute/value + * combination is specified. + * @see ucol_openFromShortString + * @internal ICU 3.2.1 + */ +U_INTERNAL void U_EXPORT2 +ucol_prepareShortStringOpen( const char *definition, + UBool forceDefaults, + UParseError *parseError, + UErrorCode *status); + /** Creates a binary image of a collator. This binary image can be stored and * later used to instantiate a collator using ucol_openBinary. * This API supports preflighting. @@ -1147,9 +1187,9 @@ ucol_getUnsafeSet( const UCollator *coll, * @param status for catching errors * @return size of the image * @see ucol_openBinary - * @draft ICU 3.2 + * @stable ICU 3.2 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ucol_cloneBinary(const UCollator *coll, uint8_t *buffer, int32_t capacity, UErrorCode *status); @@ -1169,9 +1209,9 @@ ucol_cloneBinary(const UCollator *coll, * @param status for catching errors * @return newly created collator * @see ucol_cloneBinary - * @draft ICU 3.2 + * @stable ICU 3.2 */ -U_DRAFT UCollator* U_EXPORT2 +U_STABLE UCollator* U_EXPORT2 ucol_openBinary(const uint8_t *bin, int32_t length, const UCollator *base, UErrorCode *status);