X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b25be06635768807f8f693286fa73bb2297bb06c..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/common/unicode/normalizer2.h diff --git a/icuSources/common/unicode/normalizer2.h b/icuSources/common/unicode/normalizer2.h index f448cc45..bfca5ab8 100644 --- a/icuSources/common/unicode/normalizer2.h +++ b/icuSources/common/unicode/normalizer2.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2009-2010, International Business Machines +* Copyright (C) 2009-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -77,6 +77,84 @@ U_NAMESPACE_BEGIN */ class U_COMMON_API Normalizer2 : public UObject { public: + /** + * Destructor. + * @stable ICU 4.4 + */ + ~Normalizer2(); + +#ifndef U_HIDE_DRAFT_API + /** + * Returns a Normalizer2 instance for Unicode NFC normalization. + * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFD normalization. + * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKD normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @draft ICU 49 + */ + static const Normalizer2 * + getNFKCCasefoldInstance(UErrorCode &errorCode); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns a Normalizer2 instance which uses the specified data file * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) @@ -175,18 +253,77 @@ public: UErrorCode &errorCode) const = 0; /** - * Gets the decomposition mapping of c. Equivalent to normalize(UnicodeString(c)) - * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. + * Gets the decomposition mapping of c. + * Roughly equivalent to normalizing the String form of c + * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function + * returns FALSE and does not write a string + * if c does not have a decomposition mapping in this instance's data. * This function is independent of the mode of the Normalizer2. * @param c code point * @param decomposition String object which will be set to c's * decomposition mapping, if there is one. * @return TRUE if c has a decomposition, otherwise FALSE - * @draft ICU 4.6 + * @stable ICU 4.6 */ virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; + /** + * Gets the raw decomposition mapping of c. + * + * This is similar to the getDecomposition() method but returns the + * raw decomposition mapping as specified in UnicodeData.txt or + * (for custom data) in the mapping files processed by the gennorm2 tool. + * By contrast, getDecomposition() returns the processed, + * recursively-decomposed version of this mapping. + * + * When used on a standard NFKC Normalizer2 instance, + * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. + * + * When used on a standard NFC Normalizer2 instance, + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); + * in this case, the result contains either one or two code points (=1..4 UChars). + * + * This function is independent of the mode of the Normalizer2. + * The default implementation returns FALSE. + * @param c code point + * @param decomposition String object which will be set to c's + * raw decomposition mapping, if there is one. + * @return TRUE if c has a decomposition, otherwise FALSE + * @draft ICU 49 + */ + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; + + /** + * Performs pairwise composition of a & b and returns the composite if there is one. + * + * Returns a composite code point c only if c has a two-way mapping to a+b. + * In standard Unicode normalization, this means that + * c has a canonical decomposition to a+b + * and c does not have the Full_Composition_Exclusion property. + * + * This function is independent of the mode of the Normalizer2. + * The default implementation returns a negative value. + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @draft ICU 49 + */ + virtual UChar32 + composePair(UChar32 a, UChar32 b) const; + + /** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param c code point + * @return c's combining class + * @draft ICU 49 + */ + virtual uint8_t + getCombiningClass(UChar32 c) const; + /** * Tests if the string is normalized. * Internally, in cases where the quickCheck() method would return "maybe" @@ -324,6 +461,12 @@ public: FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : norm2(n2), set(filterSet) {} + /** + * Destructor. + * @stable ICU 4.4 + */ + ~FilteredNormalizer2(); + /** * Writes the normalized form of the source string to the destination string * (replacing its contents) and returns the destination string. @@ -379,18 +522,57 @@ public: UErrorCode &errorCode) const; /** - * Gets the decomposition mapping of c. Equivalent to normalize(UnicodeString(c)) - * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. + * Gets the decomposition mapping of c. + * For details see the base class documentation. + * * This function is independent of the mode of the Normalizer2. * @param c code point * @param decomposition String object which will be set to c's * decomposition mapping, if there is one. * @return TRUE if c has a decomposition, otherwise FALSE - * @draft ICU 4.6 + * @stable ICU 4.6 */ virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const; + /** + * Gets the raw decomposition mapping of c. + * For details see the base class documentation. + * + * This function is independent of the mode of the Normalizer2. + * @param c code point + * @param decomposition String object which will be set to c's + * raw decomposition mapping, if there is one. + * @return TRUE if c has a decomposition, otherwise FALSE + * @draft ICU 49 + */ + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; + + /** + * Performs pairwise composition of a & b and returns the composite if there is one. + * For details see the base class documentation. + * + * This function is independent of the mode of the Normalizer2. + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @draft ICU 49 + */ + virtual UChar32 + composePair(UChar32 a, UChar32 b) const; + + /** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param c code point + * @return c's combining class + * @draft ICU 49 + */ + virtual uint8_t + getCombiningClass(UChar32 c) const; + /** * Tests if the string is normalized. * For details see the Normalizer2 base class documentation.