ICU-57163.0.1.tar.gz

[apple/icu.git] / icuSources / common / unicode / unorm.h
diff --git a/icuSources/common/unicode/unorm.h b/icuSources/common/unicode/unorm.h

index 31c522451164b28d646843238e58dae433a33043..ab3e34e84b3add1e019eeeffd8f94f27244fc772 100644 (file)
--- a/icuSources/common/unicode/unorm.h
+++ b/icuSources/common/unicode/unorm.h
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-* Copyright (c) 1996-2005, International Business Machines Corporation
+* Copyright (c) 1996-2016, International Business Machines Corporation
  *               and others. All Rights Reserved.
  *******************************************************************************
  * File unorm.h
@@ -20,12 +20,21 @@
  #if !UCONFIG_NO_NORMALIZATION
  
  #include "unicode/uiter.h"
+#include "unicode/unorm2.h"
+
+#ifndef U_HIDE_DEPRECATED_API
  
  /**
   * \file
- * \brief C API: Unicode Normalization 
+ * \brief C API: Unicode Normalization
+ *
+ * Old Unicode normalization API.
   *
- * <h2>Unicode normalization API</h2>
+ * This API has been replaced by the unorm2.h API and is only available
+ * for backward compatibility. The functions here simply delegate to the
+ * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize().
+ * There is one exception: The new API does not provide a replacement for unorm_compare().
+ * Its declaration has been moved to unorm2.h.
   *
   * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
   * decomposed form, allowing for easier sorting and searching of text.
@@ -51,7 +60,7 @@
   *
   * To a user of your program, however, both of these sequences should be
   * treated as the same "user-level" character "A with acute accent".  When you are searching or
- * comparing text, you must ensure that these two sequences are treated 
+ * comparing text, you must ensure that these two sequences are treated
   * equivalently.  In addition, you must handle characters with more than one
   * accent.  Sometimes the order of a character's combining accents is
   * significant, while in other cases accent sequences in different orders are
@@ -79,8 +88,8 @@
   * will often want to use these mappings.
   *
   * <code>unorm_normalize</code> helps solve these problems by transforming text into the
- * canonical composed and decomposed forms as shown in the first example above.  
- * In addition, you can have it perform compatibility decompositions so that 
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
   * you can treat compatibility characters the same as their equivalents.
   * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
   * order, so that you do not have to worry about accent rearrangement on your
@@ -106,7 +115,7 @@
   * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
   *
   * For more details on FCD see the collation design document:
- * http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icuhtml/design/collation/ICU_collation_design.htm
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
   *
   * ICU collation performs either NFD or FCD normalization automatically if normalization
   * is turned on for the collator object.
@@ -122,25 +131,25 @@
  
  /**
   * Constants for normalization modes.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  typedef enum {
-  /** No decomposition/composition. @stable ICU 2.0 */
-  UNORM_NONE = 1, 
-  /** Canonical decomposition. @stable ICU 2.0 */
+  /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */
+  UNORM_NONE = 1,
+  /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_NFD = 2,
-  /** Compatibility decomposition. @stable ICU 2.0 */
+  /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_NFKD = 3,
-  /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
+  /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_NFC = 4,
-  /** Default normalization. @stable ICU 2.0 */
-  UNORM_DEFAULT = UNORM_NFC, 
-  /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
+  /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */
+  UNORM_DEFAULT = UNORM_NFC,
+  /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_NFKC =5,
-  /** "Fast C or D" form. @stable ICU 2.0 */
+  /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_FCD = 6,
  
-  /** One more than the highest normalization mode constant. @stable ICU 2.0 */
+  /** One more than the highest normalization mode constant. @deprecated ICU 56 Use unorm2.h instead. */
    UNORM_MODE_COUNT
  } UNormalizationMode;
  
@@ -149,14 +158,14 @@ typedef enum {
   * Use 0 for default options,
   * including normalization according to the Unicode version
   * that is currently supported by ICU (see u_getUnicodeVersion).
- * @stable ICU 2.6
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  enum {
      /**
       * Options bit set value to select Unicode 3.2 normalization
       * (except NormalizationCorrections).
       * At most one Unicode version can be selected at a time.
-     * @stable ICU 2.6
+     * @deprecated ICU 56 Use unorm2.h instead.
       */
      UNORM_UNICODE_3_2=0x20
  };
@@ -174,7 +183,7 @@ enum {
   * internal normalization functions.)
   *
   * @see unorm_compare
- * @stable ICU 2.6
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
  
@@ -182,10 +191,11 @@ enum {
   * Normalize a string.
   * The string will be normalized according the specified normalization mode
   * and options.
+ * The source and result buffers must not be the same, nor overlap.
   *
   * @param source The string to normalize.
   * @param sourceLength The length of source, or -1 if NUL-terminated.
- * @param mode The normalization mode; one of UNORM_NONE, 
+ * @param mode The normalization mode; one of UNORM_NONE,
   *             UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
   * @param options The normalization options, ORed together (0 for no options).
   * @param result A pointer to a buffer to receive the result string.
@@ -194,44 +204,23 @@ enum {
   * @param status A pointer to a UErrorCode to receive any errors.
   * @return The total buffer size needed; if greater than resultLength,
   *         the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
-U_STABLE int32_t U_EXPORT2 
+U_STABLE int32_t U_EXPORT2
  unorm_normalize(const UChar *source, int32_t sourceLength,
                  UNormalizationMode mode, int32_t options,
                  UChar *result, int32_t resultLength,
                  UErrorCode *status);
-#endif
-/**
- * Result values for unorm_quickCheck().
- * For details see Unicode Technical Report 15.
- * @stable ICU 2.0
- */
-typedef enum UNormalizationCheckResult {
-  /** 
-   * Indicates that string is not in the normalized format
-   */
-  UNORM_NO,
-  /** 
-   * Indicates that string is in the normalized format
-   */
-  UNORM_YES,
-  /** 
-   * Indicates that string cannot be determined if it is in the normalized 
-   * format without further thorough checks.
-   */
-  UNORM_MAYBE
-} UNormalizationCheckResult;
-#if !UCONFIG_NO_NORMALIZATION
+
  /**
- * Performing quick check on a string, to quickly determine if the string is 
+ * Performs a quick check on a string to determine whether the string is
   * in a particular normalization format.
   * Three types of result can be returned UNORM_YES, UNORM_NO or
   * UNORM_MAYBE. Result UNORM_YES indicates that the argument
   * string is in the desired normalized format, UNORM_NO determines that
- * argument string is not in the desired normalized format. A 
- * UNORM_MAYBE result indicates that a more thorough check is required, 
- * the user may have to put the string in its normalized form and compare the 
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required;
+ * the user may have to put the string in its normalized form and compare the
   * results.
   *
   * @param source       string for determining if it is in a normalized format
@@ -241,7 +230,7 @@ typedef enum UNormalizationCheckResult {
   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
   *
   * @see unorm_isNormalized
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE UNormalizationCheckResult U_EXPORT2
  unorm_quickCheck(const UChar *source, int32_t sourcelength,
@@ -262,10 +251,10 @@ unorm_quickCheck(const UChar *source, int32_t sourcelength,
   *
   * @see unorm_quickCheck
   * @see unorm_isNormalized
- * @stable ICU 2.6
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE UNormalizationCheckResult U_EXPORT2
-unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
                              UNormalizationMode mode, int32_t options,
                              UErrorCode *pErrorCode);
  
@@ -288,7 +277,7 @@ unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
   *         "mode" normalization form.
   *
   * @see unorm_quickCheck
- * @stable ICU 2.2
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE UBool U_EXPORT2
  unorm_isNormalized(const UChar *src, int32_t srcLength,
@@ -310,7 +299,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
   *
   * @see unorm_quickCheck
   * @see unorm_isNormalized
- * @stable ICU 2.6
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE UBool U_EXPORT2
  unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
@@ -388,7 +377,7 @@ unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
   * @see unorm_previous
   * @see unorm_normalize
   *
- * @stable ICU 2.1
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE int32_t U_EXPORT2
  unorm_next(UCharIterator *src,
@@ -421,7 +410,7 @@ unorm_next(UCharIterator *src,
   * @see unorm_next
   * @see unorm_normalize
   *
- * @stable ICU 2.1
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE int32_t U_EXPORT2
  unorm_previous(UCharIterator *src,
@@ -451,7 +440,7 @@ unorm_previous(UCharIterator *src,
   *
   * @param left Left source string, may be same as dest.
   * @param leftLength Length of left source string, or -1 if NUL-terminated.
- * @param right Right source string.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
   * @param rightLength Length of right source string, or -1 if NUL-terminated.
   * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
   * @param destCapacity The number of UChars that fit into dest.
@@ -465,7 +454,7 @@ unorm_previous(UCharIterator *src,
   * @see unorm_next
   * @see unorm_previous
   *
- * @stable ICU 2.1
+ * @deprecated ICU 56 Use unorm2.h instead.
   */
  U_STABLE int32_t U_EXPORT2
  unorm_concatenate(const UChar *left, int32_t leftLength,
@@ -474,102 +463,6 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
                    UNormalizationMode mode, int32_t options,
                    UErrorCode *pErrorCode);
  
-/**
- * Option bit for unorm_compare:
- * Both input strings are assumed to fulfill FCD conditions.
- * @stable ICU 2.2
- */
-#define UNORM_INPUT_IS_FCD          0x20000
-
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @stable ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and ustring.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
-/**
- * Compare two strings for canonical equivalence.
- * Further options include case-insensitive comparison and
- * code point order (as opposed to code unit order).
- *
- * Canonical equivalence between two strings is defined as their normalized
- * forms (NFD or NFC) being identical.
- * This function compares strings incrementally instead of normalizing
- * (and optionally case-folding) both strings entirely,
- * improving performance significantly.
- *
- * Bulk normalization is only necessary if the strings do not fulfill the FCD
- * conditions. Only in this case, and only if the strings are relatively long,
- * is memory allocated temporarily.
- * For FCD strings and short non-FCD strings there is no memory allocation.
- *
- * Semantically, this is equivalent to
- *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
- * where code point order and foldCase are all optional.
- *
- * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
- * the case folding must be performed first, then the normalization.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string, or -1 if NUL-terminated.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string, or -1 if NUL-terminated.
- *
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Case-sensitive comparison in code unit order, and the input strings
- *     are quick-checked for FCD.
- *
- *   - UNORM_INPUT_IS_FCD
- *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
- *     If not set, the function will quickCheck for FCD
- *     and normalize if necessary.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_COMPARE_IGNORE_CASE
- *     Set to compare strings case-insensitively using case folding,
- *     instead of case-sensitively.
- *     If set, then the following case folding options are used.
- *
- *   - Options as used with case-insensitive comparisons, currently:
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *    (see u_strCaseCompare for details)
- *
- *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
- *
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @see unorm_normalize
- * @see UNORM_FCD
- * @see u_strCompare
- * @see u_strCaseCompare
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-unorm_compare(const UChar *s1, int32_t length1,
-              const UChar *s2, int32_t length2,
-              uint32_t options,
-              UErrorCode *pErrorCode);
-
+#endif  /* U_HIDE_DEPRECATED_API */
  #endif /* #if !UCONFIG_NO_NORMALIZATION */
-
  #endif