ICU-551.51.4.tar.gz

[apple/icu.git] / icuSources / common / unicode / normlzr.h
diff --git a/icuSources/common/unicode/normlzr.h b/icuSources/common/unicode/normlzr.h

index 946f4c9ded41e90fefba631324adf775904b8631..06cbfd477e6735148db1e618b0a2e0455c1d6d23 100644 (file)
--- a/icuSources/common/unicode/normlzr.h
+++ b/icuSources/common/unicode/normlzr.h
@@ -1,7 +1,7 @@
  /*
   ********************************************************************
- * COPYRIGHT: 
- * Copyright (c) 1996-2003, International Business Machines Corporation and
+ * COPYRIGHT:
+ * Copyright (c) 1996-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************
   */
@@ -11,19 +11,28 @@
  
  #include "unicode/utypes.h"
  
+/**
+ * \file 
+ * \brief C++ API: Unicode Normalization
+ */
+ 
  #if !UCONFIG_NO_NORMALIZATION
  
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
  #include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
  #include "unicode/unorm.h"
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+#include "unicode/uobject.h"
  
  U_NAMESPACE_BEGIN
  /**
- * \brief C++ API: Unicode Normalization 
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Note: This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
   *
   * The Normalizer class consists of two parts:
   * - static functions that normalize strings or test if strings are normalized
@@ -32,13 +41,11 @@ U_NAMESPACE_BEGIN
   *
   * The Normalizer class is not suitable for subclassing.
   *
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
   * For basic information about normalization forms and details about the C API
   * please see the documentation in unorm.h.
   *
   * The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
+ * use a CharacterIterator as input. It is possible to pass a string which
   * is then internally wrapped in a CharacterIterator.
   * The input text is not normalized all at once, but incrementally where needed
   * (providing efficient random access).
@@ -109,6 +116,14 @@ U_NAMESPACE_BEGIN
   * then the internal index is 0 and one can return to this getIndex()
   * later with setIndexOnly().
   *
+ * Note: While the setIndex() and getIndex() refer to indices in the
+ * underlying Unicode input text, the next() and previous() methods
+ * iterate through characters in the normalized output.
+ * This means that there is not necessarily a one-to-one correspondence
+ * between characters returned by next() and previous() and the indices
+ * passed to and returned from setIndex() and getIndex().
+ * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+ *
   * @author Laura Werner, Mark Davis, Markus Scherer
   * @stable ICU 2.0
   */
@@ -136,7 +151,7 @@ public:
     * @stable ICU 2.0
     */
    Normalizer(const UnicodeString& str, UNormalizationMode mode);
-    
+
    /**
     * Creates a new <code>Normalizer</code> object for iterating over the
     * normalized form of a given string.
@@ -173,7 +188,7 @@ public:
     * Destructor
     * @stable ICU 2.0
     */
-  ~Normalizer();
+  virtual ~Normalizer();
  
  
    //-------------------------------------------------------------------------
@@ -194,7 +209,7 @@ public:
     * @param status    The error code.
     * @stable ICU 2.0
     */
-  static void normalize(const UnicodeString& source,
+  static void U_EXPORT2 normalize(const UnicodeString& source,
                          UNormalizationMode mode, int32_t options,
                          UnicodeString& result,
                          UErrorCode &status);
@@ -216,7 +231,7 @@ public:
     * @param status    The error code.
     * @stable ICU 2.0
     */
-  static void compose(const UnicodeString& source,
+  static void U_EXPORT2 compose(const UnicodeString& source,
                        UBool compat, int32_t options,
                        UnicodeString& result,
                        UErrorCode &status);
@@ -238,22 +253,22 @@ public:
     * @param status    The error code.
     * @stable ICU 2.0
     */
-  static void decompose(const UnicodeString& source,
+  static void U_EXPORT2 decompose(const UnicodeString& source,
                          UBool compat, int32_t options,
                          UnicodeString& result,
                          UErrorCode &status);
  
    /**
-   * Performing quick check on a string, to quickly determine if the string is 
+   * Performing quick check on a string, to quickly determine if the string is
     * in a particular normalization format.
     * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
     *
     * Three types of result can be returned: UNORM_YES, UNORM_NO or
     * UNORM_MAYBE. Result UNORM_YES indicates that the argument
     * string is in the desired normalized format, UNORM_NO determines that
-   * argument string is not in the desired normalized format. A 
-   * UNORM_MAYBE result indicates that a more thorough check is required, 
-   * the user may have to put the string in its normalized form and compare the 
+   * argument string is not in the desired normalized format. A
+   * UNORM_MAYBE result indicates that a more thorough check is required,
+   * the user may have to put the string in its normalized form and compare the
     * results.
     * @param source       string for determining if it is in a normalized format
     * @param mode         normalization format
@@ -277,9 +292,9 @@ public:
     * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
     *
     * @see isNormalized
-   * @draft ICU 2.6
+   * @stable ICU 2.6
     */
-  static inline UNormalizationCheckResult
+  static UNormalizationCheckResult
    quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
  
    /**
@@ -300,7 +315,7 @@ public:
     *         "mode" normalization form.
     *
     * @see quickCheck
-   * @draft ICU 2.2
+   * @stable ICU 2.2
     */
    static inline UBool
    isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
@@ -318,9 +333,9 @@ public:
     *         "mode" normalization form.
     *
     * @see quickCheck
-   * @draft ICU 2.6
+   * @stable ICU 2.6
     */
-  static inline UBool
+  static UBool
    isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
  
    /**
@@ -353,7 +368,7 @@ public:
     * @stable ICU 2.1
     */
    static UnicodeString &
-  concatenate(UnicodeString &left, UnicodeString &right,
+  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
                UnicodeString &result,
                UNormalizationMode mode, int32_t options,
                UErrorCode &errorCode);
@@ -420,7 +435,7 @@ public:
     * @see u_strCompare
     * @see u_strCaseCompare
     *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
     */
    static inline int32_t
    compare(const UnicodeString &s1, const UnicodeString &s2,
@@ -430,7 +445,7 @@ public:
    //-------------------------------------------------------------------------
    // Iteration API
    //-------------------------------------------------------------------------
-  
+
    /**
     * Return the current character in the normalized text.
     * current() may need to normalize some text at getIndex().
@@ -572,7 +587,7 @@ public:
    /**
     * Returns a pointer to a new Normalizer that is a clone of this one.
     * The caller is responsible for deleting the new clone.
-   * @return a pointer to a new Normalizer 
+   * @return a pointer to a new Normalizer
     * @stable ICU 2.0
     */
    Normalizer*        clone(void) const;
@@ -593,12 +608,12 @@ public:
     * Set the normalization mode for this object.
     * <p>
     * <b>Note:</b>If the normalization mode is changed while iterating
-   * over a string, calls to {@link next()} and {@link previous()} may
+   * over a string, calls to {@link #next() } and {@link #previous() } may
     * return previously buffered characters in the old normalization mode
     * until the iteration is able to re-sync at the next base character.
-   * It is safest to call {@link setIndexOnly()}, {@link reset()},
-   * {@link setText()}, {@link first()},
-   * {@link last()}, etc. after calling <code>setMode</code>.
+   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+   * {@link #setText }, {@link #first() },
+   * {@link #last() }, etc. after calling <code>setMode</code>.
     * <p>
     * @param newMode the new mode for this <code>Normalizer</code>.
     * @see #getUMode
@@ -634,7 +649,7 @@ public:
     * @see #getOption
     * @stable ICU 2.0
     */
-  void setOption(int32_t option, 
+  void setOption(int32_t option,
           UBool value);
  
    /**
@@ -657,7 +672,7 @@ public:
     * @param status a UErrorCode
     * @stable ICU 2.0
     */
-  void setText(const UnicodeString& newText, 
+  void setText(const UnicodeString& newText,
             UErrorCode &status);
  
    /**
@@ -668,7 +683,7 @@ public:
     * @param status a UErrorCode
     * @stable ICU 2.0
     */
-  void setText(const CharacterIterator& newText, 
+  void setText(const CharacterIterator& newText,
             UErrorCode &status);
  
    /**
@@ -692,18 +707,18 @@ public:
    void            getText(UnicodeString&  result);
  
    /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
-   * @return a UClassID for the actual class.
-   * @draft ICU 2.2
+   * ICU "poor man's RTTI", returns a UClassID for this class.
+   * @returns a UClassID for this class.
+   * @stable ICU 2.2
     */
-  virtual inline UClassID getDynamicClassID() const;
+  static UClassID U_EXPORT2 getStaticClassID();
  
    /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
-   * @returns a UClassID for this class.
-   * @draft ICU 2.2
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   * @return a UClassID for the actual class.
+   * @stable ICU 2.2
     */
-  static inline UClassID getStaticClassID();
+  virtual UClassID getDynamicClassID() const;
  
  private:
    //-------------------------------------------------------------------------
@@ -718,18 +733,20 @@ private:
    UBool nextNormalize();
    UBool previousNormalize();
  
-  void    init(CharacterIterator *iter);
+  void    init();
    void    clearBuffer(void);
  
    //-------------------------------------------------------------------------
    // Private data
    //-------------------------------------------------------------------------
  
+  FilteredNormalizer2*fFilteredNorm2;  // owned if not NULL
+  const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
    UNormalizationMode  fUMode;
    int32_t             fOptions;
  
    // The input text and our position in it
-  UCharIterator       *text;
+  CharacterIterator  *text;
  
    // The normalization buffer is the result of normalization
    // of the source in [currentIndex..nextIndex[ .
@@ -738,76 +755,28 @@ private:
    // A buffer for holding intermediate results
    UnicodeString       buffer;
    int32_t         bufferPos;
-
-  /**
-   * The address of this static class variable serves as this class's ID
-   * for ICU "poor man's RTTI".
-   */
-  static const char fgClassID;
  };
  
  //-------------------------------------------------------------------------
  // Inline implementations
  //-------------------------------------------------------------------------
  
-inline UClassID
-Normalizer::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Normalizer::getDynamicClassID() const
-{ return Normalizer::getStaticClassID(); }
-
  inline UBool
  Normalizer::operator!= (const Normalizer& other) const
  { return ! operator==(other); }
  
  inline UNormalizationCheckResult
  Normalizer::quickCheck(const UnicodeString& source,
-                       UNormalizationMode mode, 
-                       UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheck(source.getBuffer(), source.length(),
-                            mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
-                       UNormalizationMode mode, int32_t options,
+                       UNormalizationMode mode,
                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
-                                       mode, options, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
-                         UNormalizationMode mode, 
-                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalized(source.getBuffer(), source.length(),
-                              mode, &status);
+    return quickCheck(source, mode, 0, status);
  }
  
  inline UBool
  Normalizer::isNormalized(const UnicodeString& source,
-                         UNormalizationMode mode, int32_t options,
+                         UNormalizationMode mode,
                           UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
-                                         mode, options, &status);
+    return isNormalized(source, mode, 0, status);
  }
  
  inline int32_t