+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1996-2003, International Business Machines Corporation and
+ * COPYRIGHT:
+ * Copyright (c) 1996-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*/
#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C++ API: Unicode Normalization
+ */
+
#if !UCONFIG_NO_NORMALIZATION
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
#include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
#include "unicode/unorm.h"
+#include "unicode/uobject.h"
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
-
+#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
- * \brief C++ API: Unicode Normalization
+ * Old Unicode normalization API.
+ *
+ * This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
+ *
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
*
* The Normalizer class consists of two parts:
* - static functions that normalize strings or test if strings are normalized
*
* The Normalizer class is not suitable for subclassing.
*
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
* For basic information about normalization forms and details about the C API
* please see the documentation in unorm.h.
*
* The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
+ * use a CharacterIterator as input. It is possible to pass a string which
* is then internally wrapped in a CharacterIterator.
* The input text is not normalized all at once, but incrementally where needed
* (providing efficient random access).
* then the internal index is 0 and one can return to this getIndex()
* later with setIndexOnly().
*
+ * Note: While the setIndex() and getIndex() refer to indices in the
+ * underlying Unicode input text, the next() and previous() methods
+ * iterate through characters in the normalized output.
+ * This means that there is not necessarily a one-to-one correspondence
+ * between characters returned by next() and previous() and the indices
+ * passed to and returned from setIndex() and getIndex().
+ * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+ *
* @author Laura Werner, Mark Davis, Markus Scherer
* @stable ICU 2.0
*/
class U_COMMON_API Normalizer : public UObject {
public:
+#ifndef U_HIDE_DEPRECATED_API
/**
* If DONE is returned from an iteration function that returns a code point,
* then there are no more normalization results available.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
enum {
DONE=0xffff
* will start at the beginning of the string.
*
* @param mode The normalization mode.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
Normalizer(const UnicodeString& str, UNormalizationMode mode);
-
+
/**
* Creates a new <code>Normalizer</code> object for iterating over the
* normalized form of a given string.
*
* @param length Length of the string, or -1 if NUL-terminated.
* @param mode The normalization mode.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
+ Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
/**
* Creates a new <code>Normalizer</code> object for iterating over the
* will start at the beginning of the string.
*
* @param mode The normalization mode.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Copy constructor.
* @param copy The object to be copied.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
Normalizer(const Normalizer& copy);
/**
* Destructor
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- ~Normalizer();
+ virtual ~Normalizer();
//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------
+#ifndef U_HIDE_DEPRECATED_API
/**
* Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
* This is a wrapper for unorm_normalize(), using UnicodeStrings.
* @param options the optional features to be enabled (0 for no options)
* @param result The normalized string (on output).
* @param status The error code.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static void normalize(const UnicodeString& source,
+ static void U_EXPORT2 normalize(const UnicodeString& source,
UNormalizationMode mode, int32_t options,
UnicodeString& result,
UErrorCode &status);
* @param options the optional features to be enabled (0 for no options)
* @param result The composed string (on output).
* @param status The error code.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static void compose(const UnicodeString& source,
+ static void U_EXPORT2 compose(const UnicodeString& source,
UBool compat, int32_t options,
UnicodeString& result,
UErrorCode &status);
* @param options the optional features to be enabled (0 for no options)
* @param result The decomposed string (on output).
* @param status The error code.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static void decompose(const UnicodeString& source,
+ static void U_EXPORT2 decompose(const UnicodeString& source,
UBool compat, int32_t options,
UnicodeString& result,
UErrorCode &status);
/**
- * Performing quick check on a string, to quickly determine if the string is
+ * Performing quick check on a string, to quickly determine if the string is
* in a particular normalization format.
* This is a wrapper for unorm_quickCheck(), using a UnicodeString.
*
* Three types of result can be returned: UNORM_YES, UNORM_NO or
* UNORM_MAYBE. Result UNORM_YES indicates that the argument
* string is in the desired normalized format, UNORM_NO determines that
- * argument string is not in the desired normalized format. A
- * UNORM_MAYBE result indicates that a more thorough check is required,
- * the user may have to put the string in its normalized form and compare the
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
* results.
* @param source string for determining if it is in a normalized format
* @param mode normalization format
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
*
* @see isNormalized
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
static inline UNormalizationCheckResult
quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
*
* @see isNormalized
- * @draft ICU 2.6
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static inline UNormalizationCheckResult
+ static UNormalizationCheckResult
quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
/**
* "mode" normalization form.
*
* @see quickCheck
- * @draft ICU 2.2
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
static inline UBool
isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
* "mode" normalization form.
*
* @see quickCheck
- * @draft ICU 2.6
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static inline UBool
+ static UBool
isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
/**
* @see unorm_next
* @see unorm_previous
*
- * @stable ICU 2.1
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
static UnicodeString &
- concatenate(UnicodeString &left, UnicodeString &right,
+ U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
UnicodeString &result,
UNormalizationMode mode, int32_t options,
UErrorCode &errorCode);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Compare two strings for canonical equivalence.
* @see u_strCompare
* @see u_strCaseCompare
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
static inline int32_t
compare(const UnicodeString &s1, const UnicodeString &s2,
uint32_t options,
UErrorCode &errorCode);
+#ifndef U_HIDE_DEPRECATED_API
//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------
-
+
/**
* Return the current character in the normalized text.
* current() may need to normalize some text at getIndex().
* The getIndex() is not changed.
*
* @return the current normalized code point
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UChar32 current(void);
* (Post-increment semantics.)
*
* @return the first normalized code point
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UChar32 first(void);
* (Pre-decrement semantics.)
*
* @return the last normalized code point
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UChar32 last(void);
* The C API unorm_next() is more efficient and does not have this ambiguity.
*
* @return the next normalized code point
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UChar32 next(void);
* The C API unorm_previous() is more efficient and does not have this ambiguity.
*
* @return the previous normalized code point
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UChar32 previous(void);
* specified here.
*
* @param index the desired index in the input text.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
void setIndexOnly(int32_t index);
/**
* Reset the index to the beginning of the text.
* This is equivalent to setIndexOnly(startIndex()).
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
void reset(void);
* was returned from with previous().
*
* @return the current index in the input text
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
int32_t getIndex(void) const;
* over which this <code>Normalizer</code> is iterating.
*
* @return the smallest index in the input text where the Normalizer operates
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
int32_t startIndex(void) const;
* before this index.
*
* @return the first index in the input text where the Normalizer does not operate
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
int32_t endIndex(void) const;
*
* @param that a Normalizer object to compare this one to
* @return comparison result
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UBool operator==(const Normalizer& that) const;
*
* @param that a Normalizer object to compare this one to
* @return comparison result
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
inline UBool operator!=(const Normalizer& that) const;
/**
* Returns a pointer to a new Normalizer that is a clone of this one.
* The caller is responsible for deleting the new clone.
- * @return a pointer to a new Normalizer
- * @stable ICU 2.0
+ * @return a pointer to a new Normalizer
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
Normalizer* clone(void) const;
* Generates a hash code for this iterator.
*
* @return the hash code
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
int32_t hashCode(void) const;
* Set the normalization mode for this object.
* <p>
* <b>Note:</b>If the normalization mode is changed while iterating
- * over a string, calls to {@link next()} and {@link previous()} may
+ * over a string, calls to {@link #next() } and {@link #previous() } may
* return previously buffered characters in the old normalization mode
* until the iteration is able to re-sync at the next base character.
- * It is safest to call {@link setIndexOnly()}, {@link reset()},
- * {@link setText()}, {@link first()},
- * {@link last()}, etc. after calling <code>setMode</code>.
+ * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+ * {@link #setText }, {@link #first() },
+ * {@link #last() }, etc. after calling <code>setMode</code>.
* <p>
* @param newMode the new mode for this <code>Normalizer</code>.
* @see #getUMode
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
void setMode(UNormalizationMode newMode);
*
* @return the mode for this <code>Normalizer</code>
* @see #setMode
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UNormalizationMode getUMode(void) const;
* turn the option(s) on and <code>FALSE</code> to turn it/them off.
*
* @see #getOption
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- void setOption(int32_t option,
+ void setOption(int32_t option,
UBool value);
/**
* @param option the option(s) that are to be checked
* @return TRUE if any of the option(s) are set
* @see #setOption
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
UBool getOption(int32_t option) const;
*
* @param newText a string that replaces the current input text
* @param status a UErrorCode
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- void setText(const UnicodeString& newText,
+ void setText(const UnicodeString& newText,
UErrorCode &status);
/**
*
* @param newText a CharacterIterator object that replaces the current input text
* @param status a UErrorCode
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- void setText(const CharacterIterator& newText,
+ void setText(const CharacterIterator& newText,
UErrorCode &status);
/**
* @param newText a string that replaces the current input text
* @param length the length of the string, or -1 if NUL-terminated
* @param status a UErrorCode
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- void setText(const UChar* newText,
+ void setText(ConstChar16Ptr newText,
int32_t length,
UErrorCode &status);
/**
* Copies the input text into the UnicodeString argument.
*
* @param result Receives a copy of the text under iteration.
- * @stable ICU 2.0
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
void getText(UnicodeString& result);
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- * @return a UClassID for the actual class.
- * @draft ICU 2.2
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ * @returns a UClassID for this class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID U_EXPORT2 getStaticClassID();
+#endif /* U_HIDE_DEPRECATED_API */
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- * @returns a UClassID for this class.
- * @draft ICU 2.2
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @return a UClassID for the actual class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
private:
//-------------------------------------------------------------------------
UBool nextNormalize();
UBool previousNormalize();
- void init(CharacterIterator *iter);
+ void init();
void clearBuffer(void);
//-------------------------------------------------------------------------
// Private data
//-------------------------------------------------------------------------
- UNormalizationMode fUMode;
+ FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
+ const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
+ UNormalizationMode fUMode; // deprecated
int32_t fOptions;
// The input text and our position in it
- UCharIterator *text;
+ CharacterIterator *text;
// The normalization buffer is the result of normalization
// of the source in [currentIndex..nextIndex[ .
// A buffer for holding intermediate results
UnicodeString buffer;
int32_t bufferPos;
-
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
//-------------------------------------------------------------------------
// Inline implementations
//-------------------------------------------------------------------------
-inline UClassID
-Normalizer::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Normalizer::getDynamicClassID() const
-{ return Normalizer::getStaticClassID(); }
-
+#ifndef U_HIDE_DEPRECATED_API
// Inequality is the logical negation of operator==().
inline UBool
Normalizer::operator!=(const Normalizer& other) const {
    const UBool isEqual = operator==(other);
    return !isEqual;
}
// Inline definition of the quickCheck() overload that takes no options argument.
inline UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheck(source.getBuffer(), source.length(),
- mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
+ UNormalizationMode mode,
UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalized(source.getBuffer(), source.length(),
- mode, &status);
// Forward to the quickCheck() overload that takes options, passing 0 (no options).
+ return quickCheck(source, mode, 0, status);
}
inline UBool
Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
+ UNormalizationMode mode,
UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
// Forward to the isNormalized() overload that takes options, passing 0 (no options).
+ return isNormalized(source, mode, 0, status);
}
+#endif /* U_HIDE_DEPRECATED_API */
// Inline definition of Normalizer::compare(): hands each string's buffer and
// length, plus the options and a pointer to errorCode, to the C function
// unorm_compare() and returns that function's result unchanged.
inline int32_t
Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
uint32_t options,
UErrorCode &errorCode) {
// all argument checking is done in unorm_compare
- return unorm_compare(s1.getBuffer(), s1.length(),
- s2.getBuffer(), s2.length(),
+ return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
+ toUCharPtr(s2.getBuffer()), s2.length(),
options,
&errorCode);
}
U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
#endif /* #if !UCONFIG_NO_NORMALIZATION */