/*
********************************************************************
* COPYRIGHT:
- * Copyright (c) 1996-2004, International Business Machines Corporation and
+ * Copyright (c) 1996-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*/
#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C++ API: Unicode Normalization
+ */
+
#if !UCONFIG_NO_NORMALIZATION
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
#include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
#include "unicode/unorm.h"
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
/**
- * \brief C++ API: Unicode Normalization
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Note: This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
*
* The Normalizer class consists of two parts:
* - static functions that normalize strings or test if strings are normalized
*
* The Normalizer class is not suitable for subclassing.
*
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
* For basic information about normalization forms and details about the C API
* please see the documentation in unorm.h.
*
* The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
+ * use a CharacterIterator as input. It is possible to pass a string which
* is then internally wrapped in a CharacterIterator.
* The input text is not normalized all at once, but incrementally where needed
* (providing efficient random access).
* then the internal index is 0 and one can return to this getIndex()
* later with setIndexOnly().
*
+ * Note: While setIndex() and getIndex() refer to indices in the
+ * underlying Unicode input text, the next() and previous() methods
+ * iterate through characters in the normalized output.
+ * This means that there is not necessarily a one-to-one correspondence
+ * between characters returned by next() and previous() and the indices
+ * passed to and returned from setIndex() and getIndex().
+ * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+ *
* @author Laura Werner, Mark Davis, Markus Scherer
* @stable ICU 2.0
*/
* @see isNormalized
* @stable ICU 2.6
*/
// The options-taking quickCheck() overload loses its `inline` specifier in
// this change; its former inline body (which called
// unorm_quickCheckWithOptions()) is deleted further down in the same change.
- static inline UNormalizationCheckResult
+ static UNormalizationCheckResult
quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
/**
* @see quickCheck
* @stable ICU 2.6
*/
// The options-taking isNormalized() overload loses its `inline` specifier in
// this change; its former inline body (which called
// unorm_isNormalizedWithOptions()) is deleted further down in the same change.
- static inline UBool
+ static UBool
isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
/**
* @stable ICU 2.1
*/
// In this change `left` and `right` become const references; `result`
// stays a non-const reference and the return type is still UnicodeString &.
static UnicodeString &
- U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
+ U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
UnicodeString &result,
UNormalizationMode mode, int32_t options,
UErrorCode &errorCode);
// Internal helpers. NOTE(review): nextNormalize()/previousNormalize()
// presumably refill `buffer` in the forward/backward direction and report
// success via UBool -- confirm against the implementation file.
UBool nextNormalize();
UBool previousNormalize();
// init() no longer takes a CharacterIterator* argument after this change.
- void init(CharacterIterator *iter);
+ void init();
void clearBuffer(void);
//-------------------------------------------------------------------------
// Private data
//-------------------------------------------------------------------------
+ FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
+ const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
// Normalization mode and option bits.
// NOTE(review): presumably the values supplied by the caller at
// construction or via a setter -- confirm in the implementation file.
UNormalizationMode fUMode;
int32_t fOptions;
// The input text and our position in it
// (this change switches the member from UCharIterator* to CharacterIterator*)
- UCharIterator *text;
+ CharacterIterator *text;
// The normalization buffer is the result of normalization
// of the source in the half-open range [currentIndex..nextIndex).
// A buffer for holding intermediate results
UnicodeString buffer;
int32_t bufferPos;
-
};
//-------------------------------------------------------------------------
Normalizer::quickCheck(const UnicodeString& source,
UNormalizationMode mode,
UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheck(source.getBuffer(), source.length(),
- mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
+ return quickCheck(source, mode, 0, status);
}
// Convenience overload of isNormalized() without an options argument.
// After this change it simply forwards to the four-argument overload,
// passing 0 for `options`. The removed lines are the previous bodies: this
// overload used to check U_FAILURE(status) and call unorm_isNormalized()
// directly, and the inline four-argument overload (which called
// unorm_isNormalizedWithOptions()) is deleted from this header entirely.
inline UBool
Normalizer::isNormalized(const UnicodeString& source,
UNormalizationMode mode,
UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalized(source.getBuffer(), source.length(),
- mode, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
+ return isNormalized(source, mode, 0, status);
}
inline int32_t