[apple/icu.git] / icuSources / common / normlzr.cpp

/*
 *************************************************************************
 * COPYRIGHT: 
 * Copyright (c) 1996-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 *************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "cmemory.h"
#include "normalizer2impl.h"
#include "uprops.h"  // for uniset_getUnicode32Instance()

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)

//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------

/**
 * Construct a Normalizer that iterates over a copy-backed iterator for
 * the given string. Iteration state starts at index 0 with an empty
 * output buffer and no options set; init() then selects the normalizer
 * implementation for the requested mode.
 */
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new StringCharacterIterator(str)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    init();
}

/**
 * Construct a Normalizer over a UChar array of the given length, wrapped
 * in a UCharCharacterIterator. Iteration state starts at index 0 with an
 * empty output buffer and no options set; init() then selects the
 * normalizer implementation for the requested mode.
 */
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new UCharCharacterIterator(str, length)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    init();
}

/**
 * Construct a Normalizer over a clone of the given CharacterIterator.
 * Iteration state starts at index 0 with an empty output buffer and no
 * options set; init() then selects the normalizer implementation for the
 * requested mode.
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(iter.clone()),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    init();
}

/**
 * Copy constructor. Duplicates the mode, options, indices and buffered
 * output of the other Normalizer and clones its text iterator. The
 * normalizer pointers are not copied; init() rebuilds them from the
 * copied mode and options.
 */
Normalizer::Normalizer(const Normalizer &copy)
    : UObject(copy),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(copy.fUMode),
      fOptions(copy.fOptions),
      text(copy.text->clone()),
      currentIndex(copy.currentIndex),
      nextIndex(copy.nextIndex),
      buffer(copy.buffer),
      bufferPos(copy.bufferPos)
{
    init();
}

// File-local NUL code unit (value 0). NOTE(review): nothing in the visible
// part of this file references it; confirm whether it can be removed.
static const UChar _NUL=0;

void
Normalizer::init() {
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
    if(fOptions&UNORM_UNICODE_3_2) {
        delete fFilteredNorm2;
        fNorm2=fFilteredNorm2=
            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
    }
    if(U_FAILURE(errorCode)) {
        errorCode=U_ZERO_ERROR;
        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
    }
}

/**
 * Destructor. Releases the two objects this Normalizer deletes: the text
 * iterator and the optional filtered-normalizer wrapper. fNorm2 itself is
 * not deleted here.
 */
Normalizer::~Normalizer()
{
    delete text;
    delete fFilteredNorm2;
}

/**
 * Return a heap-allocated copy of this Normalizer, produced by the copy
 * constructor. The caller receives the raw pointer.
 */
Normalizer*
Normalizer::clone() const
{
    Normalizer *duplicate=new Normalizer(*this);
    return duplicate;
}

/**
 * Generates a hash code for this iterator.
 *
 * The hash combines the text iterator's hash, the mode, the options, the
 * buffer's hash, and the three position fields. The sum is accumulated in
 * unsigned arithmetic so that it wraps around instead of invoking
 * undefined behavior on signed overflow; for inputs that did not overflow
 * the result is the same as a plain int32_t sum.
 */
int32_t Normalizer::hashCode() const
{
    uint32_t hash=static_cast<uint32_t>(text->hashCode());
    hash+=static_cast<uint32_t>(fUMode);
    hash+=static_cast<uint32_t>(fOptions);
    hash+=static_cast<uint32_t>(buffer.hashCode());
    hash+=static_cast<uint32_t>(bufferPos);
    hash+=static_cast<uint32_t>(currentIndex);
    hash+=static_cast<uint32_t>(nextIndex);
    return static_cast<int32_t>(hash);
}
    
/**
 * Equality test. An object equals itself; otherwise two Normalizers are
 * equal when mode, options, text iterator (compared by value), buffer,
 * buffer position and nextIndex all match. currentIndex is not compared.
 */
UBool Normalizer::operator==(const Normalizer& that) const
{
    if(this==&that) {
        return TRUE;
    }
    if(fUMode!=that.fUMode || fOptions!=that.fOptions) {
        return FALSE;
    }
    if(!(*text==*that.text)) {
        return FALSE;
    }
    if(!(buffer==that.buffer)) {
        return FALSE;
    }
    return bufferPos==that.bufferPos && nextIndex==that.nextIndex;
}

//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------

/**
 * Normalize source into result using the given mode.
 *
 * @param source  Input string; a bogus string is rejected.
 * @param mode    Normalization mode passed to Normalizer2Factory.
 * @param options If UNORM_UNICODE_3_2 is set, normalization is filtered
 *                through the Unicode 3.2 set.
 * @param result  Receives the output. May be the same object as source,
 *                in which case a temporary is used and copied back only
 *                on success.
 * @param status  In/out error code. On entry failure or bogus source,
 *                result is set to bogus (and status to
 *                U_ILLEGAL_ARGUMENT_ERROR if it was not already failing).
 */
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source, 
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // When source and result alias, write into a temporary first.
    UnicodeString localDest;
    UnicodeString *dest=(&source!=&result) ? &result : &localDest;

    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_SUCCESS(status)) {
        if(options&UNORM_UNICODE_3_2) {
            // Fetch the filter set separately so a failed lookup is never dereferenced.
            const UnicodeSet *unicode32=uniset_getUnicode32Instance(status);
            if(U_SUCCESS(status) && unicode32==NULL) {
                status=U_MEMORY_ALLOCATION_ERROR;
            }
            if(U_SUCCESS(status)) {
                FilteredNormalizer2(*n2, *unicode32).
                    normalize(source, *dest, status);
            }
        } else {
            n2->normalize(source, *dest, status);
        }
    }
    if(dest==&localDest && U_SUCCESS(status)) {
        result=*dest;
    }
}

/**
 * Compose source into result by delegating to normalize() with
 * UNORM_NFKC when compat is true and UNORM_NFC otherwise.
 */
void U_EXPORT2
Normalizer::compose(const UnicodeString& source, 
                    UBool compat, int32_t options,
                    UnicodeString& result, 
                    UErrorCode &status) {
    UNormalizationMode mode=UNORM_NFC;
    if(compat) {
        mode=UNORM_NFKC;
    }
    normalize(source, mode, options, result, status);
}

/**
 * Decompose source into result by delegating to normalize() with
 * UNORM_NFKD when compat is true and UNORM_NFD otherwise.
 */
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source, 
                      UBool compat, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    UNormalizationMode mode=UNORM_NFD;
    if(compat) {
        mode=UNORM_NFKD;
    }
    normalize(source, mode, options, result, status);
}

/**
 * Quick-check whether source is normalized in the given mode.
 *
 * @param options If UNORM_UNICODE_3_2 is set, the check is filtered
 *                through the Unicode 3.2 set.
 * @param status  In/out error code.
 * @return The Normalizer2 quick-check result, or UNORM_MAYBE if the
 *         normalizer (or the Unicode 3.2 filter set) could not be obtained.
 */
UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    if(options&UNORM_UNICODE_3_2) {
        // Fetch the filter set separately so a failed lookup is never dereferenced.
        const UnicodeSet *unicode32=uniset_getUnicode32Instance(status);
        if(U_SUCCESS(status) && unicode32==NULL) {
            status=U_MEMORY_ALLOCATION_ERROR;
        }
        if(U_FAILURE(status)) {
            return UNORM_MAYBE;
        }
        return FilteredNormalizer2(*n2, *unicode32).
            quickCheck(source, status);
    }
    return n2->quickCheck(source, status);
}

/**
 * Test whether source is normalized in the given mode.
 *
 * @param options If UNORM_UNICODE_3_2 is set, the test is filtered
 *                through the Unicode 3.2 set.
 * @param status  In/out error code.
 * @return The Normalizer2 result, or FALSE if the normalizer (or the
 *         Unicode 3.2 filter set) could not be obtained.
 */
UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    if(options&UNORM_UNICODE_3_2) {
        // Fetch the filter set separately so a failed lookup is never dereferenced.
        const UnicodeSet *unicode32=uniset_getUnicode32Instance(status);
        if(U_SUCCESS(status) && unicode32==NULL) {
            status=U_MEMORY_ALLOCATION_ERROR;
        }
        if(U_FAILURE(status)) {
            return FALSE;
        }
        return FilteredNormalizer2(*n2, *unicode32).
            isNormalized(source, status);
    }
    return n2->isNormalized(source, status);
}

/**
 * Concatenate left and right into result, normalizing across the join
 * via Normalizer2::append().
 *
 * @param left      First string; copied into the destination before the append.
 * @param right     Second string; appended with normalization.
 * @param result    Receives the output. May be the same object as right,
 *                  in which case a temporary is used and copied back only
 *                  on success.
 * @param mode      Normalization mode passed to Normalizer2Factory.
 * @param options   If UNORM_UNICODE_3_2 is set, the append is filtered
 *                  through the Unicode 3.2 set.
 * @param errorCode In/out error code. On entry failure or bogus input,
 *                  result is set to bogus (and errorCode to
 *                  U_ILLEGAL_ARGUMENT_ERROR if it was not already failing).
 * @return result
 */
UnicodeString & U_EXPORT2
Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return result;
    }

    // When right and result alias, build the output in a temporary first.
    UnicodeString localDest;
    UnicodeString *dest=(&right!=&result) ? &result : &localDest;

    *dest=left;
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_SUCCESS(errorCode)) {
        if(options&UNORM_UNICODE_3_2) {
            // Fetch the filter set separately so a failed lookup is never dereferenced.
            const UnicodeSet *unicode32=uniset_getUnicode32Instance(errorCode);
            if(U_SUCCESS(errorCode) && unicode32==NULL) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
            }
            if(U_SUCCESS(errorCode)) {
                FilteredNormalizer2(*n2, *unicode32).
                    append(*dest, right, errorCode);
            }
        } else {
            n2->append(*dest, right, errorCode);
        }
    }
    if(dest==&localDest && U_SUCCESS(errorCode)) {
        result=*dest;
    }
    return result;
}

//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------

/**
 * Return the code point at the current position of the normalized
 * output without advancing. If the buffer is exhausted, the next segment
 * is normalized first; DONE is returned when that yields nothing.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}

/**
 * Return the code point at the current position of the normalized
 * output and advance past it (by its UTF-16 length). If the buffer is
 * exhausted, the next segment is normalized first; {@link #DONE} is
 * returned when that yields nothing.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 codePoint=buffer.char32At(bufferPos);
    bufferPos+=UTF_CHAR_LENGTH(codePoint);
    return codePoint;
}

/**
 * Return the code point just before the current position of the
 * normalized output and move back over it (by its UTF-16 length). If the
 * position is at the start of the buffer, the preceding segment is
 * normalized first; {@link #DONE} is returned when that yields nothing.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 codePoint=buffer.char32At(bufferPos-1);
    bufferPos-=UTF_CHAR_LENGTH(codePoint);
    return codePoint;
}

void Normalizer::reset() {
    currentIndex=nextIndex=text->setToStart();
    clearBuffer();
}

/**
 * Position the text iterator at the given input index, set both input
 * indices to the iterator's resulting position, and discard any buffered
 * normalized output.
 */
void
Normalizer::setIndexOnly(int32_t index) {
    text->setIndex(index);  // the iterator pins the index to its valid range
    const int32_t pinned=text->getIndex();
    nextIndex=pinned;
    currentIndex=pinned;
    clearBuffer();
}

/**
 * Return the first character in the normalized text.  This calls
 * reset() to move to the beginning of the input and then returns the
 * result of next().
 */
UChar32 Normalizer::first() {
    reset();
    const UChar32 firstChar=next();
    return firstChar;
}

/**
 * Return the last character in the normalized text.  This moves the
 * text iterator to its end, sets both input indices to that position,
 * clears the buffer, and then returns the result of previous().
 */
UChar32 Normalizer::last() {
    const int32_t end=text->setToEnd();
    nextIndex=end;
    currentIndex=end;
    clearBuffer();
    return previous();
}

/**
 * Retrieve the current iteration position in the <em>input</em> text.
 * <p>
 * While unread characters remain in the normalized buffer, this is
 * currentIndex; once the buffer has been fully consumed (or is empty),
 * it is nextIndex.
 * <p>
 * <b>Note:</b> {@link #next} and {@link #previous} iterate through
 * characters in the <em>output</em>, so there is not necessarily a
 * one-to-one correspondence between the characters they return and the
 * input indices reported here.
 */
int32_t Normalizer::getIndex() const {
    return (bufferPos<buffer.length()) ? currentIndex : nextIndex;
}

/**
 * Retrieve the index of the start of the input text, as reported by the
 * underlying text iterator's startIndex().
 */
int32_t Normalizer::startIndex() const {
    const int32_t begin=text->startIndex();
    return begin;
}

/**
 * Retrieve the index of the end of the input text, as reported by the
 * underlying text iterator's endIndex().
 */
int32_t Normalizer::endIndex() const {
    const int32_t limit=text->endIndex();
    return limit;
}

//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------

/**
 * Change the normalization mode and re-run init() so that the
 * normalizer implementation matches the new mode.
 */
void
Normalizer::setMode(UNormalizationMode newMode)
{
    this->fUMode=newMode;
    this->init();
}

/**
 * Return the normalization mode currently stored in this object.
 */
UNormalizationMode
Normalizer::getUMode() const
{
    return this->fUMode;
}

/**
 * Set (value true) or clear (value false) the given option bit(s) in
 * fOptions, then re-run init() so the normalizer implementation reflects
 * the new options.
 */
void
Normalizer::setOption(int32_t option, 
                      UBool value) 
{
    fOptions = value ? (fOptions | option) : (fOptions & ~option);
    init();
}

/**
 * Report whether any of the given option bit(s) are set in fOptions.
 */
UBool
Normalizer::getOption(int32_t option) const
{
    const int32_t masked=fOptions & option;
    return masked != 0;
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the input text.
 */
void
Normalizer::setText(const UnicodeString& newText, 
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new StringCharacterIterator(newText);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the string.
 */
void
Normalizer::setText(const CharacterIterator& newText, 
                    UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = newText.clone();
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

void
Normalizer::setText(const UChar* newText,
                    int32_t length,
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Copies the text under iteration into "result" by delegating to the
 * underlying text iterator's getText().
 * @param result Receives a copy of the text under iteration.
 */
void
Normalizer::getText(UnicodeString&  result) 
{
    CharacterIterator &input=*text;
    input.getText(result);
}

//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------

/**
 * Discard the buffered normalized output and rewind the buffer position
 * to zero.
 */
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}

/**
 * Normalize the next segment of input into the buffer.
 *
 * Starting at nextIndex, takes at least one code point and then keeps
 * collecting code points until one that has a normalization boundary
 * before it (that one is pushed back). currentIndex/nextIndex are
 * updated to bracket the consumed input, and the segment is normalized
 * into the (cleared) buffer.
 *
 * @return TRUE if normalization succeeded and produced output; FALSE at
 *         the end of the input, on error, or if the output is empty.
 */
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->setIndex(nextIndex);
    if(!text->hasNext()) {
        return FALSE;
    }
    // Always consume the first code point so that iteration makes progress.
    const UChar32 lead=text->next32PostInc();
    UnicodeString segment(lead);
    for(;;) {
        if(!text->hasNext()) {
            break;
        }
        const UChar32 c=text->next32PostInc();
        if(fNorm2->hasBoundaryBefore(c)) {
            // c starts the following segment: step back over it.
            text->move32(-1, CharacterIterator::kCurrent);
            break;
        }
        segment.append(c);
    }
    nextIndex=text->getIndex();

    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

/**
 * Normalize the previous segment of input into the buffer.
 *
 * Starting at currentIndex, walks backwards prepending code points to
 * the segment, stopping after the first one that has a normalization
 * boundary before it (or at the start of the input).
 * currentIndex/nextIndex are updated to bracket the consumed input, the
 * segment is normalized into the (cleared) buffer, and bufferPos is set
 * to the end of the buffer so previous() reads backwards from there.
 *
 * @return TRUE if normalization succeeded and produced output; FALSE at
 *         the start of the input, on error, or if the output is empty.
 */
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->setIndex(currentIndex);
    if(!text->hasPrevious()) {
        return FALSE;
    }
    UnicodeString segment;
    for(;;) {
        if(!text->hasPrevious()) {
            break;
        }
        const UChar32 c=text->previous32();
        segment.insert(0, c);
        if(fNorm2->hasBoundaryBefore(c)) {
            // c begins this segment; nothing earlier belongs to it.
            break;
        }
    }
    currentIndex=text->getIndex();

    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    bufferPos=buffer.length();
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */
Commit	Line	Data
b75a7d8f A	1	/*
	2	*************************************************************************
	3	* COPYRIGHT:
729e4ab9	4	* Copyright (c) 1996-2010, International Business Machines Corporation and
b75a7d8f A	5	* others. All Rights Reserved.
	6	*************************************************************************
	7	*/
	8
	9	#include "unicode/utypes.h"
	10
	11	#if !UCONFIG_NO_NORMALIZATION
	12
729e4ab9	13	#include "unicode/uniset.h"
b75a7d8f A	14	#include "unicode/unistr.h"
	15	#include "unicode/chariter.h"
	16	#include "unicode/schriter.h"
	17	#include "unicode/uchriter.h"
b75a7d8f A	18	#include "unicode/normlzr.h"
b75a7d8f A	19	#include "cmemory.h"
729e4ab9 A	20	#include "normalizer2impl.h"
729e4ab9 A	21	#include "uprops.h" // for uniset_getUnicode32Instance()
b75a7d8f A	22
	23	U_NAMESPACE_BEGIN
	24
374ca955	25	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
b75a7d8f A	26
	27	//-------------------------------------------------------------------------
	28	// Constructors and other boilerplate
	29	//-------------------------------------------------------------------------
	30
	31	Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
729e4ab9 A	32	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	33	text(new StringCharacterIterator(str)),
b75a7d8f A	34	currentIndex(0), nextIndex(0),
	35	buffer(), bufferPos(0)
	36	{
729e4ab9	37	init();
b75a7d8f A	38	}
	39
	40	Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
729e4ab9 A	41	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	42	text(new UCharCharacterIterator(str, length)),
b75a7d8f A	43	currentIndex(0), nextIndex(0),
	44	buffer(), bufferPos(0)
	45	{
729e4ab9	46	init();
b75a7d8f A	47	}
	48
	49	Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
729e4ab9 A	50	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	51	text(iter.clone()),
b75a7d8f A	52	currentIndex(0), nextIndex(0),
	53	buffer(), bufferPos(0)
	54	{
729e4ab9	55	init();
b75a7d8f A	56	}
	57
	58	Normalizer::Normalizer(const Normalizer &copy) :
729e4ab9 A	59	UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
729e4ab9 A	60	text(copy.text->clone()),
b75a7d8f A	61	currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
	62	buffer(copy.buffer), bufferPos(copy.bufferPos)
	63	{
729e4ab9	64	init();
b75a7d8f A	65	}
	66
	67	static const UChar _NUL=0;
	68
	69	void
729e4ab9	70	Normalizer::init() {
b75a7d8f	71	UErrorCode errorCode=U_ZERO_ERROR;
729e4ab9 A	72	fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
	73	if(fOptions&UNORM_UNICODE_3_2) {
	74	delete fFilteredNorm2;
	75	fNorm2=fFilteredNorm2=
	76	new FilteredNormalizer2(fNorm2, uniset_getUnicode32Instance(errorCode));
	77	}
	78	if(U_FAILURE(errorCode)) {
	79	errorCode=U_ZERO_ERROR;
	80	fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
b75a7d8f A	81	}
	82	}
	83
	84	Normalizer::~Normalizer()
	85	{
729e4ab9 A	86	delete fFilteredNorm2;
729e4ab9 A	87	delete text;
b75a7d8f A	88	}
	89
	90	Normalizer*
	91	Normalizer::clone() const
	92	{
729e4ab9	93	return new Normalizer(*this);
b75a7d8f A	94	}
	95
	96	/**
	97	* Generates a hash code for this iterator.
	98	*/
	99	int32_t Normalizer::hashCode() const
	100	{
729e4ab9	101	return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
b75a7d8f A	102	}
	103
	104	UBool Normalizer::operator==(const Normalizer& that) const
	105	{
	106	return
	107	this==&that \|\|
729e4ab9	108	(fUMode==that.fUMode &&
b75a7d8f	109	fOptions==that.fOptions &&
729e4ab9	110	text==that.text &&
b75a7d8f A	111	buffer==that.buffer &&
b75a7d8f A	112	bufferPos==that.bufferPos &&
729e4ab9	113	nextIndex==that.nextIndex);
b75a7d8f A	114	}
	115
	116	//-------------------------------------------------------------------------
	117	// Static utility methods
	118	//-------------------------------------------------------------------------
	119
374ca955	120	void U_EXPORT2
b75a7d8f A	121	Normalizer::normalize(const UnicodeString& source,
	122	UNormalizationMode mode, int32_t options,
	123	UnicodeString& result,
	124	UErrorCode &status) {
	125	if(source.isBogus() \|\| U_FAILURE(status)) {
	126	result.setToBogus();
	127	if(U_SUCCESS(status)) {
	128	status=U_ILLEGAL_ARGUMENT_ERROR;
	129	}
	130	} else {
	131	UnicodeString localDest;
	132	UnicodeString *dest;
	133
	134	if(&source!=&result) {
	135	dest=&result;
	136	} else {
	137	// the source and result strings are the same object, use a temporary one
	138	dest=&localDest;
	139	}
729e4ab9 A	140	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	141	if(U_SUCCESS(status)) {
	142	if(options&UNORM_UNICODE_3_2) {
	143	FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	144	normalize(source, *dest, status);
	145	} else {
	146	n2->normalize(source, *dest, status);
	147	}
b75a7d8f	148	}
729e4ab9	149	if(dest==&localDest && U_SUCCESS(status)) {
b75a7d8f A	150	result=*dest;
b75a7d8f A	151	}
b75a7d8f A	152	}
	153	}
	154
374ca955	155	void U_EXPORT2
b75a7d8f A	156	Normalizer::compose(const UnicodeString& source,
	157	UBool compat, int32_t options,
	158	UnicodeString& result,
	159	UErrorCode &status) {
729e4ab9	160	normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
b75a7d8f A	161	}
b75a7d8f A	162
374ca955	163	void U_EXPORT2
b75a7d8f A	164	Normalizer::decompose(const UnicodeString& source,
	165	UBool compat, int32_t options,
	166	UnicodeString& result,
	167	UErrorCode &status) {
729e4ab9 A	168	normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
	169	}
	170
	171	UNormalizationCheckResult
	172	Normalizer::quickCheck(const UnicodeString& source,
	173	UNormalizationMode mode, int32_t options,
	174	UErrorCode &status) {
	175	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	176	if(U_SUCCESS(status)) {
	177	if(options&UNORM_UNICODE_3_2) {
	178	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	179	quickCheck(source, status);
	180	} else {
	181	return n2->quickCheck(source, status);
b75a7d8f A	182	}
b75a7d8f A	183	} else {
729e4ab9 A	184	return UNORM_MAYBE;
	185	}
	186	}
b75a7d8f	187
729e4ab9 A	188	UBool
	189	Normalizer::isNormalized(const UnicodeString& source,
	190	UNormalizationMode mode, int32_t options,
	191	UErrorCode &status) {
	192	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	193	if(U_SUCCESS(status)) {
	194	if(options&UNORM_UNICODE_3_2) {
	195	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	196	isNormalized(source, status);
b75a7d8f	197	} else {
729e4ab9	198	return n2->isNormalized(source, status);
b75a7d8f	199	}
729e4ab9 A	200	} else {
729e4ab9 A	201	return FALSE;
b75a7d8f A	202	}
	203	}
	204
374ca955	205	UnicodeString & U_EXPORT2
b75a7d8f A	206	Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
	207	UnicodeString &result,
	208	UNormalizationMode mode, int32_t options,
	209	UErrorCode &errorCode) {
	210	if(left.isBogus() \|\| right.isBogus() \|\| U_FAILURE(errorCode)) {
	211	result.setToBogus();
	212	if(U_SUCCESS(errorCode)) {
	213	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	214	}
	215	} else {
	216	UnicodeString localDest;
	217	UnicodeString *dest;
	218
729e4ab9	219	if(&right!=&result) {
b75a7d8f A	220	dest=&result;
b75a7d8f A	221	} else {
729e4ab9	222	// the right and result strings are the same object, use a temporary one
b75a7d8f A	223	dest=&localDest;
b75a7d8f A	224	}
729e4ab9 A	225	*dest=left;
	226	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
	227	if(U_SUCCESS(errorCode)) {
	228	if(options&UNORM_UNICODE_3_2) {
	229	FilteredNormalizer2(n2, uniset_getUnicode32Instance(errorCode)).
	230	append(*dest, right, errorCode);
	231	} else {
	232	n2->append(*dest, right, errorCode);
	233	}
b75a7d8f	234	}
729e4ab9	235	if(dest==&localDest && U_SUCCESS(errorCode)) {
b75a7d8f A	236	result=*dest;
b75a7d8f A	237	}
b75a7d8f A	238	}
	239	return result;
	240	}
	241
	242	//-------------------------------------------------------------------------
	243	// Iteration API
	244	//-------------------------------------------------------------------------
	245
	246	/**
	247	* Return the current character in the normalized text.
	248	*/
	249	UChar32 Normalizer::current() {
	250	if(bufferPos<buffer.length() \|\| nextNormalize()) {
	251	return buffer.char32At(bufferPos);
	252	} else {
	253	return DONE;
	254	}
	255	}
	256
	257	/**
	258	* Return the next character in the normalized text and advance
	259	* the iteration position by one. If the end
	260	* of the text has already been reached, {@link #DONE} is returned.
	261	*/
	262	UChar32 Normalizer::next() {
	263	if(bufferPos<buffer.length() \|\| nextNormalize()) {
	264	UChar32 c=buffer.char32At(bufferPos);
	265	bufferPos+=UTF_CHAR_LENGTH(c);
	266	return c;
	267	} else {
	268	return DONE;
	269	}
	270	}
	271
	272	/**
	273	* Return the previous character in the normalized text and decrement
	274	* the iteration position by one. If the beginning
	275	* of the text has already been reached, {@link #DONE} is returned.
	276	*/
	277	UChar32 Normalizer::previous() {
	278	if(bufferPos>0 \|\| previousNormalize()) {
	279	UChar32 c=buffer.char32At(bufferPos-1);
	280	bufferPos-=UTF_CHAR_LENGTH(c);
	281	return c;
	282	} else {
	283	return DONE;
	284	}
	285	}
	286
	287	void Normalizer::reset() {
729e4ab9	288	currentIndex=nextIndex=text->setToStart();
b75a7d8f A	289	clearBuffer();
	290	}
	291
	292	void
	293	Normalizer::setIndexOnly(int32_t index) {
729e4ab9 A	294	text->setIndex(index); // pins index
729e4ab9 A	295	currentIndex=nextIndex=text->getIndex();
b75a7d8f A	296	clearBuffer();
	297	}
	298
	299	/**
729e4ab9 A	300	* Return the first character in the normalized text. This resets
729e4ab9 A	301	* the <tt>Normalizer's</tt> position to the beginning of the text.
b75a7d8f A	302	*/
	303	UChar32 Normalizer::first() {
	304	reset();
	305	return next();
	306	}
	307
	308	/**
729e4ab9	309	* Return the last character in the normalized text. This resets
b75a7d8f A	310	* the <tt>Normalizer's</tt> position to be just before the
	311	* the input text corresponding to that normalized character.
	312	*/
	313	UChar32 Normalizer::last() {
729e4ab9	314	currentIndex=nextIndex=text->setToEnd();
b75a7d8f A	315	clearBuffer();
	316	return previous();
	317	}
	318
	319	/**
	320	* Retrieve the current iteration position in the input text that is
	321	* being normalized. This method is useful in applications such as
	322	* searching, where you need to be able to determine the position in
	323	* the input text that corresponds to a given normalized output character.
	324	* <p>
	325	* <b>Note:</b> This method sets the position in the <em>input</em>, while
	326	* {@link #next} and {@link #previous} iterate through characters in the
	327	* <em>output</em>. This means that there is not necessarily a one-to-one
	328	* correspondence between characters returned by <tt>next</tt> and
	329	* <tt>previous</tt> and the indices passed to and returned from
	330	* <tt>setIndex</tt> and {@link #getIndex}.
	331	*
	332	*/
	333	int32_t Normalizer::getIndex() const {
	334	if(bufferPos<buffer.length()) {
	335	return currentIndex;
	336	} else {
	337	return nextIndex;
	338	}
	339	}
	340
	341	/**
729e4ab9	342	* Retrieve the index of the start of the input text. This is the begin index
b75a7d8f A	343	* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
	344	* over which this <tt>Normalizer</tt> is iterating
	345	*/
	346	int32_t Normalizer::startIndex() const {
729e4ab9	347	return text->startIndex();
b75a7d8f A	348	}
	349
	350	/**
729e4ab9	351	* Retrieve the index of the end of the input text. This is the end index
b75a7d8f A	352	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
	353	* over which this <tt>Normalizer</tt> is iterating
	354	*/
	355	int32_t Normalizer::endIndex() const {
729e4ab9	356	return text->endIndex();
b75a7d8f A	357	}
	358
	359	//-------------------------------------------------------------------------
	360	// Property access methods
	361	//-------------------------------------------------------------------------
	362
	363	void
	364	Normalizer::setMode(UNormalizationMode newMode)
	365	{
	366	fUMode = newMode;
729e4ab9	367	init();
b75a7d8f A	368	}
	369
	370	UNormalizationMode
	371	Normalizer::getUMode() const
	372	{
	373	return fUMode;
	374	}
	375
	376	void
	377	Normalizer::setOption(int32_t option,
	378	UBool value)
	379	{
	380	if (value) {
	381	fOptions \|= option;
	382	} else {
	383	fOptions &= (~option);
	384	}
729e4ab9	385	init();
b75a7d8f A	386	}
	387
	388	UBool
	389	Normalizer::getOption(int32_t option) const
	390	{
	391	return (fOptions & option) != 0;
	392	}
	393
	394	/**
	395	* Set the input text over which this <tt>Normalizer</tt> will iterate.
729e4ab9	396	* The iteration position is set to the beginning of the input text.
b75a7d8f A	397	*/
	398	void
	399	Normalizer::setText(const UnicodeString& newText,
	400	UErrorCode &status)
	401	{
	402	if (U_FAILURE(status)) {
	403	return;
	404	}
	405	CharacterIterator *newIter = new StringCharacterIterator(newText);
	406	if (newIter == NULL) {
	407	status = U_MEMORY_ALLOCATION_ERROR;
	408	return;
	409	}
729e4ab9 A	410	delete text;
729e4ab9 A	411	text = newIter;
b75a7d8f A	412	reset();
	413	}
	414
	415	/**
	416	* Set the input text over which this <tt>Normalizer</tt> will iterate.
	417	* The iteration position is set to the beginning of the string.
	418	*/
	419	void
	420	Normalizer::setText(const CharacterIterator& newText,
	421	UErrorCode &status)
	422	{
	423	if (U_FAILURE(status)) {
	424	return;
	425	}
	426	CharacterIterator *newIter = newText.clone();
	427	if (newIter == NULL) {
	428	status = U_MEMORY_ALLOCATION_ERROR;
	429	return;
	430	}
729e4ab9 A	431	delete text;
729e4ab9 A	432	text = newIter;
b75a7d8f A	433	reset();
	434	}
	435
	436	void
	437	Normalizer::setText(const UChar* newText,
	438	int32_t length,
	439	UErrorCode &status)
	440	{
	441	if (U_FAILURE(status)) {
	442	return;
	443	}
	444	CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
	445	if (newIter == NULL) {
	446	status = U_MEMORY_ALLOCATION_ERROR;
	447	return;
	448	}
729e4ab9 A	449	delete text;
729e4ab9 A	450	text = newIter;
b75a7d8f A	451	reset();
	452	}
	453
	454	/**
	455	* Copies the text under iteration into the UnicodeString referred to by "result".
	456	* @param result Receives a copy of the text under iteration.
	457	*/
	458	void
	459	Normalizer::getText(UnicodeString& result)
	460	{
729e4ab9	461	text->getText(result);
b75a7d8f A	462	}
	463
	464	//-------------------------------------------------------------------------
	465	// Private utility methods
	466	//-------------------------------------------------------------------------
	467
	468	void Normalizer::clearBuffer() {
	469	buffer.remove();
	470	bufferPos=0;
	471	}
	472
	473	UBool
	474	Normalizer::nextNormalize() {
b75a7d8f A	475	clearBuffer();
b75a7d8f A	476	currentIndex=nextIndex;
729e4ab9 A	477	text->setIndex(nextIndex);
729e4ab9 A	478	if(!text->hasNext()) {
b75a7d8f A	479	return FALSE;
b75a7d8f A	480	}
729e4ab9 A	481	// Skip at least one character so we make progress.
	482	UnicodeString segment(text->next32PostInc());
	483	while(text->hasNext()) {
	484	UChar32 c;
	485	if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
	486	text->move32(-1, CharacterIterator::kCurrent);
	487	break;
	488	}
	489	segment.append(c);
b75a7d8f	490	}
729e4ab9 A	491	nextIndex=text->getIndex();
	492	UErrorCode errorCode=U_ZERO_ERROR;
	493	fNorm2->normalize(segment, buffer, errorCode);
b75a7d8f A	494	return U_SUCCESS(errorCode) && !buffer.isEmpty();
	495	}
	496
	497	UBool
	498	Normalizer::previousNormalize() {
b75a7d8f A	499	clearBuffer();
b75a7d8f A	500	nextIndex=currentIndex;
729e4ab9 A	501	text->setIndex(currentIndex);
729e4ab9 A	502	if(!text->hasPrevious()) {
b75a7d8f A	503	return FALSE;
b75a7d8f A	504	}
729e4ab9 A	505	UnicodeString segment;
	506	while(text->hasPrevious()) {
	507	UChar32 c=text->previous32();
	508	segment.insert(0, c);
	509	if(fNorm2->hasBoundaryBefore(c)) {
	510	break;
	511	}
b75a7d8f	512	}
729e4ab9 A	513	currentIndex=text->getIndex();
	514	UErrorCode errorCode=U_ZERO_ERROR;
	515	fNorm2->normalize(segment, buffer, errorCode);
b75a7d8f	516	bufferPos=buffer.length();
b75a7d8f A	517	return U_SUCCESS(errorCode) && !buffer.isEmpty();
	518	}
	519
	520	U_NAMESPACE_END
	521
	522	#endif /* #if !UCONFIG_NO_NORMALIZATION */