[apple/icu.git] / icuSources / common / normlzr.cpp

/*
 *************************************************************************
 * COPYRIGHT: 
 * Copyright (c) 1996-2012, International Business Machines Corporation and
 * others. All Rights Reserved.
 *************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "normalizer2impl.h"
#include "uprops.h"  // for uniset_getUnicode32Instance()

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)

//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------

/**
 * Constructs a Normalizer over the given string.
 * The string is wrapped in a newly allocated StringCharacterIterator that
 * this object deletes in its destructor. Options start at 0, both input
 * indexes start at 0, and the output buffer starts empty.
 * @param str  The text to iterate over.
 * @param mode The normalization mode used to select the implementation.
 */
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new StringCharacterIterator(str)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Resolve fNorm2 from fUMode and fOptions.
    init();
}

/**
 * Constructs a Normalizer over a UChar array.
 * The pointer and length are handed to a newly allocated
 * UCharCharacterIterator that this object deletes in its destructor.
 * Options start at 0, both input indexes start at 0, and the output
 * buffer starts empty.
 * @param str    The UChar array to iterate over.
 * @param length Passed through to UCharCharacterIterator together with str.
 * @param mode   The normalization mode used to select the implementation.
 */
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(new UCharCharacterIterator(str, length)),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Resolve fNorm2 from fUMode and fOptions.
    init();
}

/**
 * Constructs a Normalizer over a clone of the given CharacterIterator.
 * The clone is deleted in this object's destructor; the caller's iterator
 * is only used as the source of the clone. Options start at 0, both input
 * indexes start at 0, and the output buffer starts empty.
 * @param iter The iterator to clone.
 * @param mode The normalization mode used to select the implementation.
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(iter.clone()),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    // Resolve fNorm2 from fUMode and fOptions.
    init();
}

/**
 * Copy constructor.
 * Copies the mode, options, both input indexes, the output buffer and the
 * buffer position, and clones the source's text iterator. The normalizer
 * pointers are not copied: they start as NULL and init() sets them up again
 * from the copied mode and options.
 * @param copy The Normalizer to duplicate.
 */
Normalizer::Normalizer(const Normalizer &copy)
    : UObject(copy),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(copy.fUMode),
      fOptions(copy.fOptions),
      text(copy.text->clone()),
      currentIndex(copy.currentIndex),
      nextIndex(copy.nextIndex),
      buffer(copy.buffer),
      bufferPos(copy.bufferPos)
{
    // Resolve fNorm2 (and, if needed, fFilteredNorm2) for this instance.
    init();
}

void
Normalizer::init() {
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
    if(fOptions&UNORM_UNICODE_3_2) {
        delete fFilteredNorm2;
        fNorm2=fFilteredNorm2=
            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
    }
    if(U_FAILURE(errorCode)) {
        errorCode=U_ZERO_ERROR;
        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
    }
}

/**
 * Destructor. Releases the two heap objects held by this Normalizer:
 * the text iterator and the optional filtered normalizer.
 */
Normalizer::~Normalizer()
{
    delete text;
    delete fFilteredNorm2;
}

/**
 * Returns a heap-allocated copy of this Normalizer, built with the
 * copy constructor.
 */
Normalizer*
Normalizer::clone() const
{
    Normalizer *duplicate = new Normalizer(*this);
    return duplicate;
}

/**
 * Generates a hash code for this iterator.
 * The value is the sum of the text iterator's hash, the mode, the options,
 * the output buffer's hash, the buffer position and both input indexes.
 */
int32_t Normalizer::hashCode() const
{
    // Accumulate left to right, in the same order as a single sum expression.
    int32_t hash = text->hashCode();
    hash += fUMode;
    hash += fOptions;
    hash += buffer.hashCode();
    hash += bufferPos;
    hash += currentIndex;
    hash += nextIndex;
    return hash;
}
    
/**
 * Equality test.
 * An object always equals itself. Otherwise two Normalizers are equal when
 * their mode, options, text iterators (compared by value), output buffers,
 * buffer positions and nextIndex all match.
 */
UBool Normalizer::operator==(const Normalizer& that) const
{
    if(this==&that) {
        return TRUE;
    }
    if(fUMode!=that.fUMode) {
        return FALSE;
    }
    if(fOptions!=that.fOptions) {
        return FALSE;
    }
    // Compare the iterators themselves, not the pointers.
    if(!(*text==*that.text)) {
        return FALSE;
    }
    if(!(buffer==that.buffer)) {
        return FALSE;
    }
    if(bufferPos!=that.bufferPos) {
        return FALSE;
    }
    return nextIndex==that.nextIndex;
}

//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------

/**
 * Normalizes source into result using the given mode.
 *
 * If status already indicates failure, or source is bogus, result is set to
 * bogus (and status becomes U_ILLEGAL_ARGUMENT_ERROR if it was still a
 * success code). When the UNORM_UNICODE_3_2 bit is set in options, the
 * normalizer is wrapped in a FilteredNormalizer2 using the Unicode 3.2 set.
 * source and result may be the same object.
 *
 * @param source  Text to normalize.
 * @param mode    Normalization mode.
 * @param options Option bits; only UNORM_UNICODE_3_2 is examined here.
 * @param result  Receives the normalized text.
 * @param status  ICU error code, in/out.
 */
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source,
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    if(U_FAILURE(status) || source.isBogus()) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // When input and output are the same object, write into a scratch
    // string and copy it over only on success.
    UnicodeString scratch;
    const UBool aliased=(&source==&result);
    UnicodeString &target=aliased ? scratch : result;

    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_SUCCESS(status)) {
        if((options&UNORM_UNICODE_3_2)!=0) {
            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
            filtered.normalize(source, target, status);
        } else {
            n2->normalize(source, target, status);
        }
    }

    if(aliased && U_SUCCESS(status)) {
        result=scratch;
    }
}

/**
 * Composes source into result by delegating to normalize() with
 * UNORM_NFKC when compat is true and UNORM_NFC otherwise.
 */
void U_EXPORT2
Normalizer::compose(const UnicodeString& source,
                    UBool compat, int32_t options,
                    UnicodeString& result,
                    UErrorCode &status) {
    const UNormalizationMode mode=compat ? UNORM_NFKC : UNORM_NFC;
    normalize(source, mode, options, result, status);
}

/**
 * Decomposes source into result by delegating to normalize() with
 * UNORM_NFKD when compat is true and UNORM_NFD otherwise.
 */
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source,
                      UBool compat, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    const UNormalizationMode mode=compat ? UNORM_NFKD : UNORM_NFD;
    normalize(source, mode, options, result, status);
}

/**
 * Runs the quick check of the normalizer for the given mode on source.
 * Returns UNORM_MAYBE when the normalizer instance cannot be obtained.
 * With the UNORM_UNICODE_3_2 option bit set, the check goes through a
 * FilteredNormalizer2 using the Unicode 3.2 set.
 */
UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    if((options&UNORM_UNICODE_3_2)==0) {
        return n2->quickCheck(source, status);
    }
    FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
    return filtered.quickCheck(source, status);
}

/**
 * Asks the normalizer for the given mode whether source is normalized.
 * Returns FALSE when the normalizer instance cannot be obtained.
 * With the UNORM_UNICODE_3_2 option bit set, the test goes through a
 * FilteredNormalizer2 using the Unicode 3.2 set.
 */
UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    if((options&UNORM_UNICODE_3_2)==0) {
        return n2->isNormalized(source, status);
    }
    FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
    return filtered.isNormalized(source, status);
}

/**
 * Copies left into result and then appends right through the normalizer's
 * append() for the given mode.
 *
 * If errorCode already indicates failure, or either input is bogus, result
 * is set to bogus (and errorCode becomes U_ILLEGAL_ARGUMENT_ERROR if it was
 * still a success code). right and result may be the same object.
 *
 * @return result
 */
UnicodeString & U_EXPORT2
Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || left.isBogus() || right.isBogus()) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return result;
    }

    // Overwriting result would destroy right if they are the same object,
    // so build the output in a scratch string in that case.
    UnicodeString scratch;
    const UBool rightIsResult=(&right==&result);
    UnicodeString &target=rightIsResult ? scratch : result;

    target=left;
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_SUCCESS(errorCode)) {
        if((options&UNORM_UNICODE_3_2)!=0) {
            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(errorCode));
            filtered.append(target, right, errorCode);
        } else {
            n2->append(target, right, errorCode);
        }
    }

    if(rightIsResult && U_SUCCESS(errorCode)) {
        result=scratch;
    }
    return result;
}

//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------

/**
 * Return the current character in the normalized text without moving the
 * buffer position. If the buffer is exhausted, the next segment is
 * normalized first; DONE is returned when that yields nothing.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}

/**
 * Return the next character in the normalized text and advance the buffer
 * position past it (by its length in UTF-16 code units). If the buffer is
 * exhausted, the next segment is normalized first; {@link #DONE} is
 * returned when that yields nothing.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 cp=buffer.char32At(bufferPos);
    bufferPos+=U16_LENGTH(cp);
    return cp;
}

/**
 * Return the previous character in the normalized text and move the buffer
 * position back over it (by its length in UTF-16 code units). If the buffer
 * position is at the start, the preceding segment is normalized first;
 * {@link #DONE} is returned when that yields nothing.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 cp=buffer.char32At(bufferPos-1);
    bufferPos-=U16_LENGTH(cp);
    return cp;
}

void Normalizer::reset() {
    currentIndex=nextIndex=text->setToStart();
    clearBuffer();
}

/**
 * Moves the text iterator to the given input index, sets both input
 * indexes to the position the iterator then reports, and discards the
 * output buffer.
 */
void
Normalizer::setIndexOnly(int32_t index) {
    text->setIndex(index);  // pins index
    // Read the position back from the iterator rather than trusting 'index'.
    const int32_t pinned=text->getIndex();
    nextIndex=pinned;
    currentIndex=pinned;
    clearBuffer();
}

/**
 * Return the first character in the normalized text.  This resets
 * the <tt>Normalizer's</tt> position to the beginning of the text
 * and then reads one character with next().
 */
UChar32 Normalizer::first() {
    reset();
    const UChar32 firstChar=next();
    return firstChar;
}

/**
 * Return the last character in the normalized text.  This moves the text
 * iterator to its end, sets both input indexes to that position, discards
 * the output buffer, and then reads one character with previous().
 */
UChar32 Normalizer::last() {
    const int32_t end=text->setToEnd();
    nextIndex=end;
    currentIndex=end;
    clearBuffer();
    return previous();
}

/**
 * Retrieve the current iteration position in the input text that is
 * being normalized.
 * <p>
 * While unread characters remain in the output buffer this is
 * <tt>currentIndex</tt>; once the buffer has been read to its end it is
 * <tt>nextIndex</tt>.
 * <p>
 * <b>Note:</b> This method reports a position in the <em>input</em>, while
 * {@link #next} and {@link #previous} iterate through characters in the
 * <em>output</em>.  This means that there is not necessarily a one-to-one
 * correspondence between characters returned by <tt>next</tt> and
 * <tt>previous</tt> and the indices returned from {@link #getIndex}.
 */
int32_t Normalizer::getIndex() const {
    const UBool hasUnreadOutput=(bufferPos<buffer.length());
    return hasUnreadOutput ? currentIndex : nextIndex;
}

/**
 * Retrieve the index of the start of the input text, as reported by the
 * underlying text iterator's startIndex().
 */
int32_t Normalizer::startIndex() const {
    const int32_t begin=text->startIndex();
    return begin;
}

/**
 * Retrieve the index of the end of the input text, as reported by the
 * underlying text iterator's endIndex().
 */
int32_t Normalizer::endIndex() const {
    const int32_t limit=text->endIndex();
    return limit;
}

//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------

/**
 * Stores the new normalization mode and re-runs init() so that the
 * normalizer implementation matches it.
 */
void Normalizer::setMode(UNormalizationMode newMode) {
    fUMode = newMode;
    init();  // re-select fNorm2 for the new mode
}

/**
 * Returns the normalization mode currently stored in this object.
 */
UNormalizationMode Normalizer::getUMode() const {
    return fUMode;
}

/**
 * Sets or clears the given option bit(s) in fOptions and re-runs init()
 * so that the normalizer implementation matches the new options.
 * @param option Bit mask of the option(s) to change.
 * @param value  True to set the bits, false to clear them.
 */
void Normalizer::setOption(int32_t option, UBool value) {
    fOptions = value ? (fOptions | option) : (fOptions & ~option);
    init();  // re-select fNorm2 for the new options
}

/**
 * Reports whether any of the given option bit(s) is set in fOptions.
 */
UBool Normalizer::getOption(int32_t option) const {
    const int32_t masked = fOptions & option;
    return masked != 0;
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the input text.
 */
void
Normalizer::setText(const UnicodeString& newText, 
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new StringCharacterIterator(newText);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the string.
 */
void
Normalizer::setText(const CharacterIterator& newText, 
                    UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = newText.clone();
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

void
Normalizer::setText(const UChar* newText,
                    int32_t length,
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}

/**
 * Copies the text under iteration into the UnicodeString referred to by
 * "result", by forwarding to the text iterator's getText().
 * @param result Receives a copy of the text under iteration.
 */
void Normalizer::getText(UnicodeString& result) {
    text->getText(result);
}

//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------

/**
 * Empties the output buffer and puts the buffer position back at 0.
 */
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}

/**
 * Normalizes the input segment that starts at nextIndex into the buffer.
 *
 * The segment always contains the first code point at nextIndex, followed by
 * every further code point up to (not including) the first one for which
 * fNorm2->hasBoundaryBefore() is true. On return currentIndex is the start
 * of the segment and nextIndex is the position just after it.
 *
 * @return TRUE if normalization succeeded and produced a non-empty buffer;
 *         FALSE at the end of the text, on error, or on empty output.
 */
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->setIndex(nextIndex);
    if(!text->hasNext()) {
        return FALSE;
    }

    // Skip at least one character so we make progress.
    UnicodeString segment(text->next32PostInc());
    for(;;) {
        if(!text->hasNext()) {
            break;
        }
        const UChar32 cp=text->next32PostInc();
        if(fNorm2->hasBoundaryBefore(cp)) {
            // cp belongs to the following segment: step back over it.
            text->move32(-1, CharacterIterator::kCurrent);
            break;
        }
        segment.append(cp);
    }
    nextIndex=text->getIndex();

    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

/**
 * Normalizes the input segment that ends at currentIndex into the buffer.
 *
 * Code points are read backwards from currentIndex and prepended to the
 * segment; reading stops after the first one for which
 * fNorm2->hasBoundaryBefore() is true, or at the start of the text.
 * On return nextIndex is the end of the segment, currentIndex is its start,
 * and bufferPos is at the end of the buffer.
 *
 * @return TRUE if normalization succeeded and produced a non-empty buffer;
 *         FALSE at the start of the text, on error, or on empty output.
 */
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->setIndex(currentIndex);
    if(!text->hasPrevious()) {
        return FALSE;
    }

    UnicodeString segment;
    for(;;) {
        if(!text->hasPrevious()) {
            break;
        }
        const UChar32 cp=text->previous32();
        segment.insert(0, cp);
        if(fNorm2->hasBoundaryBefore(cp)) {
            // cp starts the segment; nothing before it is needed.
            break;
        }
    }
    currentIndex=text->getIndex();

    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    // Backward iteration reads the buffer from its end.
    bufferPos=buffer.length();
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */
Commit	Line	Data
b75a7d8f A	1	/*
	2	*************************************************************************
	3	* COPYRIGHT:
51004dcb	4	* Copyright (c) 1996-2012, International Business Machines Corporation and
b75a7d8f A	5	* others. All Rights Reserved.
	6	*************************************************************************
	7	*/
	8
	9	#include "unicode/utypes.h"
	10
	11	#if !UCONFIG_NO_NORMALIZATION
	12
729e4ab9	13	#include "unicode/uniset.h"
b75a7d8f A	14	#include "unicode/unistr.h"
	15	#include "unicode/chariter.h"
	16	#include "unicode/schriter.h"
	17	#include "unicode/uchriter.h"
b75a7d8f	18	#include "unicode/normlzr.h"
4388f060	19	#include "unicode/utf16.h"
b75a7d8f	20	#include "cmemory.h"
729e4ab9 A	21	#include "normalizer2impl.h"
729e4ab9 A	22	#include "uprops.h" // for uniset_getUnicode32Instance()
b75a7d8f A	23
	24	U_NAMESPACE_BEGIN
	25
374ca955	26	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
b75a7d8f A	27
	28	//-------------------------------------------------------------------------
	29	// Constructors and other boilerplate
	30	//-------------------------------------------------------------------------
	31
	32	Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
729e4ab9 A	33	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	34	text(new StringCharacterIterator(str)),
b75a7d8f A	35	currentIndex(0), nextIndex(0),
	36	buffer(), bufferPos(0)
	37	{
729e4ab9	38	init();
b75a7d8f A	39	}
	40
	41	Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
729e4ab9 A	42	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	43	text(new UCharCharacterIterator(str, length)),
b75a7d8f A	44	currentIndex(0), nextIndex(0),
	45	buffer(), bufferPos(0)
	46	{
729e4ab9	47	init();
b75a7d8f A	48	}
	49
	50	Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
729e4ab9 A	51	UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
729e4ab9 A	52	text(iter.clone()),
b75a7d8f A	53	currentIndex(0), nextIndex(0),
	54	buffer(), bufferPos(0)
	55	{
729e4ab9	56	init();
b75a7d8f A	57	}
	58
	59	Normalizer::Normalizer(const Normalizer &copy) :
729e4ab9 A	60	UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
729e4ab9 A	61	text(copy.text->clone()),
b75a7d8f A	62	currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
	63	buffer(copy.buffer), bufferPos(copy.bufferPos)
	64	{
729e4ab9	65	init();
b75a7d8f A	66	}
b75a7d8f A	67
b75a7d8f	68	void
729e4ab9	69	Normalizer::init() {
b75a7d8f	70	UErrorCode errorCode=U_ZERO_ERROR;
729e4ab9 A	71	fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
	72	if(fOptions&UNORM_UNICODE_3_2) {
	73	delete fFilteredNorm2;
	74	fNorm2=fFilteredNorm2=
	75	new FilteredNormalizer2(fNorm2, uniset_getUnicode32Instance(errorCode));
	76	}
	77	if(U_FAILURE(errorCode)) {
	78	errorCode=U_ZERO_ERROR;
	79	fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
b75a7d8f A	80	}
	81	}
	82
	83	Normalizer::~Normalizer()
	84	{
729e4ab9 A	85	delete fFilteredNorm2;
729e4ab9 A	86	delete text;
b75a7d8f A	87	}
	88
	89	Normalizer*
	90	Normalizer::clone() const
	91	{
729e4ab9	92	return new Normalizer(*this);
b75a7d8f A	93	}
	94
	95	/**
	96	* Generates a hash code for this iterator.
	97	*/
	98	int32_t Normalizer::hashCode() const
	99	{
729e4ab9	100	return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
b75a7d8f A	101	}
	102
	103	UBool Normalizer::operator==(const Normalizer& that) const
	104	{
	105	return
	106	this==&that \|\|
729e4ab9	107	(fUMode==that.fUMode &&
b75a7d8f	108	fOptions==that.fOptions &&
729e4ab9	109	text==that.text &&
b75a7d8f A	110	buffer==that.buffer &&
b75a7d8f A	111	bufferPos==that.bufferPos &&
729e4ab9	112	nextIndex==that.nextIndex);
b75a7d8f A	113	}
	114
	115	//-------------------------------------------------------------------------
	116	// Static utility methods
	117	//-------------------------------------------------------------------------
	118
374ca955	119	void U_EXPORT2
b75a7d8f A	120	Normalizer::normalize(const UnicodeString& source,
	121	UNormalizationMode mode, int32_t options,
	122	UnicodeString& result,
	123	UErrorCode &status) {
	124	if(source.isBogus() \|\| U_FAILURE(status)) {
	125	result.setToBogus();
	126	if(U_SUCCESS(status)) {
	127	status=U_ILLEGAL_ARGUMENT_ERROR;
	128	}
	129	} else {
	130	UnicodeString localDest;
	131	UnicodeString *dest;
	132
	133	if(&source!=&result) {
	134	dest=&result;
	135	} else {
	136	// the source and result strings are the same object, use a temporary one
	137	dest=&localDest;
	138	}
729e4ab9 A	139	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	140	if(U_SUCCESS(status)) {
	141	if(options&UNORM_UNICODE_3_2) {
	142	FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	143	normalize(source, *dest, status);
	144	} else {
	145	n2->normalize(source, *dest, status);
	146	}
b75a7d8f	147	}
729e4ab9	148	if(dest==&localDest && U_SUCCESS(status)) {
b75a7d8f A	149	result=*dest;
b75a7d8f A	150	}
b75a7d8f A	151	}
	152	}
	153
374ca955	154	void U_EXPORT2
b75a7d8f A	155	Normalizer::compose(const UnicodeString& source,
	156	UBool compat, int32_t options,
	157	UnicodeString& result,
	158	UErrorCode &status) {
729e4ab9	159	normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
b75a7d8f A	160	}
b75a7d8f A	161
374ca955	162	void U_EXPORT2
b75a7d8f A	163	Normalizer::decompose(const UnicodeString& source,
	164	UBool compat, int32_t options,
	165	UnicodeString& result,
	166	UErrorCode &status) {
729e4ab9 A	167	normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
	168	}
	169
	170	UNormalizationCheckResult
	171	Normalizer::quickCheck(const UnicodeString& source,
	172	UNormalizationMode mode, int32_t options,
	173	UErrorCode &status) {
	174	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	175	if(U_SUCCESS(status)) {
	176	if(options&UNORM_UNICODE_3_2) {
	177	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	178	quickCheck(source, status);
	179	} else {
	180	return n2->quickCheck(source, status);
b75a7d8f A	181	}
b75a7d8f A	182	} else {
729e4ab9 A	183	return UNORM_MAYBE;
	184	}
	185	}
b75a7d8f	186
729e4ab9 A	187	UBool
	188	Normalizer::isNormalized(const UnicodeString& source,
	189	UNormalizationMode mode, int32_t options,
	190	UErrorCode &status) {
	191	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
	192	if(U_SUCCESS(status)) {
	193	if(options&UNORM_UNICODE_3_2) {
	194	return FilteredNormalizer2(n2, uniset_getUnicode32Instance(status)).
	195	isNormalized(source, status);
b75a7d8f	196	} else {
729e4ab9	197	return n2->isNormalized(source, status);
b75a7d8f	198	}
729e4ab9 A	199	} else {
729e4ab9 A	200	return FALSE;
b75a7d8f A	201	}
	202	}
	203
374ca955	204	UnicodeString & U_EXPORT2
4388f060	205	Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
b75a7d8f A	206	UnicodeString &result,
	207	UNormalizationMode mode, int32_t options,
	208	UErrorCode &errorCode) {
	209	if(left.isBogus() \|\| right.isBogus() \|\| U_FAILURE(errorCode)) {
	210	result.setToBogus();
	211	if(U_SUCCESS(errorCode)) {
	212	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	213	}
	214	} else {
	215	UnicodeString localDest;
	216	UnicodeString *dest;
	217
729e4ab9	218	if(&right!=&result) {
b75a7d8f A	219	dest=&result;
b75a7d8f A	220	} else {
729e4ab9	221	// the right and result strings are the same object, use a temporary one
b75a7d8f A	222	dest=&localDest;
b75a7d8f A	223	}
729e4ab9 A	224	*dest=left;
	225	const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
	226	if(U_SUCCESS(errorCode)) {
	227	if(options&UNORM_UNICODE_3_2) {
	228	FilteredNormalizer2(n2, uniset_getUnicode32Instance(errorCode)).
	229	append(*dest, right, errorCode);
	230	} else {
	231	n2->append(*dest, right, errorCode);
	232	}
b75a7d8f	233	}
729e4ab9	234	if(dest==&localDest && U_SUCCESS(errorCode)) {
b75a7d8f A	235	result=*dest;
b75a7d8f A	236	}
b75a7d8f A	237	}
	238	return result;
	239	}
	240
	241	//-------------------------------------------------------------------------
	242	// Iteration API
	243	//-------------------------------------------------------------------------
	244
	245	/**
	246	* Return the current character in the normalized text.
	247	*/
	248	UChar32 Normalizer::current() {
	249	if(bufferPos<buffer.length() \|\| nextNormalize()) {
	250	return buffer.char32At(bufferPos);
	251	} else {
	252	return DONE;
	253	}
	254	}
	255
	256	/**
	257	* Return the next character in the normalized text and advance
	258	* the iteration position by one. If the end
	259	* of the text has already been reached, {@link #DONE} is returned.
	260	*/
	261	UChar32 Normalizer::next() {
	262	if(bufferPos<buffer.length() \|\| nextNormalize()) {
	263	UChar32 c=buffer.char32At(bufferPos);
4388f060	264	bufferPos+=U16_LENGTH(c);
b75a7d8f A	265	return c;
	266	} else {
	267	return DONE;
	268	}
	269	}
	270
	271	/**
	272	* Return the previous character in the normalized text and decrement
	273	* the iteration position by one. If the beginning
	274	* of the text has already been reached, {@link #DONE} is returned.
	275	*/
	276	UChar32 Normalizer::previous() {
	277	if(bufferPos>0 \|\| previousNormalize()) {
	278	UChar32 c=buffer.char32At(bufferPos-1);
4388f060	279	bufferPos-=U16_LENGTH(c);
b75a7d8f A	280	return c;
	281	} else {
	282	return DONE;
	283	}
	284	}
	285
	286	void Normalizer::reset() {
729e4ab9	287	currentIndex=nextIndex=text->setToStart();
b75a7d8f A	288	clearBuffer();
	289	}
	290
	291	void
	292	Normalizer::setIndexOnly(int32_t index) {
729e4ab9 A	293	text->setIndex(index); // pins index
729e4ab9 A	294	currentIndex=nextIndex=text->getIndex();
b75a7d8f A	295	clearBuffer();
	296	}
	297
	298	/**
729e4ab9 A	299	* Return the first character in the normalized text. This resets
729e4ab9 A	300	* the <tt>Normalizer's</tt> position to the beginning of the text.
b75a7d8f A	301	*/
	302	UChar32 Normalizer::first() {
	303	reset();
	304	return next();
	305	}
	306
	307	/**
729e4ab9	308	* Return the last character in the normalized text. This resets
b75a7d8f A	309	* the <tt>Normalizer's</tt> position to be just before the
	310	* the input text corresponding to that normalized character.
	311	*/
	312	UChar32 Normalizer::last() {
729e4ab9	313	currentIndex=nextIndex=text->setToEnd();
b75a7d8f A	314	clearBuffer();
	315	return previous();
	316	}
	317
	318	/**
	319	* Retrieve the current iteration position in the input text that is
	320	* being normalized. This method is useful in applications such as
	321	* searching, where you need to be able to determine the position in
	322	* the input text that corresponds to a given normalized output character.
	323	* <p>
	324	* <b>Note:</b> This method sets the position in the <em>input</em>, while
	325	* {@link #next} and {@link #previous} iterate through characters in the
	326	* <em>output</em>. This means that there is not necessarily a one-to-one
	327	* correspondence between characters returned by <tt>next</tt> and
	328	* <tt>previous</tt> and the indices passed to and returned from
	329	* <tt>setIndex</tt> and {@link #getIndex}.
	330	*
	331	*/
	332	int32_t Normalizer::getIndex() const {
	333	if(bufferPos<buffer.length()) {
	334	return currentIndex;
	335	} else {
	336	return nextIndex;
	337	}
	338	}
	339
	340	/**
729e4ab9	341	* Retrieve the index of the start of the input text. This is the begin index
b75a7d8f A	342	* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
	343	* over which this <tt>Normalizer</tt> is iterating
	344	*/
	345	int32_t Normalizer::startIndex() const {
729e4ab9	346	return text->startIndex();
b75a7d8f A	347	}
	348
	349	/**
729e4ab9	350	* Retrieve the index of the end of the input text. This is the end index
b75a7d8f A	351	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
	352	* over which this <tt>Normalizer</tt> is iterating
	353	*/
	354	int32_t Normalizer::endIndex() const {
729e4ab9	355	return text->endIndex();
b75a7d8f A	356	}
	357
	358	//-------------------------------------------------------------------------
	359	// Property access methods
	360	//-------------------------------------------------------------------------
	361
	362	void
	363	Normalizer::setMode(UNormalizationMode newMode)
	364	{
	365	fUMode = newMode;
729e4ab9	366	init();
b75a7d8f A	367	}
	368
	369	UNormalizationMode
	370	Normalizer::getUMode() const
	371	{
	372	return fUMode;
	373	}
	374
	375	void
	376	Normalizer::setOption(int32_t option,
	377	UBool value)
	378	{
	379	if (value) {
	380	fOptions \|= option;
	381	} else {
	382	fOptions &= (~option);
	383	}
729e4ab9	384	init();
b75a7d8f A	385	}
	386
	387	UBool
	388	Normalizer::getOption(int32_t option) const
	389	{
	390	return (fOptions & option) != 0;
	391	}
	392
	393	/**
	394	* Set the input text over which this <tt>Normalizer</tt> will iterate.
729e4ab9	395	* The iteration position is set to the beginning of the input text.
b75a7d8f A	396	*/
	397	void
	398	Normalizer::setText(const UnicodeString& newText,
	399	UErrorCode &status)
	400	{
	401	if (U_FAILURE(status)) {
	402	return;
	403	}
	404	CharacterIterator *newIter = new StringCharacterIterator(newText);
	405	if (newIter == NULL) {
	406	status = U_MEMORY_ALLOCATION_ERROR;
	407	return;
	408	}
729e4ab9 A	409	delete text;
729e4ab9 A	410	text = newIter;
b75a7d8f A	411	reset();
	412	}
	413
	414	/**
	415	* Set the input text over which this <tt>Normalizer</tt> will iterate.
	416	* The iteration position is set to the beginning of the string.
	417	*/
	418	void
	419	Normalizer::setText(const CharacterIterator& newText,
	420	UErrorCode &status)
	421	{
	422	if (U_FAILURE(status)) {
	423	return;
	424	}
	425	CharacterIterator *newIter = newText.clone();
	426	if (newIter == NULL) {
	427	status = U_MEMORY_ALLOCATION_ERROR;
	428	return;
	429	}
729e4ab9 A	430	delete text;
729e4ab9 A	431	text = newIter;
b75a7d8f A	432	reset();
	433	}
	434
	435	void
	436	Normalizer::setText(const UChar* newText,
	437	int32_t length,
	438	UErrorCode &status)
	439	{
	440	if (U_FAILURE(status)) {
	441	return;
	442	}
	443	CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
	444	if (newIter == NULL) {
	445	status = U_MEMORY_ALLOCATION_ERROR;
	446	return;
	447	}
729e4ab9 A	448	delete text;
729e4ab9 A	449	text = newIter;
b75a7d8f A	450	reset();
	451	}
	452
	453	/**
	454	* Copies the text under iteration into the UnicodeString referred to by "result".
	455	* @param result Receives a copy of the text under iteration.
	456	*/
	457	void
	458	Normalizer::getText(UnicodeString& result)
	459	{
729e4ab9	460	text->getText(result);
b75a7d8f A	461	}
	462
	463	//-------------------------------------------------------------------------
	464	// Private utility methods
	465	//-------------------------------------------------------------------------
	466
	467	void Normalizer::clearBuffer() {
	468	buffer.remove();
	469	bufferPos=0;
	470	}
	471
	472	UBool
	473	Normalizer::nextNormalize() {
b75a7d8f A	474	clearBuffer();
b75a7d8f A	475	currentIndex=nextIndex;
729e4ab9 A	476	text->setIndex(nextIndex);
729e4ab9 A	477	if(!text->hasNext()) {
b75a7d8f A	478	return FALSE;
b75a7d8f A	479	}
729e4ab9 A	480	// Skip at least one character so we make progress.
	481	UnicodeString segment(text->next32PostInc());
	482	while(text->hasNext()) {
	483	UChar32 c;
	484	if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
	485	text->move32(-1, CharacterIterator::kCurrent);
	486	break;
	487	}
	488	segment.append(c);
b75a7d8f	489	}
729e4ab9 A	490	nextIndex=text->getIndex();
	491	UErrorCode errorCode=U_ZERO_ERROR;
	492	fNorm2->normalize(segment, buffer, errorCode);
b75a7d8f A	493	return U_SUCCESS(errorCode) && !buffer.isEmpty();
	494	}
	495
	496	UBool
	497	Normalizer::previousNormalize() {
b75a7d8f A	498	clearBuffer();
b75a7d8f A	499	nextIndex=currentIndex;
729e4ab9 A	500	text->setIndex(currentIndex);
729e4ab9 A	501	if(!text->hasPrevious()) {
b75a7d8f A	502	return FALSE;
b75a7d8f A	503	}
729e4ab9 A	504	UnicodeString segment;
	505	while(text->hasPrevious()) {
	506	UChar32 c=text->previous32();
	507	segment.insert(0, c);
	508	if(fNorm2->hasBoundaryBefore(c)) {
	509	break;
	510	}
b75a7d8f	511	}
729e4ab9 A	512	currentIndex=text->getIndex();
	513	UErrorCode errorCode=U_ZERO_ERROR;
	514	fNorm2->normalize(segment, buffer, errorCode);
b75a7d8f	515	bufferPos=buffer.length();
b75a7d8f A	516	return U_SUCCESS(errorCode) && !buffer.isEmpty();
	517	}
	518
	519	U_NAMESPACE_END
	520
	521	#endif /* #if !UCONFIG_NO_NORMALIZATION */