[apple/icu.git] / icuSources / common / normlzr.cpp

/*
 *************************************************************************
 * COPYRIGHT: 
 * Copyright (c) 1996-2004, International Business Machines Corporation and
 * others. All Rights Reserved.
 *************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/uiter.h"
#include "unicode/normlzr.h"
#include "cmemory.h"
#include "unormimp.h"

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)

//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------

/**
 * Builds a Normalizer over the contents of a UnicodeString.
 * The options word starts at 0, both input indexes start at 0 and the
 * output buffer is default-constructed. The input is handed to init() as a
 * newly allocated StringCharacterIterator.
 *
 * @param str  text to iterate over
 * @param mode normalization mode stored in fUMode
 */
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
    UObject(),
    fUMode(mode),
    fOptions(0),
    currentIndex(0),
    nextIndex(0),
    buffer(),
    bufferPos(0)
{
    CharacterIterator *input=new StringCharacterIterator(str);
    init(input);
}

/**
 * Builds a Normalizer over a UChar array.
 * The array pointer and length are passed unchanged to a newly allocated
 * UCharCharacterIterator, which is then handed to init().
 *
 * @param str    start of the text to iterate over
 * @param length length argument forwarded to UCharCharacterIterator
 * @param mode   normalization mode stored in fUMode
 */
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
    UObject(),
    fUMode(mode),
    fOptions(0),
    currentIndex(0),
    nextIndex(0),
    buffer(),
    bufferPos(0)
{
    CharacterIterator *input=new UCharCharacterIterator(str, length);
    init(input);
}

/**
 * Builds a Normalizer over a clone of the caller's CharacterIterator.
 * The caller's iterator itself is not stored; only its clone is passed to
 * init().
 *
 * @param iter iterator to clone as the input text
 * @param mode normalization mode stored in fUMode
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
    UObject(),
    fUMode(mode),
    fOptions(0),
    currentIndex(0),
    nextIndex(0),
    buffer(),
    bufferPos(0)
{
    CharacterIterator *input=iter.clone();
    init(input);
}

/**
 * Copy constructor.
 * Copies the mode, options, both input indexes, the output buffer and the
 * buffer position, then gives the new object its own clone of the source
 * object's input iterator.
 *
 * NOTE(review): dereferences copy.text without a NULL check, although
 * init() can leave text NULL when its allocation fails.
 */
Normalizer::Normalizer(const Normalizer &copy) :
    UObject(copy),
    fUMode(copy.fUMode),
    fOptions(copy.fOptions),
    currentIndex(copy.currentIndex),
    nextIndex(copy.nextIndex),
    buffer(copy.buffer),
    bufferPos(copy.bufferPos)
{
    const CharacterIterator *original=(const CharacterIterator *)(copy.text->context);
    init(original->clone());
}

// Backing storage for the zero-length fallback iterator created in init().
static const UChar _NUL=0;

/**
 * Allocates the UCharIterator kept in `text` and attaches an input iterator
 * to it.
 *
 * - If the allocation fails, iter is deleted and text is left NULL.
 * - If unorm_haveData() returns false, iter is deleted and a
 *   UCharCharacterIterator over a zero-length array is attached instead.
 * - Otherwise iter itself is attached.
 *
 * @param iter heap-allocated iterator; it is either attached to text or
 *             deleted here, so the caller must not delete it afterwards
 */
void
Normalizer::init(CharacterIterator *iter) {
    text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator));
    if(text==NULL) {
        delete iter;
        return;
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    if(!unorm_haveData(&errorCode)) {
        // Swap the caller's text for an empty one.
        delete iter;
        iter=new UCharCharacterIterator(&_NUL, 0);
    }
    uiter_setCharacterIterator(text, iter);
}

/**
 * Destroys the CharacterIterator stored in text->context and then frees the
 * UCharIterator itself. Does nothing when text is NULL.
 */
Normalizer::~Normalizer()
{
    if(text==NULL) {
        return;
    }
    CharacterIterator *owned=(CharacterIterator *)text->context;
    delete owned;
    uprv_free(text);
}

/**
 * Returns a new heap-allocated copy of this Normalizer, made with the copy
 * constructor.
 *
 * @return the result of `new Normalizer(*this)`
 */
Normalizer* 
Normalizer::clone() const
{
    // The former if(this!=0) guard is gone: calling a member function
    // through a null pointer is already undefined behavior, so the test was
    // dead code that compilers may delete and warn about.
    return new Normalizer(*this);
}

/**
 * Generates a hash code for this iterator.
 *
 * The hash is the sum of the input iterator's hash code, the mode, the
 * options, the output buffer's hash code, the buffer position and both
 * input indexes.
 *
 * @return the sum, wrapped to 32 bits
 */
int32_t Normalizer::hashCode() const
{
    // Add in unsigned arithmetic. The terms are arbitrary 32-bit values, and
    // overflowing a signed addition is undefined behavior, whereas unsigned
    // addition wraps.
    uint32_t hash=(uint32_t)((CharacterIterator *)(text->context))->hashCode();
    hash+=(uint32_t)fUMode;
    hash+=(uint32_t)fOptions;
    hash+=(uint32_t)buffer.hashCode();
    hash+=(uint32_t)bufferPos;
    hash+=(uint32_t)currentIndex;
    hash+=(uint32_t)nextIndex;
    return (int32_t)hash;
}
    
/**
 * Compares two Normalizers.
 *
 * An object always equals itself. Otherwise the two are equal when mode,
 * options, input iterator, output buffer, buffer position and nextIndex all
 * match. currentIndex is not compared.
 */
UBool Normalizer::operator==(const Normalizer& that) const
{
    if(this==&that) {
        return TRUE;
    }
    if(fUMode!=that.fUMode || fOptions!=that.fOptions) {
        return FALSE;
    }

    CharacterIterator *mine=(CharacterIterator *)(text->context);
    CharacterIterator *theirs=(CharacterIterator *)(that.text->context);
    if(!(*mine==*theirs)) {
        return FALSE;
    }

    return buffer==that.buffer &&
           bufferPos==that.bufferPos &&
           nextIndex==that.nextIndex;
}

//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------

/**
 * Normalizes source into result with unorm_internalNormalize().
 *
 * If source is bogus or status already holds a failure, result is set to
 * bogus; a bogus source with a success status also sets status to
 * U_ILLEGAL_ARGUMENT_ERROR. If the normalization itself fails, result is
 * set to bogus as well.
 *
 * @param source  input text
 * @param mode    normalization mode forwarded to unorm_internalNormalize()
 * @param options options word forwarded to unorm_internalNormalize()
 * @param result  receives the output; may be the same object as source
 * @param status  in/out error code
 */
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source, 
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // When input and output are the same object, write into a scratch
    // string so the input is not modified while it is being read.
    UnicodeString scratch;
    UnicodeString &target=(&source==&result) ? scratch : result;

    // First attempt: ask for a buffer as long as the input.
    UChar *out=target.getBuffer(source.length());
    int32_t outLength=unorm_internalNormalize(out, target.getCapacity(),
                                              source.getBuffer(), source.length(),
                                              mode, options,
                                              &status);
    target.releaseBuffer(outLength);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // Second attempt: ask for the length returned by the first call.
        status=U_ZERO_ERROR;
        out=target.getBuffer(outLength);
        outLength=unorm_internalNormalize(out, target.getCapacity(),
                                          source.getBuffer(), source.length(),
                                          mode, options,
                                          &status);
        target.releaseBuffer(outLength);
    }

    if(&target==&scratch) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}

/**
 * Composes source into result with unorm_compose().
 *
 * If source is bogus or status already holds a failure, result is set to
 * bogus; a bogus source with a success status also sets status to
 * U_ILLEGAL_ARGUMENT_ERROR. If the composition itself fails, result is set
 * to bogus as well.
 *
 * @param source  input text
 * @param compat  forwarded unchanged to unorm_compose()
 *                (presumably selects the compatibility variant; confirm
 *                against unormimp.h)
 * @param options options word forwarded to unorm_compose()
 * @param result  receives the output; may be the same object as source
 * @param status  in/out error code
 */
void U_EXPORT2
Normalizer::compose(const UnicodeString& source, 
                    UBool compat, int32_t options,
                    UnicodeString& result, 
                    UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // Use a temporary when source and result are the same object.
    UnicodeString scratch;
    UnicodeString &target=(&source==&result) ? scratch : result;

    // First attempt: ask for a buffer as long as the input.
    UChar *out=target.getBuffer(source.length());
    int32_t outLength=unorm_compose(out, target.getCapacity(),
                                    source.getBuffer(), source.length(),
                                    compat, options,
                                    &status);
    target.releaseBuffer(outLength);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // Second attempt: ask for the length returned by the first call.
        status=U_ZERO_ERROR;
        out=target.getBuffer(outLength);
        outLength=unorm_compose(out, target.getCapacity(),
                                source.getBuffer(), source.length(),
                                compat, options,
                                &status);
        target.releaseBuffer(outLength);
    }

    if(&target==&scratch) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}

/**
 * Decomposes source into result with unorm_decompose().
 *
 * If source is bogus or status already holds a failure, result is set to
 * bogus; a bogus source with a success status also sets status to
 * U_ILLEGAL_ARGUMENT_ERROR. If the decomposition itself fails, result is
 * set to bogus as well.
 *
 * @param source  input text
 * @param compat  forwarded unchanged to unorm_decompose()
 *                (presumably selects the compatibility variant; confirm
 *                against unormimp.h)
 * @param options options word forwarded to unorm_decompose()
 * @param result  receives the output; may be the same object as source
 * @param status  in/out error code
 */
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source, 
                      UBool compat, int32_t options,
                      UnicodeString& result, 
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // Use a temporary when source and result are the same object.
    UnicodeString scratch;
    UnicodeString &target=(&source==&result) ? scratch : result;

    // First attempt: ask for a buffer as long as the input.
    UChar *out=target.getBuffer(source.length());
    int32_t outLength=unorm_decompose(out, target.getCapacity(),
                                      source.getBuffer(), source.length(),
                                      compat, options,
                                      &status);
    target.releaseBuffer(outLength);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // Second attempt: ask for the length returned by the first call.
        status=U_ZERO_ERROR;
        out=target.getBuffer(outLength);
        outLength=unorm_decompose(out, target.getCapacity(),
                                  source.getBuffer(), source.length(),
                                  compat, options,
                                  &status);
        target.releaseBuffer(outLength);
    }

    if(&target==&scratch) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}

/**
 * Concatenates left and right into result with unorm_concatenate().
 *
 * If either input is bogus or errorCode already holds a failure, result is
 * set to bogus; bogus input with a success code also sets errorCode to
 * U_ILLEGAL_ARGUMENT_ERROR. If the concatenation itself fails, result is
 * set to bogus as well.
 *
 * @param left      first input string
 * @param right     second input string
 * @param result    receives the output; may be the same object as left or
 *                  right
 * @param mode      normalization mode forwarded to unorm_concatenate()
 * @param options   options word forwarded to unorm_concatenate()
 * @param errorCode in/out error code
 * @return result
 */
UnicodeString & U_EXPORT2
Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
    } else {
        UnicodeString localDest;
        UnicodeString *dest;

        if(&left!=&result && &right!=&result) {
            dest=&result;
        } else {
            // one of the source strings is also the result object, use a temporary one
            dest=&localDest;
        }

        // First attempt: ask for a buffer as long as both inputs together.
        UChar *buffer=dest->getBuffer(left.length()+right.length());
        int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
                                         right.getBuffer(), right.length(),
                                         buffer, dest->getCapacity(),
                                         mode, options,
                                         &errorCode);
        dest->releaseBuffer(length);
        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
            // Second attempt: ask for the length returned by the first call.
            // The outer `length` is reused here; the former second
            // declaration shadowed it, unlike normalize/compose/decompose.
            errorCode=U_ZERO_ERROR;
            buffer=dest->getBuffer(length);
            length=unorm_concatenate(left.getBuffer(), left.length(),
                                     right.getBuffer(), right.length(),
                                     buffer, dest->getCapacity(),
                                     mode, options,
                                     &errorCode);
            dest->releaseBuffer(length);
        }

        if(dest==&localDest) {
            result=*dest;
        }
        if(U_FAILURE(errorCode)) {
            result.setToBogus();
        }
    }
    return result;
}

//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------

/**
 * Returns the code point at the current position in the normalized text
 * without moving the position. If the buffer is used up, it is refilled
 * with nextNormalize() first; DONE is returned when that refill fails.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}

/**
 * Returns the code point at the current position in the normalized text
 * and advances the buffer position past it (by UTF_CHAR_LENGTH code units).
 * If the buffer is used up, it is refilled with nextNormalize() first;
 * {@link #DONE} is returned when that refill fails.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos);
    bufferPos+=UTF_CHAR_LENGTH(c);
    return c;
}

/**
 * Returns the code point just before the current position in the
 * normalized text and moves the buffer position back over it (by
 * UTF_CHAR_LENGTH code units). If the position is at the start of the
 * buffer, the buffer is refilled with previousNormalize() first;
 * {@link #DONE} is returned when that refill fails.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos-1);
    bufferPos-=UTF_CHAR_LENGTH(c);
    return c;
}

void Normalizer::reset() {
    currentIndex=nextIndex=text->move(text, 0, UITER_START);
    clearBuffer();
}

/**
 * Moves the input iterator to the given index (relative to UITER_ZERO),
 * stores the index returned by move() in both currentIndex and nextIndex,
 * and clears the output buffer. No normalization is done here.
 *
 * @param index requested input index
 */
void
Normalizer::setIndexOnly(int32_t index) {
    // move() validates the index; keep what it returns, not the raw argument.
    const int32_t validated=text->move(text, index, UITER_ZERO);
    nextIndex=validated;
    currentIndex=validated;
    clearBuffer();
}

/**
 * Returns the first character in the normalized text. This resets the
 * <tt>Normalizer's</tt> position to the beginning of the text and then
 * returns the result of next().
 */
UChar32 Normalizer::first() {
    reset();
    const UChar32 c=next();
    return c;
}

/**
 * Returns the last character in the normalized text. This moves the input
 * iterator to UITER_LIMIT, stores the index returned by move() in both
 * currentIndex and nextIndex, clears the output buffer, and then returns
 * the result of previous().
 */
UChar32 Normalizer::last() {
    const int32_t limit=text->move(text, 0, UITER_LIMIT);
    nextIndex=limit;
    currentIndex=limit;
    clearBuffer();
    return previous();
}

/**
 * Retrieves the current iteration position in the input text that is
 * being normalized. This method is useful in applications such as
 * searching, where you need to be able to determine the position in
 * the input text that corresponds to a given normalized output character.
 * <p>
 * <b>Note:</b> This method reports a position in the <em>input</em>, while
 * {@link #next} and {@link #previous} iterate through characters in the
 * <em>output</em>.  This means that there is not necessarily a one-to-one
 * correspondence between characters returned by <tt>next</tt> and
 * <tt>previous</tt> and the indices returned from {@link #getIndex}.
 *
 * @return currentIndex while buffered output remains unread
 *         (bufferPos is less than the buffer length), otherwise nextIndex
 */
int32_t Normalizer::getIndex() const {
    return (bufferPos<buffer.length()) ? currentIndex : nextIndex;
}

/**
 * Retrieves the index of the start of the input text, as reported by the
 * input iterator for UITER_START.
 */
int32_t Normalizer::startIndex() const {
    const int32_t start=text->getIndex(text, UITER_START);
    return start;
}

/**
 * Retrieves the index of the end of the input text, as reported by the
 * input iterator for UITER_LIMIT.
 */
int32_t Normalizer::endIndex() const {
    const int32_t limit=text->getIndex(text, UITER_LIMIT);
    return limit;
}

//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------

/**
 * Stores a new normalization mode. Only fUMode is changed; the output
 * buffer and input indexes are left as they are.
 *
 * @param newMode mode to store
 */
void
Normalizer::setMode(UNormalizationMode newMode) 
{
    this->fUMode = newMode;
}

/**
 * Returns the normalization mode currently stored in fUMode.
 */
UNormalizationMode
Normalizer::getUMode() const
{
    return this->fUMode;
}

/**
 * Sets or clears bits in the options word.
 *
 * @param option bit mask of the option(s) to change
 * @param value  true sets the bits of option in fOptions, false clears them
 */
void
Normalizer::setOption(int32_t option, 
                      UBool value) 
{
    fOptions = value ? (fOptions | option) : (fOptions & (~option));
}

/**
 * Tests bits in the options word.
 *
 * @param option bit mask to test
 * @return true when at least one bit of option is set in fOptions
 */
UBool
Normalizer::getOption(int32_t option) const
{
    const int32_t masked = fOptions & option;
    return masked != 0;
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the input text->
 */
void
Normalizer::setText(const UnicodeString& newText, 
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new StringCharacterIterator(newText);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete (CharacterIterator *)(text->context);
    text->context = newIter;
    reset();
}

/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the string.
 */
void
Normalizer::setText(const CharacterIterator& newText, 
                    UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = newText.clone();
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete (CharacterIterator *)(text->context);
    text->context = newIter;
    reset();
}

void
Normalizer::setText(const UChar* newText,
                    int32_t length,
                    UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete (CharacterIterator *)(text->context);
    text->context = newIter;
    reset();
}

/**
 * Copies the text under iteration into the UnicodeString referred to by
 * "result", by calling getText() on the input CharacterIterator.
 * @param result Receives a copy of the text under iteration.
 */
void
Normalizer::getText(UnicodeString&  result) 
{
    CharacterIterator *input=(CharacterIterator *)(text->context);
    input->getText(result);
}

//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------

/**
 * Discards all buffered normalized output and sets the buffer position
 * back to 0.
 */
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}

/**
 * Refills the output buffer by normalizing forward from nextIndex.
 *
 * The buffer is cleared, currentIndex is set to nextIndex and the input
 * iterator is moved there. If the iterator has input left, unorm_next()
 * writes into the buffer. On U_BUFFER_OVERFLOW_ERROR the iterator is moved
 * back to nextIndex and the call is repeated with a buffer of the length
 * returned by the first call. Finally nextIndex is set to the iterator's
 * current index.
 *
 * @return TRUE if normalization succeeded and the buffer is not empty;
 *         FALSE if there was no input left or an error occurred
 */
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->move(text, nextIndex, UITER_ZERO);
    if(!text->hasNext(text)) {
        return FALSE;
    }

    // First attempt: use whatever capacity the buffer provides for -1.
    UErrorCode status=U_ZERO_ERROR;
    UChar *dest=buffer.getBuffer(-1);
    int32_t destLength=unorm_next(text, dest, buffer.getCapacity(),
                                  fUMode, fOptions,
                                  TRUE, 0,
                                  &status);
    buffer.releaseBuffer(destLength);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // Second attempt: move the input back and ask for the returned length.
        status=U_ZERO_ERROR;
        text->move(text, nextIndex, UITER_ZERO);
        dest=buffer.getBuffer(destLength);
        destLength=unorm_next(text, dest, buffer.getCapacity(),
                              fUMode, fOptions,
                              TRUE, 0,
                              &status);
        buffer.releaseBuffer(destLength);
    }

    nextIndex=text->getIndex(text, UITER_CURRENT);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

/**
 * Refills the output buffer by normalizing backward from currentIndex.
 *
 * The buffer is cleared, nextIndex is set to currentIndex and the input
 * iterator is moved there. If the iterator has input before that point,
 * unorm_previous() writes into the buffer. On U_BUFFER_OVERFLOW_ERROR the
 * iterator is moved back to currentIndex and the call is repeated with a
 * buffer of the length returned by the first call. Finally bufferPos is
 * set to the end of the buffer and currentIndex to the iterator's current
 * index.
 *
 * @return TRUE if normalization succeeded and the buffer is not empty;
 *         FALSE if there was no preceding input or an error occurred
 */
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->move(text, currentIndex, UITER_ZERO);
    if(!text->hasPrevious(text)) {
        return FALSE;
    }

    // First attempt: use whatever capacity the buffer provides for -1.
    UErrorCode status=U_ZERO_ERROR;
    UChar *dest=buffer.getBuffer(-1);
    int32_t destLength=unorm_previous(text, dest, buffer.getCapacity(),
                                      fUMode, fOptions,
                                      TRUE, 0,
                                      &status);
    buffer.releaseBuffer(destLength);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // Second attempt: move the input back and ask for the returned length.
        status=U_ZERO_ERROR;
        text->move(text, currentIndex, UITER_ZERO);
        dest=buffer.getBuffer(destLength);
        destLength=unorm_previous(text, dest, buffer.getCapacity(),
                                  fUMode, fOptions,
                                  TRUE, 0,
                                  &status);
        buffer.releaseBuffer(destLength);
    }

    // Backward iteration reads the buffer from its end.
    bufferPos=buffer.length();
    currentIndex=text->getIndex(text, UITER_CURRENT);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */
Commit	Line	Data
b75a7d8f A	1	/*
	2	*************************************************************************
	3	* COPYRIGHT:
374ca955	4	* Copyright (c) 1996-2004, International Business Machines Corporation and
b75a7d8f A	5	* others. All Rights Reserved.
	6	*************************************************************************
	7	*/
	8
	9	#include "unicode/utypes.h"
	10
	11	#if !UCONFIG_NO_NORMALIZATION
	12
	13	#include "unicode/unistr.h"
	14	#include "unicode/chariter.h"
	15	#include "unicode/schriter.h"
	16	#include "unicode/uchriter.h"
	17	#include "unicode/uiter.h"
	18	#include "unicode/normlzr.h"
	19	#include "cmemory.h"
	20	#include "unormimp.h"
	21
	22	U_NAMESPACE_BEGIN
	23
374ca955	24	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
b75a7d8f A	25
	26	//-------------------------------------------------------------------------
	27	// Constructors and other boilerplate
	28	//-------------------------------------------------------------------------
	29
	30	Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
	31	UObject(), fUMode(mode), fOptions(0),
	32	currentIndex(0), nextIndex(0),
	33	buffer(), bufferPos(0)
	34	{
	35	init(new StringCharacterIterator(str));
	36	}
	37
	38	Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
	39	UObject(), fUMode(mode), fOptions(0),
	40	currentIndex(0), nextIndex(0),
	41	buffer(), bufferPos(0)
	42	{
	43	init(new UCharCharacterIterator(str, length));
	44	}
	45
	46	Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
	47	UObject(), fUMode(mode), fOptions(0),
	48	currentIndex(0), nextIndex(0),
	49	buffer(), bufferPos(0)
	50	{
	51	init(iter.clone());
	52	}
	53
	54	Normalizer::Normalizer(const Normalizer &copy) :
	55	UObject(copy), fUMode(copy.fUMode), fOptions(copy.fOptions),
	56	currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
	57	buffer(copy.buffer), bufferPos(copy.bufferPos)
	58	{
	59	init(((CharacterIterator *)(copy.text->context))->clone());
	60	}
	61
	62	static const UChar _NUL=0;
	63
	64	void
	65	Normalizer::init(CharacterIterator *iter) {
	66	UErrorCode errorCode=U_ZERO_ERROR;
	67
	68	text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator));
	69	if(text!=NULL) {
	70	if(unorm_haveData(&errorCode)) {
	71	uiter_setCharacterIterator(text, iter);
	72	} else {
	73	delete iter;
	74	uiter_setCharacterIterator(text, new UCharCharacterIterator(&_NUL, 0));
	75	}
	76	} else {
	77	delete iter;
	78	}
	79	}
	80
	81	Normalizer::~Normalizer()
	82	{
	83	if(text!=NULL) {
	84	delete (CharacterIterator *)text->context;
	85	uprv_free(text);
	86	}
	87	}
	88
89	Normalizer*
90	Normalizer::clone() const
91	{
92	if(this!=0) {
93	return new Normalizer(*this);
94	} else {
95	return 0;
96	}
97	}
98
99	/**
100	* Generates a hash code for this iterator.
101	*/
102	int32_t Normalizer::hashCode() const
103	{
104	return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
105	}
106
107	UBool Normalizer::operator==(const Normalizer& that) const
108	{
109	return
110	this==&that \|\|
111	fUMode==that.fUMode &&
112	fOptions==that.fOptions &&
113	((CharacterIterator )(text->context))==((CharacterIterator )(that.text->context)) &&
114	buffer==that.buffer &&
115	bufferPos==that.bufferPos &&
116	nextIndex==that.nextIndex;
117	}
118
119	//-------------------------------------------------------------------------
120	// Static utility methods
121	//-------------------------------------------------------------------------
122
374ca955	123	void U_EXPORT2
b75a7d8f A	124	Normalizer::normalize(const UnicodeString& source,
	125	UNormalizationMode mode, int32_t options,
	126	UnicodeString& result,
	127	UErrorCode &status) {
	128	if(source.isBogus() \|\| U_FAILURE(status)) {
	129	result.setToBogus();
	130	if(U_SUCCESS(status)) {
	131	status=U_ILLEGAL_ARGUMENT_ERROR;
	132	}
	133	} else {
	134	UnicodeString localDest;
	135	UnicodeString *dest;
	136
	137	if(&source!=&result) {
	138	dest=&result;
	139	} else {
	140	// the source and result strings are the same object, use a temporary one
	141	dest=&localDest;
	142	}
	143
	144	UChar *buffer=dest->getBuffer(source.length());
	145	int32_t length=unorm_internalNormalize(buffer, dest->getCapacity(),
	146	source.getBuffer(), source.length(),
	147	mode, options,
	148	&status);
	149	dest->releaseBuffer(length);
	150	if(status==U_BUFFER_OVERFLOW_ERROR) {
	151	status=U_ZERO_ERROR;
	152	buffer=dest->getBuffer(length);
	153	length=unorm_internalNormalize(buffer, dest->getCapacity(),
	154	source.getBuffer(), source.length(),
	155	mode, options,
	156	&status);
	157	dest->releaseBuffer(length);
	158	}
	159
	160	if(dest==&localDest) {
	161	result=*dest;
	162	}
	163	if(U_FAILURE(status)) {
	164	result.setToBogus();
	165	}
	166	}
	167	}
	168
374ca955	169	void U_EXPORT2
b75a7d8f A	170	Normalizer::compose(const UnicodeString& source,
	171	UBool compat, int32_t options,
	172	UnicodeString& result,
	173	UErrorCode &status) {
	174	if(source.isBogus() \|\| U_FAILURE(status)) {
	175	result.setToBogus();
	176	if(U_SUCCESS(status)) {
	177	status=U_ILLEGAL_ARGUMENT_ERROR;
	178	}
	179	} else {
	180	UnicodeString localDest;
	181	UnicodeString *dest;
	182
	183	if(&source!=&result) {
	184	dest=&result;
	185	} else {
	186	// the source and result strings are the same object, use a temporary one
	187	dest=&localDest;
	188	}
	189
	190	UChar *buffer=dest->getBuffer(source.length());
	191	int32_t length=unorm_compose(buffer, dest->getCapacity(),
	192	source.getBuffer(), source.length(),
	193	compat, options,
	194	&status);
	195	dest->releaseBuffer(length);
	196	if(status==U_BUFFER_OVERFLOW_ERROR) {
	197	status=U_ZERO_ERROR;
	198	buffer=dest->getBuffer(length);
	199	length=unorm_compose(buffer, dest->getCapacity(),
	200	source.getBuffer(), source.length(),
	201	compat, options,
	202	&status);
	203	dest->releaseBuffer(length);
	204	}
	205
	206	if(dest==&localDest) {
	207	result=*dest;
	208	}
	209	if(U_FAILURE(status)) {
	210	result.setToBogus();
	211	}
	212	}
	213	}
	214
374ca955	215	void U_EXPORT2
b75a7d8f A	216	Normalizer::decompose(const UnicodeString& source,
	217	UBool compat, int32_t options,
	218	UnicodeString& result,
	219	UErrorCode &status) {
	220	if(source.isBogus() \|\| U_FAILURE(status)) {
	221	result.setToBogus();
	222	if(U_SUCCESS(status)) {
	223	status=U_ILLEGAL_ARGUMENT_ERROR;
	224	}
	225	} else {
	226	UnicodeString localDest;
	227	UnicodeString *dest;
	228
	229	if(&source!=&result) {
	230	dest=&result;
	231	} else {
	232	// the source and result strings are the same object, use a temporary one
	233	dest=&localDest;
	234	}
	235
	236	UChar *buffer=dest->getBuffer(source.length());
	237	int32_t length=unorm_decompose(buffer, dest->getCapacity(),
	238	source.getBuffer(), source.length(),
	239	compat, options,
	240	&status);
	241	dest->releaseBuffer(length);
	242	if(status==U_BUFFER_OVERFLOW_ERROR) {
	243	status=U_ZERO_ERROR;
	244	buffer=dest->getBuffer(length);
	245	length=unorm_decompose(buffer, dest->getCapacity(),
	246	source.getBuffer(), source.length(),
	247	compat, options,
	248	&status);
	249	dest->releaseBuffer(length);
	250	}
	251
	252	if(dest==&localDest) {
	253	result=*dest;
	254	}
	255	if(U_FAILURE(status)) {
	256	result.setToBogus();
	257	}
	258	}
	259	}
	260
374ca955	261	UnicodeString & U_EXPORT2
b75a7d8f A	262	Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
	263	UnicodeString &result,
	264	UNormalizationMode mode, int32_t options,
	265	UErrorCode &errorCode) {
	266	if(left.isBogus() \|\| right.isBogus() \|\| U_FAILURE(errorCode)) {
	267	result.setToBogus();
	268	if(U_SUCCESS(errorCode)) {
	269	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	270	}
	271	} else {
	272	UnicodeString localDest;
	273	UnicodeString *dest;
	274
	275	if(&left!=&result && &right!=&result) {
	276	dest=&result;
	277	} else {
	278	// the source and result strings are the same object, use a temporary one
	279	dest=&localDest;
	280	}
	281
	282	UChar *buffer=dest->getBuffer(left.length()+right.length());
	283	int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
	284	right.getBuffer(), right.length(),
	285	buffer, dest->getCapacity(),
	286	mode, options,
	287	&errorCode);
	288	dest->releaseBuffer(length);
	289	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
	290	errorCode=U_ZERO_ERROR;
	291	buffer=dest->getBuffer(length);
	292	int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
	293	right.getBuffer(), right.length(),
	294	buffer, dest->getCapacity(),
	295	mode, options,
	296	&errorCode);
	297	dest->releaseBuffer(length);
	298	}
	299
	300	if(dest==&localDest) {
	301	result=*dest;
	302	}
	303	if(U_FAILURE(errorCode)) {
	304	result.setToBogus();
	305	}
	306	}
	307	return result;
	308	}
	309
	310	//-------------------------------------------------------------------------
	311	// Iteration API
	312	//-------------------------------------------------------------------------
	313
	314	/**
	315	* Return the current character in the normalized text.
	316	*/
	317	UChar32 Normalizer::current() {
	318	if(bufferPos<buffer.length() \|\| nextNormalize()) {
	319	return buffer.char32At(bufferPos);
	320	} else {
	321	return DONE;
	322	}
	323	}
	324
	325	/**
326	* Return the next character in the normalized text and advance
327	* the iteration position by one. If the end
328	* of the text has already been reached, {@link #DONE} is returned.
329	*/
330	UChar32 Normalizer::next() {
331	if(bufferPos<buffer.length() \|\| nextNormalize()) {
332	UChar32 c=buffer.char32At(bufferPos);
333	bufferPos+=UTF_CHAR_LENGTH(c);
334	return c;
335	} else {
336	return DONE;
337	}
338	}
339
340	/**
341	* Return the previous character in the normalized text and decrement
342	* the iteration position by one. If the beginning
343	* of the text has already been reached, {@link #DONE} is returned.
344	*/
345	UChar32 Normalizer::previous() {
346	if(bufferPos>0 \|\| previousNormalize()) {
347	UChar32 c=buffer.char32At(bufferPos-1);
348	bufferPos-=UTF_CHAR_LENGTH(c);
349	return c;
350	} else {
351	return DONE;
352	}
353	}
354
355	void Normalizer::reset() {
356	currentIndex=nextIndex=text->move(text, 0, UITER_START);
357	clearBuffer();
358	}
359
360	void
361	Normalizer::setIndexOnly(int32_t index) {
362	currentIndex=nextIndex=text->move(text, index, UITER_ZERO); // validates index
363	clearBuffer();
364	}
365
366	/**
367	* Return the first character in the normalized text-> This resets
368	* the <tt>Normalizer's</tt> position to the beginning of the text->
369	*/
370	UChar32 Normalizer::first() {
371	reset();
372	return next();
373	}
374
375	/**
376	* Return the last character in the normalized text-> This resets
377	* the <tt>Normalizer's</tt> position to be just before the
378	* the input text corresponding to that normalized character.
379	*/
380	UChar32 Normalizer::last() {
381	currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
382	clearBuffer();
383	return previous();
384	}
385
386	/**
387	* Retrieve the current iteration position in the input text that is
388	* being normalized. This method is useful in applications such as
389	* searching, where you need to be able to determine the position in
390	* the input text that corresponds to a given normalized output character.
391	* <p>
392	* <b>Note:</b> This method sets the position in the <em>input</em>, while
393	* {@link #next} and {@link #previous} iterate through characters in the
394	* <em>output</em>. This means that there is not necessarily a one-to-one
395	* correspondence between characters returned by <tt>next</tt> and
396	* <tt>previous</tt> and the indices passed to and returned from
397	* <tt>setIndex</tt> and {@link #getIndex}.
398	*
399	*/
400	int32_t Normalizer::getIndex() const {
401	if(bufferPos<buffer.length()) {
402	return currentIndex;
403	} else {
404	return nextIndex;
405	}
406	}
407
408	/**
409	* Retrieve the index of the start of the input text-> This is the begin index
410	* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
411	* over which this <tt>Normalizer</tt> is iterating
412	*/
413	int32_t Normalizer::startIndex() const {
414	return text->getIndex(text, UITER_START);
415	}
416
417	/**
418	* Retrieve the index of the end of the input text-> This is the end index
419	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
420	* over which this <tt>Normalizer</tt> is iterating
421	*/
422	int32_t Normalizer::endIndex() const {
423	return text->getIndex(text, UITER_LIMIT);
424	}
425
426	//-------------------------------------------------------------------------
427	// Property access methods
428	//-------------------------------------------------------------------------
429
430	void
431	Normalizer::setMode(UNormalizationMode newMode)
432	{
433	fUMode = newMode;
434	}
435
436	UNormalizationMode
437	Normalizer::getUMode() const
438	{
439	return fUMode;
440	}
441
442	void
443	Normalizer::setOption(int32_t option,
444	UBool value)
445	{
446	if (value) {
447	fOptions \|= option;
448	} else {
449	fOptions &= (~option);
450	}
451	}
452
453	UBool
454	Normalizer::getOption(int32_t option) const
455	{
456	return (fOptions & option) != 0;
457	}
458
459	/**
460	* Set the input text over which this <tt>Normalizer</tt> will iterate.
461	* The iteration position is set to the beginning of the input text->
462	*/
463	void
464	Normalizer::setText(const UnicodeString& newText,
465	UErrorCode &status)
466	{
467	if (U_FAILURE(status)) {
468	return;
469	}
470	CharacterIterator *newIter = new StringCharacterIterator(newText);
471	if (newIter == NULL) {
472	status = U_MEMORY_ALLOCATION_ERROR;
473	return;
474	}
475	delete (CharacterIterator *)(text->context);
476	text->context = newIter;
477	reset();
478	}
479
480	/**
481	* Set the input text over which this <tt>Normalizer</tt> will iterate.
482	* The iteration position is set to the beginning of the string.
483	*/
484	void
485	Normalizer::setText(const CharacterIterator& newText,
486	UErrorCode &status)
487	{
488	if (U_FAILURE(status)) {
489	return;
490	}
491	CharacterIterator *newIter = newText.clone();
492	if (newIter == NULL) {
493	status = U_MEMORY_ALLOCATION_ERROR;
494	return;
495	}
496	delete (CharacterIterator *)(text->context);
497	text->context = newIter;
498	reset();
499	}
500
501	void
502	Normalizer::setText(const UChar* newText,
503	int32_t length,
504	UErrorCode &status)
505	{
506	if (U_FAILURE(status)) {
507	return;
508	}
509	CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
510	if (newIter == NULL) {
511	status = U_MEMORY_ALLOCATION_ERROR;
512	return;
513	}
514	delete (CharacterIterator *)(text->context);
515	text->context = newIter;
516	reset();
517	}
518
519	/**
520	* Copies the text under iteration into the UnicodeString referred to by "result".
521	* @param result Receives a copy of the text under iteration.
522	*/
523	void
524	Normalizer::getText(UnicodeString& result)
525	{
526	((CharacterIterator *)(text->context))->getText(result);
527	}
528
529	//-------------------------------------------------------------------------
530	// Private utility methods
531	//-------------------------------------------------------------------------
532
533	void Normalizer::clearBuffer() {
534	buffer.remove();
535	bufferPos=0;
536	}
537
538	UBool
539	Normalizer::nextNormalize() {
540	UChar *p;
541	int32_t length;
542	UErrorCode errorCode;
543
544	clearBuffer();
545	currentIndex=nextIndex;
546	text->move(text, nextIndex, UITER_ZERO);
547	if(!text->hasNext(text)) {
548	return FALSE;
549	}
550
551	errorCode=U_ZERO_ERROR;
552	p=buffer.getBuffer(-1);
553	length=unorm_next(text, p, buffer.getCapacity(),
554	fUMode, fOptions,
555	TRUE, 0,
556	&errorCode);
557	buffer.releaseBuffer(length);
558	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
559	errorCode=U_ZERO_ERROR;
560	text->move(text, nextIndex, UITER_ZERO);
561	p=buffer.getBuffer(length);
562	length=unorm_next(text, p, buffer.getCapacity(),
563	fUMode, fOptions,
564	TRUE, 0,
565	&errorCode);
566	buffer.releaseBuffer(length);
567	}
568
569	nextIndex=text->getIndex(text, UITER_CURRENT);
570	return U_SUCCESS(errorCode) && !buffer.isEmpty();
571	}
572
573	UBool
574	Normalizer::previousNormalize() {
575	UChar *p;
576	int32_t length;
577	UErrorCode errorCode;
578
579	clearBuffer();
580	nextIndex=currentIndex;
581	text->move(text, currentIndex, UITER_ZERO);
582	if(!text->hasPrevious(text)) {
583	return FALSE;
584	}
585
586	errorCode=U_ZERO_ERROR;
587	p=buffer.getBuffer(-1);
588	length=unorm_previous(text, p, buffer.getCapacity(),
589	fUMode, fOptions,
590	TRUE, 0,
591	&errorCode);
592	buffer.releaseBuffer(length);
593	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
594	errorCode=U_ZERO_ERROR;
595	text->move(text, currentIndex, UITER_ZERO);
596	p=buffer.getBuffer(length);
597	length=unorm_previous(text, p, buffer.getCapacity(),
598	fUMode, fOptions,
599	TRUE, 0,
600	&errorCode);
601	buffer.releaseBuffer(length);
602	}
603
604	bufferPos=buffer.length();
605	currentIndex=text->getIndex(text, UITER_CURRENT);
606	return U_SUCCESS(errorCode) && !buffer.isEmpty();
607	}
608
609	U_NAMESPACE_END
610
611	#endif /* #if !UCONFIG_NO_NORMALIZATION */