git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/unicode/normlzr.h

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	********************************************************************
	5	* COPYRIGHT:
	6	* Copyright (c) 1996-2015, International Business Machines Corporation and
	7	* others. All Rights Reserved.
	8	********************************************************************
	9	*/
	10
	11	#ifndef NORMLZR_H
	12	#define NORMLZR_H
	13
	14	#include "unicode/utypes.h"
	15
	16	#if U_SHOW_CPLUSPLUS_API
	17
	18	/**
	19	* \file
	20	* \brief C++ API: Unicode Normalization
	21	*/
	22
	23	#if !UCONFIG_NO_NORMALIZATION
	24
	25	#include "unicode/chariter.h"
	26	#include "unicode/normalizer2.h"
	27	#include "unicode/unistr.h"
	28	#include "unicode/unorm.h"
	29	#include "unicode/uobject.h"
	30
	31	#if U_SHOW_CPLUSPLUS_API
	32	U_NAMESPACE_BEGIN
	33	/**
	34	* Old Unicode normalization API.
	35	*
	36	* This API has been replaced by the Normalizer2 class and is only available
	37	* for backward compatibility. This class simply delegates to the Normalizer2 class.
	38	* There is one exception: The new API does not provide a replacement for Normalizer::compare().
	39	*
	40	* The Normalizer class supports the standard normalization forms described in
	41	* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
	42	* Unicode Standard Annex #15: Unicode Normalization Forms</a>.
	43	*
	44	* The Normalizer class consists of two parts:
	45	* - static functions that normalize strings or test if strings are normalized
	46	* - a Normalizer object is an iterator that takes any kind of text and
	47	* provides iteration over its normalized form
	48	*
	49	* The Normalizer class is not suitable for subclassing.
	50	*
	51	* For basic information about normalization forms and details about the C API
	52	* please see the documentation in unorm.h.
	53	*
	54	* The iterator API with the Normalizer constructors and the non-static functions
	55	* use a CharacterIterator as input. It is possible to pass a string which
	56	* is then internally wrapped in a CharacterIterator.
	57	* The input text is not normalized all at once, but incrementally where needed
	58	* (providing efficient random access).
	59	* This allows passing in a large text while spending only a small amount of time
	60	* normalizing a small part of that text.
	61	* However, if the entire text is normalized, then the iterator will be
	62	* slower than normalizing the entire text at once and iterating over the result.
	63	* A possible use of the Normalizer iterator is also to report an index into the
	64	* original text that is close to where the normalized characters come from.
	65	*
	66	* <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
	67	* The earlier implementation reported the getIndex() inconsistently,
	68	* and previous() could not be used after setIndex(), next(), first(), and current().
	69	*
	70	* Normalizer allows normalization to start anywhere in the input text by
	71	* calling setIndexOnly(), first(), or last().
	72	* Without calling any of these, the iterator will start at the beginning of the text.
	73	*
	74	* At any time, next() returns the next normalized code point (UChar32),
	75	* with post-increment semantics (like CharacterIterator::next32PostInc()).
	76	* previous() returns the previous normalized code point (UChar32),
	77	* with pre-decrement semantics (like CharacterIterator::previous32()).
	78	*
	79	* current() returns the current code point
	80	* (respectively the one at the newly set index) without moving
	81	* the getIndex(). Note that if the text at the current position
	82	* needs to be normalized, then these functions will do that.
	83	* (This is why current() is not const.)
	84	* It is more efficient to call setIndexOnly() instead, which does not
	85	* normalize.
	86	*
	87	* getIndex() always refers to the position in the input text where the normalized
	88	* code points are returned from. It does not always change with each returned
	89	* code point.
	90	* The code point that is returned from any of the functions
	91	* corresponds to text at or after getIndex(), according to the
	92	* function's iteration semantics (post-increment or pre-decrement).
	93	*
	94	* next() returns a code point from at or after the getIndex()
	95	* from before the next() call. After the next() call, the getIndex()
	96	* might have moved to where the next code point will be returned from
	97	* (from a next() or current() call).
	98	* This is semantically equivalent to array access with array[index++]
	99	* (post-increment semantics).
	100	*
	101	* previous() returns a code point from at or after the getIndex()
	102	* from after the previous() call.
	103	* This is semantically equivalent to array access with array[--index]
	104	* (pre-decrement semantics).
	105	*
	106	* Internally, the Normalizer iterator normalizes a small piece of text
	107	* starting at the getIndex() and ending at a following "safe" index.
	108	* The normalized result is stored in an internal string buffer, and
	109	* the code points are iterated from there.
	110	* With multiple iteration calls, this is repeated until the next piece
	111	* of text needs to be normalized, and the getIndex() needs to be moved.
	112	*
	113	* The following "safe" index, the internal buffer, and the secondary
	114	* iteration index into that buffer are not exposed on the API.
	115	* This also means that it is currently not practical to return to
	116	* a particular, arbitrary position in the text because one would need to
	117	* know, and be able to set, in addition to the getIndex(), at least also the
	118	* current index into the internal buffer.
	119	* It is currently only possible to observe when getIndex() changes
	120	* (with careful consideration of the iteration semantics),
	121	* at which time the internal index will be 0.
	122	* For example, if getIndex() is different after next() than before it,
	123	* then the internal index is 0 and one can return to this getIndex()
	124	* later with setIndexOnly().
	125	*
	126	* Note: While setIndexOnly() and getIndex() refer to indices in the
	127	* underlying Unicode input text, the next() and previous() methods
	128	* iterate through characters in the normalized output.
	129	* This means that there is not necessarily a one-to-one correspondence
	130	* between characters returned by next() and previous() and the indices
	131	* passed to and returned from setIndexOnly() and getIndex().
	132	* It is for this reason that Normalizer does not implement the CharacterIterator interface.
	133	*
	134	* @author Laura Werner, Mark Davis, Markus Scherer
	135	* @stable ICU 2.0
	136	*/
	137	class U_COMMON_API Normalizer : public UObject {
	138	public:
	139	#ifndef U_HIDE_DEPRECATED_API
	140	/**
	141	* If DONE is returned from an iteration function that returns a code point,
	142	* then there are no more normalization results available.
	143	* @deprecated ICU 56 Use Normalizer2 instead.
	144	*/
	145	enum {
	146	DONE=0xffff
	147	};
	148
	149	// Constructors
	150
	151	/**
	152	* Creates a new <code>Normalizer</code> object for iterating over the
	153	* normalized form of a given string.
	154	* <p>
	155	* @param str The string to be normalized. The normalization
	156	* will start at the beginning of the string.
	157	*
	158	* @param mode The normalization mode.
	159	* @deprecated ICU 56 Use Normalizer2 instead.
	160	*/
	161	Normalizer(const UnicodeString& str, UNormalizationMode mode);
	162
	163	/**
	164	* Creates a new <code>Normalizer</code> object for iterating over the
	165	* normalized form of a given string.
	166	* <p>
	167	* @param str The string to be normalized. The normalization
	168	* will start at the beginning of the string.
	169	*
	170	* @param length Length of the string, or -1 if NUL-terminated.
	171	* @param mode The normalization mode.
	172	* @deprecated ICU 56 Use Normalizer2 instead.
	173	*/
	174	Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
	175
	176	/**
	177	* Creates a new <code>Normalizer</code> object for iterating over the
	178	* normalized form of the given text.
	179	* <p>
	180	* @param iter The input text to be normalized. The normalization
	181	* will start at the beginning of the string.
	182	*
	183	* @param mode The normalization mode.
	184	* @deprecated ICU 56 Use Normalizer2 instead.
	185	*/
	186	Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
	187	#endif /* U_HIDE_DEPRECATED_API */
	188
	189	#ifndef U_FORCE_HIDE_DEPRECATED_API
	190	/**
	191	* Copy constructor.
	192	* @param copy The object to be copied.
	193	* @deprecated ICU 56 Use Normalizer2 instead.
	194	*/
	195	Normalizer(const Normalizer& copy);
	196
	197	/**
	198	* Destructor
	199	* @deprecated ICU 56 Use Normalizer2 instead.
	200	*/
	201	virtual ~Normalizer();
	202	#endif // U_FORCE_HIDE_DEPRECATED_API
	203
	204	//-------------------------------------------------------------------------
	205	// Static utility methods
	206	//-------------------------------------------------------------------------
	207
	208	#ifndef U_HIDE_DEPRECATED_API
	209	/**
	210	* Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
	211	* This is a wrapper for unorm_normalize(), using UnicodeString's.
	212	*
	213	* The <code>options</code> parameter specifies which optional
	214	* <code>Normalizer</code> features are to be enabled for this operation.
	215	*
	216	* @param source the input string to be normalized.
	217	* @param mode the normalization mode
	218	* @param options the optional features to be enabled (0 for no options)
	219	* @param result The normalized string (on output).
	220	* @param status The error code.
	221	* @deprecated ICU 56 Use Normalizer2 instead.
	222	*/
	223	static void U_EXPORT2 normalize(const UnicodeString& source,
	224	UNormalizationMode mode, int32_t options,
	225	UnicodeString& result,
	226	UErrorCode &status);
	227
	228	/**
	229	* Compose a <code>UnicodeString</code>.
	230	* This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
	231	* This is a wrapper for unorm_normalize(), using UnicodeString's.
	232	*
	233	* The <code>options</code> parameter specifies which optional
	234	* <code>Normalizer</code> features are to be enabled for this operation.
	235	*
	236	* @param source the string to be composed.
	237	* @param compat Perform compatibility decomposition before composition.
	238	* If this argument is <code>FALSE</code>, only canonical
	239	* decomposition will be performed.
	240	* @param options the optional features to be enabled (0 for no options)
	241	* @param result The composed string (on output).
	242	* @param status The error code.
	243	* @deprecated ICU 56 Use Normalizer2 instead.
	244	*/
	245	static void U_EXPORT2 compose(const UnicodeString& source,
	246	UBool compat, int32_t options,
	247	UnicodeString& result,
	248	UErrorCode &status);
	249
	250	/**
	251	* Static method to decompose a <code>UnicodeString</code>.
	252	* This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
	253	* This is a wrapper for unorm_normalize(), using UnicodeString's.
	254	*
	255	* The <code>options</code> parameter specifies which optional
	256	* <code>Normalizer</code> features are to be enabled for this operation.
	257	*
	258	* @param source the string to be decomposed.
	259	* @param compat Perform compatibility decomposition.
	260	* If this argument is <code>FALSE</code>, only canonical
	261	* decomposition will be performed.
	262	* @param options the optional features to be enabled (0 for no options)
	263	* @param result The decomposed string (on output).
	264	* @param status The error code.
	265	* @deprecated ICU 56 Use Normalizer2 instead.
	266	*/
	267	static void U_EXPORT2 decompose(const UnicodeString& source,
	268	UBool compat, int32_t options,
	269	UnicodeString& result,
	270	UErrorCode &status);
	271
	272	/**
	273	* Performs a quick check on a string to determine whether the string is
	274	* in a particular normalization format.
	275	* This is a wrapper for unorm_quickCheck(), using a UnicodeString.
	276	*
	277	* Three types of result can be returned: UNORM_YES, UNORM_NO or
	278	* UNORM_MAYBE. Result UNORM_YES indicates that the argument
	279	* string is in the desired normalized format, UNORM_NO indicates that the
	280	* argument string is not in the desired normalized format. A
	281	* UNORM_MAYBE result indicates that a more thorough check is required,
	282	* the user may have to put the string in its normalized form and compare the
	283	* results.
	284	* @param source string for determining if it is in a normalized format
	285	* @param mode normalization format
	286	* @param status A reference to a UErrorCode to receive any errors
	287	* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
	288	*
	289	* @see isNormalized
	290	* @deprecated ICU 56 Use Normalizer2 instead.
	291	*/
	292	static inline UNormalizationCheckResult
	293	quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
	294
	295	/**
	296	* Performs a quick check on a string; same as the other version of quickCheck
	297	* but takes an extra options parameter like most normalization functions.
	298	*
	299	* @param source string for determining if it is in a normalized format
	300	* @param mode normalization format
	301	* @param options the optional features to be enabled (0 for no options)
	302	* @param status A reference to a UErrorCode to receive any errors
	303	* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
	304	*
	305	* @see isNormalized
	306	* @deprecated ICU 56 Use Normalizer2 instead.
	307	*/
	308	static UNormalizationCheckResult
	309	quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
	310
	311	/**
	312	* Test if a string is in a given normalization form.
	313	* This is semantically equivalent to source.equals(normalize(source, mode)) .
	314	*
	315	* Unlike unorm_quickCheck(), this function returns a definitive result,
	316	* never a "maybe".
	317	* For NFD, NFKD, and FCD, both functions work exactly the same.
	318	* For NFC and NFKC where quickCheck may return "maybe", this function will
	319	* perform further tests to arrive at a TRUE/FALSE result.
	320	*
	321	* @param src String that is to be tested if it is in a normalization format.
	322	* @param mode Which normalization form to test for.
	323	* @param errorCode ICU error code in/out parameter.
	324	* Must fulfill U_SUCCESS before the function call.
	325	* @return Boolean value indicating whether the source string is in the
	326	* "mode" normalization form.
	327	*
	328	* @see quickCheck
	329	* @deprecated ICU 56 Use Normalizer2 instead.
	330	*/
	331	static inline UBool
	332	isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
	333
	334	/**
	335	* Test if a string is in a given normalization form; same as the other version of isNormalized
	336	* but takes an extra options parameter like most normalization functions.
	337	*
	338	* @param src String that is to be tested if it is in a normalization format.
	339	* @param mode Which normalization form to test for.
	340	* @param options the optional features to be enabled (0 for no options)
	341	* @param errorCode ICU error code in/out parameter.
	342	* Must fulfill U_SUCCESS before the function call.
	343	* @return Boolean value indicating whether the source string is in the
	344	* "mode" normalization form.
	345	*
	346	* @see quickCheck
	347	* @deprecated ICU 56 Use Normalizer2 instead.
	348	*/
	349	static UBool
	350	isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
	351
	352	/**
	353	* Concatenate normalized strings, making sure that the result is normalized as well.
	354	*
	355	* If both the left and the right strings are in
	356	* the normalization form according to "mode/options",
	357	* then the result will be
	358	*
	359	* \code
	360	* dest=normalize(left+right, mode, options)
	361	* \endcode
	362	*
	363	* For details see unorm_concatenate in unorm.h.
	364	*
	365	* @param left Left source string.
	366	* @param right Right source string.
	367	* @param result The output string.
	368	* @param mode The normalization mode.
	369	* @param options A bit set of normalization options.
	370	* @param errorCode ICU error code in/out parameter.
	371	* Must fulfill U_SUCCESS before the function call.
	372	* @return result
	373	*
	374	* @see unorm_concatenate
	375	* @see normalize
	376	* @see unorm_next
	377	* @see unorm_previous
	378	*
	379	* @deprecated ICU 56 Use Normalizer2 instead.
	380	*/
	381	static UnicodeString &
	382	U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
	383	UnicodeString &result,
	384	UNormalizationMode mode, int32_t options,
	385	UErrorCode &errorCode);
	386	#endif /* U_HIDE_DEPRECATED_API */
	387
	388	/**
	389	* Compare two strings for canonical equivalence.
	390	* Further options include case-insensitive comparison and
	391	* code point order (as opposed to code unit order).
	392	*
	393	* Canonical equivalence between two strings is defined as their normalized
	394	* forms (NFD or NFC) being identical.
	395	* This function compares strings incrementally instead of normalizing
	396	* (and optionally case-folding) both strings entirely,
	397	* improving performance significantly.
	398	*
	399	* Bulk normalization is only necessary if the strings do not fulfill the FCD
	400	* conditions. Only in this case, and only if the strings are relatively long,
	401	* is memory allocated temporarily.
	402	* For FCD strings and short non-FCD strings there is no memory allocation.
	403	*
	404	* Semantically, this is equivalent to
	405	* strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
	406	* where code point order and foldCase are all optional.
	407	*
	408	* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
	409	* the case folding must be performed first, then the normalization.
	410	*
	411	* @param s1 First source string.
	412	* @param s2 Second source string.
	413	*
	414	* @param options A bit set of options:
	415	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	416	* Case-sensitive comparison in code unit order, and the input strings
	417	* are quick-checked for FCD.
	418	*
	419	* - UNORM_INPUT_IS_FCD
	420	* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
	421	* If not set, the function will quickCheck for FCD
	422	* and normalize if necessary.
	423	*
	424	* - U_COMPARE_CODE_POINT_ORDER
	425	* Set to choose code point order instead of code unit order
	426	* (see u_strCompare for details).
	427	*
	428	* - U_COMPARE_IGNORE_CASE
	429	* Set to compare strings case-insensitively using case folding,
	430	* instead of case-sensitively.
	431	* If set, then the following case folding options are used.
	432	*
	433	* - Options as used with case-insensitive comparisons, currently:
	434	*
	435	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	436	* (see u_strCaseCompare for details)
	437	*
	438	* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
	439	*
	440	* @param errorCode ICU error code in/out parameter.
	441	* Must fulfill U_SUCCESS before the function call.
	442	* @return <0 or 0 or >0 as usual for string comparisons
	443	*
	444	* @see unorm_compare
	445	* @see normalize
	446	* @see UNORM_FCD
	447	* @see u_strCompare
	448	* @see u_strCaseCompare
	449	*
	450	* @stable ICU 2.2
	451	*/
	452	static inline int32_t
	453	compare(const UnicodeString &s1, const UnicodeString &s2,
	454	uint32_t options,
	455	UErrorCode &errorCode);
	456
	457	#ifndef U_HIDE_DEPRECATED_API
	458	//-------------------------------------------------------------------------
	459	// Iteration API
	460	//-------------------------------------------------------------------------
	461
	462	/**
	463	* Return the current character in the normalized text.
	464	* current() may need to normalize some text at getIndex().
	465	* The getIndex() is not changed.
	466	*
	467	* @return the current normalized code point
	468	* @deprecated ICU 56 Use Normalizer2 instead.
	469	*/
	470	UChar32 current(void);
	471
	472	/**
	473	* Return the first character in the normalized text.
	474	* This is equivalent to setIndexOnly(startIndex()) followed by next().
	475	* (Post-increment semantics.)
	476	*
	477	* @return the first normalized code point
	478	* @deprecated ICU 56 Use Normalizer2 instead.
	479	*/
	480	UChar32 first(void);
	481
	482	/**
	483	* Return the last character in the normalized text.
	484	* This is equivalent to setIndexOnly(endIndex()) followed by previous().
	485	* (Pre-decrement semantics.)
	486	*
	487	* @return the last normalized code point
	488	* @deprecated ICU 56 Use Normalizer2 instead.
	489	*/
	490	UChar32 last(void);
	491
	492	/**
	493	* Return the next character in the normalized text.
	494	* (Post-increment semantics.)
	495	* If the end of the text has already been reached, DONE is returned.
	496	* The DONE value could be confused with a U+FFFF non-character code point
	497	* in the text. If this is possible, you can test getIndex()<endIndex()
	498	* before calling next(), or (getIndex()<endIndex() || last()!=DONE)
	499	* after calling next(). (Calling last() will change the iterator state!)
	500	*
	501	* The C API unorm_next() is more efficient and does not have this ambiguity.
	502	*
	503	* @return the next normalized code point
	504	* @deprecated ICU 56 Use Normalizer2 instead.
	505	*/
	506	UChar32 next(void);
	507
	508	/**
	509	* Return the previous character in the normalized text and decrement.
	510	* (Pre-decrement semantics.)
	511	* If the beginning of the text has already been reached, DONE is returned.
	512	* The DONE value could be confused with a U+FFFF non-character code point
	513	* in the text. If this is possible, you can test
	514	* (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
	515	* the iterator state!)
	516	*
	517	* The C API unorm_previous() is more efficient and does not have this ambiguity.
	518	*
	519	* @return the previous normalized code point
	520	* @deprecated ICU 56 Use Normalizer2 instead.
	521	*/
	522	UChar32 previous(void);
	523
	524	/**
	525	* Set the iteration position in the input text that is being normalized,
	526	* without any immediate normalization.
	527	* After setIndexOnly(), getIndex() will return the same index that is
	528	* specified here.
	529	*
	530	* @param index the desired index in the input text.
	531	* @deprecated ICU 56 Use Normalizer2 instead.
	532	*/
	533	void setIndexOnly(int32_t index);
	534
	535	/**
	536	* Reset the index to the beginning of the text.
	537	* This is equivalent to setIndexOnly(startIndex()).
	538	* @deprecated ICU 56 Use Normalizer2 instead.
	539	*/
	540	void reset(void);
	541
	542	/**
	543	* Retrieve the current iteration position in the input text that is
	544	* being normalized.
	545	*
	546	* A following call to next() will return a normalized code point from
	547	* the input text at or after this index.
	548	*
	549	* After a call to previous(), getIndex() will point at or before the
	550	* position in the input text where the normalized code point
	551	* was returned from with previous().
	552	*
	553	* @return the current index in the input text
	554	* @deprecated ICU 56 Use Normalizer2 instead.
	555	*/
	556	int32_t getIndex(void) const;
	557
	558	/**
	559	* Retrieve the index of the start of the input text. This is the begin index
	560	* of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
	561	* over which this <code>Normalizer</code> is iterating.
	562	*
	563	* @return the smallest index in the input text where the Normalizer operates
	564	* @deprecated ICU 56 Use Normalizer2 instead.
	565	*/
	566	int32_t startIndex(void) const;
	567
	568	/**
	569	* Retrieve the index of the end of the input text. This is the end index
	570	* of the <code>CharacterIterator</code> or the length of the string
	571	* over which this <code>Normalizer</code> is iterating.
	572	* This end index is exclusive, i.e., the Normalizer operates only on characters
	573	* before this index.
	574	*
	575	* @return the first index in the input text where the Normalizer does not operate
	576	* @deprecated ICU 56 Use Normalizer2 instead.
	577	*/
	578	int32_t endIndex(void) const;
	579
	580	/**
	581	* Returns TRUE when both iterators refer to the same character in the same
	582	* input text.
	583	*
	584	* @param that a Normalizer object to compare this one to
	585	* @return comparison result
	586	* @deprecated ICU 56 Use Normalizer2 instead.
	587	*/
	588	UBool operator==(const Normalizer& that) const;
	589
	590	/**
	591	* Returns TRUE unless both iterators refer to the same character in the same
	592	* input text, i.e. the negation of operator==().
	593	*
	594	* @param that a Normalizer object to compare this one to
	595	* @return comparison result
	596	* @deprecated ICU 56 Use Normalizer2 instead.
	597	*/
	598	inline UBool operator!=(const Normalizer& that) const;
	599
	600	/**
	601	* Returns a pointer to a new Normalizer that is a clone of this one.
	602	* The caller is responsible for deleting the new clone.
	603	* @return a pointer to a new Normalizer
	604	* @deprecated ICU 56 Use Normalizer2 instead.
	605	*/
	606	Normalizer* clone() const;
	607
	608	/**
	609	* Generates a hash code for this iterator.
	610	*
	611	* @return the hash code
	612	* @deprecated ICU 56 Use Normalizer2 instead.
	613	*/
	614	int32_t hashCode(void) const;
	615
	616	//-------------------------------------------------------------------------
	617	// Property access methods
	618	//-------------------------------------------------------------------------
	619
	620	/**
	621	* Set the normalization mode for this object.
	622	* <p>
	623	* <b>Note:</b> If the normalization mode is changed while iterating
	624	* over a string, calls to {@link #next() } and {@link #previous() } may
	625	* return previously buffered characters in the old normalization mode
	626	* until the iteration is able to re-sync at the next base character.
	627	* It is safest to call {@link #setIndexOnly }, {@link #reset() },
	628	* {@link #setText }, {@link #first() },
	629	* {@link #last() }, etc. after calling <code>setMode</code>.
	630	* <p>
	631	* @param newMode the new mode for this <code>Normalizer</code>.
	632	* @see #getUMode
	633	* @deprecated ICU 56 Use Normalizer2 instead.
	634	*/
	635	void setMode(UNormalizationMode newMode);
	636
	637	/**
	638	* Return the normalization mode for this object.
	639	*
	640	* This is an unusual name because there used to be a getMode() that
	641	* returned a different type.
	642	*
	643	* @return the mode for this <code>Normalizer</code>
	644	* @see #setMode
	645	* @deprecated ICU 56 Use Normalizer2 instead.
	646	*/
	647	UNormalizationMode getUMode(void) const;
	648
	649	/**
	650	* Set options that affect this <code>Normalizer</code>'s operation.
	651	* Options do not change the basic composition or decomposition operation
	652	* that is being performed, but they control whether
	653	* certain optional portions of the operation are done.
	654	* Currently the only available option is obsolete.
	655	*
	656	* It is possible to specify multiple options that are all turned on or off.
	657	*
	658	* @param option the option(s) whose value is/are to be set.
	659	* @param value the new setting for the option. Use <code>TRUE</code> to
	660	* turn the option(s) on and <code>FALSE</code> to turn it/them off.
	661	*
	662	* @see #getOption
	663	* @deprecated ICU 56 Use Normalizer2 instead.
	664	*/
	665	void setOption(int32_t option,
	666	UBool value);
	667
	668	/**
	669	* Determine whether an option is turned on or off.
	670	* If multiple options are specified, then the result is TRUE if any
	671	* of them are set.
	672	* <p>
	673	* @param option the option(s) that are to be checked
	674	* @return TRUE if any of the option(s) are set
	675	* @see #setOption
	676	* @deprecated ICU 56 Use Normalizer2 instead.
	677	*/
	678	UBool getOption(int32_t option) const;
	679
	680	/**
	681	* Set the input text over which this <code>Normalizer</code> will iterate.
	682	* The iteration position is set to the beginning.
	683	*
	684	* @param newText a string that replaces the current input text
	685	* @param status a UErrorCode
	686	* @deprecated ICU 56 Use Normalizer2 instead.
	687	*/
	688	void setText(const UnicodeString& newText,
	689	UErrorCode &status);
	690
	691	/**
	692	* Set the input text over which this <code>Normalizer</code> will iterate.
	693	* The iteration position is set to the beginning.
	694	*
	695	* @param newText a CharacterIterator object that replaces the current input text
	696	* @param status a UErrorCode
	697	* @deprecated ICU 56 Use Normalizer2 instead.
	698	*/
	699	void setText(const CharacterIterator& newText,
	700	UErrorCode &status);
	701
	702	/**
	703	* Set the input text over which this <code>Normalizer</code> will iterate.
	704	* The iteration position is set to the beginning.
	705	*
	706	* @param newText a string that replaces the current input text
	707	* @param length the length of the string, or -1 if NUL-terminated
	708	* @param status a UErrorCode
	709	* @deprecated ICU 56 Use Normalizer2 instead.
	710	*/
	711	void setText(ConstChar16Ptr newText,
	712	int32_t length,
	713	UErrorCode &status);
	714	/**
	715	* Copies the input text into the UnicodeString argument.
	716	*
	717	* @param result Receives a copy of the text under iteration.
	718	* @deprecated ICU 56 Use Normalizer2 instead.
	719	*/
	720	void getText(UnicodeString& result);
	721
	722	/**
	723	* ICU "poor man's RTTI", returns a UClassID for this class.
	724	* @return a UClassID for this class.
	725	* @deprecated ICU 56 Use Normalizer2 instead.
	726	*/
	727	static UClassID U_EXPORT2 getStaticClassID();
	728	#endif /* U_HIDE_DEPRECATED_API */
	729
	730	#ifndef U_FORCE_HIDE_DEPRECATED_API
	731	/**
	732	* ICU "poor man's RTTI", returns a UClassID for the actual class.
	733	* @return a UClassID for the actual class.
	734	* @deprecated ICU 56 Use Normalizer2 instead.
	735	*/
	736	virtual UClassID getDynamicClassID() const;
	737	#endif // U_FORCE_HIDE_DEPRECATED_API
	738
	739	private:
	740	//-------------------------------------------------------------------------
	741	// Private functions
	742	//-------------------------------------------------------------------------
	743
	744	Normalizer(); // default constructor not implemented; declared private
	745	Normalizer &operator=(const Normalizer &that); // assignment operator not implemented; declared private
	746
	747	// Private utility methods for iteration
	748	// For documentation, see the source code
	749	UBool nextNormalize();
	750	UBool previousNormalize();
	751
	752	void init();
	753	void clearBuffer(void);
	754
	755	//-------------------------------------------------------------------------
	756	// Private data
	757	//-------------------------------------------------------------------------
	758
	759	FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
	760	const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
	761	UNormalizationMode fUMode; // deprecated
	762	int32_t fOptions;
	763
	764	// The input text and our position in it
	765	CharacterIterator *text;
	766
	767	// The normalization buffer is the result of normalization
	768	// of the source in [currentIndex..nextIndex[ .
	769	int32_t currentIndex, nextIndex;
	770
	771	// A buffer for holding intermediate results
	772	UnicodeString buffer;
	773	int32_t bufferPos;
	774	};
	775
	776	//-------------------------------------------------------------------------
	777	// Inline implementations
	778	//-------------------------------------------------------------------------
	779
	780	#ifndef U_HIDE_DEPRECATED_API
	781	inline UBool Normalizer::operator!=(const Normalizer& other) const {
	782	    return !this->operator==(other);  // inequality is the negation of operator==
	783	}
	784
	785	inline UNormalizationCheckResult Normalizer::quickCheck(  // overload without options
	786	        const UnicodeString& source, UNormalizationMode mode, UErrorCode &status) {
	787	    // Forward to the four-argument overload with no optional features enabled.
	788	    const int32_t noOptions = 0;
	789	    return quickCheck(source, mode, noOptions, status);
	790	}
	791
	792	inline UBool Normalizer::isNormalized(  // overload without options
	793	        const UnicodeString& source, UNormalizationMode mode, UErrorCode &status) {
	794	    // Forward to the four-argument overload with no optional features enabled.
	795	    const int32_t noOptions = 0;
	796	    return isNormalized(source, mode, noOptions, status);
	797	}
	798	#endif /* U_HIDE_DEPRECATED_API */
	799
	800	inline int32_t Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
	801	                                   uint32_t options, UErrorCode &errorCode) {
	802	    // No validation here: unorm_compare() performs all argument checking.
	803	    const UChar *left = toUCharPtr(s1.getBuffer());
	804	    const int32_t leftLength = s1.length();
	805	    const UChar *right = toUCharPtr(s2.getBuffer());
	806	    const int32_t rightLength = s2.length();
	807	    return unorm_compare(left, leftLength, right, rightLength,
	808	                         options, &errorCode);
	809	}
	810
	811	U_NAMESPACE_END
	812	#endif // U_SHOW_CPLUSPLUS_API
	813
	814	#endif /* #if !UCONFIG_NO_NORMALIZATION */
	815
	816	#endif /* U_SHOW_CPLUSPLUS_API */
	817
	818	#endif // NORMLZR_H