[apple/icu.git] / icuSources / common / filterednormalizer2.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  filterednormalizer2.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009dec10
*   created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"

U_NAMESPACE_BEGIN

// Out-of-line destructor definition; it performs no work of its own.
FilteredNormalizer2::~FilteredNormalizer2() = default;

// Public entry point: replaces the contents of dest with the filtered
// normalization of src and returns dest.
//
// - If errorCode indicates a failure after uprv_checkCanGetBuffer(src, ...),
//   dest is set to bogus.
// - If src and dest are the same object, errorCode is set to
//   U_ILLEGAL_ARGUMENT_ERROR and dest is returned unmodified.
// - Otherwise dest is emptied and the internal worker is started with
//   USET_SPAN_SIMPLE as the initial span condition.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(src, errorCode);
    if(!U_FAILURE(errorCode)) {
        if(&src!=&dest) {
            dest.remove();
            return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
        }
        // Source and destination alias each other.
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    } else {
        dest.setToBogus();
    }
    return dest;
}

// Internal worker: performs no argument checking and appends to dest
// (dest is never cleared here).
//
// src is walked as a sequence of alternating spans. A span taken with
// USET_SPAN_NOT_CONTAINED is appended to dest verbatim; a span taken with any
// other condition is normalized by norm2 into a scratch string which is then
// appended. After each span the condition flips between USET_SPAN_SIMPLE and
// USET_SPAN_NOT_CONTAINED.
//
// The caller should pass the spanCondition that is likely to produce a
// non-empty span at the start of src. For example, with set=[:age=3.2:]
// almost all common characters are in the set, so USET_SPAN_SIMPLE is the
// right choice at the start of a string, while USET_SPAN_NOT_CONTAINED is the
// right choice when continuing after an in-filter prefix.
//
// The loop stops early if norm2 reports a failure through errorCode.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               USetSpanCondition spanCondition,
                               UErrorCode &errorCode) const {
    // Declared outside the loop so its buffer can be reused for every span.
    UnicodeString scratch;
    int32_t start=0;
    while(start<src.length()) {
        const int32_t limit=set.span(src, start, spanCondition);
        const bool hasText=(limit!=start);
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            if(hasText) {
                // Deliberately not norm2.normalizeSecondAndAppend(): that could
                // modify the non-filter text already at the end of dest.
                dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
                                            scratch, errorCode));
                if(U_FAILURE(errorCode)) {
                    break;
                }
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        } else {
            if(hasText) {
                dest.append(src, start, limit-start);
            }
            spanCondition=USET_SPAN_SIMPLE;
        }
        start=limit;
    }
    return dest;
}

// Public UTF-8 entry point: writes the filtered normalization of src to sink
// and, if edits is non-null, records the changes there.
//
// Does nothing if errorCode already indicates a failure. edits is reset once
// here unless the caller passed U_EDITS_NO_RESET; the worker is then always
// called with U_EDITS_NO_RESET set so that the per-span calls do not reset it
// again.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                                   Edits *edits, UErrorCode &errorCode) const {
    if (!U_FAILURE(errorCode)) {
        const bool resetRequested = (options & U_EDITS_NO_RESET) == 0;
        if (resetRequested && edits != nullptr) {
            edits->reset();
        }
        // Do not reset for each span.
        options |= U_EDITS_NO_RESET;
        normalizeUTF8(options, src.data(), src.length(), sink, edits,
                      USET_SPAN_SIMPLE, errorCode);
    }
}

// Internal UTF-8 worker: consumes the length bytes at src as alternating
// spans, starting with spanCondition and flipping between USET_SPAN_SIMPLE and
// USET_SPAN_NOT_CONTAINED after every span.
//
// A span taken with USET_SPAN_NOT_CONTAINED is recorded in edits (if non-null)
// as unchanged and copied to sink unless U_OMIT_UNCHANGED_TEXT is set in
// options. Any other span is handed to norm2.normalizeUTF8(). Processing stops
// as soon as norm2 reports a failure through errorCode.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
                                   ByteSink &sink, Edits *edits,
                                   USetSpanCondition spanCondition,
                                   UErrorCode &errorCode) const {
    while (length > 0) {
        const int32_t spanLength = set.spanUTF8(src, length, spanCondition);
        const bool outsideFilter = (spanCondition == USET_SPAN_NOT_CONTAINED);
        // The next span always uses the opposite condition.
        spanCondition = outsideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        if (spanLength == 0) {
            continue;  // Empty span: nothing to emit, nothing to skip.
        }
        if (outsideFilter) {
            if (edits != nullptr) {
                edits->addUnchanged(spanLength);
            }
            if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
                sink.Append(src, spanLength);
            }
        } else {
            // Deliberately not norm2.normalizeSecondAndAppend(): the non-filter
            // text already written must not be modified.
            norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
            if (U_FAILURE(errorCode)) {
                return;
            }
        }
        src += spanLength;
        length -= spanLength;
    }
}

// Public wrapper: forwards to the four-argument worker with normalization of
// second switched on, and returns the worker's result (a reference to first).
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UErrorCode &errorCode) const {
    const UBool doNormalize=TRUE;
    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
}

// Public wrapper: forwards to the four-argument normalizeSecondAndAppend()
// worker with normalization of second switched off, and returns its result
// (a reference to first).
UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
                            const UnicodeString &second,
                            UErrorCode &errorCode) const {
    const UBool doNormalize=FALSE;
    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
}

// Shared worker for normalizeSecondAndAppend() (doNormalize true) and
// append() (doNormalize false). Appends second to first and returns first.
//
// Argument handling:
// - Both strings are passed through uprv_checkCanGetBuffer(); on failure first
//   is returned unchanged.
// - first and second must be distinct objects, otherwise errorCode is set to
//   U_ILLEGAL_ARGUMENT_ERROR.
// - If first is empty, the result is simply normalize(second) or a copy of
//   second, depending on doNormalize.
//
// Otherwise the work is done in two steps:
// 1. The in-filter prefix of second (if any) is joined with the in-filter
//    suffix of first via norm2, so that only in-filter text is modified at
//    the seam.
// 2. Whatever remains of second is appended: through the internal normalize()
//    starting with USET_SPAN_NOT_CONTAINED when doNormalize is set, or
//    verbatim otherwise.
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UBool doNormalize,
                                              UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(first, errorCode);
    uprv_checkCanGetBuffer(second, errorCode);
    if(U_FAILURE(errorCode)) {
        return first;
    }
    if(&second==&first) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return first;
    }
    if(first.isEmpty()) {
        if(!doNormalize) {
            return first=second;
        }
        return normalize(second, first, errorCode);
    }

    // Step 1: merge the in-filter suffix of first with the in-filter prefix of second.
    const int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
    if(prefixLimit!=0) {
        UnicodeString prefix(second.tempSubString(0, prefixLimit));
        const int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
        if(suffixStart!=0) {
            // Only the tail of first is in the filter: operate on a copy of
            // that tail and splice the result back in.
            UnicodeString middle(first, suffixStart, INT32_MAX);
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
            } else {
                norm2.append(middle, prefix, errorCode);
            }
            first.replace(suffixStart, INT32_MAX, middle);
        } else {
            // The in-filter suffix starts at index 0, so norm2 may operate on
            // first directly.
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
            } else {
                norm2.append(first, prefix, errorCode);
            }
        }
    }

    // Step 2: append the remainder of second, which starts outside the filter.
    if(prefixLimit<second.length()) {
        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
        if(doNormalize) {
            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
        } else {
            first.append(rest);
        }
    }
    return first;
}

// Returns false without consulting norm2 when c is not in the filter set;
// otherwise reports whether norm2.getDecomposition() succeeded for c
// (norm2 receives decomposition as its output argument).
UBool
FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    if(!set.contains(c)) {
        return false;
    }
    return norm2.getDecomposition(c, decomposition)!=0;
}

// Returns false without consulting norm2 when c is not in the filter set;
// otherwise reports whether norm2.getRawDecomposition() succeeded for c
// (norm2 receives decomposition as its output argument).
UBool
FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    if(!set.contains(c)) {
        return false;
    }
    return norm2.getRawDecomposition(c, decomposition)!=0;
}

// Delegates to norm2.composePair(a, b) only when both a and b are in the
// filter set; otherwise returns U_SENTINEL. b is not looked up if a is
// already outside the set.
UChar32
FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
    if(!set.contains(a) || !set.contains(b)) {
        return U_SENTINEL;
    }
    return norm2.composePair(a, b);
}

// Returns norm2's combining class for c when c is in the filter set,
// and 0 for every code point outside the set.
uint8_t
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
    if(!set.contains(c)) {
        return 0;
    }
    return norm2.getCombiningClass(c);
}

// Reports whether every in-filter span of s is normalized according to norm2.
//
// s is split into alternating spans, beginning with an in-filter span
// (USET_SPAN_SIMPLE). Out-of-filter spans are skipped; each in-filter span is
// checked with norm2.isNormalized(). Returns FALSE as soon as one span is not
// normalized or errorCode indicates a failure (including a failure reported by
// uprv_checkCanGetBuffer() for s), and TRUE otherwise.
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    bool inFilter=true;  // The first span is taken with USET_SPAN_SIMPLE.
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=
            set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
        if(inFilter) {
            const UBool spanIsNormalized=
                norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode);
            if(U_FAILURE(errorCode) || !spanIsNormalized) {
                return FALSE;
            }
        }
        inFilter=!inFilter;
        start=limit;
    }
    return TRUE;
}

// UTF-8 counterpart of isNormalized(): reports whether every in-filter span of
// sp is normalized according to norm2.
//
// Returns FALSE immediately if errorCode already indicates a failure. The
// bytes are consumed as alternating spans, beginning with an in-filter span
// (USET_SPAN_SIMPLE); only in-filter spans are passed to
// norm2.isNormalizedUTF8(). Returns FALSE as soon as one span is not
// normalized or norm2 reports a failure, and TRUE otherwise.
UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const char *cursor = sp.data();
    int32_t remaining = sp.length();
    bool inFilter = true;  // The first span is taken with USET_SPAN_SIMPLE.
    while (remaining > 0) {
        const int32_t spanLength =
            set.spanUTF8(cursor, remaining,
                         inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
        if (inFilter) {
            const UBool spanIsNormalized =
                norm2.isNormalizedUTF8(StringPiece(cursor, spanLength), errorCode);
            if (U_FAILURE(errorCode) || !spanIsNormalized) {
                return FALSE;
            }
        }
        inFilter = !inFilter;
        cursor += spanLength;
        remaining -= spanLength;
    }
    return TRUE;
}

// Combines norm2.quickCheck() over all in-filter spans of s.
//
// Returns UNORM_MAYBE if errorCode indicates a failure after
// uprv_checkCanGetBuffer(s, ...). Otherwise s is split into alternating spans,
// beginning with an in-filter span (USET_SPAN_SIMPLE); out-of-filter spans are
// skipped. The first span for which norm2 answers UNORM_NO, or for which
// norm2 reports a failure, ends the scan and norm2's answer is returned as is.
// If no such span exists the result is UNORM_MAYBE when at least one span was
// UNORM_MAYBE, and UNORM_YES otherwise.
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    UNormalizationCheckResult overall=UNORM_YES;
    bool inFilter=true;  // The first span is taken with USET_SPAN_SIMPLE.
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=
            set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
        if(inFilter) {
            const UNormalizationCheckResult spanResult=
                norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
            if(U_FAILURE(errorCode) || spanResult==UNORM_NO) {
                return spanResult;
            }
            if(spanResult==UNORM_MAYBE) {
                overall=UNORM_MAYBE;
            }
        }
        inFilter=!inFilter;
        start=limit;
    }
    return overall;
}

// Returns an index into s up to which the filtered quick check passes.
//
// Returns 0 if errorCode indicates a failure after
// uprv_checkCanGetBuffer(s, ...). Otherwise s is split into alternating spans,
// beginning with an in-filter span (USET_SPAN_SIMPLE); out-of-filter spans are
// skipped. For each in-filter span norm2.spanQuickCheckYes() is asked how far
// into the span it gets; if it stops before the end of the span, or reports a
// failure, the corresponding index in s is returned. If every span is passed
// completely, s.length() is returned.
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    bool inFilter=true;  // The first span is taken with USET_SPAN_SIMPLE.
    int32_t start=0;
    while(start<s.length()) {
        const int32_t limit=
            set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
        if(inFilter) {
            // norm2 answers relative to the substring; convert to an index in s.
            const int32_t yesLength=
                norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
            const int32_t yesLimit=start+yesLength;
            if(U_FAILURE(errorCode) || yesLimit<limit) {
                return yesLimit;
            }
        }
        inFilter=!inFilter;
        start=limit;
    }
    return s.length();
}

// A code point outside the filter set always has a boundary before it;
// for a code point inside the set the answer comes from norm2.
UBool
FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
    if(!set.contains(c)) {
        return true;
    }
    return norm2.hasBoundaryBefore(c)!=0;
}

// A code point outside the filter set always has a boundary after it;
// for a code point inside the set the answer comes from norm2.
UBool
FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
    if(!set.contains(c)) {
        return true;
    }
    return norm2.hasBoundaryAfter(c)!=0;
}

// A code point outside the filter set is always reported as inert;
// for a code point inside the set the answer comes from norm2.
UBool
FilteredNormalizer2::isInert(UChar32 c) const {
    if(!set.contains(c)) {
        return true;
    }
    return norm2.isInert(c)!=0;
}

U_NAMESPACE_END

// C API ------------------------------------------------------------------- ***

U_NAMESPACE_USE

/**
 * C API: allocates a FilteredNormalizer2 built from *norm2 and *filterSet and
 * returns it as an opaque UNormalizer2 pointer.
 *
 * @param norm2      The normalizer to wrap; must not be NULL.
 * @param filterSet  The filter set; must not be NULL.
 * @param pErrorCode In/out error code. If it already indicates a failure on
 *                   entry, nothing is done. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                   norm2 or filterSet is NULL, and to
 *                   U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL.
 * @return The new object, or NULL in any of the failure cases above.
 *
 * NOTE(review): the wrapper is constructed from references to *norm2 and
 * *filterSet, so presumably both must outlive the returned object -- confirm
 * against the FilteredNormalizer2 declaration.
 */
U_CAPI UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    // Both pointers are dereferenced below to bind references, so a NULL in
    // either one must be rejected up front.
    if(norm2==NULL || filterSet==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                             *UnicodeSet::fromUSet(filterSet));
    if(fn2==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UNormalizer2 *)fn2;
}

#endif  // !UCONFIG_NO_NORMALIZATION
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9 A	3	/*
	4	*******************************************************************************
	5	*
51004dcb	6	* Copyright (C) 2009-2012, International Business Machines
729e4ab9 A	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
	10	* file name: filterednormalizer2.cpp
f3c0d7a5	11	* encoding: UTF-8
729e4ab9 A	12	* tab size: 8 (not used)
	13	* indentation:4
	14	*
	15	* created on: 2009dec10
	16	* created by: Markus W. Scherer
	17	*/
	18
	19	#include "unicode/utypes.h"
	20
	21	#if !UCONFIG_NO_NORMALIZATION
	22
0f5d89e8	23	#include "unicode/edits.h"
729e4ab9	24	#include "unicode/normalizer2.h"
0f5d89e8	25	#include "unicode/stringoptions.h"
729e4ab9 A	26	#include "unicode/uniset.h"
	27	#include "unicode/unistr.h"
	28	#include "unicode/unorm.h"
	29	#include "cpputils.h"
	30
	31	U_NAMESPACE_BEGIN
	32
4388f060 A	33	FilteredNormalizer2::~FilteredNormalizer2() {}
4388f060 A	34
729e4ab9 A	35	UnicodeString &
	36	FilteredNormalizer2::normalize(const UnicodeString &src,
	37	UnicodeString &dest,
	38	UErrorCode &errorCode) const {
	39	uprv_checkCanGetBuffer(src, errorCode);
	40	if(U_FAILURE(errorCode)) {
	41	dest.setToBogus();
	42	return dest;
	43	}
	44	if(&dest==&src) {
	45	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	46	return dest;
	47	}
	48	dest.remove();
	49	return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
	50	}
	51
	52	// Internal: No argument checking, and appends to dest.
	53	// Pass as input spanCondition the one that is likely to yield a non-zero
	54	// span length at the start of src.
	55	// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
	56	// USET_SPAN_SIMPLE should be passed in for the start of src
	57	// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
	58	// an in-filter prefix.
	59	UnicodeString &
	60	FilteredNormalizer2::normalize(const UnicodeString &src,
	61	UnicodeString &dest,
	62	USetSpanCondition spanCondition,
	63	UErrorCode &errorCode) const {
	64	UnicodeString tempDest; // Don't throw away destination buffer between iterations.
	65	for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
	66	int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
	67	int32_t spanLength=spanLimit-prevSpanLimit;
	68	if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	69	if(spanLength!=0) {
	70	dest.append(src, prevSpanLimit, spanLength);
	71	}
	72	spanCondition=USET_SPAN_SIMPLE;
	73	} else {
	74	if(spanLength!=0) {
	75	// Not norm2.normalizeSecondAndAppend() because we do not want
	76	// to modify the non-filter part of dest.
	77	dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
	78	tempDest, errorCode));
	79	if(U_FAILURE(errorCode)) {
	80	break;
	81	}
	82	}
	83	spanCondition=USET_SPAN_NOT_CONTAINED;
	84	}
	85	prevSpanLimit=spanLimit;
	86	}
	87	return dest;
	88	}
	89
0f5d89e8 A	90	void
	91	FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
	92	Edits *edits, UErrorCode &errorCode) const {
	93	if (U_FAILURE(errorCode)) {
	94	return;
	95	}
	96	if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
	97	edits->reset();
	98	}
	99	options |= U_EDITS_NO_RESET; // Do not reset for each span.
	100	normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode);
	101	}
	102
	103	void
	104	FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
	105	ByteSink &sink, Edits *edits,
	106	USetSpanCondition spanCondition,
	107	UErrorCode &errorCode) const {
	108	while (length > 0) {
	109	int32_t spanLength = set.spanUTF8(src, length, spanCondition);
	110	if (spanCondition == USET_SPAN_NOT_CONTAINED) {
	111	if (spanLength != 0) {
	112	if (edits != nullptr) {
	113	edits->addUnchanged(spanLength);
	114	}
	115	if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
	116	sink.Append(src, spanLength);
	117	}
	118	}
	119	spanCondition = USET_SPAN_SIMPLE;
	120	} else {
	121	if (spanLength != 0) {
	122	// Not norm2.normalizeSecondAndAppend() because we do not want
	123	// to modify the non-filter part of dest.
	124	norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
	125	if (U_FAILURE(errorCode)) {
	126	break;
	127	}
	128	}
	129	spanCondition = USET_SPAN_NOT_CONTAINED;
	130	}
	131	src += spanLength;
	132	length -= spanLength;
	133	}
	134	}
	135
729e4ab9 A	136	UnicodeString &
	137	FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
	138	const UnicodeString &second,
	139	UErrorCode &errorCode) const {
	140	return normalizeSecondAndAppend(first, second, TRUE, errorCode);
	141	}
	142
	143	UnicodeString &
	144	FilteredNormalizer2::append(UnicodeString &first,
	145	const UnicodeString &second,
	146	UErrorCode &errorCode) const {
	147	return normalizeSecondAndAppend(first, second, FALSE, errorCode);
	148	}
	149
	150	UnicodeString &
	151	FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
	152	const UnicodeString &second,
	153	UBool doNormalize,
	154	UErrorCode &errorCode) const {
	155	uprv_checkCanGetBuffer(first, errorCode);
	156	uprv_checkCanGetBuffer(second, errorCode);
	157	if(U_FAILURE(errorCode)) {
	158	return first;
	159	}
	160	if(&first==&second) {
	161	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	162	return first;
	163	}
	164	if(first.isEmpty()) {
	165	if(doNormalize) {
	166	return normalize(second, first, errorCode);
	167	} else {
	168	return first=second;
	169	}
	170	}
	171	// merge the in-filter suffix of the first string with the in-filter prefix of the second
	172	int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
	173	if(prefixLimit!=0) {
	174	UnicodeString prefix(second.tempSubString(0, prefixLimit));
	175	int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
	176	if(suffixStart==0) {
	177	if(doNormalize) {
	178	norm2.normalizeSecondAndAppend(first, prefix, errorCode);
	179	} else {
	180	norm2.append(first, prefix, errorCode);
	181	}
	182	} else {
	183	UnicodeString middle(first, suffixStart, INT32_MAX);
	184	if(doNormalize) {
	185	norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
	186	} else {
	187	norm2.append(middle, prefix, errorCode);
	188	}
	189	first.replace(suffixStart, INT32_MAX, middle);
	190	}
	191	}
	192	if(prefixLimit<second.length()) {
	193	UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
	194	if(doNormalize) {
	195	normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
	196	} else {
	197	first.append(rest);
	198	}
	199	}
200	return first;
201	}
202
203	UBool
204	FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
205	return set.contains(c) && norm2.getDecomposition(c, decomposition);
206	}
207
4388f060 A	208	UBool
	209	FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
	210	return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
	211	}
	212
	213	UChar32
	214	FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
	215	return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
	216	}
	217
	218	uint8_t
	219	FilteredNormalizer2::getCombiningClass(UChar32 c) const {
	220	return set.contains(c) ? norm2.getCombiningClass(c) : 0;
	221	}
	222
729e4ab9 A	223	UBool
	224	FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
	225	uprv_checkCanGetBuffer(s, errorCode);
	226	if(U_FAILURE(errorCode)) {
	227	return FALSE;
	228	}
	229	USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	230	for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	231	int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	232	if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	233	spanCondition=USET_SPAN_SIMPLE;
	234	} else {
	235	if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
	236	U_FAILURE(errorCode)
	237	) {
	238	return FALSE;
	239	}
	240	spanCondition=USET_SPAN_NOT_CONTAINED;
	241	}
	242	prevSpanLimit=spanLimit;
	243	}
	244	return TRUE;
	245	}
	246
0f5d89e8 A	247	UBool
	248	FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
	249	if(U_FAILURE(errorCode)) {
	250	return FALSE;
	251	}
	252	const char *s = sp.data();
	253	int32_t length = sp.length();
	254	USetSpanCondition spanCondition = USET_SPAN_SIMPLE;
	255	while (length > 0) {
	256	int32_t spanLength = set.spanUTF8(s, length, spanCondition);
	257	if (spanCondition == USET_SPAN_NOT_CONTAINED) {
	258	spanCondition = USET_SPAN_SIMPLE;
	259	} else {
	260	if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
	261	U_FAILURE(errorCode)) {
	262	return FALSE;
	263	}
	264	spanCondition = USET_SPAN_NOT_CONTAINED;
	265	}
	266	s += spanLength;
	267	length -= spanLength;
	268	}
	269	return TRUE;
	270	}
	271
729e4ab9 A	272	UNormalizationCheckResult
	273	FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
	274	uprv_checkCanGetBuffer(s, errorCode);
	275	if(U_FAILURE(errorCode)) {
	276	return UNORM_MAYBE;
	277	}
	278	UNormalizationCheckResult result=UNORM_YES;
	279	USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	280	for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	281	int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	282	if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	283	spanCondition=USET_SPAN_SIMPLE;
	284	} else {
	285	UNormalizationCheckResult qcResult=
	286	norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
	287	if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
	288	return qcResult;
	289	} else if(qcResult==UNORM_MAYBE) {
	290	result=qcResult;
	291	}
	292	spanCondition=USET_SPAN_NOT_CONTAINED;
	293	}
	294	prevSpanLimit=spanLimit;
	295	}
	296	return result;
	297	}
	298
	299	int32_t
	300	FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
	301	uprv_checkCanGetBuffer(s, errorCode);
	302	if(U_FAILURE(errorCode)) {
	303	return 0;
	304	}
	305	USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	306	for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	307	int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	308	if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	309	spanCondition=USET_SPAN_SIMPLE;
	310	} else {
	311	int32_t yesLimit=
	312	prevSpanLimit+
	313	norm2.spanQuickCheckYes(
	314	s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
	315	if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
	316	return yesLimit;
	317	}
	318	spanCondition=USET_SPAN_NOT_CONTAINED;
	319	}
	320	prevSpanLimit=spanLimit;
	321	}
	322	return s.length();
	323	}
	324
	325	UBool
	326	FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
	327	return !set.contains(c) || norm2.hasBoundaryBefore(c);
	328	}
	329
	330	UBool
	331	FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
	332	return !set.contains(c) || norm2.hasBoundaryAfter(c);
	333	}
	334
	335	UBool
336	FilteredNormalizer2::isInert(UChar32 c) const {
337	return !set.contains(c) || norm2.isInert(c);
338	}
339
340	U_NAMESPACE_END
341
342	// C API ------------------------------------------------------------------- ***
343
344	U_NAMESPACE_USE
345
51004dcb	346	U_CAPI UNormalizer2 * U_EXPORT2
729e4ab9 A	347	unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
	348	if(U_FAILURE(*pErrorCode)) {
	349	return NULL;
	350	}
	351	if(filterSet==NULL) {
	352	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	353	return NULL;
	354	}
	355	Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
	356	*UnicodeSet::fromUSet(filterSet));
	357	if(fn2==NULL) {
	358	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	359	}
	360	return (UNormalizer2 *)fn2;
	361	}
	362
	363	#endif // !UCONFIG_NO_NORMALIZATION