git.saurik.com Git - apple/icu.git/blame_incremental

0 / 736 ( 0%)

Commit	Line	Data
	1	/*
	2	***************************************************************************
	3	* Copyright (C) 2008-2013, International Business Machines Corporation
	4	* and others. All Rights Reserved.
	5	***************************************************************************
	6	* file name: uspoof.cpp
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2008Feb13
	12	* created by: Andy Heninger
	13	*
	14	* Unicode Spoof Detection
	15	*/
	16	#include "unicode/utypes.h"
	17	#include "unicode/normalizer2.h"
	18	#include "unicode/uspoof.h"
	19	#include "unicode/ustring.h"
	20	#include "unicode/utf16.h"
	21	#include "cmemory.h"
	22	#include "cstring.h"
	23	#include "identifier_info.h"
	24	#include "mutex.h"
	25	#include "scriptset.h"
	26	#include "uassert.h"
	27	#include "ucln_in.h"
	28	#include "uspoof_impl.h"
	29	#include "umutex.h"
	30
	31
	32	#if !UCONFIG_NO_NORMALIZATION
	33
	34	U_NAMESPACE_USE
	35
	36
	37	//
	38	// Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
	39	//
	// Set handed out by uspoof_getInclusionSet() / uspoof_getInclusionUnicodeSet().
	// Created by initializeStatics(), deleted by uspoof_cleanup().
	40	static UnicodeSet *gInclusionSet = NULL;
	// Set handed out by uspoof_getRecommendedSet() / uspoof_getRecommendedUnicodeSet().
	// Created by initializeStatics(), deleted by uspoof_cleanup().
	41	static UnicodeSet *gRecommendedSet = NULL;
	// NFD normalizer used by the check and skeleton functions in this file.
	// uspoof_cleanup() only clears this pointer; it does not delete the object.
	42	static const Normalizer2 *gNfdNormalizer = NULL;
	// Held for the duration of initializeStatics().
	43	static UMutex gInitMutex = U_MUTEX_INITIALIZER;
	44
	45	static UBool U_CALLCONV
	46	uspoof_cleanup(void) {
	47	delete gInclusionSet;
	48	gInclusionSet = NULL;
	49	delete gRecommendedSet;
	50	gRecommendedSet = NULL;
	51	gNfdNormalizer = NULL;
	52	return TRUE;
	53	}
	54
	55	static void initializeStatics() {
	56	Mutex m(&gInitMutex);
	57	UErrorCode status = U_ZERO_ERROR;
	58	if (gInclusionSet == NULL) {
	59	gInclusionSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\
	60	\\-.\\u00B7\\u05F3\\u05F4\\u0F0B\\u200C\\u200D\\u2019]"), status);
	61	gRecommendedSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\
	62	[0-z\\u00C0-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\
	63	\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B\\u021E\
	64	\\u021F\\u0226-\\u0233\\u02BB\\u02BC\\u02EC\\u0300-\\u0304\
	65	\\u0306-\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\
	66	\\u0328\\u032D\\u032E\\u0330\\u0331\\u0335\\u0338\\u0339\
	67	\\u0342-\\u0345\\u037B-\\u03CE\\u03FC-\\u045F\\u048A-\\u0525\
	68	\\u0531-\\u0586\\u05D0-\\u05F2\\u0621-\\u063F\\u0641-\\u0655\
	69	\\u0660-\\u0669\\u0670-\\u068D\\u068F-\\u06D5\\u06E5\\u06E6\
	70	\\u06EE-\\u06FF\\u0750-\\u07B1\\u0901-\\u0939\\u093C-\\u094D\
	71	\\u0950\\u0960-\\u0972\\u0979-\\u0A4D\\u0A5C-\\u0A74\\u0A81-\
	72	\\u0B43\\u0B47-\\u0B61\\u0B66-\\u0C56\\u0C60\\u0C61\\u0C66-\
	73	\\u0CD6\\u0CE0-\\u0CEF\\u0D02-\\u0D28\\u0D2A-\\u0D39\\u0D3D-\
	74	\\u0D43\\u0D46-\\u0D4D\\u0D57-\\u0D61\\u0D66-\\u0D8E\\u0D91-\
	75	\\u0DA5\\u0DA7-\\u0DDE\\u0DF2\\u0E01-\\u0ED9\\u0F00\\u0F20-\
	76	\\u0F8B\\u0F90-\\u109D\\u10D0-\\u10F0\\u10F7-\\u10FA\\u1200-\
	77	\\u135A\\u135F\\u1380-\\u138F\\u1401-\\u167F\\u1780-\\u17A2\
	78	\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7-\
	79	\\u17DC\\u17E0-\\u17E9\\u1810-\\u18A8\\u18AA-\\u18F5\\u1E00-\
	80	\\u1E99\\u1F00-\\u1FFC\\u2D30-\\u2D65\\u2D80-\\u2DDE\\u3005-\
	81	\\u3007\\u3041-\\u31B7\\u3400-\\u9FCB\\uA000-\\uA48C\\uA67F\
	82	\\uA717-\\uA71F\\uA788\\uAA60-\\uAA7B\\uAC00-\\uD7A3\\uFA0E-\
	83	\\uFA29\\U00020000-\
	84	\\U0002B734]-[[:Cn:][:nfkcqc=n:][:XIDC=n:]]]"), status);
	85	gNfdNormalizer = Normalizer2::getNFDInstance(status);
	86	}
	87	ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
	88
	89	return;
	90	}
	91
	92
	93	U_CAPI USpoofChecker * U_EXPORT2
	94	uspoof_open(UErrorCode *status) {
	95	if (U_FAILURE(*status)) {
	96	return NULL;
	97	}
	98	initializeStatics();
	99	SpoofImpl si = new SpoofImpl(SpoofData::getDefault(status), *status);
	100	if (U_FAILURE(*status)) {
	101	delete si;
	102	si = NULL;
	103	}
	104	return reinterpret_cast<USpoofChecker *>(si);
	105	}
	106
	107
	108	U_CAPI USpoofChecker * U_EXPORT2
	109	uspoof_openFromSerialized(const void data, int32_t length, int32_t pActualLength,
	110	UErrorCode *status) {
	111	if (U_FAILURE(*status)) {
	112	return NULL;
	113	}
	114	initializeStatics();
	115	SpoofData sd = new SpoofData(data, length, status);
	116	SpoofImpl si = new SpoofImpl(sd, status);
	117	if (U_FAILURE(*status)) {
	118	delete sd;
	119	delete si;
	120	return NULL;
	121	}
	122	if (sd == NULL \|\| si == NULL) {
	123	*status = U_MEMORY_ALLOCATION_ERROR;
	124	delete sd;
	125	delete si;
	126	return NULL;
	127	}
	128
	129	if (pActualLength != NULL) {
	130	*pActualLength = sd->fRawData->fLength;
	131	}
	132	return reinterpret_cast<USpoofChecker *>(si);
	133	}
	134
	135
	136	U_CAPI USpoofChecker * U_EXPORT2
	137	uspoof_clone(const USpoofChecker sc, UErrorCode status) {
	138	const SpoofImpl src = SpoofImpl::validateThis(sc, status);
	139	if (src == NULL) {
	140	return NULL;
	141	}
	142	SpoofImpl result = new SpoofImpl(src, *status); // copy constructor
	143	if (U_FAILURE(*status)) {
	144	delete result;
	145	result = NULL;
	146	}
	147	return reinterpret_cast<USpoofChecker *>(result);
	148	}
	149
	150
	151	U_CAPI void U_EXPORT2
	152	uspoof_close(USpoofChecker *sc) {
	153	UErrorCode status = U_ZERO_ERROR;
	154	SpoofImpl *This = SpoofImpl::validateThis(sc, status);
	155	delete This;
	156	}
	157
	158
	159	U_CAPI void U_EXPORT2
	160	uspoof_setChecks(USpoofChecker sc, int32_t checks, UErrorCode status) {
	161	SpoofImpl This = SpoofImpl::validateThis(sc, status);
	162	if (This == NULL) {
	163	return;
	164	}
	165
	166	// Verify that the requested checks are all ones (bits) that
	167	// are acceptable, known values.
	168	if (checks & ~USPOOF_ALL_CHECKS) {
	169	*status = U_ILLEGAL_ARGUMENT_ERROR;
	170	return;
	171	}
	172
	173	This->fChecks = checks;
	174	}
	175
	176
	177	U_CAPI int32_t U_EXPORT2
	178	uspoof_getChecks(const USpoofChecker sc, UErrorCode status) {
	179	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	180	if (This == NULL) {
	181	return 0;
	182	}
	183	return This->fChecks;
	184	}
	185
	186	U_CAPI void U_EXPORT2
	187	uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
	188	UErrorCode status = U_ZERO_ERROR;
	189	SpoofImpl *This = SpoofImpl::validateThis(sc, status);
	190	if (This != NULL) {
	191	This->fRestrictionLevel = restrictionLevel;
	192	}
	193	}
	194
	195	U_CAPI URestrictionLevel U_EXPORT2
	196	uspoof_getRestrictionLevel(const USpoofChecker *sc) {
	197	UErrorCode status = U_ZERO_ERROR;
	198	const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
	199	if (This == NULL) {
	200	return USPOOF_UNRESTRICTIVE;
	201	}
	202	return This->fRestrictionLevel;
	203	}
	204
	205	U_CAPI void U_EXPORT2
	206	uspoof_setAllowedLocales(USpoofChecker sc, const char localesList, UErrorCode *status) {
	207	SpoofImpl This = SpoofImpl::validateThis(sc, status);
	208	if (This == NULL) {
	209	return;
	210	}
	211	This->setAllowedLocales(localesList, *status);
	212	}
	213
	214	U_CAPI const char * U_EXPORT2
	215	uspoof_getAllowedLocales(USpoofChecker sc, UErrorCode status) {
	216	SpoofImpl This = SpoofImpl::validateThis(sc, status);
	217	if (This == NULL) {
	218	return NULL;
	219	}
	220	return This->getAllowedLocales(*status);
	221	}
	222
	223
	224	U_CAPI const USet * U_EXPORT2
	225	uspoof_getAllowedChars(const USpoofChecker sc, UErrorCode status) {
	226	const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
	227	return result->toUSet();
	228	}
	229
	230	U_CAPI const UnicodeSet * U_EXPORT2
	231	uspoof_getAllowedUnicodeSet(const USpoofChecker sc, UErrorCode status) {
	232	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	233	if (This == NULL) {
	234	return NULL;
	235	}
	236	return This->fAllowedCharsSet;
	237	}
	238
	239
	240	U_CAPI void U_EXPORT2
	241	uspoof_setAllowedChars(USpoofChecker sc, const USet chars, UErrorCode *status) {
	242	const UnicodeSet *set = UnicodeSet::fromUSet(chars);
	243	uspoof_setAllowedUnicodeSet(sc, set, status);
	244	}
	245
	246
	247	U_CAPI void U_EXPORT2
	248	uspoof_setAllowedUnicodeSet(USpoofChecker sc, const UnicodeSet chars, UErrorCode *status) {
	249	SpoofImpl This = SpoofImpl::validateThis(sc, status);
	250	if (This == NULL) {
	251	return;
	252	}
	253	if (chars->isBogus()) {
	254	*status = U_ILLEGAL_ARGUMENT_ERROR;
	255	return;
	256	}
	257	UnicodeSet clonedSet = static_cast<UnicodeSet >(chars->clone());
	258	if (clonedSet == NULL \|\| clonedSet->isBogus()) {
	259	*status = U_MEMORY_ALLOCATION_ERROR;
	260	return;
	261	}
	262	clonedSet->freeze();
	263	delete This->fAllowedCharsSet;
	264	This->fAllowedCharsSet = clonedSet;
	265	This->fChecks \|= USPOOF_CHAR_LIMIT;
	266	}
	267
	268
	269	U_CAPI int32_t U_EXPORT2
	270	uspoof_check(const USpoofChecker *sc,
	271	const UChar *id, int32_t length,
	272	int32_t *position,
	273	UErrorCode *status) {
	274
	275	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	276	if (This == NULL) {
	277	return 0;
	278	}
	279	if (length < -1) {
	280	*status = U_ILLEGAL_ARGUMENT_ERROR;
	281	return 0;
	282	}
	283	UnicodeString idStr((length == -1), id, length); // Aliasing constructor.
	284	int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status);
	285	return result;
	286	}
	287
	288
	289	U_CAPI int32_t U_EXPORT2
	290	uspoof_checkUTF8(const USpoofChecker *sc,
	291	const char *id, int32_t length,
	292	int32_t *position,
	293	UErrorCode *status) {
	294
	295	if (U_FAILURE(*status)) {
	296	return 0;
	297	}
	298	UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id)));
	299	int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status);
	300	return result;
	301	}
	302
	303
	304	U_CAPI int32_t U_EXPORT2
	305	uspoof_areConfusable(const USpoofChecker *sc,
	306	const UChar *id1, int32_t length1,
	307	const UChar *id2, int32_t length2,
	308	UErrorCode *status) {
	309	SpoofImpl::validateThis(sc, *status);
	310	if (U_FAILURE(*status)) {
	311	return 0;
	312	}
	313	if (length1 < -1 \|\| length2 < -1) {
	314	*status = U_ILLEGAL_ARGUMENT_ERROR;
	315	return 0;
	316	}
	317
	318	UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor
	319	UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor
	320	return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
	321	}
	322
	323
	324	U_CAPI int32_t U_EXPORT2
	325	uspoof_areConfusableUTF8(const USpoofChecker *sc,
	326	const char *id1, int32_t length1,
	327	const char *id2, int32_t length2,
	328	UErrorCode *status) {
	329	SpoofImpl::validateThis(sc, *status);
	330	if (U_FAILURE(*status)) {
	331	return 0;
	332	}
	333	if (length1 < -1 \|\| length2 < -1) {
	334	*status = U_ILLEGAL_ARGUMENT_ERROR;
	335	return 0;
	336	}
	337	UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : uprv_strlen(id1)));
	338	UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : uprv_strlen(id2)));
	339	int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
	340	return results;
	341	}
	342
	343
	344	U_CAPI int32_t U_EXPORT2
	345	uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
	346	const icu::UnicodeString &id1,
	347	const icu::UnicodeString &id2,
	348	UErrorCode *status) {
	349	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	350	if (U_FAILURE(*status)) {
	351	return 0;
	352	}
	353	//
	354	// See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
	355	// and for definitions of the types (single, whole, mixed-script) of confusables.
	356
	357	// We only care about a few of the check flags. Ignore the others.
	358	// If no tests relavant to this function have been specified, return an error.
	359	// TODO: is this really the right thing to do? It's probably an error on the caller's part,
	360	// but logically we would just return 0 (no error).
	361	if ((This->fChecks & (USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_MIXED_SCRIPT_CONFUSABLE \|
	362	USPOOF_WHOLE_SCRIPT_CONFUSABLE)) == 0) {
	363	*status = U_INVALID_STATE_ERROR;
	364	return 0;
	365	}
	366	int32_t flagsForSkeleton = This->fChecks & USPOOF_ANY_CASE;
	367
	368	int32_t result = 0;
	369	IdentifierInfo identifierInfo = This->getIdentifierInfo(status);
	370	if (U_FAILURE(*status)) {
	371	return 0;
	372	}
	373	identifierInfo->setIdentifier(id1, *status);
	374	int32_t id1ScriptCount = identifierInfo->getScriptCount();
	375	identifierInfo->setIdentifier(id2, *status);
	376	int32_t id2ScriptCount = identifierInfo->getScriptCount();
	377	This->releaseIdentifierInfo(identifierInfo);
	378	identifierInfo = NULL;
	379
	380	if (This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) {
	381	UnicodeString id1Skeleton;
	382	UnicodeString id2Skeleton;
	383	if (id1ScriptCount <= 1 && id2ScriptCount <= 1) {
	384	flagsForSkeleton \|= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
	385	uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status);
	386	uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status);
	387	if (id1Skeleton == id2Skeleton) {
	388	result \|= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
	389	}
	390	}
	391	}
	392
	393	if (result & USPOOF_SINGLE_SCRIPT_CONFUSABLE) {
	394	// If the two inputs are single script confusable they cannot also be
	395	// mixed or whole script confusable, according to the UAX39 definitions.
	396	// So we can skip those tests.
	397	return result;
	398	}
	399
	400	// Two identifiers are whole script confusable if each is of a single script
	401	// and they are mixed script confusable.
	402	UBool possiblyWholeScriptConfusables =
	403	id1ScriptCount <= 1 && id2ScriptCount <= 1 && (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE);
	404
	405	//
	406	// Mixed Script Check
	407	//
	408	if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) \|\| possiblyWholeScriptConfusables ) {
	409	// For getSkeleton(), resetting the USPOOF_SINGLE_SCRIPT_CONFUSABLE flag will get us
	410	// the mixed script table skeleton, which is what we want.
	411	// The Any Case / Lower Case bit in the skelton flags was set at the top of the function.
	412	UnicodeString id1Skeleton;
	413	UnicodeString id2Skeleton;
	414	flagsForSkeleton &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
	415	uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status);
	416	uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status);
	417	if (id1Skeleton == id2Skeleton) {
	418	result \|= USPOOF_MIXED_SCRIPT_CONFUSABLE;
	419	if (possiblyWholeScriptConfusables) {
	420	result \|= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
	421	}
	422	}
	423	}
	424
	425	return result;
	426	}
	427
	428
	429
	430
	431	U_CAPI int32_t U_EXPORT2
	432	uspoof_checkUnicodeString(const USpoofChecker *sc,
	433	const icu::UnicodeString &id,
	434	int32_t *position,
	435	UErrorCode *status) {
	436	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	437	if (This == NULL) {
	438	return 0;
	439	}
	440	int32_t result = 0;
	441
	442	IdentifierInfo *identifierInfo = NULL;
	443	if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL \| USPOOF_MIXED_NUMBERS)) {
	444	identifierInfo = This->getIdentifierInfo(*status);
	445	if (U_FAILURE(*status)) {
	446	goto cleanupAndReturn;
	447	}
	448	identifierInfo->setIdentifier(id, *status);
	449	identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
	450	}
	451
	452
	453	if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
	454	URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
	455	if (idRestrictionLevel > This->fRestrictionLevel) {
	456	result \|= USPOOF_RESTRICTION_LEVEL;
	457	}
	458	if (This->fChecks & USPOOF_AUX_INFO) {
	459	result \|= idRestrictionLevel;
	460	}
	461	}
	462
	463	if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
	464	const UnicodeSet *numerics = identifierInfo->getNumerics();
	465	if (numerics->size() > 1) {
	466	result \|= USPOOF_MIXED_NUMBERS;
	467	}
	468
	469	// TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
	470	// We have no easy way to do the same in C.
	471	// if (checkResult != null) {
	472	// checkResult.numerics = numerics;
	473	// }
	474	}
	475
	476
	477	if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
	478	int32_t i;
	479	UChar32 c;
	480	int32_t length = id.length();
	481	for (i=0; i<length ;) {
	482	c = id.char32At(i);
	483	i += U16_LENGTH(c);
	484	if (!This->fAllowedCharsSet->contains(c)) {
	485	result \|= USPOOF_CHAR_LIMIT;
	486	break;
	487	}
	488	}
	489	}
	490
	491	if (This->fChecks &
	492	(USPOOF_WHOLE_SCRIPT_CONFUSABLE \| USPOOF_MIXED_SCRIPT_CONFUSABLE \| USPOOF_INVISIBLE)) {
	493	// These are the checks that need to be done on NFD input
	494	UnicodeString nfdText;
	495	gNfdNormalizer->normalize(id, nfdText, *status);
	496	int32_t nfdLength = nfdText.length();
	497
	498	if (This->fChecks & USPOOF_INVISIBLE) {
	499
	500	// scan for more than one occurence of the same non-spacing mark
	501	// in a sequence of non-spacing marks.
	502	int32_t i;
	503	UChar32 c;
	504	UChar32 firstNonspacingMark = 0;
	505	UBool haveMultipleMarks = FALSE;
	506	UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
	507
	508	for (i=0; i<nfdLength ;) {
	509	c = nfdText.char32At(i);
	510	i += U16_LENGTH(c);
	511	if (u_charType(c) != U_NON_SPACING_MARK) {
	512	firstNonspacingMark = 0;
	513	if (haveMultipleMarks) {
	514	marksSeenSoFar.clear();
	515	haveMultipleMarks = FALSE;
	516	}
	517	continue;
	518	}
	519	if (firstNonspacingMark == 0) {
	520	firstNonspacingMark = c;
	521	continue;
	522	}
	523	if (!haveMultipleMarks) {
	524	marksSeenSoFar.add(firstNonspacingMark);
	525	haveMultipleMarks = TRUE;
	526	}
	527	if (marksSeenSoFar.contains(c)) {
	528	// report the error, and stop scanning.
	529	// No need to find more than the first failure.
	530	result \|= USPOOF_INVISIBLE;
	531	break;
	532	}
	533	marksSeenSoFar.add(c);
	534	}
	535	}
	536
	537
	538	if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE \| USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
	539	// The basic test is the same for both whole and mixed script confusables.
	540	// Compute the set of scripts that every input character has a confusable in.
	541	// For this computation an input character is always considered to be
	542	// confusable with itself in its own script.
	543	//
	544	// If the number of such scripts is two or more, and the input consisted of
	545	// characters all from a single script, we have a whole script confusable.
	546	// (The two scripts will be the original script and the one that is confusable)
	547	//
	548	// If the number of such scripts >= one, and the original input contained characters from
	549	// more than one script, we have a mixed script confusable. (We can transform
	550	// some of the characters, and end up with a visually similar string all in
	551	// one script.)
	552
	553	if (identifierInfo == NULL) {
	554	identifierInfo = This->getIdentifierInfo(*status);
	555	if (U_FAILURE(*status)) {
	556	goto cleanupAndReturn;
	557	}
	558	identifierInfo->setIdentifier(id, *status);
	559	}
	560
	561	int32_t scriptCount = identifierInfo->getScriptCount();
	562
	563	ScriptSet scripts;
	564	This->wholeScriptCheck(nfdText, &scripts, *status);
	565	int32_t confusableScriptCount = scripts.countMembers();
	566	//printf("confusableScriptCount = %d\n", confusableScriptCount);
	567
	568	if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
	569	confusableScriptCount >= 2 &&
	570	scriptCount == 1) {
	571	result \|= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
	572	}
	573
	574	if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
	575	confusableScriptCount >= 1 &&
	576	scriptCount > 1) {
	577	result \|= USPOOF_MIXED_SCRIPT_CONFUSABLE;
	578	}
	579	}
	580	}
	581
	582	cleanupAndReturn:
	583	This->releaseIdentifierInfo(identifierInfo);
	584	if (position != NULL) {
	585	*position = 0;
	586	}
	587	return result;
	588	}
	589
	590
	591	U_CAPI int32_t U_EXPORT2
	592	uspoof_getSkeleton(const USpoofChecker *sc,
	593	uint32_t type,
	594	const UChar *id, int32_t length,
	595	UChar *dest, int32_t destCapacity,
	596	UErrorCode *status) {
	597
	598	SpoofImpl::validateThis(sc, *status);
	599	if (U_FAILURE(*status)) {
	600	return 0;
	601	}
	602	if (length<-1 \|\| destCapacity<0 \|\| (destCapacity==0 && dest!=NULL)) {
	603	*status = U_ILLEGAL_ARGUMENT_ERROR;
	604	return 0;
	605	}
	606
	607	UnicodeString idStr((length==-1), id, length); // Aliasing constructor
	608	UnicodeString destStr;
	609	uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
	610	destStr.extract(dest, destCapacity, *status);
	611	return destStr.length();
	612	}
	613
	614
	615
	616	U_I18N_API UnicodeString & U_EXPORT2
	617	uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
	618	uint32_t type,
	619	const UnicodeString &id,
	620	UnicodeString &dest,
	621	UErrorCode *status) {
	622	const SpoofImpl This = SpoofImpl::validateThis(sc, status);
	623	if (U_FAILURE(*status)) {
	624	return dest;
	625	}
	626
	627	int32_t tableMask = 0;
	628	switch (type) {
	629	case 0:
	630	tableMask = USPOOF_ML_TABLE_FLAG;
	631	break;
	632	case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
	633	tableMask = USPOOF_SL_TABLE_FLAG;
	634	break;
	635	case USPOOF_ANY_CASE:
	636	tableMask = USPOOF_MA_TABLE_FLAG;
	637	break;
	638	case USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_ANY_CASE:
	639	tableMask = USPOOF_SA_TABLE_FLAG;
	640	break;
	641	default:
	642	*status = U_ILLEGAL_ARGUMENT_ERROR;
	643	return dest;
	644	}
	645
	646	UnicodeString nfdId;
	647	gNfdNormalizer->normalize(id, nfdId, *status);
	648
	649	// Apply the skeleton mapping to the NFD normalized input string
	650	// Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
	651	int32_t inputIndex = 0;
	652	UnicodeString skelStr;
	653	int32_t normalizedLen = nfdId.length();
	654	for (inputIndex=0; inputIndex < normalizedLen; ) {
	655	UChar32 c = nfdId.char32At(inputIndex);
	656	inputIndex += U16_LENGTH(c);
	657	This->confusableLookup(c, tableMask, skelStr);
	658	}
	659
	660	gNfdNormalizer->normalize(skelStr, dest, *status);
	661	return dest;
	662	}
	663
	664
	665	U_CAPI int32_t U_EXPORT2
	666	uspoof_getSkeletonUTF8(const USpoofChecker *sc,
	667	uint32_t type,
	668	const char *id, int32_t length,
	669	char *dest, int32_t destCapacity,
	670	UErrorCode *status) {
	671	SpoofImpl::validateThis(sc, *status);
	672	if (U_FAILURE(*status)) {
	673	return 0;
	674	}
	675	if (length<-1 \|\| destCapacity<0 \|\| (destCapacity==0 && dest!=NULL)) {
	676	*status = U_ILLEGAL_ARGUMENT_ERROR;
	677	return 0;
	678	}
	679
	680	UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id)));
	681	UnicodeString destStr;
	682	uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
	683	if (U_FAILURE(*status)) {
	684	return 0;
	685	}
	686
	687	int32_t lengthInUTF8 = 0;
	688	u_strToUTF8(dest, destCapacity, &lengthInUTF8,
	689	destStr.getBuffer(), destStr.length(), status);
	690	return lengthInUTF8;
	691	}
	692
	693
	694	U_CAPI int32_t U_EXPORT2
	695	uspoof_serialize(USpoofChecker sc,void buf, int32_t capacity, UErrorCode *status) {
	696	SpoofImpl This = SpoofImpl::validateThis(sc, status);
	697	if (This == NULL) {
	698	U_ASSERT(U_FAILURE(*status));
	699	return 0;
	700	}
	701	int32_t dataSize = This->fSpoofData->fRawData->fLength;
	702	if (capacity < dataSize) {
	703	*status = U_BUFFER_OVERFLOW_ERROR;
	704	return dataSize;
	705	}
	706	uprv_memcpy(buf, This->fSpoofData->fRawData, dataSize);
	707	return dataSize;
	708	}
	709
	710	U_CAPI const USet * U_EXPORT2
	711	uspoof_getInclusionSet(UErrorCode *) {
	712	initializeStatics();
	713	return gInclusionSet->toUSet();
	714	}
	715
	716	U_CAPI const USet * U_EXPORT2
	717	uspoof_getRecommendedSet(UErrorCode *) {
	718	initializeStatics();
	719	return gRecommendedSet->toUSet();
	720	}
	721
	722	U_I18N_API const UnicodeSet * U_EXPORT2
	723	uspoof_getInclusionUnicodeSet(UErrorCode *) {
	724	initializeStatics();
	725	return gInclusionSet;
	726	}
	727
	728	U_I18N_API const UnicodeSet * U_EXPORT2
	729	uspoof_getRecommendedUnicodeSet(UErrorCode *) {
	730	initializeStatics();
	731	return gRecommendedSet;
	732	}
	733
	734
	735
	736	#endif // !UCONFIG_NO_NORMALIZATION