git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	*******************************************************************************
	5	*
	6	* Copyright (C) 2008-2011, International Business Machines
	7	* Corporation, Google and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
	10	*/
	11	// Author : eldawy@google.com (Mohamed Eldawy)
	12	// ucnvsel.cpp
	13	//
	14	// Purpose: To generate a list of encodings capable of handling
	15	// a given Unicode text
	16	//
	17	// Started 09-April-2008
	18
	19	/**
	20	* \file
	21	*
	22	* This is an implementation of an encoding selector.
	23	* The goal is, given a unicode string, find the encodings
	24	* this string can be mapped to. To make processing faster
	25	* a trie is built when you call ucnvsel_open() that
	26	* stores all encodings a codepoint can map to
	27	*/
	28
	29	#include "unicode/ucnvsel.h"
	30
	31	#if !UCONFIG_NO_CONVERSION
	32
	33	#include <string.h>
	34
	35	#include "unicode/uchar.h"
	36	#include "unicode/uniset.h"
	37	#include "unicode/ucnv.h"
	38	#include "unicode/ustring.h"
	39	#include "unicode/uchriter.h"
	40	#include "utrie2.h"
	41	#include "propsvec.h"
	42	#include "uassert.h"
	43	#include "ucmndata.h"
	44	#include "udataswp.h"
	45	#include "uenumimp.h"
	46	#include "cmemory.h"
	47	#include "cstring.h"
	48
	49	U_NAMESPACE_USE
	50
	51	struct UConverterSelector {
	52	UTrie2 *trie; // 16 bit trie containing offsets into pv
	53	uint32_t* pv; // table of bits!
	54	int32_t pvCount;
	55	char** encodings; // which encodings did user ask to use?
	56	int32_t encodingsCount;
	57	int32_t encodingStrLength;
	58	uint8_t* swapped;
	59	UBool ownPv, ownEncodingStrings;
	60	};
	61
	62	static void generateSelectorData(UConverterSelector* result,
	63	UPropsVectors *upvec,
	64	const USet* excludedCodePoints,
	65	const UConverterUnicodeSet whichSet,
	66	UErrorCode* status) {
	67	if (U_FAILURE(*status)) {
	68	return;
	69	}
	70
	71	int32_t columns = (result->encodingsCount+31)/32;
	72
	73	// set errorValue to all-ones
	74	for (int32_t col = 0; col < columns; col++) {
	75	upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
	76	col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status);
	77	}
	78
	79	for (int32_t i = 0; i < result->encodingsCount; ++i) {
	80	uint32_t mask;
	81	uint32_t column;
	82	int32_t item_count;
	83	int32_t j;
	84	UConverter* test_converter = ucnv_open(result->encodings[i], status);
	85	if (U_FAILURE(*status)) {
	86	return;
	87	}
	88	USet* unicode_point_set;
	89	unicode_point_set = uset_open(1, 0); // empty set
	90
	91	ucnv_getUnicodeSet(test_converter, unicode_point_set,
	92	whichSet, status);
	93	if (U_FAILURE(*status)) {
	94	ucnv_close(test_converter);
	95	return;
	96	}
	97
	98	column = i / 32;
	99	mask = 1 << (i%32);
	100	// now iterate over intervals on set i!
	101	item_count = uset_getItemCount(unicode_point_set);
	102
	103	for (j = 0; j < item_count; ++j) {
	104	UChar32 start_char;
	105	UChar32 end_char;
	106	UErrorCode smallStatus = U_ZERO_ERROR;
	107	uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0,
	108	&smallStatus);
	109	if (U_FAILURE(smallStatus)) {
	110	// this will be reached for the converters that fill the set with
	111	// strings. Those should be ignored by our system
	112	} else {
	113	upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask,
	114	status);
	115	}
	116	}
	117	ucnv_close(test_converter);
	118	uset_close(unicode_point_set);
	119	if (U_FAILURE(*status)) {
	120	return;
	121	}
	122	}
	123
	124	// handle excluded encodings! Simply set their values to all 1's in the upvec
	125	if (excludedCodePoints) {
	126	int32_t item_count = uset_getItemCount(excludedCodePoints);
	127	for (int32_t j = 0; j < item_count; ++j) {
	128	UChar32 start_char;
	129	UChar32 end_char;
	130
	131	uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
	132	status);
	133	for (int32_t col = 0; col < columns; col++) {
	134	upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
	135	status);
	136	}
	137	}
	138	}
	139
	140	// alright. Now, let's put things in the same exact form you'd get when you
	141	// unserialize things.
	142	result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
	143	result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status);
	144	result->pvCount = columns; // number of uint32_t = rows columns
	145	result->ownPv = TRUE;
	146	}
	147
	148	/* open a selector. If converterListSize is 0, build for all converters.
	149	If excludedCodePoints is NULL, don't exclude any codepoints */
	150	U_CAPI UConverterSelector* U_EXPORT2
	151	ucnvsel_open(const char* const* converterList, int32_t converterListSize,
	152	const USet* excludedCodePoints,
	153	const UConverterUnicodeSet whichSet, UErrorCode* status) {
	154	// check if already failed
	155	if (U_FAILURE(*status)) {
	156	return NULL;
	157	}
	158	// ensure args make sense!
	159	if (converterListSize < 0 \|\| (converterList == NULL && converterListSize != 0)) {
	160	*status = U_ILLEGAL_ARGUMENT_ERROR;
	161	return NULL;
	162	}
	163
	164	// allocate a new converter
	165	LocalUConverterSelectorPointer newSelector(
	166	(UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)));
	167	if (newSelector.isNull()) {
	168	*status = U_MEMORY_ALLOCATION_ERROR;
	169	return NULL;
	170	}
	171	uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector));
	172
	173	if (converterListSize == 0) {
	174	converterList = NULL;
	175	converterListSize = ucnv_countAvailable();
	176	}
	177	newSelector->encodings =
	178	(char*)uprv_malloc(converterListSize sizeof(char*));
	179	if (!newSelector->encodings) {
	180	*status = U_MEMORY_ALLOCATION_ERROR;
	181	return NULL;
	182	}
	183	newSelector->encodings[0] = NULL; // now we can call ucnvsel_close()
	184
	185	// make a backup copy of the list of converters
	186	int32_t totalSize = 0;
	187	int32_t i;
	188	for (i = 0; i < converterListSize; i++) {
	189	totalSize +=
	190	(int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1;
	191	}
	192	// 4-align the totalSize to 4-align the size of the serialized form
	193	int32_t encodingStrPadding = totalSize & 3;
	194	if (encodingStrPadding != 0) {
	195	encodingStrPadding = 4 - encodingStrPadding;
	196	}
	197	newSelector->encodingStrLength = totalSize += encodingStrPadding;
	198	char* allStrings = (char*) uprv_malloc(totalSize);
	199	if (!allStrings) {
	200	*status = U_MEMORY_ALLOCATION_ERROR;
	201	return NULL;
	202	}
	203
	204	for (i = 0; i < converterListSize; i++) {
	205	newSelector->encodings[i] = allStrings;
	206	uprv_strcpy(newSelector->encodings[i],
	207	converterList != NULL ? converterList[i] : ucnv_getAvailableName(i));
	208	allStrings += uprv_strlen(newSelector->encodings[i]) + 1;
	209	}
	210	while (encodingStrPadding > 0) {
	211	*allStrings++ = 0;
	212	--encodingStrPadding;
	213	}
	214
	215	newSelector->ownEncodingStrings = TRUE;
	216	newSelector->encodingsCount = converterListSize;
	217	UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
	218	generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status);
	219	upvec_close(upvec);
	220
	221	if (U_FAILURE(*status)) {
	222	return NULL;
	223	}
	224
	225	return newSelector.orphan();
	226	}
	227
	228	/* close opened selector */
	229	U_CAPI void U_EXPORT2
	230	ucnvsel_close(UConverterSelector *sel) {
	231	if (!sel) {
	232	return;
	233	}
	234	if (sel->ownEncodingStrings) {
	235	uprv_free(sel->encodings[0]);
	236	}
	237	uprv_free(sel->encodings);
	238	if (sel->ownPv) {
	239	uprv_free(sel->pv);
	240	}
	241	utrie2_close(sel->trie);
	242	uprv_free(sel->swapped);
	243	uprv_free(sel);
	244	}
	245
	246	static const UDataInfo dataInfo = {
	247	sizeof(UDataInfo),
	248	0,
	249
	250	U_IS_BIG_ENDIAN,
	251	U_CHARSET_FAMILY,
	252	U_SIZEOF_UCHAR,
	253	0,
	254
	255	{ 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */
	256	{ 1, 0, 0, 0 }, /* formatVersion */
	257	{ 0, 0, 0, 0 } /* dataVersion */
	258	};
	259
	/* Positions inside the int32_t indexes[] array of the serialized form. */
	enum {
	    UCNVSEL_INDEX_TRIE_SIZE    = 0,   // trie size in bytes
	    UCNVSEL_INDEX_PV_COUNT     = 1,   // number of uint32_t in the bit vectors
	    UCNVSEL_INDEX_NAMES_COUNT  = 2,   // number of encoding names
	    UCNVSEL_INDEX_NAMES_LENGTH = 3,   // number of encoding name bytes including padding
	    UCNVSEL_INDEX_SIZE         = 15,  // bytes following the DataHeader
	    UCNVSEL_INDEX_COUNT        = 16
	};
	268
	269	/*
	270	* Serialized form of a UConverterSelector, formatVersion 1:
	271	*
	272	* The serialized form begins with a standard ICU DataHeader with a UDataInfo
	273	* as the template above.
	274	* This is followed by:
	275	* int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above
	276	* serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
	277	* uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors
	278	* char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding
	279	*/
	280
	281	/* serialize a selector */
	282	U_CAPI int32_t U_EXPORT2
	283	ucnvsel_serialize(const UConverterSelector* sel,
	284	void* buffer, int32_t bufferCapacity, UErrorCode* status) {
	285	// check if already failed
	286	if (U_FAILURE(*status)) {
	287	return 0;
	288	}
	289	// ensure args make sense!
	290	uint8_t p = (uint8_t )buffer;
	291	if (bufferCapacity < 0 \|\|
	292	(bufferCapacity > 0 && (p == NULL \|\| (U_POINTER_MASK_LSB(p, 3) != 0)))
	293	) {
	294	*status = U_ILLEGAL_ARGUMENT_ERROR;
	295	return 0;
	296	}
	297	// add up the size of the serialized form
	298	int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status);
	299	if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status)) {
	300	return 0;
	301	}
	302	*status = U_ZERO_ERROR;
	303
	304	DataHeader header;
	305	uprv_memset(&header, 0, sizeof(header));
	306	header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15);
	307	header.dataHeader.magic1 = 0xda;
	308	header.dataHeader.magic2 = 0x27;
	309	uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo));
	310
	311	int32_t indexes[UCNVSEL_INDEX_COUNT] = {
	312	serializedTrieSize,
	313	sel->pvCount,
	314	sel->encodingsCount,
	315	sel->encodingStrLength
	316	};
	317
	318	int32_t totalSize =
	319	header.dataHeader.headerSize +
	320	(int32_t)sizeof(indexes) +
	321	serializedTrieSize +
	322	sel->pvCount * 4 +
	323	sel->encodingStrLength;
	324	indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize;
	325	if (totalSize > bufferCapacity) {
	326	*status = U_BUFFER_OVERFLOW_ERROR;
	327	return totalSize;
	328	}
	329	// ok, save!
	330	int32_t length = header.dataHeader.headerSize;
	331	uprv_memcpy(p, &header, sizeof(header));
	332	uprv_memset(p + sizeof(header), 0, length - sizeof(header));
	333	p += length;
	334
	335	length = (int32_t)sizeof(indexes);
	336	uprv_memcpy(p, indexes, length);
	337	p += length;
	338
	339	utrie2_serialize(sel->trie, p, serializedTrieSize, status);
	340	p += serializedTrieSize;
	341
	342	length = sel->pvCount * 4;
	343	uprv_memcpy(p, sel->pv, length);
	344	p += length;
	345
	346	uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength);
	347	p += sel->encodingStrLength;
	348
	349	return totalSize;
	350	}
	351
	352	/**
	353	* swap a selector into the desired Endianness and Asciiness of
	354	* the system. Just as FYI, selectors are always saved in the format
	355	* of the system that created them. They are only converted if used
	356	* on another system. In other words, selectors created on different
	357	* system can be different even if the params are identical (endianness
	358	* and Asciiness differences only)
	359	*
	360	* @param ds pointer to data swapper containing swapping info
	361	* @param inData pointer to incoming data
	362	* @param length length of inData in bytes
	363	* @param outData pointer to output data. Capacity should
	364	* be at least equal to capacity of inData
	365	* @param status an in/out ICU UErrorCode
	366	* @return 0 on failure, number of bytes swapped on success
	367	* number of bytes swapped can be smaller than length
	368	*/
	369	static int32_t
	370	ucnvsel_swap(const UDataSwapper *ds,
	371	const void *inData, int32_t length,
	372	void outData, UErrorCode status) {
	373	/* udata_swapDataHeader checks the arguments */
	374	int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
	375	if(U_FAILURE(*status)) {
	376	return 0;
	377	}
	378
	379	/* check data format and format version */
	380	const UDataInfo pInfo = (const UDataInfo )((const char *)inData + 4);
	381	if(!(
	382	pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */
	383	pInfo->dataFormat[1] == 0x53 &&
	384	pInfo->dataFormat[2] == 0x65 &&
	385	pInfo->dataFormat[3] == 0x6c
	386	)) {
	387	udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
	388	pInfo->dataFormat[0], pInfo->dataFormat[1],
	389	pInfo->dataFormat[2], pInfo->dataFormat[3]);
	390	*status = U_INVALID_FORMAT_ERROR;
	391	return 0;
	392	}
	393	if(pInfo->formatVersion[0] != 1) {
	394	udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n",
	395	pInfo->formatVersion[0]);
	396	*status = U_UNSUPPORTED_ERROR;
	397	return 0;
	398	}
	399
	400	if(length >= 0) {
	401	length -= headerSize;
	402	if(length < 16*4) {
	403	udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
	404	length);
	405	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	406	return 0;
	407	}
	408	}
	409
	410	const uint8_t inBytes = (const uint8_t )inData + headerSize;
	411	uint8_t outBytes = (uint8_t )outData + headerSize;
	412
	413	/* read the indexes */
	414	const int32_t inIndexes = (const int32_t )inBytes;
	415	int32_t indexes[16];
	416	int32_t i;
	417	for(i = 0; i < 16; ++i) {
	418	indexes[i] = udata_readInt32(ds, inIndexes[i]);
	419	}
	420
	421	/* get the total length of the data */
	422	int32_t size = indexes[UCNVSEL_INDEX_SIZE];
	423	if(length >= 0) {
	424	if(length < size) {
	425	udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
	426	length);
	427	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	428	return 0;
	429	}
	430
	431	/* copy the data for inaccessible bytes */
	432	if(inBytes != outBytes) {
	433	uprv_memcpy(outBytes, inBytes, size);
	434	}
	435
	436	int32_t offset = 0, count;
	437
	438	/* swap the int32_t indexes[] */
	439	count = UCNVSEL_INDEX_COUNT*4;
	440	ds->swapArray32(ds, inBytes, count, outBytes, status);
	441	offset += count;
	442
	443	/* swap the UTrie2 */
	444	count = indexes[UCNVSEL_INDEX_TRIE_SIZE];
	445	utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status);
	446	offset += count;
	447
	448	/* swap the uint32_t pv[] */
	449	count = indexes[UCNVSEL_INDEX_PV_COUNT]*4;
	450	ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status);
	451	offset += count;
	452
	453	/* swap the encoding names */
	454	count = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
	455	ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status);
	456	offset += count;
	457
	458	U_ASSERT(offset == size);
	459	}
	460
	461	return headerSize + size;
	462	}
	463
	464	/* unserialize a selector */
	465	U_CAPI UConverterSelector* U_EXPORT2
	466	ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) {
	467	// check if already failed
	468	if (U_FAILURE(*status)) {
	469	return NULL;
	470	}
	471	// ensure args make sense!
	472	const uint8_t p = (const uint8_t )buffer;
	473	if (length <= 0 \|\|
	474	(length > 0 && (p == NULL \|\| (U_POINTER_MASK_LSB(p, 3) != 0)))
	475	) {
	476	*status = U_ILLEGAL_ARGUMENT_ERROR;
	477	return NULL;
	478	}
	479	// header
	480	if (length < 32) {
	481	// not even enough space for a minimal header
	482	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	483	return NULL;
	484	}
	485	const DataHeader pHeader = (const DataHeader )p;
	486	if (!(
	487	pHeader->dataHeader.magic1==0xda &&
	488	pHeader->dataHeader.magic2==0x27 &&
	489	pHeader->info.dataFormat[0] == 0x43 &&
	490	pHeader->info.dataFormat[1] == 0x53 &&
	491	pHeader->info.dataFormat[2] == 0x65 &&
	492	pHeader->info.dataFormat[3] == 0x6c
	493	)) {
	494	/* header not valid or dataFormat not recognized */
	495	*status = U_INVALID_FORMAT_ERROR;
	496	return NULL;
	497	}
	498	if (pHeader->info.formatVersion[0] != 1) {
	499	*status = U_UNSUPPORTED_ERROR;
	500	return NULL;
	501	}
	502	uint8_t* swapped = NULL;
	503	if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN \|\|
	504	pHeader->info.charsetFamily != U_CHARSET_FAMILY
	505	) {
	506	// swap the data
	507	UDataSwapper *ds =
	508	udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status);
	509	int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status);
	510	if (U_FAILURE(*status)) {
	511	udata_closeSwapper(ds);
	512	return NULL;
	513	}
	514	if (length < totalSize) {
	515	udata_closeSwapper(ds);
	516	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	517	return NULL;
	518	}
	519	swapped = (uint8_t*)uprv_malloc(totalSize);
	520	if (swapped == NULL) {
	521	udata_closeSwapper(ds);
	522	*status = U_MEMORY_ALLOCATION_ERROR;
	523	return NULL;
	524	}
	525	ucnvsel_swap(ds, p, length, swapped, status);
	526	udata_closeSwapper(ds);
	527	if (U_FAILURE(*status)) {
	528	uprv_free(swapped);
	529	return NULL;
	530	}
	531	p = swapped;
	532	pHeader = (const DataHeader *)p;
	533	}
	534	if (length < (pHeader->dataHeader.headerSize + 16 * 4)) {
	535	// not even enough space for the header and the indexes
	536	uprv_free(swapped);
	537	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	538	return NULL;
	539	}
	540	p += pHeader->dataHeader.headerSize;
	541	length -= pHeader->dataHeader.headerSize;
	542	// indexes
	543	const int32_t indexes = (const int32_t )p;
	544	if (length < indexes[UCNVSEL_INDEX_SIZE]) {
	545	uprv_free(swapped);
	546	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	547	return NULL;
	548	}
	549	p += UCNVSEL_INDEX_COUNT * 4;
	550	// create and populate the selector object
	551	UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
	552	char **encodings =
	553	(char **)uprv_malloc(
	554	indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *));
	555	if (sel == NULL \|\| encodings == NULL) {
	556	uprv_free(swapped);
	557	uprv_free(sel);
	558	uprv_free(encodings);
	559	*status = U_MEMORY_ALLOCATION_ERROR;
	560	return NULL;
	561	}
	562	uprv_memset(sel, 0, sizeof(UConverterSelector));
	563	sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT];
	564	sel->encodings = encodings;
	565	sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT];
	566	sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
	567	sel->swapped = swapped;
	568	// trie
	569	sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
	570	p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL,
	571	status);
	572	p += indexes[UCNVSEL_INDEX_TRIE_SIZE];
	573	if (U_FAILURE(*status)) {
	574	ucnvsel_close(sel);
	575	return NULL;
	576	}
	577	// bit vectors
	578	sel->pv = (uint32_t *)p;
	579	p += sel->pvCount * 4;
	580	// encoding names
	581	char* s = (char*)p;
	582	for (int32_t i = 0; i < sel->encodingsCount; ++i) {
	583	sel->encodings[i] = s;
	584	s += uprv_strlen(s) + 1;
	585	}
	586	p += sel->encodingStrLength;
	587
	588	return sel;
	589	}
	590
	591	// a bunch of functions for the enumeration thingie! Nothing fancy here. Just
	592	// iterate over the selected encodings
	593	struct Enumerator {
	594	int16_t* index;
	595	int16_t length;
	596	int16_t cur;
	597	const UConverterSelector* sel;
	598	};
	599
	600	U_CDECL_BEGIN
	601
	602	static void U_CALLCONV
	603	ucnvsel_close_selector_iterator(UEnumeration *enumerator) {
	604	uprv_free(((Enumerator*)(enumerator->context))->index);
	605	uprv_free(enumerator->context);
	606	uprv_free(enumerator);
	607	}
	608
	609
	610	static int32_t U_CALLCONV
	611	ucnvsel_count_encodings(UEnumeration enumerator, UErrorCode status) {
	612	// check if already failed
	613	if (U_FAILURE(*status)) {
	614	return 0;
	615	}
	616	return ((Enumerator*)(enumerator->context))->length;
	617	}
	618
	619
	620	static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator,
	621	int32_t* resultLength,
	622	UErrorCode* status) {
	623	// check if already failed
	624	if (U_FAILURE(*status)) {
	625	return NULL;
	626	}
	627
	628	int16_t cur = ((Enumerator*)(enumerator->context))->cur;
	629	const UConverterSelector* sel;
	630	const char* result;
	631	if (cur >= ((Enumerator*)(enumerator->context))->length) {
	632	return NULL;
	633	}
	634	sel = ((Enumerator*)(enumerator->context))->sel;
	635	result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ];
	636	((Enumerator*)(enumerator->context))->cur++;
	637	if (resultLength) {
	638	*resultLength = (int32_t)uprv_strlen(result);
	639	}
	640	return result;
	641	}
	642
	643	static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator,
	644	UErrorCode* status) {
	645	// check if already failed
	646	if (U_FAILURE(*status)) {
	647	return ;
	648	}
	649	((Enumerator*)(enumerator->context))->cur = 0;
	650	}
	651
	652	U_CDECL_END
	653
	654
	655	static const UEnumeration defaultEncodings = {
	656	NULL,
	657	NULL,
	658	ucnvsel_close_selector_iterator,
	659	ucnvsel_count_encodings,
	660	uenum_unextDefault,
	661	ucnvsel_next_encoding,
	662	ucnvsel_reset_iterator
	663	};
	664
	665
	666	// internal fn to intersect two sets of masks
	667	// returns whether the mask has reduced to all zeros
	668	static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) {
	669	int32_t i;
	670	uint32_t oredDest = 0;
	671	for (i = 0 ; i < len ; ++i) {
	672	oredDest \|= (dest[i] &= source1[i]);
	673	}
	674	return oredDest == 0;
	675	}
	676
	// internal fn to count how many 1's are there in a mask
	// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
	// mask is only read, so it is const-qualified like source1 in intersectMasks()
	// len is the number of uint32_t words in mask
	static int16_t countOnes(const uint32_t* mask, int32_t len) {
	    int32_t i, totalOnes = 0;
	    for (i = 0 ; i < len ; ++i) {
	        uint32_t ent = mask[i];
	        // one iteration per set bit
	        for (; ent; totalOnes++)
	        {
	            ent &= ent - 1; // clear the least significant bit set
	        }
	    }
	    return (int16_t)totalOnes;
	}
	690
	691
	692	/* internal function! */
	693	static UEnumeration selectForMask(const UConverterSelector sel,
	694	uint32_t theMask, UErrorCode status) {
	695	LocalMemory<uint32_t> mask(theMask);
	696	// this is the context we will use. Store a table of indices to which
	697	// encodings are legit.
	698	LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator))));
	699	if (result.isNull()) {
	700	*status = U_MEMORY_ALLOCATION_ERROR;
	701	return nullptr;
	702	}
	703	result->index = nullptr; // this will be allocated later!
	704	result->length = result->cur = 0;
	705	result->sel = sel;
	706
	707	LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
	708	if (en.isNull()) {
	709	// TODO(markus): Combine Enumerator and UEnumeration into one struct.
	710	*status = U_MEMORY_ALLOCATION_ERROR;
	711	return nullptr;
	712	}
	713	memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration));
	714
	715	int32_t columns = (sel->encodingsCount+31)/32;
	716	int16_t numOnes = countOnes(mask.getAlias(), columns);
	717	// now, we know the exact space we need for index
	718	if (numOnes > 0) {
	719	result->index = static_cast<int16_t>(uprv_malloc(numOnes sizeof(int16_t)));
	720	if (result->index == nullptr) {
	721	*status = U_MEMORY_ALLOCATION_ERROR;
	722	return nullptr;
	723	}
	724	int32_t i, j;
	725	int16_t k = 0;
	726	for (j = 0 ; j < columns; j++) {
	727	uint32_t v = mask[j];
	728	for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) {
	729	if ((v & 1) != 0) {
	730	result->index[result->length++] = k;
	731	}
	732	v >>= 1;
	733	}
	734	}
	735	} //otherwise, index will remain NULL (and will never be touched by
	736	//the enumerator code anyway)
	737	en->context = result.orphan();
	738	return en.orphan();
	739	}
	740
	741	/* check a string against the selector - UTF16 version */
	742	U_CAPI UEnumeration * U_EXPORT2
	743	ucnvsel_selectForString(const UConverterSelector* sel,
	744	const UChar s, int32_t length, UErrorCode status) {
	745	// check if already failed
	746	if (U_FAILURE(*status)) {
	747	return NULL;
	748	}
	749	// ensure args make sense!
	750	if (sel == NULL \|\| (s == NULL && length != 0)) {
	751	*status = U_ILLEGAL_ARGUMENT_ERROR;
	752	return NULL;
	753	}
	754
	755	int32_t columns = (sel->encodingsCount+31)/32;
	756	uint32_t* mask = (uint32_t) uprv_malloc(columns 4);
	757	if (mask == NULL) {
	758	*status = U_MEMORY_ALLOCATION_ERROR;
	759	return NULL;
	760	}
	761	uprv_memset(mask, ~0, columns *4);
	762
	763	if(s!=NULL) {
	764	const UChar *limit;
	765	if (length >= 0) {
	766	limit = s + length;
	767	} else {
	768	limit = NULL;
	769	}
	770
	771	while (limit == NULL ? *s != 0 : s != limit) {
	772	UChar32 c;
	773	uint16_t pvIndex;
	774	UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex);
	775	if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
	776	break;
	777	}
	778	}
	779	}
	780	return selectForMask(sel, mask, status);
	781	}
	782
	783	/* check a string against the selector - UTF8 version */
	784	U_CAPI UEnumeration * U_EXPORT2
	785	ucnvsel_selectForUTF8(const UConverterSelector* sel,
	786	const char s, int32_t length, UErrorCode status) {
	787	// check if already failed
	788	if (U_FAILURE(*status)) {
	789	return NULL;
	790	}
	791	// ensure args make sense!
	792	if (sel == NULL \|\| (s == NULL && length != 0)) {
	793	*status = U_ILLEGAL_ARGUMENT_ERROR;
	794	return NULL;
	795	}
	796
	797	int32_t columns = (sel->encodingsCount+31)/32;
	798	uint32_t* mask = (uint32_t) uprv_malloc(columns 4);
	799	if (mask == NULL) {
	800	*status = U_MEMORY_ALLOCATION_ERROR;
	801	return NULL;
	802	}
	803	uprv_memset(mask, ~0, columns *4);
	804
	805	if (length < 0) {
	806	length = (int32_t)uprv_strlen(s);
	807	}
	808
	809	if(s!=NULL) {
	810	const char *limit = s + length;
	811
	812	while (s != limit) {
	813	uint16_t pvIndex;
	814	UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex);
	815	if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
	816	break;
	817	}
	818	}
	819	}
	820	return selectForMask(sel, mask, status);
	821	}
	822
	823	#endif // !UCONFIG_NO_CONVERSION