git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	******************************************************************************
	3	*
	4	* Copyright (C) 1999-2014, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	******************************************************************************
	8	* file name: unames.c
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 1999oct04
	14	* created by: Markus W. Scherer
	15	*/
	16
	17	#include "unicode/utypes.h"
	18	#include "unicode/putil.h"
	19	#include "unicode/uchar.h"
	20	#include "unicode/udata.h"
	21	#include "unicode/utf.h"
	22	#include "unicode/utf16.h"
	23	#include "uassert.h"
	24	#include "ustr_imp.h"
	25	#include "umutex.h"
	26	#include "cmemory.h"
	27	#include "cstring.h"
	28	#include "ucln_cmn.h"
	29	#include "udataswp.h"
	30	#include "uprops.h"
	31
	32	U_NAMESPACE_BEGIN
	33
	/* prototypes ------------------------------------------------------------- */

	/* Item name and type passed to udata_openChoice() by loadCharNames(). */
	static const char DATA_NAME[] = "unames";
	static const char DATA_TYPE[] = "icu";

	/*
	 * Names are stored in groups of LINES_PER_GROUP consecutive code points:
	 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line inside it.
	 */
	#define GROUP_SHIFT 5
	#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
	#define GROUP_MASK (LINES_PER_GROUP-1)

	/*
	 * This struct was replaced by explicitly accessing equivalent
	 * fields from triples of uint16_t.
	 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
	 * which broke the assumption that sizeof(Group)==6 and that the ++ operator
	 * would advance by 6 bytes (3 uint16_t).
	 *
	 * We can't just change the data structure because it's loaded from a data file,
	 * and we don't want to make it less compact, so we changed the access code.
	 *
	 * For details see ICU tickets 6331 and 6008.
	typedef struct {
	    uint16_t groupMSB,
	             offsetHigh, offsetLow; / * avoid padding * /
	} Group;
	 */

	/* Indexes of the three uint16_t fields of one group, plus the triple's length. */
	enum {
	    GROUP_MSB,
	    GROUP_OFFSET_HIGH,
	    GROUP_OFFSET_LOW,
	    GROUP_LENGTH
	};

	/*
	 * Get the 32-bit group offset.
	 * The two halves are combined in unsigned arithmetic so that a high half
	 * with bit 15 set does not left-shift into the sign bit of a signed int.
	 *
	 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
	 * @return group offset (int32_t)
	 */
	#define GET_GROUP_OFFSET(group) ((int32_t)(((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16)|(group)[GROUP_OFFSET_LOW]))

	/* Step a group pointer forward/backward by one uint16_t triple. */
	#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
	#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
	75
	/*
	 * Header of one algorithmic-name range.
	 * start/end bound the code points of the range (inclusive), type selects
	 * the naming algorithm (0: prefix + hex digits, 1: prefix + factorized
	 * elements) and variant is the number of hex digits (type 0) or of
	 * factors (type 1). The range-specific data (factors, prefix, element
	 * strings) follows directly behind this struct, at (range+1).
	 * size is not read by the functions visible here; presumably it is the
	 * byte size of the whole range record - TODO confirm.
	 */
	typedef struct {
	    uint32_t start, end;
	    uint8_t type, variant;
	    uint16_t size;
	} AlgorithmicRange;

	/*
	 * Index header at the start of the loaded names data.
	 * Every field is a byte offset counted from the start of this struct.
	 * The token table begins right behind the header (16 bytes in).
	 */
	typedef struct {
	    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
	} UCharNames;

	/*
	 * Get the groups table from a UCharNames struct.
	 * The groups table consists of one uint16_t groupCount followed by
	 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
	 * and the comment for the old struct Group above.
	 *
	 * @param names (const UCharNames *) pointer to the UCharNames indexes
	 * @return (const uint16_t *) pointer to the groups table
	 */
	#define GET_GROUPS(names) ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
	96
	97	typedef struct {
	98	const char *otherName;
	99	UChar32 code;
	100	} FindName;
	101
	102	#define DO_FIND_NAME NULL
	103
	104	static UDataMemory *uCharNamesData=NULL;
	105	static UCharNames *uCharNames=NULL;
	106	static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
	107
	108	/*
	109	* Maximum length of character names (regular & 1.0).
	110	*/
	111	static int32_t gMaxNameLength=0;
	112
	113	/*
	114	* Set of chars used in character names (regular & 1.0).
	115	* Chars are platform-dependent (can be EBCDIC).
	116	*/
	117	static uint32_t gNameSet[8]={ 0 };
	118
	119	#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
	120	#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
	121	#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
	122
	123	#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
	124
	125	static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
	126	"unassigned",
	127	"uppercase letter",
	128	"lowercase letter",
	129	"titlecase letter",
	130	"modifier letter",
	131	"other letter",
	132	"non spacing mark",
	133	"enclosing mark",
	134	"combining spacing mark",
	135	"decimal digit number",
	136	"letter number",
	137	"other number",
	138	"space separator",
	139	"line separator",
	140	"paragraph separator",
	141	"control",
	142	"format",
	143	"private use area",
	144	"surrogate",
	145	"dash punctuation",
	146	"start punctuation",
	147	"end punctuation",
	148	"connector punctuation",
	149	"other punctuation",
	150	"math symbol",
	151	"currency symbol",
	152	"modifier symbol",
	153	"other symbol",
	154	"initial punctuation",
	155	"final punctuation",
	156	"noncharacter",
	157	"lead surrogate",
	158	"trail surrogate"
	159	};
	160
	161	/* implementation ----------------------------------------------------------- */
	162
	163	static UBool U_CALLCONV unames_cleanup(void)
	164	{
	165	if(uCharNamesData) {
	166	udata_close(uCharNamesData);
	167	uCharNamesData = NULL;
	168	}
	169	if(uCharNames) {
	170	uCharNames = NULL;
	171	}
	172	gCharNamesInitOnce.reset();
	173	gMaxNameLength=0;
	174	return TRUE;
	175	}
	176
	177	static UBool U_CALLCONV
	178	isAcceptable(void * /context/,
	179	const char * /type/, const char * /name/,
	180	const UDataInfo *pInfo) {
	181	return (UBool)(
	182	pInfo->size>=20 &&
	183	pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
	184	pInfo->charsetFamily==U_CHARSET_FAMILY &&
	185	pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
	186	pInfo->dataFormat[1]==0x6e &&
	187	pInfo->dataFormat[2]==0x61 &&
	188	pInfo->dataFormat[3]==0x6d &&
	189	pInfo->formatVersion[0]==1);
	190	}
	191
	192	static void U_CALLCONV
	193	loadCharNames(UErrorCode &status) {
	194	U_ASSERT(uCharNamesData == NULL);
	195	U_ASSERT(uCharNames == NULL);
	196
	197	uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
	198	if(U_FAILURE(status)) {
	199	uCharNamesData = NULL;
	200	} else {
	201	uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
	202	}
	203	ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
	204	}
	205
	206
	207	static UBool
	208	isDataLoaded(UErrorCode *pErrorCode) {
	209	umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
	210	return U_SUCCESS(*pErrorCode);
	211	}
	212
	/*
	 * Append one character to an output buffer with preflighting:
	 * the character is stored (and buffer/bufferLength advanced) only while
	 * there is room, but bufferPos is always incremented so that callers
	 * learn the full length of the untruncated output.
	 * buffer, bufferLength and bufferPos are modified and must be lvalues.
	 */
	#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
	    if((bufferLength)>0) { \
	        *(buffer)++=(c); \
	        --(bufferLength); \
	    } \
	    ++(bufferPos); \
	}

	/* Internal name choice value just past the public UCharNameChoice values. */
	#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
	222
	223	/*
	224	* Important: expandName() and compareName() are almost the same -
	225	* apply fixes to both.
	226	*
	227	* UnicodeData.txt uses ';' as a field separator, so no
	228	* field can contain ';' as part of its contents.
	229	* In unames.dat, it is marked as token[';']==-1 only if the
	230	* semicolon is used in the data file - which is iff we
	231	* have Unicode 1.0 names or ISO comments or aliases.
	232	* So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
	233	* although we know that it will never be part of a name.
	234	*/
	235	static uint16_t
	236	expandName(UCharNames *names,
	237	const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
	238	char *buffer, uint16_t bufferLength) {
	239	uint16_t tokens=(uint16_t )names+8;
	240	uint16_t token, tokenCount=*tokens++, bufferPos=0;
	241	uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;
	242	uint8_t c;
	243
	244	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
	245	/*
	246	* skip the modern name if it is not requested _and_
	247	* if the semicolon byte value is a character, not a token number
	248	*/
	249	if((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {
	250	int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
	251	do {
	252	while(nameLength>0) {
	253	--nameLength;
	254	if(*name++==';') {
	255	break;
	256	}
	257	}
	258	} while(--fieldIndex>0);
	259	} else {
	260	/*
	261	* the semicolon byte value is a token number, therefore
	262	* only modern names are stored in unames.dat and there is no
	263	* such requested alternate name here
	264	*/
	265	nameLength=0;
	266	}
	267	}
	268
	269	/* write each letter directly, and write a token word per token */
	270	while(nameLength>0) {
	271	--nameLength;
	272	c=*name++;
	273
	274	if(c>=tokenCount) {
	275	if(c!=';') {
	276	/* implicit letter */
	277	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	278	} else {
	279	/* finished */
	280	break;
	281	}
	282	} else {
	283	token=tokens[c];
	284	if(token==(uint16_t)(-2)) {
	285	/* this is a lead byte for a double-byte token */
	286	token=tokens[c<<8\|*name++];
	287	--nameLength;
	288	}
	289	if(token==(uint16_t)(-1)) {
	290	if(c!=';') {
	291	/* explicit letter */
	292	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	293	} else {
	294	/* stop, but skip the semicolon if we are seeking
	295	extended names and there was no 2.0 name but there
	296	is a 1.0 name. */
	297	if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
	298	if ((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {
	299	continue;
	300	}
	301	}
	302	/* finished */
	303	break;
	304	}
	305	} else {
	306	/* write token word */
	307	uint8_t *tokenString=tokenStrings+token;
	308	while((c=*tokenString++)!=0) {
	309	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	310	}
	311	}
	312	}
	313	}
	314
	315	/* zero-terminate */
	316	if(bufferLength>0) {
	317	*buffer=0;
	318	}
	319
	320	return bufferPos;
	321	}
	322
	323	/*
	324	* compareName() is almost the same as expandName() except that it compares
	325	* the currently expanded name to an input name.
	326	* It returns the match/no match result as soon as possible.
	327	*/
	328	static UBool
	329	compareName(UCharNames *names,
	330	const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
	331	const char *otherName) {
	332	uint16_t tokens=(uint16_t )names+8;
	333	uint16_t token, tokenCount=*tokens++;
	334	uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;
	335	uint8_t c;
	336	const char *origOtherName = otherName;
	337
	338	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
	339	/*
	340	* skip the modern name if it is not requested _and_
	341	* if the semicolon byte value is a character, not a token number
	342	*/
	343	if((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {
	344	int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
	345	do {
	346	while(nameLength>0) {
	347	--nameLength;
	348	if(*name++==';') {
	349	break;
	350	}
	351	}
	352	} while(--fieldIndex>0);
	353	} else {
	354	/*
	355	* the semicolon byte value is a token number, therefore
	356	* only modern names are stored in unames.dat and there is no
	357	* such requested alternate name here
	358	*/
	359	nameLength=0;
	360	}
	361	}
	362
	363	/* compare each letter directly, and compare a token word per token */
	364	while(nameLength>0) {
	365	--nameLength;
	366	c=*name++;
	367
	368	if(c>=tokenCount) {
	369	if(c!=';') {
	370	/* implicit letter */
	371	if((char)c!=*otherName++) {
	372	return FALSE;
	373	}
	374	} else {
	375	/* finished */
	376	break;
	377	}
	378	} else {
	379	token=tokens[c];
	380	if(token==(uint16_t)(-2)) {
	381	/* this is a lead byte for a double-byte token */
	382	token=tokens[c<<8\|*name++];
	383	--nameLength;
	384	}
	385	if(token==(uint16_t)(-1)) {
	386	if(c!=';') {
	387	/* explicit letter */
	388	if((char)c!=*otherName++) {
	389	return FALSE;
	390	}
	391	} else {
	392	/* stop, but skip the semicolon if we are seeking
	393	extended names and there was no 2.0 name but there
	394	is a 1.0 name. */
	395	if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
	396	if ((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {
	397	continue;
	398	}
	399	}
	400	/* finished */
	401	break;
	402	}
	403	} else {
	404	/* write token word */
	405	uint8_t *tokenString=tokenStrings+token;
	406	while((c=*tokenString++)!=0) {
	407	if((char)c!=*otherName++) {
	408	return FALSE;
	409	}
	410	}
	411	}
	412	}
	413	}
	414
	415	/* complete match? */
	416	return (UBool)(*otherName==0);
	417	}
	418
	419	static uint8_t getCharCat(UChar32 cp) {
	420	uint8_t cat;
	421
	422	if (U_IS_UNICODE_NONCHAR(cp)) {
	423	return U_NONCHARACTER_CODE_POINT;
	424	}
	425
	426	if ((cat = u_charType(cp)) == U_SURROGATE) {
	427	cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
	428	}
	429
	430	return cat;
	431	}
	432
	433	static const char *getCharCatName(UChar32 cp) {
	434	uint8_t cat = getCharCat(cp);
	435
	436	/* Return unknown if the table of names above is not up to
	437	date. */
	438
	439	if (cat >= UPRV_LENGTHOF(charCatNames)) {
	440	return "unknown";
	441	} else {
	442	return charCatNames[cat];
	443	}
	444	}
	445
	446	static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
	447	const char *catname = getCharCatName(code);
	448	uint16_t length = 0;
	449
	450	UChar32 cp;
	451	int ndigits, i;
	452
	453	WRITE_CHAR(buffer, bufferLength, length, '<');
	454	while (catname[length - 1]) {
	455	WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
	456	}
	457	WRITE_CHAR(buffer, bufferLength, length, '-');
	458	for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
	459	;
	460	if (ndigits < 4)
	461	ndigits = 4;
	462	for (cp = code, i = ndigits; (cp \|\| i > 0) && bufferLength; cp >>= 4, bufferLength--) {
	463	uint8_t v = (uint8_t)(cp & 0xf);
	464	buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
	465	}
	466	buffer += ndigits;
	467	length += ndigits;
	468	WRITE_CHAR(buffer, bufferLength, length, '>');
	469
	470	return length;
	471	}
	472
	473	/*
	474	* getGroup() does a binary search for the group that contains the
	475	* Unicode code point "code".
	476	* The return value is always a valid Group* that may contain "code"
	477	* or else is the highest group before "code".
	478	* If the lowest group is after "code", then that one is returned.
	479	*/
	480	static const uint16_t *
	481	getGroup(UCharNames *names, uint32_t code) {
	482	const uint16_t *groups=GET_GROUPS(names);
	483	uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
	484	start=0,
	485	limit=*groups++,
	486	number;
	487
	488	/* binary search for the group of names that contains the one for code */
	489	while(start<limit-1) {
	490	number=(uint16_t)((start+limit)/2);
	491	if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
	492	limit=number;
	493	} else {
	494	start=number;
	495	}
	496	}
	497
	498	/* return this regardless of whether it is an exact match */
	499	return groups+start*GROUP_LENGTH;
	500	}
	501
	502	/*
	503	* expandGroupLengths() reads a block of compressed lengths of 32 strings and
	504	* expands them into offsets and lengths for each string.
	505	* Lengths are stored with a variable-width encoding in consecutive nibbles:
	506	* If a nibble<0xc, then it is the length itself (0=empty string).
	507	* If a nibble>=0xc, then it forms a length value with the following nibble.
	508	* Calculation see below.
	509	* The offsets and lengths arrays must be at least 33 (one more) long because
	510	* there is no check here at the end if the last nibble is still used.
	511	*/
	512	static const uint8_t *
	513	expandGroupLengths(const uint8_t *s,
	514	uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
	515	/* read the lengths of the 32 strings in this group and get each string's offset */
	516	uint16_t i=0, offset=0, length=0;
	517	uint8_t lengthByte;
	518
	519	/* all 32 lengths must be read to get the offset of the first group string */
	520	while(i<LINES_PER_GROUP) {
	521	lengthByte=*s++;
	522
	523	/* read even nibble - MSBs of lengthByte */
	524	if(length>=12) {
	525	/* double-nibble length spread across two bytes */
	526	length=(uint16_t)(((length&0x3)<<4\|lengthByte>>4)+12);
	527	lengthByte&=0xf;
	528	} else if((lengthByte /* &0xf0 */)>=0xc0) {
	529	/* double-nibble length spread across this one byte */
	530	length=(uint16_t)((lengthByte&0x3f)+12);
	531	} else {
	532	/* single-nibble length in MSBs */
	533	length=(uint16_t)(lengthByte>>4);
	534	lengthByte&=0xf;
	535	}
	536
	537	*offsets++=offset;
	538	*lengths++=length;
	539
	540	offset+=length;
	541	++i;
	542
	543	/* read odd nibble - LSBs of lengthByte */
	544	if((lengthByte&0xf0)==0) {
	545	/* this nibble was not consumed for a double-nibble length above */
	546	length=lengthByte;
	547	if(length<12) {
	548	/* single-nibble length in LSBs */
	549	*offsets++=offset;
	550	*lengths++=length;
	551
	552	offset+=length;
	553	++i;
	554	}
	555	} else {
	556	length=0; /* prevent double-nibble detection in the next iteration */
	557	}
	558	}
	559
	560	/* now, s is at the first group string */
	561	return s;
	562	}
	563
	564	static uint16_t
	565	expandGroupName(UCharNames names, const uint16_t group,
	566	uint16_t lineNumber, UCharNameChoice nameChoice,
	567	char *buffer, uint16_t bufferLength) {
	568	uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
	569	const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET(group);
	570	s=expandGroupLengths(s, offsets, lengths);
	571	return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
	572	buffer, bufferLength);
	573	}
	574
	575	static uint16_t
	576	getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
	577	char *buffer, uint16_t bufferLength) {
	578	const uint16_t *group=getGroup(names, code);
	579	if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
	580	return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
	581	buffer, bufferLength);
	582	} else {
	583	/* group not found */
	584	/* zero-terminate */
	585	if(bufferLength>0) {
	586	*buffer=0;
	587	}
	588	return 0;
	589	}
	590	}
	591
	592	/*
	593	* enumGroupNames() enumerates all the names in a 32-group
	594	* and either calls the enumerator function or finds a given input name.
	595	*/
	596	static UBool
	597	enumGroupNames(UCharNames names, const uint16_t group,
	598	UChar32 start, UChar32 end,
	599	UEnumCharNamesFn fn, void context,
	600	UCharNameChoice nameChoice) {
	601	uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
	602	const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET(group);
	603
	604	s=expandGroupLengths(s, offsets, lengths);
	605	if(fn!=DO_FIND_NAME) {
	606	char buffer[200];
	607	uint16_t length;
	608
	609	while(start<=end) {
	610	length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
	611	if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
	612	buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
	613	}
	614	/* here, we assume that the buffer is large enough */
	615	if(length>0) {
	616	if(!fn(context, start, nameChoice, buffer, length)) {
	617	return FALSE;
	618	}
	619	}
	620	++start;
	621	}
	622	} else {
	623	const char otherName=((FindName )context)->otherName;
	624	while(start<=end) {
	625	if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
	626	((FindName *)context)->code=start;
	627	return FALSE;
	628	}
	629	++start;
	630	}
	631	}
	632	return TRUE;
	633	}
	634
	635	/*
	636	* enumExtNames enumerate extended names.
	637	* It only needs to do it if it is called with a real function and not
	638	* with the dummy DO_FIND_NAME, because u_charFromName() does a check
	639	* for extended names by itself.
	640	*/
	641	static UBool
	642	enumExtNames(UChar32 start, UChar32 end,
	643	UEnumCharNamesFn fn, void context)
	644	{
	645	if(fn!=DO_FIND_NAME) {
	646	char buffer[200];
	647	uint16_t length;
	648
	649	while(start<=end) {
	650	buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
	651	/* here, we assume that the buffer is large enough */
	652	if(length>0) {
	653	if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
	654	return FALSE;
	655	}
	656	}
	657	++start;
	658	}
	659	}
	660
	661	return TRUE;
	662	}
	663
	664	static UBool
	665	enumNames(UCharNames *names,
	666	UChar32 start, UChar32 limit,
	667	UEnumCharNamesFn fn, void context,
	668	UCharNameChoice nameChoice) {
	669	uint16_t startGroupMSB, endGroupMSB, groupCount;
	670	const uint16_t group, groupLimit;
	671
	672	startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
	673	endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
	674
	675	/* find the group that contains start, or the highest before it */
	676	group=getGroup(names, start);
	677
	678	if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
	679	/* enumerate synthetic names between start and the group start */
	680	UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
	681	if(extLimit>limit) {
	682	extLimit=limit;
	683	}
	684	if(!enumExtNames(start, extLimit-1, fn, context)) {
	685	return FALSE;
	686	}
	687	start=extLimit;
	688	}
	689
	690	if(startGroupMSB==endGroupMSB) {
	691	if(startGroupMSB==group[GROUP_MSB]) {
	692	/* if start and limit-1 are in the same group, then enumerate only in that one */
	693	return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
	694	}
	695	} else {
	696	const uint16_t *groups=GET_GROUPS(names);
	697	groupCount=*groups++;
	698	groupLimit=groups+groupCount*GROUP_LENGTH;
	699
	700	if(startGroupMSB==group[GROUP_MSB]) {
	701	/* enumerate characters in the partial start group */
	702	if((start&GROUP_MASK)!=0) {
	703	if(!enumGroupNames(names, group,
	704	start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
	705	fn, context, nameChoice)) {
	706	return FALSE;
	707	}
	708	group=NEXT_GROUP(group); /* continue with the next group */
	709	}
	710	} else if(startGroupMSB>group[GROUP_MSB]) {
	711	/* make sure that we start enumerating with the first group after start */
	712	const uint16_t *nextGroup=NEXT_GROUP(group);
	713	if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
	714	UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
	715	if (end > limit) {
	716	end = limit;
	717	}
	718	if (!enumExtNames(start, end - 1, fn, context)) {
	719	return FALSE;
	720	}
	721	}
	722	group=nextGroup;
	723	}
	724
	725	/* enumerate entire groups between the start- and end-groups */
	726	while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
	727	const uint16_t *nextGroup;
	728	start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
	729	if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
	730	return FALSE;
	731	}
	732	nextGroup=NEXT_GROUP(group);
	733	if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
	734	UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
	735	if (end > limit) {
	736	end = limit;
	737	}
	738	if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
	739	return FALSE;
	740	}
	741	}
	742	group=nextGroup;
	743	}
	744
	745	/* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
	746	if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
	747	return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
	748	} else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
	749	UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
	750	if (next > start) {
	751	start = next;
	752	}
	753	} else {
	754	return TRUE;
	755	}
	756	}
	757
	758	/* we have not found a group, which means everything is made of
	759	extended names. */
	760	if (nameChoice == U_EXTENDED_CHAR_NAME) {
	761	if (limit > UCHAR_MAX_VALUE + 1) {
	762	limit = UCHAR_MAX_VALUE + 1;
	763	}
	764	return enumExtNames(start, limit - 1, fn, context);
	765	}
	766
	767	return TRUE;
	768	}
	769
	770	static uint16_t
	771	writeFactorSuffix(const uint16_t *factors, uint16_t count,
	772	const char s, / suffix elements */
	773	uint32_t code,
	774	uint16_t indexes[8], /* output fields from here */
	775	const char elementBases[8], const char elements[8],
	776	char *buffer, uint16_t bufferLength) {
	777	uint16_t i, factor, bufferPos=0;
	778	char c;
	779
	780	/* write elements according to the factors */
	781
	782	/*
	783	* the factorized elements are determined by modulo arithmetic
	784	* with the factors of this algorithm
	785	*
	786	* note that for fewer operations, count is decremented here
	787	*/
	788	--count;
	789	for(i=count; i>0; --i) {
	790	factor=factors[i];
	791	indexes[i]=(uint16_t)(code%factor);
	792	code/=factor;
	793	}
	794	/*
	795	* we don't need to calculate the last modulus because start<=code<=end
	796	* guarantees here that code<=factors[0]
	797	*/
	798	indexes[0]=(uint16_t)code;
	799
	800	/* write each element */
	801	for(;;) {
	802	if(elementBases!=NULL) {
	803	*elementBases++=s;
	804	}
	805
	806	/* skip indexes[i] strings */
	807	factor=indexes[i];
	808	while(factor>0) {
	809	while(*s++!=0) {}
	810	--factor;
	811	}
	812	if(elements!=NULL) {
	813	*elements++=s;
	814	}
	815
	816	/* write element */
	817	while((c=*s++)!=0) {
	818	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	819	}
	820
	821	/* we do not need to perform the rest of this loop for i==count - break here */
	822	if(i>=count) {
	823	break;
	824	}
	825
	826	/* skip the rest of the strings for this factors[i] */
	827	factor=(uint16_t)(factors[i]-indexes[i]-1);
	828	while(factor>0) {
	829	while(*s++!=0) {}
	830	--factor;
	831	}
	832
	833	++i;
	834	}
	835
	836	/* zero-terminate */
	837	if(bufferLength>0) {
	838	*buffer=0;
	839	}
	840
	841	return bufferPos;
	842	}
	843
	844	/*
	845	* Important:
	846	* Parts of findAlgName() are almost the same as some of getAlgName().
	847	* Fixes must be applied to both.
	848	*/
	849	static uint16_t
	850	getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
	851	char *buffer, uint16_t bufferLength) {
	852	uint16_t bufferPos=0;
	853
	854	/* Only the normative character name can be algorithmic. */
	855	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
	856	/* zero-terminate */
	857	if(bufferLength>0) {
	858	*buffer=0;
	859	}
	860	return 0;
	861	}
	862
	863	switch(range->type) {
	864	case 0: {
	865	/* name = prefix hex-digits */
	866	const char s=(const char )(range+1);
	867	char c;
	868
	869	uint16_t i, count;
	870
	871	/* copy prefix */
	872	while((c=*s++)!=0) {
	873	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	874	}
	875
	876	/* write hexadecimal code point value */
	877	count=range->variant;
	878
	879	/* zero-terminate */
	880	if(count<bufferLength) {
	881	buffer[count]=0;
	882	}
	883
	884	for(i=count; i>0;) {
	885	if(--i<bufferLength) {
	886	c=(char)(code&0xf);
	887	if(c<10) {
	888	c+='0';
	889	} else {
	890	c+='A'-10;
	891	}
	892	buffer[i]=c;
	893	}
	894	code>>=4;
	895	}
	896
	897	bufferPos+=count;
	898	break;
	899	}
	900	case 1: {
	901	/* name = prefix factorized-elements */
	902	uint16_t indexes[8];
	903	const uint16_t factors=(const uint16_t )(range+1);
	904	uint16_t count=range->variant;
	905	const char s=(const char )(factors+count);
	906	char c;
	907
	908	/* copy prefix */
	909	while((c=*s++)!=0) {
	910	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	911	}
	912
	913	bufferPos+=writeFactorSuffix(factors, count,
	914	s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
	915	break;
	916	}
	917	default:
	918	/* undefined type */
	919	/* zero-terminate */
	920	if(bufferLength>0) {
	921	*buffer=0;
	922	}
	923	break;
	924	}
	925
	926	return bufferPos;
	927	}
	928
	929	/*
	930	* Important: enumAlgNames() and findAlgName() are almost the same.
	931	* Any fix must be applied to both.
	932	*/
	933	static UBool
	934	enumAlgNames(AlgorithmicRange *range,
	935	UChar32 start, UChar32 limit,
	936	UEnumCharNamesFn fn, void context,
	937	UCharNameChoice nameChoice) {
	938	char buffer[200];
	939	uint16_t length;
	940
	941	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
	942	return TRUE;
	943	}
	944
	945	switch(range->type) {
	946	case 0: {
	947	char s, end;
	948	char c;
	949
	950	/* get the full name of the start character */
	951	length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
	952	if(length<=0) {
	953	return TRUE;
	954	}
	955
	956	/* call the enumerator function with this first character */
	957	if(!fn(context, start, nameChoice, buffer, length)) {
	958	return FALSE;
	959	}
	960
	961	/* go to the end of the name; all these names have the same length */
	962	end=buffer;
	963	while(*end!=0) {
	964	++end;
	965	}
	966
	967	/* enumerate the rest of the names */
	968	while(++start<limit) {
	969	/* increment the hexadecimal number on a character-basis */
	970	s=end;
	971	for (;;) {
	972	c=*--s;
	973	if(('0'<=c && c<'9') \|\| ('A'<=c && c<'F')) {
	974	*s=(char)(c+1);
	975	break;
	976	} else if(c=='9') {
	977	*s='A';
	978	break;
	979	} else if(c=='F') {
	980	*s='0';
	981	}
	982	}
	983
	984	if(!fn(context, start, nameChoice, buffer, length)) {
	985	return FALSE;
	986	}
	987	}
	988	break;
	989	}
	990	case 1: {
	991	uint16_t indexes[8];
	992	const char elementBases[8], elements[8];
	993	const uint16_t factors=(const uint16_t )(range+1);
	994	uint16_t count=range->variant;
	995	const char s=(const char )(factors+count);
	996	char suffix, t;
	997	uint16_t prefixLength, i, idx;
	998
	999	char c;
	1000
	1001	/* name = prefix factorized-elements */
	1002
	1003	/* copy prefix */
	1004	suffix=buffer;
	1005	prefixLength=0;
	1006	while((c=*s++)!=0) {
	1007	*suffix++=c;
	1008	++prefixLength;
	1009	}
	1010
	1011	/* append the suffix of the start character */
	1012	length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
	1013	s, (uint32_t)start-range->start,
	1014	indexes, elementBases, elements,
	1015	suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
	1016
	1017	/* call the enumerator function with this first character */
	1018	if(!fn(context, start, nameChoice, buffer, length)) {
	1019	return FALSE;
	1020	}
	1021
	1022	/* enumerate the rest of the names */
	1023	while(++start<limit) {
	1024	/* increment the indexes in lexical order bound by the factors */
	1025	i=count;
	1026	for (;;) {
	1027	idx=(uint16_t)(indexes[--i]+1);
	1028	if(idx<factors[i]) {
	1029	/* skip one index and its element string */
	1030	indexes[i]=idx;
	1031	s=elements[i];
	1032	while(*s++!=0) {
	1033	}
	1034	elements[i]=s;
	1035	break;
	1036	} else {
	1037	/* reset this index to 0 and its element string to the first one */
	1038	indexes[i]=0;
	1039	elements[i]=elementBases[i];
	1040	}
	1041	}
	1042
	1043	/* to make matters a little easier, just append all elements to the suffix */
	1044	t=suffix;
	1045	length=prefixLength;
	1046	for(i=0; i<count; ++i) {
	1047	s=elements[i];
	1048	while((c=*s++)!=0) {
	1049	*t++=c;
	1050	++length;
	1051	}
	1052	}
	1053	/* zero-terminate */
	1054	*t=0;
	1055
	1056	if(!fn(context, start, nameChoice, buffer, length)) {
	1057	return FALSE;
	1058	}
	1059	}
	1060	break;
	1061	}
	1062	default:
	1063	/* undefined type */
	1064	break;
	1065	}
	1066
	1067	return TRUE;
	1068	}
	1069
	1070	/*
	1071	* findAlgName() is almost the same as enumAlgNames() except that it
	1072	* returns the code point for a name if it fits into the range.
	1073	* It returns 0xffff otherwise.
	1074	*/
	1075	static UChar32
	1076	findAlgName(AlgorithmicRange range, UCharNameChoice nameChoice, const char otherName) {
	1077	UChar32 code;
	1078
	1079	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
	1080	return 0xffff;
	1081	}
	1082
	1083	switch(range->type) {
	1084	case 0: {
	1085	/* name = prefix hex-digits */
	1086	const char s=(const char )(range+1);
	1087	char c;
	1088
	1089	uint16_t i, count;
	1090
	1091	/* compare prefix */
	1092	while((c=*s++)!=0) {
	1093	if((char)c!=*otherName++) {
	1094	return 0xffff;
	1095	}
	1096	}
	1097
	1098	/* read hexadecimal code point value */
	1099	count=range->variant;
	1100	code=0;
	1101	for(i=0; i<count; ++i) {
	1102	c=*otherName++;
	1103	if('0'<=c && c<='9') {
	1104	code=(code<<4)\|(c-'0');
	1105	} else if('A'<=c && c<='F') {
	1106	code=(code<<4)\|(c-'A'+10);
	1107	} else {
	1108	return 0xffff;
	1109	}
	1110	}
	1111
	1112	/* does it fit into the range? */
	1113	if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
	1114	return code;
	1115	}
	1116	break;
	1117	}
	1118	case 1: {
	1119	char buffer[64];
	1120	uint16_t indexes[8];
	1121	const char elementBases[8], elements[8];
	1122	const uint16_t factors=(const uint16_t )(range+1);
	1123	uint16_t count=range->variant;
	1124	const char s=(const char )(factors+count), *t;
	1125	UChar32 start, limit;
	1126	uint16_t i, idx;
	1127
	1128	char c;
	1129
	1130	/* name = prefix factorized-elements */
	1131
	1132	/* compare prefix */
	1133	while((c=*s++)!=0) {
	1134	if((char)c!=*otherName++) {
	1135	return 0xffff;
	1136	}
	1137	}
	1138
	1139	start=(UChar32)range->start;
	1140	limit=(UChar32)(range->end+1);
	1141
	1142	/* initialize the suffix elements for enumeration; indexes should all be set to 0 */
	1143	writeFactorSuffix(factors, count, s, 0,
	1144	indexes, elementBases, elements, buffer, sizeof(buffer));
	1145
	1146	/* compare the first suffix */
	1147	if(0==uprv_strcmp(otherName, buffer)) {
	1148	return start;
	1149	}
	1150
	1151	/* enumerate and compare the rest of the suffixes */
	1152	while(++start<limit) {
	1153	/* increment the indexes in lexical order bound by the factors */
	1154	i=count;
	1155	for (;;) {
	1156	idx=(uint16_t)(indexes[--i]+1);
	1157	if(idx<factors[i]) {
	1158	/* skip one index and its element string */
	1159	indexes[i]=idx;
	1160	s=elements[i];
	1161	while(*s++!=0) {}
	1162	elements[i]=s;
	1163	break;
	1164	} else {
	1165	/* reset this index to 0 and its element string to the first one */
	1166	indexes[i]=0;
	1167	elements[i]=elementBases[i];
	1168	}
	1169	}
	1170
	1171	/* to make matters a little easier, just compare all elements of the suffix */
	1172	t=otherName;
	1173	for(i=0; i<count; ++i) {
	1174	s=elements[i];
	1175	while((c=*s++)!=0) {
	1176	if(c!=*t++) {
	1177	s=""; /* does not match */
	1178	i=99;
	1179	}
	1180	}
	1181	}
	1182	if(i<99 && *t==0) {
	1183	return start;
	1184	}
	1185	}
	1186	break;
	1187	}
	1188	default:
	1189	/* undefined type */
	1190	break;
	1191	}
	1192
	1193	return 0xffff;
	1194	}
	1195
	1196	/* sets of name characters, maximum name lengths ---------------------------- */
	1197
	/*
	 * Bit-set helpers for a set of 256 byte values stored in uint32_t[8]:
	 * the top 3 bits of the byte select the word, the low 5 bits select the bit.
	 * The argument c is fully parenthesized so that the uint8_t cast applies to
	 * the whole expression; note that both macros evaluate c twice.
	 */
	#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
	#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
	1200
	1201	static int32_t
	1202	calcStringSetLength(uint32_t set[8], const char *s) {
	1203	int32_t length=0;
	1204	char c;
	1205
	1206	while((c=*s++)!=0) {
	1207	SET_ADD(set, c);
	1208	++length;
	1209	}
	1210	return length;
	1211	}
	1212
	1213	static int32_t
	1214	calcAlgNameSetsLengths(int32_t maxNameLength) {
	1215	AlgorithmicRange *range;
	1216	uint32_t *p;
	1217	uint32_t rangeCount;
	1218	int32_t length;
	1219
	1220	/* enumerate algorithmic ranges */
	1221	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
	1222	rangeCount=*p;
	1223	range=(AlgorithmicRange *)(p+1);
	1224	while(rangeCount>0) {
	1225	switch(range->type) {
	1226	case 0:
	1227	/* name = prefix + (range->variant times) hex-digits */
	1228	/* prefix */
	1229	length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
	1230	if(length>maxNameLength) {
	1231	maxNameLength=length;
	1232	}
	1233	break;
	1234	case 1: {
	1235	/* name = prefix factorized-elements */
	1236	const uint16_t factors=(const uint16_t )(range+1);
	1237	const char *s;
	1238	int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
	1239
	1240	/* prefix length */
	1241	s=(const char *)(factors+count);
	1242	length=calcStringSetLength(gNameSet, s);
	1243	s+=length+1; /* start of factor suffixes */
	1244
	1245	/* get the set and maximum factor suffix length for each factor */
	1246	for(i=0; i<count; ++i) {
	1247	maxFactorLength=0;
	1248	for(factor=factors[i]; factor>0; --factor) {
	1249	factorLength=calcStringSetLength(gNameSet, s);
	1250	s+=factorLength+1;
	1251	if(factorLength>maxFactorLength) {
	1252	maxFactorLength=factorLength;
	1253	}
	1254	}
	1255	length+=maxFactorLength;
	1256	}
	1257
	1258	if(length>maxNameLength) {
	1259	maxNameLength=length;
	1260	}
	1261	break;
	1262	}
	1263	default:
	1264	/* unknown type */
	1265	break;
	1266	}
	1267
	1268	range=(AlgorithmicRange )((uint8_t )range+range->size);
	1269	--rangeCount;
	1270	}
	1271	return maxNameLength;
	1272	}
	1273
	1274	static int32_t
	1275	calcExtNameSetsLengths(int32_t maxNameLength) {
	1276	int32_t i, length;
	1277
	1278	for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
	1279	/*
	1280	* for each category, count the length of the category name
	1281	* plus 9=
	1282	* 2 for <>
	1283	* 1 for -
	1284	* 6 for most hex digits per code point
	1285	*/
	1286	length=9+calcStringSetLength(gNameSet, charCatNames[i]);
	1287	if(length>maxNameLength) {
	1288	maxNameLength=length;
	1289	}
	1290	}
	1291	return maxNameLength;
	1292	}
	1293
	1294	static int32_t
	1295	calcNameSetLength(const uint16_t tokens, uint16_t tokenCount, const uint8_t tokenStrings, int8_t *tokenLengths,
	1296	uint32_t set[8],
	1297	const uint8_t *pLine, const uint8_t lineLimit) {
	1298	const uint8_t line=pLine;
	1299	int32_t length=0, tokenLength;
	1300	uint16_t c, token;
	1301
	1302	while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
	1303	if(c>=tokenCount) {
	1304	/* implicit letter */
	1305	SET_ADD(set, c);
	1306	++length;
	1307	} else {
	1308	token=tokens[c];
	1309	if(token==(uint16_t)(-2)) {
	1310	/* this is a lead byte for a double-byte token */
	1311	c=c<<8\|*line++;
	1312	token=tokens[c];
	1313	}
	1314	if(token==(uint16_t)(-1)) {
	1315	/* explicit letter */
	1316	SET_ADD(set, c);
	1317	++length;
	1318	} else {
	1319	/* count token word */
	1320	if(tokenLengths!=NULL) {
	1321	/* use cached token length */
	1322	tokenLength=tokenLengths[c];
	1323	if(tokenLength==0) {
	1324	tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
	1325	tokenLengths[c]=(int8_t)tokenLength;
	1326	}
	1327	} else {
	1328	tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
	1329	}
	1330	length+=tokenLength;
	1331	}
	1332	}
	1333	}
	1334
	1335	*pLine=line;
	1336	return length;
	1337	}
	1338
	1339	static void
	1340	calcGroupNameSetsLengths(int32_t maxNameLength) {
	1341	uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
	1342
	1343	uint16_t tokens=(uint16_t )uCharNames+8;
	1344	uint16_t tokenCount=*tokens++;
	1345	uint8_t tokenStrings=(uint8_t )uCharNames+uCharNames->tokenStringOffset;
	1346
	1347	int8_t *tokenLengths;
	1348
	1349	const uint16_t *group;
	1350	const uint8_t s, line, *lineLimit;
	1351
	1352	int32_t groupCount, lineNumber, length;
	1353
	1354	tokenLengths=(int8_t *)uprv_malloc(tokenCount);
	1355	if(tokenLengths!=NULL) {
	1356	uprv_memset(tokenLengths, 0, tokenCount);
	1357	}
	1358
	1359	group=GET_GROUPS(uCharNames);
	1360	groupCount=*group++;
	1361
	1362	/* enumerate all groups */
	1363	while(groupCount>0) {
	1364	s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
	1365	s=expandGroupLengths(s, offsets, lengths);
	1366
	1367	/* enumerate all lines in each group */
	1368	for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
	1369	line=s+offsets[lineNumber];
	1370	length=lengths[lineNumber];
	1371	if(length==0) {
	1372	continue;
	1373	}
	1374
	1375	lineLimit=line+length;
	1376
	1377	/* read regular name */
	1378	length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
	1379	if(length>maxNameLength) {
	1380	maxNameLength=length;
	1381	}
	1382	if(line==lineLimit) {
	1383	continue;
	1384	}
	1385
	1386	/* read Unicode 1.0 name */
	1387	length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
	1388	if(length>maxNameLength) {
	1389	maxNameLength=length;
	1390	}
	1391	if(line==lineLimit) {
	1392	continue;
	1393	}
	1394
	1395	/* read ISO comment */
	1396	/length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);/
	1397	}
	1398
	1399	group=NEXT_GROUP(group);
	1400	--groupCount;
	1401	}
	1402
	1403	if(tokenLengths!=NULL) {
	1404	uprv_free(tokenLengths);
	1405	}
	1406
	1407	/* set gMax... - name length last for threading */
	1408	gMaxNameLength=maxNameLength;
	1409	}
	1410
	1411	static UBool
	1412	calcNameSetsLengths(UErrorCode *pErrorCode) {
	1413	static const char extChars[]="0123456789ABCDEF<>-";
	1414	int32_t i, maxNameLength;
	1415
	1416	if(gMaxNameLength!=0) {
	1417	return TRUE;
	1418	}
	1419
	1420	if(!isDataLoaded(pErrorCode)) {
	1421	return FALSE;
	1422	}
	1423
	1424	/* set hex digits, used in various names, and <>-, used in extended names */
	1425	for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
	1426	SET_ADD(gNameSet, extChars[i]);
	1427	}
	1428
	1429	/* set sets and lengths from algorithmic names */
	1430	maxNameLength=calcAlgNameSetsLengths(0);
	1431
	1432	/* set sets and lengths from extended names */
	1433	maxNameLength=calcExtNameSetsLengths(maxNameLength);
	1434
	1435	/* set sets and lengths from group names, set global maximum values */
	1436	calcGroupNameSetsLengths(maxNameLength);
	1437
	1438	return TRUE;
	1439	}
	1440
	1441	U_NAMESPACE_END
	1442
	1443	/* public API --------------------------------------------------------------- */
	1444
	1445	U_NAMESPACE_USE
	1446
	1447	U_CAPI int32_t U_EXPORT2
	1448	u_charName(UChar32 code, UCharNameChoice nameChoice,
	1449	char *buffer, int32_t bufferLength,
	1450	UErrorCode *pErrorCode) {
	1451	AlgorithmicRange *algRange;
	1452	uint32_t *p;
	1453	uint32_t i;
	1454	int32_t length;
	1455
	1456	/* check the argument values */
	1457	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	1458	return 0;
	1459	} else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\|
	1460	bufferLength<0 \|\| (bufferLength>0 && buffer==NULL)
	1461	) {
	1462	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1463	return 0;
	1464	}
	1465
	1466	if((uint32_t)code>UCHAR_MAX_VALUE \|\| !isDataLoaded(pErrorCode)) {
	1467	return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
	1468	}
	1469
	1470	length=0;
	1471
	1472	/* try algorithmic names first */
	1473	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
	1474	i=*p;
	1475	algRange=(AlgorithmicRange *)(p+1);
	1476	while(i>0) {
	1477	if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
	1478	length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
	1479	break;
	1480	}
	1481	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
	1482	--i;
	1483	}
	1484
	1485	if(i==0) {
	1486	if (nameChoice == U_EXTENDED_CHAR_NAME) {
	1487	length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
	1488	if (!length) {
	1489	/* extended character name */
	1490	length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
	1491	}
	1492	} else {
	1493	/* normal character name */
	1494	length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
	1495	}
	1496	}
	1497
	1498	return u_terminateChars(buffer, bufferLength, length, pErrorCode);
	1499	}
	1500
	1501	U_CAPI int32_t U_EXPORT2
	1502	u_getISOComment(UChar32 /c/,
	1503	char *dest, int32_t destCapacity,
	1504	UErrorCode *pErrorCode) {
	1505	/* check the argument values */
	1506	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	1507	return 0;
	1508	} else if(destCapacity<0 \|\| (destCapacity>0 && dest==NULL)) {
	1509	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1510	return 0;
	1511	}
	1512
	1513	return u_terminateChars(dest, destCapacity, 0, pErrorCode);
	1514	}
	1515
	1516	U_CAPI UChar32 U_EXPORT2
	1517	u_charFromName(UCharNameChoice nameChoice,
	1518	const char *name,
	1519	UErrorCode *pErrorCode) {
	1520	char upper[120], lower[120];
	1521	FindName findName;
	1522	AlgorithmicRange *algRange;
	1523	uint32_t *p;
	1524	uint32_t i;
	1525	UChar32 cp = 0;
	1526	char c0;
	1527	UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
	1528
	1529	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	1530	return error;
	1531	}
	1532
	1533	if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| name==NULL \|\| *name==0) {
	1534	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1535	return error;
	1536	}
	1537
	1538	if(!isDataLoaded(pErrorCode)) {
	1539	return error;
	1540	}
	1541
	1542	/* construct the uppercase and lowercase of the name first */
	1543	for(i=0; i<sizeof(upper); ++i) {
	1544	if((c0=*name++)!=0) {
	1545	upper[i]=uprv_toupper(c0);
	1546	lower[i]=uprv_tolower(c0);
	1547	} else {
	1548	upper[i]=lower[i]=0;
	1549	break;
	1550	}
	1551	}
	1552	if(i==sizeof(upper)) {
	1553	/* name too long, there is no such character */
	1554	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
	1555	return error;
	1556	}
	1557	// i==strlen(name)==strlen(lower)==strlen(upper)
	1558
	1559	/* try extended names first */
	1560	if (lower[0] == '<') {
	1561	if (nameChoice == U_EXTENDED_CHAR_NAME) {
	1562	// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
	1563	if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
	1564	while (i >= 3 && lower[--i] != '-') {}
	1565
	1566	if (i >= 2 && lower[i] == '-') {
	1567	uint32_t cIdx;
	1568
	1569	lower[i] = 0;
	1570
	1571	for (++i; lower[i] != '>'; ++i) {
	1572	if (lower[i] >= '0' && lower[i] <= '9') {
	1573	cp = (cp << 4) + lower[i] - '0';
	1574	} else if (lower[i] >= 'a' && lower[i] <= 'f') {
	1575	cp = (cp << 4) + lower[i] - 'a' + 10;
	1576	} else {
	1577	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
	1578	return error;
	1579	}
	1580	}
	1581
	1582	/* Now validate the category name.
	1583	We could use a binary search, or a trie, if
	1584	we really wanted to. */
	1585
	1586	for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
	1587
	1588	if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
	1589	if (getCharCat(cp) == cIdx) {
	1590	return cp;
	1591	}
	1592	break;
	1593	}
	1594	}
	1595	}
	1596	}
	1597	}
	1598
	1599	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
	1600	return error;
	1601	}
	1602
	1603	/* try algorithmic names now */
	1604	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
	1605	i=*p;
	1606	algRange=(AlgorithmicRange *)(p+1);
	1607	while(i>0) {
	1608	if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
	1609	return cp;
	1610	}
	1611	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
	1612	--i;
	1613	}
	1614
	1615	/* normal character name */
	1616	findName.otherName=upper;
	1617	findName.code=error;
	1618	enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
	1619	if (findName.code == error) {
	1620	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
	1621	}
	1622	return findName.code;
	1623	}
	1624
	1625	U_CAPI void U_EXPORT2
	1626	u_enumCharNames(UChar32 start, UChar32 limit,
	1627	UEnumCharNamesFn *fn,
	1628	void *context,
	1629	UCharNameChoice nameChoice,
	1630	UErrorCode *pErrorCode) {
	1631	AlgorithmicRange *algRange;
	1632	uint32_t *p;
	1633	uint32_t i;
	1634
	1635	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	1636	return;
	1637	}
	1638
	1639	if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| fn==NULL) {
	1640	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	1641	return;
	1642	}
	1643
	1644	if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
	1645	limit = UCHAR_MAX_VALUE + 1;
	1646	}
	1647	if((uint32_t)start>=(uint32_t)limit) {
	1648	return;
	1649	}
	1650
	1651	if(!isDataLoaded(pErrorCode)) {
	1652	return;
	1653	}
	1654
	1655	/* interleave the data-driven ones with the algorithmic ones */
	1656	/* iterate over all algorithmic ranges; assume that they are in ascending order */
	1657	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
	1658	i=*p;
	1659	algRange=(AlgorithmicRange *)(p+1);
	1660	while(i>0) {
	1661	/* enumerate the character names before the current algorithmic range */
	1662	/* here: start<limit */
	1663	if((uint32_t)start<algRange->start) {
	1664	if((uint32_t)limit<=algRange->start) {
	1665	enumNames(uCharNames, start, limit, fn, context, nameChoice);
	1666	return;
	1667	}
	1668	if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
	1669	return;
	1670	}
	1671	start=(UChar32)algRange->start;
	1672	}
	1673	/* enumerate the character names in the current algorithmic range */
	1674	/* here: algRange->start<=start<limit */
	1675	if((uint32_t)start<=algRange->end) {
	1676	if((uint32_t)limit<=(algRange->end+1)) {
	1677	enumAlgNames(algRange, start, limit, fn, context, nameChoice);
	1678	return;
	1679	}
	1680	if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
	1681	return;
	1682	}
	1683	start=(UChar32)algRange->end+1;
	1684	}
	1685	/* continue to the next algorithmic range (here: start<limit) */
	1686	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
	1687	--i;
	1688	}
	1689	/* enumerate the character names after the last algorithmic range */
	1690	enumNames(uCharNames, start, limit, fn, context, nameChoice);
	1691	}
	1692
	1693	U_CAPI int32_t U_EXPORT2
	1694	uprv_getMaxCharNameLength() {
	1695	UErrorCode errorCode=U_ZERO_ERROR;
	1696	if(calcNameSetsLengths(&errorCode)) {
	1697	return gMaxNameLength;
	1698	} else {
	1699	return 0;
	1700	}
	1701	}
	1702
	1703	/**
	1704	* Converts the char set cset into a Unicode set uset.
	1705	* @param cset Set of 256 bit flags corresponding to a set of chars.
	1706	* @param uset USet to receive characters. Existing contents are deleted.
	1707	*/
	1708	static void
	1709	charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
	1710	UChar us[256];
	1711	char cs[256];
	1712
	1713	int32_t i, length;
	1714	UErrorCode errorCode;
	1715
	1716	errorCode=U_ZERO_ERROR;
	1717
	1718	if(!calcNameSetsLengths(&errorCode)) {
	1719	return;
	1720	}
	1721
	1722	/* build a char string with all chars that are used in character names */
	1723	length=0;
	1724	for(i=0; i<256; ++i) {
	1725	if(SET_CONTAINS(cset, i)) {
	1726	cs[length++]=(char)i;
	1727	}
	1728	}
	1729
	1730	/* convert the char string to a UChar string */
	1731	u_charsToUChars(cs, us, length);
	1732
	1733	/* add each UChar to the USet */
	1734	for(i=0; i<length; ++i) {
	1735	if(us[i]!=0 \|\| cs[i]==0) { /* non-invariant chars become (UChar)0 */
	1736	sa->add(sa->set, us[i]);
	1737	}
	1738	}
	1739	}
	1740
	1741	/**
	1742	* Fills set with characters that are used in Unicode character names.
	1743	* @param set USet to receive characters.
	1744	*/
	1745	U_CAPI void U_EXPORT2
	1746	uprv_getCharNameCharacters(const USetAdder *sa) {
	1747	charSetToUSet(gNameSet, sa);
	1748	}
	1749
	1750	/* data swapping ------------------------------------------------------------ */
	1751
	1752	/*
	1753	* The token table contains non-negative entries for token bytes,
	1754	* and -1 for bytes that represent themselves in the data file's charset.
	1755	* -2 entries are used for lead bytes.
	1756	*
	1757	* Direct bytes (-1 entries) must be translated from the input charset family
	1758	* to the output charset family.
	1759	* makeTokenMap() writes a permutation mapping for this.
	1760	* Use it once for single-/lead-byte tokens and once more for all trail byte
	1761	* tokens. (';' is an unused trail byte marked with -1.)
	1762	*/
	1763	static void
	1764	makeTokenMap(const UDataSwapper *ds,
	1765	int16_t tokens[], uint16_t tokenCount,
	1766	uint8_t map[256],
	1767	UErrorCode *pErrorCode) {
	1768	UBool usedOutChar[256];
	1769	uint16_t i, j;
	1770	uint8_t c1, c2;
	1771
	1772	if(U_FAILURE(*pErrorCode)) {
	1773	return;
	1774	}
	1775
	1776	if(ds->inCharset==ds->outCharset) {
	1777	/* Same charset family: identity permutation */
	1778	for(i=0; i<256; ++i) {
	1779	map[i]=(uint8_t)i;
	1780	}
	1781	} else {
	1782	uprv_memset(map, 0, 256);
	1783	uprv_memset(usedOutChar, 0, 256);
	1784
	1785	if(tokenCount>256) {
	1786	tokenCount=256;
	1787	}
	1788
	1789	/* set the direct bytes (byte 0 always maps to itself) */
	1790	for(i=1; i<tokenCount; ++i) {
	1791	if(tokens[i]==-1) {
	1792	/* convert the direct byte character */
	1793	c1=(uint8_t)i;
	1794	ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
	1795	if(U_FAILURE(*pErrorCode)) {
	1796	udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
	1797	i, ds->inCharset);
	1798	return;
	1799	}
	1800
	1801	/* enter the converted character into the map and mark it used */
	1802	map[c1]=c2;
	1803	usedOutChar[c2]=TRUE;
	1804	}
	1805	}
	1806
	1807	/* set the mappings for the rest of the permutation */
	1808	for(i=j=1; i<tokenCount; ++i) {
	1809	/* set mappings that were not set for direct bytes */
	1810	if(map[i]==0) {
	1811	/* set an output byte value that was not used as an output byte above */
	1812	while(usedOutChar[j]) {
	1813	++j;
	1814	}
	1815	map[i]=(uint8_t)j++;
	1816	}
	1817	}
	1818
	1819	/*
	1820	* leave mappings at tokenCount and above unset if tokenCount<256
	1821	* because they won't be used
	1822	*/
	1823	}
	1824	}
	1825
	1826	U_CAPI int32_t U_EXPORT2
	1827	uchar_swapNames(const UDataSwapper *ds,
	1828	const void inData, int32_t length, void outData,
	1829	UErrorCode *pErrorCode) {
	1830	const UDataInfo *pInfo;
	1831	int32_t headerSize;
	1832
	1833	const uint8_t *inBytes;
	1834	uint8_t *outBytes;
	1835
	1836	uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
	1837	offset, i, count, stringsCount;
	1838
	1839	const AlgorithmicRange *inRange;
	1840	AlgorithmicRange *outRange;
	1841
	1842	/* udata_swapDataHeader checks the arguments */
	1843	headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
	1844	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	1845	return 0;
	1846	}
	1847
	1848	/* check data format and format version */
	1849	pInfo=(const UDataInfo )((const char )inData+4);
	1850	if(!(
	1851	pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
	1852	pInfo->dataFormat[1]==0x6e &&
	1853	pInfo->dataFormat[2]==0x61 &&
	1854	pInfo->dataFormat[3]==0x6d &&
	1855	pInfo->formatVersion[0]==1
	1856	)) {
	1857	udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
	1858	pInfo->dataFormat[0], pInfo->dataFormat[1],
	1859	pInfo->dataFormat[2], pInfo->dataFormat[3],
	1860	pInfo->formatVersion[0]);
	1861	*pErrorCode=U_UNSUPPORTED_ERROR;
	1862	return 0;
	1863	}
	1864
	1865	inBytes=(const uint8_t *)inData+headerSize;
	1866	outBytes=(uint8_t *)outData+headerSize;
	1867	if(length<0) {
	1868	algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
	1869	} else {
	1870	length-=headerSize;
	1871	if( length<20 \|\|
	1872	(uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
	1873	) {
	1874	udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
	1875	length);
	1876	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	1877	return 0;
	1878	}
	1879	}
	1880
	1881	if(length<0) {
	1882	/* preflighting: iterate through algorithmic ranges */
	1883	offset=algNamesOffset;
	1884	count=ds->readUInt32(((const uint32_t )(inBytes+offset)));
	1885	offset+=4;
	1886
	1887	for(i=0; i<count; ++i) {
	1888	inRange=(const AlgorithmicRange *)(inBytes+offset);
	1889	offset+=ds->readUInt16(inRange->size);
	1890	}
	1891	} else {
	1892	/* swap data */
	1893	const uint16_t *p;
	1894	uint16_t q, temp;
	1895
	1896	int16_t tokens[512];
	1897	uint16_t tokenCount;
	1898
	1899	uint8_t map[256], trailMap[256];
	1900
	1901	/* copy the data for inaccessible bytes */
	1902	if(inBytes!=outBytes) {
	1903	uprv_memcpy(outBytes, inBytes, length);
	1904	}
	1905
	1906	/* the initial 4 offsets first */
	1907	tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
	1908	groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
	1909	groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
	1910	ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
	1911
	1912	/*
	1913	* now the tokens table
	1914	* it needs to be permutated along with the compressed name strings
	1915	*/
	1916	p=(const uint16_t *)(inBytes+16);
	1917	q=(uint16_t *)(outBytes+16);
	1918
	1919	/* read and swap the tokenCount */
	1920	tokenCount=ds->readUInt16(*p);
	1921	ds->swapArray16(ds, p, 2, q, pErrorCode);
	1922	++p;
	1923	++q;
	1924
	1925	/* read the first 512 tokens and make the token maps */
	1926	if(tokenCount<=512) {
	1927	count=tokenCount;
	1928	} else {
	1929	count=512;
	1930	}
	1931	for(i=0; i<count; ++i) {
	1932	tokens[i]=udata_readInt16(ds, p[i]);
	1933	}
	1934	for(; i<512; ++i) {
	1935	tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
	1936	}
	1937	makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
	1938	makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
	1939	if(U_FAILURE(*pErrorCode)) {
	1940	return 0;
	1941	}
	1942
	1943	/*
	1944	* swap and permutate the tokens
	1945	* go through a temporary array to support in-place swapping
	1946	*/
	1947	temp=(uint16_t )uprv_malloc(tokenCount2);
	1948	if(temp==NULL) {
	1949	udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
	1950	tokenCount);
	1951	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1952	return 0;
	1953	}
	1954
	1955	/* swap and permutate single-/lead-byte tokens */
	1956	for(i=0; i<tokenCount && i<256; ++i) {
	1957	ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
	1958	}
	1959
	1960	/* swap and permutate trail-byte tokens */
	1961	for(; i<tokenCount; ++i) {
	1962	ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
	1963	}
	1964
	1965	/* copy the result into the output and free the temporary array */
	1966	uprv_memcpy(q, temp, tokenCount*2);
	1967	uprv_free(temp);
	1968
	1969	/*
	1970	* swap the token strings but not a possible padding byte after
	1971	* the terminating NUL of the last string
	1972	*/
	1973	udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
	1974	outBytes+tokenStringOffset, pErrorCode);
	1975	if(U_FAILURE(*pErrorCode)) {
	1976	udata_printError(ds, "uchar_swapNames(token strings) failed\n");
	1977	return 0;
	1978	}
	1979
	1980	/* swap the group table */
	1981	count=ds->readUInt16(((const uint16_t )(inBytes+groupsOffset)));
	1982	ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count3)2),
	1983	outBytes+groupsOffset, pErrorCode);
	1984
	1985	/*
	1986	* swap the group strings
	1987	* swap the string bytes but not the nibble-encoded string lengths
	1988	*/
	1989	if(ds->inCharset!=ds->outCharset) {
	1990	uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
	1991
	1992	const uint8_t inStrings, nextInStrings;
	1993	uint8_t *outStrings;
	1994
	1995	uint8_t c;
	1996
	1997	inStrings=inBytes+groupStringOffset;
	1998	outStrings=outBytes+groupStringOffset;
	1999
	2000	stringsCount=algNamesOffset-groupStringOffset;
	2001
	2002	/* iterate through string groups until only a few padding bytes are left */
	2003	while(stringsCount>32) {
	2004	nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
	2005
	2006	/* move past the length bytes */
	2007	stringsCount-=(uint32_t)(nextInStrings-inStrings);
	2008	outStrings+=nextInStrings-inStrings;
	2009	inStrings=nextInStrings;
	2010
	2011	count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
	2012	stringsCount-=count;
	2013
	2014	/* swap the string bytes using map[] and trailMap[] */
	2015	while(count>0) {
	2016	c=*inStrings++;
	2017	*outStrings++=map[c];
	2018	if(tokens[c]!=-2) {
	2019	--count;
	2020	} else {
	2021	/* token lead byte: swap the trail byte, too */
	2022	outStrings++=trailMap[inStrings++];
	2023	count-=2;
	2024	}
	2025	}
	2026	}
	2027	}
	2028
	2029	/* swap the algorithmic ranges */
	2030	offset=algNamesOffset;
	2031	count=ds->readUInt32(((const uint32_t )(inBytes+offset)));
	2032	ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
	2033	offset+=4;
	2034
	2035	for(i=0; i<count; ++i) {
	2036	if(offset>(uint32_t)length) {
	2037	udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
	2038	length, i);
	2039	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	2040	return 0;
	2041	}
	2042
	2043	inRange=(const AlgorithmicRange *)(inBytes+offset);
	2044	outRange=(AlgorithmicRange *)(outBytes+offset);
	2045	offset+=ds->readUInt16(inRange->size);
	2046
	2047	ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
	2048	ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
	2049	switch(inRange->type) {
	2050	case 0:
	2051	/* swap prefix string */
	2052	ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
	2053	outRange+1, pErrorCode);
	2054	if(U_FAILURE(*pErrorCode)) {
	2055	udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
	2056	i);
	2057	return 0;
	2058	}
	2059	break;
	2060	case 1:
	2061	{
	2062	/* swap factors and the prefix and factor strings */
	2063	uint32_t factorsCount;
	2064
	2065	factorsCount=inRange->variant;
	2066	p=(const uint16_t *)(inRange+1);
	2067	q=(uint16_t *)(outRange+1);
	2068	ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
	2069
	2070	/* swap the strings, up to the last terminating NUL */
	2071	p+=factorsCount;
	2072	q+=factorsCount;
	2073	stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
	2074	while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
	2075	--stringsCount;
	2076	}
	2077	ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
	2078	}
	2079	break;
	2080	default:
	2081	udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
	2082	inRange->type, i);
	2083	*pErrorCode=U_UNSUPPORTED_ERROR;
	2084	return 0;
	2085	}
	2086	}
	2087	}
	2088
	2089	return headerSize+(int32_t)offset;
	2090	}
	2091
	2092	/*
	2093	* Hey, Emacs, please set the following:
	2094	*
	2095	* Local Variables:
	2096	* indent-tabs-mode: nil
	2097	* End:
	2098	*
	2099	*/