git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	* Copyright (C) 1996-2012, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	*******************************************************************************
	6	* file name: ucol.cpp
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* Modification history
	12	* Date Name Comments
	13	* 1996-1999 various members of ICU team maintained C API for collation framework
	14	* 02/16/2001 synwee Added internal method getPrevSpecialCE
	15	* 03/01/2001 synwee Added maxexpansion functionality.
	16	* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
	17	*/
	18
	19	#include "unicode/utypes.h"
	20
	21	#if !UCONFIG_NO_COLLATION
	22
	23	#include "unicode/bytestream.h"
	24	#include "unicode/coleitr.h"
	25	#include "unicode/unorm.h"
	26	#include "unicode/udata.h"
	27	#include "unicode/ustring.h"
	28
	29	#include "ucol_imp.h"
	30	#include "bocsu.h"
	31
	32	#include "normalizer2impl.h"
	33	#include "unorm_it.h"
	34	#include "umutex.h"
	35	#include "cmemory.h"
	36	#include "ucln_in.h"
	37	#include "cstring.h"
	38	#include "utracimp.h"
	39	#include "putilimp.h"
	40	#include "uassert.h"
	41	#include "unicode/coll.h"
	42
	43	#ifdef UCOL_DEBUG
	44	#include <stdio.h>
	45	#endif
	46
	47	U_NAMESPACE_USE
	48
	49	#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
	50
	51	#define LAST_BYTE_MASK_ 0xFF
	52	#define SECOND_LAST_BYTE_SHIFT_ 8
	53
	54	#define ZERO_CC_LIMIT_ 0xC0
	55
	56	// This is static pointer to the NFC implementation instance.
	57	// it is always the same between calls to u_cleanup
	58	// and therefore writing to it is not synchronized.
	59	// It is cleaned in ucol_cleanup
	60	static const Normalizer2Impl *g_nfcImpl = NULL;
	61
	62	// These are values from UCA required for
	63	// implicit generation and suppressing sort key compression
	64	// they should regularly be in the UCA, but if one
	65	// is running without UCA, it could be a problem
	66	static const int32_t maxRegularPrimary = 0x7A;
	67	static const int32_t minImplicitPrimary = 0xE0;
	68	static const int32_t maxImplicitPrimary = 0xE4;
	69
	70	U_CDECL_BEGIN
	71	static UBool U_CALLCONV
	72	ucol_cleanup(void)
	73	{
	74	g_nfcImpl = NULL;
	75	return TRUE;
	76	}
	77
	78	static int32_t U_CALLCONV
	79	_getFoldingOffset(uint32_t data) {
	80	return (int32_t)(data&0xFFFFFF);
	81	}
	82
	83	U_CDECL_END
	84
	85	// init FCD data
	86	static inline
	87	UBool initializeFCD(UErrorCode *status) {
	88	if (g_nfcImpl != NULL) {
	89	return TRUE;
	90	} else {
	91	// The result is constant, until the library is reloaded.
	92	g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
	93	// Note: Alternatively, we could also store this pointer in each collIterate struct,
	94	// same as Normalizer2Factory::getImpl(collIterate->nfd).
	95	ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
	96	return U_SUCCESS(*status);
	97	}
	98	}
	99
	100	static
	101	inline void IInit_collIterate(const UCollator collator, const UChar sourceString,
	102	int32_t sourceLen, collIterate *s,
	103	UErrorCode *status)
	104	{
	105	(s)->string = (s)->pos = sourceString;
	106	(s)->origFlags = 0;
	107	(s)->flags = 0;
	108	if (sourceLen >= 0) {
	109	s->flags \|= UCOL_ITER_HASLEN;
	110	(s)->endp = (UChar *)sourceString+sourceLen;
	111	}
	112	else {
	113	/* change to enable easier checking for end of string for fcdpositon */
	114	(s)->endp = NULL;
	115	}
	116	(s)->extendCEs = NULL;
	117	(s)->extendCEsSize = 0;
	118	(s)->CEpos = (s)->toReturn = (s)->CEs;
	119	(s)->offsetBuffer = NULL;
	120	(s)->offsetBufferSize = 0;
	121	(s)->offsetReturn = (s)->offsetStore = NULL;
	122	(s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;
	123	(s)->coll = (collator);
	124	(s)->nfd = Normalizer2Factory::getNFDInstance(*status);
	125	(s)->fcdPosition = 0;
	126	if(collator->normalizationMode == UCOL_ON) {
	127	(s)->flags \|= UCOL_ITER_NORM;
	128	}
	129	if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
	130	(s)->flags \|= UCOL_HIRAGANA_Q;
	131	}
	132	(s)->iterator = NULL;
	133	//(s)->iteratorIndex = 0;
	134	}
	135
	136	U_CAPI void U_EXPORT2
	137	uprv_init_collIterate(const UCollator collator, const UChar sourceString,
	138	int32_t sourceLen, collIterate *s,
	139	UErrorCode *status) {
	140	/* Out-of-line version for use from other files. */
	141	IInit_collIterate(collator, sourceString, sourceLen, s, status);
	142	}
	143
	144	U_CAPI collIterate * U_EXPORT2
	145	uprv_new_collIterate(UErrorCode *status) {
	146	if(U_FAILURE(*status)) {
	147	return NULL;
	148	}
	149	collIterate *s = new collIterate;
	150	if(s == NULL) {
	151	*status = U_MEMORY_ALLOCATION_ERROR;
	152	return NULL;
	153	}
	154	return s;
	155	}
	156
	157	U_CAPI void U_EXPORT2
	158	uprv_delete_collIterate(collIterate *s) {
	159	delete s;
	160	}
	161
	162	U_CAPI UBool U_EXPORT2
	163	uprv_collIterateAtEnd(collIterate *s) {
	164	return s == NULL \|\| s->pos == s->endp;
	165	}
	166
	167	/**
	168	* Backup the state of the collIterate struct data
	169	* @param data collIterate to backup
	170	* @param backup storage
	171	*/
	172	static
	173	inline void backupState(const collIterate data, collIterateState backup)
	174	{
	175	backup->fcdPosition = data->fcdPosition;
	176	backup->flags = data->flags;
	177	backup->origFlags = data->origFlags;
	178	backup->pos = data->pos;
	179	backup->bufferaddress = data->writableBuffer.getBuffer();
	180	backup->buffersize = data->writableBuffer.length();
	181	backup->iteratorMove = 0;
	182	backup->iteratorIndex = 0;
	183	if(data->iterator != NULL) {
	184	//backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT);
	185	backup->iteratorIndex = data->iterator->getState(data->iterator);
	186	// no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE
	187	if(backup->iteratorIndex == UITER_NO_STATE) {
	188	while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) {
	189	backup->iteratorMove++;
	190	data->iterator->move(data->iterator, -1, UITER_CURRENT);
	191	}
	192	data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
	193	}
	194	}
	195	}
	196
	197	/**
	198	* Loads the state into the collIterate struct data
	199	* @param data collIterate to backup
	200	* @param backup storage
	201	* @param forwards boolean to indicate if forwards iteration is used,
	202	* false indicates backwards iteration
	203	*/
	204	static
	205	inline void loadState(collIterate data, const collIterateState backup,
	206	UBool forwards)
	207	{
	208	UErrorCode status = U_ZERO_ERROR;
	209	data->flags = backup->flags;
	210	data->origFlags = backup->origFlags;
	211	if(data->iterator != NULL) {
	212	//data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO);
	213	data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
	214	if(backup->iteratorMove != 0) {
	215	data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
	216	}
	217	}
	218	data->pos = backup->pos;
	219
	220	if ((data->flags & UCOL_ITER_INNORMBUF) &&
	221	data->writableBuffer.getBuffer() != backup->bufferaddress) {
	222	/*
	223	this is when a new buffer has been reallocated and we'll have to
	224	calculate the new position.
	225	note the new buffer has to contain the contents of the old buffer.
	226	*/
	227	if (forwards) {
	228	data->pos = data->writableBuffer.getTerminatedBuffer() +
	229	(data->pos - backup->bufferaddress);
	230	}
	231	else {
	232	/* backwards direction */
	233	int32_t temp = backup->buffersize -
	234	(int32_t)(data->pos - backup->bufferaddress);
	235	data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp);
	236	}
	237	}
	238	if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
	239	/*
	240	this is alittle tricky.
	241	if we are initially not in the normalization buffer, even if we
	242	normalize in the later stage, the data in the buffer will be
	243	ignored, since we skip back up to the data string.
	244	however if we are already in the normalization buffer, any
	245	further normalization will pull data into the normalization
	246	buffer and modify the fcdPosition.
	247	since we are keeping the data in the buffer for use, the
	248	fcdPosition can not be reverted back.
	249	arrgghh....
	250	*/
	251	data->fcdPosition = backup->fcdPosition;
	252	}
	253	}
	254
	255	static UBool
	256	reallocCEs(collIterate *data, int32_t newCapacity) {
	257	uint32_t *oldCEs = data->extendCEs;
	258	if(oldCEs == NULL) {
	259	oldCEs = data->CEs;
	260	}
	261	int32_t length = data->CEpos - oldCEs;
	262	uint32_t newCEs = (uint32_t )uprv_malloc(newCapacity * 4);
	263	if(newCEs == NULL) {
	264	return FALSE;
	265	}
	266	uprv_memcpy(newCEs, oldCEs, length * 4);
	267	uprv_free(data->extendCEs);
	268	data->extendCEs = newCEs;
	269	data->extendCEsSize = newCapacity;
	270	data->CEpos = newCEs + length;
	271	return TRUE;
	272	}
	273
	274	static UBool
	275	increaseCEsCapacity(collIterate *data) {
	276	int32_t oldCapacity;
	277	if(data->extendCEs != NULL) {
	278	oldCapacity = data->extendCEsSize;
	279	} else {
	280	oldCapacity = LENGTHOF(data->CEs);
	281	}
	282	return reallocCEs(data, 2 * oldCapacity);
	283	}
	284
	285	static UBool
	286	ensureCEsCapacity(collIterate *data, int32_t minCapacity) {
	287	int32_t oldCapacity;
	288	if(data->extendCEs != NULL) {
	289	oldCapacity = data->extendCEsSize;
	290	} else {
	291	oldCapacity = LENGTHOF(data->CEs);
	292	}
	293	if(minCapacity <= oldCapacity) {
	294	return TRUE;
	295	}
	296	oldCapacity *= 2;
	297	return reallocCEs(data, minCapacity > oldCapacity ? minCapacity : oldCapacity);
	298	}
	299
	300	void collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) {
	301	if(U_FAILURE(errorCode)) {
	302	return;
	303	}
	304	int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer);
	305	U_ASSERT(length >= offsetBufferSize \|\| offsetStore != NULL);
	306	if(length >= offsetBufferSize) {
	307	int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE;
	308	int32_t newBuffer = reinterpret_cast<int32_t >(uprv_malloc(newCapacity * 4));
	309	if(newBuffer == NULL) {
	310	errorCode = U_MEMORY_ALLOCATION_ERROR;
	311	return;
	312	}
	313	if(length > 0) {
	314	uprv_memcpy(newBuffer, offsetBuffer, length * 4);
	315	}
	316	uprv_free(offsetBuffer);
	317	offsetBuffer = newBuffer;
	318	offsetStore = offsetBuffer + length;
	319	offsetBufferSize = newCapacity;
	320	}
	321	*offsetStore++ = offset;
	322	}
	323
	324	/*
	325	* collIter_eos()
	326	* Checks for a collIterate being positioned at the end of
	327	* its source string.
	328	*
	329	*/
	330	static
	331	inline UBool collIter_eos(collIterate *s) {
	332	if(s->flags & UCOL_USE_ITERATOR) {
	333	return !(s->iterator->hasNext(s->iterator));
	334	}
	335	if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) {
	336	// Null terminated string, but not at null, so not at end.
	337	// Whether in main or normalization buffer doesn't matter.
	338	return FALSE;
	339	}
	340
	341	// String with length. Can't be in normalization buffer, which is always
	342	// null termintated.
	343	if (s->flags & UCOL_ITER_HASLEN) {
	344	return (s->pos == s->endp);
	345	}
	346
	347	// We are at a null termination, could be either normalization buffer or main string.
	348	if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {
	349	// At null at end of main string.
	350	return TRUE;
	351	}
	352
	353	// At null at end of normalization buffer. Need to check whether there there are
	354	// any characters left in the main buffer.
	355	if(s->origFlags & UCOL_USE_ITERATOR) {
	356	return !(s->iterator->hasNext(s->iterator));
	357	} else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {
	358	// Null terminated main string. fcdPosition is the 'return' position into main buf.
	359	return (*s->fcdPosition == 0);
	360	}
	361	else {
	362	// Main string with an end pointer.
	363	return s->fcdPosition == s->endp;
	364	}
	365	}
	366
	367	/*
	368	* collIter_bos()
	369	* Checks for a collIterate being positioned at the start of
	370	* its source string.
	371	*
	372	*/
	373	static
	374	inline UBool collIter_bos(collIterate *source) {
	375	// if we're going backwards, we need to know whether there is more in the
	376	// iterator, even if we are in the side buffer
	377	if(source->flags & UCOL_USE_ITERATOR \|\| source->origFlags & UCOL_USE_ITERATOR) {
	378	return !source->iterator->hasPrevious(source->iterator);
	379	}
	380	if (source->pos <= source->string \|\|
	381	((source->flags & UCOL_ITER_INNORMBUF) &&
	382	*(source->pos - 1) == 0 && source->fcdPosition == NULL)) {
	383	return TRUE;
	384	}
	385	return FALSE;
	386	}
	387
	388	/*static
	389	inline UBool collIter_SimpleBos(collIterate *source) {
	390	// if we're going backwards, we need to know whether there is more in the
	391	// iterator, even if we are in the side buffer
	392	if(source->flags & UCOL_USE_ITERATOR \|\| source->origFlags & UCOL_USE_ITERATOR) {
	393	return !source->iterator->hasPrevious(source->iterator);
	394	}
	395	if (source->pos == source->string) {
	396	return TRUE;
	397	}
	398	return FALSE;
	399	}*/
	400	//return (data->pos == data->string) \|\|
	401
	402
	403	/****************************************************************************/
	404	/* Following are the open/close functions */
	405	/* */
	406	/****************************************************************************/
	407
	408	static UCollator*
	409	ucol_initFromBinary(const uint8_t *bin, int32_t length,
	410	const UCollator *base,
	411	UCollator *fillIn,
	412	UErrorCode *status)
	413	{
	414	UCollator *result = fillIn;
	415	if(U_FAILURE(*status)) {
	416	return NULL;
	417	}
	418	/*
	419	if(base == NULL) {
	420	// we don't support null base yet
	421	*status = U_ILLEGAL_ARGUMENT_ERROR;
	422	return NULL;
	423	}
	424	*/
	425	// We need these and we could be running without UCA
	426	uprv_uca_initImplicitConstants(status);
	427	UCATableHeader colData = (UCATableHeader )bin;
	428	// do we want version check here? We're trying to figure out whether collators are compatible
	429	if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 \|\|
	430	uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) \|\|
	431	colData->version[0] != UCOL_BUILDER_VERSION)
	432	{
	433	*status = U_COLLATOR_VERSION_MISMATCH;
	434	return NULL;
	435	}
	436	else {
	437	if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
	438	result = ucol_initCollator((const UCATableHeader *)bin, result, base, status);
	439	if(U_FAILURE(*status)){
	440	return NULL;
	441	}
	442	result->hasRealData = TRUE;
	443	}
	444	else {
	445	if(base) {
	446	result = ucol_initCollator(base->image, result, base, status);
	447	ucol_setOptionsFromHeader(result, (UColOptionSet )(bin+((const UCATableHeader )bin)->options), status);
	448	if(U_FAILURE(*status)){
	449	return NULL;
	450	}
	451	result->hasRealData = FALSE;
	452	}
	453	else {
	454	*status = U_USELESS_COLLATOR_ERROR;
	455	return NULL;
	456	}
	457	}
	458	result->freeImageOnClose = FALSE;
	459	}
	460	result->actualLocale = NULL;
	461	result->validLocale = NULL;
	462	result->requestedLocale = NULL;
	463	result->rules = NULL;
	464	result->rulesLength = 0;
	465	result->freeRulesOnClose = FALSE;
	466	result->ucaRules = NULL;
	467	return result;
	468	}
	469
	470	U_CAPI UCollator* U_EXPORT2
	471	ucol_openBinary(const uint8_t *bin, int32_t length,
	472	const UCollator *base,
	473	UErrorCode *status)
	474	{
	475	return ucol_initFromBinary(bin, length, base, NULL, status);
	476	}
	477
	478	U_CAPI int32_t U_EXPORT2
	479	ucol_cloneBinary(const UCollator *coll,
	480	uint8_t *buffer, int32_t capacity,
	481	UErrorCode *status)
	482	{
	483	int32_t length = 0;
	484	if(U_FAILURE(*status)) {
	485	return length;
	486	}
	487	if(capacity < 0) {
	488	*status = U_ILLEGAL_ARGUMENT_ERROR;
	489	return length;
	490	}
	491	if(coll->hasRealData == TRUE) {
	492	length = coll->image->size;
	493	if(length <= capacity) {
	494	uprv_memcpy(buffer, coll->image, length);
	495	} else {
	496	*status = U_BUFFER_OVERFLOW_ERROR;
	497	}
	498	} else {
	499	length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
	500	if(length <= capacity) {
	501	/* build the UCATableHeader with minimal entries */
	502	/* do not copy the header from the UCA file because its values are wrong! */
	503	/* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
	504
	505	/* reset everything */
	506	uprv_memset(buffer, 0, length);
	507
	508	/* set the tailoring-specific values */
	509	UCATableHeader myData = (UCATableHeader )buffer;
	510	myData->size = length;
	511
	512	/* offset for the options, the only part of the data that is present after the header */
	513	myData->options = sizeof(UCATableHeader);
	514
	515	/* need to always set the expansion value for an upper bound of the options */
	516	myData->expansion = myData->options + sizeof(UColOptionSet);
	517
	518	myData->magic = UCOL_HEADER_MAGIC;
	519	myData->isBigEndian = U_IS_BIG_ENDIAN;
	520	myData->charSetFamily = U_CHARSET_FAMILY;
	521
	522	/* copy UCA's version; genrb will override all but the builder version with tailoring data */
	523	uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
	524
	525	uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
	526	uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
	527	uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
	528	myData->jamoSpecial = coll->image->jamoSpecial;
	529
	530	/* copy the collator options */
	531	uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
	532	} else {
	533	*status = U_BUFFER_OVERFLOW_ERROR;
	534	}
	535	}
	536	return length;
	537	}
	538
	539	U_CAPI UCollator* U_EXPORT2
	540	ucol_safeClone(const UCollator coll, void stackBuffer, int32_t * pBufferSize, UErrorCode *status)
	541	{
	542	UCollator * localCollator;
	543	int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
	544	char stackBufferChars = (char )stackBuffer;
	545	int32_t imageSize = 0;
	546	int32_t rulesSize = 0;
	547	int32_t rulesPadding = 0;
	548	int32_t defaultReorderCodesSize = 0;
	549	int32_t reorderCodesSize = 0;
	550	uint8_t *image;
	551	UChar *rules;
	552	int32_t* defaultReorderCodes;
	553	int32_t* reorderCodes;
	554	uint8_t* leadBytePermutationTable;
	555	UBool colAllocated = FALSE;
	556	UBool imageAllocated = FALSE;
	557
	558	if (status == NULL \|\| U_FAILURE(*status)){
	559	return 0;
	560	}
	561	if ((stackBuffer && !pBufferSize) \|\| !coll){
	562	*status = U_ILLEGAL_ARGUMENT_ERROR;
	563	return 0;
	564	}
	565
	566	if (coll->rules && coll->freeRulesOnClose) {
	567	rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
	568	rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
	569	bufferSizeNeeded += rulesSize + rulesPadding;
	570	}
	571	// no padding for alignment needed from here since the next two are 4 byte quantities
	572	if (coll->defaultReorderCodes) {
	573	defaultReorderCodesSize = coll->defaultReorderCodesLength * sizeof(int32_t);
	574	bufferSizeNeeded += defaultReorderCodesSize;
	575	}
	576	if (coll->reorderCodes) {
	577	reorderCodesSize = coll->reorderCodesLength * sizeof(int32_t);
	578	bufferSizeNeeded += reorderCodesSize;
	579	}
	580	if (coll->leadBytePermutationTable) {
	581	bufferSizeNeeded += 256 * sizeof(uint8_t);
	582	}
	583
	584	if (stackBuffer && pBufferSize <= 0) { / 'preflighting' request - set needed size into pBufferSize /
	585	*pBufferSize = bufferSizeNeeded;
	586	return 0;
	587	}
	588
	589	/* Pointers on 64-bit platforms need to be aligned
	590	* on a 64-bit boundry in memory.
	591	*/
	592	if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
	593	int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
	594	if (*pBufferSize > offsetUp) {
	595	*pBufferSize -= offsetUp;
	596	stackBufferChars += offsetUp;
	597	}
	598	else {
	599	/* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
	600	*pBufferSize = 1;
	601	}
	602	}
	603	stackBuffer = (void *)stackBufferChars;
	604
	605	if (stackBuffer == NULL \|\| *pBufferSize < bufferSizeNeeded) {
	606	/* allocate one here...*/
	607	stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
	608	// Null pointer check.
	609	if (stackBufferChars == NULL) {
	610	*status = U_MEMORY_ALLOCATION_ERROR;
	611	return NULL;
	612	}
	613	colAllocated = TRUE;
	614	if (U_SUCCESS(*status)) {
	615	*status = U_SAFECLONE_ALLOCATED_WARNING;
	616	}
	617	}
	618	localCollator = (UCollator *)stackBufferChars;
	619	rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);
	620	defaultReorderCodes = (int32_t)((uint8_t)rules + rulesSize);
	621	reorderCodes = (int32_t)((uint8_t)defaultReorderCodes + defaultReorderCodesSize);
	622	leadBytePermutationTable = (uint8_t*)reorderCodes + reorderCodesSize;
	623
	624	{
	625	UErrorCode tempStatus = U_ZERO_ERROR;
	626	imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
	627	}
	628	if (coll->freeImageOnClose) {
	629	image = (uint8_t *)uprv_malloc(imageSize);
	630	// Null pointer check
	631	if (image == NULL) {
	632	*status = U_MEMORY_ALLOCATION_ERROR;
	633	return NULL;
	634	}
	635	ucol_cloneBinary(coll, image, imageSize, status);
	636	imageAllocated = TRUE;
	637	}
	638	else {
	639	image = (uint8_t *)coll->image;
	640	}
	641	localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
	642	if (U_FAILURE(*status)) {
	643	return NULL;
	644	}
	645
	646	if (coll->rules) {
	647	if (coll->freeRulesOnClose) {
	648	localCollator->rules = u_strcpy(rules, coll->rules);
	649	//bufferEnd += rulesSize;
	650	}
	651	else {
	652	localCollator->rules = coll->rules;
	653	}
	654	localCollator->freeRulesOnClose = FALSE;
	655	localCollator->rulesLength = coll->rulesLength;
	656	}
	657
	658	// collator reordering
	659	if (coll->defaultReorderCodes) {
	660	localCollator->defaultReorderCodes =
	661	(int32_t) uprv_memcpy(defaultReorderCodes, coll->defaultReorderCodes, coll->defaultReorderCodesLength sizeof(int32_t));
	662	localCollator->defaultReorderCodesLength = coll->defaultReorderCodesLength;
	663	localCollator->freeDefaultReorderCodesOnClose = FALSE;
	664	}
	665	if (coll->reorderCodes) {
	666	localCollator->reorderCodes =
	667	(int32_t)uprv_memcpy(reorderCodes, coll->reorderCodes, coll->reorderCodesLength sizeof(int32_t));
	668	localCollator->reorderCodesLength = coll->reorderCodesLength;
	669	localCollator->freeReorderCodesOnClose = FALSE;
	670	}
	671	if (coll->leadBytePermutationTable) {
	672	localCollator->leadBytePermutationTable =
	673	(uint8_t*) uprv_memcpy(leadBytePermutationTable, coll->leadBytePermutationTable, 256);
	674	localCollator->freeLeadBytePermutationTableOnClose = FALSE;
	675	}
	676
	677	int32_t i;
	678	for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
	679	ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
	680	}
	681	// zero copies of pointers
	682	localCollator->actualLocale = NULL;
	683	localCollator->validLocale = NULL;
	684	localCollator->requestedLocale = NULL;
	685	localCollator->ucaRules = coll->ucaRules; // There should only be one copy here.
	686	localCollator->freeOnClose = colAllocated;
	687	localCollator->freeImageOnClose = imageAllocated;
	688	return localCollator;
	689	}
	690
	691	U_CAPI void U_EXPORT2
	692	ucol_close(UCollator *coll)
	693	{
	694	UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
	695	UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
	696	if(coll != NULL) {
	697	// these are always owned by each UCollator struct,
	698	// so we always free them
	699	if(coll->validLocale != NULL) {
	700	uprv_free(coll->validLocale);
	701	}
	702	if(coll->actualLocale != NULL) {
	703	uprv_free(coll->actualLocale);
	704	}
	705	if(coll->requestedLocale != NULL) {
	706	uprv_free(coll->requestedLocale);
	707	}
	708	if(coll->latinOneCEs != NULL) {
	709	uprv_free(coll->latinOneCEs);
	710	}
	711	if(coll->options != NULL && coll->freeOptionsOnClose) {
	712	uprv_free(coll->options);
	713	}
	714	if(coll->rules != NULL && coll->freeRulesOnClose) {
	715	uprv_free((UChar *)coll->rules);
	716	}
	717	if(coll->image != NULL && coll->freeImageOnClose) {
	718	uprv_free((UCATableHeader *)coll->image);
	719	}
	720
	721	if(coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
	722	uprv_free(coll->leadBytePermutationTable);
	723	}
	724	if(coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) {
	725	uprv_free(coll->defaultReorderCodes);
	726	}
	727	if(coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
	728	uprv_free(coll->reorderCodes);
	729	}
	730
	731	if(coll->delegate != NULL) {
	732	delete (Collator*)coll->delegate;
	733	}
	734
	735	/* Here, it would be advisable to close: */
	736	/* - UData for UCA (unless we stuff it in the root resb */
	737	/* Again, do we need additional housekeeping... HMMM! */
	738	UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
	739	if(coll->freeOnClose){
	740	/* for safeClone, if freeOnClose is FALSE,
	741	don't free the other instance data */
	742	uprv_free(coll);
	743	}
	744	}
	745	UTRACE_EXIT();
	746	}
	747
	748	/* This one is currently used by genrb & tests. After constructing from rules (tailoring),*/
	749	/* you should be able to get the binary chunk to write out... Doesn't look very full now */
	750	U_CFUNC uint8_t* U_EXPORT2
	751	ucol_cloneRuleData(const UCollator coll, int32_t length, UErrorCode *status)
	752	{
	753	uint8_t *result = NULL;
	754	if(U_FAILURE(*status)) {
	755	return NULL;
	756	}
	757	if(coll->hasRealData == TRUE) {
	758	*length = coll->image->size;
	759	result = (uint8_t )uprv_malloc(length);
	760	/* test for NULL */
	761	if (result == NULL) {
	762	*status = U_MEMORY_ALLOCATION_ERROR;
	763	return NULL;
	764	}
	765	uprv_memcpy(result, coll->image, *length);
	766	} else {
	767	*length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
	768	result = (uint8_t )uprv_malloc(length);
	769	/* test for NULL */
	770	if (result == NULL) {
	771	*status = U_MEMORY_ALLOCATION_ERROR;
	772	return NULL;
	773	}
	774
	775	/* build the UCATableHeader with minimal entries */
	776	/* do not copy the header from the UCA file because its values are wrong! */
	777	/* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */
	778
	779	/* reset everything */
	780	uprv_memset(result, 0, *length);
	781
	782	/* set the tailoring-specific values */
	783	UCATableHeader myData = (UCATableHeader )result;
	784	myData->size = *length;
	785
	786	/* offset for the options, the only part of the data that is present after the header */
	787	myData->options = sizeof(UCATableHeader);
	788
	789	/* need to always set the expansion value for an upper bound of the options */
	790	myData->expansion = myData->options + sizeof(UColOptionSet);
	791
	792	myData->magic = UCOL_HEADER_MAGIC;
	793	myData->isBigEndian = U_IS_BIG_ENDIAN;
	794	myData->charSetFamily = U_CHARSET_FAMILY;
	795
	796	/* copy UCA's version; genrb will override all but the builder version with tailoring data */
	797	uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo));
	798
	799	uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
	800	uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
	801	uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
	802	myData->jamoSpecial = coll->image->jamoSpecial;
	803
	804	/* copy the collator options */
	805	uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
	806	}
	807	return result;
	808	}
	809
	810	void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
	811	if(U_FAILURE(*status)) {
	812	return;
	813	}
	814	result->caseFirst = (UColAttributeValue)opts->caseFirst;
	815	result->caseLevel = (UColAttributeValue)opts->caseLevel;
	816	result->frenchCollation = (UColAttributeValue)opts->frenchCollation;
	817	result->normalizationMode = (UColAttributeValue)opts->normalizationMode;
	818	if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) {
	819	return;
	820	}
	821	result->strength = (UColAttributeValue)opts->strength;
	822	result->variableTopValue = opts->variableTopValue;
	823	result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
	824	result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
	825	result->numericCollation = (UColAttributeValue)opts->numericCollation;
	826	result->caseFirstisDefault = TRUE;
	827	result->caseLevelisDefault = TRUE;
	828	result->frenchCollationisDefault = TRUE;
	829	result->normalizationModeisDefault = TRUE;
	830	result->strengthisDefault = TRUE;
	831	result->variableTopValueisDefault = TRUE;
	832	result->alternateHandlingisDefault = TRUE;
	833	result->hiraganaQisDefault = TRUE;
	834	result->numericCollationisDefault = TRUE;
	835
	836	ucol_updateInternalState(result, status);
	837
	838	result->options = opts;
	839	}
	840
	841
	842	/**
	843	* Approximate determination if a character is at a contraction end.
	844	* Guaranteed to be TRUE if a character is at the end of a contraction,
	845	* otherwise it is not deterministic.
	846	* @param c character to be determined
	847	* @param coll collator
	848	*/
	849	static
	850	inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
	851	if (c < coll->minContrEndCP) {
	852	return FALSE;
	853	}
	854
	855	int32_t hash = c;
	856	uint8_t htbyte;
	857	if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
	858	if (U16_IS_TRAIL(c)) {
	859	return TRUE;
	860	}
	861	hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
	862	}
	863	htbyte = coll->contrEndCP[hash>>3];
	864	return (((htbyte >> (hash & 7)) & 1) == 1);
	865	}
	866
	867
	868
	869	/*
	870	* i_getCombiningClass()
	871	* A fast, at least partly inline version of u_getCombiningClass()
	872	* This is a candidate for further optimization. Used heavily
	873	* in contraction processing.
	874	*/
	875	static
	876	inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
	877	uint8_t sCC = 0;
	878	if ((c >= 0x300 && ucol_unsafeCP(c, coll)) \|\| c > 0xFFFF) {
	879	sCC = u_getCombiningClass(c);
	880	}
	881	return sCC;
	882	}
	883
	884	UCollator* ucol_initCollator(const UCATableHeader image, UCollator fillIn, const UCollator UCA, UErrorCode status) {
	885	UChar c;
	886	UCollator *result = fillIn;
	887	if(U_FAILURE(*status) \|\| image == NULL) {
	888	return NULL;
	889	}
	890
	891	if(result == NULL) {
	892	result = (UCollator *)uprv_malloc(sizeof(UCollator));
	893	if(result == NULL) {
	894	*status = U_MEMORY_ALLOCATION_ERROR;
	895	return result;
	896	}
	897	result->freeOnClose = TRUE;
	898	} else {
	899	result->freeOnClose = FALSE;
	900	}
	901
	902	result->delegate = NULL;
	903
	904	result->image = image;
	905	result->mapping.getFoldingOffset = _getFoldingOffset;
	906	const uint8_t mapping = (uint8_t)result->image+result->image->mappingPosition;
	907	utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
	908	if(U_FAILURE(*status)) {
	909	if(result->freeOnClose == TRUE) {
	910	uprv_free(result);
	911	result = NULL;
	912	}
	913	return result;
	914	}
	915
	916	result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
	917	result->contractionCEs = (uint32_t)((uint8_t)result->image+result->image->contractionCEs);
	918	result->contractionIndex = (UChar)((uint8_t)result->image+result->image->contractionIndex);
	919	result->expansion = (uint32_t)((uint8_t)result->image+result->image->expansion);
	920	result->rules = NULL;
	921	result->rulesLength = 0;
	922	result->freeRulesOnClose = FALSE;
	923	result->defaultReorderCodes = NULL;
	924	result->defaultReorderCodesLength = 0;
	925	result->freeDefaultReorderCodesOnClose = FALSE;
	926	result->reorderCodes = NULL;
	927	result->reorderCodesLength = 0;
	928	result->freeReorderCodesOnClose = FALSE;
	929	result->leadBytePermutationTable = NULL;
	930	result->freeLeadBytePermutationTableOnClose = FALSE;
	931
	932	/* get the version info from UCATableHeader and populate the Collator struct*/
	933	result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
	934	result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
	935	result->dataVersion[2] = 0;
	936	result->dataVersion[3] = 0;
	937
	938	result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
	939	result->minUnsafeCP = 0;
	940	for (c=0; c<0x300; c++) { // Find the smallest unsafe char.
	941	if (ucol_unsafeCP(c, result)) break;
	942	}
	943	result->minUnsafeCP = c;
	944
	945	result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;
	946	result->minContrEndCP = 0;
	947	for (c=0; c<0x300; c++) { // Find the Contraction-ending char.
	948	if (ucol_contractionEndCP(c, result)) break;
	949	}
	950	result->minContrEndCP = c;
	951
	952	/* max expansion tables */
	953	result->endExpansionCE = (uint32_t)((uint8_t)result->image +
	954	result->image->endExpansionCE);
	955	result->lastEndExpansionCE = result->endExpansionCE +
	956	result->image->endExpansionCECount - 1;
	957	result->expansionCESize = (uint8_t*)result->image +
	958	result->image->expansionCESize;
	959
	960
	961	//result->errorCode = *status;
	962
	963	result->latinOneCEs = NULL;
	964
	965	result->latinOneRegenTable = FALSE;
	966	result->latinOneFailed = FALSE;
	967	result->UCA = UCA;
	968
	969	/* Normally these will be set correctly later. This is the default if you use UCA or the default. */
	970	result->ucaRules = NULL;
	971	result->actualLocale = NULL;
	972	result->validLocale = NULL;
	973	result->requestedLocale = NULL;
	974	result->hasRealData = FALSE; // real data lives in .dat file...
	975	result->freeImageOnClose = FALSE;
	976
	977	/* set attributes */
	978	ucol_setOptionsFromHeader(
	979	result,
	980	(UColOptionSet)((uint8_t)result->image+result->image->options),
	981	status);
	982	result->freeOptionsOnClose = FALSE;
	983
	984	return result;
	985	}
	986
	987	/* new Mark's code */
	988
	989	/**
	990	* For generation of Implicit CEs
	991	* @author Davis
	992	*
	993	* Cleaned up so that changes can be made more easily.
	994	* Old values:
	995	# First Implicit: E26A792D
	996	# Last Implicit: E3DC70C0
	997	# First CJK: E0030300
	998	# Last CJK: E0A9DD00
	999	# First CJK_A: E0A9DF00
	1000	# Last CJK_A: E0DE3100
	1001	*/
	1002	/* Following is a port of Mark's code for new treatment of implicits.
	1003	* It is positioned here, since ucol_initUCA need to initialize the
	1004	* variables below according to the data in the fractional UCA.
	1005	*/
	1006
	1007	/**
	1008	* Function used to:
	1009	* a) collapse the 2 different Han ranges from UCA into one (in the right order), and
	1010	* b) bump any non-CJK characters by 0x110000 (NON_CJK_OFFSET).
	1011	* The relevant blocks are:
	1012	* A: 4E00..9FFF; CJK Unified Ideographs
	1013	* F900..FAFF; CJK Compatibility Ideographs
	1014	* B: 3400..4DBF; CJK Unified Ideographs Extension A
	1015	* 20000..XX; CJK Unified Ideographs Extension B (and others later on)
	1016	* As long as
	1017	* no new B characters are allocated between 4E00 and FAFF, and
	1018	* no new A characters are outside of this range,
	1019	* (very high probability) this simple code will work.
	1020	* The reordered blocks are:
	1021	* Block1 is CJK
	1022	* Block2 is CJK_COMPAT_USED
	1023	* Block3 is CJK_A
	1024	* (all contiguous)
	1025	* Any other CJK gets its normal code point
	1026	* Any non-CJK gets +0x110000 (NON_CJK_OFFSET)
	1027	* When we reorder Block1, we make sure that it is at the very start,
	1028	* so that it will use a 3-byte form.
	1029	* Warning: we only pick up the compatibility characters that are
	1030	* NOT decomposed, so that block is smaller!
	1031	*/
	1032
	1033	// CONSTANTS
	1034	static const UChar32
	1035	NON_CJK_OFFSET = 0x110000,
	1036	UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
	1037
	/**
	 * Parameters of the implicit-weight encoding.
	 * All of them start out as 0 and are precomputed by initImplicitConstants().
	 */
	static int32_t final3Multiplier = 0;  // step between final bytes, 3-byte form (gap3 + 1)
	static int32_t final4Multiplier = 0;  // step between final bytes, 4-byte form (gap4 + 1)
	static int32_t final3Count = 0;       // number of usable final-byte values, 3-byte form
	static int32_t final4Count = 0;       // number of usable final-byte values, 4-byte form
	static int32_t medialCount = 0;       // number of values a middle byte can take
	static int32_t min3Primary = 0;       // first lead byte of the 3-byte form
	static int32_t min4Primary = 0;       // first lead byte of the 4-byte form
	static int32_t max4Primary = 0;       // last lead byte of the 4-byte form
	static int32_t minTrail = 0;          // smallest trail byte
	static int32_t maxTrail = 0;          // largest trail byte
	static int32_t max3Trail = 0;         // largest final byte used by the 3-byte form
	static int32_t max4Trail = 0;         // largest final byte used by the 4-byte form
	static int32_t min4Boundary = 0;      // first raw value encoded with 4 bytes
	1055
	1056	static const UChar32
	1057	// 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
	1058	// 9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; (Unicode 6.1)
	1059	CJK_BASE = 0x4E00,
	1060	CJK_LIMIT = 0x9FCC+1,
	1061	// Unified CJK ideographs in the compatibility ideographs block.
	1062	CJK_COMPAT_USED_BASE = 0xFA0E,
	1063	CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
	1064	// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
	1065	// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
	1066	CJK_A_BASE = 0x3400,
	1067	CJK_A_LIMIT = 0x4DB5+1,
	1068	// 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
	1069	// 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
	1070	CJK_B_BASE = 0x20000,
	1071	CJK_B_LIMIT = 0x2A6D6+1,
	1072	// 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
	1073	// 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
	1074	CJK_C_BASE = 0x2A700,
	1075	CJK_C_LIMIT = 0x2B734+1,
	1076	// 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
	1077	// 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
	1078	CJK_D_BASE = 0x2B740,
	1079	CJK_D_LIMIT = 0x2B81D+1;
	1080	// when adding to this list, look for all occurrences (in project)
	1081	// of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
	1082
	1083	static UChar32 swapCJK(UChar32 i) {
	1084	if (i < CJK_A_BASE) {
	1085	// non-CJK
	1086	} else if (i < CJK_A_LIMIT) {
	1087	// Extension A has lower code points than the original Unihan+compat
	1088	// but sorts higher.
	1089	return i - CJK_A_BASE
	1090	+ (CJK_LIMIT - CJK_BASE)
	1091	+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
	1092	} else if (i < CJK_BASE) {
	1093	// non-CJK
	1094	} else if (i < CJK_LIMIT) {
	1095	return i - CJK_BASE;
	1096	} else if (i < CJK_COMPAT_USED_BASE) {
	1097	// non-CJK
	1098	} else if (i < CJK_COMPAT_USED_LIMIT) {
	1099	return i - CJK_COMPAT_USED_BASE
	1100	+ (CJK_LIMIT - CJK_BASE);
	1101	} else if (i < CJK_B_BASE) {
	1102	// non-CJK
	1103	} else if (i < CJK_B_LIMIT) {
	1104	return i; // non-BMP-CJK
	1105	} else if (i < CJK_C_BASE) {
	1106	// non-CJK
	1107	} else if (i < CJK_C_LIMIT) {
	1108	return i; // non-BMP-CJK
	1109	} else if (i < CJK_D_BASE) {
	1110	// non-CJK
	1111	} else if (i < CJK_D_LIMIT) {
	1112	return i; // non-BMP-CJK
	1113	}
	1114	return i + NON_CJK_OFFSET; // non-CJK
	1115	}
	1116
	1117	U_CAPI UChar32 U_EXPORT2
	1118	uprv_uca_getRawFromCodePoint(UChar32 i) {
	1119	return swapCJK(i)+1;
	1120	}
	1121
	1122	U_CAPI UChar32 U_EXPORT2
	1123	uprv_uca_getCodePointFromRaw(UChar32 i) {
	1124	i--;
	1125	UChar32 result = 0;
	1126	if(i >= NON_CJK_OFFSET) {
	1127	result = i - NON_CJK_OFFSET;
	1128	} else if(i >= CJK_B_BASE) {
	1129	result = i;
	1130	} else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
	1131	if(i < CJK_LIMIT - CJK_BASE) {
	1132	result = i + CJK_BASE;
	1133	} else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
	1134	result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
	1135	} else {
	1136	result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
	1137	}
	1138	} else {
	1139	result = -1;
	1140	}
	1141	return result;
	1142	}
	1143
	1144	// GET IMPLICIT PRIMARY WEIGHTS
	1145	// Return value is left justified primary key
	1146	U_CAPI uint32_t U_EXPORT2
	1147	uprv_uca_getImplicitFromRaw(UChar32 cp) {
	1148	/*
	1149	if (cp < 0 \|\| cp > UCOL_MAX_INPUT) {
	1150	throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
	1151	}
	1152	*/
	1153	int32_t last0 = cp - min4Boundary;
	1154	if (last0 < 0) {
	1155	int32_t last1 = cp / final3Count;
	1156	last0 = cp % final3Count;
	1157
	1158	int32_t last2 = last1 / medialCount;
	1159	last1 %= medialCount;
	1160
	1161	last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
	1162	last1 = minTrail + last1; // offset
	1163	last2 = min3Primary + last2; // offset
	1164	/*
	1165	if (last2 >= min4Primary) {
	1166	throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
	1167	}
	1168	*/
	1169	return (last2 << 24) + (last1 << 16) + (last0 << 8);
	1170	} else {
	1171	int32_t last1 = last0 / final4Count;
	1172	last0 %= final4Count;
	1173
	1174	int32_t last2 = last1 / medialCount;
	1175	last1 %= medialCount;
	1176
	1177	int32_t last3 = last2 / medialCount;
	1178	last2 %= medialCount;
	1179
	1180	last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
	1181	last1 = minTrail + last1; // offset
	1182	last2 = minTrail + last2; // offset
	1183	last3 = min4Primary + last3; // offset
	1184	/*
	1185	if (last3 > max4Primary) {
	1186	throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
	1187	}
	1188	*/
	1189	return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
	1190	}
	1191	}
	1192
	1193	static uint32_t U_EXPORT2
	1194	uprv_uca_getImplicitPrimary(UChar32 cp) {
	1195	//fprintf(stdout, "Incoming: %04x\n", cp);
	1196	//if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
	1197
	1198	cp = swapCJK(cp);
	1199	cp++;
	1200	// we now have a range of numbers from 0 to 21FFFF.
	1201
	1202	//if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
	1203	//fprintf(stdout, "CJK swapped: %04x\n", cp);
	1204
	1205	return uprv_uca_getImplicitFromRaw(cp);
	1206	}
	1207
	1208	/**
	1209	* Converts implicit CE into raw integer ("code point")
	1210	* @param implicit
	1211	* @return -1 if illegal format
	1212	*/
	1213	U_CAPI UChar32 U_EXPORT2
	1214	uprv_uca_getRawFromImplicit(uint32_t implicit) {
	1215	UChar32 result;
	1216	UChar32 b3 = implicit & 0xFF;
	1217	UChar32 b2 = (implicit >> 8) & 0xFF;
	1218	UChar32 b1 = (implicit >> 16) & 0xFF;
	1219	UChar32 b0 = (implicit >> 24) & 0xFF;
	1220
	1221	// simple parameter checks
	1222	if (b0 < min3Primary \|\| b0 > max4Primary
	1223	\|\| b1 < minTrail \|\| b1 > maxTrail)
	1224	return -1;
	1225	// normal offsets
	1226	b1 -= minTrail;
	1227
	1228	// take care of the final values, and compose
	1229	if (b0 < min4Primary) {
	1230	if (b2 < minTrail \|\| b2 > max3Trail \|\| b3 != 0)
	1231	return -1;
	1232	b2 -= minTrail;
	1233	UChar32 remainder = b2 % final3Multiplier;
	1234	if (remainder != 0)
	1235	return -1;
	1236	b0 -= min3Primary;
	1237	b2 /= final3Multiplier;
	1238	result = ((b0 * medialCount) + b1) * final3Count + b2;
	1239	} else {
	1240	if (b2 < minTrail \|\| b2 > maxTrail
	1241	\|\| b3 < minTrail \|\| b3 > max4Trail)
	1242	return -1;
	1243	b2 -= minTrail;
	1244	b3 -= minTrail;
	1245	UChar32 remainder = b3 % final4Multiplier;
	1246	if (remainder != 0)
	1247	return -1;
	1248	b3 /= final4Multiplier;
	1249	b0 -= min4Primary;
	1250	result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
	1251	}
	1252	// final check
	1253	if (result < 0 \|\| result > UCOL_MAX_INPUT)
	1254	return -1;
	1255	return result;
	1256	}
	1257
	1258
	// Quotient a/b rounded up, computed as 1 + (a-1)/b. This equals the ceiling
	// of a/b for positive a and b.
	static inline int32_t divideAndRoundUp(int a, int b) {
	    const int quotientBelow = (a - 1) / b;
	    return quotientBelow + 1;
	}
	1262
	1263	/* this function is either called from initUCA or from genUCA before
	1264	* doing canonical closure for the UCA.
	1265	*/
	1266
	1267	/**
	1268	* Set up to generate implicits.
	1269	* Maintenance Note: this function may end up being called more than once, due
	1270	* to threading races during initialization. Make sure that
	1271	* none of the Constants is ever transiently assigned an
	1272	* incorrect value.
	1273	* @param minPrimary
	1274	* @param maxPrimary
	1275	* @param minTrail final byte
	1276	* @param maxTrail final byte
	1277	* @param gap3 the gap we leave for tailoring for 3-byte forms
	1278	* @param gap4 the gap we leave for tailoring for 4-byte forms
	1279	*/
	1280	static void initImplicitConstants(int minPrimary, int maxPrimary,
	1281	int minTrailIn, int maxTrailIn,
	1282	int gap3, int primaries3count,
	1283	UErrorCode *status) {
	1284	// some simple parameter checks
	1285	if ((minPrimary < 0 \|\| minPrimary >= maxPrimary \|\| maxPrimary > 0xFF)
	1286	\|\| (minTrailIn < 0 \|\| minTrailIn >= maxTrailIn \|\| maxTrailIn > 0xFF)
	1287	\|\| (primaries3count < 1))
	1288	{
	1289	*status = U_ILLEGAL_ARGUMENT_ERROR;
	1290	return;
	1291	};
	1292
	1293	minTrail = minTrailIn;
	1294	maxTrail = maxTrailIn;
	1295
	1296	min3Primary = minPrimary;
	1297	max4Primary = maxPrimary;
	1298	// compute constants for use later.
	1299	// number of values we can use in trailing bytes
	1300	// leave room for empty values between AND above, e.g. if gap = 2
	1301	// range 3..7 => +3 -4 -5 -6 -7: so 1 value
	1302	// range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
	1303	// range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
	1304	final3Multiplier = gap3 + 1;
	1305	final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
	1306	max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
	1307
	1308	// medials can use full range
	1309	medialCount = (maxTrail - minTrail + 1);
	1310	// find out how many values fit in each form
	1311	int32_t threeByteCount = medialCount * final3Count;
	1312	// now determine where the 3/4 boundary is.
	1313	// we use 3 bytes below the boundary, and 4 above
	1314	int32_t primariesAvailable = maxPrimary - minPrimary + 1;
	1315	int32_t primaries4count = primariesAvailable - primaries3count;
	1316
	1317
	1318	int32_t min3ByteCoverage = primaries3count * threeByteCount;
	1319	min4Primary = minPrimary + primaries3count;
	1320	min4Boundary = min3ByteCoverage;
	1321	// Now expand out the multiplier for the 4 bytes, and redo.
	1322
	1323	int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
	1324	int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
	1325	int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
	1326	int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
	1327	if (gap4 < 1) {
	1328	*status = U_ILLEGAL_ARGUMENT_ERROR;
	1329	return;
	1330	}
	1331	final4Multiplier = gap4 + 1;
	1332	final4Count = neededPerFinalByte;
	1333	max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
	1334	}
	1335
	1336	/**
	1337	* Supply parameters for generating implicit CEs
	1338	*/
	1339	U_CAPI void U_EXPORT2
	1340	uprv_uca_initImplicitConstants(UErrorCode *status) {
	1341	// 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
	1342	//initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
	1343	initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
	1344	}
	1345
	1346
	1347	/* collIterNormalize Incremental Normalization happens here. */
	1348	/* pick up the range of chars identifed by FCD, */
	1349	/* normalize it into the collIterate's writable buffer, */
	1350	/* switch the collIterate's state to use the writable buffer. */
	1351	/* */
	1352	static
	1353	void collIterNormalize(collIterate *collationSource)
	1354	{
	1355	UErrorCode status = U_ZERO_ERROR;
	1356	const UChar srcP = collationSource->pos - 1; / Start of chars to normalize */
	1357	const UChar endP = collationSource->fcdPosition; / End of region to normalize+1 */
	1358
	1359	collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)),
	1360	collationSource->writableBuffer,
	1361	status);
	1362	if (U_FAILURE(status)) {
	1363	#ifdef UCOL_DEBUG
	1364	fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status));
	1365	#endif
	1366	return;
	1367	}
	1368
	1369	collationSource->pos = collationSource->writableBuffer.getTerminatedBuffer();
	1370	collationSource->origFlags = collationSource->flags;
	1371	collationSource->flags \|= UCOL_ITER_INNORMBUF;
	1372	collationSource->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE_ITERATOR);
	1373	}
	1374
	1375
	1376	// This function takes the iterator and extracts normalized stuff up to the next boundary
	1377	// It is similar in the end results to the collIterNormalize, but for the cases when we
	1378	// use an iterator
	1379	/*static
	1380	inline void normalizeIterator(collIterate *collationSource) {
	1381	UErrorCode status = U_ZERO_ERROR;
	1382	UBool wasNormalized = FALSE;
	1383	//int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
	1384	uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
	1385	int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
	1386	(int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
	1387	if(status == U_BUFFER_OVERFLOW_ERROR \|\| normLen == (int32_t)collationSource->writableBufSize) {
	1388	// reallocate and terminate
	1389	if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
	1390	&collationSource->writableBuffer,
	1391	(int32_t *)&collationSource->writableBufSize, normLen + 1,
	1392	0)
	1393	) {
	1394	#ifdef UCOL_DEBUG
	1395	fprintf(stderr, "normalizeIterator(), out of memory\n");
	1396	#endif
	1397	return;
	1398	}
	1399	status = U_ZERO_ERROR;
	1400	//collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
	1401	collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
	1402	normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
	1403	(int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
	1404	}
	1405	// Terminate the buffer - we already checked that it is big enough
	1406	collationSource->writableBuffer[normLen] = 0;
	1407	if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
	1408	collationSource->flags \|= UCOL_ITER_ALLOCATED;
	1409	}
	1410	collationSource->pos = collationSource->writableBuffer;
	1411	collationSource->origFlags = collationSource->flags;
	1412	collationSource->flags \|= UCOL_ITER_INNORMBUF;
	1413	collationSource->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE_ITERATOR);
	1414	}*/
	1415
	1416
	1417	/* Incremental FCD check and normalize */
	1418	/* Called from getNextCE when normalization state is suspect. */
	1419	/* When entering, the state is known to be this: */
	1420	/* o We are working in the main buffer of the collIterate, not the side */
	1421	/* writable buffer. When in the side buffer, normalization mode is always off, */
	1422	/* so we won't get here. */
	1423	/* o The leading combining class from the current character is 0 or */
	1424	/* the trailing combining class of the previous char was zero. */
	1425	/* True because the previous call to this function will have always exited */
	1426	/* that way, and we get called for every char where cc might be non-zero. */
	1427	static
	1428	inline UBool collIterFCD(collIterate *collationSource) {
	1429	const UChar srcP, endP;
	1430	uint8_t leadingCC;
	1431	uint8_t prevTrailingCC = 0;
	1432	uint16_t fcd;
	1433	UBool needNormalize = FALSE;
	1434
	1435	srcP = collationSource->pos-1;
	1436
	1437	if (collationSource->flags & UCOL_ITER_HASLEN) {
	1438	endP = collationSource->endp;
	1439	} else {
	1440	endP = NULL;
	1441	}
	1442
	1443	// Get the trailing combining class of the current character. If it's zero, we are OK.
	1444	fcd = g_nfcImpl->nextFCD16(srcP, endP);
	1445	if (fcd != 0) {
	1446	prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
	1447
	1448	if (prevTrailingCC != 0) {
	1449	// The current char has a non-zero trailing CC. Scan forward until we find
	1450	// a char with a leading cc of zero.
	1451	while (endP == NULL \|\| srcP != endP)
	1452	{
	1453	const UChar *savedSrcP = srcP;
	1454
	1455	fcd = g_nfcImpl->nextFCD16(srcP, endP);
	1456	leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
	1457	if (leadingCC == 0) {
	1458	srcP = savedSrcP; // Hit char that is not part of combining sequence.
	1459	// back up over it. (Could be surrogate pair!)
	1460	break;
	1461	}
	1462
	1463	if (leadingCC < prevTrailingCC) {
	1464	needNormalize = TRUE;
	1465	}
	1466
	1467	prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
	1468	}
	1469	}
	1470	}
	1471
	1472	collationSource->fcdPosition = (UChar *)srcP;
	1473
	1474	return needNormalize;
	1475	}
	1476
	1477	/****************************************************************************/
	1478	/* Following are the CE retrieval functions */
	1479	/* */
	1480	/****************************************************************************/
	1481
	1482	static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
	1483	static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
	1484
	1485	/* there should be a macro version of this function in the header file */
	1486	/* This is the first function that tries to fetch a collation element */
	1487	/* If it's not succesfull or it encounters a more difficult situation */
	1488	/* some more sofisticated and slower functions are invoked */
	1489	static
	1490	inline uint32_t ucol_IGetNextCE(const UCollator coll, collIterate collationSource, UErrorCode *status) {
	1491	uint32_t order = 0;
	1492	if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */
	1493	order = (collationSource->toReturn++); / if so, return them */
	1494	if(collationSource->CEpos == collationSource->toReturn) {
	1495	collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? collationSource->extendCEs : collationSource->CEs;
	1496	}
	1497	return order;
	1498	}
	1499
	1500	UChar ch = 0;
	1501	collationSource->offsetReturn = NULL;
	1502
	1503	do {
	1504	for (;;) /* Loop handles case when incremental normalize switches */
	1505	{ /* to or from the side buffer / original string, and we */
	1506	/* need to start again to get the next character. */
	1507
	1508	if ((collationSource->flags & (UCOL_ITER_HASLEN \| UCOL_ITER_INNORMBUF \| UCOL_ITER_NORM \| UCOL_HIRAGANA_Q \| UCOL_USE_ITERATOR)) == 0)
	1509	{
	1510	// The source string is null terminated and we're not working from the side buffer,
	1511	// and we're not normalizing. This is the fast path.
	1512	// (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
	1513	ch = *collationSource->pos++;
	1514	if (ch != 0) {
	1515	break;
	1516	}
	1517	else {
	1518	return UCOL_NO_MORE_CES;
	1519	}
	1520	}
	1521
	1522	if (collationSource->flags & UCOL_ITER_HASLEN) {
	1523	// Normal path for strings when length is specified.
	1524	// (We can't be in side buffer because it is always null terminated.)
	1525	if (collationSource->pos >= collationSource->endp) {
	1526	// Ran off of the end of the main source string. We're done.
	1527	return UCOL_NO_MORE_CES;
	1528	}
	1529	ch = *collationSource->pos++;
	1530	}
	1531	else if(collationSource->flags & UCOL_USE_ITERATOR) {
	1532	UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
	1533	if(iterCh == U_SENTINEL) {
	1534	return UCOL_NO_MORE_CES;
	1535	}
	1536	ch = (UChar)iterCh;
	1537	}
	1538	else
	1539	{
	1540	// Null terminated string.
	1541	ch = *collationSource->pos++;
	1542	if (ch == 0) {
	1543	// Ran off end of buffer.
	1544	if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
	1545	// Ran off end of main string. backing up one character.
	1546	collationSource->pos--;
	1547	return UCOL_NO_MORE_CES;
	1548	}
	1549	else
	1550	{
	1551	// Hit null in the normalize side buffer.
	1552	// Usually this means the end of the normalized data,
	1553	// except for one odd case: a null followed by combining chars,
	1554	// which is the case if we are at the start of the buffer.
	1555	if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
	1556	break;
	1557	}
	1558
	1559	// Null marked end of side buffer.
	1560	// Revert to the main string and
	1561	// loop back to top to try again to get a character.
	1562	collationSource->pos = collationSource->fcdPosition;
	1563	collationSource->flags = collationSource->origFlags;
	1564	continue;
	1565	}
	1566	}
	1567	}
	1568
	1569	if(collationSource->flags&UCOL_HIRAGANA_Q) {
	1570	/* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
	1571	* based on whether the previous codepoint was Hiragana or Katakana.
	1572	*/
	1573	if(((ch>=0x3040 && ch<=0x3096) \|\| (ch >= 0x309d && ch <= 0x309f)) \|\|
	1574	((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
	1575	collationSource->flags \|= UCOL_WAS_HIRAGANA;
	1576	} else {
	1577	collationSource->flags &= ~UCOL_WAS_HIRAGANA;
	1578	}
	1579	}
	1580
	1581	// We've got a character. See if there's any fcd and/or normalization stuff to do.
	1582	// Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
	1583	if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
	1584	break;
	1585	}
	1586
	1587	if (collationSource->fcdPosition >= collationSource->pos) {
	1588	// An earlier FCD check has already covered the current character.
	1589	// We can go ahead and process this char.
	1590	break;
	1591	}
	1592
	1593	if (ch < ZERO_CC_LIMIT_ ) {
	1594	// Fast fcd safe path. Trailing combining class == 0. This char is OK.
	1595	break;
	1596	}
	1597
	1598	if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
	1599	// We need to peek at the next character in order to tell if we are FCD
	1600	if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
	1601	// We are at the last char of source string.
	1602	// It is always OK for FCD check.
	1603	break;
	1604	}
	1605
	1606	// Not at last char of source string (or we'll check against terminating null). Do the FCD fast test
	1607	if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
	1608	break;
	1609	}
	1610	}
	1611
	1612
	1613	// Need a more complete FCD check and possible normalization.
	1614	if (collIterFCD(collationSource)) {
	1615	collIterNormalize(collationSource);
	1616	}
	1617	if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
	1618	// No normalization was needed. Go ahead and process the char we already had.
	1619	break;
	1620	}
	1621
	1622	// Some normalization happened. Next loop iteration will pick up a char
	1623	// from the normalization buffer.
	1624
	1625	} // end for (;;)
	1626
	1627
	1628	if (ch <= 0xFF) {
	1629	/* For latin-1 characters we never need to fall back to the UCA table */
	1630	/* because all of the UCA data is replicated in the latinOneMapping array */
	1631	order = coll->latinOneMapping[ch];
	1632	if (order > UCOL_NOT_FOUND) {
	1633	order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
	1634	}
	1635	}
	1636	else
	1637	{
	1638	// Always use UCA for Han, Hangul
	1639	// (Han extension A is before main Han block)
	1640	// ** Han compatibility chars ?? **
	1641	if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
	1642	(ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
	1643	if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
	1644	// between the two target ranges; do normal lookup
	1645	// ** this range is YI, Modifier tone letters, **
	1646	// ** Latin-D, Syloti Nagari, Phagas-pa. **
	1647	// ** Latin-D might be tailored, so we need to **
	1648	// ** do the normal lookup for these guys. **
	1649	order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
	1650	} else {
	1651	// in one of the target ranges; use UCA
	1652	order = UCOL_NOT_FOUND;
	1653	}
	1654	} else {
	1655	order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
	1656	}
	1657
	1658	if(order > UCOL_NOT_FOUND) { /* if a CE is special */
	1659	order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */
	1660	}
	1661
	1662	if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */
	1663	/* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
	1664	order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
	1665
	1666	if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
	1667	order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
	1668	}
	1669	}
	1670	}
	1671	} while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
	1672
	1673	if(order == UCOL_NOT_FOUND) {
	1674	order = getImplicit(ch, collationSource);
	1675	}
	1676	return order; /* return the CE */
	1677	}
	1678
	1679	/* ucol_getNextCE, out-of-line version for use from other files. */
	1680	U_CAPI uint32_t U_EXPORT2
	1681	ucol_getNextCE(const UCollator coll, collIterate collationSource, UErrorCode *status) {
	1682	return ucol_IGetNextCE(coll, collationSource, status);
	1683	}
	1684
	1685
	1686	/**
	1687	* Incremental previous normalization happens here. Pick up the range of chars
	1688	* identifed by FCD, normalize it into the collIterate's writable buffer,
	1689	* switch the collIterate's state to use the writable buffer.
	1690	* @param data collation iterator data
	1691	*/
	1692	static
	1693	void collPrevIterNormalize(collIterate *data)
	1694	{
	1695	UErrorCode status = U_ZERO_ERROR;
	1696	const UChar pEnd = data->pos; / End normalize + 1 */
	1697	const UChar *pStart;
	1698
	1699	/* Start normalize */
	1700	if (data->fcdPosition == NULL) {
	1701	pStart = data->string;
	1702	}
	1703	else {
	1704	pStart = data->fcdPosition + 1;
	1705	}
	1706
	1707	int32_t normLen =
	1708	data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)),
	1709	data->writableBuffer,
	1710	status).
	1711	length();
	1712	if(U_FAILURE(status)) {
	1713	return;
	1714	}
	1715	/*
	1716	this puts the null termination infront of the normalized string instead
	1717	of the end
	1718	*/
	1719	data->writableBuffer.insert(0, (UChar)0);
	1720
	1721	/*
	1722	* The usual case at this point is that we've got a base
	1723	* character followed by marks that were normalized. If
	1724	* fcdPosition is NULL, that means that we backed up to
	1725	* the beginning of the string and there's no base character.
	1726	*
	1727	* Forward processing will usually normalize when it sees
	1728	* the first mark, so that mark will get it's natural offset
	1729	* and the rest will get the offset of the character following
	1730	* the marks. The base character will also get its natural offset.
	1731	*
	1732	* We write the offset of the base character, if there is one,
	1733	* followed by the offset of the first mark and then the offsets
	1734	* of the rest of the marks.
	1735	*/
	1736	int32_t firstMarkOffset = 0;
	1737	int32_t trailOffset = (int32_t)(data->pos - data->string + 1);
	1738	int32_t trailCount = normLen - 1;
	1739
	1740	if (data->fcdPosition != NULL) {
	1741	int32_t baseOffset = (int32_t)(data->fcdPosition - data->string);
	1742	UChar baseChar = *data->fcdPosition;
	1743
	1744	firstMarkOffset = baseOffset + 1;
	1745
	1746	/*
	1747	* If the base character is the start of a contraction, forward processing
	1748	* will normalize the marks while checking for the contraction, which means
	1749	* that the offset of the first mark will the same as the other marks.
	1750	*
	1751	* ** THIS IS PROBABLY NOT A COMPLETE TEST **
	1752	*/
	1753	if (baseChar >= 0x100) {
	1754	uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);
	1755
	1756	if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
	1757	baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
	1758	}
	1759
	1760	if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
	1761	firstMarkOffset = trailOffset;
	1762	}
	1763	}
	1764
	1765	data->appendOffset(baseOffset, status);
	1766	}
	1767
	1768	data->appendOffset(firstMarkOffset, status);
	1769
	1770	for (int32_t i = 0; i < trailCount; i += 1) {
	1771	data->appendOffset(trailOffset, status);
	1772	}
	1773
	1774	data->offsetRepeatValue = trailOffset;
	1775
	1776	data->offsetReturn = data->offsetStore - 1;
	1777	if (data->offsetReturn == data->offsetBuffer) {
	1778	data->offsetStore = data->offsetBuffer;
	1779	}
	1780
	1781	data->pos = data->writableBuffer.getTerminatedBuffer() + 1 + normLen;
	1782	data->origFlags = data->flags;
	1783	data->flags \|= UCOL_ITER_INNORMBUF;
	1784	data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);
	1785	}
	1786
	1787
	1788	/**
	1789	* Incremental FCD check for previous iteration and normalize. Called from
	1790	* getPrevCE when normalization state is suspect.
	1791	* When entering, the state is known to be this:
	1792	* o We are working in the main buffer of the collIterate, not the side
	1793	* writable buffer. When in the side buffer, normalization mode is always
	1794	* off, so we won't get here.
	1795	* o The leading combining class from the current character is 0 or the
	1796	* trailing combining class of the previous char was zero.
	1797	* True because the previous call to this function will have always exited
	1798	* that way, and we get called for every char where cc might be non-zero.
	1799	* @param data collation iterate struct
	1800	* @return normalization status, TRUE for normalization to be done, FALSE
	1801	* otherwise
	1802	*/
	1803	static
	1804	inline UBool collPrevIterFCD(collIterate *data)
	1805	{
	1806	const UChar src, start;
	1807	uint8_t leadingCC;
	1808	uint8_t trailingCC = 0;
	1809	uint16_t fcd;
	1810	UBool result = FALSE;
	1811
	1812	start = data->string;
	1813	src = data->pos + 1;
	1814
	1815	/* Get the trailing combining class of the current character. */
	1816	fcd = g_nfcImpl->previousFCD16(start, src);
	1817
	1818	leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
	1819
	1820	if (leadingCC != 0) {
	1821	/*
	1822	The current char has a non-zero leading combining class.
	1823	Scan backward until we find a char with a trailing cc of zero.
	1824	*/
	1825	for (;;)
	1826	{
	1827	if (start == src) {
	1828	data->fcdPosition = NULL;
	1829	return result;
	1830	}
	1831
	1832	fcd = g_nfcImpl->previousFCD16(start, src);
	1833
	1834	trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
	1835
	1836	if (trailingCC == 0) {
	1837	break;
	1838	}
	1839
	1840	if (leadingCC < trailingCC) {
	1841	result = TRUE;
	1842	}
	1843
	1844	leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
	1845	}
	1846	}
	1847
	1848	data->fcdPosition = (UChar *)src;
	1849
	1850	return result;
	1851	}
	1852
	1853	/** gets a code unit from the string at a given offset
	1854	* Handles both normal and iterative cases.
	1855	* No error checking - caller beware!
	1856	*/
	1857	static inline
	1858	UChar peekCodeUnit(collIterate *source, int32_t offset) {
	1859	if(source->pos != NULL) {
	1860	return *(source->pos + offset);
	1861	} else if(source->iterator != NULL) {
	1862	UChar32 c;
	1863	if(offset != 0) {
	1864	source->iterator->move(source->iterator, offset, UITER_CURRENT);
	1865	c = source->iterator->next(source->iterator);
	1866	source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);
	1867	} else {
	1868	c = source->iterator->current(source->iterator);
	1869	}
	1870	return c >= 0 ? (UChar)c : 0xfffd; // If the caller works properly, we should never see c<0.
	1871	} else {
	1872	return 0xfffd;
	1873	}
	1874	}
	1875
	1876	// Code point version. Treats the offset as a _code point_ delta.
	1877	// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16.
	1878	// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer.
	1879	static inline
	1880	UChar32 peekCodePoint(collIterate *source, int32_t offset) {
	1881	UChar32 c;
	1882	if(source->pos != NULL) {
	1883	const UChar *p = source->pos;
	1884	if(offset >= 0) {
	1885	// Skip forward over (offset-1) code points.
	1886	while(--offset >= 0) {
	1887	if(U16_IS_LEAD(p++) && U16_IS_TRAIL(p)) {
	1888	++p;
	1889	}
	1890	}
	1891	// Read the code point there.
	1892	c = *p++;
	1893	UChar trail;
	1894	if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) {
	1895	c = U16_GET_SUPPLEMENTARY(c, trail);
	1896	}
	1897	} else /* offset<0 */ {
	1898	// Skip backward over (offset-1) code points.
	1899	while(++offset < 0) {
	1900	if(U16_IS_TRAIL(--p) && U16_IS_LEAD((p - 1))) {
	1901	--p;
	1902	}
	1903	}
	1904	// Read the code point before that.
	1905	c = *--p;
	1906	UChar lead;
	1907	if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) {
	1908	c = U16_GET_SUPPLEMENTARY(lead, c);
	1909	}
	1910	}
	1911	} else if(source->iterator != NULL) {
	1912	if(offset >= 0) {
	1913	// Skip forward over (offset-1) code points.
	1914	int32_t fwd = offset;
	1915	while(fwd-- > 0) {
	1916	uiter_next32(source->iterator);
	1917	}
	1918	// Read the code point there.
	1919	c = uiter_current32(source->iterator);
	1920	// Return to the starting point, skipping backward over (offset-1) code points.
	1921	while(offset-- > 0) {
	1922	uiter_previous32(source->iterator);
	1923	}
	1924	} else /* offset<0 */ {
	1925	// Read backward, reading offset code points, remember only the last-read one.
	1926	int32_t back = offset;
	1927	do {
	1928	c = uiter_previous32(source->iterator);
	1929	} while(++back < 0);
	1930	// Return to the starting position, skipping forward over offset code points.
	1931	do {
	1932	uiter_next32(source->iterator);
	1933	} while(++offset < 0);
	1934	}
	1935	} else {
	1936	c = U_SENTINEL;
	1937	}
	1938	return c;
	1939	}
	1940
	1941	/**
	1942	* Determines if we are at the start of the data string in the backwards
	1943	* collation iterator
	1944	* @param data collation iterator
	1945	* @return TRUE if we are at the start
	1946	*/
	1947	static
	1948	inline UBool isAtStartPrevIterate(collIterate *data) {
	1949	if(data->pos == NULL && data->iterator != NULL) {
	1950	return !data->iterator->hasPrevious(data->iterator);
	1951	}
	1952	//return (collIter_bos(data)) \|\|
	1953	return (data->pos == data->string) \|\|
	1954	((data->flags & UCOL_ITER_INNORMBUF) && (data->pos != NULL) &&
	1955	*(data->pos - 1) == 0 && data->fcdPosition == NULL);
	1956	}
	1957
	1958	static
	1959	inline void goBackOne(collIterate *data) {
	1960	# if 0
	1961	// somehow, it looks like we need to keep iterator synced up
	1962	// at all times, as above.
	1963	if(data->pos) {
	1964	data->pos--;
	1965	}
	1966	if(data->iterator) {
	1967	data->iterator->previous(data->iterator);
	1968	}
	1969	#endif
	1970	if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {
	1971	data->iterator->previous(data->iterator);
	1972	}
	1973	if(data->pos) {
	1974	data->pos --;
	1975	}
	1976	}
	1977
	1978	/**
	1979	* Inline function that gets a simple CE.
	1980	* So what it does is that it will first check the expansion buffer. If the
	1981	* expansion buffer is not empty, ie the end pointer to the expansion buffer
	1982	* is different from the string pointer, we return the collation element at the
	1983	* return pointer and decrement it.
	1984	* For more complicated CEs it resorts to getComplicatedCE.
	1985	* @param coll collator data
	1986	* @param data collation iterator struct
	1987	* @param status error status
	1988	*/
	1989	static
	1990	inline uint32_t ucol_IGetPrevCE(const UCollator coll, collIterate data,
	1991	UErrorCode *status)
	1992	{
	1993	uint32_t result = (uint32_t)UCOL_NULLORDER;
	1994
	1995	if (data->offsetReturn != NULL) {
	1996	if (data->offsetRepeatCount > 0) {
	1997	data->offsetRepeatCount -= 1;
	1998	} else {
	1999	if (data->offsetReturn == data->offsetBuffer) {
	2000	data->offsetReturn = NULL;
	2001	data->offsetStore = data->offsetBuffer;
	2002	} else {
	2003	data->offsetReturn -= 1;
	2004	}
	2005	}
	2006	}
	2007
	2008	if ((data->extendCEs && data->toReturn > data->extendCEs) \|\|
	2009	(!data->extendCEs && data->toReturn > data->CEs))
	2010	{
	2011	data->toReturn -= 1;
	2012	result = *(data->toReturn);
	2013	if (data->CEs == data->toReturn \|\| data->extendCEs == data->toReturn) {
	2014	data->CEpos = data->toReturn;
	2015	}
	2016	}
	2017	else {
	2018	UChar ch = 0;
	2019
	2020	do {
	2021	/*
	2022	Loop handles case when incremental normalize switches to or from the
	2023	side buffer / original string, and we need to start again to get the
	2024	next character.
	2025	*/
	2026	for (;;) {
	2027	if (data->flags & UCOL_ITER_HASLEN) {
	2028	/*
	2029	Normal path for strings when length is specified.
	2030	Not in side buffer because it is always null terminated.
	2031	*/
	2032	if (data->pos <= data->string) {
	2033	/* End of the main source string */
	2034	return UCOL_NO_MORE_CES;
	2035	}
	2036	data->pos --;
	2037	ch = *data->pos;
	2038	}
	2039	// we are using an iterator to go back. Pray for us!
	2040	else if (data->flags & UCOL_USE_ITERATOR) {
	2041	UChar32 iterCh = data->iterator->previous(data->iterator);
	2042	if(iterCh == U_SENTINEL) {
	2043	return UCOL_NO_MORE_CES;
	2044	} else {
	2045	ch = (UChar)iterCh;
	2046	}
	2047	}
	2048	else {
	2049	data->pos --;
	2050	ch = *data->pos;
	2051	/* we are in the side buffer. */
	2052	if (ch == 0) {
	2053	/*
	2054	At the start of the normalize side buffer.
	2055	Go back to string.
	2056	Because pointer points to the last accessed character,
	2057	hence we have to increment it by one here.
	2058	*/
	2059	data->flags = data->origFlags;
	2060	data->offsetRepeatValue = 0;
	2061
	2062	if (data->fcdPosition == NULL) {
	2063	data->pos = data->string;
	2064	return UCOL_NO_MORE_CES;
	2065	}
	2066	else {
	2067	data->pos = data->fcdPosition + 1;
	2068	}
	2069
	2070	continue;
	2071	}
	2072	}
	2073
	2074	if(data->flags&UCOL_HIRAGANA_Q) {
	2075	if(ch>=0x3040 && ch<=0x309f) {
	2076	data->flags \|= UCOL_WAS_HIRAGANA;
	2077	} else {
	2078	data->flags &= ~UCOL_WAS_HIRAGANA;
	2079	}
	2080	}
	2081
	2082	/*
	2083	* got a character to determine if there's fcd and/or normalization
	2084	* stuff to do.
	2085	* if the current character is not fcd.
	2086	* if current character is at the start of the string
	2087	* Trailing combining class == 0.
	2088	* Note if pos is in the writablebuffer, norm is always 0
	2089	*/
	2090	if (ch < ZERO_CC_LIMIT_ \|\|
	2091	// this should propel us out of the loop in the iterator case
	2092	(data->flags & UCOL_ITER_NORM) == 0 \|\|
	2093	(data->fcdPosition != NULL && data->fcdPosition <= data->pos)
	2094	\|\| data->string == data->pos) {
	2095	break;
	2096	}
	2097
	2098	if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
	2099	/* if next character is FCD */
	2100	if (data->pos == data->string) {
	2101	/* First char of string is always OK for FCD check */
	2102	break;
	2103	}
	2104
	2105	/* Not first char of string, do the FCD fast test */
	2106	if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
	2107	break;
	2108	}
	2109	}
	2110
	2111	/* Need a more complete FCD check and possible normalization. */
	2112	if (collPrevIterFCD(data)) {
	2113	collPrevIterNormalize(data);
	2114	}
	2115
	2116	if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
	2117	/* No normalization. Go ahead and process the char. */
	2118	break;
	2119	}
	2120
	2121	/*
	2122	Some normalization happened.
	2123	Next loop picks up a char from the normalization buffer.
	2124	*/
	2125	}
	2126
	2127	/* attempt to handle contractions, after removal of the backwards
	2128	contraction
	2129	*/
	2130	if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
	2131	result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
	2132	} else {
	2133	if (ch <= 0xFF) {
	2134	result = coll->latinOneMapping[ch];
	2135	}
	2136	else {
	2137	// Always use UCA for [3400..9FFF], [AC00..D7AF]
	2138	// ** [FA0E..FA2F] ?? **
	2139	if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
	2140	(ch >= 0x3400 && ch <= 0xD7AF)) {
	2141	if (ch > 0x9FFF && ch < 0xAC00) {
	2142	// between the two target ranges; do normal lookup
	2143	// ** this range is YI, Modifier tone letters, **
	2144	// ** Latin-D, Syloti Nagari, Phagas-pa. **
	2145	// ** Latin-D might be tailored, so we need to **
	2146	// ** do the normal lookup for these guys. **
	2147	result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
	2148	} else {
	2149	result = UCOL_NOT_FOUND;
	2150	}
	2151	} else {
	2152	result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
	2153	}
	2154	}
	2155	if (result > UCOL_NOT_FOUND) {
	2156	result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
	2157	}
	2158	if (result == UCOL_NOT_FOUND) { // Not found in master list
	2159	if (!isAtStartPrevIterate(data) &&
	2160	ucol_contractionEndCP(ch, data->coll))
	2161	{
	2162	result = UCOL_CONTRACTION;
	2163	} else {
	2164	if(coll->UCA) {
	2165	result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
	2166	}
	2167	}
	2168
	2169	if (result > UCOL_NOT_FOUND) {
	2170	if(coll->UCA) {
	2171	result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
	2172	}
	2173	}
	2174	}
	2175	}
	2176	} while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
	2177
	2178	if(result == UCOL_NOT_FOUND) {
	2179	result = getPrevImplicit(ch, data);
	2180	}
	2181	}
	2182
	2183	return result;
	2184	}
	2185
	2186
	2187	/* ucol_getPrevCE, out-of-line version for use from other files. */
	2188	U_CFUNC uint32_t U_EXPORT2
	2189	ucol_getPrevCE(const UCollator coll, collIterate data,
	2190	UErrorCode *status) {
	2191	return ucol_IGetPrevCE(coll, data, status);
	2192	}
	2193
	2194
	2195	/* this should be connected to special Jamo handling */
	2196	U_CFUNC uint32_t U_EXPORT2
	2197	ucol_getFirstCE(const UCollator coll, UChar u, UErrorCode status) {
	2198	collIterate colIt;
	2199	IInit_collIterate(coll, &u, 1, &colIt, status);
	2200	if(U_FAILURE(*status)) {
	2201	return 0;
	2202	}
	2203	return ucol_IGetNextCE(coll, &colIt, status);
	2204	}
	2205
	2206	/**
	2207	* Inserts the argument character into the end of the buffer pushing back the
	2208	* null terminator.
	2209	* @param data collIterate struct data
	2210	* @param ch character to be appended
	2211	* @return the position of the new addition
	2212	*/
	2213	static
	2214	inline const UChar * insertBufferEnd(collIterate *data, UChar ch)
	2215	{
	2216	int32_t oldLength = data->writableBuffer.length();
	2217	return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength;
	2218	}
	2219
	2220	/**
	2221	* Inserts the argument string into the end of the buffer pushing back the
	2222	* null terminator.
	2223	* @param data collIterate struct data
	2224	* @param string to be appended
	2225	* @param length of the string to be appended
	2226	* @return the position of the new addition
	2227	*/
	2228	static
	2229	inline const UChar * insertBufferEnd(collIterate data, const UChar str, int32_t length)
	2230	{
	2231	int32_t oldLength = data->writableBuffer.length();
	2232	return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldLength;
	2233	}
	2234
	2235	/**
	2236	* Special normalization function for contraction in the forwards iterator.
	2237	* This normalization sequence will place the current character at source->pos
	2238	* and its following normalized sequence into the buffer.
	2239	* The fcd position, pos will be changed.
	2240	* pos will now point to positions in the buffer.
	2241	* Flags will be changed accordingly.
	2242	* @param data collation iterator data
	2243	*/
	2244	static
	2245	inline void normalizeNextContraction(collIterate *data)
	2246	{
	2247	int32_t strsize;
	2248	UErrorCode status = U_ZERO_ERROR;
	2249	/* because the pointer points to the next character */
	2250	const UChar *pStart = data->pos - 1;
	2251	const UChar *pEnd;
	2252
	2253	if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
	2254	data->writableBuffer.setTo(*(pStart - 1));
	2255	strsize = 1;
	2256	}
	2257	else {
	2258	strsize = data->writableBuffer.length();
	2259	}
	2260
	2261	pEnd = data->fcdPosition;
	2262
	2263	data->writableBuffer.append(
	2264	data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status));
	2265	if(U_FAILURE(status)) {
	2266	return;
	2267	}
	2268
	2269	data->pos = data->writableBuffer.getTerminatedBuffer() + strsize;
	2270	data->origFlags = data->flags;
	2271	data->flags \|= UCOL_ITER_INNORMBUF;
	2272	data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);
	2273	}
	2274
	2275	/**
	2276	* Contraction character management function that returns the next character
	2277	* for the forwards iterator.
	2278	* Does nothing if the next character is in buffer and not the first character
	2279	* in it.
	2280	* Else it checks next character in data string to see if it is normalizable.
	2281	* If it is not, the character is simply copied into the buffer, else
	2282	* the whole normalized substring is copied into the buffer, including the
	2283	* current character.
	2284	* @param data collation element iterator data
	2285	* @return next character
	2286	*/
	2287	static
	2288	inline UChar getNextNormalizedChar(collIterate *data)
	2289	{
	2290	UChar nextch;
	2291	UChar ch;
	2292	// Here we need to add the iterator code. One problem is the way
	2293	// end of string is handled. If we just return next char, it could
	2294	// be the sentinel. Most of the cases already check for this, but we
	2295	// need to be sure.
	2296	if ((data->flags & (UCOL_ITER_NORM \| UCOL_ITER_INNORMBUF)) == 0 ) {
	2297	/* if no normalization and not in buffer. */
	2298	if(data->flags & UCOL_USE_ITERATOR) {
	2299	return (UChar)data->iterator->next(data->iterator);
	2300	} else {
	2301	return *(data->pos ++);
	2302	}
	2303	}
	2304
	2305	//if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) {
	2306	//normalizeIterator(data);
	2307	//}
	2308
	2309	UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
	2310	if ((innormbuf && *data->pos != 0) \|\|
	2311	(data->fcdPosition != NULL && !innormbuf &&
	2312	data->pos < data->fcdPosition)) {
	2313	/*
	2314	if next character is in normalized buffer, no further normalization
	2315	is required
	2316	*/
	2317	return *(data->pos ++);
	2318	}
	2319
	2320	if (data->flags & UCOL_ITER_HASLEN) {
	2321	/* in data string */
	2322	if (data->pos + 1 == data->endp) {
	2323	return *(data->pos ++);
	2324	}
	2325	}
	2326	else {
	2327	if (innormbuf) {
	2328	// inside the normalization buffer, but at the end
	2329	// (since we encountered zero). This means, in the
	2330	// case we're using char iterator, that we need to
	2331	// do another round of normalization.
	2332	//if(data->origFlags & UCOL_USE_ITERATOR) {
	2333	// we need to restore original flags,
	2334	// otherwise, we'll lose them
	2335	//data->flags = data->origFlags;
	2336	//normalizeIterator(data);
	2337	//return *(data->pos++);
	2338	//} else {
	2339	/*
	2340	in writable buffer, at this point fcdPosition can not be
	2341	pointing to the end of the data string. see contracting tag.
	2342	*/
	2343	if(data->fcdPosition) {
	2344	if (*(data->fcdPosition + 1) == 0 \|\|
	2345	data->fcdPosition + 1 == data->endp) {
	2346	/* at the end of the string, dump it into the normalizer */
	2347	data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1;
	2348	// Check if data->pos received a null pointer
	2349	if (data->pos == NULL) {
	2350	return (UChar)-1; // Return to indicate error.
	2351	}
	2352	return *(data->fcdPosition ++);
	2353	}
	2354	data->pos = data->fcdPosition;
	2355	} else if(data->origFlags & UCOL_USE_ITERATOR) {
	2356	// if we are here, we're using a normalizing iterator.
	2357	// we should just continue further.
	2358	data->flags = data->origFlags;
	2359	data->pos = NULL;
	2360	return (UChar)data->iterator->next(data->iterator);
	2361	}
	2362	//}
	2363	}
	2364	else {
	2365	if (*(data->pos + 1) == 0) {
	2366	return *(data->pos ++);
	2367	}
	2368	}
	2369	}
	2370
	2371	ch = *data->pos ++;
	2372	nextch = *data->pos;
	2373
	2374	/*
	2375	* if the current character is not fcd.
	2376	* Trailing combining class == 0.
	2377	*/
	2378	if ((data->fcdPosition == NULL \|\| data->fcdPosition < data->pos) &&
	2379	(nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ \|\|
	2380	ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
	2381	/*
	2382	Need a more complete FCD check and possible normalization.
	2383	normalize substring will be appended to buffer
	2384	*/
	2385	if (collIterFCD(data)) {
	2386	normalizeNextContraction(data);
	2387	return *(data->pos ++);
	2388	}
	2389	else if (innormbuf) {
	2390	/* fcdposition shifted even when there's no normalization, if we
	2391	don't input the rest into this, we'll get the wrong position when
	2392	we reach the end of the writableBuffer */
	2393	int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);
	2394	data->pos = insertBufferEnd(data, data->pos - 1, length);
	2395	// Check if data->pos received a null pointer
	2396	if (data->pos == NULL) {
	2397	return (UChar)-1; // Return to indicate error.
	2398	}
	2399	return *(data->pos ++);
	2400	}
	2401	}
	2402
	2403	if (innormbuf) {
	2404	/*
	2405	no normalization is to be done hence only one character will be
	2406	appended to the buffer.
	2407	*/
	2408	data->pos = insertBufferEnd(data, ch) + 1;
	2409	// Check if data->pos received a null pointer
	2410	if (data->pos == NULL) {
	2411	return (UChar)-1; // Return to indicate error.
	2412	}
	2413	}
	2414
	2415	/* points back to the pos in string */
	2416	return ch;
	2417	}
	2418
	2419
	2420
	2421	/**
	2422	* Function to copy the buffer into writableBuffer and sets the fcd position to
	2423	* the correct position
	2424	* @param source data string source
	2425	* @param buffer character buffer
	2426	*/
	2427	static
	2428	inline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer)
	2429	{
	2430	/* okay confusing part here. to ensure that the skipped characters are
	2431	considered later, we need to place it in the appropriate position in the
	2432	normalization buffer and reassign the pos pointer. simple case if pos
	2433	reside in string, simply copy to normalization buffer and
	2434	fcdposition = pos, pos = start of normalization buffer. if pos in
	2435	normalization buffer, we'll insert the copy infront of pos and point pos
	2436	to the start of the normalization buffer. why am i doing these copies?
	2437	well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecialCE does
	2438	not require any changes, which be really painful. */
	2439	if (source->flags & UCOL_ITER_INNORMBUF) {
	2440	int32_t replaceLength = source->pos - source->writableBuffer.getBuffer();
	2441	source->writableBuffer.replace(0, replaceLength, buffer);
	2442	}
	2443	else {
	2444	source->fcdPosition = source->pos;
	2445	source->origFlags = source->flags;
	2446	source->flags \|= UCOL_ITER_INNORMBUF;
	2447	source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE_ITERATOR);
	2448	source->writableBuffer = buffer;
	2449	}
	2450
	2451	source->pos = source->writableBuffer.getTerminatedBuffer();
	2452	}
	2453
	2454	/**
	2455	* Function to get the discontiguos collation element within the source.
	2456	* Note this function will set the position to the appropriate places.
	2457	* @param coll current collator used
	2458	* @param source data string source
	2459	* @param constart index to the start character in the contraction table
	2460	* @return discontiguos collation element offset
	2461	*/
	2462	static
	2463	uint32_t getDiscontiguous(const UCollator coll, collIterate source,
	2464	const UChar *constart)
	2465	{
	2466	/* source->pos currently points to the second combining character after
	2467	the start character */
	2468	const UChar *temppos = source->pos;
	2469	UnicodeString buffer;
	2470	const UChar *tempconstart = constart;
	2471	uint8_t tempflags = source->flags;
	2472	UBool multicontraction = FALSE;
	2473	collIterateState discState;
	2474
	2475	backupState(source, &discState);
	2476
	2477	buffer.setTo(peekCodePoint(source, -1));
	2478	for (;;) {
	2479	UChar *UCharOffset;
	2480	UChar schar,
	2481	tchar;
	2482	uint32_t result;
	2483
	2484	if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
	2485	\|\| (peekCodeUnit(source, 0) == 0 &&
	2486	//\|\| (*source->pos == 0 &&
	2487	((source->flags & UCOL_ITER_INNORMBUF) == 0 \|\|
	2488	source->fcdPosition == NULL \|\|
	2489	source->fcdPosition == source->endp \|\|
	2490	*(source->fcdPosition) == 0 \|\|
	2491	u_getCombiningClass(*(source->fcdPosition)) == 0)) \|\|
	2492	/* end of string in null terminated string or stopped by a
	2493	null character, note fcd does not always point to a base
	2494	character after the discontiguos change */
	2495	u_getCombiningClass(peekCodePoint(source, 0)) == 0) {
	2496	//u_getCombiningClass(*(source->pos)) == 0) {
	2497	//constart = (UChar *)coll->image + getContractOffset(CE);
	2498	if (multicontraction) {
	2499	source->pos = temppos - 1;
	2500	setDiscontiguosAttribute(source, buffer);
	2501	return *(coll->contractionCEs +
	2502	(tempconstart - coll->contractionIndex));
	2503	}
	2504	constart = tempconstart;
	2505	break;
	2506	}
	2507
	2508	UCharOffset = (UChar )(tempconstart + 1); / skip the backward offset*/
	2509	schar = getNextNormalizedChar(source);
	2510
	2511	while (schar > (tchar = *UCharOffset)) {
	2512	UCharOffset++;
	2513	}
	2514
	2515	if (schar != tchar) {
	2516	/* not the correct codepoint. we stuff the current codepoint into
	2517	the discontiguos buffer and try the next character */
	2518	buffer.append(schar);
	2519	continue;
	2520	}
	2521	else {
	2522	if (u_getCombiningClass(schar) ==
	2523	u_getCombiningClass(peekCodePoint(source, -2))) {
	2524	buffer.append(schar);
	2525	continue;
	2526	}
	2527	result = *(coll->contractionCEs +
	2528	(UCharOffset - coll->contractionIndex));
	2529	}
	2530
	2531	if (result == UCOL_NOT_FOUND) {
	2532	break;
	2533	} else if (isContraction(result)) {
	2534	/* this is a multi-contraction*/
	2535	tempconstart = (UChar *)coll->image + getContractOffset(result);
	2536	if (*(coll->contractionCEs + (constart - coll->contractionIndex))
	2537	!= UCOL_NOT_FOUND) {
	2538	multicontraction = TRUE;
	2539	temppos = source->pos + 1;
	2540	}
	2541	} else {
	2542	setDiscontiguosAttribute(source, buffer);
	2543	return result;
	2544	}
	2545	}
	2546
	2547	/* no problems simply reverting just like that,
	2548	if we are in string before getting into this function, points back to
	2549	string hence no problem.
	2550	if we are in normalization buffer before getting into this function,
	2551	since we'll never use another normalization within this function, we
	2552	know that fcdposition points to a base character. the normalization buffer
	2553	never change, hence this revert works. */
	2554	loadState(source, &discState, TRUE);
	2555	goBackOne(source);
	2556
	2557	//source->pos = temppos - 1;
	2558	source->flags = tempflags;
	2559	return *(coll->contractionCEs + (constart - coll->contractionIndex));
	2560	}
	2561
	2562	/* now uses Mark's getImplicitPrimary code */
	2563	static
	2564	inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
	2565	uint32_t r = uprv_uca_getImplicitPrimary(cp);
	2566	*(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) \| 0x000000C0;
	2567	collationSource->offsetRepeatCount += 1;
	2568	return (r & UCOL_PRIMARYMASK) \| 0x00000505; // This was 'order'
	2569	}
	2570
	2571	/**
	2572	* Inserts the argument character into the front of the buffer replacing the
	2573	* front null terminator.
	2574	* @param data collation element iterator data
	2575	* @param ch character to be appended
	2576	*/
	2577	static
	2578	inline void insertBufferFront(collIterate *data, UChar ch)
	2579	{
	2580	data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTerminatedBuffer() + 2;
	2581	}
	2582
	2583	/**
	2584	* Special normalization function for contraction in the previous iterator.
	2585	* This normalization sequence will place the current character at source->pos
	2586	* and its following normalized sequence into the buffer.
	2587	* The fcd position, pos will be changed.
	2588	* pos will now point to positions in the buffer.
	2589	* Flags will be changed accordingly.
	2590	* @param data collation iterator data
	2591	*/
	2592	static
	2593	inline void normalizePrevContraction(collIterate data, UErrorCode status)
	2594	{
	2595	const UChar pEnd = data->pos + 1; / End normalize + 1 */
	2596	const UChar *pStart;
	2597
	2598	UnicodeString endOfBuffer;
	2599	if (data->flags & UCOL_ITER_HASLEN) {
	2600	/*
	2601	normalization buffer not used yet, we'll pull down the next
	2602	character into the end of the buffer
	2603	*/
	2604	endOfBuffer.setTo(*pEnd);
	2605	}
	2606	else {
	2607	endOfBuffer.setTo(data->writableBuffer, 1); // after the leading NUL
	2608	}
	2609
	2610	if (data->fcdPosition == NULL) {
	2611	pStart = data->string;
	2612	}
	2613	else {
	2614	pStart = data->fcdPosition + 1;
	2615	}
	2616	int32_t normLen =
	2617	data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)),
	2618	data->writableBuffer,
	2619	*status).
	2620	length();
	2621	if(U_FAILURE(*status)) {
	2622	return;
	2623	}
	2624	/*
	2625	this puts the null termination infront of the normalized string instead
	2626	of the end
	2627	*/
	2628	data->pos =
	2629	data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() +
	2630	1 + normLen;
	2631	data->origFlags = data->flags;
	2632	data->flags \|= UCOL_ITER_INNORMBUF;
	2633	data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);
	2634	}
	2635
	2636	/**
	2637	* Contraction character management function that returns the previous character
	2638	* for the backwards iterator.
	2639	* Does nothing if the previous character is in buffer and not the first
	2640	* character in it.
	2641	* Else it checks previous character in data string to see if it is
	2642	* normalizable.
	2643	* If it is not, the character is simply copied into the buffer, else
	2644	* the whole normalized substring is copied into the buffer, including the
	2645	* current character.
	2646	* @param data collation element iterator data
	2647	* @return previous character
	2648	*/
	2649	static
	2650	inline UChar getPrevNormalizedChar(collIterate data, UErrorCode status)
	2651	{
	2652	UChar prevch;
	2653	UChar ch;
	2654	const UChar *start;
	2655	UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
	2656	if ((data->flags & (UCOL_ITER_NORM \| UCOL_ITER_INNORMBUF)) == 0 \|\|
	2657	(innormbuf && *(data->pos - 1) != 0)) {
	2658	/*
	2659	if no normalization.
	2660	if previous character is in normalized buffer, no further normalization
	2661	is required
	2662	*/
	2663	if(data->flags & UCOL_USE_ITERATOR) {
	2664	data->iterator->move(data->iterator, -1, UITER_CURRENT);
	2665	return (UChar)data->iterator->next(data->iterator);
	2666	} else {
	2667	return *(data->pos - 1);
	2668	}
	2669	}
	2670
	2671	start = data->pos;
	2672	if ((data->fcdPosition==NULL)\|\|(data->flags & UCOL_ITER_HASLEN)) {
	2673	/* in data string */
	2674	if ((start - 1) == data->string) {
	2675	return *(start - 1);
	2676	}
	2677	start --;
	2678	ch = *start;
	2679	prevch = *(start - 1);
	2680	}
	2681	else {
	2682	/*
	2683	in writable buffer, at this point fcdPosition can not be NULL.
	2684	see contracting tag.
	2685	*/
	2686	if (data->fcdPosition == data->string) {
	2687	/* at the start of the string, just dump it into the normalizer */
	2688	insertBufferFront(data, *(data->fcdPosition));
	2689	data->fcdPosition = NULL;
	2690	return *(data->pos - 1);
	2691	}
	2692	start = data->fcdPosition;
	2693	ch = *start;
	2694	prevch = *(start - 1);
	2695	}
	2696	/*
	2697	* if the current character is not fcd.
	2698	* Trailing combining class == 0.
	2699	*/
	2700	if (data->fcdPosition > start &&
	2701	(ch >= NFC_ZERO_CC_BLOCK_LIMIT_ \|\| prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))
	2702	{
	2703	/*
	2704	Need a more complete FCD check and possible normalization.
	2705	normalize substring will be appended to buffer
	2706	*/
	2707	const UChar *backuppos = data->pos;
	2708	data->pos = start;
	2709	if (collPrevIterFCD(data)) {
	2710	normalizePrevContraction(data, status);
	2711	return *(data->pos - 1);
	2712	}
	2713	data->pos = backuppos;
	2714	data->fcdPosition ++;
	2715	}
	2716
	2717	if (innormbuf) {
	2718	/*
	2719	no normalization is to be done hence only one character will be
	2720	appended to the buffer.
	2721	*/
	2722	insertBufferFront(data, ch);
	2723	data->fcdPosition --;
	2724	}
	2725
	2726	return ch;
	2727	}
	2728
	2729	/* This function handles the special CEs like contractions, expansions, surrogates, Thai */
	2730	/* It is called by getNextCE */
	2731
	2732	/* The following should be even */
	2733	#define UCOL_MAX_DIGITS_FOR_NUMBER 254
	2734
	2735	uint32_t ucol_prv_getSpecialCE(const UCollator coll, UChar ch, uint32_t CE, collIterate source, UErrorCode *status) {
	2736	collIterateState entryState;
	2737	backupState(source, &entryState);
	2738	UChar32 cp = ch;
	2739
	2740	for (;;) {
	2741	// This loop will repeat only in the case of contractions, and only when a contraction
	2742	// is found and the first CE resulting from that contraction is itself a special
	2743	// (an expansion, for example.) All other special CE types are fully handled the
	2744	// first time through, and the loop exits.
	2745
	2746	const uint32_t *CEOffset = NULL;
	2747	switch(getCETag(CE)) {
	2748	case NOT_FOUND_TAG:
	2749	/* This one is not found, and we'll let somebody else bother about it... no more games */
	2750	return CE;
	2751	case SPEC_PROC_TAG:
	2752	{
	2753	// Special processing is getting a CE that is preceded by a certain prefix
	2754	// Currently this is only needed for optimizing Japanese length and iteration marks.
	2755	// When we encouter a special processing tag, we go backwards and try to see if
	2756	// we have a match.
	2757	// Contraction tables are used - so the whole process is not unlike contraction.
	2758	// prefix data is stored backwards in the table.
	2759	const UChar *UCharOffset;
	2760	UChar schar, tchar;
	2761	collIterateState prefixState;
	2762	backupState(source, &prefixState);
	2763	loadState(source, &entryState, TRUE);
	2764	goBackOne(source); // We want to look at the point where we entered - actually one
	2765	// before that...
	2766
	2767	for(;;) {
	2768	// This loop will run once per source string character, for as long as we
	2769	// are matching a potential contraction sequence
	2770
	2771	// First we position ourselves at the begining of contraction sequence
	2772	const UChar ContractionStart = UCharOffset = (UChar )coll->image+getContractOffset(CE);
	2773	if (collIter_bos(source)) {
	2774	CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
	2775	break;
	2776	}
	2777	schar = getPrevNormalizedChar(source, status);
	2778	goBackOne(source);
	2779
	2780	while(schar > (tchar = UCharOffset)) { / since the contraction codepoints should be ordered, we skip all that are smaller */
	2781	UCharOffset++;
	2782	}
	2783
	2784	if (schar == tchar) {
	2785	// Found the source string char in the table.
	2786	// Pick up the corresponding CE from the table.
	2787	CE = *(coll->contractionCEs +
	2788	(UCharOffset - coll->contractionIndex));
	2789	}
	2790	else
	2791	{
	2792	// Source string char was not in the table.
	2793	// We have not found the prefix.
	2794	CE = *(coll->contractionCEs +
	2795	(ContractionStart - coll->contractionIndex));
	2796	}
	2797
	2798	if(!isPrefix(CE)) {
	2799	// The source string char was in the contraction table, and the corresponding
	2800	// CE is not a prefix CE. We found the prefix, break
	2801	// out of loop, this CE will end up being returned. This is the normal
	2802	// way out of prefix handling when the source actually contained
	2803	// the prefix.
	2804	break;
	2805	}
	2806	}
	2807	if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue
	2808	loadState(source, &prefixState, TRUE);
	2809	if(source->origFlags & UCOL_USE_ITERATOR) {
	2810	source->flags = source->origFlags;
	2811	}
	2812	} else { // prefix search was a failure, we have to backup all the way to the start
	2813	loadState(source, &entryState, TRUE);
	2814	}
	2815	break;
	2816	}
	2817	case CONTRACTION_TAG:
	2818	{
	2819	/* This should handle contractions */
	2820	collIterateState state;
	2821	backupState(source, &state);
	2822	uint32_t firstCE = (coll->contractionCEs + ((UChar )coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;
	2823	const UChar *UCharOffset;
	2824	UChar schar, tchar;
	2825
	2826	for (;;) {
	2827	/* This loop will run once per source string character, for as long as we */
	2828	/* are matching a potential contraction sequence */
	2829
	2830	/* First we position ourselves at the begining of contraction sequence */
	2831	const UChar ContractionStart = UCharOffset = (UChar )coll->image+getContractOffset(CE);
	2832
	2833	if (collIter_eos(source)) {
	2834	// Ran off the end of the source string.
	2835	CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
	2836	// So we'll pick whatever we have at the point...
	2837	if (CE == UCOL_NOT_FOUND) {
	2838	// back up the source over all the chars we scanned going into this contraction.
	2839	CE = firstCE;
	2840	loadState(source, &state, TRUE);
	2841	if(source->origFlags & UCOL_USE_ITERATOR) {
	2842	source->flags = source->origFlags;
	2843	}
	2844	}
	2845	break;
	2846	}
	2847
	2848	uint8_t maxCC = (uint8_t)((UCharOffset)&0xFF); /get the discontiguos stuff / / skip the backward offset, see above */
	2849	uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);
	2850
	2851	schar = getNextNormalizedChar(source);
	2852	while(schar > (tchar = UCharOffset)) { / since the contraction codepoints should be ordered, we skip all that are smaller */
	2853	UCharOffset++;
	2854	}
	2855
	2856	if (schar == tchar) {
	2857	// Found the source string char in the contraction table.
	2858	// Pick up the corresponding CE from the table.
	2859	CE = *(coll->contractionCEs +
	2860	(UCharOffset - coll->contractionIndex));
	2861	}
	2862	else
	2863	{
	2864	// Source string char was not in contraction table.
	2865	// Unless we have a discontiguous contraction, we have finished
	2866	// with this contraction.
	2867	// in order to do the proper detection, we
	2868	// need to see if we're dealing with a supplementary
	2869	/* We test whether the next two char are surrogate pairs.
	2870	* This test is done if the iterator is not NULL.
	2871	* If there is no surrogate pair, the iterator
	2872	* goes back one if needed. */
	2873	UChar32 miss = schar;
	2874	if (source->iterator) {
	2875	UChar32 surrNextChar; /* the next char in the iteration to test */
	2876	int32_t prevPos; /* holds the previous position before move forward of the source iterator */
	2877	if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) {
	2878	prevPos = source->iterator->index;
	2879	surrNextChar = getNextNormalizedChar(source);
	2880	if (U16_IS_TRAIL(surrNextChar)) {
	2881	miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar);
	2882	} else if (prevPos < source->iterator->index){
	2883	goBackOne(source);
	2884	}
	2885	}
	2886	} else if (U16_IS_LEAD(schar)) {
	2887	miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
	2888	}
	2889
	2890	uint8_t sCC;
	2891	if (miss < 0x300 \|\|
	2892	maxCC == 0 \|\|
	2893	(sCC = i_getCombiningClass(miss, coll)) == 0 \|\|
	2894	sCC>maxCC \|\|
	2895	(allSame != 0 && sCC == maxCC) \|\|
	2896	collIter_eos(source))
	2897	{
	2898	// Contraction can not be discontiguous.
	2899	goBackOne(source); // back up the source string by one,
	2900	// because the character we just looked at was
	2901	// not part of the contraction. */
	2902	if(U_IS_SUPPLEMENTARY(miss)) {
	2903	goBackOne(source);
	2904	}
	2905	CE = *(coll->contractionCEs +
	2906	(ContractionStart - coll->contractionIndex));
	2907	} else {
	2908	//
	2909	// Contraction is possibly discontiguous.
	2910	// Scan more of source string looking for a match
	2911	//
	2912	UChar tempchar;
	2913	/* find the next character if schar is not a base character
	2914	and we are not yet at the end of the string */
	2915	tempchar = getNextNormalizedChar(source);
	2916	// probably need another supplementary thingie here
	2917	goBackOne(source);
	2918	if (i_getCombiningClass(tempchar, coll) == 0) {
	2919	goBackOne(source);
	2920	if(U_IS_SUPPLEMENTARY(miss)) {
	2921	goBackOne(source);
	2922	}
	2923	/* Spit out the last char of the string, wasn't tasty enough */
	2924	CE = *(coll->contractionCEs +
	2925	(ContractionStart - coll->contractionIndex));
	2926	} else {
	2927	CE = getDiscontiguous(coll, source, ContractionStart);
	2928	}
	2929	}
	2930	} // else after if(schar == tchar)
	2931
	2932	if(CE == UCOL_NOT_FOUND) {
	2933	/* The Source string did not match the contraction that we were checking. */
	2934	/* Back up the source position to undo the effects of having partially */
	2935	/* scanned through what ultimately proved to not be a contraction. */
	2936	loadState(source, &state, TRUE);
	2937	CE = firstCE;
	2938	break;
	2939	}
	2940
	2941	if(!isContraction(CE)) {
	2942	// The source string char was in the contraction table, and the corresponding
	2943	// CE is not a contraction CE. We completed the contraction, break
	2944	// out of loop, this CE will end up being returned. This is the normal
	2945	// way out of contraction handling when the source actually contained
	2946	// the contraction.
	2947	break;
	2948	}
	2949
	2950
	2951	// The source string char was in the contraction table, and the corresponding
	2952	// CE is IS a contraction CE. We will continue looping to check the source
	2953	// string for the remaining chars in the contraction.
	2954	uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
	2955	if(tempCE != UCOL_NOT_FOUND) {
	2956	// We have scanned a a section of source string for which there is a
	2957	// CE from the contraction table. Remember the CE and scan position, so
	2958	// that we can return to this point if further scanning fails to
	2959	// match a longer contraction sequence.
	2960	firstCE = tempCE;
	2961
	2962	goBackOne(source);
	2963	backupState(source, &state);
	2964	getNextNormalizedChar(source);
	2965
	2966	// Another way to do this is:
	2967	//collIterateState tempState;
	2968	//backupState(source, &tempState);
	2969	//goBackOne(source);
	2970	//backupState(source, &state);
	2971	//loadState(source, &tempState, TRUE);
	2972
	2973	// The problem is that for incomplete contractions we have to remember the previous
	2974	// position. Before, the only thing I needed to do was state.pos--;
	2975	// After iterator introduction and especially after introduction of normalizing
	2976	// iterators, it became much more difficult to decrease the saved state.
	2977	// I'm not yet sure which of the two methods above is faster.
	2978	}
	2979	} // for(;;)
	2980	break;
	2981	} // case CONTRACTION_TAG:
	2982	case LONG_PRIMARY_TAG:
	2983	{
	2984	*(source->CEpos++) = ((CE & 0xFF)<<24)\|UCOL_CONTINUATION_MARKER;
	2985	CE = ((CE & 0xFFFF00) << 8) \| (UCOL_BYTE_COMMON << 8) \| UCOL_BYTE_COMMON;
	2986	source->offsetRepeatCount += 1;
	2987	return CE;
	2988	}
	2989	case EXPANSION_TAG:
	2990	{
	2991	/* This should handle expansion. */
	2992	/* NOTE: we can encounter both continuations and expansions in an expansion! */
	2993	/* I have to decide where continuations are going to be dealt with */
	2994	uint32_t size;
	2995	uint32_t i; /* general counter */
	2996
	2997	CEOffset = (uint32_t )coll->image+getExpansionOffset(CE); / find the offset to expansion table */
	2998	size = getExpansionCount(CE);
	2999	CE = *CEOffset++;
	3000	//source->offsetRepeatCount = -1;
	3001
	3002	if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
	3003	for(i = 1; i<size; i++) {
	3004	(source->CEpos++) = CEOffset++;
	3005	source->offsetRepeatCount += 1;
	3006	}
	3007	} else { /* else, we do */
	3008	while(*CEOffset != 0) {
	3009	(source->CEpos++) = CEOffset++;
	3010	source->offsetRepeatCount += 1;
	3011	}
	3012	}
	3013
	3014	return CE;
	3015	}
	3016	case DIGIT_TAG:
	3017	{
	3018	/*
	3019	We do a check to see if we want to collate digits as numbers; if so we generate
	3020	a custom collation key. Otherwise we pull out the value stored in the expansion table.
	3021	*/
	3022	//uint32_t size;
	3023	uint32_t i; /* general counter */
	3024
	3025	if (source->coll->numericCollation == UCOL_ON){
	3026	collIterateState digitState = {0,0,0,0,0,0,0,0,0};
	3027	UChar32 char32 = 0;
	3028	int32_t digVal = 0;
	3029
	3030	uint32_t digIndx = 0;
	3031	uint32_t endIndex = 0;
	3032	uint32_t trailingZeroIndex = 0;
	3033
	3034	uint8_t collateVal = 0;
	3035
	3036	UBool nonZeroValReached = FALSE;
	3037
	3038	uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs.
	3039	/*
	3040	We parse the source string until we hit a char that's NOT a digit.
	3041	Use this u_charDigitValue. This might be slow because we have to
	3042	handle surrogates...
	3043	*/
	3044	/*
	3045	if (U16_IS_LEAD(ch)){
	3046	if (!collIter_eos(source)) {
	3047	backupState(source, &digitState);
	3048	UChar trail = getNextNormalizedChar(source);
	3049	if(U16_IS_TRAIL(trail)) {
	3050	char32 = U16_GET_SUPPLEMENTARY(ch, trail);
	3051	} else {
	3052	loadState(source, &digitState, TRUE);
	3053	char32 = ch;
	3054	}
	3055	} else {
	3056	char32 = ch;
	3057	}
	3058	} else {
	3059	char32 = ch;
	3060	}
	3061	digVal = u_charDigitValue(char32);
	3062	*/
	3063	digVal = u_charDigitValue(cp); // if we have arrived here, we have
	3064	// already processed possible supplementaries that trigered the digit tag -
	3065	// all supplementaries are marked in the UCA.
	3066	/*
	3067	We pad a zero in front of the first element anyways. This takes
	3068	care of the (probably) most common case where people are sorting things followed
	3069	by a single digit
	3070	*/
	3071	digIndx++;
	3072	for(;;){
	3073	// Make sure we have enough space. No longer needed;
	3074	// at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER
	3075	// (it has been pre-incremented) so we just ensure that numTempBuf is big enough
	3076	// (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3).
	3077
	3078	// Skipping over leading zeroes.
	3079	if (digVal != 0) {
	3080	nonZeroValReached = TRUE;
	3081	}
	3082	if (nonZeroValReached) {
	3083	/*
	3084	We parse the digit string into base 100 numbers (this fits into a byte).
	3085	We only add to the buffer in twos, thus if we are parsing an odd character,
	3086	that serves as the 'tens' digit while the if we are parsing an even one, that
	3087	is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
	3088	a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
	3089	overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
	3090	than all the other bytes.
	3091	*/
	3092
	3093	if (digIndx % 2 == 1){
	3094	collateVal += (uint8_t)digVal;
	3095
	3096	// We don't enter the low-order-digit case unless we've already seen
	3097	// the high order, or for the first digit, which is always non-zero.
	3098	if (collateVal != 0)
	3099	trailingZeroIndex = 0;
	3100
	3101	numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
	3102	collateVal = 0;
	3103	}
	3104	else{
	3105	// We drop the collation value into the buffer so if we need to do
	3106	// a "front patch" we don't have to check to see if we're hitting the
	3107	// last element.
	3108	collateVal = (uint8_t)(digVal * 10);
	3109
	3110	// Check for trailing zeroes.
	3111	if (collateVal == 0)
	3112	{
	3113	if (!trailingZeroIndex)
	3114	trailingZeroIndex = (digIndx/2) + 2;
	3115	}
	3116	else
	3117	trailingZeroIndex = 0;
	3118
	3119	numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
	3120	}
	3121	digIndx++;
	3122	}
	3123
	3124	// Get next character.
	3125	if (!collIter_eos(source)){
	3126	ch = getNextNormalizedChar(source);
	3127	if (U16_IS_LEAD(ch)){
	3128	if (!collIter_eos(source)) {
	3129	backupState(source, &digitState);
	3130	UChar trail = getNextNormalizedChar(source);
	3131	if(U16_IS_TRAIL(trail)) {
	3132	char32 = U16_GET_SUPPLEMENTARY(ch, trail);
	3133	} else {
	3134	loadState(source, &digitState, TRUE);
	3135	char32 = ch;
	3136	}
	3137	}
	3138	} else {
	3139	char32 = ch;
	3140	}
	3141
	3142	if ((digVal = u_charDigitValue(char32)) == -1 \|\| digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){
	3143	// Resetting position to point to the next unprocessed char. We
	3144	// overshot it when doing our test/set for numbers.
	3145	if (char32 > 0xFFFF) { // For surrogates.
	3146	loadState(source, &digitState, TRUE);
	3147	//goBackOne(source);
	3148	}
	3149	goBackOne(source);
	3150	break;
	3151	}
	3152	} else {
	3153	break;
	3154	}
	3155	}
	3156
	3157	if (nonZeroValReached == FALSE){
	3158	digIndx = 2;
	3159	numTempBuf[2] = 6;
	3160	}
	3161
	3162	endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
	3163	if (digIndx % 2 != 0){
	3164	/*
	3165	We missed a value. Since digIndx isn't even, stuck too many values into the buffer (this is what
	3166	we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
	3167	Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
	3168	single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
	3169	*/
	3170
	3171	for(i = 2; i < endIndex; i++){
	3172	numTempBuf[i] = (((((numTempBuf[i] - 6)/2) % 10) * 10) +
	3173	(((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
	3174	}
	3175	--digIndx;
	3176	}
	3177
	3178	// Subtract one off of the last byte.
	3179	numTempBuf[endIndex-1] -= 1;
	3180
	3181	/*
	3182	We want to skip over the first two slots in the buffer. The first slot
	3183	is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
	3184	sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
	3185	*/
	3186	numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
	3187	numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
	3188
	3189	// Now transfer the collation key to our collIterate struct.
	3190	// The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
	3191	//size = ((endIndex+1) & ~1)/2;
	3192	CE = (((numTempBuf[0] << 8) \| numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) \| //Primary weight
	3193	(UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) \| // Secondary weight
	3194	UCOL_BYTE_COMMON; // Tertiary weight.
	3195	i = 2; // Reset the index into the buffer.
	3196	while(i < endIndex)
	3197	{
	3198	uint32_t primWeight = numTempBuf[i++] << 8;
	3199	if ( i < endIndex)
	3200	primWeight \|= numTempBuf[i++];
	3201	*(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) \| UCOL_CONTINUATION_MARKER;
	3202	}
	3203
	3204	} else {
	3205	// no numeric mode, we'll just switch to whatever we stashed and continue
	3206	CEOffset = (uint32_t )coll->image+getExpansionOffset(CE); / find the offset to expansion table */
	3207	CE = *CEOffset++;
	3208	break;
	3209	}
	3210	return CE;
	3211	}
	3212	/* various implicits optimization */
	3213	case IMPLICIT_TAG: /* everything that is not defined otherwise */
	3214	/* UCA is filled with these. Tailorings are NOT_FOUND */
	3215	return getImplicit(cp, source);
	3216	case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
	3217	// TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit
	3218	return getImplicit(cp, source);
	3219	case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
	3220	{
	3221	static const uint32_t
	3222	SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
	3223	//const uint32_t LCount = 19;
	3224	static const uint32_t VCount = 21;
	3225	static const uint32_t TCount = 28;
	3226	//const uint32_t NCount = VCount * TCount; // 588
	3227	//const uint32_t SCount = LCount * NCount; // 11172
	3228	uint32_t L = ch - SBase;
	3229
	3230	// divide into pieces
	3231
	3232	uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation
	3233	L /= TCount;
	3234	uint32_t V = L % VCount;
	3235	L /= VCount;
	3236
	3237	// offset them
	3238
	3239	L += LBase;
	3240	V += VBase;
	3241	T += TBase;
	3242
	3243	// return the first CE, but first put the rest into the expansion buffer
	3244	if (!source->coll->image->jamoSpecial) { // FAST PATH
	3245
	3246	*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
	3247	if (T != TBase) {
	3248	*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
	3249	}
	3250
	3251	return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
	3252
	3253	} else { // Jamo is Special
	3254	// Since Hanguls pass the FCD check, it is
	3255	// guaranteed that we won't be in
	3256	// the normalization buffer if something like this happens
	3257
	3258	// However, if we are using a uchar iterator and normalization
	3259	// is ON, the Hangul that lead us here is going to be in that
	3260	// normalization buffer. Here we want to restore the uchar
	3261	// iterator state and pull out of the normalization buffer
	3262	if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
	3263	source->flags = source->origFlags; // restore the iterator
	3264	source->pos = NULL;
	3265	}
	3266
	3267	// Move Jamos into normalization buffer
	3268	UChar *buffer = source->writableBuffer.getBuffer(4);
	3269	int32_t bufferLength;
	3270	buffer[0] = (UChar)L;
	3271	buffer[1] = (UChar)V;
	3272	if (T != TBase) {
	3273	buffer[2] = (UChar)T;
	3274	bufferLength = 3;
	3275	} else {
	3276	bufferLength = 2;
	3277	}
	3278	source->writableBuffer.releaseBuffer(bufferLength);
	3279
	3280	// Indicate where to continue in main input string after exhausting the writableBuffer
	3281	source->fcdPosition = source->pos;
	3282
	3283	source->pos = source->writableBuffer.getTerminatedBuffer();
	3284	source->origFlags = source->flags;
	3285	source->flags \|= UCOL_ITER_INNORMBUF;
	3286	source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);
	3287
	3288	return(UCOL_IGNORABLE);
	3289	}
	3290	}
	3291	case SURROGATE_TAG:
	3292	/* we encountered a leading surrogate. We shall get the CE by using the following code unit */
	3293	/* two things can happen here: next code point can be a trailing surrogate - we will use it */
	3294	/* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */
	3295	/* we treat it like an unassigned code point. */
	3296	{
	3297	UChar trail;
	3298	collIterateState state;
	3299	backupState(source, &state);
	3300	if (collIter_eos(source) \|\| !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
	3301	// we chould have stepped one char forward and it might have turned that it
	3302	// was not a trail surrogate. In that case, we have to backup.
	3303	loadState(source, &state, TRUE);
	3304	return UCOL_NOT_FOUND;
	3305	} else {
	3306	/* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */
	3307	CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
	3308	if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.
	3309	// We need to backup
	3310	loadState(source, &state, TRUE);
	3311	return CE;
	3312	}
	3313	// calculate the supplementary code point value, if surrogate was not tailored
	3314	cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
	3315	}
	3316	}
	3317	break;
	3318	case LEAD_SURROGATE_TAG: /* D800-DBFF*/
	3319	UChar nextChar;
	3320	if( source->flags & UCOL_USE_ITERATOR) {
	3321	if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) {
	3322	cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
	3323	source->iterator->next(source->iterator);
	3324	return getImplicit(cp, source);
	3325	}
	3326	} else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) \|\| (source->pos<source->endp)) &&
	3327	U_IS_TRAIL((nextChar=*source->pos))) {
	3328	cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
	3329	source->pos++;
	3330	return getImplicit(cp, source);
	3331	}
	3332	return UCOL_NOT_FOUND;
	3333	case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
	3334	return UCOL_NOT_FOUND; /* broken surrogate sequence */
	3335	case CHARSET_TAG:
	3336	/* not yet implemented */
	3337	/* probably after 1.8 */
	3338	return UCOL_NOT_FOUND;
	3339	default:
	3340	*status = U_INTERNAL_PROGRAM_ERROR;
	3341	CE=0;
	3342	break;
	3343	}
	3344	if (CE <= UCOL_NOT_FOUND) break;
	3345	}
	3346	return CE;
	3347	}
	3348
	3349
	3350	/* now uses Mark's getImplicitPrimary code */
	3351	static
	3352	inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
	3353	uint32_t r = uprv_uca_getImplicitPrimary(cp);
	3354
	3355	*(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) \| 0x00000505;
	3356	collationSource->toReturn = collationSource->CEpos;
	3357
	3358	// ** doesn't work if using iterator **
	3359	if (collationSource->flags & UCOL_ITER_INNORMBUF) {
	3360	collationSource->offsetRepeatCount = 1;
	3361	} else {
	3362	int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string);
	3363
	3364	UErrorCode errorCode = U_ZERO_ERROR;
	3365	collationSource->appendOffset(firstOffset, errorCode);
	3366	collationSource->appendOffset(firstOffset + 1, errorCode);
	3367
	3368	collationSource->offsetReturn = collationSource->offsetStore - 1;
	3369	*(collationSource->offsetBuffer) = firstOffset;
	3370	if (collationSource->offsetReturn == collationSource->offsetBuffer) {
	3371	collationSource->offsetStore = collationSource->offsetBuffer;
	3372	}
	3373	}
	3374
	3375	return ((r & 0x0000FFFF)<<16) \| 0x000000C0;
	3376	}
	3377
	3378	/**
	3379	* This function handles the special CEs like contractions, expansions,
	3380	* surrogates, Thai.
	3381	* It is called by both getPrevCE
	3382	*/
	3383	uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
	3384	collIterate *source,
	3385	UErrorCode *status)
	3386	{
	3387	const uint32_t *CEOffset = NULL;
	3388	UChar *UCharOffset = NULL;
	3389	UChar schar;
	3390	const UChar *constart = NULL;
	3391	uint32_t size;
	3392	UChar buffer[UCOL_MAX_BUFFER];
	3393	uint32_t *endCEBuffer;
	3394	UChar *strbuffer;
	3395	int32_t noChars = 0;
	3396	int32_t CECount = 0;
	3397
	3398	for(;;)
	3399	{
	3400	/* the only ces that loops are thai and contractions */
	3401	switch (getCETag(CE))
	3402	{
	3403	case NOT_FOUND_TAG: /* this tag always returns */
	3404	return CE;
	3405
	3406	case SPEC_PROC_TAG:
	3407	{
	3408	// Special processing is getting a CE that is preceded by a certain prefix
	3409	// Currently this is only needed for optimizing Japanese length and iteration marks.
	3410	// When we encouter a special processing tag, we go backwards and try to see if
	3411	// we have a match.
	3412	// Contraction tables are used - so the whole process is not unlike contraction.
	3413	// prefix data is stored backwards in the table.
	3414	const UChar *UCharOffset;
	3415	UChar schar, tchar;
	3416	collIterateState prefixState;
	3417	backupState(source, &prefixState);
	3418	for(;;) {
	3419	// This loop will run once per source string character, for as long as we
	3420	// are matching a potential contraction sequence
	3421
	3422	// First we position ourselves at the begining of contraction sequence
	3423	const UChar ContractionStart = UCharOffset = (UChar )coll->image+getContractOffset(CE);
	3424
	3425	if (collIter_bos(source)) {
	3426	CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
	3427	break;
	3428	}
	3429	schar = getPrevNormalizedChar(source, status);
	3430	goBackOne(source);
	3431
	3432	while(schar > (tchar = UCharOffset)) { / since the contraction codepoints should be ordered, we skip all that are smaller */
	3433	UCharOffset++;
	3434	}
	3435
	3436	if (schar == tchar) {
	3437	// Found the source string char in the table.
	3438	// Pick up the corresponding CE from the table.
	3439	CE = *(coll->contractionCEs +
	3440	(UCharOffset - coll->contractionIndex));
	3441	}
	3442	else
	3443	{
	3444	// if there is a completely ignorable code point in the middle of
	3445	// a prefix, we need to act as if it's not there
	3446	// assumption: 'real' noncharacters (fffe, ffff, fdd0-fdef are set to zero)
	3447	// lone surrogates cannot be set to zero as it would break other processing
	3448	uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
	3449	// it's easy for BMP code points
	3450	if(isZeroCE == 0) {
	3451	continue;
	3452	} else if(U16_IS_SURROGATE(schar)) {
	3453	// for supplementary code points, we have to check the next one
	3454	// situations where we are going to ignore
	3455	// 1. beginning of the string: schar is a lone surrogate
	3456	// 2. schar is a lone surrogate
	3457	// 3. schar is a trail surrogate in a valid surrogate sequence
	3458	// that is explicitly set to zero.
	3459	if (!collIter_bos(source)) {
	3460	UChar lead;
	3461	if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
	3462	isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
	3463	if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) {
	3464	uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
	3465	if(finalCE == 0) {
	3466	// this is a real, assigned completely ignorable code point
	3467	goBackOne(source);
	3468	continue;
	3469	}
	3470	}
	3471	} else {
	3472	// lone surrogate, treat like unassigned
	3473	return UCOL_NOT_FOUND;
	3474	}
	3475	} else {
	3476	// lone surrogate at the beggining, treat like unassigned
	3477	return UCOL_NOT_FOUND;
	3478	}
	3479	}
	3480	// Source string char was not in the table.
	3481	// We have not found the prefix.
	3482	CE = *(coll->contractionCEs +
	3483	(ContractionStart - coll->contractionIndex));
	3484	}
	3485
	3486	if(!isPrefix(CE)) {
	3487	// The source string char was in the contraction table, and the corresponding
	3488	// CE is not a prefix CE. We found the prefix, break
	3489	// out of loop, this CE will end up being returned. This is the normal
	3490	// way out of prefix handling when the source actually contained
	3491	// the prefix.
	3492	break;
	3493	}
	3494	}
	3495	loadState(source, &prefixState, TRUE);
	3496	break;
	3497	}
	3498
	3499	case CONTRACTION_TAG: {
	3500	/* to ensure that the backwards and forwards iteration matches, we
	3501	take the current region of most possible match and pass it through
	3502	the forward iteration. this will ensure that the obstinate problem of
	3503	overlapping contractions will not occur.
	3504	*/
	3505	schar = peekCodeUnit(source, 0);
	3506	constart = (UChar *)coll->image + getContractOffset(CE);
	3507	if (isAtStartPrevIterate(source)
	3508	/* commented away contraction end checks after adding the checks
	3509	in getPrevCE */) {
	3510	/* start of string or this is not the end of any contraction */
	3511	CE = *(coll->contractionCEs +
	3512	(constart - coll->contractionIndex));
	3513	break;
	3514	}
	3515	strbuffer = buffer;
	3516	UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);
	3517	*(UCharOffset --) = 0;
	3518	noChars = 0;
	3519	// have to swap thai characters
	3520	while (ucol_unsafeCP(schar, coll)) {
	3521	*(UCharOffset) = schar;
	3522	noChars++;
	3523	UCharOffset --;
	3524	schar = getPrevNormalizedChar(source, status);
	3525	goBackOne(source);
	3526	// TODO: when we exhaust the contraction buffer,
	3527	// it needs to get reallocated. The problem is
	3528	// that the size depends on the string which is
	3529	// not iterated over. However, since we're travelling
	3530	// backwards, we already had to set the iterator at
	3531	// the end - so we might as well know where we are?
	3532	if (UCharOffset + 1 == buffer) {
	3533	/* we have exhausted the buffer */
	3534	int32_t newsize = 0;
	3535	if(source->pos) { // actually dealing with a position
	3536	newsize = (int32_t)(source->pos - source->string + 1);
	3537	} else { // iterator
	3538	newsize = 4 * UCOL_MAX_BUFFER;
	3539	}
	3540	strbuffer = (UChar )uprv_malloc(sizeof(UChar)
	3541	(newsize + UCOL_MAX_BUFFER));
	3542	/* test for NULL */
	3543	if (strbuffer == NULL) {
	3544	*status = U_MEMORY_ALLOCATION_ERROR;
	3545	return UCOL_NO_MORE_CES;
	3546	}
	3547	UCharOffset = strbuffer + newsize;
	3548	uprv_memcpy(UCharOffset, buffer,
	3549	UCOL_MAX_BUFFER * sizeof(UChar));
	3550	UCharOffset --;
	3551	}
	3552	if ((source->pos && (source->pos == source->string \|\|
	3553	((source->flags & UCOL_ITER_INNORMBUF) &&
	3554	*(source->pos - 1) == 0 && source->fcdPosition == NULL)))
	3555	\|\| (source->iterator && !source->iterator->hasPrevious(source->iterator))) {
	3556	break;
	3557	}
	3558	}
	3559	/* adds the initial base character to the string */
	3560	*(UCharOffset) = schar;
	3561	noChars++;
	3562
	3563	int32_t offsetBias;
	3564
	3565	// ** doesn't work if using iterator **
	3566	if (source->flags & UCOL_ITER_INNORMBUF) {
	3567	offsetBias = -1;
	3568	} else {
	3569	offsetBias = (int32_t)(source->pos - source->string);
	3570	}
	3571
	3572	/* a new collIterate is used to simplify things, since using the current
	3573	collIterate will mean that the forward and backwards iteration will
	3574	share and change the same buffers. we don't want to get into that. */
	3575	collIterate temp;
	3576	int32_t rawOffset;
	3577
	3578	IInit_collIterate(coll, UCharOffset, noChars, &temp, status);
	3579	if(U_FAILURE(*status)) {
	3580	return UCOL_NULLORDER;
	3581	}
	3582	temp.flags &= ~UCOL_ITER_NORM;
	3583	temp.flags \|= source->flags & UCOL_FORCE_HAN_IMPLICIT;
	3584
	3585	rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero?
	3586	CE = ucol_IGetNextCE(coll, &temp, status);
	3587
	3588	if (source->extendCEs) {
	3589	endCEBuffer = source->extendCEs + source->extendCEsSize;
	3590	CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t));
	3591	} else {
	3592	endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
	3593	CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t));
	3594	}
	3595
	3596	while (CE != UCOL_NO_MORE_CES) {
	3597	*(source->CEpos ++) = CE;
	3598
	3599	if (offsetBias >= 0) {
	3600	source->appendOffset(rawOffset + offsetBias, *status);
	3601	}
	3602
	3603	CECount++;
	3604	if (source->CEpos == endCEBuffer) {
	3605	/* ran out of CE space, reallocate to new buffer.
	3606	If reallocation fails, reset pointers and bail out,
	3607	there's no guarantee of the right character position after
	3608	this bail*/
	3609	if (!increaseCEsCapacity(source)) {
	3610	*status = U_MEMORY_ALLOCATION_ERROR;
	3611	break;
	3612	}
	3613
	3614	endCEBuffer = source->extendCEs + source->extendCEsSize;
	3615	}
	3616
	3617	if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {
	3618	rawOffset = (int32_t)(temp.fcdPosition - temp.string);
	3619	} else {
	3620	rawOffset = (int32_t)(temp.pos - temp.string);
	3621	}
	3622
	3623	CE = ucol_IGetNextCE(coll, &temp, status);
	3624	}
	3625
	3626	if (strbuffer != buffer) {
	3627	uprv_free(strbuffer);
	3628	}
	3629	if (U_FAILURE(*status)) {
	3630	return (uint32_t)UCOL_NULLORDER;
	3631	}
	3632
	3633	if (source->offsetRepeatValue != 0) {
	3634	if (CECount > noChars) {
	3635	source->offsetRepeatCount += temp.offsetRepeatCount;
	3636	} else {
	3637	// ** does this really skip the right offsets? **
	3638	source->offsetReturn -= (noChars - CECount);
	3639	}
	3640	}
	3641
	3642	if (offsetBias >= 0) {
	3643	source->offsetReturn = source->offsetStore - 1;
	3644	if (source->offsetReturn == source->offsetBuffer) {
	3645	source->offsetStore = source->offsetBuffer;
	3646	}
	3647	}
	3648
	3649	source->toReturn = source->CEpos - 1;
	3650	if (source->toReturn == source->CEs) {
	3651	source->CEpos = source->CEs;
	3652	}
	3653
	3654	return *(source->toReturn);
	3655	}
	3656	case LONG_PRIMARY_TAG:
	3657	{
	3658	*(source->CEpos++) = ((CE & 0xFFFF00) << 8) \| (UCOL_BYTE_COMMON << 8) \| UCOL_BYTE_COMMON;
	3659	*(source->CEpos++) = ((CE & 0xFF)<<24)\|UCOL_CONTINUATION_MARKER;
	3660	source->toReturn = source->CEpos - 1;
	3661
	3662	if (source->flags & UCOL_ITER_INNORMBUF) {
	3663	source->offsetRepeatCount = 1;
	3664	} else {
	3665	int32_t firstOffset = (int32_t)(source->pos - source->string);
	3666
	3667	source->appendOffset(firstOffset, *status);
	3668	source->appendOffset(firstOffset + 1, *status);
	3669
	3670	source->offsetReturn = source->offsetStore - 1;
	3671	*(source->offsetBuffer) = firstOffset;
	3672	if (source->offsetReturn == source->offsetBuffer) {
	3673	source->offsetStore = source->offsetBuffer;
	3674	}
	3675	}
	3676
	3677
	3678	return *(source->toReturn);
	3679	}
	3680
	3681	case EXPANSION_TAG: /* this tag always returns */
	3682	{
	3683	/*
	3684	This should handle expansion.
	3685	NOTE: we can encounter both continuations and expansions in an expansion!
	3686	I have to decide where continuations are going to be dealt with
	3687	*/
	3688	int32_t firstOffset = (int32_t)(source->pos - source->string);
	3689
	3690	// ** doesn't work if using iterator **
	3691	if (source->offsetReturn != NULL) {
	3692	if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
	3693	source->offsetStore = source->offsetBuffer;
	3694	}else {
	3695	firstOffset = -1;
	3696	}
	3697	}
	3698
	3699	/* find the offset to expansion table */
	3700	CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
	3701	size = getExpansionCount(CE);
	3702	if (size != 0) {
	3703	/*
	3704	if there are less than 16 elements in expansion, we don't terminate
	3705	*/
	3706	uint32_t count;
	3707
	3708	for (count = 0; count < size; count++) {
	3709	(source->CEpos ++) = CEOffset++;
	3710
	3711	if (firstOffset >= 0) {
	3712	source->appendOffset(firstOffset + 1, *status);
	3713	}
	3714	}
	3715	} else {
	3716	/* else, we do */
	3717	while (*CEOffset != 0) {
	3718	(source->CEpos ++) = CEOffset ++;
	3719
	3720	if (firstOffset >= 0) {
	3721	source->appendOffset(firstOffset + 1, *status);
	3722	}
	3723	}
	3724	}
	3725
	3726	if (firstOffset >= 0) {
	3727	source->offsetReturn = source->offsetStore - 1;
	3728	*(source->offsetBuffer) = firstOffset;
	3729	if (source->offsetReturn == source->offsetBuffer) {
	3730	source->offsetStore = source->offsetBuffer;
	3731	}
	3732	} else {
	3733	source->offsetRepeatCount += size - 1;
	3734	}
	3735
	3736	source->toReturn = source->CEpos - 1;
	3737	// in case of one element expansion, we
	3738	// want to immediately return CEpos
	3739	if(source->toReturn == source->CEs) {
	3740	source->CEpos = source->CEs;
	3741	}
	3742
	3743	return *(source->toReturn);
	3744	}
	3745
	3746	case DIGIT_TAG:
	3747	{
	3748	/*
	3749	We do a check to see if we want to collate digits as numbers; if so we generate
	3750	a custom collation key. Otherwise we pull out the value stored in the expansion table.
	3751	*/
	3752	uint32_t i; /* general counter */
	3753
	3754	if (source->coll->numericCollation == UCOL_ON){
	3755	uint32_t digIndx = 0;
	3756	uint32_t endIndex = 0;
	3757	uint32_t leadingZeroIndex = 0;
	3758	uint32_t trailingZeroCount = 0;
	3759
	3760	uint8_t collateVal = 0;
	3761
	3762	UBool nonZeroValReached = FALSE;
	3763
	3764	uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs.
	3765	/*
	3766	We parse the source string until we hit a char that's NOT a digit.
	3767	Use this u_charDigitValue. This might be slow because we have to
	3768	handle surrogates...
	3769	*/
	3770	/*
	3771	We need to break up the digit string into collection elements of UCOL_MAX_DIGITS_FOR_NUMBER or less,
	3772	with any chunks smaller than that being on the right end of the digit string - i.e. the first collation
	3773	element we process when going backward. To determine how long that chunk might be, we may need to make
	3774	two passes through the loop that collects digits - one to see how long the string is (and how much is
	3775	leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has
	3776	more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation
	3777	element chunk after resetting the state to the initialState at the right side of the digit string.
	3778	*/
	3779	uint32_t ceLimit = 0;
	3780	UChar initial_ch = ch;
	3781	collIterateState initialState = {0,0,0,0,0,0,0,0,0};
	3782	backupState(source, &initialState);
	3783
	3784	for(;;) {
	3785	collIterateState state = {0,0,0,0,0,0,0,0,0};
	3786	UChar32 char32 = 0;
	3787	int32_t digVal = 0;
	3788
	3789	if (U16_IS_TRAIL (ch)) {
	3790	if (!collIter_bos(source)){
	3791	UChar lead = getPrevNormalizedChar(source, status);
	3792	if(U16_IS_LEAD(lead)) {
	3793	char32 = U16_GET_SUPPLEMENTARY(lead,ch);
	3794	goBackOne(source);
	3795	} else {
	3796	char32 = ch;
	3797	}
	3798	} else {
	3799	char32 = ch;
	3800	}
	3801	} else {
	3802	char32 = ch;
	3803	}
	3804	digVal = u_charDigitValue(char32);
	3805
	3806	for(;;) {
	3807	// Make sure we have enough space. No longer needed;
	3808	// at this point the largest value of digIndx when we need to save data in numTempBuf
	3809	// is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure
	3810	// that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2).
	3811
	3812	// Skip over trailing zeroes, and keep a count of them.
	3813	if (digVal != 0)
	3814	nonZeroValReached = TRUE;
	3815
	3816	if (nonZeroValReached) {
	3817	/*
	3818	We parse the digit string into base 100 numbers (this fits into a byte).
	3819	We only add to the buffer in twos, thus if we are parsing an odd character,
	3820	that serves as the 'tens' digit while the if we are parsing an even one, that
	3821	is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
	3822	a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
	3823	overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
	3824	than all the other bytes.
	3825
	3826	Since we're doing in this reverse we want to put the first digit encountered into the
	3827	ones place and the second digit encountered into the tens place.
	3828	*/
	3829
	3830	if ((digIndx + trailingZeroCount) % 2 == 1) {
	3831	// High-order digit case (tens place)
	3832	collateVal += (uint8_t)(digVal * 10);
	3833
	3834	// We cannot set leadingZeroIndex unless it has been set for the
	3835	// low-order digit. Therefore, all we can do for the high-order
	3836	// digit is turn it off, never on.
	3837	// The only time we will have a high digit without a low is for
	3838	// the very first non-zero digit, so no zero check is necessary.
	3839	if (collateVal != 0)
	3840	leadingZeroIndex = 0;
	3841
	3842	// The first pass through, digIndx may exceed the limit, but in that case
	3843	// we no longer care about numTempBuf contents since they will be discarded
	3844	if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {
	3845	numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
	3846	}
	3847	collateVal = 0;
	3848	} else {
	3849	// Low-order digit case (ones place)
	3850	collateVal = (uint8_t)digVal;
	3851
	3852	// Check for leading zeroes.
	3853	if (collateVal == 0) {
	3854	if (!leadingZeroIndex)
	3855	leadingZeroIndex = (digIndx/2) + 2;
	3856	} else
	3857	leadingZeroIndex = 0;
	3858
	3859	// No need to write to buffer; the case of a last odd digit
	3860	// is handled below.
	3861	}
	3862	++digIndx;
	3863	} else
	3864	++trailingZeroCount;
	3865
	3866	if (!collIter_bos(source)) {
	3867	ch = getPrevNormalizedChar(source, status);
	3868	//goBackOne(source);
	3869	if (U16_IS_TRAIL(ch)) {
	3870	backupState(source, &state);
	3871	if (!collIter_bos(source)) {
	3872	goBackOne(source);
	3873	UChar lead = getPrevNormalizedChar(source, status);
	3874
	3875	if(U16_IS_LEAD(lead)) {
	3876	char32 = U16_GET_SUPPLEMENTARY(lead,ch);
	3877	} else {
	3878	loadState(source, &state, FALSE);
	3879	char32 = ch;
	3880	}
	3881	}
	3882	} else
	3883	char32 = ch;
	3884
	3885	if ((digVal = u_charDigitValue(char32)) == -1 \|\| (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {
	3886	if (char32 > 0xFFFF) {// For surrogates.
	3887	loadState(source, &state, FALSE);
	3888	}
	3889	// Don't need to "reverse" the goBackOne call,
	3890	// as this points to the next position to process..
	3891	//if (char32 > 0xFFFF) // For surrogates.
	3892	//getNextNormalizedChar(source);
	3893	break;
	3894	}
	3895
	3896	goBackOne(source);
	3897	}else
	3898	break;
	3899	}
	3900
	3901	if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) {
	3902	// our collation element is not too big, go ahead and finish with it
	3903	break;
	3904	}
	3905	// our digit string is too long for a collation element;
	3906	// set the limit for it, reset the state and begin again
	3907	ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER;
	3908	if ( ceLimit == 0 ) {
	3909	ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;
	3910	}
	3911	ch = initial_ch;
	3912	loadState(source, &initialState, FALSE);
	3913	digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0;
	3914	collateVal = 0;
	3915	nonZeroValReached = FALSE;
	3916	}
	3917
	3918	if (! nonZeroValReached) {
	3919	digIndx = 2;
	3920	trailingZeroCount = 0;
	3921	numTempBuf[2] = 6;
	3922	}
	3923
	3924	if ((digIndx + trailingZeroCount) % 2 != 0) {
	3925	numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
	3926	digIndx += 1; // The implicit leading zero
	3927	}
	3928	if (trailingZeroCount % 2 != 0) {
	3929	// We had to consume one trailing zero for the low digit
	3930	// of the least significant byte
	3931	digIndx += 1; // The trailing zero not in the exponent
	3932	trailingZeroCount -= 1;
	3933	}
	3934
	3935	endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
	3936
	3937	// Subtract one off of the last byte. Really the first byte here, but it's reversed...
	3938	numTempBuf[2] -= 1;
	3939
	3940	/*
	3941	We want to skip over the first two slots in the buffer. The first slot
	3942	is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
	3943	sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
	3944	The exponent must be adjusted by the number of leading zeroes, and the number of
	3945	trailing zeroes.
	3946	*/
	3947	numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
	3948	uint32_t exponent = (digIndx+trailingZeroCount)/2;
	3949	if (leadingZeroIndex)
	3950	exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
	3951	numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
	3952
	3953	// Now transfer the collation key to our collIterate struct.
	3954	// The total size for our collation key is half of endIndex, rounded up.
	3955	int32_t size = (endIndex+1)/2;
	3956	if(!ensureCEsCapacity(source, size)) {
	3957	return UCOL_NULLORDER;
	3958	}
	3959	*(source->CEpos++) = (((numTempBuf[0] << 8) \| numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) \| //Primary weight
	3960	(UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) \| // Secondary weight
	3961	UCOL_BYTE_COMMON; // Tertiary weight.
	3962	i = endIndex - 1; // Reset the index into the buffer.
	3963	while(i >= 2) {
	3964	uint32_t primWeight = numTempBuf[i--] << 8;
	3965	if ( i >= 2)
	3966	primWeight \|= numTempBuf[i--];
	3967	*(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) \| UCOL_CONTINUATION_MARKER;
	3968	}
	3969
	3970	source->toReturn = source->CEpos -1;
	3971	return *(source->toReturn);
	3972	} else {
	3973	CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
	3974	CE = *(CEOffset++);
	3975	break;
	3976	}
	3977	}
	3978
	3979	case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
	3980	{
	3981	static const uint32_t
	3982	SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
	3983	//const uint32_t LCount = 19;
	3984	static const uint32_t VCount = 21;
	3985	static const uint32_t TCount = 28;
	3986	//const uint32_t NCount = VCount * TCount; /* 588 */
	3987	//const uint32_t SCount = LCount * NCount; /* 11172 */
	3988
	3989	uint32_t L = ch - SBase;
	3990	/*
	3991	divide into pieces.
	3992	we do it in this order since some compilers can do % and / in one
	3993	operation
	3994	*/
	3995	uint32_t T = L % TCount;
	3996	L /= TCount;
	3997	uint32_t V = L % VCount;
	3998	L /= VCount;
	3999
	4000	/* offset them */
	4001	L += LBase;
	4002	V += VBase;
	4003	T += TBase;
	4004
	4005	int32_t firstOffset = (int32_t)(source->pos - source->string);
	4006	source->appendOffset(firstOffset, *status);
	4007
	4008	/*
	4009	* return the first CE, but first put the rest into the expansion buffer
	4010	*/
	4011	if (!source->coll->image->jamoSpecial) {
	4012	*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
	4013	*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
	4014	source->appendOffset(firstOffset + 1, *status);
	4015
	4016	if (T != TBase) {
	4017	*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
	4018	source->appendOffset(firstOffset + 1, *status);
	4019	}
	4020
	4021	source->toReturn = source->CEpos - 1;
	4022
	4023	source->offsetReturn = source->offsetStore - 1;
	4024	if (source->offsetReturn == source->offsetBuffer) {
	4025	source->offsetStore = source->offsetBuffer;
	4026	}
	4027
	4028	return *(source->toReturn);
	4029	} else {
	4030	// Since Hanguls pass the FCD check, it is
	4031	// guaranteed that we won't be in
	4032	// the normalization buffer if something like this happens
	4033
	4034	// Move Jamos into normalization buffer
	4035	UChar *tempbuffer = source->writableBuffer.getBuffer(5);
	4036	int32_t tempbufferLength, jamoOffset;
	4037	tempbuffer[0] = 0;
	4038	tempbuffer[1] = (UChar)L;
	4039	tempbuffer[2] = (UChar)V;
	4040	if (T != TBase) {
	4041	tempbuffer[3] = (UChar)T;
	4042	tempbufferLength = 4;
	4043	} else {
	4044	tempbufferLength = 3;
	4045	}
	4046	source->writableBuffer.releaseBuffer(tempbufferLength);
	4047
	4048	// Indicate where to continue in main input string after exhausting the writableBuffer
	4049	if (source->pos == source->string) {
	4050	jamoOffset = 0;
	4051	source->fcdPosition = NULL;
	4052	} else {
	4053	jamoOffset = source->pos - source->string;
	4054	source->fcdPosition = source->pos-1;
	4055	}
	4056
	4057	// Append offsets for the additional chars
	4058	// (not the 0, and not the L whose offsets match the original Hangul)
	4059	int32_t jamoRemaining = tempbufferLength - 2;
	4060	jamoOffset++; // appended offsets should match end of original Hangul
	4061	while (jamoRemaining-- > 0) {
	4062	source->appendOffset(jamoOffset, *status);
	4063	}
	4064
	4065	source->offsetRepeatValue = jamoOffset;
	4066
	4067	source->offsetReturn = source->offsetStore - 1;
	4068	if (source->offsetReturn == source->offsetBuffer) {
	4069	source->offsetStore = source->offsetBuffer;
	4070	}
	4071
	4072	source->pos = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
	4073	source->origFlags = source->flags;
	4074	source->flags \|= UCOL_ITER_INNORMBUF;
	4075	source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);
	4076
	4077	return(UCOL_IGNORABLE);
	4078	}
	4079	}
	4080
	4081	case IMPLICIT_TAG: /* everything that is not defined otherwise */
	4082	return getPrevImplicit(ch, source);
	4083
	4084	// TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
	4085	case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
	4086	return getPrevImplicit(ch, source);
	4087
	4088	case SURROGATE_TAG: /* This is a surrogate pair */
	4089	/* essentially an engaged lead surrogate. */
	4090	/* if you have encountered it here, it means that a */
	4091	/* broken sequence was encountered and this is an error */
	4092	return UCOL_NOT_FOUND;
	4093
	4094	case LEAD_SURROGATE_TAG: /* D800-DBFF*/
	4095	return UCOL_NOT_FOUND; /* broken surrogate sequence */
	4096
	4097	case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
	4098	{
	4099	UChar32 cp = 0;
	4100	UChar prevChar;
	4101	const UChar *prev;
	4102	if (isAtStartPrevIterate(source)) {
	4103	/* we are at the start of the string, wrong place to be at */
	4104	return UCOL_NOT_FOUND;
	4105	}
	4106	if (source->pos != source->writableBuffer.getBuffer()) {
	4107	prev = source->pos - 1;
	4108	} else {
	4109	prev = source->fcdPosition;
	4110	}
	4111	prevChar = *prev;
	4112
	4113	/* Handles Han and Supplementary characters here.*/
	4114	if (U16_IS_LEAD(prevChar)) {
	4115	cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
	4116	source->pos = prev;
	4117	} else {
	4118	return UCOL_NOT_FOUND; /* like unassigned */
	4119	}
	4120
	4121	return getPrevImplicit(cp, source);
	4122	}
	4123
	4124	/* UCA is filled with these. Tailorings are NOT_FOUND */
	4125	/* not yet implemented */
	4126	case CHARSET_TAG: /* this tag always returns */
	4127	/* probably after 1.8 */
	4128	return UCOL_NOT_FOUND;
	4129
	4130	default: /* this tag always returns */
	4131	*status = U_INTERNAL_PROGRAM_ERROR;
	4132	CE=0;
	4133	break;
	4134	}
	4135
	4136	if (CE <= UCOL_NOT_FOUND) {
	4137	break;
	4138	}
	4139	}
	4140
	4141	return CE;
	4142	}
	4143
	4144	/* This should really be a macro */
	4145	/* This function is used to reverse parts of a buffer. We need this operation when doing continuation */
	4146	/* secondaries in French */
	4147	/*
	4148	void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
	4149	uint8_t temp;
	4150	while(start<end) {
	4151	temp = *start;
	4152	*start++ = *end;
	4153	*end-- = temp;
	4154	}
	4155	}
	4156	*/
	4157
	4158	#define uprv_ucol_reverse_buffer(TYPE, start, end) { \
	4159	TYPE tempA; \
	4160	while((start)<(end)) { \
	4161	tempA = *(start); \
	4162	(start)++ = (end); \
	4163	*(end)-- = tempA; \
	4164	} \
	4165	}
	4166
	4167	/****************************************************************************/
	4168	/* Following are the sortkey generation functions */
	4169	/* */
	4170	/****************************************************************************/
	4171
	4172	/**
	4173	* Merge two sort keys.
	4174	* This is useful, for example, to combine sort keys from first and last names
	4175	* to sort such pairs.
	4176	* Merged sort keys consider on each collation level the first part first entirely,
	4177	* then the second one.
	4178	* It is possible to merge multiple sort keys by consecutively merging
	4179	* another one with the intermediate result.
	4180	*
	4181	* The length of the merge result is the sum of the lengths of the input sort keys
	4182	* minus 1.
	4183	*
	4184	* @param src1 the first sort key
	4185	* @param src1Length the length of the first sort key, including the zero byte at the end;
	4186	* can be -1 if the function is to find the length
	4187	* @param src2 the second sort key
	4188	* @param src2Length the length of the second sort key, including the zero byte at the end;
	4189	* can be -1 if the function is to find the length
	4190	* @param dest the buffer where the merged sort key is written,
	4191	* can be NULL if destCapacity==0
	4192	* @param destCapacity the number of bytes in the dest buffer
	4193	* @return the length of the merged sort key, src1Length+src2Length-1;
	4194	* can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
	4195	* in which cases the contents of dest is undefined
	4196	*
	4197	* @draft
	4198	*/
	4199	U_CAPI int32_t U_EXPORT2
	4200	ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
	4201	const uint8_t *src2, int32_t src2Length,
	4202	uint8_t *dest, int32_t destCapacity) {
	4203	int32_t destLength;
	4204	uint8_t b;
	4205
	4206	/* check arguments */
	4207	if( src1==NULL \|\| src1Length<-2 \|\| src1Length==0 \|\| (src1Length>0 && src1[src1Length-1]!=0) \|\|
	4208	src2==NULL \|\| src2Length<-2 \|\| src2Length==0 \|\| (src2Length>0 && src2[src2Length-1]!=0) \|\|
	4209	destCapacity<0 \|\| (destCapacity>0 && dest==NULL)
	4210	) {
	4211	/* error, attempt to write a zero byte and return 0 */
	4212	if(dest!=NULL && destCapacity>0) {
	4213	*dest=0;
	4214	}
	4215	return 0;
	4216	}
	4217
	4218	/* check lengths and capacity */
	4219	if(src1Length<0) {
	4220	src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
	4221	}
	4222	if(src2Length<0) {
	4223	src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
	4224	}
	4225
	4226	destLength=src1Length+src2Length-1;
	4227	if(destLength>destCapacity) {
	4228	/* the merged sort key does not fit into the destination */
	4229	return destLength;
	4230	}
	4231
	4232	/* merge the sort keys with the same number of levels */
	4233	while(src1!=0 && src2!=0) { /* while both have another level */
	4234	/* copy level from src1 not including 00 or 01 */
	4235	while((b=*src1)>=2) {
	4236	++src1;
	4237	*dest++=b;
	4238	}
	4239
	4240	/* add a 02 merge separator */
	4241	*dest++=2;
	4242
	4243	/* copy level from src2 not including 00 or 01 */
	4244	while((b=*src2)>=2) {
	4245	++src2;
	4246	*dest++=b;
	4247	}
	4248
	4249	/* if both sort keys have another level, then add a 01 level separator and continue */
	4250	if(src1==1 && src2==1) {
	4251	++src1;
	4252	++src2;
	4253	*dest++=1;
	4254	}
	4255	}
	4256
	4257	/*
	4258	* here, at least one sort key is finished now, but the other one
	4259	* might have some contents left from containing more levels;
	4260	* that contents is just appended to the result
	4261	*/
	4262	if(*src1!=0) {
	4263	/* src1 is not finished, therefore src2==0, and src1 is appended /
	4264	src2=src1;
	4265	}
	4266	/* append src2, "the other, unfinished sort key" */
	4267	uprv_strcpy((char )dest, (const char )src2);
	4268
	4269	/* trust that neither sort key contained illegally embedded zero bytes */
	4270	return destLength;
	4271	}
	4272
	4273	U_NAMESPACE_BEGIN
	4274
	4275	class SortKeyByteSink : public ByteSink {
	4276	public:
	4277	static const uint32_t FILL_ORIGINAL_BUFFER = 1;
	4278	static const uint32_t DONT_GROW = 2;
	4279	SortKeyByteSink(char *dest, int32_t destCapacity, uint32_t flags=0)
	4280	: ownedBuffer_(NULL), buffer_(dest), capacity_(destCapacity),
	4281	appended_(0),
	4282	fill_(flags & FILL_ORIGINAL_BUFFER),
	4283	grow_((flags & DONT_GROW) == 0) {
	4284	if (buffer_ == NULL \|\| capacity_ < 0) {
	4285	buffer_ = reinterpret_cast<char *>(&lastResortByte_);
	4286	capacity_ = 0;
	4287	}
	4288	}
	4289	virtual ~SortKeyByteSink();
	4290
	4291	virtual void Append(const char *bytes, int32_t n);
	4292	void Append(const uint8_t bytes, int32_t n) { Append(reinterpret_cast<const char >(bytes), n); }
	4293	void Append(uint8_t b) {
	4294	if (appended_ < capacity_) {
	4295	buffer_[appended_++] = (char)b;
	4296	} else {
	4297	Append(&b, 1);
	4298	}
	4299	}
	4300	void Append(uint8_t b1, uint8_t b2) {
	4301	int32_t a2 = appended_ + 2;
	4302	if (a2 <= capacity_) {
	4303	buffer_[appended_] = (char)b1;
	4304	buffer_[appended_ + 1] = (char)b2;
	4305	appended_ = a2;
	4306	} else {
	4307	char bytes[2] = { (char)b1, (char)b2 };
	4308	Append(bytes, 2);
	4309	}
	4310	}
	4311	void Append(const SortKeyByteSink &other) { Append(other.buffer_, other.appended_); }
	4312	virtual char *GetAppendBuffer(int32_t min_capacity,
	4313	int32_t desired_capacity_hint,
	4314	char *scratch, int32_t scratch_capacity,
	4315	int32_t *result_capacity);
	4316	int32_t NumberOfBytesAppended() const { return appended_; }
	4317	uint8_t &LastByte() {
	4318	if (buffer_ != NULL && appended_ > 0) {
	4319	return reinterpret_cast<uint8_t *>(buffer_)[appended_ - 1];
	4320	} else {
	4321	return lastResortByte_;
	4322	}
	4323	}
	4324	uint8_t *GetLastFewBytes(int32_t n) {
	4325	if (buffer_ != NULL && appended_ >= n) {
	4326	return reinterpret_cast<uint8_t *>(buffer_) + appended_ - n;
	4327	} else {
	4328	return NULL;
	4329	}
	4330	}
	4331	char *GetBuffer() { return buffer_; }
	4332	uint8_t GetUnsignedBuffer() { return reinterpret_cast<uint8_t >(buffer_); }
	4333	uint8_t *OrphanUnsignedBuffer(int32_t &orphanedCapacity);
	4334	UBool IsOk() const { return buffer_ != NULL; } // otherwise out-of-memory
	4335
	4336	private:
	4337	SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
	4338	SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
	4339
	4340	UBool Resize(int32_t appendCapacity, int32_t length);
	4341	void SetNotOk() {
	4342	buffer_ = NULL;
	4343	capacity_ = 0;
	4344	}
	4345
	4346	static uint8_t lastResortByte_; // last-resort return value from LastByte()
	4347
	4348	char *ownedBuffer_;
	4349	char *buffer_;
	4350	int32_t capacity_;
	4351	int32_t appended_;
	4352	UBool fill_;
	4353	UBool grow_;
	4354	};
	4355
	4356	uint8_t SortKeyByteSink::lastResortByte_ = 0;
	4357
	4358	SortKeyByteSink::~SortKeyByteSink() {
	4359	uprv_free(ownedBuffer_);
	4360	}
	4361
	4362	void
	4363	SortKeyByteSink::Append(const char *bytes, int32_t n) {
	4364	if (n <= 0) {
	4365	return;
	4366	}
	4367	int32_t length = appended_;
	4368	appended_ += n;
	4369	if ((buffer_ + length) == bytes) {
	4370	return; // the caller used GetAppendBuffer() and wrote the bytes already
	4371	}
	4372	if (buffer_ == NULL) {
	4373	return; // allocation failed before already
	4374	}
	4375	int32_t available = capacity_ - length;
	4376	if (bytes == NULL) {
	4377	// assume that the caller failed to allocate memory
	4378	if (fill_) {
	4379	if (n > available) {
	4380	n = available;
	4381	}
	4382	uprv_memset(buffer_, 0, n);
	4383	}
	4384	SetNotOk(); // propagate the out-of-memory error
	4385	return;
	4386	}
	4387	if (n > available) {
	4388	if (fill_ && available > 0) {
	4389	// Fill the original buffer completely.
	4390	uprv_memcpy(buffer_ + length, bytes, available);
	4391	bytes += available;
	4392	length += available;
	4393	n -= available;
	4394	available = 0;
	4395	}
	4396	fill_ = FALSE;
	4397	if (!Resize(n, length)) {
	4398	SetNotOk();
	4399	return;
	4400	}
	4401	}
	4402	uprv_memcpy(buffer_ + length, bytes, n);
	4403	}
	4404
	4405	char *
	4406	SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
	4407	int32_t desired_capacity_hint,
	4408	char *scratch,
	4409	int32_t scratch_capacity,
	4410	int32_t *result_capacity) {
	4411	if (min_capacity < 1 \|\| scratch_capacity < min_capacity) {
	4412	*result_capacity = 0;
	4413	return NULL;
	4414	}
	4415	int32_t available = capacity_ - appended_;
	4416	if (available >= min_capacity) {
	4417	*result_capacity = available;
	4418	return buffer_ + appended_;
	4419	} else if (Resize(desired_capacity_hint, appended_)) {
	4420	*result_capacity = capacity_ - appended_;
	4421	return buffer_ + appended_;
	4422	} else {
	4423	*result_capacity = scratch_capacity;
	4424	return scratch;
	4425	}
	4426	}
	4427
	4428	UBool
	4429	SortKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
	4430	if (!grow_) {
	4431	return FALSE;
	4432	}
	4433	int32_t newCapacity = 2 * capacity_;
	4434	int32_t altCapacity = length + 2 * appendCapacity;
	4435	if (newCapacity < altCapacity) {
	4436	newCapacity = altCapacity;
	4437	}
	4438	if (newCapacity < 1024) {
	4439	newCapacity = 1024;
	4440	}
	4441	char newBuffer = (char )uprv_malloc(newCapacity);
	4442	if (newBuffer == NULL) {
	4443	return FALSE;
	4444	}
	4445	uprv_memcpy(newBuffer, buffer_, length);
	4446	uprv_free(ownedBuffer_);
	4447	ownedBuffer_ = buffer_ = newBuffer;
	4448	capacity_ = newCapacity;
	4449	return TRUE;
	4450	}
	4451
	4452	uint8_t *
	4453	SortKeyByteSink::OrphanUnsignedBuffer(int32_t &orphanedCapacity) {
	4454	if (buffer_ == NULL \|\| appended_ == 0) {
	4455	orphanedCapacity = 0;
	4456	return NULL;
	4457	}
	4458	if (ownedBuffer_ != NULL) {
	4459	// orphan & forget the ownedBuffer_
	4460	uint8_t returnBuffer = reinterpret_cast<uint8_t >(ownedBuffer_);
	4461	ownedBuffer_ = buffer_ = NULL;
	4462	orphanedCapacity = capacity_;
	4463	capacity_ = appended_ = 0;
	4464	return returnBuffer;
	4465	}
	4466	// clone the buffer_
	4467	uint8_t newBuffer = (uint8_t )uprv_malloc(appended_);
	4468	if (newBuffer == NULL) {
	4469	orphanedCapacity = 0;
	4470	return NULL;
	4471	}
	4472	uprv_memcpy(newBuffer, buffer_, appended_);
	4473	orphanedCapacity = appended_;
	4474	return newBuffer;
	4475	}
	4476
	4477	U_NAMESPACE_END
	4478
	4479	/* sortkey API */
	4480	U_CAPI int32_t U_EXPORT2
	4481	ucol_getSortKey(const UCollator *coll,
	4482	const UChar *source,
	4483	int32_t sourceLength,
	4484	uint8_t *result,
	4485	int32_t resultLength)
	4486	{
	4487	UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
	4488	if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
	4489	UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
	4490	((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
	4491	}
	4492
	4493	if(coll->delegate != NULL) {
	4494	return ((const Collator*)coll->delegate)->getSortKey(source, sourceLength, result, resultLength);
	4495	}
	4496
	4497	UErrorCode status = U_ZERO_ERROR;
	4498	int32_t keySize = 0;
	4499
	4500	if(source != NULL) {
	4501	// source == NULL is actually an error situation, but we would need to
	4502	// have an error code to return it. Until we introduce a new
	4503	// API, it stays like this
	4504
	4505	/* this uses the function pointer that is set in updateinternalstate */
	4506	/* currently, there are two funcs: */
	4507	/ucol_calcSortKey(...);/
	4508	/ucol_calcSortKeySimpleTertiary(...);/
	4509
	4510	SortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength,
	4511	SortKeyByteSink::FILL_ORIGINAL_BUFFER \| SortKeyByteSink::DONT_GROW);
	4512	coll->sortKeyGen(coll, source, sourceLength, sink, &status);
	4513	keySize = sink.NumberOfBytesAppended();
	4514	}
	4515	UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
	4516	UTRACE_EXIT_STATUS(status);
	4517	return keySize;
	4518	}
	4519
	4520	/* this function is called by the C++ API for sortkey generation */
	4521	U_CFUNC int32_t
	4522	ucol_getSortKeyWithAllocation(const UCollator *coll,
	4523	const UChar *source, int32_t sourceLength,
	4524	uint8_t *&result, int32_t &resultCapacity,
	4525	UErrorCode *pErrorCode) {
	4526	SortKeyByteSink sink(reinterpret_cast<char *>(result), resultCapacity);
	4527	coll->sortKeyGen(coll, source, sourceLength, sink, pErrorCode);
	4528	int32_t resultLen = sink.NumberOfBytesAppended();
	4529	if (U_SUCCESS(*pErrorCode)) {
	4530	if (!sink.IsOk()) {
	4531	*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
	4532	} else if (result != sink.GetUnsignedBuffer()) {
	4533	result = sink.OrphanUnsignedBuffer(resultCapacity);
	4534	}
	4535	}
	4536	return resultLen;
	4537	}
	4538
	4539	// Is this primary weight compressible?
	4540	// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
	4541	// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
	4542	static inline UBool
	4543	isCompressible(const UCollator * /coll/, uint8_t primary1) {
	4544	return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary;
	4545	}
	4546
	4547	static
	4548	inline void doCaseShift(SortKeyByteSink &cases, uint32_t &caseShift) {
	4549	if (caseShift == 0) {
	4550	cases.Append(UCOL_CASE_BYTE_START);
	4551	caseShift = UCOL_CASE_SHIFT_START;
	4552	}
	4553	}
	4554
	4555	// Packs the secondary buffer when processing French locale.
	4556	static void
	4557	packFrench(uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
	4558	secondaries += secsize; // We read the secondary-level bytes back to front.
	4559	uint8_t secondary;
	4560	int32_t count2 = 0;
	4561	int32_t i = 0;
	4562	// we use i here since the key size already accounts for terminators, so we'll discard the increment
	4563	for(i = 0; i<secsize; i++) {
	4564	secondary = *(secondaries-i-1);
	4565	/* This is compression code. */
	4566	if (secondary == UCOL_COMMON2) {
	4567	++count2;
	4568	} else {
	4569	if (count2 > 0) {
	4570	if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
	4571	while (count2 > UCOL_TOP_COUNT2) {
	4572	result.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
	4573	count2 -= (uint32_t)UCOL_TOP_COUNT2;
	4574	}
	4575	result.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
	4576	} else {
	4577	while (count2 > UCOL_BOT_COUNT2) {
	4578	result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	4579	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	4580	}
	4581	result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	4582	}
	4583	count2 = 0;
	4584	}
	4585	result.Append(secondary);
	4586	}
	4587	}
	4588	if (count2 > 0) {
	4589	while (count2 > UCOL_BOT_COUNT2) {
	4590	result.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	4591	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	4592	}
	4593	result.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	4594	}
	4595	}
	4596
	4597	#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0
	4598
	4599	/* This is the sortkey work horse function */
	4600	U_CFUNC void U_CALLCONV
	4601	ucol_calcSortKey(const UCollator *coll,
	4602	const UChar *source,
	4603	int32_t sourceLength,
	4604	SortKeyByteSink &result,
	4605	UErrorCode *status)
	4606	{
	4607	if(U_FAILURE(*status)) {
	4608	return;
	4609	}
	4610
	4611	/* Stack allocated buffers for buffers we use */
	4612	char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
	4613	char caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BUFFER];
	4614
	4615	SortKeyByteSink &primaries = result;
	4616	SortKeyByteSink secondaries(second, LENGTHOF(second));
	4617	SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
	4618	SortKeyByteSink cases(caseB, LENGTHOF(caseB));
	4619	SortKeyByteSink quads(quad, LENGTHOF(quad));
	4620
	4621	UnicodeString normSource;
	4622
	4623	int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
	4624
	4625	UColAttributeValue strength = coll->strength;
	4626
	4627	uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);
	4628	uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
	4629	uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
	4630	UBool compareIdent = (strength == UCOL_IDENTICAL);
	4631	UBool doCase = (coll->caseLevel == UCOL_ON);
	4632	UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
	4633	UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
	4634	//UBool qShifted = shifted && (compareQuad == 0);
	4635	UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
	4636
	4637	uint32_t variableTopValue = coll->variableTopValue;
	4638	// TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
	4639	// qShifted, we don't need to set UCOL_COMMON_BOT4 so high.
	4640	uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
	4641	uint8_t UCOL_HIRAGANA_QUAD = 0;
	4642	if(doHiragana) {
	4643	UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;
	4644	/* allocate one more space for hiragana, value for hiragana */
	4645	}
	4646	uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);
	4647
	4648	/* support for special features like caselevel and funky secondaries */
	4649	int32_t lastSecondaryLength = 0;
	4650	uint32_t caseShift = 0;
	4651
	4652	/* If we need to normalize, we'll do it all at once at the beginning! */
	4653	const Normalizer2 *norm2;
	4654	if(compareIdent) {
	4655	norm2 = Normalizer2Factory::getNFDInstance(*status);
	4656	} else if(coll->normalizationMode != UCOL_OFF) {
	4657	norm2 = Normalizer2Factory::getFCDInstance(*status);
	4658	} else {
	4659	norm2 = NULL;
	4660	}
	4661	if(norm2 != NULL) {
	4662	normSource.setTo(FALSE, source, len);
	4663	int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
	4664	if(qcYesLength != len) {
	4665	UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
	4666	normSource.truncate(qcYesLength);
	4667	norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
	4668	source = normSource.getBuffer();
	4669	len = normSource.length();
	4670	}
	4671	}
	4672	collIterate s;
	4673	IInit_collIterate(coll, source, len, &s, status);
	4674	if(U_FAILURE(*status)) {
	4675	return;
	4676	}
	4677	s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized.
	4678
	4679	uint32_t order = 0;
	4680
	4681	uint8_t primary1 = 0;
	4682	uint8_t primary2 = 0;
	4683	uint8_t secondary = 0;
	4684	uint8_t tertiary = 0;
	4685	uint8_t caseSwitch = coll->caseSwitch;
	4686	uint8_t tertiaryMask = coll->tertiaryMask;
	4687	int8_t tertiaryAddition = coll->tertiaryAddition;
	4688	uint8_t tertiaryTop = coll->tertiaryTop;
	4689	uint8_t tertiaryBottom = coll->tertiaryBottom;
	4690	uint8_t tertiaryCommon = coll->tertiaryCommon;
	4691	uint8_t caseBits = 0;
	4692
	4693	UBool wasShifted = FALSE;
	4694	UBool notIsContinuation = FALSE;
	4695
	4696	uint32_t count2 = 0, count3 = 0, count4 = 0;
	4697	uint8_t leadPrimary = 0;
	4698
	4699	for(;;) {
	4700	order = ucol_IGetNextCE(coll, &s, status);
	4701	if(order == UCOL_NO_MORE_CES) {
	4702	break;
	4703	}
	4704
	4705	if(order == 0) {
	4706	continue;
	4707	}
	4708
	4709	notIsContinuation = !isContinuation(order);
	4710
	4711	if(notIsContinuation) {
	4712	tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);
	4713	} else {
	4714	tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
	4715	}
	4716
	4717	secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
	4718	primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
	4719	primary1 = (uint8_t)(order >> 8);
	4720
	4721	uint8_t originalPrimary1 = primary1;
	4722	if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
	4723	primary1 = coll->leadBytePermutationTable[primary1];
	4724	}
	4725
	4726	if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
	4727	\|\| (!notIsContinuation && wasShifted)))
	4728	\|\| (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
	4729	{
	4730	/* and other ignorables should be removed if following a shifted code point */
	4731	if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
	4732	/* we should just completely ignore it */
	4733	continue;
	4734	}
	4735	if(compareQuad == 0) {
	4736	if(count4 > 0) {
	4737	while (count4 > UCOL_BOT_COUNT4) {
	4738	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
	4739	count4 -= UCOL_BOT_COUNT4;
	4740	}
	4741	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
	4742	count4 = 0;
	4743	}
	4744	/* We are dealing with a variable and we're treating them as shifted */
	4745	/* This is a shifted ignorable */
	4746	if(primary1 != 0) { /* we need to check this since we could be in continuation */
	4747	quads.Append(primary1);
	4748	}
	4749	if(primary2 != 0) {
	4750	quads.Append(primary2);
	4751	}
	4752	}
	4753	wasShifted = TRUE;
	4754	} else {
	4755	wasShifted = FALSE;
	4756	/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
	4757	/* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */
	4758	/* regular and simple sortkey calc */
	4759	if(primary1 != UCOL_IGNORABLE) {
	4760	if(notIsContinuation) {
	4761	if(leadPrimary == primary1) {
	4762	primaries.Append(primary2);
	4763	} else {
	4764	if(leadPrimary != 0) {
	4765	primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
	4766	}
	4767	if(primary2 == UCOL_IGNORABLE) {
	4768	/* one byter, not compressed */
	4769	primaries.Append(primary1);
	4770	leadPrimary = 0;
	4771	} else if(isCompressible(coll, originalPrimary1)) {
	4772	/* compress */
	4773	primaries.Append(leadPrimary = primary1, primary2);
	4774	} else {
	4775	leadPrimary = 0;
	4776	primaries.Append(primary1, primary2);
	4777	}
	4778	}
	4779	} else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
	4780	if(primary2 == UCOL_IGNORABLE) {
	4781	primaries.Append(primary1);
	4782	} else {
	4783	primaries.Append(primary1, primary2);
	4784	}
	4785	}
	4786	}
	4787
	4788	if(secondary > compareSec) {
	4789	if(!isFrenchSec) {
	4790	/* This is compression code. */
	4791	if (secondary == UCOL_COMMON2 && notIsContinuation) {
	4792	++count2;
	4793	} else {
	4794	if (count2 > 0) {
	4795	if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
	4796	while (count2 > UCOL_TOP_COUNT2) {
	4797	secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
	4798	count2 -= (uint32_t)UCOL_TOP_COUNT2;
	4799	}
	4800	secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
	4801	} else {
	4802	while (count2 > UCOL_BOT_COUNT2) {
	4803	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	4804	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	4805	}
	4806	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	4807	}
	4808	count2 = 0;
	4809	}
	4810	secondaries.Append(secondary);
	4811	}
	4812	} else {
	4813	/* Do the special handling for French secondaries */
	4814	/* We need to get continuation elements and do intermediate restore */
	4815	/* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */
	4816	if(notIsContinuation) {
	4817	if (lastSecondaryLength > 1) {
	4818	uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
	4819	if (frenchStartPtr != NULL) {
	4820	/* reverse secondaries from frenchStartPtr up to frenchEndPtr */
	4821	uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
	4822	uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
	4823	}
	4824	}
	4825	lastSecondaryLength = 1;
	4826	} else {
	4827	++lastSecondaryLength;
	4828	}
	4829	secondaries.Append(secondary);
	4830	}
	4831	}
	4832
	4833	if(doCase && (primary1 > 0 \|\| strength >= UCOL_SECONDARY)) {
	4834	// do the case level if we need to do it. We don't want to calculate
	4835	// case level for primary ignorables if we have only primary strength and case level
	4836	// otherwise we would break well formedness of CEs
	4837	doCaseShift(cases, caseShift);
	4838	if(notIsContinuation) {
	4839	caseBits = (uint8_t)(tertiary & 0xC0);
	4840
	4841	if(tertiary != 0) {
	4842	if(coll->caseFirst == UCOL_UPPER_FIRST) {
	4843	if((caseBits & 0xC0) == 0) {
	4844	cases.LastByte() \|= 1 << (--caseShift);
	4845	} else {
	4846	cases.LastByte() \|= 0 << (--caseShift);
	4847	/* second bit */
	4848	doCaseShift(cases, caseShift);
	4849	cases.LastByte() \|= ((caseBits>>6)&1) << (--caseShift);
	4850	}
	4851	} else {
	4852	if((caseBits & 0xC0) == 0) {
	4853	cases.LastByte() \|= 0 << (--caseShift);
	4854	} else {
	4855	cases.LastByte() \|= 1 << (--caseShift);
	4856	/* second bit */
	4857	doCaseShift(cases, caseShift);
	4858	cases.LastByte() \|= ((caseBits>>7)&1) << (--caseShift);
	4859	}
	4860	}
	4861	}
	4862	}
	4863	} else {
	4864	if(notIsContinuation) {
	4865	tertiary ^= caseSwitch;
	4866	}
	4867	}
	4868
	4869	tertiary &= tertiaryMask;
	4870	if(tertiary > compareTer) {
	4871	/* This is compression code. */
	4872	/* sequence size check is included in the if clause */
	4873	if (tertiary == tertiaryCommon && notIsContinuation) {
	4874	++count3;
	4875	} else {
	4876	if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
	4877	tertiary += tertiaryAddition;
	4878	} else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
	4879	tertiary -= tertiaryAddition;
	4880	}
	4881	if (count3 > 0) {
	4882	if ((tertiary > tertiaryCommon)) {
	4883	while (count3 > coll->tertiaryTopCount) {
	4884	tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
	4885	count3 -= (uint32_t)coll->tertiaryTopCount;
	4886	}
	4887	tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
	4888	} else {
	4889	while (count3 > coll->tertiaryBottomCount) {
	4890	tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
	4891	count3 -= (uint32_t)coll->tertiaryBottomCount;
	4892	}
	4893	tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
	4894	}
	4895	count3 = 0;
	4896	}
	4897	tertiaries.Append(tertiary);
	4898	}
	4899	}
	4900
	4901	if(/qShifted/(compareQuad==0) && notIsContinuation) {
	4902	if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
	4903	if(count4>0) { // Close this part
	4904	while (count4 > UCOL_BOT_COUNT4) {
	4905	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
	4906	count4 -= UCOL_BOT_COUNT4;
	4907	}
	4908	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
	4909	count4 = 0;
	4910	}
	4911	quads.Append(UCOL_HIRAGANA_QUAD); // Add the Hiragana
	4912	} else { // This wasn't Hiragana, so we can continue adding stuff
	4913	count4++;
	4914	}
	4915	}
	4916	}
	4917	}
	4918
	4919	/* Here, we are generally done with processing */
	4920	/* bailing out would not be too productive */
	4921
	4922	if(U_SUCCESS(*status)) {
	4923	/* we have done all the CE's, now let's put them together to form a key */
	4924	if(compareSec == 0) {
	4925	if (count2 > 0) {
	4926	while (count2 > UCOL_BOT_COUNT2) {
	4927	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	4928	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	4929	}
	4930	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	4931	}
	4932	result.Append(UCOL_LEVELTERMINATOR);
	4933	if(!isFrenchSec \|\| !secondaries.IsOk()) {
	4934	result.Append(secondaries);
	4935	} else {
	4936	// If there are any unresolved continuation secondaries,
	4937	// reverse them here so that we can reverse the whole secondary thing.
	4938	if (lastSecondaryLength > 1) {
	4939	uint8_t *frenchStartPtr = secondaries.GetLastFewBytes(lastSecondaryLength);
	4940	if (frenchStartPtr != NULL) {
	4941	/* reverse secondaries from frenchStartPtr up to frenchEndPtr */
	4942	uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
	4943	uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
	4944	}
	4945	}
	4946	packFrench(secondaries.GetUnsignedBuffer(), secondaries.NumberOfBytesAppended(), result);
	4947	}
	4948	}
	4949
	4950	if(doCase) {
	4951	result.Append(UCOL_LEVELTERMINATOR);
	4952	result.Append(cases);
	4953	}
	4954
	4955	if(compareTer == 0) {
	4956	if (count3 > 0) {
	4957	if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
	4958	while (count3 >= coll->tertiaryTopCount) {
	4959	tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
	4960	count3 -= (uint32_t)coll->tertiaryTopCount;
	4961	}
	4962	tertiaries.Append((uint8_t)(tertiaryTop - count3));
	4963	} else {
	4964	while (count3 > coll->tertiaryBottomCount) {
	4965	tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
	4966	count3 -= (uint32_t)coll->tertiaryBottomCount;
	4967	}
	4968	tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
	4969	}
	4970	}
	4971	result.Append(UCOL_LEVELTERMINATOR);
	4972	result.Append(tertiaries);
	4973
	4974	if(compareQuad == 0/qShifted == TRUE/) {
	4975	if(count4 > 0) {
	4976	while (count4 > UCOL_BOT_COUNT4) {
	4977	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4));
	4978	count4 -= UCOL_BOT_COUNT4;
	4979	}
	4980	quads.Append((uint8_t)(UCOL_COMMON_BOT4 + (count4-1)));
	4981	}
	4982	result.Append(UCOL_LEVELTERMINATOR);
	4983	result.Append(quads);
	4984	}
	4985
	4986	if(compareIdent) {
	4987	result.Append(UCOL_LEVELTERMINATOR);
	4988	u_writeIdenticalLevelRun(s.string, len, result);
	4989	}
	4990	}
	4991	result.Append(0);
	4992	}
	4993
	4994	/* To avoid memory leak, free the offset buffer if necessary. */
	4995	ucol_freeOffsetBuffer(&s);
	4996	}
	4997
	4998
	4999	U_CFUNC void U_CALLCONV
	5000	ucol_calcSortKeySimpleTertiary(const UCollator *coll,
	5001	const UChar *source,
	5002	int32_t sourceLength,
	5003	SortKeyByteSink &result,
	5004	UErrorCode *status)
	5005	{
	5006	U_ALIGN_CODE(16);
	5007
	5008	if(U_FAILURE(*status)) {
	5009	return;
	5010	}
	5011
	5012	/* Stack allocated buffers for buffers we use */
	5013	char second[UCOL_SECONDARY_MAX_BUFFER], tert[UCOL_TERTIARY_MAX_BUFFER];
	5014
	5015	SortKeyByteSink &primaries = result;
	5016	SortKeyByteSink secondaries(second, LENGTHOF(second));
	5017	SortKeyByteSink tertiaries(tert, LENGTHOF(tert));
	5018
	5019	UnicodeString normSource;
	5020
	5021	int32_t len = sourceLength;
	5022
	5023	/* If we need to normalize, we'll do it all at once at the beginning! */
	5024	if(coll->normalizationMode != UCOL_OFF) {
	5025	normSource.setTo(len < 0, source, len);
	5026	const Normalizer2 norm2 = Normalizer2Factory::getFCDInstance(status);
	5027	int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
	5028	if(qcYesLength != normSource.length()) {
	5029	UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
	5030	normSource.truncate(qcYesLength);
	5031	norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
	5032	source = normSource.getBuffer();
	5033	len = normSource.length();
	5034	}
	5035	}
	5036	collIterate s;
	5037	IInit_collIterate(coll, (UChar *)source, len, &s, status);
	5038	if(U_FAILURE(*status)) {
	5039	return;
	5040	}
	5041	s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was normalized.
	5042
	5043	uint32_t order = 0;
	5044
	5045	uint8_t primary1 = 0;
	5046	uint8_t primary2 = 0;
	5047	uint8_t secondary = 0;
	5048	uint8_t tertiary = 0;
	5049	uint8_t caseSwitch = coll->caseSwitch;
	5050	uint8_t tertiaryMask = coll->tertiaryMask;
	5051	int8_t tertiaryAddition = coll->tertiaryAddition;
	5052	uint8_t tertiaryTop = coll->tertiaryTop;
	5053	uint8_t tertiaryBottom = coll->tertiaryBottom;
	5054	uint8_t tertiaryCommon = coll->tertiaryCommon;
	5055
	5056	UBool notIsContinuation = FALSE;
	5057
	5058	uint32_t count2 = 0, count3 = 0;
	5059	uint8_t leadPrimary = 0;
	5060
	5061	for(;;) {
	5062	order = ucol_IGetNextCE(coll, &s, status);
	5063
	5064	if(order == 0) {
	5065	continue;
	5066	}
	5067
	5068	if(order == UCOL_NO_MORE_CES) {
	5069	break;
	5070	}
	5071
	5072	notIsContinuation = !isContinuation(order);
	5073
	5074	if(notIsContinuation) {
	5075	tertiary = (uint8_t)((order & tertiaryMask));
	5076	} else {
	5077	tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
	5078	}
	5079
	5080	secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
	5081	primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
	5082	primary1 = (uint8_t)(order >> 8);
	5083
	5084	uint8_t originalPrimary1 = primary1;
	5085	if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
	5086	primary1 = coll->leadBytePermutationTable[primary1];
	5087	}
	5088
	5089	/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
	5090	/* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */
	5091	/* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */
	5092	/* regular and simple sortkey calc */
	5093	if(primary1 != UCOL_IGNORABLE) {
	5094	if(notIsContinuation) {
	5095	if(leadPrimary == primary1) {
	5096	primaries.Append(primary2);
	5097	} else {
	5098	if(leadPrimary != 0) {
	5099	primaries.Append((uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN));
	5100	}
	5101	if(primary2 == UCOL_IGNORABLE) {
	5102	/* one byter, not compressed */
	5103	primaries.Append(primary1);
	5104	leadPrimary = 0;
	5105	} else if(isCompressible(coll, originalPrimary1)) {
	5106	/* compress */
	5107	primaries.Append(leadPrimary = primary1, primary2);
	5108	} else {
	5109	leadPrimary = 0;
	5110	primaries.Append(primary1, primary2);
	5111	}
	5112	}
	5113	} else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
	5114	if(primary2 == UCOL_IGNORABLE) {
	5115	primaries.Append(primary1);
	5116	} else {
	5117	primaries.Append(primary1, primary2);
	5118	}
	5119	}
	5120	}
	5121
	5122	if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */
	5123	/* This is compression code. */
	5124	if (secondary == UCOL_COMMON2 && notIsContinuation) {
	5125	++count2;
	5126	} else {
	5127	if (count2 > 0) {
	5128	if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
	5129	while (count2 > UCOL_TOP_COUNT2) {
	5130	secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2));
	5131	count2 -= (uint32_t)UCOL_TOP_COUNT2;
	5132	}
	5133	secondaries.Append((uint8_t)(UCOL_COMMON_TOP2 - (count2-1)));
	5134	} else {
	5135	while (count2 > UCOL_BOT_COUNT2) {
	5136	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	5137	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	5138	}
	5139	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	5140	}
	5141	count2 = 0;
	5142	}
	5143	secondaries.Append(secondary);
	5144	}
	5145	}
	5146
	5147	if(notIsContinuation) {
	5148	tertiary ^= caseSwitch;
	5149	}
	5150
	5151	if(tertiary > 0) {
	5152	/* This is compression code. */
	5153	/* sequence size check is included in the if clause */
	5154	if (tertiary == tertiaryCommon && notIsContinuation) {
	5155	++count3;
	5156	} else {
	5157	if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
	5158	tertiary += tertiaryAddition;
	5159	} else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
	5160	tertiary -= tertiaryAddition;
	5161	}
	5162	if (count3 > 0) {
	5163	if ((tertiary > tertiaryCommon)) {
	5164	while (count3 > coll->tertiaryTopCount) {
	5165	tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
	5166	count3 -= (uint32_t)coll->tertiaryTopCount;
	5167	}
	5168	tertiaries.Append((uint8_t)(tertiaryTop - (count3-1)));
	5169	} else {
	5170	while (count3 > coll->tertiaryBottomCount) {
	5171	tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
	5172	count3 -= (uint32_t)coll->tertiaryBottomCount;
	5173	}
	5174	tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
	5175	}
	5176	count3 = 0;
	5177	}
	5178	tertiaries.Append(tertiary);
	5179	}
	5180	}
	5181	}
	5182
	5183	if(U_SUCCESS(*status)) {
	5184	/* we have done all the CE's, now let's put them together to form a key */
	5185	if (count2 > 0) {
	5186	while (count2 > UCOL_BOT_COUNT2) {
	5187	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2));
	5188	count2 -= (uint32_t)UCOL_BOT_COUNT2;
	5189	}
	5190	secondaries.Append((uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));
	5191	}
	5192	result.Append(UCOL_LEVELTERMINATOR);
	5193	result.Append(secondaries);
	5194
	5195	if (count3 > 0) {
	5196	if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
	5197	while (count3 >= coll->tertiaryTopCount) {
	5198	tertiaries.Append((uint8_t)(tertiaryTop - coll->tertiaryTopCount));
	5199	count3 -= (uint32_t)coll->tertiaryTopCount;
	5200	}
	5201	tertiaries.Append((uint8_t)(tertiaryTop - count3));
	5202	} else {
	5203	while (count3 > coll->tertiaryBottomCount) {
	5204	tertiaries.Append((uint8_t)(tertiaryBottom + coll->tertiaryBottomCount));
	5205	count3 -= (uint32_t)coll->tertiaryBottomCount;
	5206	}
	5207	tertiaries.Append((uint8_t)(tertiaryBottom + (count3-1)));
	5208	}
	5209	}
	5210	result.Append(UCOL_LEVELTERMINATOR);
	5211	result.Append(tertiaries);
	5212
	5213	result.Append(0);
	5214	}
	5215
	5216	/* To avoid memory leak, free the offset buffer if necessary. */
	5217	ucol_freeOffsetBuffer(&s);
	5218
	5219	if (U_SUCCESS(*status) && !result.IsOk()) {
	5220	*status = U_BUFFER_OVERFLOW_ERROR;
	5221	}
	5222	}
	5223
	5224	static inline
	5225	UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
	5226	UBool notIsContinuation = !isContinuation(CE);
	5227	uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);
	5228	if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)
	5229	\|\| (!notIsContinuation && *wasShifted)))
	5230	\|\| (wasShifted && primary1 == 0)) / amendment to the UCA says that primary ignorables */
	5231	{
	5232	// The stuff below should probably be in the sortkey code... maybe not...
	5233	if(primary1 != 0) { /* if we were shifted and we got an ignorable code point */
	5234	/* we should just completely ignore it */
	5235	*wasShifted = TRUE;
	5236	//continue;
	5237	}
	5238	//*wasShifted = TRUE;
	5239	return TRUE;
	5240	} else {
	5241	*wasShifted = FALSE;
	5242	return FALSE;
	5243	}
	5244	}
	5245	static inline
	5246	void terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) {
	5247	if(level < maxLevel) {
	5248	dest[i++] = UCOL_LEVELTERMINATOR;
	5249	} else {
	5250	dest[i++] = 0;
	5251	}
	5252	}
	5253
	/**
	 * Level identifiers for partial sort key generation.
	 * The values are consecutive, starting at 0.
	 */
	enum {
		UCOL_PSK_PRIMARY    = 0,
		UCOL_PSK_SECONDARY  = 1,
		UCOL_PSK_CASE       = 2,
		UCOL_PSK_TERTIARY   = 3,
		UCOL_PSK_QUATERNARY = 4,
		/** This is an extra level, not used - but we have three bits to blow */
		UCOL_PSK_QUIN       = 5,
		UCOL_PSK_IDENTICAL  = 6,
		/** level for the end of sort key. Will just produce zeros */
		UCOL_PSK_NULL       = 7,
		/** one more than the last level identifier */
		UCOL_PSK_LIMIT
	};
	5266
	/**
	 * Layout of the collation state word, which is stored in state[1].
	 * For each field, *_SHIFT is how far to shift the word right to bring the
	 * field to the low end, and *_MASK is ANDed with the shifted word to
	 * isolate the field.
	 */
	enum {
		/** level identifier: one of the UCOL_PSK_* level values */
		UCOL_PSK_LEVEL_SHIFT = 0,
		/** three bits */
		UCOL_PSK_LEVEL_MASK = 7,

		/** number of bytes of primary or quaternary already written */
		UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3,
		/**
		 * can be only 0 or 1, since we get up to two bytes from primary or quaternary.
		 * This field is also used to denote that the French secondary level is finished
		 */
		UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,

		/** was the last value shifted */
		UCOL_PSK_WAS_SHIFTED_SHIFT = 4,
		/** can be 0 or 1 (Boolean) */
		UCOL_PSK_WAS_SHIFTED_MASK = 1,

		/** how many French bytes have we already written */
		UCOL_PSK_USED_FRENCH_SHIFT = 5,
		/**
		 * two bits. When we do French we need to reverse secondary values.
		 * However, continuations need to stay the same. So if you had
		 * abc1c2c3de, you need to have edc1c2c3ba
		 */
		UCOL_PSK_USED_FRENCH_MASK = 3,

		/** two-bit field starting at bit 7 */
		UCOL_PSK_BOCSU_BYTES_SHIFT = 7,
		UCOL_PSK_BOCSU_BYTES_MASK = 3,

		/** field starting at bit 9; the mask 0x7FFFF keeps 19 bits */
		UCOL_PSK_CONSUMED_CES_SHIFT = 9,
		UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF
	};
	5292
	// Macro calculating the number of expansion CEs available:
	// the difference between the CEpos and toReturn members of (s).
	// The whole expansion is parenthesized so that the macro can be used
	// safely inside a larger expression.
	#define uprv_numAvailableExpCEs(s) ((s).CEpos - (s).toReturn)
	5295
	5296
	5297	/** main sortkey part procedure. On the first call,
	5298	* you should pass in a collator, an iterator, empty state
	5299	* state[0] == state[1] == 0, a buffer to hold results
	5300	* number of bytes you need and an error code pointer.
	5301	* Make sure your buffer is big enough to hold the wanted
	5302	* number of sortkey bytes. I don't check.
	5303	* The only meaningful status you can get back is
	5304	* U_BUFFER_OVERFLOW_ERROR, which basically means that you
	5305	* have been dealt a raw deal and that you probably won't
	5306	* be able to use partial sortkey generation for this
	5307	* particular combination of string and collator. This
	5308	* is highly unlikely, but you should still check the error code.
	5309	* Any other status means that you're not in a sane situation
	5310	* anymore. After the first call, preserve state values and
	5311	* use them on subsequent calls to obtain more bytes of a sortkey.
	5312	* Use until the number of bytes written is smaller than the requested
	5313	* number of bytes. Generated sortkey is not compatible with the
	5314	* one generated by ucol_getSortKey, as we don't do any compression.
	5315	* However, levels are still terminated by a 1 (one) and the sortkey
	5316	* is terminated by a 0 (zero). Identical level is the same as in the
	5317	* regular sortkey - internal bocu-1 implementation is used.
	5318	* For curious, although you cannot do much about this, here is
	5319	* the structure of state words.
	5320	* state[0] - iterator state. Depends on the iterator implementation,
	5321	* but allows the iterator to continue where it stopped in
	5322	* the last iteration.
	5323	* state[1] - collation processing state. Here is the distribution
	5324	* of the bits:
	5325	* 0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary
	5326	* quaternary, quin (we don't use this one), identical and
	5327	* null (producing only zeroes - first one to terminate the
	5328	* sortkey and subsequent to fill the buffer).
	5329	* 3 - byte count. Number of bytes written on the primary level.
	5330	* 4 - was shifted. Whether the previous iteration finished in the
	5331	* shifted state.
	5332	* 5, 6 - French continuation bytes written. See the comment in the enum
	5333	* 7,8 - Bocsu bytes used. Number of bytes from a bocu sequence on
	5334	* the identical level.
	5335	* 9..31 - CEs consumed. Number of getCE or next32 operations performed
	5336	* since the last successful update of the iterator state.
	5337	*/
	5338	U_CAPI int32_t U_EXPORT2
	5339	ucol_nextSortKeyPart(const UCollator *coll,
	5340	UCharIterator *iter,
	5341	uint32_t state[2],
	5342	uint8_t *dest, int32_t count,
	5343	UErrorCode *status)
	5344	{
	5345	/* error checking */
	5346	if(status==NULL \|\| U_FAILURE(*status)) {
	5347	return 0;
	5348	}
	5349	UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
	5350	if( coll==NULL \|\| iter==NULL \|\|
	5351	state==NULL \|\|
	5352	count<0 \|\| (count>0 && dest==NULL)
	5353	) {
	5354	*status=U_ILLEGAL_ARGUMENT_ERROR;
	5355	UTRACE_EXIT_STATUS(status);
	5356	return 0;
	5357	}
	5358
	5359	UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
	5360	coll, iter, state[0], state[1], dest, count);
	5361
	5362	if(count==0) {
	5363	/* nothing to do */
	5364	UTRACE_EXIT_VALUE(0);
	5365	return 0;
	5366	}
	5367	/** Setting up situation according to the state we got from the previous iteration */
	5368	// The state of the iterator from the previous invocation
	5369	uint32_t iterState = state[0];
	5370	// Has the last iteration ended in the shifted state
	5371	UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE;
	5372	// What is the current level of the sortkey?
	5373	int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
	5374	// Have we written only one byte from a two byte primary in the previous iteration?
	5375	// Also on secondary level - have we finished with the French secondary?
	5376	int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
	5377	// number of bytes in the continuation buffer for French
	5378	int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
	5379	// Number of bytes already written from a bocsu sequence. Since
	5380	// the longes bocsu sequence is 4 long, this can be up to 3.
	5381	int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
	5382	// Number of elements that need to be consumed in this iteration because
	5383	// the iterator returned UITER_NO_STATE at the end of the last iteration,
	5384	// so we had to save the last valid state.
	5385	int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
	5386
	5387	/** values that depend on the collator attributes */
	5388	// strength of the collator.
	5389	int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
	5390	// maximal level of the partial sortkey. Need to take whether case level is done
	5391	int32_t maxLevel = 0;
	5392	if(strength < UCOL_TERTIARY) {
	5393	if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
	5394	maxLevel = UCOL_PSK_CASE;
	5395	} else {
	5396	maxLevel = strength;
	5397	}
	5398	} else {
	5399	if(strength == UCOL_TERTIARY) {
	5400	maxLevel = UCOL_PSK_TERTIARY;
	5401	} else if(strength == UCOL_QUATERNARY) {
	5402	maxLevel = UCOL_PSK_QUATERNARY;
	5403	} else { // identical
	5404	maxLevel = UCOL_IDENTICAL;
	5405	}
	5406	}
	5407	// value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
	5408	uint8_t UCOL_HIRAGANA_QUAD =
	5409	(ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
	5410	// Boundary value that decides whether a CE is shifted or not
	5411	uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
	5412	// Are we doing French collation?
	5413	UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
	5414
	5415	/** initializing the collation state */
	5416	UBool notIsContinuation = FALSE;
	5417	uint32_t CE = UCOL_NO_MORE_CES;
	5418
	5419	collIterate s;
	5420	IInit_collIterate(coll, NULL, -1, &s, status);
	5421	if(U_FAILURE(*status)) {
	5422	UTRACE_EXIT_STATUS(*status);
	5423	return 0;
	5424	}
	5425	s.iterator = iter;
	5426	s.flags \|= UCOL_USE_ITERATOR;
	5427	// This variable tells us whether we have produced some other levels in this iteration
	5428	// before we moved to the identical level. In that case, we need to switch the
	5429	// type of the iterator.
	5430	UBool doingIdenticalFromStart = FALSE;
	5431	// Normalizing iterator
	5432	// The division for the array length may truncate the array size to
	5433	// a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
	5434	// for all platforms anyway.
	5435	UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
	5436	UNormIterator *normIter = NULL;
	5437	// If the normalization is turned on for the collator and we are below identical level
	5438	// we will use a FCD normalizing iterator
	5439	if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) {
	5440	normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
	5441	s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
	5442	s.flags &= ~UCOL_ITER_NORM;
	5443	if(U_FAILURE(*status)) {
	5444	UTRACE_EXIT_STATUS(*status);
	5445	return 0;
	5446	}
	5447	} else if(level == UCOL_PSK_IDENTICAL) {
	5448	// for identical level, we need a NFD iterator. We need to instantiate it here, since we
	5449	// will be updating the state - and this cannot be done on an ordinary iterator.
	5450	normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
	5451	s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
	5452	s.flags &= ~UCOL_ITER_NORM;
	5453	if(U_FAILURE(*status)) {
	5454	UTRACE_EXIT_STATUS(*status);
	5455	return 0;
	5456	}
	5457	doingIdenticalFromStart = TRUE;
	5458	}
	5459
	5460	// This is the tentative new state of the iterator. The problem
	5461	// is that the iterator might return an undefined state, in
	5462	// which case we should save the last valid state and increase
	5463	// the iterator skip value.
	5464	uint32_t newState = 0;
	5465
	5466	// First, we set the iterator to the last valid position
	5467	// from the last iteration. This was saved in state[0].
	5468	if(iterState == 0) {
	5469	/* initial state */
	5470	if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {
	5471	s.iterator->move(s.iterator, 0, UITER_LIMIT);
	5472	} else {
	5473	s.iterator->move(s.iterator, 0, UITER_START);
	5474	}
	5475	} else {
	5476	/* reset to previous state */
	5477	s.iterator->setState(s.iterator, iterState, status);
	5478	if(U_FAILURE(*status)) {
	5479	UTRACE_EXIT_STATUS(*status);
	5480	return 0;
	5481	}
	5482	}
	5483
	5484
	5485
	5486	// This variable tells us whether we can attempt to update the state
	5487	// of iterator. Situations where we don't want to update iterator state
	5488	// are the existence of expansion CEs that are not yet processed, and
	5489	// finishing the case level without enough space in the buffer to insert
	5490	// a level terminator.
	5491	UBool canUpdateState = TRUE;
	5492
	5493	// Consume all the CEs that were consumed at the end of the previous
	5494	// iteration without updating the iterator state. On identical level,
	5495	// consume the code points.
	5496	int32_t counter = cces;
	5497	if(level < UCOL_PSK_IDENTICAL) {
	5498	while(counter-->0) {
	5499	// If we're doing French and we are on the secondary level,
	5500	// we go backwards.
	5501	if(level == UCOL_PSK_SECONDARY && doingFrench) {
	5502	CE = ucol_IGetPrevCE(coll, &s, status);
	5503	} else {
	5504	CE = ucol_IGetNextCE(coll, &s, status);
	5505	}
	5506	if(CE==UCOL_NO_MORE_CES) {
	5507	/* should not happen */
	5508	*status=U_INTERNAL_PROGRAM_ERROR;
	5509	UTRACE_EXIT_STATUS(*status);
	5510	return 0;
	5511	}
	5512	if(uprv_numAvailableExpCEs(s)) {
	5513	canUpdateState = FALSE;
	5514	}
	5515	}
	5516	} else {
	5517	while(counter-->0) {
	5518	uiter_next32(s.iterator);
	5519	}
	5520	}
	5521
	5522	// French secondary needs to know whether the iterator state of zero came from previous level OR
	5523	// from a new invocation...
	5524	UBool wasDoingPrimary = FALSE;
	5525	// destination buffer byte counter. When this guy
	5526	// gets to count, we're done with the iteration
	5527	int32_t i = 0;
	5528	// used to count the zero bytes written after we
	5529	// have finished with the sort key
	5530	int32_t j = 0;
	5531
	5532
	5533	// Hm.... I think we're ready to plunge in. Basic story is as following:
	5534	// we have a fall through case based on level. This is used for initial
	5535	// positioning on iteration start. Every level processor contains a
	5536	// for(;;) which will be broken when we exhaust all the CEs. Other
	5537	// way to exit is a goto saveState, which happens when we have filled
	5538	// out our buffer.
	5539	switch(level) {
	5540	case UCOL_PSK_PRIMARY:
	5541	wasDoingPrimary = TRUE;
	5542	for(;;) {
	5543	if(i==count) {
	5544	goto saveState;
	5545	}
	5546	// We should save the state only if we
	5547	// are sure that we are done with the
	5548	// previous iterator state
	5549	if(canUpdateState && byteCountOrFrenchDone == 0) {
	5550	newState = s.iterator->getState(s.iterator);
	5551	if(newState != UITER_NO_STATE) {
	5552	iterState = newState;
	5553	cces = 0;
	5554	}
	5555	}
	5556	CE = ucol_IGetNextCE(coll, &s, status);
	5557	cces++;
	5558	if(CE==UCOL_NO_MORE_CES) {
	5559	// Add the level separator
	5560	terminatePSKLevel(level, maxLevel, i, dest);
	5561	byteCountOrFrenchDone=0;
	5562	// Restart the iteration an move to the
	5563	// second level
	5564	s.iterator->move(s.iterator, 0, UITER_START);
	5565	cces = 0;
	5566	level = UCOL_PSK_SECONDARY;
	5567	break;
	5568	}
	5569	if(!isContinuation(CE)){
	5570	if(coll->leadBytePermutationTable != NULL){
	5571	CE = (coll->leadBytePermutationTable[CE>>24] << 24) \| (CE & 0x00FFFFFF);
	5572	}
	5573	}
	5574	if(!isShiftedCE(CE, LVT, &wasShifted)) {
	5575	CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */
	5576	if(CE != 0) {
	5577	if(byteCountOrFrenchDone == 0) {
	5578	// get the second byte of primary
	5579	dest[i++]=(uint8_t)(CE >> 8);
	5580	} else {
	5581	byteCountOrFrenchDone = 0;
	5582	}
	5583	if((CE &=0xff)!=0) {
	5584	if(i==count) {
	5585	/* overflow */
	5586	byteCountOrFrenchDone = 1;
	5587	cces--;
	5588	goto saveState;
	5589	}
	5590	dest[i++]=(uint8_t)CE;
	5591	}
	5592	}
	5593	}
	5594	if(uprv_numAvailableExpCEs(s)) {
	5595	canUpdateState = FALSE;
	5596	} else {
	5597	canUpdateState = TRUE;
	5598	}
	5599	}
	5600	/* fall through to next level */
	5601	case UCOL_PSK_SECONDARY:
	5602	if(strength >= UCOL_SECONDARY) {
	5603	if(!doingFrench) {
	5604	for(;;) {
	5605	if(i == count) {
	5606	goto saveState;
	5607	}
	5608	// We should save the state only if we
	5609	// are sure that we are done with the
	5610	// previous iterator state
	5611	if(canUpdateState) {
	5612	newState = s.iterator->getState(s.iterator);
	5613	if(newState != UITER_NO_STATE) {
	5614	iterState = newState;
	5615	cces = 0;
	5616	}
	5617	}
	5618	CE = ucol_IGetNextCE(coll, &s, status);
	5619	cces++;
	5620	if(CE==UCOL_NO_MORE_CES) {
	5621	// Add the level separator
	5622	terminatePSKLevel(level, maxLevel, i, dest);
	5623	byteCountOrFrenchDone = 0;
	5624	// Restart the iteration an move to the
	5625	// second level
	5626	s.iterator->move(s.iterator, 0, UITER_START);
	5627	cces = 0;
	5628	level = UCOL_PSK_CASE;
	5629	break;
	5630	}
	5631	if(!isShiftedCE(CE, LVT, &wasShifted)) {
	5632	CE >>= 8; /* get secondary */
	5633	if(CE != 0) {
	5634	dest[i++]=(uint8_t)CE;
	5635	}
	5636	}
	5637	if(uprv_numAvailableExpCEs(s)) {
	5638	canUpdateState = FALSE;
	5639	} else {
	5640	canUpdateState = TRUE;
	5641	}
	5642	}
	5643	} else { // French secondary processing
	5644	uint8_t frenchBuff[UCOL_MAX_BUFFER];
	5645	int32_t frenchIndex = 0;
	5646	// Here we are going backwards.
	5647	// If the iterator is at the beggining, it should be
	5648	// moved to end.
	5649	if(wasDoingPrimary) {
	5650	s.iterator->move(s.iterator, 0, UITER_LIMIT);
	5651	cces = 0;
	5652	}
	5653	for(;;) {
	5654	if(i == count) {
	5655	goto saveState;
	5656	}
	5657	if(canUpdateState) {
	5658	newState = s.iterator->getState(s.iterator);
	5659	if(newState != UITER_NO_STATE) {
	5660	iterState = newState;
	5661	cces = 0;
	5662	}
	5663	}
	5664	CE = ucol_IGetPrevCE(coll, &s, status);
	5665	cces++;
	5666	if(CE==UCOL_NO_MORE_CES) {
	5667	// Add the level separator
	5668	terminatePSKLevel(level, maxLevel, i, dest);
	5669	byteCountOrFrenchDone = 0;
	5670	// Restart the iteration an move to the next level
	5671	s.iterator->move(s.iterator, 0, UITER_START);
	5672	level = UCOL_PSK_CASE;
	5673	break;
	5674	}
	5675	if(isContinuation(CE)) { // if it's a continuation, we want to save it and
	5676	// reverse when we get a first non-continuation CE.
	5677	CE >>= 8;
	5678	frenchBuff[frenchIndex++] = (uint8_t)CE;
	5679	} else if(!isShiftedCE(CE, LVT, &wasShifted)) {
	5680	CE >>= 8; /* get secondary */
	5681	if(!frenchIndex) {
	5682	if(CE != 0) {
	5683	dest[i++]=(uint8_t)CE;
	5684	}
	5685	} else {
	5686	frenchBuff[frenchIndex++] = (uint8_t)CE;
	5687	frenchIndex -= usedFrench;
	5688	usedFrench = 0;
	5689	while(i < count && frenchIndex) {
	5690	dest[i++] = frenchBuff[--frenchIndex];
	5691	usedFrench++;
	5692	}
	5693	}
	5694	}
	5695	if(uprv_numAvailableExpCEs(s)) {
	5696	canUpdateState = FALSE;
	5697	} else {
	5698	canUpdateState = TRUE;
	5699	}
	5700	}
	5701	}
	5702	} else {
	5703	level = UCOL_PSK_CASE;
	5704	}
	5705	/* fall through to next level */
	5706	case UCOL_PSK_CASE:
	5707	if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
	5708	uint32_t caseShift = UCOL_CASE_SHIFT_START;
	5709	uint8_t caseByte = UCOL_CASE_BYTE_START;
	5710	uint8_t caseBits = 0;
	5711
	5712	for(;;) {
	5713	U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START);
	5714	if(i == count) {
	5715	goto saveState;
	5716	}
	5717	// We should save the state only if we
	5718	// are sure that we are done with the
	5719	// previous iterator state
	5720	if(canUpdateState) {
	5721	newState = s.iterator->getState(s.iterator);
	5722	if(newState != UITER_NO_STATE) {
	5723	iterState = newState;
	5724	cces = 0;
	5725	}
	5726	}
	5727	CE = ucol_IGetNextCE(coll, &s, status);
	5728	cces++;
	5729	if(CE==UCOL_NO_MORE_CES) {
	5730	// On the case level we might have an unfinished
	5731	// case byte. Add one if it's started.
	5732	if(caseShift != UCOL_CASE_SHIFT_START) {
	5733	dest[i++] = caseByte;
	5734	}
	5735	cces = 0;
	5736	// We have finished processing CEs on this level.
	5737	// However, we don't know if we have enough space
	5738	// to add a case level terminator.
	5739	if(i < count) {
	5740	// Add the level separator
	5741	terminatePSKLevel(level, maxLevel, i, dest);
	5742	// Restart the iteration and move to the
	5743	// next level
	5744	s.iterator->move(s.iterator, 0, UITER_START);
	5745	level = UCOL_PSK_TERTIARY;
	5746	} else {
	5747	canUpdateState = FALSE;
	5748	}
	5749	break;
	5750	}
	5751
	5752	if(!isShiftedCE(CE, LVT, &wasShifted)) {
	5753	if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 \|\| strength > UCOL_PRIMARY)) {
	5754	// do the case level if we need to do it. We don't want to calculate
	5755	// case level for primary ignorables if we have only primary strength and case level
	5756	// otherwise we would break well formedness of CEs
	5757	CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
	5758	caseBits = (uint8_t)(CE & 0xC0);
	5759	// this copies the case level logic from the
	5760	// sort key generation code
	5761	if(CE != 0) {
	5762	if (caseShift == 0) {
	5763	dest[i++] = caseByte;
	5764	caseShift = UCOL_CASE_SHIFT_START;
	5765	caseByte = UCOL_CASE_BYTE_START;
	5766	}
	5767	if(coll->caseFirst == UCOL_UPPER_FIRST) {
	5768	if((caseBits & 0xC0) == 0) {
	5769	caseByte \|= 1 << (--caseShift);
	5770	} else {
	5771	caseByte \|= 0 << (--caseShift);
	5772	/* second bit */
	5773	if(caseShift == 0) {
	5774	dest[i++] = caseByte;
	5775	caseShift = UCOL_CASE_SHIFT_START;
	5776	caseByte = UCOL_CASE_BYTE_START;
	5777	}
	5778	caseByte \|= ((caseBits>>6)&1) << (--caseShift);
	5779	}
	5780	} else {
	5781	if((caseBits & 0xC0) == 0) {
	5782	caseByte \|= 0 << (--caseShift);
	5783	} else {
	5784	caseByte \|= 1 << (--caseShift);
	5785	/* second bit */
	5786	if(caseShift == 0) {
	5787	dest[i++] = caseByte;
	5788	caseShift = UCOL_CASE_SHIFT_START;
	5789	caseByte = UCOL_CASE_BYTE_START;
	5790	}
	5791	caseByte \|= ((caseBits>>7)&1) << (--caseShift);
	5792	}
	5793	}
	5794	}
	5795
	5796	}
	5797	}
	5798	// Not sure this is correct for the case level - revisit
	5799	if(uprv_numAvailableExpCEs(s)) {
	5800	canUpdateState = FALSE;
	5801	} else {
	5802	canUpdateState = TRUE;
	5803	}
	5804	}
	5805	} else {
	5806	level = UCOL_PSK_TERTIARY;
	5807	}
	5808	/* fall through to next level */
	5809	case UCOL_PSK_TERTIARY:
	5810	if(strength >= UCOL_TERTIARY) {
	5811	for(;;) {
	5812	if(i == count) {
	5813	goto saveState;
	5814	}
	5815	// We should save the state only if we
	5816	// are sure that we are done with the
	5817	// previous iterator state
	5818	if(canUpdateState) {
	5819	newState = s.iterator->getState(s.iterator);
	5820	if(newState != UITER_NO_STATE) {
	5821	iterState = newState;
	5822	cces = 0;
	5823	}
	5824	}
	5825	CE = ucol_IGetNextCE(coll, &s, status);
	5826	cces++;
	5827	if(CE==UCOL_NO_MORE_CES) {
	5828	// Add the level separator
	5829	terminatePSKLevel(level, maxLevel, i, dest);
	5830	byteCountOrFrenchDone = 0;
	5831	// Restart the iteration an move to the
	5832	// second level
	5833	s.iterator->move(s.iterator, 0, UITER_START);
	5834	cces = 0;
	5835	level = UCOL_PSK_QUATERNARY;
	5836	break;
	5837	}
	5838	if(!isShiftedCE(CE, LVT, &wasShifted)) {
	5839	notIsContinuation = !isContinuation(CE);
	5840
	5841	if(notIsContinuation) {
	5842	CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
	5843	CE ^= coll->caseSwitch;
	5844	CE &= coll->tertiaryMask;
	5845	} else {
	5846	CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
	5847	}
	5848
	5849	if(CE != 0) {
	5850	dest[i++]=(uint8_t)CE;
	5851	}
	5852	}
	5853	if(uprv_numAvailableExpCEs(s)) {
	5854	canUpdateState = FALSE;
	5855	} else {
	5856	canUpdateState = TRUE;
	5857	}
	5858	}
	5859	} else {
	5860	// if we're not doing tertiary
	5861	// skip to the end
	5862	level = UCOL_PSK_NULL;
	5863	}
	5864	/* fall through to next level */
	5865	case UCOL_PSK_QUATERNARY:
	5866	if(strength >= UCOL_QUATERNARY) {
	5867	for(;;) {
	5868	if(i == count) {
	5869	goto saveState;
	5870	}
	5871	// We should save the state only if we
	5872	// are sure that we are done with the
	5873	// previous iterator state
	5874	if(canUpdateState) {
	5875	newState = s.iterator->getState(s.iterator);
	5876	if(newState != UITER_NO_STATE) {
	5877	iterState = newState;
	5878	cces = 0;
	5879	}
	5880	}
	5881	CE = ucol_IGetNextCE(coll, &s, status);
	5882	cces++;
	5883	if(CE==UCOL_NO_MORE_CES) {
	5884	// Add the level separator
	5885	terminatePSKLevel(level, maxLevel, i, dest);
	5886	//dest[i++] = UCOL_LEVELTERMINATOR;
	5887	byteCountOrFrenchDone = 0;
	5888	// Restart the iteration an move to the
	5889	// second level
	5890	s.iterator->move(s.iterator, 0, UITER_START);
	5891	cces = 0;
	5892	level = UCOL_PSK_QUIN;
	5893	break;
	5894	}
	5895	if(CE==0)
	5896	continue;
	5897	if(isShiftedCE(CE, LVT, &wasShifted)) {
	5898	CE >>= 16; /* get primary */
	5899	if(CE != 0) {
	5900	if(byteCountOrFrenchDone == 0) {
	5901	dest[i++]=(uint8_t)(CE >> 8);
	5902	} else {
	5903	byteCountOrFrenchDone = 0;
	5904	}
	5905	if((CE &=0xff)!=0) {
	5906	if(i==count) {
	5907	/* overflow */
	5908	byteCountOrFrenchDone = 1;
	5909	goto saveState;
	5910	}
	5911	dest[i++]=(uint8_t)CE;
	5912	}
	5913	}
	5914	} else {
	5915	notIsContinuation = !isContinuation(CE);
	5916	if(notIsContinuation) {
	5917	if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
	5918	dest[i++] = UCOL_HIRAGANA_QUAD;
	5919	} else {
	5920	dest[i++] = 0xFF;
	5921	}
	5922	}
	5923	}
	5924	if(uprv_numAvailableExpCEs(s)) {
	5925	canUpdateState = FALSE;
	5926	} else {
	5927	canUpdateState = TRUE;
	5928	}
	5929	}
	5930	} else {
	5931	// if we're not doing quaternary
	5932	// skip to the end
	5933	level = UCOL_PSK_NULL;
	5934	}
	5935	/* fall through to next level */
	5936	case UCOL_PSK_QUIN:
	5937	level = UCOL_PSK_IDENTICAL;
	5938	/* fall through to next level */
	5939	case UCOL_PSK_IDENTICAL:
	5940	if(strength >= UCOL_IDENTICAL) {
	5941	UChar32 first, second;
	5942	int32_t bocsuBytesWritten = 0;
	5943	// We always need to do identical on
	5944	// the NFD form of the string.
	5945	if(normIter == NULL) {
	5946	// we arrived from the level below and
	5947	// normalization was not turned on.
	5948	// therefore, we need to make a fresh NFD iterator
	5949	normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
	5950	s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
	5951	} else if(!doingIdenticalFromStart) {
	5952	// there is an iterator, but we did some other levels.
	5953	// therefore, we have a FCD iterator - need to make
	5954	// a NFD one.
	5955	// normIter being at the beginning does not guarantee
	5956	// that the underlying iterator is at the beginning
	5957	iter->move(iter, 0, UITER_START);
	5958	s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
	5959	}
	5960	// At this point we have a NFD iterator that is positioned
	5961	// in the right place
	5962	if(U_FAILURE(*status)) {
	5963	UTRACE_EXIT_STATUS(*status);
	5964	return 0;
	5965	}
	5966	first = uiter_previous32(s.iterator);
	5967	// maybe we're at the start of the string
	5968	if(first == U_SENTINEL) {
	5969	first = 0;
	5970	} else {
	5971	uiter_next32(s.iterator);
	5972	}
	5973
	5974	j = 0;
	5975	for(;;) {
	5976	if(i == count) {
	5977	if(j+1 < bocsuBytesWritten) {
	5978	bocsuBytesUsed = j+1;
	5979	}
	5980	goto saveState;
	5981	}
	5982
	5983	// On identical level, we will always save
	5984	// the state if we reach this point, since
	5985	// we don't depend on getNextCE for content
	5986	// all the content is in our buffer and we
	5987	// already either stored the full buffer OR
	5988	// otherwise we won't arrive here.
	5989	newState = s.iterator->getState(s.iterator);
	5990	if(newState != UITER_NO_STATE) {
	5991	iterState = newState;
	5992	cces = 0;
	5993	}
	5994
	5995	uint8_t buff[4];
	5996	second = uiter_next32(s.iterator);
	5997	cces++;
	5998
	5999	// end condition for identical level
	6000	if(second == U_SENTINEL) {
	6001	terminatePSKLevel(level, maxLevel, i, dest);
	6002	level = UCOL_PSK_NULL;
	6003	break;
	6004	}
	6005	bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff);
	6006	first = second;
	6007
	6008	j = 0;
	6009	if(bocsuBytesUsed != 0) {
	6010	while(bocsuBytesUsed-->0) {
	6011	j++;
	6012	}
	6013	}
	6014
	6015	while(i < count && j < bocsuBytesWritten) {
	6016	dest[i++] = buff[j++];
	6017	}
	6018	}
	6019
	6020	} else {
	6021	level = UCOL_PSK_NULL;
	6022	}
	6023	/* fall through to next level */
	6024	case UCOL_PSK_NULL:
	6025	j = i;
	6026	while(j<count) {
	6027	dest[j++]=0;
	6028	}
	6029	break;
	6030	default:
	6031	*status = U_INTERNAL_PROGRAM_ERROR;
	6032	UTRACE_EXIT_STATUS(*status);
	6033	return 0;
	6034	}
	6035
	6036	saveState:
	6037	// Now we need to return stuff. First we want to see whether we have
	6038	// done everything for the current state of iterator.
	6039	if(byteCountOrFrenchDone
	6040	\|\| canUpdateState == FALSE
	6041	\|\| (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)
	6042	{
	6043	// Any of above mean that the previous transaction
	6044	// wasn't finished and that we should store the
	6045	// previous iterator state.
	6046	state[0] = iterState;
	6047	} else {
	6048	// The transaction is complete. We will continue in the next iteration.
	6049	state[0] = s.iterator->getState(s.iterator);
	6050	cces = 0;
	6051	}
	6052	// Store the number of bocsu bytes written.
	6053	if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
	6054	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	6055	}
	6056	state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
	6057
	6058	// Next we put in the level of comparison
	6059	state[1] \|= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
	6060
	6061	// If we are doing French, we need to store whether we have just finished the French level
	6062	if(level == UCOL_PSK_SECONDARY && doingFrench) {
	6063	state[1] \|= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
	6064	} else {
	6065	state[1] \|= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
	6066	}
	6067
	6068	// Was the latest CE shifted
	6069	if(wasShifted) {
	6070	state[1] \|= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
	6071	}
	6072	// Check for cces overflow
	6073	if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
	6074	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	6075	}
	6076	// Store cces
	6077	state[1] \|= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
	6078
	6079	// Check for French overflow
	6080	if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
	6081	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	6082	}
	6083	// Store number of bytes written in the French secondary continuation sequence
	6084	state[1] \|= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT);
	6085
	6086
	6087	// If we have used normalizing iterator, get rid of it
	6088	if(normIter != NULL) {
	6089	unorm_closeIter(normIter);
	6090	}
	6091
	6092	/* To avoid memory leak, free the offset buffer if necessary. */
	6093	ucol_freeOffsetBuffer(&s);
	6094
	6095	// Return number of meaningful sortkey bytes.
	6096	UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
	6097	dest,i, state[0], state[1]);
	6098	UTRACE_EXIT_VALUE(i);
	6099	return i;
	6100	}
	6101
	6102	/**
	6103	* Produce a bound for a given sortkey and a number of levels.
	6104	*/
	6105	U_CAPI int32_t U_EXPORT2
	6106	ucol_getBound(const uint8_t *source,
	6107	int32_t sourceLength,
	6108	UColBoundMode boundType,
	6109	uint32_t noOfLevels,
	6110	uint8_t *result,
	6111	int32_t resultLength,
	6112	UErrorCode *status)
	6113	{
	6114	// consistency checks
	6115	if(status == NULL \|\| U_FAILURE(*status)) {
	6116	return 0;
	6117	}
	6118	if(source == NULL) {
	6119	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6120	return 0;
	6121	}
	6122
	6123	int32_t sourceIndex = 0;
	6124	// Scan the string until we skip enough of the key OR reach the end of the key
	6125	do {
	6126	sourceIndex++;
	6127	if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
	6128	noOfLevels--;
	6129	}
	6130	} while (noOfLevels > 0
	6131	&& (source[sourceIndex] != 0 \|\| sourceIndex < sourceLength));
	6132
	6133	if((source[sourceIndex] == 0 \|\| sourceIndex == sourceLength)
	6134	&& noOfLevels > 0) {
	6135	*status = U_SORT_KEY_TOO_SHORT_WARNING;
	6136	}
	6137
	6138
	6139	// READ ME: this code assumes that the values for boundType
	6140	// enum will not changes. They are set so that the enum value
	6141	// corresponds to the number of extra bytes each bound type
	6142	// needs.
	6143	if(result != NULL && resultLength >= sourceIndex+boundType) {
	6144	uprv_memcpy(result, source, sourceIndex);
	6145	switch(boundType) {
	6146	// Lower bound just gets terminated. No extra bytes
	6147	case UCOL_BOUND_LOWER: // = 0
	6148	break;
	6149	// Upper bound needs one extra byte
	6150	case UCOL_BOUND_UPPER: // = 1
	6151	result[sourceIndex++] = 2;
	6152	break;
	6153	// Upper long bound needs two extra bytes
	6154	case UCOL_BOUND_UPPER_LONG: // = 2
	6155	result[sourceIndex++] = 0xFF;
	6156	result[sourceIndex++] = 0xFF;
	6157	break;
	6158	default:
	6159	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6160	return 0;
	6161	}
	6162	result[sourceIndex++] = 0;
	6163
	6164	return sourceIndex;
	6165	} else {
	6166	return sourceIndex+boundType+1;
	6167	}
	6168	}
	6169
	6170	/****************************************************************************/
	6171	/* Following are the functions that deal with the properties of a collator */
	6172	/* there are new APIs and some compatibility APIs */
	6173	/****************************************************************************/
	6174
	6175	static inline void
	6176	ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
	6177	int32_t primShift, int32_t secShift, int32_t *terShift)
	6178	{
	6179	uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
	6180	UBool reverseSecondary = FALSE;
	6181	UBool continuation = isContinuation(CE);
	6182	if(!continuation) {
	6183	tertiary = (uint8_t)((CE & coll->tertiaryMask));
	6184	tertiary ^= coll->caseSwitch;
	6185	reverseSecondary = TRUE;
	6186	} else {
	6187	tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
	6188	tertiary &= UCOL_REMOVE_CASE;
	6189	reverseSecondary = FALSE;
	6190	}
	6191
	6192	secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
	6193	primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);
	6194	primary1 = (uint8_t)(CE >> 8);
	6195
	6196	if(primary1 != 0) {
	6197	if (coll->leadBytePermutationTable != NULL && !continuation) {
	6198	primary1 = coll->leadBytePermutationTable[primary1];
	6199	}
	6200
	6201	coll->latinOneCEs[ch] \|= (primary1 << *primShift);
	6202	*primShift -= 8;
	6203	}
	6204	if(primary2 != 0) {
	6205	if(*primShift < 0) {
	6206	coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
	6207	coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
	6208	coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
	6209	return;
	6210	}
	6211	coll->latinOneCEs[ch] \|= (primary2 << *primShift);
	6212	*primShift -= 8;
	6213	}
	6214	if(secondary != 0) {
	6215	if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse secondary
	6216	coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space for secondary
	6217	coll->latinOneCEs[coll->latinOneTableLen+ch] \|= (secondary << 24);
	6218	} else { // normal case
	6219	coll->latinOneCEs[coll->latinOneTableLen+ch] \|= (secondary << *secShift);
	6220	}
	6221	*secShift -= 8;
	6222	}
	6223	if(tertiary != 0) {
	6224	coll->latinOneCEs[2coll->latinOneTableLen+ch] \|= (tertiary << terShift);
	6225	*terShift -= 8;
	6226	}
	6227	}
	6228
	6229	static inline UBool
	6230	ucol_resizeLatinOneTable(UCollator coll, int32_t size, UErrorCode status) {
	6231	uint32_t newTable = (uint32_t )uprv_malloc(sizesizeof(uint32_t)3);
	6232	if(newTable == NULL) {
	6233	*status = U_MEMORY_ALLOCATION_ERROR;
	6234	coll->latinOneFailed = TRUE;
	6235	return FALSE;
	6236	}
	6237	int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t);
	6238	uprv_memset(newTable, 0, sizesizeof(uint32_t)3);
	6239	uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy);
	6240	uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToCopy);
	6241	uprv_memcpy(newTable+2size, coll->latinOneCEs+2coll->latinOneTableLen, sizeToCopy);
	6242	coll->latinOneTableLen = size;
	6243	uprv_free(coll->latinOneCEs);
	6244	coll->latinOneCEs = newTable;
	6245	return TRUE;
	6246	}
	6247
	6248	static UBool
	6249	ucol_setUpLatinOne(UCollator coll, UErrorCode status) {
	6250	UBool result = TRUE;
	6251	if(coll->latinOneCEs == NULL) {
	6252	coll->latinOneCEs = (uint32_t )uprv_malloc(sizeof(uint32_t)UCOL_LATINONETABLELEN*3);
	6253	if(coll->latinOneCEs == NULL) {
	6254	*status = U_MEMORY_ALLOCATION_ERROR;
	6255	return FALSE;
	6256	}
	6257	coll->latinOneTableLen = UCOL_LATINONETABLELEN;
	6258	}
	6259	UChar ch = 0;
	6260	UCollationElements *it = ucol_openElements(coll, &ch, 1, status);
	6261	// Check for null pointer
	6262	if (U_FAILURE(*status)) {
	6263	return FALSE;
	6264	}
	6265	uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)coll->latinOneTableLen3);
	6266
	6267	int32_t primShift = 24, secShift = 24, terShift = 24;
	6268	uint32_t CE = 0;
	6269	int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;
	6270
	6271	// TODO: make safe if you get more than you wanted...
	6272	for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {
	6273	primShift = 24; secShift = 24; terShift = 24;
	6274	if(ch < 0x100) {
	6275	CE = coll->latinOneMapping[ch];
	6276	} else {
	6277	CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
	6278	if(CE == UCOL_NOT_FOUND && coll->UCA) {
	6279	CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
	6280	}
	6281	}
	6282	if(CE < UCOL_NOT_FOUND) {
	6283	ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
	6284	} else {
	6285	switch (getCETag(CE)) {
	6286	case EXPANSION_TAG:
	6287	case DIGIT_TAG:
	6288	ucol_setText(it, &ch, 1, status);
	6289	while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
	6290	if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {
	6291	coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
	6292	coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
	6293	coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
	6294	break;
	6295	}
	6296	ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
	6297	}
	6298	break;
	6299	case CONTRACTION_TAG:
	6300	// here is the trick
	6301	// F2 is contraction. We do something very similar to contractions
	6302	// but have two indices, one in the real contraction table and the
	6303	// other to where we stuffed things. This hopes that we don't have
	6304	// many contractions (this should work for latin-1 tables).
	6305	{
	6306	if((CE & 0x00FFF000) != 0) {
	6307	*status = U_UNSUPPORTED_ERROR;
	6308	goto cleanup_after_failure;
	6309	}
	6310
	6311	const UChar UCharOffset = (UChar )coll->image+getContractOffset(CE);
	6312
	6313	CE \|= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
	6314
	6315	coll->latinOneCEs[ch] = CE;
	6316	coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
	6317	coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
	6318
	6319	// We're going to jump into contraction table, pick the elements
	6320	// and use them
	6321	do {
	6322	CE = *(coll->contractionCEs +
	6323	(UCharOffset - coll->contractionIndex));
	6324	if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
	6325	uint32_t size;
	6326	uint32_t i; /* general counter */
	6327	uint32_t CEOffset = (uint32_t )coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
	6328	size = getExpansionCount(CE);
	6329	//CE = *CEOffset++;
	6330	if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
	6331	for(i = 0; i<size; i++) {
	6332	if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {
	6333	coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6334	coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6335	coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6336	break;
	6337	}
	6338	ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
	6339	}
	6340	} else { /* else, we do */
	6341	while(*CEOffset != 0) {
	6342	if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {
	6343	coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6344	coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6345	coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6346	break;
	6347	}
	6348	ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
	6349	}
	6350	}
	6351	contractionOffset++;
	6352	} else if(CE < UCOL_NOT_FOUND) {
	6353	ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift);
	6354	} else {
	6355	coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6356	coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6357	coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
	6358	contractionOffset++;
	6359	}
	6360	UCharOffset++;
	6361	primShift = 24; secShift = 24; terShift = 24;
	6362	if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
	6363	if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
	6364	goto cleanup_after_failure;
	6365	}
	6366	}
	6367	} while(*UCharOffset != 0xFFFF);
	6368	}
	6369	break;;
	6370	case SPEC_PROC_TAG:
	6371	{
	6372	// 0xB7 is a precontext character defined in UCA5.1, a special
	6373	// handle is implemeted in order to save LatinOne table for
	6374	// most locales.
	6375	if (ch==0xb7) {
	6376	ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
	6377	}
	6378	else {
	6379	goto cleanup_after_failure;
	6380	}
	6381	}
	6382	break;
	6383	default:
	6384	goto cleanup_after_failure;
	6385	}
	6386	}
	6387	}
	6388	// compact table
	6389	if(contractionOffset < coll->latinOneTableLen) {
	6390	if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
	6391	goto cleanup_after_failure;
	6392	}
	6393	}
	6394	ucol_closeElements(it);
	6395	return result;
	6396
	6397	cleanup_after_failure:
	6398	// status should already be set before arriving here.
	6399	coll->latinOneFailed = TRUE;
	6400	ucol_closeElements(it);
	6401	return FALSE;
	6402	}
	6403
	6404	void ucol_updateInternalState(UCollator coll, UErrorCode status) {
	6405	if(U_SUCCESS(*status)) {
	6406	if(coll->caseFirst == UCOL_UPPER_FIRST) {
	6407	coll->caseSwitch = UCOL_CASE_SWITCH;
	6408	} else {
	6409	coll->caseSwitch = UCOL_NO_CASE_SWITCH;
	6410	}
	6411
	6412	if(coll->caseLevel == UCOL_ON \|\| coll->caseFirst == UCOL_OFF) {
	6413	coll->tertiaryMask = UCOL_REMOVE_CASE;
	6414	coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
	6415	coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */
	6416	coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
	6417	coll->tertiaryBottom = UCOL_COMMON_BOT3;
	6418	} else {
	6419	coll->tertiaryMask = UCOL_KEEP_CASE;
	6420	coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
	6421	if(coll->caseFirst == UCOL_UPPER_FIRST) {
	6422	coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
	6423	coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
	6424	coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
	6425	} else {
	6426	coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
	6427	coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
	6428	coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
	6429	}
	6430	}
	6431
	6432	/* Set the compression values */
	6433	uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - coll->tertiaryBottom - 1);
	6434	coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3tertiaryTotal); / we multilply double with int, but need only int */
	6435	coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
	6436
	6437	if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
	6438	&& coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE)
	6439	{
	6440	coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
	6441	} else {
	6442	coll->sortKeyGen = ucol_calcSortKey;
	6443	}
	6444	if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
	6445	&& coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed)
	6446	{
	6447	if(coll->latinOneCEs == NULL \|\| coll->latinOneRegenTable) {
	6448	if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
	6449	//fprintf(stderr, "F");
	6450	coll->latinOneUse = TRUE;
	6451	} else {
	6452	coll->latinOneUse = FALSE;
	6453	}
	6454	if(*status == U_UNSUPPORTED_ERROR) {
	6455	*status = U_ZERO_ERROR;
	6456	}
	6457	} else { // latin1Table exists and it doesn't need to be regenerated, just use it
	6458	coll->latinOneUse = TRUE;
	6459	}
	6460	} else {
	6461	coll->latinOneUse = FALSE;
	6462	}
	6463	}
	6464	}
	6465
	6466	U_CAPI uint32_t U_EXPORT2
	6467	ucol_setVariableTop(UCollator coll, const UChar varTop, int32_t len, UErrorCode *status) {
	6468	if(U_FAILURE(*status) \|\| coll == NULL) {
	6469	return 0;
	6470	}
	6471	if(len == -1) {
	6472	len = u_strlen(varTop);
	6473	}
	6474	if(len == 0) {
	6475	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6476	return 0;
	6477	}
	6478
	6479	if(coll->delegate!=NULL) {
	6480	return ((Collator)coll->delegate)->setVariableTop(varTop, len, status);
	6481	}
	6482
	6483
	6484	collIterate s;
	6485	IInit_collIterate(coll, varTop, len, &s, status);
	6486	if(U_FAILURE(*status)) {
	6487	return 0;
	6488	}
	6489
	6490	uint32_t CE = ucol_IGetNextCE(coll, &s, status);
	6491
	6492	/* here we check if we have consumed all characters */
	6493	/* you can put in either one character or a contraction */
	6494	/* you shouldn't put more... */
	6495	if(s.pos != s.endp \|\| CE == UCOL_NO_MORE_CES) {
	6496	*status = U_CE_NOT_FOUND_ERROR;
	6497	return 0;
	6498	}
	6499
	6500	uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);
	6501
	6502	if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {
	6503	*status = U_PRIMARY_TOO_LONG_ERROR;
	6504	return 0;
	6505	}
	6506	if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
	6507	coll->variableTopValueisDefault = FALSE;
	6508	coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
	6509	}
	6510
	6511	/* To avoid memory leak, free the offset buffer if necessary. */
	6512	ucol_freeOffsetBuffer(&s);
	6513
	6514	return CE & UCOL_PRIMARYMASK;
	6515	}
	6516
	6517	U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator coll, UErrorCode status) {
	6518	if(U_FAILURE(*status) \|\| coll == NULL) {
	6519	return 0;
	6520	}
	6521	if(coll->delegate!=NULL) {
	6522	return ((const Collator)coll->delegate)->getVariableTop(status);
	6523	}
	6524	return coll->variableTopValue<<16;
	6525	}
	6526
	6527	U_CAPI void U_EXPORT2
	6528	ucol_restoreVariableTop(UCollator coll, const uint32_t varTop, UErrorCode status) {
	6529	if(U_FAILURE(*status) \|\| coll == NULL) {
	6530	return;
	6531	}
	6532
	6533	if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
	6534	coll->variableTopValueisDefault = FALSE;
	6535	coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
	6536	}
	6537	}
	6538	/* Attribute setter API */
	6539	U_CAPI void U_EXPORT2
	6540	ucol_setAttribute(UCollator coll, UColAttribute attr, UColAttributeValue value, UErrorCode status) {
	6541	if(U_FAILURE(*status) \|\| coll == NULL) {
	6542	return;
	6543	}
	6544
	6545	if(coll->delegate != NULL) {
	6546	((Collator)coll->delegate)->setAttribute(attr,value,status);
	6547	return;
	6548	}
	6549
	6550	UColAttributeValue oldFrench = coll->frenchCollation;
	6551	UColAttributeValue oldCaseFirst = coll->caseFirst;
	6552	switch(attr) {
	6553	case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */
	6554	if(value == UCOL_ON) {
	6555	coll->numericCollation = UCOL_ON;
	6556	coll->numericCollationisDefault = FALSE;
	6557	} else if (value == UCOL_OFF) {
	6558	coll->numericCollation = UCOL_OFF;
	6559	coll->numericCollationisDefault = FALSE;
	6560	} else if (value == UCOL_DEFAULT) {
	6561	coll->numericCollationisDefault = TRUE;
	6562	coll->numericCollation = (UColAttributeValue)coll->options->numericCollation;
	6563	} else {
	6564	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6565	}
	6566	break;
	6567	case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */
	6568	if(value == UCOL_ON) {
	6569	coll->hiraganaQ = UCOL_ON;
	6570	coll->hiraganaQisDefault = FALSE;
	6571	} else if (value == UCOL_OFF) {
	6572	coll->hiraganaQ = UCOL_OFF;
	6573	coll->hiraganaQisDefault = FALSE;
	6574	} else if (value == UCOL_DEFAULT) {
	6575	coll->hiraganaQisDefault = TRUE;
	6576	coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ;
	6577	} else {
	6578	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6579	}
	6580	break;
	6581	case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
	6582	if(value == UCOL_ON) {
	6583	coll->frenchCollation = UCOL_ON;
	6584	coll->frenchCollationisDefault = FALSE;
	6585	} else if (value == UCOL_OFF) {
	6586	coll->frenchCollation = UCOL_OFF;
	6587	coll->frenchCollationisDefault = FALSE;
	6588	} else if (value == UCOL_DEFAULT) {
	6589	coll->frenchCollationisDefault = TRUE;
	6590	coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation;
	6591	} else {
	6592	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6593	}
	6594	break;
	6595	case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
	6596	if(value == UCOL_SHIFTED) {
	6597	coll->alternateHandling = UCOL_SHIFTED;
	6598	coll->alternateHandlingisDefault = FALSE;
	6599	} else if (value == UCOL_NON_IGNORABLE) {
	6600	coll->alternateHandling = UCOL_NON_IGNORABLE;
	6601	coll->alternateHandlingisDefault = FALSE;
	6602	} else if (value == UCOL_DEFAULT) {
	6603	coll->alternateHandlingisDefault = TRUE;
	6604	coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ;
	6605	} else {
	6606	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6607	}
	6608	break;
	6609	case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
	6610	if(value == UCOL_LOWER_FIRST) {
	6611	coll->caseFirst = UCOL_LOWER_FIRST;
	6612	coll->caseFirstisDefault = FALSE;
	6613	} else if (value == UCOL_UPPER_FIRST) {
	6614	coll->caseFirst = UCOL_UPPER_FIRST;
	6615	coll->caseFirstisDefault = FALSE;
	6616	} else if (value == UCOL_OFF) {
	6617	coll->caseFirst = UCOL_OFF;
	6618	coll->caseFirstisDefault = FALSE;
	6619	} else if (value == UCOL_DEFAULT) {
	6620	coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;
	6621	coll->caseFirstisDefault = TRUE;
	6622	} else {
	6623	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6624	}
	6625	break;
	6626	case UCOL_CASE_LEVEL: /* do we have an extra case level */
	6627	if(value == UCOL_ON) {
	6628	coll->caseLevel = UCOL_ON;
	6629	coll->caseLevelisDefault = FALSE;
	6630	} else if (value == UCOL_OFF) {
	6631	coll->caseLevel = UCOL_OFF;
	6632	coll->caseLevelisDefault = FALSE;
	6633	} else if (value == UCOL_DEFAULT) {
	6634	coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;
	6635	coll->caseLevelisDefault = TRUE;
	6636	} else {
	6637	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6638	}
	6639	break;
	6640	case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
	6641	if(value == UCOL_ON) {
	6642	coll->normalizationMode = UCOL_ON;
	6643	coll->normalizationModeisDefault = FALSE;
	6644	initializeFCD(status);
	6645	} else if (value == UCOL_OFF) {
	6646	coll->normalizationMode = UCOL_OFF;
	6647	coll->normalizationModeisDefault = FALSE;
	6648	} else if (value == UCOL_DEFAULT) {
	6649	coll->normalizationModeisDefault = TRUE;
	6650	coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode;
	6651	if(coll->normalizationMode == UCOL_ON) {
	6652	initializeFCD(status);
	6653	}
	6654	} else {
	6655	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6656	}
	6657	break;
	6658	case UCOL_STRENGTH: /* attribute for strength */
	6659	if (value == UCOL_DEFAULT) {
	6660	coll->strengthisDefault = TRUE;
	6661	coll->strength = (UColAttributeValue)coll->options->strength;
	6662	} else if (value <= UCOL_IDENTICAL) {
	6663	coll->strengthisDefault = FALSE;
	6664	coll->strength = value;
	6665	} else {
	6666	*status = U_ILLEGAL_ARGUMENT_ERROR ;
	6667	}
	6668	break;
	6669	case UCOL_ATTRIBUTE_COUNT:
	6670	default:
	6671	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6672	break;
	6673	}
	6674	if(oldFrench != coll->frenchCollation \|\| oldCaseFirst != coll->caseFirst) {
	6675	coll->latinOneRegenTable = TRUE;
	6676	} else {
	6677	coll->latinOneRegenTable = FALSE;
	6678	}
	6679	ucol_updateInternalState(coll, status);
	6680	}
	6681
	6682	U_CAPI UColAttributeValue U_EXPORT2
	6683	ucol_getAttribute(const UCollator coll, UColAttribute attr, UErrorCode status) {
	6684	if(U_FAILURE(*status) \|\| coll == NULL) {
	6685	return UCOL_DEFAULT;
	6686	}
	6687
	6688	if(coll->delegate != NULL) {
	6689	return ((Collator)coll->delegate)->getAttribute(attr,status);
	6690	}
	6691
	6692	switch(attr) {
	6693	case UCOL_NUMERIC_COLLATION:
	6694	return coll->numericCollation;
	6695	case UCOL_HIRAGANA_QUATERNARY_MODE:
	6696	return coll->hiraganaQ;
	6697	case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
	6698	return coll->frenchCollation;
	6699	case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
	6700	return coll->alternateHandling;
	6701	case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
	6702	return coll->caseFirst;
	6703	case UCOL_CASE_LEVEL: /* do we have an extra case level */
	6704	return coll->caseLevel;
	6705	case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
	6706	return coll->normalizationMode;
	6707	case UCOL_STRENGTH: /* attribute for strength */
	6708	return coll->strength;
	6709	case UCOL_ATTRIBUTE_COUNT:
	6710	default:
	6711	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6712	break;
	6713	}
	6714	return UCOL_DEFAULT;
	6715	}
	6716
	6717	U_CAPI void U_EXPORT2
	6718	ucol_setStrength( UCollator *coll,
	6719	UCollationStrength strength)
	6720	{
	6721	UErrorCode status = U_ZERO_ERROR;
	6722	ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
	6723	}
	6724
	6725	U_CAPI UCollationStrength U_EXPORT2
	6726	ucol_getStrength(const UCollator *coll)
	6727	{
	6728	UErrorCode status = U_ZERO_ERROR;
	6729	return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
	6730	}
	6731
	6732	U_DRAFT int32_t U_EXPORT2
	6733	ucol_getReorderCodes(const UCollator *coll,
	6734	int32_t *dest,
	6735	int32_t destCapacity,
	6736	UErrorCode *status) {
	6737	if (U_FAILURE(*status)) {
	6738	return 0;
	6739	}
	6740
	6741	if(coll->delegate!=NULL) {
	6742	return ((const Collator)coll->delegate)->getReorderCodes(dest, destCapacity, status);
	6743	}
	6744
	6745	if (destCapacity < 0 \|\| (destCapacity > 0 && dest == NULL)) {
	6746	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6747	return 0;
	6748	}
	6749
	6750	#ifdef UCOL_DEBUG
	6751	printf("coll->reorderCodesLength = %d\n", coll->reorderCodesLength);
	6752	printf("coll->defaultReorderCodesLength = %d\n", coll->defaultReorderCodesLength);
	6753	#endif
	6754
	6755	if (coll->reorderCodesLength > destCapacity) {
	6756	*status = U_BUFFER_OVERFLOW_ERROR;
	6757	return coll->reorderCodesLength;
	6758	}
	6759	for (int32_t i = 0; i < coll->reorderCodesLength; i++) {
	6760	dest[i] = coll->reorderCodes[i];
	6761	}
	6762	return coll->reorderCodesLength;
	6763	}
	6764
	6765	U_DRAFT void U_EXPORT2
	6766	ucol_setReorderCodes(UCollator* coll,
	6767	const int32_t* reorderCodes,
	6768	int32_t reorderCodesLength,
	6769	UErrorCode *status) {
	6770	if (U_FAILURE(*status)) {
	6771	return;
	6772	}
	6773
	6774	if (reorderCodesLength < 0 \|\| (reorderCodesLength > 0 && reorderCodes == NULL)) {
	6775	*status = U_ILLEGAL_ARGUMENT_ERROR;
	6776	return;
	6777	}
	6778
	6779	if(coll->delegate!=NULL) {
	6780	((Collator)coll->delegate)->setReorderCodes(reorderCodes, reorderCodesLength, status);
	6781	return;
	6782	}
	6783
	6784	if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
	6785	uprv_free(coll->reorderCodes);
	6786	}
	6787	coll->reorderCodes = NULL;
	6788	coll->reorderCodesLength = 0;
	6789	if (reorderCodesLength == 0) {
	6790	if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
	6791	uprv_free(coll->leadBytePermutationTable);
	6792	}
	6793	coll->leadBytePermutationTable = NULL;
	6794	return;
	6795	}
	6796	coll->reorderCodes = (int32_t) uprv_malloc(reorderCodesLength sizeof(int32_t));
	6797	if (coll->reorderCodes == NULL) {
	6798	*status = U_MEMORY_ALLOCATION_ERROR;
	6799	return;
	6800	}
	6801	coll->freeReorderCodesOnClose = TRUE;
	6802	for (int32_t i = 0; i < reorderCodesLength; i++) {
	6803	coll->reorderCodes[i] = reorderCodes[i];
	6804	}
	6805	coll->reorderCodesLength = reorderCodesLength;
	6806	ucol_buildPermutationTable(coll, status);
	6807	}
	6808
	6809	U_DRAFT int32_t U_EXPORT2
	6810	ucol_getEquivalentReorderCodes(int32_t reorderCode,
	6811	int32_t* dest,
	6812	int32_t destCapacity,
	6813	UErrorCode *pErrorCode) {
	6814	bool equivalentCodesSet[USCRIPT_CODE_LIMIT];
	6815	uint16_t leadBytes[256];
	6816	int leadBytesCount;
	6817	int leadByteIndex;
	6818	int16_t reorderCodesForLeadByte[USCRIPT_CODE_LIMIT];
	6819	int reorderCodesForLeadByteCount;
	6820	int reorderCodeIndex;
	6821
	6822	int32_t equivalentCodesCount = 0;
	6823	int setIndex;
	6824
	6825	if (U_FAILURE(*pErrorCode)) {
	6826	return 0;
	6827	}
	6828
	6829	if (destCapacity < 0 \|\| (destCapacity > 0 && dest == NULL)) {
	6830	*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
	6831	return 0;
	6832	}
	6833
	6834	uprv_memset(equivalentCodesSet, 0, USCRIPT_CODE_LIMIT * sizeof(bool));
	6835
	6836	const UCollator* uca = ucol_initUCA(pErrorCode);
	6837	if (U_FAILURE(*pErrorCode)) {
	6838	return 0;
	6839	}
	6840	leadBytesCount = ucol_getLeadBytesForReorderCode(uca, reorderCode, leadBytes, 256);
	6841	for (leadByteIndex = 0; leadByteIndex < leadBytesCount; leadByteIndex++) {
	6842	reorderCodesForLeadByteCount = ucol_getReorderCodesForLeadByte(
	6843	uca, leadBytes[leadByteIndex], reorderCodesForLeadByte, USCRIPT_CODE_LIMIT);
	6844	for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodesForLeadByteCount; reorderCodeIndex++) {
	6845	equivalentCodesSet[reorderCodesForLeadByte[reorderCodeIndex]] = true;
	6846	}
	6847	}
	6848
	6849	for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
	6850	if (equivalentCodesSet[setIndex] == true) {
	6851	equivalentCodesCount++;
	6852	}
	6853	}
	6854
	6855	if (destCapacity == 0) {
	6856	return equivalentCodesCount;
	6857	}
	6858
	6859	equivalentCodesCount = 0;
	6860	for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
	6861	if (equivalentCodesSet[setIndex] == true) {
	6862	dest[equivalentCodesCount++] = setIndex;
	6863	if (equivalentCodesCount >= destCapacity) {
	6864	break;
	6865	}
	6866	}
	6867	}
	6868	return equivalentCodesCount;
	6869	}
	6870
	6871
	6872	/****************************************************************************/
	6873	/* Following are misc functions */
	6874	/* there are new APIs and some compatibility APIs */
	6875	/****************************************************************************/
	6876
	6877	U_CAPI void U_EXPORT2
	6878	ucol_getVersion(const UCollator* coll,
	6879	UVersionInfo versionInfo)
	6880	{
	6881	if(coll->delegate!=NULL) {
	6882	((const Collator*)coll->delegate)->getVersion(versionInfo);
	6883	return;
	6884	}
	6885	/* RunTime version */
	6886	uint8_t rtVersion = UCOL_RUNTIME_VERSION;
	6887	/* Builder version*/
	6888	uint8_t bdVersion = coll->image->version[0];
	6889
	6890	/* Charset Version. Need to get the version from cnv files
	6891	* makeconv should populate cnv files with version and
	6892	* an api has to be provided in ucnv.h to obtain this version
	6893	*/
	6894	uint8_t csVersion = 0;
	6895
	6896	/* combine the version info */
	6897	uint16_t cmbVersion = (uint16_t)((rtVersion<<11) \| (bdVersion<<6) \| (csVersion));
	6898
	6899	/* Tailoring rules */
	6900	versionInfo[0] = (uint8_t)(cmbVersion>>8);
	6901	versionInfo[1] = (uint8_t)cmbVersion;
	6902	versionInfo[2] = coll->image->version[1];
	6903	if(coll->UCA) {
	6904	/* Include the minor number when getting the UCA version. (major & 1f) << 3 \| (minor & 7) */
	6905	versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 \| (coll->UCA->image->UCAVersion[1] & 0x07);
	6906	} else {
	6907	versionInfo[3] = 0;
	6908	}
	6909	}
	6910
	6911
	6912	/* This internal API checks whether a character is tailored or not */
	6913	U_CAPI UBool U_EXPORT2
	6914	ucol_isTailored(const UCollator coll, const UChar u, UErrorCode status) {
	6915	if(U_FAILURE(*status) \|\| coll == NULL \|\| coll == coll->UCA) {
	6916	return FALSE;
	6917	}
	6918
	6919	uint32_t CE = UCOL_NOT_FOUND;
	6920	const UChar *ContractionStart = NULL;
	6921	if(u < 0x100) { /* latin-1 */
	6922	CE = coll->latinOneMapping[u];
	6923	if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
	6924	return FALSE;
	6925	}
	6926	} else { /* regular */
	6927	CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
	6928	}
	6929
	6930	if(isContraction(CE)) {
	6931	ContractionStart = (UChar *)coll->image+getContractOffset(CE);
	6932	CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex));
	6933	}
	6934
	6935	return (UBool)(CE != UCOL_NOT_FOUND);
	6936	}
	6937
	6938
	6939	/****************************************************************************/
	6940	/* Following are the string compare functions */
	6941	/* */
	6942	/****************************************************************************/
	6943
	6944
	6945	/* ucol_checkIdent internal function. Does byte level string compare. */
	6946	/* Used by strcoll if strength == identical and strings */
	6947	/* are otherwise equal. */
	6948	/* */
	6949	/* Comparison must be done on NFD normalized strings. */
	6950	/* FCD is not good enough. */
	6951
	6952	static
	6953	UCollationResult ucol_checkIdent(collIterate sColl, collIterate tColl, UBool normalize, UErrorCode *status)
	6954	{
	6955	// When we arrive here, we can have normal strings or UCharIterators. Currently they are both
	6956	// of same type, but that doesn't really mean that it will stay that way.
	6957	int32_t comparison;
	6958
	6959	if (sColl->flags & UCOL_USE_ITERATOR) {
	6960	// The division for the array length may truncate the array size to
	6961	// a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
	6962	// for all platforms anyway.
	6963	UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
	6964	UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
	6965	UNormIterator sNIt = NULL, tNIt = NULL;
	6966	sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
	6967	tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
	6968	sColl->iterator->move(sColl->iterator, 0, UITER_START);
	6969	tColl->iterator->move(tColl->iterator, 0, UITER_START);
	6970	UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status);
	6971	UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status);
	6972	comparison = u_strCompareIter(sIt, tIt, TRUE);
	6973	unorm_closeIter(sNIt);
	6974	unorm_closeIter(tNIt);
	6975	} else {
	6976	int32_t sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1;
	6977	const UChar *sBuf = sColl->string;
	6978	int32_t tLen = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1;
	6979	const UChar *tBuf = tColl->string;
	6980
	6981	if (normalize) {
	6982	*status = U_ZERO_ERROR;
	6983	// Note: We could use Normalizer::compare() or similar, but for short strings
	6984	// which may not be in FCD it might be faster to just NFD them.
	6985	// Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than
	6986	// NFD'ing immediately might be faster for long strings,
	6987	// but string comparison is usually done on relatively short strings.
	6988	sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen),
	6989	sColl->writableBuffer,
	6990	*status);
	6991	tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen),
	6992	tColl->writableBuffer,
	6993	*status);
	6994	if(U_FAILURE(*status)) {
	6995	return UCOL_LESS;
	6996	}
	6997	comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer);
	6998	} else {
	6999	comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE);
	7000	}
	7001	}
	7002
	7003	if (comparison < 0) {
	7004	return UCOL_LESS;
	7005	} else if (comparison == 0) {
	7006	return UCOL_EQUAL;
	7007	} else /* comparison > 0 */ {
	7008	return UCOL_GREATER;
	7009	}
	7010	}
	7011
	7012	/* CEBuf - A struct and some inline functions to handle the saving */
	7013	/* of CEs in a buffer within ucol_strcoll */
	7014
	7015	#define UCOL_CEBUF_SIZE 512
	7016	typedef struct ucol_CEBuf {
	7017	uint32_t *buf;
	7018	uint32_t *endp;
	7019	uint32_t *pos;
	7020	uint32_t localArray[UCOL_CEBUF_SIZE];
	7021	} ucol_CEBuf;
	7022
	7023
	7024	static
	7025	inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
	7026	(b)->buf = (b)->pos = (b)->localArray;
	7027	(b)->endp = (b)->buf + UCOL_CEBUF_SIZE;
	7028	}
	7029
	7030	static
	7031	void ucol_CEBuf_Expand(ucol_CEBuf b, collIterate ci, UErrorCode *status) {
	7032	uint32_t oldSize;
	7033	uint32_t newSize;
	7034	uint32_t *newBuf;
	7035
	7036	ci->flags \|= UCOL_ITER_ALLOCATED;
	7037	oldSize = (uint32_t)(b->pos - b->buf);
	7038	newSize = oldSize * 2;
	7039	newBuf = (uint32_t )uprv_malloc(newSize sizeof(uint32_t));
	7040	if(newBuf == NULL) {
	7041	*status = U_MEMORY_ALLOCATION_ERROR;
	7042	}
	7043	else {
	7044	uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
	7045	if (b->buf != b->localArray) {
	7046	uprv_free(b->buf);
	7047	}
	7048	b->buf = newBuf;
	7049	b->endp = b->buf + newSize;
	7050	b->pos = b->buf + oldSize;
	7051	}
	7052	}
	7053
	7054	static
	7055	inline void UCOL_CEBUF_PUT(ucol_CEBuf b, uint32_t ce, collIterate ci, UErrorCode *status) {
	7056	if (b->pos == b->endp) {
	7057	ucol_CEBuf_Expand(b, ci, status);
	7058	}
	7059	if (U_SUCCESS(*status)) {
	7060	*(b)->pos++ = ce;
	7061	}
	7062	}
	7063
	7064	/* This is a trick string compare function that goes in and uses sortkeys to compare */
	7065	/* It is used when compare gets in trouble and needs to bail out */
	7066	static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
	7067	collIterate *tColl,
	7068	UErrorCode *status)
	7069	{
	7070	uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];
	7071	uint8_t *sourceKeyP = sourceKey;
	7072	uint8_t *targetKeyP = targetKey;
	7073	int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER;
	7074	const UCollator *coll = sColl->coll;
	7075	const UChar *source = NULL;
	7076	const UChar *target = NULL;
	7077	int32_t result = UCOL_EQUAL;
	7078	UnicodeString sourceString, targetString;
	7079	int32_t sourceLength;
	7080	int32_t targetLength;
	7081
	7082	if(sColl->flags & UCOL_USE_ITERATOR) {
	7083	sColl->iterator->move(sColl->iterator, 0, UITER_START);
	7084	tColl->iterator->move(tColl->iterator, 0, UITER_START);
	7085	UChar32 c;
	7086	while((c=sColl->iterator->next(sColl->iterator))>=0) {
	7087	sourceString.append((UChar)c);
	7088	}
	7089	while((c=tColl->iterator->next(tColl->iterator))>=0) {
	7090	targetString.append((UChar)c);
	7091	}
	7092	source = sourceString.getBuffer();
	7093	sourceLength = sourceString.length();
	7094	target = targetString.getBuffer();
	7095	targetLength = targetString.length();
	7096	} else { // no iterators
	7097	sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sColl->string):-1;
	7098	targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tColl->string):-1;
	7099	source = sColl->string;
	7100	target = tColl->string;
	7101	}
	7102
	7103
	7104
	7105	sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
	7106	if(sourceKeyLen > UCOL_MAX_BUFFER) {
	7107	sourceKeyP = (uint8_t)uprv_malloc(sourceKeyLensizeof(uint8_t));
	7108	if(sourceKeyP == NULL) {
	7109	*status = U_MEMORY_ALLOCATION_ERROR;
	7110	goto cleanup_and_do_compare;
	7111	}
	7112	sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourceKeyLen);
	7113	}
	7114
	7115	targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
	7116	if(targetKeyLen > UCOL_MAX_BUFFER) {
	7117	targetKeyP = (uint8_t)uprv_malloc(targetKeyLensizeof(uint8_t));
	7118	if(targetKeyP == NULL) {
	7119	*status = U_MEMORY_ALLOCATION_ERROR;
	7120	goto cleanup_and_do_compare;
	7121	}
	7122	targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targetKeyLen);
	7123	}
	7124
	7125	result = uprv_strcmp((const char)sourceKeyP, (const char)targetKeyP);
	7126
	7127	cleanup_and_do_compare:
	7128	if(sourceKeyP != NULL && sourceKeyP != sourceKey) {
	7129	uprv_free(sourceKeyP);
	7130	}
	7131
	7132	if(targetKeyP != NULL && targetKeyP != targetKey) {
	7133	uprv_free(targetKeyP);
	7134	}
	7135
	7136	if(result<0) {
	7137	return UCOL_LESS;
	7138	} else if(result>0) {
	7139	return UCOL_GREATER;
	7140	} else {
	7141	return UCOL_EQUAL;
	7142	}
	7143	}
	7144
	7145
	7146	static UCollationResult
	7147	ucol_strcollRegular(collIterate sColl, collIterate tColl, UErrorCode *status)
	7148	{
	7149	U_ALIGN_CODE(16);
	7150
	7151	const UCollator *coll = sColl->coll;
	7152
	7153
	7154	// setting up the collator parameters
	7155	UColAttributeValue strength = coll->strength;
	7156	UBool initialCheckSecTer = (strength >= UCOL_SECONDARY);
	7157
	7158	UBool checkSecTer = initialCheckSecTer;
	7159	UBool checkTertiary = (strength >= UCOL_TERTIARY);
	7160	UBool checkQuad = (strength >= UCOL_QUATERNARY);
	7161	UBool checkIdent = (strength == UCOL_IDENTICAL);
	7162	UBool checkCase = (coll->caseLevel == UCOL_ON);
	7163	UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;
	7164	UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
	7165	UBool qShifted = shifted && checkQuad;
	7166	UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
	7167
	7168	if(doHiragana && shifted) {
	7169	return (ucol_compareUsingSortKeys(sColl, tColl, status));
	7170	}
	7171	uint8_t caseSwitch = coll->caseSwitch;
	7172	uint8_t tertiaryMask = coll->tertiaryMask;
	7173
	7174	// This is the lowest primary value that will not be ignored if shifted
	7175	uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;
	7176
	7177	UCollationResult result = UCOL_EQUAL;
	7178	UCollationResult hirResult = UCOL_EQUAL;
	7179
	7180	// Preparing the CE buffers. They will be filled during the primary phase
	7181	ucol_CEBuf sCEs;
	7182	ucol_CEBuf tCEs;
	7183	UCOL_INIT_CEBUF(&sCEs);
	7184	UCOL_INIT_CEBUF(&tCEs);
	7185
	7186	uint32_t secS = 0, secT = 0;
	7187	uint32_t sOrder=0, tOrder=0;
	7188
	7189	// Non shifted primary processing is quite simple
	7190	if(!shifted) {
	7191	for(;;) {
	7192
	7193	// We fetch CEs until we hit a non ignorable primary or end.
	7194	do {
	7195	// We get the next CE
	7196	sOrder = ucol_IGetNextCE(coll, sColl, status);
	7197	// Stuff it in the buffer
	7198	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7199	// And keep just the primary part.
	7200	sOrder &= UCOL_PRIMARYMASK;
	7201	} while(sOrder == 0);
	7202
	7203	// see the comments on the above block
	7204	do {
	7205	tOrder = ucol_IGetNextCE(coll, tColl, status);
	7206	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7207	tOrder &= UCOL_PRIMARYMASK;
	7208	} while(tOrder == 0);
	7209
	7210	// if both primaries are the same
	7211	if(sOrder == tOrder) {
	7212	// and there are no more CEs, we advance to the next level
	7213	if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
	7214	break;
	7215	}
	7216	if(doHiragana && hirResult == UCOL_EQUAL) {
	7217	if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
	7218	hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
	7219	? UCOL_LESS:UCOL_GREATER;
	7220	}
	7221	}
	7222	} else {
	7223	// only need to check one for continuation
	7224	// if one is then the other must be or the preceding CE would be a prefix of the other
	7225	if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) {
	7226	sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) \| (sOrder & 0x00FFFFFF);
	7227	tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) \| (tOrder & 0x00FFFFFF);
	7228	}
	7229	// if two primaries are different, we are done
	7230	result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER;
	7231	goto commonReturn;
	7232	}
	7233	} // no primary difference... do the rest from the buffers
	7234	} else { // shifted - do a slightly more complicated processing :)
	7235	for(;;) {
	7236	UBool sInShifted = FALSE;
	7237	UBool tInShifted = FALSE;
	7238	// This version of code can be refactored. However, it seems easier to understand this way.
	7239	// Source loop. Sam as the target loop.
	7240	for(;;) {
	7241	sOrder = ucol_IGetNextCE(coll, sColl, status);
	7242	if(sOrder == UCOL_NO_MORE_CES) {
	7243	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7244	break;
	7245	} else if(sOrder == 0 \|\| (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
	7246	/* UCA amendment - ignore ignorables that follow shifted code points */
	7247	continue;
	7248	} else if(isContinuation(sOrder)) {
	7249	if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
	7250	if(sInShifted) {
	7251	sOrder = (sOrder & UCOL_PRIMARYMASK) \| 0xC0; /* preserve interesting continuation */
	7252	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7253	continue;
	7254	} else {
	7255	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7256	break;
	7257	}
	7258	} else { /* Just lower level values */
	7259	if(sInShifted) {
	7260	continue;
	7261	} else {
	7262	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7263	continue;
	7264	}
	7265	}
	7266	} else { /* regular */
	7267	if(coll->leadBytePermutationTable != NULL){
	7268	sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) \| (sOrder & 0x00FFFFFF);
	7269	}
	7270	if((sOrder & UCOL_PRIMARYMASK) > LVT) {
	7271	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7272	break;
	7273	} else {
	7274	if((sOrder & UCOL_PRIMARYMASK) > 0) {
	7275	sInShifted = TRUE;
	7276	sOrder &= UCOL_PRIMARYMASK;
	7277	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7278	continue;
	7279	} else {
	7280	UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
	7281	sInShifted = FALSE;
	7282	continue;
	7283	}
	7284	}
	7285	}
	7286	}
	7287	sOrder &= UCOL_PRIMARYMASK;
	7288	sInShifted = FALSE;
	7289
	7290	for(;;) {
	7291	tOrder = ucol_IGetNextCE(coll, tColl, status);
	7292	if(tOrder == UCOL_NO_MORE_CES) {
	7293	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7294	break;
	7295	} else if(tOrder == 0 \|\| (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
	7296	/* UCA amendment - ignore ignorables that follow shifted code points */
	7297	continue;
	7298	} else if(isContinuation(tOrder)) {
	7299	if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
	7300	if(tInShifted) {
	7301	tOrder = (tOrder & UCOL_PRIMARYMASK) \| 0xC0; /* preserve interesting continuation */
	7302	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7303	continue;
	7304	} else {
	7305	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7306	break;
	7307	}
	7308	} else { /* Just lower level values */
	7309	if(tInShifted) {
	7310	continue;
	7311	} else {
	7312	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7313	continue;
	7314	}
	7315	}
	7316	} else { /* regular */
	7317	if(coll->leadBytePermutationTable != NULL){
	7318	tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) \| (tOrder & 0x00FFFFFF);
	7319	}
	7320	if((tOrder & UCOL_PRIMARYMASK) > LVT) {
	7321	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7322	break;
	7323	} else {
	7324	if((tOrder & UCOL_PRIMARYMASK) > 0) {
	7325	tInShifted = TRUE;
	7326	tOrder &= UCOL_PRIMARYMASK;
	7327	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7328	continue;
	7329	} else {
	7330	UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
	7331	tInShifted = FALSE;
	7332	continue;
	7333	}
	7334	}
	7335	}
	7336	}
	7337	tOrder &= UCOL_PRIMARYMASK;
	7338	tInShifted = FALSE;
	7339
	7340	if(sOrder == tOrder) {
	7341	/*
	7342	if(doHiragana && hirResult == UCOL_EQUAL) {
	7343	if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
	7344	hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
	7345	? UCOL_LESS:UCOL_GREATER;
	7346	}
	7347	}
	7348	*/
	7349	if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
	7350	break;
	7351	} else {
	7352	sOrder = 0;
	7353	tOrder = 0;
	7354	continue;
	7355	}
	7356	} else {
	7357	result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;
	7358	goto commonReturn;
	7359	}
	7360	} /* no primary difference... do the rest from the buffers */
	7361	}
	7362
	7363	/* now, we're gonna reexamine collected CEs */
	7364	uint32_t *sCE;
	7365	uint32_t *tCE;
	7366
	7367	/* This is the secondary level of comparison */
	7368	if(checkSecTer) {
	7369	if(!isFrenchSec) { /* normal */
	7370	sCE = sCEs.buf;
	7371	tCE = tCEs.buf;
	7372	for(;;) {
	7373	while (secS == 0) {
	7374	secS = *(sCE++) & UCOL_SECONDARYMASK;
	7375	}
	7376
	7377	while(secT == 0) {
	7378	secT = *(tCE++) & UCOL_SECONDARYMASK;
	7379	}
	7380
	7381	if(secS == secT) {
	7382	if(secS == UCOL_NO_MORE_CES_SECONDARY) {
	7383	break;
	7384	} else {
	7385	secS = 0; secT = 0;
	7386	continue;
	7387	}
	7388	} else {
	7389	result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
	7390	goto commonReturn;
	7391	}
	7392	}
	7393	} else { /* do the French */
	7394	uint32_t *sCESave = NULL;
	7395	uint32_t *tCESave = NULL;
	7396	sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */
	7397	tCE = tCEs.pos-2;
	7398	for(;;) {
	7399	while (secS == 0 && sCE >= sCEs.buf) {
	7400	if(sCESave == NULL) {
	7401	secS = *(sCE--);
	7402	if(isContinuation(secS)) {
	7403	while(isContinuation(secS = *(sCE--)))
	7404	;
	7405	/* after this, secS has the start of continuation, and sCEs points before that */
	7406	sCESave = sCE; /* we save it, so that we know where to come back AND that we need to go forward */
	7407	sCE+=2; /* need to point to the first continuation CP */
	7408	/* However, now you can just continue doing stuff */
	7409	}
	7410	} else {
	7411	secS = *(sCE++);
	7412	if(!isContinuation(secS)) { /* This means we have finished with this cont */
	7413	sCE = sCESave; /* reset the pointer to before continuation */
	7414	sCESave = NULL;
	7415	secS = 0; /* Fetch a fresh CE before the continuation sequence. */
	7416	continue;
	7417	}
	7418	}
	7419	secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */
	7420	}
	7421
	7422	while(secT == 0 && tCE >= tCEs.buf) {
	7423	if(tCESave == NULL) {
	7424	secT = *(tCE--);
	7425	if(isContinuation(secT)) {
	7426	while(isContinuation(secT = *(tCE--)))
	7427	;
	7428	/* after this, secS has the start of continuation, and sCEs points before that */
	7429	tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */
	7430	tCE+=2; /* need to point to the first continuation CP */
	7431	/* However, now you can just continue doing stuff */
	7432	}
	7433	} else {
	7434	secT = *(tCE++);
	7435	if(!isContinuation(secT)) { /* This means we have finished with this cont */
	7436	tCE = tCESave; /* reset the pointer to before continuation */
	7437	tCESave = NULL;
	7438	secT = 0; /* Fetch a fresh CE before the continuation sequence. */
	7439	continue;
	7440	}
	7441	}
	7442	secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */
	7443	}
	7444
	7445	if(secS == secT) {
	7446	if(secS == UCOL_NO_MORE_CES_SECONDARY \|\| (sCE < sCEs.buf && tCE < tCEs.buf)) {
	7447	break;
	7448	} else {
	7449	secS = 0; secT = 0;
	7450	continue;
	7451	}
	7452	} else {
	7453	result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
	7454	goto commonReturn;
	7455	}
	7456	}
	7457	}
	7458	}
	7459
	7460	/* doing the case bit */
	7461	if(checkCase) {
	7462	sCE = sCEs.buf;
	7463	tCE = tCEs.buf;
	7464	for(;;) {
	7465	while((secS & UCOL_REMOVE_CASE) == 0) {
	7466	if(!isContinuation(*sCE++)) {
	7467	secS =*(sCE-1);
	7468	if(((secS & UCOL_PRIMARYMASK) != 0) \|\| strength > UCOL_PRIMARY) {
	7469	// primary ignorables should not be considered on the case level when the strength is primary
	7470	// otherwise, the CEs stop being well-formed
	7471	secS &= UCOL_TERT_CASE_MASK;
	7472	secS ^= caseSwitch;
	7473	} else {
	7474	secS = 0;
	7475	}
	7476	} else {
	7477	secS = 0;
	7478	}
	7479	}
	7480
	7481	while((secT & UCOL_REMOVE_CASE) == 0) {
	7482	if(!isContinuation(*tCE++)) {
	7483	secT = *(tCE-1);
	7484	if(((secT & UCOL_PRIMARYMASK) != 0) \|\| strength > UCOL_PRIMARY) {
	7485	// primary ignorables should not be considered on the case level when the strength is primary
	7486	// otherwise, the CEs stop being well-formed
	7487	secT &= UCOL_TERT_CASE_MASK;
	7488	secT ^= caseSwitch;
	7489	} else {
	7490	secT = 0;
	7491	}
	7492	} else {
	7493	secT = 0;
	7494	}
	7495	}
	7496
	7497	if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {
	7498	result = UCOL_LESS;
	7499	goto commonReturn;
	7500	} else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {
	7501	result = UCOL_GREATER;
	7502	goto commonReturn;
	7503	}
	7504
	7505	if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY \|\| (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {
	7506	break;
	7507	} else {
	7508	secS = 0;
	7509	secT = 0;
	7510	}
	7511	}
	7512	}
	7513
	7514	/* Tertiary level */
	7515	if(checkTertiary) {
	7516	secS = 0;
	7517	secT = 0;
	7518	sCE = sCEs.buf;
	7519	tCE = tCEs.buf;
	7520	for(;;) {
	7521	while((secS & UCOL_REMOVE_CASE) == 0) {
	7522	secS = *(sCE++) & tertiaryMask;
	7523	if(!isContinuation(secS)) {
	7524	secS ^= caseSwitch;
	7525	} else {
	7526	secS &= UCOL_REMOVE_CASE;
	7527	}
	7528	}
	7529
	7530	while((secT & UCOL_REMOVE_CASE) == 0) {
	7531	secT = *(tCE++) & tertiaryMask;
	7532	if(!isContinuation(secT)) {
	7533	secT ^= caseSwitch;
	7534	} else {
	7535	secT &= UCOL_REMOVE_CASE;
	7536	}
	7537	}
	7538
	7539	if(secS == secT) {
	7540	if((secS & UCOL_REMOVE_CASE) == 1) {
	7541	break;
	7542	} else {
	7543	secS = 0; secT = 0;
	7544	continue;
	7545	}
	7546	} else {
	7547	result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
	7548	goto commonReturn;
	7549	}
	7550	}
	7551	}
	7552
	7553
	7554	if(qShifted /checkQuad/) {
	7555	UBool sInShifted = TRUE;
	7556	UBool tInShifted = TRUE;
	7557	secS = 0;
	7558	secT = 0;
	7559	sCE = sCEs.buf;
	7560	tCE = tCEs.buf;
	7561	for(;;) {
	7562	while((secS == 0 && secS != UCOL_NO_MORE_CES) \|\| (isContinuation(secS) && !sInShifted)) {
	7563	secS = *(sCE++);
	7564	if(isContinuation(secS)) {
	7565	if(!sInShifted) {
	7566	continue;
	7567	}
	7568	} else if(secS > LVT \|\| (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */
	7569	secS = UCOL_PRIMARYMASK;
	7570	sInShifted = FALSE;
	7571	} else {
	7572	sInShifted = TRUE;
	7573	}
	7574	}
	7575	secS &= UCOL_PRIMARYMASK;
	7576
	7577
	7578	while((secT == 0 && secT != UCOL_NO_MORE_CES) \|\| (isContinuation(secT) && !tInShifted)) {
	7579	secT = *(tCE++);
	7580	if(isContinuation(secT)) {
	7581	if(!tInShifted) {
	7582	continue;
	7583	}
	7584	} else if(secT > LVT \|\| (secT & UCOL_PRIMARYMASK) == 0) {
	7585	secT = UCOL_PRIMARYMASK;
	7586	tInShifted = FALSE;
	7587	} else {
	7588	tInShifted = TRUE;
	7589	}
	7590	}
	7591	secT &= UCOL_PRIMARYMASK;
	7592
	7593	if(secS == secT) {
	7594	if(secS == UCOL_NO_MORE_CES_PRIMARY) {
	7595	break;
	7596	} else {
	7597	secS = 0; secT = 0;
	7598	continue;
	7599	}
	7600	} else {
	7601	result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
	7602	goto commonReturn;
	7603	}
	7604	}
	7605	} else if(doHiragana && hirResult != UCOL_EQUAL) {
	7606	// If we're fine on quaternaries, we might be different
	7607	// on Hiragana. This, however, might fail us in shifted.
	7608	result = hirResult;
	7609	goto commonReturn;
	7610	}
	7611
	7612	/* For IDENTICAL comparisons, we use a bitwise character comparison */
	7613	/* as a tiebreaker if all else is equal. */
	7614	/* Getting here should be quite rare - strings are not identical - */
	7615	/* that is checked first, but compared == through all other checks. */
	7616	if(checkIdent)
	7617	{
	7618	//result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON);
	7619	result = ucol_checkIdent(sColl, tColl, TRUE, status);
	7620	}
	7621
	7622	commonReturn:
	7623	if ((sColl->flags \| tColl->flags) & UCOL_ITER_ALLOCATED) {
	7624	if (sCEs.buf != sCEs.localArray ) {
	7625	uprv_free(sCEs.buf);
	7626	}
	7627	if (tCEs.buf != tCEs.localArray ) {
	7628	uprv_free(tCEs.buf);
	7629	}
	7630	}
	7631
	7632	return result;
	7633	}
	7634
	7635	static UCollationResult
	7636	ucol_strcollRegular(const UCollator *coll,
	7637	const UChar *source, int32_t sourceLength,
	7638	const UChar *target, int32_t targetLength,
	7639	UErrorCode *status) {
	7640	collIterate sColl, tColl;
	7641	// Preparing the context objects for iterating over strings
	7642	IInit_collIterate(coll, source, sourceLength, &sColl, status);
	7643	IInit_collIterate(coll, target, targetLength, &tColl, status);
	7644	if(U_FAILURE(*status)) {
	7645	return UCOL_LESS;
	7646	}
	7647	return ucol_strcollRegular(&sColl, &tColl, status);
	7648	}
	7649
	7650	static inline uint32_t
	7651	ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
	7652	uint32_t CE, const UChar s, int32_t index, int32_t len)
	7653	{
	7654	const UChar UCharOffset = (UChar )coll->image+getContractOffset(CE&0xFFF);
	7655	int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;
	7656	int32_t offset = 1;
	7657	UChar schar = 0, tchar = 0;
	7658
	7659	for(;;) {
	7660	if(len == -1) {
	7661	if(s[*index] == 0) { // end of string
	7662	return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
	7663	} else {
	7664	schar = s[*index];
	7665	}
	7666	} else {
	7667	if(*index == len) {
	7668	return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
	7669	} else {
	7670	schar = s[*index];
	7671	}
	7672	}
	7673
	7674	while(schar > (tchar = (UCharOffset+offset))) { / since the contraction codepoints should be ordered, we skip all that are smaller */
	7675	offset++;
	7676	}
	7677
	7678	if (schar == tchar) {
	7679	(*index)++;
	7680	return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]);
	7681	}
	7682	else
	7683	{
	7684	if(schar & 0xFF00 /> UCOL_ENDOFLATIN1RANGE/) {
	7685	return UCOL_BAIL_OUT_CE;
	7686	}
	7687	// skip completely ignorables
	7688	uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
	7689	if(isZeroCE == 0) { // we have to ignore completely ignorables
	7690	(*index)++;
	7691	continue;
	7692	}
	7693
	7694	return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
	7695	}
	7696	}
	7697	}
	7698
	7699
	7700	/**
	7701	* This is a fast strcoll, geared towards text in Latin-1.
	7702	* It supports contractions of size two, French secondaries
	7703	* and case switching. You can use it with strengths primary
	7704	* to tertiary. It does not support shifted and case level.
	7705	* It relies on the table build by setupLatin1Table. If it
	7706	* doesn't understand something, it will go to the regular
	7707	* strcoll.
	7708	*/
	7709	static UCollationResult
	7710	ucol_strcollUseLatin1( const UCollator *coll,
	7711	const UChar *source,
	7712	int32_t sLen,
	7713	const UChar *target,
	7714	int32_t tLen,
	7715	UErrorCode *status)
	7716	{
	7717	U_ALIGN_CODE(16);
	7718	int32_t strength = coll->strength;
	7719
	7720	int32_t sIndex = 0, tIndex = 0;
	7721	UChar sChar = 0, tChar = 0;
	7722	uint32_t sOrder=0, tOrder=0;
	7723
	7724	UBool endOfSource = FALSE;
	7725
	7726	uint32_t *elements = coll->latinOneCEs;
	7727
	7728	UBool haveContractions = FALSE; // if we have contractions in our string
	7729	// we cannot do French secondary
	7730
	7731	// Do the primary level
	7732	for(;;) {
	7733	while(sOrder==0) { // this loop skips primary ignorables
	7734	// sOrder=getNextlatinOneCE(source);
	7735	if(sLen==-1) { // handling zero terminated strings
	7736	sChar=source[sIndex++];
	7737	if(sChar==0) {
	7738	endOfSource = TRUE;
	7739	break;
	7740	}
	7741	} else { // handling strings with known length
	7742	if(sIndex==sLen) {
	7743	endOfSource = TRUE;
	7744	break;
	7745	}
	7746	sChar=source[sIndex++];
	7747	}
	7748	if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
	7749	//fprintf(stderr, "R");
	7750	return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
	7751	}
	7752	sOrder = elements[sChar];
	7753	if(sOrder >= UCOL_NOT_FOUND) { // if we got a special
	7754	// specials can basically be either contractions or bail-out signs. If we get anything
	7755	// else, we'll bail out anywasy
	7756	if(getCETag(sOrder) == CONTRACTION_TAG) {
	7757	sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
	7758	haveContractions = TRUE; // if there are contractions, we cannot do French secondary
	7759	// However, if there are contractions in the table, but we always use just one char,
	7760	// we might be able to do French. This should be checked out.
	7761	}
	7762	if(sOrder >= UCOL_NOT_FOUND /== UCOL_BAIL_OUT_CE/) {
	7763	//fprintf(stderr, "S");
	7764	return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
	7765	}
	7766	}
	7767	}
	7768
	7769	while(tOrder==0) { // this loop skips primary ignorables
	7770	// tOrder=getNextlatinOneCE(target);
	7771	if(tLen==-1) { // handling zero terminated strings
	7772	tChar=target[tIndex++];
	7773	if(tChar==0) {
	7774	if(endOfSource) { // this is different than source loop,
	7775	// as we already know that source loop is done here,
	7776	// so we can either finish the primary loop if both
	7777	// strings are done or anounce the result if only
	7778	// target is done. Same below.
	7779	goto endOfPrimLoop;
	7780	} else {
	7781	return UCOL_GREATER;
	7782	}
	7783	}
	7784	} else { // handling strings with known length
	7785	if(tIndex==tLen) {
	7786	if(endOfSource) {
	7787	goto endOfPrimLoop;
	7788	} else {
	7789	return UCOL_GREATER;
	7790	}
	7791	}
	7792	tChar=target[tIndex++];
	7793	}
	7794	if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
	7795	//fprintf(stderr, "R");
	7796	return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
	7797	}
	7798	tOrder = elements[tChar];
	7799	if(tOrder >= UCOL_NOT_FOUND) {
	7800	// Handling specials, see the comments for source
	7801	if(getCETag(tOrder) == CONTRACTION_TAG) {
	7802	tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
	7803	haveContractions = TRUE;
	7804	}
	7805	if(tOrder >= UCOL_NOT_FOUND /== UCOL_BAIL_OUT_CE/) {
	7806	//fprintf(stderr, "S");
	7807	return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
	7808	}
	7809	}
	7810	}
	7811	if(endOfSource) { // source is finished, but target is not, say the result.
	7812	return UCOL_LESS;
	7813	}
	7814
	7815	if(sOrder == tOrder) { // if we have same CEs, we continue the loop
	7816	sOrder = 0; tOrder = 0;
	7817	continue;
	7818	} else {
	7819	// compare current top bytes
	7820	if(((sOrder^tOrder)&0xFF000000)!=0) {
	7821	// top bytes differ, return difference
	7822	if(sOrder < tOrder) {
	7823	return UCOL_LESS;
	7824	} else if(sOrder > tOrder) {
	7825	return UCOL_GREATER;
	7826	}
	7827	// instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
	7828	// since we must return enum value
	7829	}
	7830
	7831	// top bytes match, continue with following bytes
	7832	sOrder<<=8;
	7833	tOrder<<=8;
	7834	}
	7835	}
	7836
	7837	endOfPrimLoop:
	7838	// after primary loop, we definitely know the sizes of strings,
	7839	// so we set it and use simpler loop for secondaries and tertiaries
	7840	sLen = sIndex; tLen = tIndex;
	7841	if(strength >= UCOL_SECONDARY) {
	7842	// adjust the table beggining
	7843	elements += coll->latinOneTableLen;
	7844	endOfSource = FALSE;
	7845
	7846	if(coll->frenchCollation == UCOL_OFF) { // non French
	7847	// This loop is a simplified copy of primary loop
	7848	// at this point we know that whole strings are latin-1, so we don't
	7849	// check for that. We also know that we only have contractions as
	7850	// specials.
	7851	sIndex = 0; tIndex = 0;
	7852	for(;;) {
	7853	while(sOrder==0) {
	7854	if(sIndex==sLen) {
	7855	endOfSource = TRUE;
	7856	break;
	7857	}
	7858	sChar=source[sIndex++];
	7859	sOrder = elements[sChar];
	7860	if(sOrder > UCOL_NOT_FOUND) {
	7861	sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
	7862	}
	7863	}
	7864
	7865	while(tOrder==0) {
	7866	if(tIndex==tLen) {
	7867	if(endOfSource) {
	7868	goto endOfSecLoop;
	7869	} else {
	7870	return UCOL_GREATER;
	7871	}
	7872	}
	7873	tChar=target[tIndex++];
	7874	tOrder = elements[tChar];
	7875	if(tOrder > UCOL_NOT_FOUND) {
	7876	tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
	7877	}
	7878	}
	7879	if(endOfSource) {
	7880	return UCOL_LESS;
	7881	}
	7882
	7883	if(sOrder == tOrder) {
	7884	sOrder = 0; tOrder = 0;
	7885	continue;
	7886	} else {
	7887	// see primary loop for comments on this
	7888	if(((sOrder^tOrder)&0xFF000000)!=0) {
	7889	if(sOrder < tOrder) {
	7890	return UCOL_LESS;
	7891	} else if(sOrder > tOrder) {
	7892	return UCOL_GREATER;
	7893	}
	7894	}
	7895	sOrder<<=8;
	7896	tOrder<<=8;
	7897	}
	7898	}
	7899	} else { // French
	7900	if(haveContractions) { // if we have contractions, we have to bail out
	7901	// since we don't really know how to handle them here
	7902	return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
	7903	}
	7904	// For French, we go backwards
	7905	sIndex = sLen; tIndex = tLen;
	7906	for(;;) {
	7907	while(sOrder==0) {
	7908	if(sIndex==0) {
	7909	endOfSource = TRUE;
	7910	break;
	7911	}
	7912	sChar=source[--sIndex];
	7913	sOrder = elements[sChar];
	7914	// don't even look for contractions
	7915	}
	7916
	7917	while(tOrder==0) {
	7918	if(tIndex==0) {
	7919	if(endOfSource) {
	7920	goto endOfSecLoop;
	7921	} else {
	7922	return UCOL_GREATER;
	7923	}
	7924	}
	7925	tChar=target[--tIndex];
	7926	tOrder = elements[tChar];
	7927	// don't even look for contractions
	7928	}
	7929	if(endOfSource) {
	7930	return UCOL_LESS;
	7931	}
	7932
	7933	if(sOrder == tOrder) {
	7934	sOrder = 0; tOrder = 0;
	7935	continue;
	7936	} else {
	7937	// see the primary loop for comments
	7938	if(((sOrder^tOrder)&0xFF000000)!=0) {
	7939	if(sOrder < tOrder) {
	7940	return UCOL_LESS;
	7941	} else if(sOrder > tOrder) {
	7942	return UCOL_GREATER;
	7943	}
	7944	}
	7945	sOrder<<=8;
	7946	tOrder<<=8;
	7947	}
	7948	}
	7949	}
	7950	}
	7951
	7952	endOfSecLoop:
	7953	if(strength >= UCOL_TERTIARY) {
	7954	// tertiary loop is the same as secondary (except no French)
	7955	elements += coll->latinOneTableLen;
	7956	sIndex = 0; tIndex = 0;
	7957	endOfSource = FALSE;
	7958	for(;;) {
	7959	while(sOrder==0) {
	7960	if(sIndex==sLen) {
	7961	endOfSource = TRUE;
	7962	break;
	7963	}
	7964	sChar=source[sIndex++];
	7965	sOrder = elements[sChar];
	7966	if(sOrder > UCOL_NOT_FOUND) {
	7967	sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
	7968	}
	7969	}
	7970	while(tOrder==0) {
	7971	if(tIndex==tLen) {
	7972	if(endOfSource) {
	7973	return UCOL_EQUAL; // if both strings are at the end, they are equal
	7974	} else {
	7975	return UCOL_GREATER;
	7976	}
	7977	}
	7978	tChar=target[tIndex++];
	7979	tOrder = elements[tChar];
	7980	if(tOrder > UCOL_NOT_FOUND) {
	7981	tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
	7982	}
	7983	}
	7984	if(endOfSource) {
	7985	return UCOL_LESS;
	7986	}
	7987	if(sOrder == tOrder) {
	7988	sOrder = 0; tOrder = 0;
	7989	continue;
	7990	} else {
	7991	if(((sOrder^tOrder)&0xff000000)!=0) {
	7992	if(sOrder < tOrder) {
	7993	return UCOL_LESS;
	7994	} else if(sOrder > tOrder) {
	7995	return UCOL_GREATER;
	7996	}
	7997	}
	7998	sOrder<<=8;
	7999	tOrder<<=8;
	8000	}
	8001	}
	8002	}
	8003	return UCOL_EQUAL;
	8004	}
	8005
	8006
	8007	U_CAPI UCollationResult U_EXPORT2
	8008	ucol_strcollIter( const UCollator *coll,
	8009	UCharIterator *sIter,
	8010	UCharIterator *tIter,
	8011	UErrorCode *status)
	8012	{
	8013	if(!status \|\| U_FAILURE(*status)) {
	8014	return UCOL_EQUAL;
	8015	}
	8016
	8017	UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
	8018	UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
	8019
	8020	if (sIter == tIter) {
	8021	UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
	8022	return UCOL_EQUAL;
	8023	}
	8024	if(sIter == NULL \|\| tIter == NULL \|\| coll == NULL) {
	8025	*status = U_ILLEGAL_ARGUMENT_ERROR;
	8026	UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
	8027	return UCOL_EQUAL;
	8028	}
	8029
	8030	UCollationResult result = UCOL_EQUAL;
	8031
	8032	// Preparing the context objects for iterating over strings
	8033	collIterate sColl, tColl;
	8034	IInit_collIterate(coll, NULL, -1, &sColl, status);
	8035	IInit_collIterate(coll, NULL, -1, &tColl, status);
	8036	if(U_FAILURE(*status)) {
	8037	UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
	8038	return UCOL_EQUAL;
	8039	}
	8040	// The division for the array length may truncate the array size to
	8041	// a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
	8042	// for all platforms anyway.
	8043	UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
	8044	UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
	8045	UNormIterator sNormIter = NULL, tNormIter = NULL;
	8046
	8047	sColl.iterator = sIter;
	8048	sColl.flags \|= UCOL_USE_ITERATOR;
	8049	tColl.flags \|= UCOL_USE_ITERATOR;
	8050	tColl.iterator = tIter;
	8051
	8052	if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
	8053	sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
	8054	sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
	8055	sColl.flags &= ~UCOL_ITER_NORM;
	8056
	8057	tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
	8058	tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
	8059	tColl.flags &= ~UCOL_ITER_NORM;
	8060	}
	8061
	8062	UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
	8063
	8064	while((sChar = sColl.iterator->next(sColl.iterator)) ==
	8065	(tChar = tColl.iterator->next(tColl.iterator))) {
	8066	if(sChar == U_SENTINEL) {
	8067	result = UCOL_EQUAL;
	8068	goto end_compare;
	8069	}
	8070	}
	8071
	8072	if(sChar == U_SENTINEL) {
	8073	tChar = tColl.iterator->previous(tColl.iterator);
	8074	}
	8075
	8076	if(tChar == U_SENTINEL) {
	8077	sChar = sColl.iterator->previous(sColl.iterator);
	8078	}
	8079
	8080	sChar = sColl.iterator->previous(sColl.iterator);
	8081	tChar = tColl.iterator->previous(tColl.iterator);
	8082
	8083	if (ucol_unsafeCP((UChar)sChar, coll) \|\| ucol_unsafeCP((UChar)tChar, coll))
	8084	{
	8085	// We are stopped in the middle of a contraction.
	8086	// Scan backwards through the == part of the string looking for the start of the contraction.
	8087	// It doesn't matter which string we scan, since they are the same in this region.
	8088	do
	8089	{
	8090	sChar = sColl.iterator->previous(sColl.iterator);
	8091	tChar = tColl.iterator->previous(tColl.iterator);
	8092	}
	8093	while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
	8094	}
	8095
	8096
	8097	if(U_SUCCESS(*status)) {
	8098	result = ucol_strcollRegular(&sColl, &tColl, status);
	8099	}
	8100
	8101	end_compare:
	8102	if(sNormIter \|\| tNormIter) {
	8103	unorm_closeIter(sNormIter);
	8104	unorm_closeIter(tNormIter);
	8105	}
	8106
	8107	UTRACE_EXIT_VALUE_STATUS(result, *status)
	8108	return result;
	8109	}
	8110
	8111
	8112	/* */
	8113	/* ucol_strcoll Main public API string comparison function */
	8114	/* */
	8115	U_CAPI UCollationResult U_EXPORT2
	8116	ucol_strcoll( const UCollator *coll,
	8117	const UChar *source,
	8118	int32_t sourceLength,
	8119	const UChar *target,
	8120	int32_t targetLength)
	8121	{
	8122	U_ALIGN_CODE(16);
	8123
	8124	UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
	8125	if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
	8126	UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
	8127	UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
	8128	UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
	8129	}
	8130
	8131	if(source == NULL \|\| target == NULL) {
	8132	// do not crash, but return. Should have
	8133	// status argument to return error.
	8134	UTRACE_EXIT_VALUE(UCOL_EQUAL);
	8135	return UCOL_EQUAL;
	8136	}
	8137
	8138	/* Quick check if source and target are same strings. */
	8139	/* They should either both be NULL terminated or the explicit length should be set on both. */
	8140	if (source==target && sourceLength==targetLength) {
	8141	UTRACE_EXIT_VALUE(UCOL_EQUAL);
	8142	return UCOL_EQUAL;
	8143	}
	8144
	8145	if(coll->delegate != NULL) {
	8146	UErrorCode status = U_ZERO_ERROR;
	8147	return ((const Collator*)coll->delegate)->compare(source,sourceLength,target,targetLength, status);
	8148	}
	8149
	8150	/* Scan the strings. Find: */
	8151	/* The length of any leading portion that is equal */
	8152	/* Whether they are exactly equal. (in which case we just return) */
	8153	const UChar *pSrc = source;
	8154	const UChar *pTarg = target;
	8155	int32_t equalLength;
	8156
	8157	if (sourceLength == -1 && targetLength == -1) {
	8158	// Both strings are null terminated.
	8159	// Scan through any leading equal portion.
	8160	while (pSrc == pTarg && *pSrc != 0) {
	8161	pSrc++;
	8162	pTarg++;
	8163	}
	8164	if (pSrc == 0 && pTarg == 0) {
	8165	UTRACE_EXIT_VALUE(UCOL_EQUAL);
	8166	return UCOL_EQUAL;
	8167	}
	8168	equalLength = (int32_t)(pSrc - source);
	8169	}
	8170	else
	8171	{
	8172	// One or both strings has an explicit length.
	8173	const UChar *pSrcEnd = source + sourceLength;
	8174	const UChar *pTargEnd = target + targetLength;
	8175
	8176	// Scan while the strings are bitwise ==, or until one is exhausted.
	8177	for (;;) {
	8178	if (pSrc == pSrcEnd \|\| pTarg == pTargEnd) {
	8179	break;
	8180	}
	8181	if ((pSrc == 0 && sourceLength == -1) \|\| (pTarg == 0 && targetLength == -1)) {
	8182	break;
	8183	}
	8184	if (pSrc != pTarg) {
	8185	break;
	8186	}
	8187	pSrc++;
	8188	pTarg++;
	8189	}
	8190	equalLength = (int32_t)(pSrc - source);
	8191
	8192	// If we made it all the way through both strings, we are done. They are ==
	8193	if ((pSrc ==pSrcEnd \|\| (pSrcEnd <pSrc && pSrc==0)) && / At end of src string, however it was specified. */
	8194	(pTarg==pTargEnd \|\| (pTargEnd<pTarg && pTarg==0))) / and also at end of dest string */
	8195	{
	8196	UTRACE_EXIT_VALUE(UCOL_EQUAL);
	8197	return UCOL_EQUAL;
	8198	}
	8199	}
	8200	if (equalLength > 0) {
	8201	/* There is an identical portion at the beginning of the two strings. */
	8202	/* If the identical portion ends within a contraction or a comibining */
	8203	/* character sequence, back up to the start of that sequence. */
	8204
	8205	// These values should already be set by the code above.
	8206	//pSrc = source + equalLength; /* point to the first differing chars */
	8207	//pTarg = target + equalLength;
	8208	if ((pSrc != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) \|\|
	8209	(pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll)))
	8210	{
	8211	// We are stopped in the middle of a contraction.
	8212	// Scan backwards through the == part of the string looking for the start of the contraction.
	8213	// It doesn't matter which string we scan, since they are the same in this region.
	8214	do
	8215	{
	8216	equalLength--;
	8217	pSrc--;
	8218	}
	8219	while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));
	8220	}
	8221
	8222	source += equalLength;
	8223	target += equalLength;
	8224	if (sourceLength > 0) {
	8225	sourceLength -= equalLength;
	8226	}
	8227	if (targetLength > 0) {
	8228	targetLength -= equalLength;
	8229	}
	8230	}
	8231
	8232	UErrorCode status = U_ZERO_ERROR;
	8233	UCollationResult returnVal;
	8234	if(!coll->latinOneUse \|\| (sourceLength > 0 && source&0xff00) \|\| (targetLength > 0 && target&0xff00)) {
	8235	returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status);
	8236	} else {
	8237	returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
	8238	}
	8239	UTRACE_EXIT_VALUE(returnVal);
	8240	return returnVal;
	8241	}
	8242
	8243	/* convenience function for comparing strings */
	8244	U_CAPI UBool U_EXPORT2
	8245	ucol_greater( const UCollator *coll,
	8246	const UChar *source,
	8247	int32_t sourceLength,
	8248	const UChar *target,
	8249	int32_t targetLength)
	8250	{
	8251	return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
	8252	== UCOL_GREATER);
	8253	}
	8254
	8255	/* convenience function for comparing strings */
	8256	U_CAPI UBool U_EXPORT2
	8257	ucol_greaterOrEqual( const UCollator *coll,
	8258	const UChar *source,
	8259	int32_t sourceLength,
	8260	const UChar *target,
	8261	int32_t targetLength)
	8262	{
	8263	return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
	8264	!= UCOL_LESS);
	8265	}
	8266
	8267	/* convenience function for comparing strings */
	8268	U_CAPI UBool U_EXPORT2
	8269	ucol_equal( const UCollator *coll,
	8270	const UChar *source,
	8271	int32_t sourceLength,
	8272	const UChar *target,
	8273	int32_t targetLength)
	8274	{
	8275	return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
	8276	== UCOL_EQUAL);
	8277	}
	8278
	8279	U_CAPI void U_EXPORT2
	8280	ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
	8281	if(coll && coll->UCA) {
	8282	uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
	8283	}
	8284	}
	8285
	8286	#endif /* #if !UCONFIG_NO_COLLATION */