git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 2001-2015 IBM and others. All rights reserved.
	4	**********************************************************************
	5	* Date Name Description
	6	* 07/02/2001 synwee Creation.
	7	**********************************************************************
	8	*/
	9
	10	#include "unicode/utypes.h"
	11
	12	#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
	13
	14	#include "unicode/usearch.h"
	15	#include "unicode/ustring.h"
	16	#include "unicode/uchar.h"
	17	#include "unicode/utf16.h"
	18	#include "normalizer2impl.h"
	19	#include "usrchimp.h"
	20	#include "cmemory.h"
	21	#include "ucln_in.h"
	22	#include "uassert.h"
	23	#include "ustr_imp.h"
	24
	25	U_NAMESPACE_USE
	26
	27	// don't use Boyer-Moore
	28	// (and if we decide to turn this on again there are several new TODOs that will need to be addressed)
	29	#define BOYER_MOORE 0
	30
	31	// internal definition ---------------------------------------------------
	32
	33	#define LAST_BYTE_MASK_ 0xFF
	34	#define SECOND_LAST_BYTE_SHIFT_ 8
	35	#define SUPPLEMENTARY_MIN_VALUE_ 0x10000
	36
	37	static const Normalizer2Impl *g_nfcImpl = NULL;
	38
	39	// internal methods -------------------------------------------------
	40
	41	/**
	42	* Fast collation element iterator setOffset.
	43	* This function does not check for bounds.
	44	* @param coleiter collation element iterator
	45	* @param offset to set
	46	*/
	47	static
	48	inline void setColEIterOffset(UCollationElements *elems,
	49	int32_t offset)
	50	{
	51	// Note: Not "fast" any more after the 2013 collation rewrite.
	52	// We do not want to expose more internals than necessary.
	53	UErrorCode status = U_ZERO_ERROR;
	54	ucol_setOffset(elems, offset, &status);
	55	}
	56
	57	/**
	58	* Getting the mask for collation strength
	59	* @param strength collation strength
	60	* @return collation element mask
	61	*/
	62	static
	63	inline uint32_t getMask(UCollationStrength strength)
	64	{
	65	switch (strength)
	66	{
	67	case UCOL_PRIMARY:
	68	return UCOL_PRIMARYORDERMASK;
	69	case UCOL_SECONDARY:
	70	return UCOL_SECONDARYORDERMASK \| UCOL_PRIMARYORDERMASK;
	71	default:
	72	return UCOL_TERTIARYORDERMASK \| UCOL_SECONDARYORDERMASK \|
	73	UCOL_PRIMARYORDERMASK;
	74	}
	75	}
	76
	77	/**
	78	* This is to squeeze the 21bit ces into a 256 table
	79	* @param ce collation element
	80	* @return collapsed version of the collation element
	81	*/
	82	static
	83	inline int hash(uint32_t ce)
	84	{
	85	// the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work
	86	// well with the new collation where most of the latin 1 characters
	87	// are of the value xx000xxx. their hashes will most of the time be 0
	88	// to be discussed on the hash algo.
	89	return UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_;
	90	}
	91
	92	U_CDECL_BEGIN
	93	static UBool U_CALLCONV
	94	usearch_cleanup(void) {
	95	g_nfcImpl = NULL;
	96	return TRUE;
	97	}
	98	U_CDECL_END
	99
	100	/**
	101	* Initializing the fcd tables.
	102	* Internal method, status assumed to be a success.
	103	* @param status output error if any, caller to check status before calling
	104	* method, status assumed to be success when passed in.
	105	*/
	106	static
	107	inline void initializeFCD(UErrorCode *status)
	108	{
	109	if (g_nfcImpl == NULL) {
	110	g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
	111	ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
	112	}
	113	}
	114
	115	/**
	116	* Gets the fcd value for a character at the argument index.
	117	* This method takes into accounts of the supplementary characters.
	118	* @param str UTF16 string where character for fcd retrieval resides
	119	* @param offset position of the character whose fcd is to be retrieved, to be
	120	* overwritten with the next character position, taking
	121	* surrogate characters into consideration.
	122	* @param strlength length of the argument string
	123	* @return fcd value
	124	*/
	125	static
	126	uint16_t getFCD(const UChar str, int32_t offset,
	127	int32_t strlength)
	128	{
	129	const UChar temp = str + offset;
	130	uint16_t result = g_nfcImpl->nextFCD16(temp, str + strlength);
	131	*offset = (int32_t)(temp - str);
	132	return result;
	133	}
	134
	135	/**
	136	* Getting the modified collation elements taking into account the collation
	137	* attributes
	138	* @param strsrch string search data
	139	* @param sourcece
	140	* @return the modified collation element
	141	*/
	142	static
	143	inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
	144	{
	145	// note for tertiary we can't use the collator->tertiaryMask, that
	146	// is a preprocessed mask that takes into account case options. since
	147	// we are only concerned with exact matches, we don't need that.
	148	sourcece &= strsrch->ceMask;
	149
	150	if (strsrch->toShift) {
	151	// alternate handling here, since only the 16 most significant digits
	152	// is only used, we can safely do a compare without masking
	153	// if the ce is a variable, we mask and get only the primary values
	154	// no shifting to quartenary is required since all primary values
	155	// less than variabletop will need to be masked off anyway.
	156	if (strsrch->variableTop > sourcece) {
	157	if (strsrch->strength >= UCOL_QUATERNARY) {
	158	sourcece &= UCOL_PRIMARYORDERMASK;
	159	}
	160	else {
	161	sourcece = UCOL_IGNORABLE;
	162	}
	163	}
	164	} else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE) {
	165	sourcece = 0xFFFF;
	166	}
	167
	168	return sourcece;
	169	}
	170
	171	/**
	172	* Allocate a memory and returns NULL if it failed.
	173	* Internal method, status assumed to be a success.
	174	* @param size to allocate
	175	* @param status output error if any, caller to check status before calling
	176	* method, status assumed to be success when passed in.
	177	* @return newly allocated array, NULL otherwise
	178	*/
	179	static
	180	inline void * allocateMemory(uint32_t size, UErrorCode *status)
	181	{
	182	uint32_t result = (uint32_t )uprv_malloc(size);
	183	if (result == NULL) {
	184	*status = U_MEMORY_ALLOCATION_ERROR;
	185	}
	186	return result;
	187	}
	188
	189	/**
	190	* Adds a uint32_t value to a destination array.
	191	* Creates a new array if we run out of space. The caller will have to
	192	* manually deallocate the newly allocated array.
	193	* Internal method, status assumed to be success, caller has to check status
	194	* before calling this method. destination not to be NULL and has at least
	195	* size destinationlength.
	196	* @param destination target array
	197	* @param offset destination offset to add value
	198	* @param destinationlength target array size, return value for the new size
	199	* @param value to be added
	200	* @param increments incremental size expected
	201	* @param status output error if any, caller to check status before calling
	202	* method, status assumed to be success when passed in.
	203	* @return new destination array, destination if there was no new allocation
	204	*/
	205	static
	206	inline int32_t * addTouint32_tArray(int32_t *destination,
	207	uint32_t offset,
	208	uint32_t *destinationlength,
	209	uint32_t value,
	210	uint32_t increments,
	211	UErrorCode *status)
	212	{
	213	uint32_t newlength = *destinationlength;
	214	if (offset + 1 == newlength) {
	215	newlength += increments;
	216	int32_t temp = (int32_t )allocateMemory(
	217	sizeof(int32_t) * newlength, status);
	218	if (U_FAILURE(*status)) {
	219	return NULL;
	220	}
	221	uprv_memcpy(temp, destination, sizeof(int32_t) * offset);
	222	*destinationlength = newlength;
	223	destination = temp;
	224	}
	225	destination[offset] = value;
	226	return destination;
	227	}
	228
	229	/**
	230	* Adds a uint64_t value to a destination array.
	231	* Creates a new array if we run out of space. The caller will have to
	232	* manually deallocate the newly allocated array.
	233	* Internal method, status assumed to be success, caller has to check status
	234	* before calling this method. destination not to be NULL and has at least
	235	* size destinationlength.
	236	* @param destination target array
	237	* @param offset destination offset to add value
	238	* @param destinationlength target array size, return value for the new size
	239	* @param value to be added
	240	* @param increments incremental size expected
	241	* @param status output error if any, caller to check status before calling
	242	* method, status assumed to be success when passed in.
	243	* @return new destination array, destination if there was no new allocation
	244	*/
	245	static
	246	inline int64_t * addTouint64_tArray(int64_t *destination,
	247	uint32_t offset,
	248	uint32_t *destinationlength,
	249	uint64_t value,
	250	uint32_t increments,
	251	UErrorCode *status)
	252	{
	253	uint32_t newlength = *destinationlength;
	254	if (offset + 1 == newlength) {
	255	newlength += increments;
	256	int64_t temp = (int64_t )allocateMemory(
	257	sizeof(int64_t) * newlength, status);
	258
	259	if (U_FAILURE(*status)) {
	260	return NULL;
	261	}
	262
	263	uprv_memcpy(temp, destination, sizeof(int64_t) * offset);
	264	*destinationlength = newlength;
	265	destination = temp;
	266	}
	267
	268	destination[offset] = value;
	269
	270	return destination;
	271	}
	272
	273	/**
	274	* Initializing the ce table for a pattern.
	275	* Stores non-ignorable collation keys.
	276	* Table size will be estimated by the size of the pattern text. Table
	277	* expansion will be perform as we go along. Adding 1 to ensure that the table
	278	* size definitely increases.
	279	* Internal method, status assumed to be a success.
	280	* @param strsrch string search data
	281	* @param status output error if any, caller to check status before calling
	282	* method, status assumed to be success when passed in.
	283	* @return total number of expansions
	284	*/
	285	static
	286	inline uint16_t initializePatternCETable(UStringSearch *strsrch,
	287	UErrorCode *status)
	288	{
	289	UPattern *pattern = &(strsrch->pattern);
	290	uint32_t cetablesize = INITIAL_ARRAY_SIZE_;
	291	int32_t *cetable = pattern->cesBuffer;
	292	uint32_t patternlength = pattern->textLength;
	293	UCollationElements *coleiter = strsrch->utilIter;
	294
	295	if (coleiter == NULL) {
	296	coleiter = ucol_openElements(strsrch->collator, pattern->text,
	297	patternlength, status);
	298	// status will be checked in ucol_next(..) later and if it is an
	299	// error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
	300	// returned.
	301	strsrch->utilIter = coleiter;
	302	}
	303	else {
	304	ucol_setText(coleiter, pattern->text, pattern->textLength, status);
	305	}
	306	if(U_FAILURE(*status)) {
	307	return 0;
	308	}
	309
	310	if (pattern->ces != cetable && pattern->ces) {
	311	uprv_free(pattern->ces);
	312	}
	313
	314	uint16_t offset = 0;
	315	uint16_t result = 0;
	316	int32_t ce;
	317
	318	while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER &&
	319	U_SUCCESS(*status)) {
	320	uint32_t newce = getCE(strsrch, ce);
	321	if (newce) {
	322	int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize,
	323	newce,
	324	patternlength - ucol_getOffset(coleiter) + 1,
	325	status);
	326	if (U_FAILURE(*status)) {
	327	return 0;
	328	}
	329	offset ++;
	330	if (cetable != temp && cetable != pattern->cesBuffer) {
	331	uprv_free(cetable);
	332	}
	333	cetable = temp;
	334	}
	335	result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
	336	}
	337
	338	cetable[offset] = 0;
	339	pattern->ces = cetable;
	340	pattern->cesLength = offset;
	341
	342	return result;
	343	}
	344
	345	/**
	346	* Initializing the pce table for a pattern.
	347	* Stores non-ignorable collation keys.
	348	* Table size will be estimated by the size of the pattern text. Table
	349	* expansion will be perform as we go along. Adding 1 to ensure that the table
	350	* size definitely increases.
	351	* Internal method, status assumed to be a success.
	352	* @param strsrch string search data
	353	* @param status output error if any, caller to check status before calling
	354	* method, status assumed to be success when passed in.
	355	* @return total number of expansions
	356	*/
	357	static
	358	inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
	359	UErrorCode *status)
	360	{
	361	UPattern *pattern = &(strsrch->pattern);
	362	uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
	363	int64_t *pcetable = pattern->pcesBuffer;
	364	uint32_t patternlength = pattern->textLength;
	365	UCollationElements *coleiter = strsrch->utilIter;
	366
	367	if (coleiter == NULL) {
	368	coleiter = ucol_openElements(strsrch->collator, pattern->text,
	369	patternlength, status);
	370	// status will be checked in ucol_next(..) later and if it is an
	371	// error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
	372	// returned.
	373	strsrch->utilIter = coleiter;
	374	} else {
	375	ucol_setText(coleiter, pattern->text, pattern->textLength, status);
	376	}
	377	if(U_FAILURE(*status)) {
	378	return 0;
	379	}
	380
	381	if (pattern->pces != pcetable && pattern->pces != NULL) {
	382	uprv_free(pattern->pces);
	383	}
	384
	385	uint16_t offset = 0;
	386	uint16_t result = 0;
	387	int64_t pce;
	388
	389	icu::UCollationPCE iter(coleiter);
	390
	391	// ** Should processed CEs be signed or unsigned?
	392	// ** (the rest of the code in this file seems to play fast-and-loose with
	393	// ** whether a CE is signed or unsigned. For example, look at routine above this one.)
	394	while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
	395	U_SUCCESS(*status)) {
	396	int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
	397	pce,
	398	patternlength - ucol_getOffset(coleiter) + 1,
	399	status);
	400
	401	if (U_FAILURE(*status)) {
	402	return 0;
	403	}
	404
	405	offset += 1;
	406
	407	if (pcetable != temp && pcetable != pattern->pcesBuffer) {
	408	uprv_free(pcetable);
	409	}
	410
	411	pcetable = temp;
	412	//result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
	413	}
	414
	415	pcetable[offset] = 0;
	416	pattern->pces = pcetable;
	417	pattern->pcesLength = offset;
	418
	419	return result;
	420	}
	421
	422	/**
	423	* Initializes the pattern struct.
	424	* Internal method, status assumed to be success.
	425	* @param strsrch UStringSearch data storage
	426	* @param status output error if any, caller to check status before calling
	427	* method, status assumed to be success when passed in.
	428	* @return expansionsize the total expansion size of the pattern
	429	*/
	430	static
	431	inline int16_t initializePattern(UStringSearch strsrch, UErrorCode status)
	432	{
	433	if (U_FAILURE(*status)) { return 0; }
	434	UPattern *pattern = &(strsrch->pattern);
	435	const UChar *patterntext = pattern->text;
	436	int32_t length = pattern->textLength;
	437	int32_t index = 0;
	438
	439	// Since the strength is primary, accents are ignored in the pattern.
	440	if (strsrch->strength == UCOL_PRIMARY) {
	441	pattern->hasPrefixAccents = 0;
	442	pattern->hasSuffixAccents = 0;
	443	} else {
	444	pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
	445	SECOND_LAST_BYTE_SHIFT_;
	446	index = length;
	447	U16_BACK_1(patterntext, 0, index);
	448	pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
	449	LAST_BYTE_MASK_;
	450	}
	451
	452	// HACK
	453	if (strsrch->pattern.pces != NULL) {
	454	if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
	455	uprv_free(strsrch->pattern.pces);
	456	}
	457
	458	strsrch->pattern.pces = NULL;
	459	}
	460
	461	// since intializePattern is an internal method status is a success.
	462	return initializePatternCETable(strsrch, status);
	463	}
	464
	465	/**
	466	* Initializing shift tables, with the default values.
	467	* If a corresponding default value is 0, the shift table is not set.
	468	* @param shift table for forwards shift
	469	* @param backshift table for backwards shift
	470	* @param cetable table containing pattern ce
	471	* @param cesize size of the pattern ces
	472	* @param expansionsize total size of the expansions
	473	* @param defaultforward the default forward value
	474	* @param defaultbackward the default backward value
	475	*/
	476	static
	477	inline void setShiftTable(int16_t shift[], int16_t backshift[],
	478	int32_t *cetable, int32_t cesize,
	479	int16_t expansionsize,
	480	int16_t defaultforward,
	481	int16_t defaultbackward)
	482	{
	483	// estimate the value to shift. to do that we estimate the smallest
	484	// number of characters to give the relevant ces, ie approximately
	485	// the number of ces minus their expansion, since expansions can come
	486	// from a character.
	487	int32_t count;
	488	for (count = 0; count < MAX_TABLE_SIZE_; count ++) {
	489	shift[count] = defaultforward;
	490	}
	491	cesize --; // down to the last index
	492	for (count = 0; count < cesize; count ++) {
	493	// number of ces from right of array to the count
	494	int temp = defaultforward - count - 1;
	495	shift[hash(cetable[count])] = temp > 1 ? temp : 1;
	496	}
	497	shift[hash(cetable[cesize])] = 1;
	498	// for ignorables we just shift by one. see test examples.
	499	shift[hash(0)] = 1;
	500
	501	for (count = 0; count < MAX_TABLE_SIZE_; count ++) {
	502	backshift[count] = defaultbackward;
	503	}
	504	for (count = cesize; count > 0; count --) {
	505	// the original value count does not seem to work
	506	backshift[hash(cetable[count])] = count > expansionsize ?
	507	(int16_t)(count - expansionsize) : 1;
	508	}
	509	backshift[hash(cetable[0])] = 1;
	510	backshift[hash(0)] = 1;
	511	}
	512
	513	/**
	514	* Building of the pattern collation element list and the boyer moore strsrch
	515	* table.
	516	* The canonical match will only be performed after the default match fails.
	517	* For both cases we need to remember the size of the composed and decomposed
	518	* versions of the string. Since the Boyer-Moore shift calculations shifts by
	519	* a number of characters in the text and tries to match the pattern from that
	520	* offset, the shift value can not be too large in case we miss some
	521	* characters. To choose a right shift size, we estimate the NFC form of the
	522	* and use its size as a shift guide. The NFC form should be the small
	523	* possible representation of the pattern. Anyways, we'll err on the smaller
	524	* shift size. Hence the calculation for minlength.
	525	* Canonical match will be performed slightly differently. We'll split the
	526	* pattern into 3 parts, the prefix accents (PA), the middle string bounded by
	527	* the first and last base character (MS), the ending accents (EA). Matches
	528	* will be done on MS first, and only when we match MS then some processing
	529	* will be required for the prefix and end accents in order to determine if
	530	* they match PA and EA. Hence the default shift values
	531	* for the canonical match will take the size of either end's accent into
	532	* consideration. Forwards search will take the end accents into consideration
	533	* for the default shift values and the backwards search will take the prefix
	534	* accents into consideration.
	535	* If pattern has no non-ignorable ce, we return a illegal argument error.
	536	* Internal method, status assumed to be success.
	537	* @param strsrch UStringSearch data storage
	538	* @param status for output errors if it occurs, status is assumed to be a
	539	* success when it is passed in.
	540	*/
	541	static
	542	inline void initialize(UStringSearch strsrch, UErrorCode status)
	543	{
	544	int16_t expandlength = initializePattern(strsrch, status);
	545	if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
	546	UPattern *pattern = &strsrch->pattern;
	547	int32_t cesize = pattern->cesLength;
	548
	549	int16_t minlength = cesize > expandlength
	550	? (int16_t)cesize - expandlength : 1;
	551	pattern->defaultShiftSize = minlength;
	552	setShiftTable(pattern->shift, pattern->backShift, pattern->ces,
	553	cesize, expandlength, minlength, minlength);
	554	return;
	555	}
	556	strsrch->pattern.defaultShiftSize = 0;
	557	}
	558
	559	#if BOYER_MOORE
	560	/**
	561	* Check to make sure that the match length is at the end of the character by
	562	* using the breakiterator.
	563	* @param strsrch string search data
	564	* @param start target text start offset
	565	* @param end target text end offset
	566	*/
	567	static
	568	void checkBreakBoundary(const UStringSearch strsrch, int32_t /start/,
	569	int32_t *end)
	570	{
	571	#if !UCONFIG_NO_BREAK_ITERATION
	572	UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
	573	if (breakiterator) {
	574	int32_t matchend = *end;
	575	//int32_t matchstart = *start;
	576
	577	if (!ubrk_isBoundary(breakiterator, matchend)) {
	578	*end = ubrk_following(breakiterator, matchend);
	579	}
	580
	581	/* Check the start of the matched text to make sure it doesn't have any accents
	582	* before it. This code may not be necessary and so it is commented out */
	583	/*if (!ubrk_isBoundary(breakiterator, matchstart) && !ubrk_isBoundary(breakiterator, matchstart-1)) {
	584	*start = ubrk_preceding(breakiterator, matchstart);
	585	}*/
	586	}
	587	#endif
	588	}
	589
	590	/**
	591	* Determine whether the target text in UStringSearch bounded by the offset
	592	* start and end is one or more whole units of text as
	593	* determined by the breakiterator in UStringSearch.
	594	* @param strsrch string search data
	595	* @param start target text start offset
	596	* @param end target text end offset
	597	*/
	598	static
	599	UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
	600	int32_t end)
	601	{
	602	#if !UCONFIG_NO_BREAK_ITERATION
	603	UBreakIterator *breakiterator = strsrch->search->breakIter;
	604	//TODO: Add here.
	605	if (breakiterator) {
	606	int32_t startindex = ubrk_first(breakiterator);
	607	int32_t endindex = ubrk_last(breakiterator);
	608
	609	// out-of-range indexes are never boundary positions
	610	if (start < startindex \|\| start > endindex \|\|
	611	end < startindex \|\| end > endindex) {
	612	return FALSE;
	613	}
	614	// otherwise, we can use following() on the position before the
	615	// specified one and return true of the position we get back is the
	616	// one the user specified
	617	UBool result = (start == startindex \|\|
	618	ubrk_following(breakiterator, start - 1) == start) &&
	619	(end == endindex \|\|
	620	ubrk_following(breakiterator, end - 1) == end);
	621	if (result) {
	622	// iterates the individual ces
	623	UCollationElements *coleiter = strsrch->utilIter;
	624	const UChar *text = strsrch->search->text +
	625	start;
	626	UErrorCode status = U_ZERO_ERROR;
	627	ucol_setText(coleiter, text, end - start, &status);
	628	for (int32_t count = 0; count < strsrch->pattern.cesLength;
	629	count ++) {
	630	int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
	631	if (ce == UCOL_IGNORABLE) {
	632	count --;
	633	continue;
	634	}
	635	if (U_FAILURE(status) \|\| ce != strsrch->pattern.ces[count]) {
	636	return FALSE;
	637	}
	638	}
	639	int32_t nextce = ucol_next(coleiter, &status);
	640	while (ucol_getOffset(coleiter) == (end - start)
	641	&& getCE(strsrch, nextce) == UCOL_IGNORABLE) {
	642	nextce = ucol_next(coleiter, &status);
	643	}
	644	if (ucol_getOffset(coleiter) == (end - start)
	645	&& nextce != UCOL_NULLORDER) {
	646	// extra collation elements at the end of the match
	647	return FALSE;
	648	}
	649	}
	650	return result;
	651	}
	652	#endif
	653	return TRUE;
	654	}
	655
	656	/**
	657	* Getting the next base character offset if current offset is an accent,
	658	* or the current offset if the current character contains a base character.
	659	* accents the following base character will be returned
	660	* @param text string
	661	* @param textoffset current offset
	662	* @param textlength length of text string
	663	* @return the next base character or the current offset
	664	* if the current character is contains a base character.
	665	*/
	666	static
	667	inline int32_t getNextBaseOffset(const UChar *text,
	668	int32_t textoffset,
	669	int32_t textlength)
	670	{
	671	if (textoffset < textlength) {
	672	int32_t temp = textoffset;
	673	if (getFCD(text, &temp, textlength) >> SECOND_LAST_BYTE_SHIFT_) {
	674	while (temp < textlength) {
	675	int32_t result = temp;
	676	if ((getFCD(text, &temp, textlength) >>
	677	SECOND_LAST_BYTE_SHIFT_) == 0) {
	678	return result;
	679	}
	680	}
	681	return textlength;
	682	}
	683	}
	684	return textoffset;
	685	}
	686
	687	/**
	688	* Gets the next base character offset depending on the string search pattern
	689	* data
	690	* @param strsrch string search data
	691	* @param textoffset current offset, one offset away from the last character
	692	* to search for.
	693	* @return start index of the next base character or the current offset
	694	* if the current character is contains a base character.
	695	*/
	696	static
	697	inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
	698	int32_t textoffset)
	699	{
	700	int32_t textlength = strsrch->search->textLength;
	701	if (strsrch->pattern.hasSuffixAccents &&
	702	textoffset < textlength) {
	703	int32_t temp = textoffset;
	704	const UChar *text = strsrch->search->text;
	705	U16_BACK_1(text, 0, temp);
	706	if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
	707	return getNextBaseOffset(text, textoffset, textlength);
	708	}
	709	}
	710	return textoffset;
	711	}
	712
	713	/**
	714	* Shifting the collation element iterator position forward to prepare for
	715	* a following match. If the last character is a unsafe character, we'll only
	716	* shift by 1 to capture contractions, normalization etc.
	717	* Internal method, status assumed to be success.
	718	* @param text strsrch string search data
	719	* @param textoffset start text position to do search
	720	* @param ce the text ce which failed the match.
	721	* @param patternceindex index of the ce within the pattern ce buffer which
	722	* failed the match
	723	* @return final offset
	724	*/
	725	static
	726	inline int32_t shiftForward(UStringSearch *strsrch,
	727	int32_t textoffset,
	728	int32_t ce,
	729	int32_t patternceindex)
	730	{
	731	UPattern *pattern = &(strsrch->pattern);
	732	if (ce != UCOL_NULLORDER) {
	733	int32_t shift = pattern->shift[hash(ce)];
	734	// this is to adjust for characters in the middle of the
	735	// substring for matching that failed.
	736	int32_t adjust = pattern->cesLength - patternceindex;
	737	if (adjust > 1 && shift >= adjust) {
	738	shift -= adjust - 1;
	739	}
	740	textoffset += shift;
	741	}
	742	else {
	743	textoffset += pattern->defaultShiftSize;
	744	}
	745
	746	textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
	747	// check for unsafe characters
	748	// * if it is the start or middle of a contraction: to be done after
	749	// a initial match is found
	750	// * thai or lao base consonant character: similar to contraction
	751	// * high surrogate character: similar to contraction
	752	// * next character is a accent: shift to the next base character
	753	return textoffset;
	754	}
	755	#endif // #if BOYER_MOORE
	756
	757	/**
	758	* sets match not found
	759	* @param strsrch string search data
	760	*/
	761	static
	762	inline void setMatchNotFound(UStringSearch *strsrch)
	763	{
	764	// this method resets the match result regardless of the error status.
	765	strsrch->search->matchedIndex = USEARCH_DONE;
	766	strsrch->search->matchedLength = 0;
	767	if (strsrch->search->isForwardSearching) {
	768	setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
	769	}
	770	else {
	771	setColEIterOffset(strsrch->textIter, 0);
	772	}
	773	}
	774
	775	#if BOYER_MOORE
	776	/**
	777	* Gets the offset to the next safe point in text.
	778	* ie. not the middle of a contraction, swappable characters or supplementary
	779	* characters.
	780	* @param collator collation sata
	781	* @param text string to work with
	782	* @param textoffset offset in string
	783	* @param textlength length of text string
	784	* @return offset to the next safe character
	785	*/
	786	static
	787	inline int32_t getNextSafeOffset(const UCollator *collator,
	788	const UChar *text,
	789	int32_t textoffset,
	790	int32_t textlength)
	791	{
	792	int32_t result = textoffset; // first contraction character
	793	while (result != textlength && ucol_unsafeCP(text[result], collator)) {
	794	result ++;
	795	}
	796	return result;
	797	}
	798
	799	/**
	800	* This checks for accents in the potential match started with a .
	801	* composite character.
	802	* This is really painful... we have to check that composite character do not
	803	* have any extra accents. We have to normalize the potential match and find
	804	* the immediate decomposed character before the match.
	805	* The first composite character would have been taken care of by the fcd
	806	* checks in checkForwardExactMatch.
	807	* This is the slow path after the fcd of the first character and
	808	* the last character has been checked by checkForwardExactMatch and we
	809	* determine that the potential match has extra non-ignorable preceding
	810	* ces.
	811	* E.g. looking for \u0301 acute in \u01FA A ring above and acute,
	812	* checkExtraMatchAccent should fail since there is a middle ring in \u01FA
	813	* Note here that accents checking are slow and cautioned in the API docs.
	814	* Internal method, status assumed to be a success, caller should check status
	815	* before calling this method
	816	* @param strsrch string search data
	817	* @param start index of the potential unfriendly composite character
	818	* @param end index of the potential unfriendly composite character
	819	* @param status output error status if any.
	820	* @return TRUE if there is non-ignorable accents before at the beginning
	821	* of the match, FALSE otherwise.
	822	*/
	823
	824	static
	825	UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
	826	int32_t end,
	827	UErrorCode *status)
	828	{
	829	UBool result = FALSE;
	830	if (strsrch->pattern.hasPrefixAccents) {
	831	int32_t length = end - start;
	832	int32_t offset = 0;
	833	const UChar *text = strsrch->search->text + start;
	834
	835	U16_FWD_1(text, offset, length);
	836	// we are only concerned with the first composite character
	837	if (unorm_quickCheck(text, offset, UNORM_NFD, status) == UNORM_NO) {
	838	int32_t safeoffset = getNextSafeOffset(strsrch->collator,
	839	text, 0, length);
	840	if (safeoffset != length) {
	841	safeoffset ++;
	842	}
	843	UChar *norm = NULL;
	844	UChar buffer[INITIAL_ARRAY_SIZE_];
	845	int32_t size = unorm_normalize(text, safeoffset, UNORM_NFD, 0,
	846	buffer, INITIAL_ARRAY_SIZE_,
	847	status);
	848	if (U_FAILURE(*status)) {
	849	return FALSE;
	850	}
	851	if (size >= INITIAL_ARRAY_SIZE_) {
	852	norm = (UChar )allocateMemory((size + 1) sizeof(UChar),
	853	status);
	854	// if allocation failed, status will be set to
	855	// U_MEMORY_ALLOCATION_ERROR and unorm_normalize internally
	856	// checks for it.
	857	size = unorm_normalize(text, safeoffset, UNORM_NFD, 0, norm,
	858	size, status);
	859	if (U_FAILURE(*status) && norm != NULL) {
	860	uprv_free(norm);
	861	return FALSE;
	862	}
	863	}
	864	else {
	865	norm = buffer;
	866	}
	867
	868	UCollationElements *coleiter = strsrch->utilIter;
	869	ucol_setText(coleiter, norm, size, status);
	870	uint32_t firstce = strsrch->pattern.ces[0];
	871	UBool ignorable = TRUE;
	872	uint32_t ce = UCOL_IGNORABLE;
	873	while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) {
	874	offset = ucol_getOffset(coleiter);
	875	if (ce != firstce && ce != UCOL_IGNORABLE) {
	876	ignorable = FALSE;
	877	}
	878	ce = ucol_next(coleiter, status);
	879	}
	880	UChar32 codepoint;
	881	U16_PREV(norm, 0, offset, codepoint);
	882	result = !ignorable && (u_getCombiningClass(codepoint) != 0);
	883
	884	if (norm != buffer) {
	885	uprv_free(norm);
	886	}
	887	}
	888	}
	889
	890	return result;
	891	}
	892
	893	/**
	894	* Used by exact matches, checks if there are accents before the match.
	895	* This is really painful... we have to check that composite characters at
	896	* the start of the matches have to not have any extra accents.
	897	* We check the FCD of the character first, if it starts with an accent and
	898	* the first pattern ce does not match the first ce of the character, we bail.
	899	* Otherwise we try normalizing the first composite
	900	* character and find the immediate decomposed character before the match to
	901	* see if it is an non-ignorable accent.
	902	* Now normalizing the first composite character is enough because we ensure
	903	* that when the match is passed in here with extra beginning ces, the
	904	* first or last ce that match has to occur within the first character.
	905	* E.g. looking for \u0301 acute in \u01FA A ring above and acute,
	906	* checkExtraMatchAccent should fail since there is a middle ring in \u01FA
	907	* Note here that accents checking are slow and cautioned in the API docs.
	908	* @param strsrch string search data
	909	* @param start offset
	910	* @param end offset
	911	* @return TRUE if there are accents on either side of the match,
	912	* FALSE otherwise
	913	*/
	914	static
	915	UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
	916	int32_t end)
	917	{
	918	if (strsrch->pattern.hasPrefixAccents) {
	919	UCollationElements *coleiter = strsrch->textIter;
	920	UErrorCode status = U_ZERO_ERROR;
	921	// we have been iterating forwards previously
	922	uint32_t ignorable = TRUE;
	923	int32_t firstce = strsrch->pattern.ces[0];
	924
	925	setColEIterOffset(coleiter, start);
	926	int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
	927	if (U_FAILURE(status)) {
	928	return TRUE;
	929	}
	930	while (ce != firstce) {
	931	if (ce != UCOL_IGNORABLE) {
	932	ignorable = FALSE;
	933	}
	934	ce = getCE(strsrch, ucol_next(coleiter, &status));
	935	if (U_FAILURE(status) \|\| ce == UCOL_NULLORDER) {
	936	return TRUE;
	937	}
	938	}
	939	if (!ignorable && inNormBuf(coleiter)) {
	940	// within normalization buffer, discontiguous handled here
	941	return TRUE;
	942	}
	943
	944	// within text
	945	int32_t temp = start;
	946	// original code
	947	// accent = (getFCD(strsrch->search->text, &temp,
	948	// strsrch->search->textLength)
	949	// >> SECOND_LAST_BYTE_SHIFT_);
	950	// however this code does not work well with VC7 .net in release mode.
	951	// maybe the inlines for getFCD combined with shifting has bugs in
	952	// VC7. anyways this is a work around.
	953	UBool accent = getFCD(strsrch->search->text, &temp,
	954	strsrch->search->textLength) > 0xFF;
	955	if (!accent) {
	956	return checkExtraMatchAccents(strsrch, start, end, &status);
	957	}
	958	if (!ignorable) {
	959	return TRUE;
	960	}
	961	if (start > 0) {
	962	temp = start;
	963	U16_BACK_1(strsrch->search->text, 0, temp);
	964	if (getFCD(strsrch->search->text, &temp,
	965	strsrch->search->textLength) & LAST_BYTE_MASK_) {
	966	setColEIterOffset(coleiter, start);
	967	ce = ucol_previous(coleiter, &status);
	968	if (U_FAILURE(status) \|\|
	969	(ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE)) {
	970	return TRUE;
	971	}
	972	}
	973	}
	974	}
	975
	976	return FALSE;
	977	}
	978
	979	/**
	980	* Used by exact matches, checks if there are accents bounding the match.
	981	* Note this is the initial boundary check. If the potential match
	982	* starts or ends with composite characters, the accents in those
	983	* characters will be determined later.
	984	* Not doing backwards iteration here, since discontiguos contraction for
	985	* backwards collation element iterator, use up too many characters.
	986	* E.g. looking for \u030A ring in \u01FA A ring above and acute,
	987	* should fail since there is a acute at the end of \u01FA
	988	* Note here that accents checking are slow and cautioned in the API docs.
	989	* @param strsrch string search data
	990	* @param start offset of match
	991	* @param end end offset of the match
	992	* @return TRUE if there are accents on either side of the match,
	993	* FALSE otherwise
	994	*/
	995	static
	996	UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
	997	int32_t end)
	998	{
	999	if (strsrch->pattern.hasSuffixAccents) {
	1000	const UChar *text = strsrch->search->text;
	1001	int32_t temp = end;
	1002	int32_t textlength = strsrch->search->textLength;
	1003	U16_BACK_1(text, 0, temp);
	1004	if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
	1005	int32_t firstce = strsrch->pattern.ces[0];
	1006	UCollationElements *coleiter = strsrch->textIter;
	1007	UErrorCode status = U_ZERO_ERROR;
	1008	int32_t ce;
	1009	setColEIterOffset(coleiter, start);
	1010	while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
	1011	if (U_FAILURE(status) \|\| ce == UCOL_NULLORDER) {
	1012	return TRUE;
	1013	}
	1014	}
	1015	int32_t count = 1;
	1016	while (count < strsrch->pattern.cesLength) {
	1017	if (getCE(strsrch, ucol_next(coleiter, &status))
	1018	== UCOL_IGNORABLE) {
	1019	// Thai can give an ignorable here.
	1020	count --;
	1021	}
	1022	if (U_FAILURE(status)) {
	1023	return TRUE;
	1024	}
	1025	count ++;
	1026	}
	1027
	1028	ce = ucol_next(coleiter, &status);
	1029	if (U_FAILURE(status)) {
	1030	return TRUE;
	1031	}
	1032	if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
	1033	ce = getCE(strsrch, ce);
	1034	}
	1035	if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
	1036	if (ucol_getOffset(coleiter) <= end) {
	1037	return TRUE;
	1038	}
	1039	if (getFCD(text, &end, textlength) >> SECOND_LAST_BYTE_SHIFT_) {
	1040	return TRUE;
	1041	}
	1042	}
	1043	}
	1044	}
	1045	return FALSE;
	1046	}
	1047	#endif // #if BOYER_MOORE
	1048
	1049	/**
	1050	* Checks if the offset runs out of the text string
	1051	* @param offset
	1052	* @param textlength of the text string
	1053	* @return TRUE if offset is out of bounds, FALSE otherwise
	1054	*/
	1055	static
	1056	inline UBool isOutOfBounds(int32_t textlength, int32_t offset)
	1057	{
	1058	return offset < 0 \|\| offset > textlength;
	1059	}
	1060
	1061	/**
	1062	* Checks for identical match
	1063	* @param strsrch string search data
	1064	* @param start offset of possible match
	1065	* @param end offset of possible match
	1066	* @return TRUE if identical match is found
	1067	*/
	1068	static
	1069	inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
	1070	int32_t end)
	1071	{
	1072	if (strsrch->strength != UCOL_IDENTICAL) {
	1073	return TRUE;
	1074	}
	1075
	1076	// Note: We could use Normalizer::compare() or similar, but for short strings
	1077	// which may not be in FCD it might be faster to just NFD them.
	1078	UErrorCode status = U_ZERO_ERROR;
	1079	UnicodeString t2, p2;
	1080	strsrch->nfd->normalize(
	1081	UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
	1082	strsrch->nfd->normalize(
	1083	UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
	1084	// return FALSE if NFD failed
	1085	return U_SUCCESS(status) && t2 == p2;
	1086	}
	1087
	1088	#if BOYER_MOORE
	1089	/**
	1090	* Checks to see if the match is repeated
	1091	* @param strsrch string search data
	1092	* @param start new match start index
	1093	* @param end new match end index
	1094	* @return TRUE if the the match is repeated, FALSE otherwise
	1095	*/
	1096	static
	1097	inline UBool checkRepeatedMatch(UStringSearch *strsrch,
	1098	int32_t start,
	1099	int32_t end)
	1100	{
	1101	int32_t lastmatchindex = strsrch->search->matchedIndex;
	1102	UBool result;
	1103	if (lastmatchindex == USEARCH_DONE) {
	1104	return FALSE;
	1105	}
	1106	if (strsrch->search->isForwardSearching) {
	1107	result = start <= lastmatchindex;
	1108	}
	1109	else {
	1110	result = start >= lastmatchindex;
	1111	}
	1112	if (!result && !strsrch->search->isOverlap) {
	1113	if (strsrch->search->isForwardSearching) {
	1114	result = start < lastmatchindex + strsrch->search->matchedLength;
	1115	}
	1116	else {
	1117	result = end > lastmatchindex;
	1118	}
	1119	}
	1120	return result;
	1121	}
	1122
	1123	/**
	1124	* Gets the collation element iterator's current offset.
	1125	* @param coleiter collation element iterator
	1126	* @param forwards flag TRUE if we are moving in th forwards direction
	1127	* @return current offset
	1128	*/
	1129	static
	1130	inline int32_t getColElemIterOffset(const UCollationElements *coleiter,
	1131	UBool forwards)
	1132	{
	1133	int32_t result = ucol_getOffset(coleiter);
	1134	// intricacies of the the backwards collation element iterator
	1135	if (FALSE && !forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) {
	1136	result ++;
	1137	}
	1138	return result;
	1139	}
	1140
	1141	/**
	1142	* Checks match for contraction.
	1143	* If the match ends with a partial contraction we fail.
	1144	* If the match starts too far off (because of backwards iteration) we try to
	1145	* chip off the extra characters depending on whether a breakiterator has
	1146	* been used.
	1147	* Internal method, error assumed to be success, caller has to check status
	1148	* before calling this method.
	1149	* @param strsrch string search data
	1150	* @param start offset of potential match, to be modified if necessary
	1151	* @param end offset of potential match, to be modified if necessary
	1152	* @param status output error status if any
	1153	* @return TRUE if match passes the contraction test, FALSE otherwise
	1154	*/
	1155
	1156	static
	1157	UBool checkNextExactContractionMatch(UStringSearch *strsrch,
	1158	int32_t *start,
	1159	int32_t end, UErrorCode status)
	1160	{
	1161	UCollationElements *coleiter = strsrch->textIter;
	1162	int32_t textlength = strsrch->search->textLength;
	1163	int32_t temp = *start;
	1164	const UCollator *collator = strsrch->collator;
	1165	const UChar *text = strsrch->search->text;
	1166	// This part checks if either ends of the match contains potential
	1167	// contraction. If so we'll have to iterate through them
	1168	// The start contraction needs to be checked since ucol_previous dumps
	1169	// all characters till the first safe character into the buffer.
	1170	// start + 1 is used to test for the unsafe characters instead of start
	1171	// because ucol_prev takes all unsafe characters till the first safe
	1172	// character ie start. so by testing start + 1, we can estimate if
	1173	// excess prefix characters has been included in the potential search
	1174	// results.
	1175	if ((end < textlength && ucol_unsafeCP(text[end], collator)) \|\|
	1176	(*start + 1 < textlength
	1177	&& ucol_unsafeCP(text[*start + 1], collator))) {
	1178	int32_t expansion = getExpansionPrefix(coleiter);
	1179	UBool expandflag = expansion > 0;
	1180	setColEIterOffset(coleiter, *start);
	1181	while (expansion > 0) {
	1182	// getting rid of the redundant ce, caused by setOffset.
	1183	// since backward contraction/expansion may have extra ces if we
	1184	// are in the normalization buffer, hasAccentsBeforeMatch would
	1185	// have taken care of it.
	1186	// E.g. the character \u01FA will have an expansion of 3, but if
	1187	// we are only looking for acute and ring \u030A and \u0301, we'll
	1188	// have to skip the first ce in the expansion buffer.
	1189	ucol_next(coleiter, status);
	1190	if (U_FAILURE(*status)) {
	1191	return FALSE;
	1192	}
	1193	if (ucol_getOffset(coleiter) != temp) {
	1194	*start = temp;
	1195	temp = ucol_getOffset(coleiter);
	1196	}
	1197	expansion --;
	1198	}
	1199
	1200	int32_t *patternce = strsrch->pattern.ces;
	1201	int32_t patterncelength = strsrch->pattern.cesLength;
	1202	int32_t count = 0;
	1203	while (count < patterncelength) {
	1204	int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
	1205	if (ce == UCOL_IGNORABLE) {
	1206	continue;
	1207	}
	1208	if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) {
	1209	*start = temp;
	1210	temp = ucol_getOffset(coleiter);
	1211	}
	1212	if (U_FAILURE(*status) \|\| ce != patternce[count]) {
	1213	(*end) ++;
	1214	end = getNextUStringSearchBaseOffset(strsrch, end);
	1215	return FALSE;
	1216	}
	1217	count ++;
	1218	}
	1219	}
	1220	return TRUE;
	1221	}
	1222
	1223	/**
	1224	* Checks and sets the match information if found.
	1225	* Checks
	1226	* <ul>
	1227	* <li> the potential match does not repeat the previous match
	1228	* <li> boundaries are correct
	1229	* <li> exact matches has no extra accents
	1230	* <li> identical matchesb
	1231	* <li> potential match does not end in the middle of a contraction
	1232	* <\ul>
	1233	* Otherwise the offset will be shifted to the next character.
	1234	* Internal method, status assumed to be success, caller has to check status
	1235	* before calling this method.
	1236	* @param strsrch string search data
	1237	* @param textoffset offset in the collation element text. the returned value
	1238	* will be the truncated end offset of the match or the new start
	1239	* search offset.
	1240	* @param status output error status if any
	1241	* @return TRUE if the match is valid, FALSE otherwise
	1242	*/
	1243	static
	1244	inline UBool checkNextExactMatch(UStringSearch *strsrch,
	1245	int32_t textoffset, UErrorCode status)
	1246	{
	1247	UCollationElements *coleiter = strsrch->textIter;
	1248	int32_t start = getColElemIterOffset(coleiter, FALSE);
	1249
	1250	if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
	1251	return FALSE;
	1252	}
	1253
	1254	// this totally matches, however we need to check if it is repeating
	1255	if (!isBreakUnit(strsrch, start, *textoffset) \|\|
	1256	checkRepeatedMatch(strsrch, start, *textoffset) \|\|
	1257	hasAccentsBeforeMatch(strsrch, start, *textoffset) \|\|
	1258	!checkIdentical(strsrch, start, *textoffset) \|\|
	1259	hasAccentsAfterMatch(strsrch, start, *textoffset)) {
	1260
	1261	(*textoffset) ++;
	1262	textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
	1263	return FALSE;
	1264	}
	1265
	1266	//Add breakiterator boundary check for primary strength search.
	1267	if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
	1268	checkBreakBoundary(strsrch, &start, textoffset);
	1269	}
	1270
	1271	// totally match, we will get rid of the ending ignorables.
	1272	strsrch->search->matchedIndex = start;
	1273	strsrch->search->matchedLength = *textoffset - start;
	1274	return TRUE;
	1275	}
	1276
	1277	/**
	1278	* Getting the previous base character offset, or the current offset if the
	1279	* current character is a base character
	1280	* @param text string
	1281	* @param textoffset one offset after the current character
	1282	* @return the offset of the next character after the base character or the first
	1283	* composed character with accents
	1284	*/
	1285	static
	1286	inline int32_t getPreviousBaseOffset(const UChar *text,
	1287	int32_t textoffset)
	1288	{
	1289	if (textoffset > 0) {
	1290	for (;;) {
	1291	int32_t result = textoffset;
	1292	U16_BACK_1(text, 0, textoffset);
	1293	int32_t temp = textoffset;
	1294	uint16_t fcd = getFCD(text, &temp, result);
	1295	if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
	1296	if (fcd & LAST_BYTE_MASK_) {
	1297	return textoffset;
	1298	}
	1299	return result;
	1300	}
	1301	if (textoffset == 0) {
	1302	return 0;
	1303	}
	1304	}
	1305	}
	1306	return textoffset;
	1307	}
	1308
	1309	/**
	1310	* Getting the indexes of the accents that are not blocked in the argument
	1311	* accent array
	1312	* @param accents array of accents in nfd terminated by a 0.
	1313	* @param accentsindex array of indexes of the accents that are not blocked
	1314	*/
	1315	static
	1316	inline int getUnblockedAccentIndex(UChar accents, int32_t accentsindex)
	1317	{
	1318	int32_t index = 0;
	1319	int32_t length = u_strlen(accents);
	1320	UChar32 codepoint = 0;
	1321	int cclass = 0;
	1322	int result = 0;
	1323	int32_t temp;
	1324	while (index < length) {
	1325	temp = index;
	1326	U16_NEXT(accents, index, length, codepoint);
	1327	if (u_getCombiningClass(codepoint) != cclass) {
	1328	cclass = u_getCombiningClass(codepoint);
	1329	accentsindex[result] = temp;
	1330	result ++;
	1331	}
	1332	}
	1333	accentsindex[result] = length;
	1334	return result;
	1335	}
	1336
	1337	/**
	1338	* Appends 3 UChar arrays to a destination array.
	1339	* Creates a new array if we run out of space. The caller will have to
	1340	* manually deallocate the newly allocated array.
	1341	* Internal method, status assumed to be success, caller has to check status
	1342	* before calling this method. destination not to be NULL and has at least
	1343	* size destinationlength.
	1344	* @param destination target array
	1345	* @param destinationlength target array size, returning the appended length
	1346	* @param source1 null-terminated first array
	1347	* @param source2 second array
	1348	* @param source2length length of seond array
	1349	* @param source3 null-terminated third array
	1350	* @param status error status if any
	1351	* @return new destination array, destination if there was no new allocation
	1352	*/
	1353	static
	1354	inline UChar * addToUCharArray( UChar *destination,
	1355	int32_t *destinationlength,
	1356	const UChar *source1,
	1357	const UChar *source2,
	1358	int32_t source2length,
	1359	const UChar *source3,
	1360	UErrorCode *status)
	1361	{
	1362	int32_t source1length = source1 ? u_strlen(source1) : 0;
	1363	int32_t source3length = source3 ? u_strlen(source3) : 0;
	1364	if (*destinationlength < source1length + source2length + source3length +
	1365	1)
	1366	{
	1367	destination = (UChar *)allocateMemory(
	1368	(source1length + source2length + source3length + 1) * sizeof(UChar),
	1369	status);
	1370	// if error allocating memory, status will be
	1371	// U_MEMORY_ALLOCATION_ERROR
	1372	if (U_FAILURE(*status)) {
	1373	*destinationlength = 0;
	1374	return NULL;
	1375	}
	1376	}
	1377	if (source1length != 0) {
	1378	uprv_memcpy(destination, source1, sizeof(UChar) * source1length);
	1379	}
	1380	if (source2length != 0) {
	1381	uprv_memcpy(destination + source1length, source2,
	1382	sizeof(UChar) * source2length);
	1383	}
	1384	if (source3length != 0) {
	1385	uprv_memcpy(destination + source1length + source2length, source3,
	1386	sizeof(UChar) * source3length);
	1387	}
	1388	*destinationlength = source1length + source2length + source3length;
	1389	return destination;
	1390	}
	1391
	1392	/**
	1393	* Running through a collation element iterator to see if the contents matches
	1394	* pattern in string search data
	1395	* @param strsrch string search data
	1396	* @param coleiter collation element iterator
	1397	* @return TRUE if a match if found, FALSE otherwise
	1398	*/
	1399	static
	1400	inline UBool checkCollationMatch(const UStringSearch *strsrch,
	1401	UCollationElements *coleiter)
	1402	{
	1403	int patternceindex = strsrch->pattern.cesLength;
	1404	int32_t *patternce = strsrch->pattern.ces;
	1405	UErrorCode status = U_ZERO_ERROR;
	1406	while (patternceindex > 0) {
	1407	int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
	1408	if (ce == UCOL_IGNORABLE) {
	1409	continue;
	1410	}
	1411	if (U_FAILURE(status) \|\| ce != *patternce) {
	1412	return FALSE;
	1413	}
	1414	patternce ++;
	1415	patternceindex --;
	1416	}
	1417	return TRUE;
	1418	}
	1419
	1420	/**
	1421	* Rearranges the front accents to try matching.
	1422	* Prefix accents in the text will be grouped according to their combining
	1423	* class and the groups will be mixed and matched to try find the perfect
	1424	* match with the pattern.
	1425	* So for instance looking for "\u0301" in "\u030A\u0301\u0325"
	1426	* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings
	1427	* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
	1428	* "\u0301\u0325".
	1429	* step 2: check if any of the generated substrings matches the pattern.
	1430	* Internal method, status is assumed to be success, caller has to check status
	1431	* before calling this method.
	1432	* @param strsrch string search match
	1433	* @param start first offset of the accents to start searching
	1434	* @param end start of the last accent set
	1435	* @param status output error status if any
	1436	* @return USEARCH_DONE if a match is not found, otherwise return the starting
	1437	* offset of the match. Note this start includes all preceding accents.
	1438	*/
	1439	static
	1440	int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
	1441	int32_t start,
	1442	int32_t end,
	1443	UErrorCode *status)
	1444	{
	1445	const UChar *text = strsrch->search->text;
	1446	int32_t textlength = strsrch->search->textLength;
	1447	int32_t tempstart = start;
	1448
	1449	if ((getFCD(text, &tempstart, textlength) & LAST_BYTE_MASK_) == 0) {
	1450	// die... failed at a base character
	1451	return USEARCH_DONE;
	1452	}
	1453
	1454	int32_t offset = getNextBaseOffset(text, tempstart, textlength);
	1455	start = getPreviousBaseOffset(text, tempstart);
	1456
	1457	UChar accents[INITIAL_ARRAY_SIZE_];
	1458	// normalizing the offensive string
	1459	unorm_normalize(text + start, offset - start, UNORM_NFD, 0, accents,
	1460	INITIAL_ARRAY_SIZE_, status);
	1461	if (U_FAILURE(*status)) {
	1462	return USEARCH_DONE;
	1463	}
	1464
	1465	int32_t accentsindex[INITIAL_ARRAY_SIZE_];
	1466	int32_t accentsize = getUnblockedAccentIndex(accents,
	1467	accentsindex);
	1468	int32_t count = (2 << (accentsize - 1)) - 1;
	1469	UChar buffer[INITIAL_ARRAY_SIZE_];
	1470	UCollationElements *coleiter = strsrch->utilIter;
	1471	while (U_SUCCESS(*status) && count > 0) {
	1472	UChar *rearrange = strsrch->canonicalPrefixAccents;
	1473	// copy the base characters
	1474	for (int k = 0; k < accentsindex[0]; k ++) {
	1475	*rearrange ++ = accents[k];
	1476	}
	1477	// forming all possible canonical rearrangement by dropping
	1478	// sets of accents
	1479	for (int i = 0; i <= accentsize - 1; i ++) {
	1480	int32_t mask = 1 << (accentsize - i - 1);
	1481	if (count & mask) {
	1482	for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) {
	1483	*rearrange ++ = accents[j];
	1484	}
	1485	}
	1486	}
	1487	*rearrange = 0;
	1488	int32_t matchsize = INITIAL_ARRAY_SIZE_;
	1489	UChar *match = addToUCharArray(buffer, &matchsize,
	1490	strsrch->canonicalPrefixAccents,
	1491	strsrch->search->text + offset,
	1492	end - offset,
	1493	strsrch->canonicalSuffixAccents,
	1494	status);
	1495
	1496	// if status is a failure, ucol_setText does nothing.
	1497	// run the collator iterator through this match
	1498	ucol_setText(coleiter, match, matchsize, status);
	1499	if (U_SUCCESS(*status)) {
	1500	if (checkCollationMatch(strsrch, coleiter)) {
	1501	if (match != buffer) {
	1502	uprv_free(match);
	1503	}
	1504	return start;
	1505	}
	1506	}
	1507	count --;
	1508	}
	1509	return USEARCH_DONE;
	1510	}
	1511
	1512	/**
	1513	* Gets the offset to the safe point in text before textoffset.
	1514	* ie. not the middle of a contraction, swappable characters or supplementary
	1515	* characters.
	1516	* @param collator collation sata
	1517	* @param text string to work with
	1518	* @param textoffset offset in string
	1519	* @param textlength length of text string
	1520	* @return offset to the previous safe character
	1521	*/
	1522	static
	1523	inline uint32_t getPreviousSafeOffset(const UCollator *collator,
	1524	const UChar *text,
	1525	int32_t textoffset)
	1526	{
	1527	int32_t result = textoffset; // first contraction character
	1528	while (result != 0 && ucol_unsafeCP(text[result - 1], collator)) {
	1529	result --;
	1530	}
	1531	if (result != 0) {
	1532	// the first contraction character is consider unsafe here
	1533	result --;
	1534	}
	1535	return result;
	1536	}
	1537
	1538	/**
	1539	* Cleaning up after we passed the safe zone
	1540	* @param strsrch string search data
	1541	* @param safetext safe text array
	1542	* @param safebuffer safe text buffer
	1543	* @param coleiter collation element iterator for safe text
	1544	*/
	1545	static
	1546	inline void cleanUpSafeText(const UStringSearch strsrch, UChar safetext,
	1547	UChar *safebuffer)
	1548	{
	1549	if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents)
	1550	{
	1551	uprv_free(safetext);
	1552	}
	1553	}
	1554
	1555	/**
	1556	* Take the rearranged end accents and tries matching. If match failed at
	1557	* a seperate preceding set of accents (seperated from the rearranged on by
	1558	* at least a base character) then we rearrange the preceding accents and
	1559	* tries matching again.
	1560	* We allow skipping of the ends of the accent set if the ces do not match.
	1561	* However if the failure is found before the accent set, it fails.
	1562	* Internal method, status assumed to be success, caller has to check status
	1563	* before calling this method.
	1564	* @param strsrch string search data
	1565	* @param textoffset of the start of the rearranged accent
	1566	* @param status output error status if any
	1567	* @return USEARCH_DONE if a match is not found, otherwise return the starting
	1568	* offset of the match. Note this start includes all preceding accents.
	1569	*/
	1570	static
	1571	int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
	1572	int32_t textoffset,
	1573	UErrorCode *status)
	1574	{
	1575	const UChar *text = strsrch->search->text;
	1576	const UCollator *collator = strsrch->collator;
	1577	int32_t safelength = 0;
	1578	UChar *safetext;
	1579	int32_t safetextlength;
	1580	UChar safebuffer[INITIAL_ARRAY_SIZE_];
	1581	UCollationElements *coleiter = strsrch->utilIter;
	1582	int32_t safeoffset = textoffset;
	1583
	1584	if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0],
	1585	collator)) {
	1586	safeoffset = getPreviousSafeOffset(collator, text, textoffset);
	1587	safelength = textoffset - safeoffset;
	1588	safetextlength = INITIAL_ARRAY_SIZE_;
	1589	safetext = addToUCharArray(safebuffer, &safetextlength, NULL,
	1590	text + safeoffset, safelength,
	1591	strsrch->canonicalSuffixAccents,
	1592	status);
	1593	}
	1594	else {
	1595	safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
	1596	safetext = strsrch->canonicalSuffixAccents;
	1597	}
	1598
	1599	// if status is a failure, ucol_setText does nothing
	1600	ucol_setText(coleiter, safetext, safetextlength, status);
	1601	// status checked in loop below
	1602
	1603	int32_t *ce = strsrch->pattern.ces;
	1604	int32_t celength = strsrch->pattern.cesLength;
	1605	int ceindex = celength - 1;
	1606	UBool isSafe = TRUE; // indication flag for position in safe zone
	1607
	1608	while (ceindex >= 0) {
	1609	int32_t textce = ucol_previous(coleiter, status);
	1610	if (U_FAILURE(*status)) {
	1611	if (isSafe) {
	1612	cleanUpSafeText(strsrch, safetext, safebuffer);
	1613	}
	1614	return USEARCH_DONE;
	1615	}
	1616	if (textce == UCOL_NULLORDER) {
	1617	// check if we have passed the safe buffer
	1618	if (coleiter == strsrch->textIter) {
	1619	cleanUpSafeText(strsrch, safetext, safebuffer);
	1620	return USEARCH_DONE;
	1621	}
	1622	cleanUpSafeText(strsrch, safetext, safebuffer);
	1623	safetext = safebuffer;
	1624	coleiter = strsrch->textIter;
	1625	setColEIterOffset(coleiter, safeoffset);
	1626	// status checked at the start of the loop
	1627	isSafe = FALSE;
	1628	continue;
	1629	}
	1630	textce = getCE(strsrch, textce);
	1631	if (textce != UCOL_IGNORABLE && textce != ce[ceindex]) {
	1632	// do the beginning stuff
	1633	int32_t failedoffset = getColElemIterOffset(coleiter, FALSE);
	1634	if (isSafe && failedoffset >= safelength) {
	1635	// alas... no hope. failed at rearranged accent set
	1636	cleanUpSafeText(strsrch, safetext, safebuffer);
	1637	return USEARCH_DONE;
	1638	}
	1639	else {
	1640	if (isSafe) {
	1641	failedoffset += safeoffset;
	1642	cleanUpSafeText(strsrch, safetext, safebuffer);
	1643	}
	1644
	1645	// try rearranging the front accents
	1646	int32_t result = doNextCanonicalPrefixMatch(strsrch,
	1647	failedoffset, textoffset, status);
	1648	if (result != USEARCH_DONE) {
	1649	// if status is a failure, ucol_setOffset does nothing
	1650	setColEIterOffset(strsrch->textIter, result);
	1651	}
	1652	if (U_FAILURE(*status)) {
	1653	return USEARCH_DONE;
	1654	}
	1655	return result;
	1656	}
	1657	}
	1658	if (textce == ce[ceindex]) {
	1659	ceindex --;
	1660	}
	1661	}
	1662	// set offset here
	1663	if (isSafe) {
	1664	int32_t result = getColElemIterOffset(coleiter, FALSE);
	1665	// sets the text iterator here with the correct expansion and offset
	1666	int32_t leftoverces = getExpansionPrefix(coleiter);
	1667	cleanUpSafeText(strsrch, safetext, safebuffer);
	1668	if (result >= safelength) {
	1669	result = textoffset;
	1670	}
	1671	else {
	1672	result += safeoffset;
	1673	}
	1674	setColEIterOffset(strsrch->textIter, result);
	1675	strsrch->textIter->iteratordata_.toReturn =
	1676	setExpansionPrefix(strsrch->textIter, leftoverces);
	1677	return result;
	1678	}
	1679
	1680	return ucol_getOffset(coleiter);
	1681	}
	1682
	1683	/**
	1684	* Trying out the substring and sees if it can be a canonical match.
	1685	* This will try normalizing the end accents and arranging them into canonical
	1686	* equivalents and check their corresponding ces with the pattern ce.
	1687	* Suffix accents in the text will be grouped according to their combining
	1688	* class and the groups will be mixed and matched to try find the perfect
	1689	* match with the pattern.
	1690	* So for instance looking for "\u0301" in "\u030A\u0301\u0325"
	1691	* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings
	1692	* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
	1693	* "\u0301\u0325".
	1694	* step 2: check if any of the generated substrings matches the pattern.
	1695	* Internal method, status assumed to be success, caller has to check status
	1696	* before calling this method.
	1697	* @param strsrch string search data
	1698	* @param textoffset end offset in the collation element text that ends with
	1699	* the accents to be rearranged
	1700	* @param status error status if any
	1701	* @return TRUE if the match is valid, FALSE otherwise
	1702	*/
	1703	static
	1704	UBool doNextCanonicalMatch(UStringSearch *strsrch,
	1705	int32_t textoffset,
	1706	UErrorCode *status)
	1707	{
	1708	const UChar *text = strsrch->search->text;
	1709	int32_t temp = textoffset;
	1710	U16_BACK_1(text, 0, temp);
	1711	if ((getFCD(text, &temp, textoffset) & LAST_BYTE_MASK_) == 0) {
	1712	UCollationElements *coleiter = strsrch->textIter;
	1713	int32_t offset = getColElemIterOffset(coleiter, FALSE);
	1714	if (strsrch->pattern.hasPrefixAccents) {
	1715	offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset,
	1716	status);
	1717	if (U_SUCCESS(*status) && offset != USEARCH_DONE) {
	1718	setColEIterOffset(coleiter, offset);
	1719	return TRUE;
	1720	}
	1721	}
	1722	return FALSE;
	1723	}
	1724
	1725	if (!strsrch->pattern.hasSuffixAccents) {
	1726	return FALSE;
	1727	}
	1728
	1729	UChar accents[INITIAL_ARRAY_SIZE_];
	1730	// offset to the last base character in substring to search
	1731	int32_t baseoffset = getPreviousBaseOffset(text, textoffset);
	1732	// normalizing the offensive string
	1733	unorm_normalize(text + baseoffset, textoffset - baseoffset, UNORM_NFD,
	1734	0, accents, INITIAL_ARRAY_SIZE_, status);
	1735	// status checked in loop below
	1736
	1737	int32_t accentsindex[INITIAL_ARRAY_SIZE_];
	1738	int32_t size = getUnblockedAccentIndex(accents, accentsindex);
	1739
	1740	// 2 power n - 1 plus the full set of accents
	1741	int32_t count = (2 << (size - 1)) - 1;
	1742	while (U_SUCCESS(*status) && count > 0) {
	1743	UChar *rearrange = strsrch->canonicalSuffixAccents;
	1744	// copy the base characters
	1745	for (int k = 0; k < accentsindex[0]; k ++) {
	1746	*rearrange ++ = accents[k];
	1747	}
	1748	// forming all possible canonical rearrangement by dropping
	1749	// sets of accents
	1750	for (int i = 0; i <= size - 1; i ++) {
	1751	int32_t mask = 1 << (size - i - 1);
	1752	if (count & mask) {
	1753	for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) {
	1754	*rearrange ++ = accents[j];
	1755	}
	1756	}
	1757	}
	1758	*rearrange = 0;
	1759	int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset,
	1760	status);
	1761	if (offset != USEARCH_DONE) {
	1762	return TRUE; // match found
	1763	}
	1764	count --;
	1765	}
	1766	return FALSE;
	1767	}
	1768
	1769	/**
	1770	* Gets the previous base character offset depending on the string search
	1771	* pattern data
	1772	* @param strsrch string search data
	1773	* @param textoffset current offset, current character
	1774	* @return the offset of the next character after this base character or itself
	1775	* if it is a composed character with accents
	1776	*/
	1777	static
	1778	inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch,
	1779	int32_t textoffset)
	1780	{
	1781	if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
	1782	const UChar *text = strsrch->search->text;
	1783	int32_t offset = textoffset;
	1784	if (getFCD(text, &offset, strsrch->search->textLength) >>
	1785	SECOND_LAST_BYTE_SHIFT_) {
	1786	return getPreviousBaseOffset(text, textoffset);
	1787	}
	1788	}
	1789	return textoffset;
	1790	}
	1791
	1792	/**
	1793	* Checks match for contraction.
	1794	* If the match ends with a partial contraction we fail.
	1795	* If the match starts too far off (because of backwards iteration) we try to
	1796	* chip off the extra characters
	1797	* Internal method, status assumed to be success, caller has to check status
	1798	* before calling this method.
	1799	* @param strsrch string search data
	1800	* @param start offset of potential match, to be modified if necessary
	1801	* @param end offset of potential match, to be modified if necessary
	1802	* @param status output error status if any
	1803	* @return TRUE if match passes the contraction test, FALSE otherwise
	1804	*/
	1805	static
	1806	UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
	1807	int32_t *start,
	1808	int32_t *end,
	1809	UErrorCode *status)
	1810	{
	1811	UCollationElements *coleiter = strsrch->textIter;
	1812	int32_t textlength = strsrch->search->textLength;
	1813	int32_t temp = *start;
	1814	const UCollator *collator = strsrch->collator;
	1815	const UChar *text = strsrch->search->text;
	1816	// This part checks if either ends of the match contains potential
	1817	// contraction. If so we'll have to iterate through them
	1818	if ((end < textlength && ucol_unsafeCP(text[end], collator)) \|\|
	1819	(*start + 1 < textlength
	1820	&& ucol_unsafeCP(text[*start + 1], collator))) {
	1821	int32_t expansion = getExpansionPrefix(coleiter);
	1822	UBool expandflag = expansion > 0;
	1823	setColEIterOffset(coleiter, *start);
	1824	while (expansion > 0) {
	1825	// getting rid of the redundant ce, caused by setOffset.
	1826	// since backward contraction/expansion may have extra ces if we
	1827	// are in the normalization buffer, hasAccentsBeforeMatch would
	1828	// have taken care of it.
	1829	// E.g. the character \u01FA will have an expansion of 3, but if
	1830	// we are only looking for acute and ring \u030A and \u0301, we'll
	1831	// have to skip the first ce in the expansion buffer.
	1832	ucol_next(coleiter, status);
	1833	if (U_FAILURE(*status)) {
	1834	return FALSE;
	1835	}
	1836	if (ucol_getOffset(coleiter) != temp) {
	1837	*start = temp;
	1838	temp = ucol_getOffset(coleiter);
	1839	}
	1840	expansion --;
	1841	}
	1842
	1843	int32_t *patternce = strsrch->pattern.ces;
	1844	int32_t patterncelength = strsrch->pattern.cesLength;
	1845	int32_t count = 0;
	1846	int32_t textlength = strsrch->search->textLength;
	1847	while (count < patterncelength) {
	1848	int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
	1849	// status checked below, note that if status is a failure
	1850	// ucol_next returns UCOL_NULLORDER
	1851	if (ce == UCOL_IGNORABLE) {
	1852	continue;
	1853	}
	1854	if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) {
	1855	*start = temp;
	1856	temp = ucol_getOffset(coleiter);
	1857	}
	1858
	1859	if (count == 0 && ce != patternce[0]) {
	1860	// accents may have extra starting ces, this occurs when a
	1861	// pure accent pattern is matched without rearrangement
	1862	// text \u0325\u0300 and looking for \u0300
	1863	int32_t expected = patternce[0];
	1864	if (getFCD(text, start, textlength) & LAST_BYTE_MASK_) {
	1865	ce = getCE(strsrch, ucol_next(coleiter, status));
	1866	while (U_SUCCESS(*status) && ce != expected &&
	1867	ce != UCOL_NULLORDER &&
	1868	ucol_getOffset(coleiter) <= *end) {
	1869	ce = getCE(strsrch, ucol_next(coleiter, status));
	1870	}
	1871	}
	1872	}
	1873	if (U_FAILURE(*status) \|\| ce != patternce[count]) {
	1874	(*end) ++;
	1875	end = getNextUStringSearchBaseOffset(strsrch, end);
	1876	return FALSE;
	1877	}
	1878	count ++;
	1879	}
	1880	}
	1881	return TRUE;
	1882	}
	1883
	1884	/**
	1885	* Checks and sets the match information if found.
	1886	* Checks
	1887	* <ul>
	1888	* <li> the potential match does not repeat the previous match
	1889	* <li> boundaries are correct
	1890	* <li> potential match does not end in the middle of a contraction
	1891	* <li> identical matches
	1892	* <\ul>
	1893	* Otherwise the offset will be shifted to the next character.
	1894	* Internal method, status assumed to be success, caller has to check the
	1895	* status before calling this method.
	1896	* @param strsrch string search data
	1897	* @param textoffset offset in the collation element text. the returned value
	1898	* will be the truncated end offset of the match or the new start
	1899	* search offset.
	1900	* @param status output error status if any
	1901	* @return TRUE if the match is valid, FALSE otherwise
	1902	*/
	1903	static
	1904	inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
	1905	int32_t *textoffset,
	1906	UErrorCode *status)
	1907	{
	1908	// to ensure that the start and ends are not composite characters
	1909	UCollationElements *coleiter = strsrch->textIter;
	1910	// if we have a canonical accent match
	1911	if ((strsrch->pattern.hasSuffixAccents &&
	1912	strsrch->canonicalSuffixAccents[0]) \|\|
	1913	(strsrch->pattern.hasPrefixAccents &&
	1914	strsrch->canonicalPrefixAccents[0])) {
	1915	strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset(
	1916	strsrch,
	1917	ucol_getOffset(coleiter));
	1918	strsrch->search->matchedLength = *textoffset -
	1919	strsrch->search->matchedIndex;
	1920	return TRUE;
	1921	}
	1922
	1923	int32_t start = getColElemIterOffset(coleiter, FALSE);
	1924	if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset,
	1925	status) \|\| U_FAILURE(*status)) {
	1926	return FALSE;
	1927	}
	1928
	1929	start = getPreviousUStringSearchBaseOffset(strsrch, start);
	1930	// this totally matches, however we need to check if it is repeating
	1931	if (checkRepeatedMatch(strsrch, start, *textoffset) \|\|
	1932	!isBreakUnit(strsrch, start, *textoffset) \|\|
	1933	!checkIdentical(strsrch, start, *textoffset)) {
	1934	(*textoffset) ++;
	1935	textoffset = getNextBaseOffset(strsrch->search->text, textoffset,
	1936	strsrch->search->textLength);
	1937	return FALSE;
	1938	}
	1939
	1940	strsrch->search->matchedIndex = start;
	1941	strsrch->search->matchedLength = *textoffset - start;
	1942	return TRUE;
	1943	}
	1944
	1945	/**
	1946	* Shifting the collation element iterator position forward to prepare for
	1947	* a preceding match. If the first character is a unsafe character, we'll only
	1948	* shift by 1 to capture contractions, normalization etc.
	1949	* Internal method, status assumed to be success, caller has to check status
	1950	* before calling this method.
	1951	* @param text strsrch string search data
	1952	* @param textoffset start text position to do search
	1953	* @param ce the text ce which failed the match.
	1954	* @param patternceindex index of the ce within the pattern ce buffer which
	1955	* failed the match
	1956	* @return final offset
	1957	*/
	1958	static
	1959	inline int32_t reverseShift(UStringSearch *strsrch,
	1960	int32_t textoffset,
	1961	int32_t ce,
	1962	int32_t patternceindex)
	1963	{
	1964	if (strsrch->search->isOverlap) {
	1965	if (textoffset != strsrch->search->textLength) {
	1966	textoffset --;
	1967	}
	1968	else {
	1969	textoffset -= strsrch->pattern.defaultShiftSize;
	1970	}
	1971	}
	1972	else {
	1973	if (ce != UCOL_NULLORDER) {
	1974	int32_t shift = strsrch->pattern.backShift[hash(ce)];
	1975
	1976	// this is to adjust for characters in the middle of the substring
	1977	// for matching that failed.
	1978	int32_t adjust = patternceindex;
	1979	if (adjust > 1 && shift > adjust) {
	1980	shift -= adjust - 1;
	1981	}
	1982	textoffset -= shift;
	1983	}
	1984	else {
	1985	textoffset -= strsrch->pattern.defaultShiftSize;
	1986	}
	1987	}
	1988	textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset);
	1989	return textoffset;
	1990	}
	1991
	1992	/**
	1993	* Checks match for contraction.
	1994	* If the match starts with a partial contraction we fail.
	1995	* Internal method, status assumed to be success, caller has to check status
	1996	* before calling this method.
	1997	* @param strsrch string search data
	1998	* @param start offset of potential match, to be modified if necessary
	1999	* @param end offset of potential match, to be modified if necessary
	2000	* @param status output error status if any
	2001	* @return TRUE if match passes the contraction test, FALSE otherwise
	2002	*/
	2003	static
	2004	UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
	2005	int32_t *start,
	2006	int32_t end, UErrorCode status)
	2007	{
	2008	UCollationElements *coleiter = strsrch->textIter;
	2009	int32_t textlength = strsrch->search->textLength;
	2010	int32_t temp = *end;
	2011	const UCollator *collator = strsrch->collator;
	2012	const UChar *text = strsrch->search->text;
	2013	// This part checks if either if the start of the match contains potential
	2014	// contraction. If so we'll have to iterate through them
	2015	// Since we used ucol_next while previously looking for the potential
	2016	// match, this guarantees that our end will not be a partial contraction,
	2017	// or a partial supplementary character.
	2018	if (start < textlength && ucol_unsafeCP(text[start], collator)) {
	2019	int32_t expansion = getExpansionSuffix(coleiter);
	2020	UBool expandflag = expansion > 0;
	2021	setColEIterOffset(coleiter, *end);
	2022	while (U_SUCCESS(*status) && expansion > 0) {
	2023	// getting rid of the redundant ce
	2024	// since forward contraction/expansion may have extra ces
	2025	// if we are in the normalization buffer, hasAccentsBeforeMatch
	2026	// would have taken care of it.
	2027	// E.g. the character \u01FA will have an expansion of 3, but if
	2028	// we are only looking for A ring A\u030A, we'll have to skip the
	2029	// last ce in the expansion buffer
	2030	ucol_previous(coleiter, status);
	2031	if (U_FAILURE(*status)) {
	2032	return FALSE;
	2033	}
	2034	if (ucol_getOffset(coleiter) != temp) {
	2035	*end = temp;
	2036	temp = ucol_getOffset(coleiter);
	2037	}
	2038	expansion --;
	2039	}
	2040
	2041	int32_t *patternce = strsrch->pattern.ces;
	2042	int32_t patterncelength = strsrch->pattern.cesLength;
	2043	int32_t count = patterncelength;
	2044	while (count > 0) {
	2045	int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
	2046	// status checked below, note that if status is a failure
	2047	// ucol_previous returns UCOL_NULLORDER
	2048	if (ce == UCOL_IGNORABLE) {
	2049	continue;
	2050	}
	2051	if (expandflag && count == 0 &&
	2052	getColElemIterOffset(coleiter, FALSE) != temp) {
	2053	*end = temp;
	2054	temp = ucol_getOffset(coleiter);
	2055	}
	2056	if (U_FAILURE(*status) \|\| ce != patternce[count - 1]) {
	2057	(*start) --;
	2058	start = getPreviousBaseOffset(text, start);
	2059	return FALSE;
	2060	}
	2061	count --;
	2062	}
	2063	}
	2064	return TRUE;
	2065	}
	2066
	2067	/**
	2068	* Checks and sets the match information if found.
	2069	* Checks
	2070	* <ul>
	2071	* <li> the current match does not repeat the last match
	2072	* <li> boundaries are correct
	2073	* <li> exact matches has no extra accents
	2074	* <li> identical matches
	2075	* <\ul>
	2076	* Otherwise the offset will be shifted to the preceding character.
	2077	* Internal method, status assumed to be success, caller has to check status
	2078	* before calling this method.
	2079	* @param strsrch string search data
	2080	* @param collator
	2081	* @param coleiter collation element iterator
	2082	* @param text string
	2083	* @param textoffset offset in the collation element text. the returned value
	2084	* will be the truncated start offset of the match or the new start
	2085	* search offset.
	2086	* @param status output error status if any
	2087	* @return TRUE if the match is valid, FALSE otherwise
	2088	*/
	2089	static
	2090	inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
	2091	int32_t *textoffset,
	2092	UErrorCode *status)
	2093	{
	2094	// to ensure that the start and ends are not composite characters
	2095	int32_t end = ucol_getOffset(strsrch->textIter);
	2096	if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status)
	2097	\|\| U_FAILURE(*status)) {
	2098	return FALSE;
	2099	}
	2100
	2101	// this totally matches, however we need to check if it is repeating
	2102	// the old match
	2103	if (checkRepeatedMatch(strsrch, *textoffset, end) \|\|
	2104	!isBreakUnit(strsrch, *textoffset, end) \|\|
	2105	hasAccentsBeforeMatch(strsrch, *textoffset, end) \|\|
	2106	!checkIdentical(strsrch, *textoffset, end) \|\|
	2107	hasAccentsAfterMatch(strsrch, *textoffset, end)) {
	2108	(*textoffset) --;
	2109	*textoffset = getPreviousBaseOffset(strsrch->search->text,
	2110	*textoffset);
	2111	return FALSE;
	2112	}
	2113
	2114	//Add breakiterator boundary check for primary strength search.
	2115	if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
	2116	checkBreakBoundary(strsrch, textoffset, &end);
	2117	}
	2118
	2119	strsrch->search->matchedIndex = *textoffset;
	2120	strsrch->search->matchedLength = end - *textoffset;
	2121	return TRUE;
	2122	}
	2123
	2124	/**
	2125	* Rearranges the end accents to try matching.
	2126	* Suffix accents in the text will be grouped according to their combining
	2127	* class and the groups will be mixed and matched to try find the perfect
	2128	* match with the pattern.
	2129	* So for instance looking for "\u0301" in "\u030A\u0301\u0325"
	2130	* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings
	2131	* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
	2132	* "\u0301\u0325".
	2133	* step 2: check if any of the generated substrings matches the pattern.
	2134	* Internal method, status assumed to be success, user has to check status
	2135	* before calling this method.
	2136	* @param strsrch string search match
	2137	* @param start offset of the first base character
	2138	* @param end start of the last accent set
	2139	* @param status only error status if any
	2140	* @return USEARCH_DONE if a match is not found, otherwise return the ending
	2141	* offset of the match. Note this start includes all following accents.
	2142	*/
	2143	static
	2144	int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
	2145	int32_t start,
	2146	int32_t end,
	2147	UErrorCode *status)
	2148	{
	2149	const UChar *text = strsrch->search->text;
	2150	int32_t tempend = end;
	2151
	2152	U16_BACK_1(text, 0, tempend);
	2153	if (!(getFCD(text, &tempend, strsrch->search->textLength) &
	2154	LAST_BYTE_MASK_)) {
	2155	// die... failed at a base character
	2156	return USEARCH_DONE;
	2157	}
	2158	end = getNextBaseOffset(text, end, strsrch->search->textLength);
	2159
	2160	if (U_SUCCESS(*status)) {
	2161	UChar accents[INITIAL_ARRAY_SIZE_];
	2162	int32_t offset = getPreviousBaseOffset(text, end);
	2163	// normalizing the offensive string
	2164	unorm_normalize(text + offset, end - offset, UNORM_NFD, 0, accents,
	2165	INITIAL_ARRAY_SIZE_, status);
	2166
	2167	int32_t accentsindex[INITIAL_ARRAY_SIZE_];
	2168	int32_t accentsize = getUnblockedAccentIndex(accents,
	2169	accentsindex);
	2170	int32_t count = (2 << (accentsize - 1)) - 1;
	2171	UChar buffer[INITIAL_ARRAY_SIZE_];
	2172	UCollationElements *coleiter = strsrch->utilIter;
	2173	while (U_SUCCESS(*status) && count > 0) {
	2174	UChar *rearrange = strsrch->canonicalSuffixAccents;
	2175	// copy the base characters
	2176	for (int k = 0; k < accentsindex[0]; k ++) {
	2177	*rearrange ++ = accents[k];
	2178	}
	2179	// forming all possible canonical rearrangement by dropping
	2180	// sets of accents
	2181	for (int i = 0; i <= accentsize - 1; i ++) {
	2182	int32_t mask = 1 << (accentsize - i - 1);
	2183	if (count & mask) {
	2184	for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) {
	2185	*rearrange ++ = accents[j];
	2186	}
	2187	}
	2188	}
	2189	*rearrange = 0;
	2190	int32_t matchsize = INITIAL_ARRAY_SIZE_;
	2191	UChar *match = addToUCharArray(buffer, &matchsize,
	2192	strsrch->canonicalPrefixAccents,
	2193	strsrch->search->text + start,
	2194	offset - start,
	2195	strsrch->canonicalSuffixAccents,
	2196	status);
	2197
	2198	// run the collator iterator through this match
	2199	// if status is a failure ucol_setText does nothing
	2200	ucol_setText(coleiter, match, matchsize, status);
	2201	if (U_SUCCESS(*status)) {
	2202	if (checkCollationMatch(strsrch, coleiter)) {
	2203	if (match != buffer) {
	2204	uprv_free(match);
	2205	}
	2206	return end;
	2207	}
	2208	}
	2209	count --;
	2210	}
	2211	}
	2212	return USEARCH_DONE;
	2213	}
	2214
	2215	/**
	2216	* Take the rearranged start accents and tries matching. If match failed at
	2217	* a seperate following set of accents (seperated from the rearranged on by
	2218	* at least a base character) then we rearrange the preceding accents and
	2219	* tries matching again.
	2220	* We allow skipping of the ends of the accent set if the ces do not match.
	2221	* However if the failure is found before the accent set, it fails.
	2222	* Internal method, status assumed to be success, caller has to check status
	2223	* before calling this method.
	2224	* @param strsrch string search data
	2225	* @param textoffset of the ends of the rearranged accent
	2226	* @param status output error status if any
	2227	* @return USEARCH_DONE if a match is not found, otherwise return the ending
	2228	* offset of the match. Note this start includes all following accents.
	2229	*/
	2230	static
	2231	int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
	2232	int32_t textoffset,
	2233	UErrorCode *status)
	2234	{
	2235	const UChar *text = strsrch->search->text;
	2236	const UCollator *collator = strsrch->collator;
	2237	int32_t safelength = 0;
	2238	UChar *safetext;
	2239	int32_t safetextlength;
	2240	UChar safebuffer[INITIAL_ARRAY_SIZE_];
	2241	int32_t safeoffset = textoffset;
	2242
	2243	if (textoffset &&
	2244	ucol_unsafeCP(strsrch->canonicalPrefixAccents[
	2245	u_strlen(strsrch->canonicalPrefixAccents) - 1
	2246	], collator)) {
	2247	safeoffset = getNextSafeOffset(collator, text, textoffset,
	2248	strsrch->search->textLength);
	2249	safelength = safeoffset - textoffset;
	2250	safetextlength = INITIAL_ARRAY_SIZE_;
	2251	safetext = addToUCharArray(safebuffer, &safetextlength,
	2252	strsrch->canonicalPrefixAccents,
	2253	text + textoffset, safelength,
	2254	NULL, status);
	2255	}
	2256	else {
	2257	safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
	2258	safetext = strsrch->canonicalPrefixAccents;
	2259	}
	2260
	2261	UCollationElements *coleiter = strsrch->utilIter;
	2262	// if status is a failure, ucol_setText does nothing
	2263	ucol_setText(coleiter, safetext, safetextlength, status);
	2264	// status checked in loop below
	2265
	2266	int32_t *ce = strsrch->pattern.ces;
	2267	int32_t celength = strsrch->pattern.cesLength;
	2268	int ceindex = 0;
	2269	UBool isSafe = TRUE; // safe zone indication flag for position
	2270	int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
	2271
	2272	while (ceindex < celength) {
	2273	int32_t textce = ucol_next(coleiter, status);
	2274	if (U_FAILURE(*status)) {
	2275	if (isSafe) {
	2276	cleanUpSafeText(strsrch, safetext, safebuffer);
	2277	}
	2278	return USEARCH_DONE;
	2279	}
	2280	if (textce == UCOL_NULLORDER) {
	2281	// check if we have passed the safe buffer
	2282	if (coleiter == strsrch->textIter) {
	2283	cleanUpSafeText(strsrch, safetext, safebuffer);
	2284	return USEARCH_DONE;
	2285	}
	2286	cleanUpSafeText(strsrch, safetext, safebuffer);
	2287	safetext = safebuffer;
	2288	coleiter = strsrch->textIter;
	2289	setColEIterOffset(coleiter, safeoffset);
	2290	// status checked at the start of the loop
	2291	isSafe = FALSE;
	2292	continue;
	2293	}
	2294	textce = getCE(strsrch, textce);
	2295	if (textce != UCOL_IGNORABLE && textce != ce[ceindex]) {
	2296	// do the beginning stuff
	2297	int32_t failedoffset = ucol_getOffset(coleiter);
	2298	if (isSafe && failedoffset <= prefixlength) {
	2299	// alas... no hope. failed at rearranged accent set
	2300	cleanUpSafeText(strsrch, safetext, safebuffer);
	2301	return USEARCH_DONE;
	2302	}
	2303	else {
	2304	if (isSafe) {
	2305	failedoffset = safeoffset - failedoffset;
	2306	cleanUpSafeText(strsrch, safetext, safebuffer);
	2307	}
	2308
	2309	// try rearranging the end accents
	2310	int32_t result = doPreviousCanonicalSuffixMatch(strsrch,
	2311	textoffset, failedoffset, status);
	2312	if (result != USEARCH_DONE) {
	2313	// if status is a failure, ucol_setOffset does nothing
	2314	setColEIterOffset(strsrch->textIter, result);
	2315	}
	2316	if (U_FAILURE(*status)) {
	2317	return USEARCH_DONE;
	2318	}
	2319	return result;
	2320	}
	2321	}
	2322	if (textce == ce[ceindex]) {
	2323	ceindex ++;
	2324	}
	2325	}
	2326	// set offset here
	2327	if (isSafe) {
	2328	int32_t result = ucol_getOffset(coleiter);
	2329	// sets the text iterator here with the correct expansion and offset
	2330	int32_t leftoverces = getExpansionSuffix(coleiter);
	2331	cleanUpSafeText(strsrch, safetext, safebuffer);
	2332	if (result <= prefixlength) {
	2333	result = textoffset;
	2334	}
	2335	else {
	2336	result = textoffset + (safeoffset - result);
	2337	}
	2338	setColEIterOffset(strsrch->textIter, result);
	2339	setExpansionSuffix(strsrch->textIter, leftoverces);
	2340	return result;
	2341	}
	2342
	2343	return ucol_getOffset(coleiter);
	2344	}
	2345
	2346	/**
	2347	* Trying out the substring and sees if it can be a canonical match.
	2348	* This will try normalizing the starting accents and arranging them into
	2349	* canonical equivalents and check their corresponding ces with the pattern ce.
	2350	* Prefix accents in the text will be grouped according to their combining
	2351	* class and the groups will be mixed and matched to try find the perfect
	2352	* match with the pattern.
	2353	* So for instance looking for "\u0301" in "\u030A\u0301\u0325"
	2354	* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings
	2355	* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
	2356	* "\u0301\u0325".
	2357	* step 2: check if any of the generated substrings matches the pattern.
	2358	* Internal method, status assumed to be success, caller has to check status
	2359	* before calling this method.
	2360	* @param strsrch string search data
	2361	* @param textoffset start offset in the collation element text that starts
	2362	* with the accents to be rearranged
	2363	* @param status output error status if any
	2364	* @return TRUE if the match is valid, FALSE otherwise
	2365	*/
	2366	static
	2367	UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
	2368	int32_t textoffset,
	2369	UErrorCode *status)
	2370	{
	2371	const UChar *text = strsrch->search->text;
	2372	int32_t temp = textoffset;
	2373	int32_t textlength = strsrch->search->textLength;
	2374	if ((getFCD(text, &temp, textlength) >> SECOND_LAST_BYTE_SHIFT_) == 0) {
	2375	UCollationElements *coleiter = strsrch->textIter;
	2376	int32_t offset = ucol_getOffset(coleiter);
	2377	if (strsrch->pattern.hasSuffixAccents) {
	2378	offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset,
	2379	offset, status);
	2380	if (U_SUCCESS(*status) && offset != USEARCH_DONE) {
	2381	setColEIterOffset(coleiter, offset);
	2382	return TRUE;
	2383	}
	2384	}
	2385	return FALSE;
	2386	}
	2387
	2388	if (!strsrch->pattern.hasPrefixAccents) {
	2389	return FALSE;
	2390	}
	2391
	2392	UChar accents[INITIAL_ARRAY_SIZE_];
	2393	// offset to the last base character in substring to search
	2394	int32_t baseoffset = getNextBaseOffset(text, textoffset, textlength);
	2395	// normalizing the offensive string
	2396	unorm_normalize(text + textoffset, baseoffset - textoffset, UNORM_NFD,
	2397	0, accents, INITIAL_ARRAY_SIZE_, status);
	2398	// status checked in loop
	2399
	2400	int32_t accentsindex[INITIAL_ARRAY_SIZE_];
	2401	int32_t size = getUnblockedAccentIndex(accents, accentsindex);
	2402
	2403	// 2 power n - 1 plus the full set of accents
	2404	int32_t count = (2 << (size - 1)) - 1;
	2405	while (U_SUCCESS(*status) && count > 0) {
	2406	UChar *rearrange = strsrch->canonicalPrefixAccents;
	2407	// copy the base characters
	2408	for (int k = 0; k < accentsindex[0]; k ++) {
	2409	*rearrange ++ = accents[k];
	2410	}
	2411	// forming all possible canonical rearrangement by dropping
	2412	// sets of accents
	2413	for (int i = 0; i <= size - 1; i ++) {
	2414	int32_t mask = 1 << (size - i - 1);
	2415	if (count & mask) {
	2416	for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) {
	2417	*rearrange ++ = accents[j];
	2418	}
	2419	}
	2420	}
	2421	*rearrange = 0;
	2422	int32_t offset = doPreviousCanonicalPrefixMatch(strsrch,
	2423	baseoffset, status);
	2424	if (offset != USEARCH_DONE) {
	2425	return TRUE; // match found
	2426	}
	2427	count --;
	2428	}
	2429	return FALSE;
	2430	}
	2431
	2432	/**
	2433	* Checks match for contraction.
	2434	* If the match starts with a partial contraction we fail.
	2435	* Internal method, status assumed to be success, caller has to check status
	2436	* before calling this method.
	2437	* @param strsrch string search data
	2438	* @param start offset of potential match, to be modified if necessary
	2439	* @param end offset of potential match, to be modified if necessary
	2440	* @param status only error status if any
	2441	* @return TRUE if match passes the contraction test, FALSE otherwise
	2442	*/
	2443	static
	2444	UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
	2445	int32_t *start,
	2446	int32_t end, UErrorCode status)
	2447	{
	2448	UCollationElements *coleiter = strsrch->textIter;
	2449	int32_t textlength = strsrch->search->textLength;
	2450	int32_t temp = *end;
	2451	const UCollator *collator = strsrch->collator;
	2452	const UChar *text = strsrch->search->text;
	2453	// This part checks if either if the start of the match contains potential
	2454	// contraction. If so we'll have to iterate through them
	2455	// Since we used ucol_next while previously looking for the potential
	2456	// match, this guarantees that our end will not be a partial contraction,
	2457	// or a partial supplementary character.
	2458	if (start < textlength && ucol_unsafeCP(text[start], collator)) {
	2459	int32_t expansion = getExpansionSuffix(coleiter);
	2460	UBool expandflag = expansion > 0;
	2461	setColEIterOffset(coleiter, *end);
	2462	while (expansion > 0) {
	2463	// getting rid of the redundant ce
	2464	// since forward contraction/expansion may have extra ces
	2465	// if we are in the normalization buffer, hasAccentsBeforeMatch
	2466	// would have taken care of it.
	2467	// E.g. the character \u01FA will have an expansion of 3, but if
	2468	// we are only looking for A ring A\u030A, we'll have to skip the
	2469	// last ce in the expansion buffer
	2470	ucol_previous(coleiter, status);
	2471	if (U_FAILURE(*status)) {
	2472	return FALSE;
	2473	}
	2474	if (ucol_getOffset(coleiter) != temp) {
	2475	*end = temp;
	2476	temp = ucol_getOffset(coleiter);
	2477	}
	2478	expansion --;
	2479	}
	2480
	2481	int32_t *patternce = strsrch->pattern.ces;
	2482	int32_t patterncelength = strsrch->pattern.cesLength;
	2483	int32_t count = patterncelength;
	2484	while (count > 0) {
	2485	int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
	2486	// status checked below, note that if status is a failure
	2487	// ucol_previous returns UCOL_NULLORDER
	2488	if (ce == UCOL_IGNORABLE) {
	2489	continue;
	2490	}
	2491	if (expandflag && count == 0 &&
	2492	getColElemIterOffset(coleiter, FALSE) != temp) {
	2493	*end = temp;
	2494	temp = ucol_getOffset(coleiter);
	2495	}
	2496	if (count == patterncelength &&
	2497	ce != patternce[patterncelength - 1]) {
	2498	// accents may have extra starting ces, this occurs when a
	2499	// pure accent pattern is matched without rearrangement
	2500	int32_t expected = patternce[patterncelength - 1];
	2501	U16_BACK_1(text, 0, *end);
	2502	if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) {
	2503	ce = getCE(strsrch, ucol_previous(coleiter, status));
	2504	while (U_SUCCESS(*status) && ce != expected &&
	2505	ce != UCOL_NULLORDER &&
	2506	ucol_getOffset(coleiter) <= *start) {
	2507	ce = getCE(strsrch, ucol_previous(coleiter, status));
	2508	}
	2509	}
	2510	}
	2511	if (U_FAILURE(*status) \|\| ce != patternce[count - 1]) {
	2512	(*start) --;
	2513	start = getPreviousBaseOffset(text, start);
	2514	return FALSE;
	2515	}
	2516	count --;
	2517	}
	2518	}
	2519	return TRUE;
	2520	}
	2521
	2522	/**
	2523	* Checks and sets the match information if found.
	2524	* Checks
	2525	* <ul>
	2526	* <li> the potential match does not repeat the previous match
	2527	* <li> boundaries are correct
	2528	* <li> potential match does not end in the middle of a contraction
	2529	* <li> identical matches
	2530	* <\ul>
	2531	* Otherwise the offset will be shifted to the next character.
	2532	* Internal method, status assumed to be success, caller has to check status
	2533	* before calling this method.
	2534	* @param strsrch string search data
	2535	* @param textoffset offset in the collation element text. the returned value
	2536	* will be the truncated start offset of the match or the new start
	2537	* search offset.
	2538	* @param status only error status if any
	2539	* @return TRUE if the match is valid, FALSE otherwise
	2540	*/
	2541	static
	2542	inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
	2543	int32_t *textoffset,
	2544	UErrorCode *status)
	2545	{
	2546	// to ensure that the start and ends are not composite characters
	2547	UCollationElements *coleiter = strsrch->textIter;
	2548	// if we have a canonical accent match
	2549	if ((strsrch->pattern.hasSuffixAccents &&
	2550	strsrch->canonicalSuffixAccents[0]) \|\|
	2551	(strsrch->pattern.hasPrefixAccents &&
	2552	strsrch->canonicalPrefixAccents[0])) {
	2553	strsrch->search->matchedIndex = *textoffset;
	2554	strsrch->search->matchedLength =
	2555	getNextUStringSearchBaseOffset(strsrch,
	2556	getColElemIterOffset(coleiter, FALSE))
	2557	- *textoffset;
	2558	return TRUE;
	2559	}
	2560
	2561	int32_t end = ucol_getOffset(coleiter);
	2562	if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end,
	2563	status) \|\|
	2564	U_FAILURE(*status)) {
	2565	return FALSE;
	2566	}
	2567
	2568	end = getNextUStringSearchBaseOffset(strsrch, end);
	2569	// this totally matches, however we need to check if it is repeating
	2570	if (checkRepeatedMatch(strsrch, *textoffset, end) \|\|
	2571	!isBreakUnit(strsrch, *textoffset, end) \|\|
	2572	!checkIdentical(strsrch, *textoffset, end)) {
	2573	(*textoffset) --;
	2574	*textoffset = getPreviousBaseOffset(strsrch->search->text,
	2575	*textoffset);
	2576	return FALSE;
	2577	}
	2578
	2579	strsrch->search->matchedIndex = *textoffset;
	2580	strsrch->search->matchedLength = end - *textoffset;
	2581	return TRUE;
	2582	}
	2583	#endif // #if BOYER_MOORE
	2584
	2585	// constructors and destructor -------------------------------------------
	2586
	2587	U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
	2588	int32_t patternlength,
	2589	const UChar *text,
	2590	int32_t textlength,
	2591	const char *locale,
	2592	UBreakIterator *breakiter,
	2593	UErrorCode *status)
	2594	{
	2595	if (U_FAILURE(*status)) {
	2596	return NULL;
	2597	}
	2598	#if UCONFIG_NO_BREAK_ITERATION
	2599	if (breakiter != NULL) {
	2600	*status = U_UNSUPPORTED_ERROR;
	2601	return NULL;
	2602	}
	2603	#endif
	2604	if (locale) {
	2605	// ucol_open internally checks for status
	2606	UCollator *collator = ucol_open(locale, status);
	2607	// pattern, text checks are done in usearch_openFromCollator
	2608	UStringSearch *result = usearch_openFromCollator(pattern,
	2609	patternlength, text, textlength,
	2610	collator, breakiter, status);
	2611
	2612	if (result == NULL \|\| U_FAILURE(*status)) {
	2613	if (collator) {
	2614	ucol_close(collator);
	2615	}
	2616	return NULL;
	2617	}
	2618	else {
	2619	result->ownCollator = TRUE;
	2620	}
	2621	return result;
	2622	}
	2623	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2624	return NULL;
	2625	}
	2626
	2627	U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
	2628	const UChar *pattern,
	2629	int32_t patternlength,
	2630	const UChar *text,
	2631	int32_t textlength,
	2632	const UCollator *collator,
	2633	UBreakIterator *breakiter,
	2634	UErrorCode *status)
	2635	{
	2636	if (U_FAILURE(*status)) {
	2637	return NULL;
	2638	}
	2639	#if UCONFIG_NO_BREAK_ITERATION
	2640	if (breakiter != NULL) {
	2641	*status = U_UNSUPPORTED_ERROR;
	2642	return NULL;
	2643	}
	2644	#endif
	2645	if (pattern == NULL \|\| text == NULL \|\| collator == NULL) {
	2646	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2647	return NULL;
	2648	}
	2649
	2650	// string search does not really work when numeric collation is turned on
	2651	if(ucol_getAttribute(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) {
	2652	*status = U_UNSUPPORTED_ERROR;
	2653	return NULL;
	2654	}
	2655
	2656	if (U_SUCCESS(*status)) {
	2657	initializeFCD(status);
	2658	if (U_FAILURE(*status)) {
	2659	return NULL;
	2660	}
	2661
	2662	UStringSearch *result;
	2663	if (textlength == -1) {
	2664	textlength = u_strlen(text);
	2665	}
	2666	if (patternlength == -1) {
	2667	patternlength = u_strlen(pattern);
	2668	}
	2669	if (textlength <= 0 \|\| patternlength <= 0) {
	2670	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2671	return NULL;
	2672	}
	2673
	2674	result = (UStringSearch *)uprv_malloc(sizeof(UStringSearch));
	2675	if (result == NULL) {
	2676	*status = U_MEMORY_ALLOCATION_ERROR;
	2677	return NULL;
	2678	}
	2679
	2680	result->collator = collator;
	2681	result->strength = ucol_getStrength(collator);
	2682	result->ceMask = getMask(result->strength);
	2683	result->toShift =
	2684	ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
	2685	UCOL_SHIFTED;
	2686	result->variableTop = ucol_getVariableTop(collator, status);
	2687
	2688	result->nfd = Normalizer2::getNFDInstance(*status);
	2689
	2690	if (U_FAILURE(*status)) {
	2691	uprv_free(result);
	2692	return NULL;
	2693	}
	2694
	2695	result->search = (USearch *)uprv_malloc(sizeof(USearch));
	2696	if (result->search == NULL) {
	2697	*status = U_MEMORY_ALLOCATION_ERROR;
	2698	uprv_free(result);
	2699	return NULL;
	2700	}
	2701
	2702	result->search->text = text;
	2703	result->search->textLength = textlength;
	2704
	2705	result->pattern.text = pattern;
	2706	result->pattern.textLength = patternlength;
	2707	result->pattern.ces = NULL;
	2708	result->pattern.pces = NULL;
	2709
	2710	result->search->breakIter = breakiter;
	2711	#if !UCONFIG_NO_BREAK_ITERATION
	2712	result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
	2713	if (breakiter) {
	2714	ubrk_setText(breakiter, text, textlength, status);
	2715	}
	2716	#endif
	2717
	2718	result->ownCollator = FALSE;
	2719	result->search->matchedLength = 0;
	2720	result->search->matchedIndex = USEARCH_DONE;
	2721	result->utilIter = NULL;
	2722	result->textIter = ucol_openElements(collator, text,
	2723	textlength, status);
	2724	result->textProcessedIter = NULL;
	2725	if (U_FAILURE(*status)) {
	2726	usearch_close(result);
	2727	return NULL;
	2728	}
	2729
	2730	result->search->isOverlap = FALSE;
	2731	result->search->isCanonicalMatch = FALSE;
	2732	result->search->elementComparisonType = 0;
	2733	result->search->isForwardSearching = TRUE;
	2734	result->search->reset = TRUE;
	2735
	2736	initialize(result, status);
	2737
	2738	if (U_FAILURE(*status)) {
	2739	usearch_close(result);
	2740	return NULL;
	2741	}
	2742
	2743	return result;
	2744	}
	2745	return NULL;
	2746	}
	2747
	2748	U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
	2749	{
	2750	if (strsrch) {
	2751	if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
	2752	strsrch->pattern.ces) {
	2753	uprv_free(strsrch->pattern.ces);
	2754	}
	2755
	2756	if (strsrch->pattern.pces != NULL &&
	2757	strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
	2758	uprv_free(strsrch->pattern.pces);
	2759	}
	2760
	2761	delete strsrch->textProcessedIter;
	2762	ucol_closeElements(strsrch->textIter);
	2763	ucol_closeElements(strsrch->utilIter);
	2764
	2765	if (strsrch->ownCollator && strsrch->collator) {
	2766	ucol_close((UCollator *)strsrch->collator);
	2767	}
	2768
	2769	#if !UCONFIG_NO_BREAK_ITERATION
	2770	if (strsrch->search->internalBreakIter) {
	2771	ubrk_close(strsrch->search->internalBreakIter);
	2772	}
	2773	#endif
	2774
	2775	uprv_free(strsrch->search);
	2776	uprv_free(strsrch);
	2777	}
	2778	}
	2779
	2780	namespace {
	2781
	2782	UBool initTextProcessedIter(UStringSearch strsrch, UErrorCode status) {
	2783	if (U_FAILURE(*status)) { return FALSE; }
	2784	if (strsrch->textProcessedIter == NULL) {
	2785	strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
	2786	if (strsrch->textProcessedIter == NULL) {
	2787	*status = U_MEMORY_ALLOCATION_ERROR;
	2788	return FALSE;
	2789	}
	2790	} else {
	2791	strsrch->textProcessedIter->init(strsrch->textIter);
	2792	}
	2793	return TRUE;
	2794	}
	2795
	2796	}
	2797
	2798	// set and get methods --------------------------------------------------
	2799
	2800	U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
	2801	int32_t position,
	2802	UErrorCode *status)
	2803	{
	2804	if (U_SUCCESS(*status) && strsrch) {
	2805	if (isOutOfBounds(strsrch->search->textLength, position)) {
	2806	*status = U_INDEX_OUTOFBOUNDS_ERROR;
	2807	}
	2808	else {
	2809	setColEIterOffset(strsrch->textIter, position);
	2810	}
	2811	strsrch->search->matchedIndex = USEARCH_DONE;
	2812	strsrch->search->matchedLength = 0;
	2813	strsrch->search->reset = FALSE;
	2814	}
	2815	}
	2816
	2817	U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch)
	2818	{
	2819	if (strsrch) {
	2820	int32_t result = ucol_getOffset(strsrch->textIter);
	2821	if (isOutOfBounds(strsrch->search->textLength, result)) {
	2822	return USEARCH_DONE;
	2823	}
	2824	return result;
	2825	}
	2826	return USEARCH_DONE;
	2827	}
	2828
	2829	U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
	2830	USearchAttribute attribute,
	2831	USearchAttributeValue value,
	2832	UErrorCode *status)
	2833	{
	2834	if (U_SUCCESS(*status) && strsrch) {
	2835	switch (attribute)
	2836	{
	2837	case USEARCH_OVERLAP :
	2838	strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
	2839	break;
	2840	case USEARCH_CANONICAL_MATCH :
	2841	strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE :
	2842	FALSE);
	2843	break;
	2844	case USEARCH_ELEMENT_COMPARISON :
	2845	if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD \|\| value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
	2846	strsrch->search->elementComparisonType = (int16_t)value;
	2847	} else {
	2848	strsrch->search->elementComparisonType = 0;
	2849	}
	2850	break;
	2851	case USEARCH_ATTRIBUTE_COUNT :
	2852	default:
	2853	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2854	}
	2855	}
	2856	if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
	2857	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2858	}
	2859	}
	2860
	2861	U_CAPI USearchAttributeValue U_EXPORT2 usearch_getAttribute(
	2862	const UStringSearch *strsrch,
	2863	USearchAttribute attribute)
	2864	{
	2865	if (strsrch) {
	2866	switch (attribute) {
	2867	case USEARCH_OVERLAP :
	2868	return (strsrch->search->isOverlap == TRUE ? USEARCH_ON :
	2869	USEARCH_OFF);
	2870	case USEARCH_CANONICAL_MATCH :
	2871	return (strsrch->search->isCanonicalMatch == TRUE ? USEARCH_ON :
	2872	USEARCH_OFF);
	2873	case USEARCH_ELEMENT_COMPARISON :
	2874	{
	2875	int16_t value = strsrch->search->elementComparisonType;
	2876	if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD \|\| value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
	2877	return (USearchAttributeValue)value;
	2878	} else {
	2879	return USEARCH_STANDARD_ELEMENT_COMPARISON;
	2880	}
	2881	}
	2882	case USEARCH_ATTRIBUTE_COUNT :
	2883	return USEARCH_DEFAULT;
	2884	}
	2885	}
	2886	return USEARCH_DEFAULT;
	2887	}
	2888
	2889	U_CAPI int32_t U_EXPORT2 usearch_getMatchedStart(
	2890	const UStringSearch *strsrch)
	2891	{
	2892	if (strsrch == NULL) {
	2893	return USEARCH_DONE;
	2894	}
	2895	return strsrch->search->matchedIndex;
	2896	}
	2897
	2898
	2899	U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
	2900	UChar *result,
	2901	int32_t resultCapacity,
	2902	UErrorCode *status)
	2903	{
	2904	if (U_FAILURE(*status)) {
	2905	return USEARCH_DONE;
	2906	}
	2907	if (strsrch == NULL \|\| resultCapacity < 0 \|\| (resultCapacity > 0 &&
	2908	result == NULL)) {
	2909	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2910	return USEARCH_DONE;
	2911	}
	2912
	2913	int32_t copylength = strsrch->search->matchedLength;
	2914	int32_t copyindex = strsrch->search->matchedIndex;
	2915	if (copyindex == USEARCH_DONE) {
	2916	u_terminateUChars(result, resultCapacity, 0, status);
	2917	return USEARCH_DONE;
	2918	}
	2919
	2920	if (resultCapacity < copylength) {
	2921	copylength = resultCapacity;
	2922	}
	2923	if (copylength > 0) {
	2924	uprv_memcpy(result, strsrch->search->text + copyindex,
	2925	copylength * sizeof(UChar));
	2926	}
	2927	return u_terminateUChars(result, resultCapacity,
	2928	strsrch->search->matchedLength, status);
	2929	}
	2930
	2931	U_CAPI int32_t U_EXPORT2 usearch_getMatchedLength(
	2932	const UStringSearch *strsrch)
	2933	{
	2934	if (strsrch) {
	2935	return strsrch->search->matchedLength;
	2936	}
	2937	return USEARCH_DONE;
	2938	}
	2939
	2940	#if !UCONFIG_NO_BREAK_ITERATION
	2941
	2942	U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
	2943	UBreakIterator *breakiter,
	2944	UErrorCode *status)
	2945	{
	2946	if (U_SUCCESS(*status) && strsrch) {
	2947	strsrch->search->breakIter = breakiter;
	2948	if (breakiter) {
	2949	ubrk_setText(breakiter, strsrch->search->text,
	2950	strsrch->search->textLength, status);
	2951	}
	2952	}
	2953	}
	2954
	2955	U_CAPI const UBreakIterator* U_EXPORT2
	2956	usearch_getBreakIterator(const UStringSearch *strsrch)
	2957	{
	2958	if (strsrch) {
	2959	return strsrch->search->breakIter;
	2960	}
	2961	return NULL;
	2962	}
	2963
	2964	#endif
	2965
	2966	U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
	2967	const UChar *text,
	2968	int32_t textlength,
	2969	UErrorCode *status)
	2970	{
	2971	if (U_SUCCESS(*status)) {
	2972	if (strsrch == NULL \|\| text == NULL \|\| textlength < -1 \|\|
	2973	textlength == 0) {
	2974	*status = U_ILLEGAL_ARGUMENT_ERROR;
	2975	}
	2976	else {
	2977	if (textlength == -1) {
	2978	textlength = u_strlen(text);
	2979	}
	2980	strsrch->search->text = text;
	2981	strsrch->search->textLength = textlength;
	2982	ucol_setText(strsrch->textIter, text, textlength, status);
	2983	strsrch->search->matchedIndex = USEARCH_DONE;
	2984	strsrch->search->matchedLength = 0;
	2985	strsrch->search->reset = TRUE;
	2986	#if !UCONFIG_NO_BREAK_ITERATION
	2987	if (strsrch->search->breakIter != NULL) {
	2988	ubrk_setText(strsrch->search->breakIter, text,
	2989	textlength, status);
	2990	}
	2991	ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
	2992	#endif
	2993	}
	2994	}
	2995	}
	2996
	2997	U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
	2998	int32_t *length)
	2999	{
	3000	if (strsrch) {
	3001	*length = strsrch->search->textLength;
	3002	return strsrch->search->text;
	3003	}
	3004	return NULL;
	3005	}
	3006
	3007	U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
	3008	const UCollator *collator,
	3009	UErrorCode *status)
	3010	{
	3011	if (U_SUCCESS(*status)) {
	3012	if (collator == NULL) {
	3013	*status = U_ILLEGAL_ARGUMENT_ERROR;
	3014	return;
	3015	}
	3016
	3017	if (strsrch) {
	3018	delete strsrch->textProcessedIter;
	3019	strsrch->textProcessedIter = NULL;
	3020	ucol_closeElements(strsrch->textIter);
	3021	ucol_closeElements(strsrch->utilIter);
	3022	strsrch->textIter = strsrch->utilIter = NULL;
	3023	if (strsrch->ownCollator && (strsrch->collator != collator)) {
	3024	ucol_close((UCollator *)strsrch->collator);
	3025	strsrch->ownCollator = FALSE;
	3026	}
	3027	strsrch->collator = collator;
	3028	strsrch->strength = ucol_getStrength(collator);
	3029	strsrch->ceMask = getMask(strsrch->strength);
	3030	#if !UCONFIG_NO_BREAK_ITERATION
	3031	ubrk_close(strsrch->search->internalBreakIter);
	3032	strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
	3033	strsrch->search->text, strsrch->search->textLength, status);
	3034	#endif
	3035	// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
	3036	strsrch->toShift =
	3037	ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
	3038	UCOL_SHIFTED;
	3039	// if status is a failure, ucol_getVariableTop returns 0
	3040	strsrch->variableTop = ucol_getVariableTop(collator, status);
	3041	strsrch->textIter = ucol_openElements(collator,
	3042	strsrch->search->text,
	3043	strsrch->search->textLength,
	3044	status);
	3045	strsrch->utilIter = ucol_openElements(
	3046	collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
	3047	// initialize() _after_ setting the iterators for the new collator.
	3048	initialize(strsrch, status);
	3049	}
	3050
	3051	// **** are these calls needed?
	3052	// **** we call uprv_init_pce in initializePatternPCETable
	3053	// **** and the CEIBuffer constructor...
	3054	#if 0
	3055	uprv_init_pce(strsrch->textIter);
	3056	uprv_init_pce(strsrch->utilIter);
	3057	#endif
	3058	}
	3059	}
	3060
	3061	U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
	3062	{
	3063	if (strsrch) {
	3064	return (UCollator *)strsrch->collator;
	3065	}
	3066	return NULL;
	3067	}
	3068
	3069	U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
	3070	const UChar *pattern,
	3071	int32_t patternlength,
	3072	UErrorCode *status)
	3073	{
	3074	if (U_SUCCESS(*status)) {
	3075	if (strsrch == NULL \|\| pattern == NULL) {
	3076	*status = U_ILLEGAL_ARGUMENT_ERROR;
	3077	}
	3078	else {
	3079	if (patternlength == -1) {
	3080	patternlength = u_strlen(pattern);
	3081	}
	3082	if (patternlength == 0) {
	3083	*status = U_ILLEGAL_ARGUMENT_ERROR;
	3084	return;
	3085	}
	3086	strsrch->pattern.text = pattern;
	3087	strsrch->pattern.textLength = patternlength;
	3088	initialize(strsrch, status);
	3089	}
	3090	}
	3091	}
	3092
	3093	U_CAPI const UChar* U_EXPORT2
	3094	usearch_getPattern(const UStringSearch *strsrch,
	3095	int32_t *length)
	3096	{
	3097	if (strsrch) {
	3098	*length = strsrch->pattern.textLength;
	3099	return strsrch->pattern.text;
	3100	}
	3101	return NULL;
	3102	}
	3103
	3104	// miscellaneous methods --------------------------------------------------
	3105
	3106	U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
	3107	UErrorCode *status)
	3108	{
	3109	if (strsrch && U_SUCCESS(*status)) {
	3110	strsrch->search->isForwardSearching = TRUE;
	3111	usearch_setOffset(strsrch, 0, status);
	3112	if (U_SUCCESS(*status)) {
	3113	return usearch_next(strsrch, status);
	3114	}
	3115	}
	3116	return USEARCH_DONE;
	3117	}
	3118
	3119	U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
	3120	int32_t position,
	3121	UErrorCode *status)
	3122	{
	3123	if (strsrch && U_SUCCESS(*status)) {
	3124	strsrch->search->isForwardSearching = TRUE;
	3125	// position checked in usearch_setOffset
	3126	usearch_setOffset(strsrch, position, status);
	3127	if (U_SUCCESS(*status)) {
	3128	return usearch_next(strsrch, status);
	3129	}
	3130	}
	3131	return USEARCH_DONE;
	3132	}
	3133
	3134	U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
	3135	UErrorCode *status)
	3136	{
	3137	if (strsrch && U_SUCCESS(*status)) {
	3138	strsrch->search->isForwardSearching = FALSE;
	3139	usearch_setOffset(strsrch, strsrch->search->textLength, status);
	3140	if (U_SUCCESS(*status)) {
	3141	return usearch_previous(strsrch, status);
	3142	}
	3143	}
	3144	return USEARCH_DONE;
	3145	}
	3146
	3147	U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
	3148	int32_t position,
	3149	UErrorCode *status)
	3150	{
	3151	if (strsrch && U_SUCCESS(*status)) {
	3152	strsrch->search->isForwardSearching = FALSE;
	3153	// position checked in usearch_setOffset
	3154	usearch_setOffset(strsrch, position, status);
	3155	if (U_SUCCESS(*status)) {
	3156	return usearch_previous(strsrch, status);
	3157	}
	3158	}
	3159	return USEARCH_DONE;
	3160	}
	3161
	3162	/**
	3163	* If a direction switch is required, we'll count the number of ces till the
	3164	* beginning of the collation element iterator and iterate forwards that
	3165	* number of times. This is so that we get to the correct point within the
	3166	* string to continue the search in. Imagine when we are in the middle of the
	3167	* normalization buffer when the change in direction is request. arrrgghh....
	3168	* After searching the offset within the collation element iterator will be
	3169	* shifted to the start of the match. If a match is not found, the offset would
	3170	* have been set to the end of the text string in the collation element
	3171	* iterator.
	3172	* Okay, here's my take on normalization buffer. The only time when there can
	3173	* be 2 matches within the same normalization is when the pattern is consists
	3174	* of all accents. But since the offset returned is from the text string, we
	3175	* should not confuse the caller by returning the second match within the
	3176	* same normalization buffer. If we do, the 2 results will have the same match
	3177	* offsets, and that'll be confusing. I'll return the next match that doesn't
	3178	* fall within the same normalization buffer. Note this does not affect the
	3179	* results of matches spanning the text and the normalization buffer.
	3180	* The position to start searching is taken from the collation element
	3181	* iterator. Callers of this API would have to set the offset in the collation
	3182	* element iterator before using this method.
	3183	*/
	3184	U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
	3185	UErrorCode *status)
	3186	{
	3187	if (U_SUCCESS(*status) && strsrch) {
	3188	// note offset is either equivalent to the start of the previous match
	3189	// or is set by the user
	3190	int32_t offset = usearch_getOffset(strsrch);
	3191	USearch *search = strsrch->search;
	3192	search->reset = FALSE;
	3193	int32_t textlength = search->textLength;
	3194	if (search->isForwardSearching) {
	3195	#if BOYER_MOORE
	3196	if (offset == textlength
	3197	\|\| (!search->isOverlap &&
	3198	(offset + strsrch->pattern.defaultShiftSize > textlength \|\|
	3199	(search->matchedIndex != USEARCH_DONE &&
	3200	offset + search->matchedLength >= textlength)))) {
	3201	// not enough characters to match
	3202	setMatchNotFound(strsrch);
	3203	return USEARCH_DONE;
	3204	}
	3205	#else
	3206	if (offset == textlength \|\|
	3207	(! search->isOverlap &&
	3208	(search->matchedIndex != USEARCH_DONE &&
	3209	offset + search->matchedLength > textlength))) {
	3210	// not enough characters to match
	3211	setMatchNotFound(strsrch);
	3212	return USEARCH_DONE;
	3213	}
	3214	#endif
	3215	}
	3216	else {
	3217	// switching direction.
	3218	// if matchedIndex == USEARCH_DONE, it means that either a
	3219	// setOffset has been called or that previous ran off the text
	3220	// string. the iterator would have been set to offset 0 if a
	3221	// match is not found.
	3222	search->isForwardSearching = TRUE;
	3223	if (search->matchedIndex != USEARCH_DONE) {
	3224	// there's no need to set the collation element iterator
	3225	// the next call to next will set the offset.
	3226	return search->matchedIndex;
	3227	}
	3228	}
	3229
	3230	if (U_SUCCESS(*status)) {
	3231	if (strsrch->pattern.cesLength == 0) {
	3232	if (search->matchedIndex == USEARCH_DONE) {
	3233	search->matchedIndex = offset;
	3234	}
	3235	else { // moves by codepoints
	3236	U16_FWD_1(search->text, search->matchedIndex, textlength);
	3237	}
	3238
	3239	search->matchedLength = 0;
	3240	setColEIterOffset(strsrch->textIter, search->matchedIndex);
	3241	// status checked below
	3242	if (search->matchedIndex == textlength) {
	3243	search->matchedIndex = USEARCH_DONE;
	3244	}
	3245	}
	3246	else {
	3247	if (search->matchedLength > 0) {
	3248	// if matchlength is 0 we are at the start of the iteration
	3249	if (search->isOverlap) {
	3250	ucol_setOffset(strsrch->textIter, offset + 1, status);
	3251	}
	3252	else {
	3253	ucol_setOffset(strsrch->textIter,
	3254	offset + search->matchedLength, status);
	3255	}
	3256	}
	3257	else {
	3258	// for boundary check purposes. this will ensure that the
	3259	// next match will not preceed the current offset
	3260	// note search->matchedIndex will always be set to something
	3261	// in the code
	3262	search->matchedIndex = offset - 1;
	3263	}
	3264
	3265	if (search->isCanonicalMatch) {
	3266	// can't use exact here since extra accents are allowed.
	3267	usearch_handleNextCanonical(strsrch, status);
	3268	}
	3269	else {
	3270	usearch_handleNextExact(strsrch, status);
	3271	}
	3272	}
	3273
	3274	if (U_FAILURE(*status)) {
	3275	return USEARCH_DONE;
	3276	}
	3277
	3278	#if !BOYER_MOORE
	3279	if (search->matchedIndex == USEARCH_DONE) {
	3280	ucol_setOffset(strsrch->textIter, search->textLength, status);
	3281	} else {
	3282	ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
	3283	}
	3284	#endif
	3285
	3286	return search->matchedIndex;
	3287	}
	3288	}
	3289	return USEARCH_DONE;
	3290	}
	3291
	3292	U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
	3293	UErrorCode *status)
	3294	{
	3295	if (U_SUCCESS(*status) && strsrch) {
	3296	int32_t offset;
	3297	USearch *search = strsrch->search;
	3298	if (search->reset) {
	3299	offset = search->textLength;
	3300	search->isForwardSearching = FALSE;
	3301	search->reset = FALSE;
	3302	setColEIterOffset(strsrch->textIter, offset);
	3303	}
	3304	else {
	3305	offset = usearch_getOffset(strsrch);
	3306	}
	3307
	3308	int32_t matchedindex = search->matchedIndex;
	3309	if (search->isForwardSearching == TRUE) {
	3310	// switching direction.
	3311	// if matchedIndex == USEARCH_DONE, it means that either a
	3312	// setOffset has been called or that next ran off the text
	3313	// string. the iterator would have been set to offset textLength if
	3314	// a match is not found.
	3315	search->isForwardSearching = FALSE;
	3316	if (matchedindex != USEARCH_DONE) {
	3317	return matchedindex;
	3318	}
	3319	}
	3320	else {
	3321	#if BOYER_MOORE
	3322	if (offset == 0 \|\| matchedindex == 0 \|\|
	3323	(!search->isOverlap &&
	3324	(offset < strsrch->pattern.defaultShiftSize \|\|
	3325	(matchedindex != USEARCH_DONE &&
	3326	matchedindex < strsrch->pattern.defaultShiftSize)))) {
	3327	// not enough characters to match
	3328	setMatchNotFound(strsrch);
	3329	return USEARCH_DONE;
	3330	}
	3331	#else
	3332	// Could check pattern length, but the
	3333	// linear search will do the right thing
	3334	if (offset == 0 \|\| matchedindex == 0) {
	3335	setMatchNotFound(strsrch);
	3336	return USEARCH_DONE;
	3337	}
	3338	#endif
	3339	}
	3340
	3341	if (U_SUCCESS(*status)) {
	3342	if (strsrch->pattern.cesLength == 0) {
	3343	search->matchedIndex =
	3344	(matchedindex == USEARCH_DONE ? offset : matchedindex);
	3345	if (search->matchedIndex == 0) {
	3346	setMatchNotFound(strsrch);
	3347	// status checked below
	3348	}
	3349	else { // move by codepoints
	3350	U16_BACK_1(search->text, 0, search->matchedIndex);
	3351	setColEIterOffset(strsrch->textIter, search->matchedIndex);
	3352	// status checked below
	3353	search->matchedLength = 0;
	3354	}
	3355	}
	3356	else {
	3357	if (strsrch->search->isCanonicalMatch) {
	3358	// can't use exact here since extra accents are allowed.
	3359	usearch_handlePreviousCanonical(strsrch, status);
	3360	// status checked below
	3361	}
	3362	else {
	3363	usearch_handlePreviousExact(strsrch, status);
	3364	// status checked below
	3365	}
	3366	}
	3367
	3368	if (U_FAILURE(*status)) {
	3369	return USEARCH_DONE;
	3370	}
	3371
	3372	return search->matchedIndex;
	3373	}
	3374	}
	3375	return USEARCH_DONE;
	3376	}
	3377
	3378
	3379
	3380	U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
	3381	{
	3382	/*
	3383	reset is setting the attributes that are already in
	3384	string search, hence all attributes in the collator should
	3385	be retrieved without any problems
	3386	*/
	3387	if (strsrch) {
	3388	UErrorCode status = U_ZERO_ERROR;
	3389	UBool sameCollAttribute = TRUE;
	3390	uint32_t ceMask;
	3391	UBool shift;
	3392	uint32_t varTop;
	3393
	3394	// ** hack to deal w/ how processed CEs encode quaternary **
	3395	UCollationStrength newStrength = ucol_getStrength(strsrch->collator);
	3396	if ((strsrch->strength < UCOL_QUATERNARY && newStrength >= UCOL_QUATERNARY) \|\|
	3397	(strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
	3398	sameCollAttribute = FALSE;
	3399	}
	3400
	3401	strsrch->strength = ucol_getStrength(strsrch->collator);
	3402	ceMask = getMask(strsrch->strength);
	3403	if (strsrch->ceMask != ceMask) {
	3404	strsrch->ceMask = ceMask;
	3405	sameCollAttribute = FALSE;
	3406	}
	3407
	3408	// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
	3409	shift = ucol_getAttribute(strsrch->collator, UCOL_ALTERNATE_HANDLING,
	3410	&status) == UCOL_SHIFTED;
	3411	if (strsrch->toShift != shift) {
	3412	strsrch->toShift = shift;
	3413	sameCollAttribute = FALSE;
	3414	}
	3415
	3416	// if status is a failure, ucol_getVariableTop returns 0
	3417	varTop = ucol_getVariableTop(strsrch->collator, &status);
	3418	if (strsrch->variableTop != varTop) {
	3419	strsrch->variableTop = varTop;
	3420	sameCollAttribute = FALSE;
	3421	}
	3422	if (!sameCollAttribute) {
	3423	initialize(strsrch, &status);
	3424	}
	3425	ucol_setText(strsrch->textIter, strsrch->search->text,
	3426	strsrch->search->textLength,
	3427	&status);
	3428	strsrch->search->matchedLength = 0;
	3429	strsrch->search->matchedIndex = USEARCH_DONE;
	3430	strsrch->search->isOverlap = FALSE;
	3431	strsrch->search->isCanonicalMatch = FALSE;
	3432	strsrch->search->elementComparisonType = 0;
	3433	strsrch->search->isForwardSearching = TRUE;
	3434	strsrch->search->reset = TRUE;
	3435	}
	3436	}
	3437
	3438	//
	3439	// CEI: one Collation Element plus source text indexes.
	3440	// These structs are the entries of the circular buffer kept by CEIBuffer.
	3441	//
	3442	struct CEI {
	3443	int64_t ce; // value returned by nextProcessed() / previousProcessed() in CEIBuffer
	3444	int32_t lowIndex; // first out-parameter of those calls; presumably the low source-text index for this CE -- TODO confirm
	3445	int32_t highIndex; // second out-parameter of those calls; presumably the high source-text index for this CE -- TODO confirm
	3446	};
	3447
	3448	U_NAMESPACE_BEGIN
	3449
	3450	namespace {
	3451	//
	3452	// CEIBuffer A circular buffer of CEs-with-index from the text being searched.
	3453	//
	3454	#define DEFAULT_CEBUFFER_SIZE 96
	3455	#define CEBUFFER_EXTRA 32
	3456	// Some typical max values to make buffer size more reasonable for asymmetric search.
	3457	// #8694 is for a better long-term solution to allocation of this buffer.
	3458	#define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L 8
	3459	#define MAX_TARGET_IGNORABLES_PER_PAT_OTHER 3
	3460	#define MIGHT_BE_JAMO_L(c) ((c >= 0x1100 && c <= 0x115E) \|\| (c >= 0x3131 && c <= 0x314E) \|\| (c >= 0x3165 && c <= 0x3186))
	3461	struct CEIBuffer {
	3462	CEI defBuf[DEFAULT_CEBUFFER_SIZE];
	3463	CEI *buf;
	3464	int32_t bufSize;
	3465	int32_t firstIx;
	3466	int32_t limitIx;
	3467	UCollationElements *ceIter;
	3468	UStringSearch *strSearch;
	3469
	3470
	3471
	3472	CEIBuffer(UStringSearch ss, UErrorCode status);
	3473	~CEIBuffer();
	3474	const CEI *get(int32_t index);
	3475	const CEI *getPrevious(int32_t index);
	3476	};
	3477
	3478
	3479	CEIBuffer::CEIBuffer(UStringSearch ss, UErrorCode status) {
	3480	buf = defBuf;
	3481	strSearch = ss;
	3482	bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA;
	3483	if (ss->search->elementComparisonType != 0) {
	3484	const UChar * patText = ss->pattern.text;
	3485	if (patText) {
	3486	const UChar * patTextLimit = patText + ss->pattern.textLength;
	3487	while ( patText < patTextLimit ) {
	3488	UChar c = *patText++;
	3489	if (MIGHT_BE_JAMO_L(c)) {
	3490	bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L;
	3491	} else {
	3492	// No check for surrogates, we might allocate slightly more buffer than necessary.
	3493	bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER;
	3494	}
	3495	}
	3496	}
	3497	}
	3498	ceIter = ss->textIter;
	3499	firstIx = 0;
	3500	limitIx = 0;
	3501
	3502	if (!initTextProcessedIter(ss, status)) { return; }
	3503
	3504	if (bufSize>DEFAULT_CEBUFFER_SIZE) {
	3505	buf = (CEI )uprv_malloc(bufSize sizeof(CEI));
	3506	if (buf == NULL) {
	3507	*status = U_MEMORY_ALLOCATION_ERROR;
	3508	}
	3509	}
	3510	}
	3511
	3512	// TODO: add a reset or init function so that allocated
	3513	// buffers can be retained & reused.
	3514
	3515	CEIBuffer::~CEIBuffer() {
	3516	if (buf != defBuf) {
	3517	uprv_free(buf);
	3518	}
	3519	}
	3520
	3521
	3522	// Get the CE with the specified index.
	3523	// Index must be in the range
	3524	// n-history_size < index < n+1
	3525	// where n is the largest index to have been fetched by some previous call to this function.
	3526	// The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
	3527	//
	3528	const CEI *CEIBuffer::get(int32_t index) {
	3529	int i = index % bufSize;
	3530
	3531	if (index>=firstIx && index<limitIx) {
	3532	// The request was for an entry already in our buffer.
	3533	// Just return it.
	3534	return &buf[i];
	3535	}
	3536
	3537	// Caller is requesting a new, never accessed before, CE.
	3538	// Verify that it is the next one in sequence, which is all
	3539	// that is allowed.
	3540	if (index != limitIx) {
	3541	U_ASSERT(FALSE);
	3542
	3543	return NULL;
	3544	}
	3545
	3546	// Manage the circular CE buffer indexing
	3547	limitIx++;
	3548
	3549	if (limitIx - firstIx >= bufSize) {
	3550	// The buffer is full, knock out the lowest-indexed entry.
	3551	firstIx++;
	3552	}
	3553
	3554	UErrorCode status = U_ZERO_ERROR;
	3555
	3556	buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
	3557
	3558	return &buf[i];
	3559	}
	3560
	3561	// Get the CE with the specified index.
	3562	// Index must be in the range
	3563	// n-history_size < index < n+1
	3564	// where n is the largest index to have been fetched by some previous call to this function.
	3565	// The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
	3566	//
	3567	const CEI *CEIBuffer::getPrevious(int32_t index) {
	3568	int i = index % bufSize;
	3569
	3570	if (index>=firstIx && index<limitIx) {
	3571	// The request was for an entry already in our buffer.
	3572	// Just return it.
	3573	return &buf[i];
	3574	}
	3575
	3576	// Caller is requesting a new, never accessed before, CE.
	3577	// Verify that it is the next one in sequence, which is all
	3578	// that is allowed.
	3579	if (index != limitIx) {
	3580	U_ASSERT(FALSE);
	3581
	3582	return NULL;
	3583	}
	3584
	3585	// Manage the circular CE buffer indexing
	3586	limitIx++;
	3587
	3588	if (limitIx - firstIx >= bufSize) {
	3589	// The buffer is full, knock out the lowest-indexed entry.
	3590	firstIx++;
	3591	}
	3592
	3593	UErrorCode status = U_ZERO_ERROR;
	3594
	3595	buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
	3596
	3597	return &buf[i];
	3598	}
	3599
	3600	}
	3601
	3602	U_NAMESPACE_END
	3603
	3604
	3605	// #define USEARCH_DEBUG
	3606
	3607	#ifdef USEARCH_DEBUG
	3608	#include <stdio.h>
	3609	#include <stdlib.h>
	3610	#endif
	3611
	3612	/*
	3613	* Find the next break boundary after startIndex. If the UStringSearch object
	3614	* has an external break iterator, use that. Otherwise use the internal character
	3615	* break iterator.
	3616	*/
	3617	static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
	3618	#if 0
	3619	const UChar *text = strsrch->search->text;
	3620	int32_t textLen = strsrch->search->textLength;
	3621
	3622	U_ASSERT(startIndex>=0);
	3623	U_ASSERT(startIndex<=textLen);
	3624
	3625	if (startIndex >= textLen) {
	3626	return startIndex;
	3627	}
	3628
	3629	UChar32 c;
	3630	int32_t i = startIndex;
	3631	U16_NEXT(text, i, textLen, c);
	3632
	3633	// If we are on a control character, stop without looking for combining marks.
	3634	// Control characters do not combine.
	3635	int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
	3636	if (gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_GCB_CR) {
	3637	return i;
	3638	}
	3639
	3640	// The initial character was not a control, and can thus accept trailing
	3641	// combining characters. Advance over however many of them there are.
	3642	int32_t indexOfLastCharChecked;
	3643	for (;;) {
	3644	indexOfLastCharChecked = i;
	3645	if (i>=textLen) {
	3646	break;
	3647	}
	3648	U16_NEXT(text, i, textLen, c);
	3649	gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
	3650	if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
	3651	break;
	3652	}
	3653	}
	3654	return indexOfLastCharChecked;
	3655	#elif !UCONFIG_NO_BREAK_ITERATION
	3656	UBreakIterator *breakiterator = strsrch->search->breakIter;
	3657
	3658	if (breakiterator == NULL) {
	3659	breakiterator = strsrch->search->internalBreakIter;
	3660	}
	3661
	3662	if (breakiterator != NULL) {
	3663	return ubrk_following(breakiterator, startIndex);
	3664	}
	3665
	3666	return startIndex;
	3667	#else
	3668	// ** or should we use the original code? **
	3669	return startIndex;
	3670	#endif
	3671
	3672	}
	3673
	3674	/*
	3675	* Returns TRUE if index is on a break boundary. If the UStringSearch
	3676	* has an external break iterator, test using that, otherwise test
	3677	* using the internal character break iterator.
	3678	*/
	3679	static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
	3680	#if 0
	3681	const UChar *text = strsrch->search->text;
	3682	int32_t textLen = strsrch->search->textLength;
	3683
	3684	U_ASSERT(index>=0);
	3685	U_ASSERT(index<=textLen);
	3686
	3687	if (index>=textLen \|\| index<=0) {
	3688	return TRUE;
	3689	}
	3690
	3691	// If the character at the current index is not a GRAPHEME_EXTEND
	3692	// then we can not be within a combining sequence.
	3693	UChar32 c;
	3694	U16_GET(text, 0, index, textLen, c);
	3695	int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
	3696	if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
	3697	return TRUE;
	3698	}
	3699
	3700	// We are at a combining mark. If the preceding character is anything
	3701	// except a CONTROL, CR or LF, we are in a combining sequence.
	3702	U16_PREV(text, 0, index, c);
	3703	gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
	3704	UBool combining = !(gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_GCB_CR);
	3705	return !combining;
	3706	#elif !UCONFIG_NO_BREAK_ITERATION
	3707	UBreakIterator *breakiterator = strsrch->search->breakIter;
	3708
	3709	if (breakiterator == NULL) {
	3710	breakiterator = strsrch->search->internalBreakIter;
	3711	}
	3712
	3713	return (breakiterator != NULL && ubrk_isBoundary(breakiterator, index));
	3714	#else
	3715	// ** or use the original code? **
	3716	return TRUE;
	3717	#endif
	3718	}
	3719
	3720	#if 0
	3721	static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
	3722	{
	3723	#if !UCONFIG_NO_BREAK_ITERATION
	3724	UBreakIterator *breakiterator = strsrch->search->breakIter;
	3725
	3726	if (breakiterator != NULL) {
	3727	int32_t startindex = ubrk_first(breakiterator);
	3728	int32_t endindex = ubrk_last(breakiterator);
	3729
	3730	// out-of-range indexes are never boundary positions
	3731	if (start < startindex \|\| start > endindex \|\|
	3732	end < startindex \|\| end > endindex) {
	3733	return FALSE;
	3734	}
	3735
	3736	return ubrk_isBoundary(breakiterator, start) &&
	3737	ubrk_isBoundary(breakiterator, end);
	3738	}
	3739	#endif
	3740
	3741	return TRUE;
	3742	}
	3743	#endif
	3744
	3745	typedef enum {
	3746	U_CE_MATCH = -1,
	3747	U_CE_NO_MATCH = 0,
	3748	U_CE_SKIP_TARG,
	3749	U_CE_SKIP_PATN
	3750	} UCompareCEsResult;
	3751	#define U_CE_LEVEL2_BASE 0x00000005
	3752	#define U_CE_LEVEL3_BASE 0x00050000
	3753
	3754	static UCompareCEsResult compareCE64s(int64_t targCE, int64_t patCE, int16_t compareType) {
	3755	if (targCE == patCE) {
	3756	return U_CE_MATCH;
	3757	}
	3758	if (compareType == 0) {
	3759	return U_CE_NO_MATCH;
	3760	}
	3761
	3762	int64_t targCEshifted = targCE >> 32;
	3763	int64_t patCEshifted = patCE >> 32;
	3764	int64_t mask;
	3765
	3766	mask = 0xFFFF0000;
	3767	int32_t targLev1 = (int32_t)(targCEshifted & mask);
	3768	int32_t patLev1 = (int32_t)(patCEshifted & mask);
	3769	if ( targLev1 != patLev1 ) {
	3770	if ( targLev1 == 0 ) {
	3771	return U_CE_SKIP_TARG;
	3772	}
	3773	if ( patLev1 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) {
	3774	return U_CE_SKIP_PATN;
	3775	}
	3776	return U_CE_NO_MATCH;
	3777	}
	3778
	3779	mask = 0x0000FFFF;
	3780	int32_t targLev2 = (int32_t)(targCEshifted & mask);
	3781	int32_t patLev2 = (int32_t)(patCEshifted & mask);
	3782	if ( targLev2 != patLev2 ) {
	3783	if ( targLev2 == 0 ) {
	3784	return U_CE_SKIP_TARG;
	3785	}
	3786	if ( patLev2 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) {
	3787	return U_CE_SKIP_PATN;
	3788	}
	3789	return (patLev2 == U_CE_LEVEL2_BASE \|\| (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev2 == U_CE_LEVEL2_BASE) )?
	3790	U_CE_MATCH: U_CE_NO_MATCH;
	3791	}
	3792
	3793	mask = 0xFFFF0000;
	3794	int32_t targLev3 = (int32_t)(targCE & mask);
	3795	int32_t patLev3 = (int32_t)(patCE & mask);
	3796	if ( targLev3 != patLev3 ) {
	3797	return (patLev3 == U_CE_LEVEL3_BASE \|\| (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev3 == U_CE_LEVEL3_BASE) )?
	3798	U_CE_MATCH: U_CE_NO_MATCH;
	3799	}
	3800
	3801	return U_CE_MATCH;
	3802	}
	3803
	3804	#if BOYER_MOORE
	3805	// TODO: #if BOYER_MOORE, need 32-bit version of compareCE64s
	3806	#endif
	3807
	3808	U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
	3809	int32_t startIdx,
	3810	int32_t *matchStart,
	3811	int32_t *matchLimit,
	3812	UErrorCode *status)
	3813	{
	3814	if (U_FAILURE(*status)) {
	3815	return FALSE;
	3816	}
	3817
	3818	// TODO: reject search patterns beginning with a combining char.
	3819
	3820	#ifdef USEARCH_DEBUG
	3821	if (getenv("USEARCH_DEBUG") != NULL) {
	3822	printf("Pattern CEs\n");
	3823	for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
	3824	printf(" %8x", strsrch->pattern.ces[ii]);
	3825	}
	3826	printf("\n");
	3827	}
	3828
	3829	#endif
	3830	// Input parameter sanity check.
	3831	// TODO: should input indicies clip to the text length
	3832	// in the same way that UText does.
	3833	if(strsrch->pattern.cesLength == 0 \|\|
	3834	startIdx < 0 \|\|
	3835	startIdx > strsrch->search->textLength \|\|
	3836	strsrch->pattern.ces == NULL) {
	3837	*status = U_ILLEGAL_ARGUMENT_ERROR;
	3838	return FALSE;
	3839	}
	3840
	3841	if (strsrch->pattern.pces == NULL) {
	3842	initializePatternPCETable(strsrch, status);
	3843	}
	3844
	3845	ucol_setOffset(strsrch->textIter, startIdx, status);
	3846	CEIBuffer ceb(strsrch, status);
	3847
	3848
	3849	int32_t targetIx = 0;
	3850	const CEI *targetCEI = NULL;
	3851	int32_t patIx;
	3852	UBool found;
	3853
	3854	int32_t mStart = -1;
	3855	int32_t mLimit = -1;
	3856	int32_t minLimit;
	3857	int32_t maxLimit;
	3858
	3859
	3860
	3861	// Outer loop moves over match starting positions in the
	3862	// target CE space.
	3863	// Here we see the target as a sequence of collation elements, resulting from the following:
	3864	// 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
	3865	// (for example, digraphs such as IJ may be broken into two characters).
	3866	// 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
	3867	// 16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary. Any of these
	3868	// fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
	3869	// CE weight 0 (as for a combining diacritic with secondary weight when the collator strentgh is primary),
	3870	// then the CE is deleted, so the following code sees only CEs that are relevant.
	3871	// For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
	3872	// If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
	3873	// characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
	3874	//
	3875	for(targetIx=0; ; targetIx++)
	3876	{
	3877	found = TRUE;
	3878	// Inner loop checks for a match beginning at each
	3879	// position from the outer loop.
	3880	int32_t targetIxOffset = 0;
	3881	int64_t patCE = 0;
	3882	// For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
	3883	// (compared to the last CE fetched for the previous targetIx value) as we need to go
	3884	// for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK.
	3885	const CEI *firstCEI = ceb.get(targetIx);
	3886	if (firstCEI == NULL) {
	3887	*status = U_INTERNAL_PROGRAM_ERROR;
	3888	found = FALSE;
	3889	break;
	3890	}
	3891
	3892	for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
	3893	patCE = strsrch->pattern.pces[patIx];
	3894	targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
	3895	// Compare CE from target string with CE from the pattern.
	3896	// Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
	3897	// which will fail the compare, below.
	3898	UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
	3899	if ( ceMatch == U_CE_NO_MATCH ) {
	3900	found = FALSE;
	3901	break;
	3902	} else if ( ceMatch > U_CE_NO_MATCH ) {
	3903	if ( ceMatch == U_CE_SKIP_TARG ) {
	3904	// redo with same patCE, next targCE
	3905	patIx--;
	3906	targetIxOffset++;
	3907	} else { // ceMatch == U_CE_SKIP_PATN
	3908	// redo with same targCE, next patCE
	3909	targetIxOffset--;
	3910	}
	3911	}
	3912	}
	3913	targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
	3914
	3915	if (!found && ((targetCEI == NULL) \|\| (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
	3916	// No match at this targetIx. Try again at the next.
	3917	continue;
	3918	}
	3919
	3920	if (!found) {
	3921	// No match at all, we have run off the end of the target text.
	3922	break;
	3923	}
	3924
	3925
	3926	// We have found a match in CE space.
	3927	// Now determine the bounds in string index space.
	3928	// There still is a chance of match failure if the CE range not correspond to
	3929	// an acceptable character range.
	3930	//
	3931	const CEI *lastCEI = ceb.get(targetIx + targetIxOffset - 1);
	3932
	3933	mStart = firstCEI->lowIndex;
	3934	minLimit = lastCEI->lowIndex;
	3935
	3936	// Look at the CE following the match. If it is UCOL_NULLORDER the match
	3937	// extended to the end of input, and the match is good.
	3938
	3939	// Look at the high and low indices of the CE following the match. If
	3940	// they are the same it means one of two things:
	3941	// 1. The match extended to the last CE from the target text, which is OK, or
	3942	// 2. The last CE that was part of the match is in an expansion that extends
	3943	// to the first CE after the match. In this case, we reject the match.
	3944	const CEI *nextCEI = 0;
	3945	if (strsrch->search->elementComparisonType == 0) {
	3946	nextCEI = ceb.get(targetIx + targetIxOffset);
	3947	maxLimit = nextCEI->lowIndex;
	3948	if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
	3949	found = FALSE;
	3950	}
	3951	} else {
	3952	for ( ; ; ++targetIxOffset ) {
	3953	nextCEI = ceb.get(targetIx + targetIxOffset);
	3954	maxLimit = nextCEI->lowIndex;
	3955	// If we are at the end of the target too, match succeeds
	3956	if ( nextCEI->ce == UCOL_PROCESSED_NULLORDER ) {
	3957	break;
	3958	}
	3959	// As long as the next CE has primary weight of 0,
	3960	// it is part of the last target element matched by the pattern;
	3961	// make sure it can be part of a match with the last patCE
	3962	if ( (((nextCEI->ce) >> 32) & 0xFFFF0000UL) == 0 ) {
	3963	UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
	3964	if ( ceMatch == U_CE_NO_MATCH \|\| ceMatch == U_CE_SKIP_PATN ) {
	3965	found = FALSE;
	3966	break;
	3967	}
	3968	// If lowIndex == highIndex, this target CE is part of an expansion of the last matched
	3969	// target element, but it has non-zero primary weight => match fails
	3970	} else if ( nextCEI->lowIndex == nextCEI->highIndex ) {
	3971	found = false;
	3972	break;
	3973	// Else the target CE is not part of an expansion of the last matched element, match succeeds
	3974	} else {
	3975	break;
	3976	}
	3977	}
	3978	}
	3979
	3980
	3981	// Check for the start of the match being within a combining sequence.
	3982	// This can happen if the pattern itself begins with a combining char, and
	3983	// the match found combining marks in the target text that were attached
	3984	// to something else.
	3985	// This type of match should be rejected for not completely consuming a
	3986	// combining sequence.
	3987	if (!isBreakBoundary(strsrch, mStart)) {
	3988	found = FALSE;
	3989	}
	3990
	3991	// Check for the start of the match being within an Collation Element Expansion,
	3992	// meaning that the first char of the match is only partially matched.
	3993	// With exapnsions, the first CE will report the index of the source
	3994	// character, and all subsequent (expansions) CEs will report the source index of the
	3995	// _following_ character.
	3996	int32_t secondIx = firstCEI->highIndex;
	3997	if (mStart == secondIx) {
	3998	found = FALSE;
	3999	}
	4000
	4001	// Advance the match end position to the first acceptable match boundary.
	4002	// This advances the index over any combining charcters.
	4003	mLimit = maxLimit;
	4004	if (minLimit < maxLimit) {
	4005	// When the last CE's low index is same with its high index, the CE is likely
	4006	// a part of expansion. In this case, the index is located just after the
	4007	// character corresponding to the CEs compared above. If the index is right
	4008	// at the break boundary, move the position to the next boundary will result
	4009	// incorrect match length when there are ignorable characters exist between
	4010	// the position and the next character produces CE(s). See ticket#8482.
	4011	if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
	4012	mLimit = minLimit;
	4013	} else {
	4014	int32_t nba = nextBoundaryAfter(strsrch, minLimit);
	4015	if (nba >= lastCEI->highIndex) {
	4016	mLimit = nba;
	4017	}
	4018	}
	4019	}
	4020
	4021	#ifdef USEARCH_DEBUG
	4022	if (getenv("USEARCH_DEBUG") != NULL) {
	4023	printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
	4024	}
	4025	#endif
	4026
	4027	// If default breakIter is being used, and next collation element belonging to this
	4028	// combining sequence has non-zero primary weight and corresponds to a separate
	4029	// character following the one at end of the current match, then do NOT require
	4030	// that match end position be on a breakIter boundary, or that end of the
	4031	// combining sequence not extend beyond the match in CE space. Only do those
	4032	// tests if the conditions above are not met. Added this to make prefix search
	4033	// work in Indic scripts per <rdar://problem/18063262>.
	4034	UBool doLimitTests = !(strsrch->search->breakIter == NULL &&
	4035	nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
	4036	nextCEI->lowIndex >= lastCEI->highIndex && nextCEI->highIndex > nextCEI->lowIndex);
	4037
	4038	if (doLimitTests) { // <rdar://problem/18063262>
	4039	// If advancing to the end of a combining sequence in character indexing space
	4040	// advanced us beyond the end of the match in CE space, reject this match.
	4041	if (mLimit > maxLimit) {
	4042	found = FALSE;
	4043	}
	4044
	4045	if (!isBreakBoundary(strsrch, mLimit)) {
	4046	found = FALSE;
	4047	}
	4048	}
	4049
	4050	if (! checkIdentical(strsrch, mStart, mLimit)) {
	4051	found = FALSE;
	4052	}
	4053
	4054	if (found) {
	4055	break;
	4056	}
	4057	}
	4058
	4059	#ifdef USEARCH_DEBUG
	4060	if (getenv("USEARCH_DEBUG") != NULL) {
	4061	printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
	4062	int32_t lastToPrint = ceb.limitIx+2;
	4063	for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
	4064	printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
	4065	}
	4066	printf("\n%s\n", found? "match found" : "no match");
	4067	}
	4068	#endif
	4069
	4070	// All Done. Store back the match bounds to the caller.
	4071	//
	4072	if (found==FALSE) {
	4073	mLimit = -1;
	4074	mStart = -1;
	4075	}
	4076
	4077	if (matchStart != NULL) {
	4078	*matchStart= mStart;
	4079	}
	4080
	4081	if (matchLimit != NULL) {
	4082	*matchLimit = mLimit;
	4083	}
	4084
	4085	return found;
	4086	}
	4087
	4088	U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
	4089	int32_t startIdx,
	4090	int32_t *matchStart,
	4091	int32_t *matchLimit,
	4092	UErrorCode *status)
	4093	{
	4094	if (U_FAILURE(*status)) {
	4095	return FALSE;
	4096	}
	4097
	4098	// TODO: reject search patterns beginning with a combining char.
	4099
	4100	#ifdef USEARCH_DEBUG
	4101	if (getenv("USEARCH_DEBUG") != NULL) {
	4102	printf("Pattern CEs\n");
	4103	for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
	4104	printf(" %8x", strsrch->pattern.ces[ii]);
	4105	}
	4106	printf("\n");
	4107	}
	4108
	4109	#endif
	4110	// Input parameter sanity check.
	4111	// TODO: should input indicies clip to the text length
	4112	// in the same way that UText does.
	4113	if(strsrch->pattern.cesLength == 0 \|\|
	4114	startIdx < 0 \|\|
	4115	startIdx > strsrch->search->textLength \|\|
	4116	strsrch->pattern.ces == NULL) {
	4117	*status = U_ILLEGAL_ARGUMENT_ERROR;
	4118	return FALSE;
	4119	}
	4120
	4121	if (strsrch->pattern.pces == NULL) {
	4122	initializePatternPCETable(strsrch, status);
	4123	}
	4124
	4125	CEIBuffer ceb(strsrch, status);
	4126	int32_t targetIx = 0;
	4127
	4128	/*
	4129	* Pre-load the buffer with the CE's for the grapheme
	4130	* after our starting position so that we're sure that
	4131	* we can look at the CE following the match when we
	4132	* check the match boundaries.
	4133	*
	4134	* This will also pre-fetch the first CE that we'll
	4135	* consider for the match.
	4136	*/
	4137	if (startIdx < strsrch->search->textLength) {
	4138	UBreakIterator *bi = strsrch->search->internalBreakIter;
	4139	int32_t next = ubrk_following(bi, startIdx);
	4140
	4141	ucol_setOffset(strsrch->textIter, next, status);
	4142
	4143	for (targetIx = 0; ; targetIx += 1) {
	4144	if (ceb.getPrevious(targetIx)->lowIndex < startIdx) {
	4145	break;
	4146	}
	4147	}
	4148	} else {
	4149	ucol_setOffset(strsrch->textIter, startIdx, status);
	4150	}
	4151
	4152
	4153	const CEI *targetCEI = NULL;
	4154	int32_t patIx;
	4155	UBool found;
	4156
	4157	int32_t limitIx = targetIx;
	4158	int32_t mStart = -1;
	4159	int32_t mLimit = -1;
	4160	int32_t minLimit;
	4161	int32_t maxLimit;
	4162
	4163
	4164
	4165	// Outer loop moves over match starting positions in the
	4166	// target CE space.
	4167	// Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
	4168	// But patIx is 0 at the beginning of the pattern and increases toward the end.
	4169	// So this loop performs a comparison starting with the end of pattern, and prcessd toward the beginning of the pattern
	4170	// and the beginning of the base text.
	4171	for(targetIx = limitIx; ; targetIx += 1)
	4172	{
	4173	found = TRUE;
	4174	// For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
	4175	// (compared to the last CE fetched for the previous targetIx value) as we need to go
	4176	// for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK.
	4177	const CEI *lastCEI = ceb.getPrevious(targetIx);
	4178	if (lastCEI == NULL) {
	4179	*status = U_INTERNAL_PROGRAM_ERROR;
	4180	found = FALSE;
	4181	break;
	4182	}
	4183	// Inner loop checks for a match beginning at each
	4184	// position from the outer loop.
	4185	int32_t targetIxOffset = 0;
	4186	for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
	4187	int64_t patCE = strsrch->pattern.pces[patIx];
	4188
	4189	targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
	4190	// Compare CE from target string with CE from the pattern.
	4191	// Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
	4192	// which will fail the compare, below.
	4193	UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
	4194	if ( ceMatch == U_CE_NO_MATCH ) {
	4195	found = FALSE;
	4196	break;
	4197	} else if ( ceMatch > U_CE_NO_MATCH ) {
	4198	if ( ceMatch == U_CE_SKIP_TARG ) {
	4199	// redo with same patCE, next targCE
	4200	patIx++;
	4201	targetIxOffset++;
	4202	} else { // ceMatch == U_CE_SKIP_PATN
	4203	// redo with same targCE, next patCE
	4204	targetIxOffset--;
	4205	}
	4206	}
	4207	}
	4208
	4209	if (!found && ((targetCEI == NULL) \|\| (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) {
	4210	// No match at this targetIx. Try again at the next.
	4211	continue;
	4212	}
	4213
	4214	if (!found) {
	4215	// No match at all, we have run off the end of the target text.
	4216	break;
	4217	}
	4218
	4219
	4220	// We have found a match in CE space.
	4221	// Now determine the bounds in string index space.
	4222	// There still is a chance of match failure if the CE range not correspond to
	4223	// an acceptable character range.
	4224	//
	4225	const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
	4226	mStart = firstCEI->lowIndex;
	4227
	4228	// Check for the start of the match being within a combining sequence.
	4229	// This can happen if the pattern itself begins with a combining char, and
	4230	// the match found combining marks in the target text that were attached
	4231	// to something else.
	4232	// This type of match should be rejected for not completely consuming a
	4233	// combining sequence.
	4234	if (!isBreakBoundary(strsrch, mStart)) {
	4235	found = FALSE;
	4236	}
	4237
	4238	// Look at the high index of the first CE in the match. If it's the same as the
	4239	// low index, the first CE in the match is in the middle of an expansion.
	4240	if (mStart == firstCEI->highIndex) {
	4241	found = FALSE;
	4242	}
	4243
	4244
	4245	minLimit = lastCEI->lowIndex;
	4246
	4247	if (targetIx > 0) {
	4248	// Look at the CE following the match. If it is UCOL_NULLORDER the match
	4249	// extended to the end of input, and the match is good.
	4250
	4251	// Look at the high and low indices of the CE following the match. If
	4252	// they are the same it means one of two things:
	4253	// 1. The match extended to the last CE from the target text, which is OK, or
	4254	// 2. The last CE that was part of the match is in an expansion that extends
	4255	// to the first CE after the match. In this case, we reject the match.
	4256	const CEI *nextCEI = ceb.getPrevious(targetIx - 1);
	4257
	4258	if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
	4259	found = FALSE;
	4260	}
	4261
	4262	mLimit = maxLimit = nextCEI->lowIndex;
	4263
	4264	// Advance the match end position to the first acceptable match boundary.
	4265	// This advances the index over any combining characters.
	4266	if (minLimit < maxLimit) {
	4267	int32_t nba = nextBoundaryAfter(strsrch, minLimit);
	4268
	4269	if (nba >= lastCEI->highIndex) {
	4270	mLimit = nba;
	4271	}
	4272	}
	4273
	4274	// If default breakIter is being used, and next collation element belonging to this
	4275	// combining sequence has non-zero primary weight and corresponds to a separate
	4276	// character following the one at end of the current match, then do NOT require
	4277	// that match end position be on a breakIter boundary, or that end of the
	4278	// combining sequence not extend beyond the match in CE space. Only do those
	4279	// tests if the conditions above are not met. Added this to make prefix search
	4280	// work in Indic scripts per <rdar://problem/18063262>.
	4281	UBool doLimitTests = !(strsrch->search->breakIter == NULL &&
	4282	nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
	4283	nextCEI->lowIndex >= lastCEI->highIndex && nextCEI->highIndex > nextCEI->lowIndex);
	4284
	4285	if (doLimitTests) { // <rdar://problem/18063262>
	4286	// If advancing to the end of a combining sequence in character indexing space
	4287	// advanced us beyond the end of the match in CE space, reject this match.
	4288	if (mLimit > maxLimit) {
	4289	found = FALSE;
	4290	}
	4291
	4292	// Make sure the end of the match is on a break boundary
	4293	if (!isBreakBoundary(strsrch, mLimit)) {
	4294	found = FALSE;
	4295	}
	4296	}
	4297
	4298	} else {
	4299	// No non-ignorable CEs after this point.
	4300	// The maximum position is detected by boundary after
	4301	// the last non-ignorable CE. Combining sequence
	4302	// across the start index will be truncated.
	4303	int32_t nba = nextBoundaryAfter(strsrch, minLimit);
	4304	mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
	4305	}
	4306
	4307	#ifdef USEARCH_DEBUG
	4308	if (getenv("USEARCH_DEBUG") != NULL) {
	4309	printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
	4310	}
	4311	#endif
	4312
	4313
	4314	if (! checkIdentical(strsrch, mStart, mLimit)) {
	4315	found = FALSE;
	4316	}
	4317
	4318	if (found) {
	4319	break;
	4320	}
	4321	}
	4322
	4323	#ifdef USEARCH_DEBUG
	4324	if (getenv("USEARCH_DEBUG") != NULL) {
	4325	printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
	4326	int32_t lastToPrint = ceb.limitIx+2;
	4327	for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
	4328	printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
	4329	}
	4330	printf("\n%s\n", found? "match found" : "no match");
	4331	}
	4332	#endif
	4333
	4334	// All Done. Store back the match bounds to the caller.
	4335	//
	4336	if (found==FALSE) {
	4337	mLimit = -1;
	4338	mStart = -1;
	4339	}
	4340
	4341	if (matchStart != NULL) {
	4342	*matchStart= mStart;
	4343	}
	4344
	4345	if (matchLimit != NULL) {
	4346	*matchLimit = mLimit;
	4347	}
	4348
	4349	return found;
	4350	}
	4351
	4352	// internal use methods declared in usrchimp.h -----------------------------
	4353
	4354	UBool usearch_handleNextExact(UStringSearch strsrch, UErrorCode status)
	4355	{
	4356	if (U_FAILURE(*status)) {
	4357	setMatchNotFound(strsrch);
	4358	return FALSE;
	4359	}
	4360
	4361	#if BOYER_MOORE
	4362	UCollationElements *coleiter = strsrch->textIter;
	4363	int32_t textlength = strsrch->search->textLength;
	4364	int32_t *patternce = strsrch->pattern.ces;
	4365	int32_t patterncelength = strsrch->pattern.cesLength;
	4366	int32_t textoffset = ucol_getOffset(coleiter);
	4367
	4368	// status used in setting coleiter offset, since offset is checked in
	4369	// shiftForward before setting the coleiter offset, status never
	4370	// a failure
	4371	textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
	4372	patterncelength);
	4373	while (textoffset <= textlength)
	4374	{
	4375	uint32_t patternceindex = patterncelength - 1;
	4376	int32_t targetce;
	4377	UBool found = FALSE;
	4378	int32_t lastce = UCOL_NULLORDER;
	4379
	4380	setColEIterOffset(coleiter, textoffset);
	4381
	4382	for (;;) {
	4383	// finding the last pattern ce match, imagine composite characters
	4384	// for example: search for pattern A in text \u00C0
	4385	// we'll have to skip \u0300 the grave first before we get to A
	4386	targetce = ucol_previous(coleiter, status);
	4387	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4388	found = FALSE;
	4389	break;
	4390	}
	4391	targetce = getCE(strsrch, targetce);
	4392	if (targetce == UCOL_IGNORABLE && inNormBuf(coleiter)) {
	4393	// this is for the text \u0315\u0300 that requires
	4394	// normalization and pattern \u0300, where \u0315 is ignorable
	4395	continue;
	4396	}
	4397	if (lastce == UCOL_NULLORDER \|\| lastce == UCOL_IGNORABLE) {
	4398	lastce = targetce;
	4399	}
	4400	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4401	if (targetce == patternce[patternceindex]) {
	4402	// the first ce can be a contraction
	4403	found = TRUE;
	4404	break;
	4405	}
	4406	if (!hasExpansion(coleiter)) {
	4407	found = FALSE;
	4408	break;
	4409	}
	4410	}
	4411
	4412	//targetce = lastce;
	4413
	4414	while (found && patternceindex > 0) {
	4415	lastce = targetce;
	4416	targetce = ucol_previous(coleiter, status);
	4417	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4418	found = FALSE;
	4419	break;
	4420	}
	4421	targetce = getCE(strsrch, targetce);
	4422	if (targetce == UCOL_IGNORABLE) {
	4423	continue;
	4424	}
	4425
	4426	patternceindex --;
	4427	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4428	found = found && targetce == patternce[patternceindex];
	4429	}
	4430
	4431	targetce = lastce;
	4432
	4433	if (!found) {
	4434	if (U_FAILURE(*status)) {
	4435	break;
	4436	}
	4437	textoffset = shiftForward(strsrch, textoffset, lastce,
	4438	patternceindex);
	4439	// status checked at loop.
	4440	patternceindex = patterncelength;
	4441	continue;
	4442	}
	4443
	4444	if (checkNextExactMatch(strsrch, &textoffset, status)) {
	4445	// status checked in ucol_setOffset
	4446	setColEIterOffset(coleiter, strsrch->search->matchedIndex);
	4447	return TRUE;
	4448	}
	4449	}
	4450	setMatchNotFound(strsrch);
	4451	return FALSE;
	4452	#else
	4453	int32_t textOffset = ucol_getOffset(strsrch->textIter);
	4454	int32_t start = -1;
	4455	int32_t end = -1;
	4456
	4457	if (usearch_search(strsrch, textOffset, &start, &end, status)) {
	4458	strsrch->search->matchedIndex = start;
	4459	strsrch->search->matchedLength = end - start;
	4460	return TRUE;
	4461	} else {
	4462	setMatchNotFound(strsrch);
	4463	return FALSE;
	4464	}
	4465	#endif
	4466	}
	4467
	4468	UBool usearch_handleNextCanonical(UStringSearch strsrch, UErrorCode status)
	4469	{
	4470	if (U_FAILURE(*status)) {
	4471	setMatchNotFound(strsrch);
	4472	return FALSE;
	4473	}
	4474
	4475	#if BOYER_MOORE
	4476	UCollationElements *coleiter = strsrch->textIter;
	4477	int32_t textlength = strsrch->search->textLength;
	4478	int32_t *patternce = strsrch->pattern.ces;
	4479	int32_t patterncelength = strsrch->pattern.cesLength;
	4480	int32_t textoffset = ucol_getOffset(coleiter);
	4481	UBool hasPatternAccents =
	4482	strsrch->pattern.hasSuffixAccents \|\| strsrch->pattern.hasPrefixAccents;
	4483
	4484	textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
	4485	patterncelength);
	4486	strsrch->canonicalPrefixAccents[0] = 0;
	4487	strsrch->canonicalSuffixAccents[0] = 0;
	4488
	4489	while (textoffset <= textlength)
	4490	{
	4491	int32_t patternceindex = patterncelength - 1;
	4492	int32_t targetce;
	4493	UBool found = FALSE;
	4494	int32_t lastce = UCOL_NULLORDER;
	4495
	4496	setColEIterOffset(coleiter, textoffset);
	4497
	4498	for (;;) {
	4499	// finding the last pattern ce match, imagine composite characters
	4500	// for example: search for pattern A in text \u00C0
	4501	// we'll have to skip \u0300 the grave first before we get to A
	4502	targetce = ucol_previous(coleiter, status);
	4503	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4504	found = FALSE;
	4505	break;
	4506	}
	4507	targetce = getCE(strsrch, targetce);
	4508	if (lastce == UCOL_NULLORDER \|\| lastce == UCOL_IGNORABLE) {
	4509	lastce = targetce;
	4510	}
	4511	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4512	if (targetce == patternce[patternceindex]) {
	4513	// the first ce can be a contraction
	4514	found = TRUE;
	4515	break;
	4516	}
	4517	if (!hasExpansion(coleiter)) {
	4518	found = FALSE;
	4519	break;
	4520	}
	4521	}
	4522
	4523	while (found && patternceindex > 0) {
	4524	targetce = ucol_previous(coleiter, status);
	4525	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4526	found = FALSE;
	4527	break;
	4528	}
	4529	targetce = getCE(strsrch, targetce);
	4530	if (targetce == UCOL_IGNORABLE) {
	4531	continue;
	4532	}
	4533
	4534	patternceindex --;
	4535	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4536	found = found && targetce == patternce[patternceindex];
	4537	}
	4538
	4539	// initializing the rearranged accent array
	4540	if (hasPatternAccents && !found) {
	4541	strsrch->canonicalPrefixAccents[0] = 0;
	4542	strsrch->canonicalSuffixAccents[0] = 0;
	4543	if (U_FAILURE(*status)) {
	4544	break;
	4545	}
	4546	found = doNextCanonicalMatch(strsrch, textoffset, status);
	4547	}
	4548
	4549	if (!found) {
	4550	if (U_FAILURE(*status)) {
	4551	break;
	4552	}
	4553	textoffset = shiftForward(strsrch, textoffset, lastce,
	4554	patternceindex);
	4555	// status checked at loop
	4556	patternceindex = patterncelength;
	4557	continue;
	4558	}
	4559
	4560	if (checkNextCanonicalMatch(strsrch, &textoffset, status)) {
	4561	setColEIterOffset(coleiter, strsrch->search->matchedIndex);
	4562	return TRUE;
	4563	}
	4564	}
	4565	setMatchNotFound(strsrch);
	4566	return FALSE;
	4567	#else
	4568	int32_t textOffset = ucol_getOffset(strsrch->textIter);
	4569	int32_t start = -1;
	4570	int32_t end = -1;
	4571
	4572	if (usearch_search(strsrch, textOffset, &start, &end, status)) {
	4573	strsrch->search->matchedIndex = start;
	4574	strsrch->search->matchedLength = end - start;
	4575	return TRUE;
	4576	} else {
	4577	setMatchNotFound(strsrch);
	4578	return FALSE;
	4579	}
	4580	#endif
	4581	}
	4582
	4583	UBool usearch_handlePreviousExact(UStringSearch strsrch, UErrorCode status)
	4584	{
	4585	if (U_FAILURE(*status)) {
	4586	setMatchNotFound(strsrch);
	4587	return FALSE;
	4588	}
	4589
	4590	#if BOYER_MOORE
	4591	UCollationElements *coleiter = strsrch->textIter;
	4592	int32_t *patternce = strsrch->pattern.ces;
	4593	int32_t patterncelength = strsrch->pattern.cesLength;
	4594	int32_t textoffset = ucol_getOffset(coleiter);
	4595
	4596	// shifting it check for setting offset
	4597	// if setOffset is called previously or there was no previous match, we
	4598	// leave the offset as it is.
	4599	if (strsrch->search->matchedIndex != USEARCH_DONE) {
	4600	textoffset = strsrch->search->matchedIndex;
	4601	}
	4602
	4603	textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
	4604	patterncelength);
	4605
	4606	while (textoffset >= 0)
	4607	{
	4608	int32_t patternceindex = 1;
	4609	int32_t targetce;
	4610	UBool found = FALSE;
	4611	int32_t firstce = UCOL_NULLORDER;
	4612
	4613	// if status is a failure, ucol_setOffset does nothing
	4614	setColEIterOffset(coleiter, textoffset);
	4615
	4616	for (;;) {
	4617	// finding the first pattern ce match, imagine composite
	4618	// characters. for example: search for pattern \u0300 in text
	4619	// \u00C0, we'll have to skip A first before we get to
	4620	// \u0300 the grave accent
	4621	targetce = ucol_next(coleiter, status);
	4622	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4623	found = FALSE;
	4624	break;
	4625	}
	4626	targetce = getCE(strsrch, targetce);
	4627	if (firstce == UCOL_NULLORDER \|\| firstce == UCOL_IGNORABLE) {
	4628	firstce = targetce;
	4629	}
	4630	if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
	4631	continue;
	4632	}
	4633	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4634	if (targetce == patternce[0]) {
	4635	found = TRUE;
	4636	break;
	4637	}
	4638	if (!hasExpansion(coleiter)) {
	4639	// checking for accents in composite character
	4640	found = FALSE;
	4641	break;
	4642	}
	4643	}
	4644
	4645	//targetce = firstce;
	4646
	4647	while (found && (patternceindex < patterncelength)) {
	4648	firstce = targetce;
	4649	targetce = ucol_next(coleiter, status);
	4650	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4651	found = FALSE;
	4652	break;
	4653	}
	4654	targetce = getCE(strsrch, targetce);
	4655	if (targetce == UCOL_IGNORABLE) {
	4656	continue;
	4657	}
	4658
	4659	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4660	found = found && targetce == patternce[patternceindex];
	4661	patternceindex ++;
	4662	}
	4663
	4664	targetce = firstce;
	4665
	4666	if (!found) {
	4667	if (U_FAILURE(*status)) {
	4668	break;
	4669	}
	4670
	4671	textoffset = reverseShift(strsrch, textoffset, targetce,
	4672	patternceindex);
	4673	patternceindex = 0;
	4674	continue;
	4675	}
	4676
	4677	if (checkPreviousExactMatch(strsrch, &textoffset, status)) {
	4678	setColEIterOffset(coleiter, textoffset);
	4679	return TRUE;
	4680	}
	4681	}
	4682	setMatchNotFound(strsrch);
	4683	return FALSE;
	4684	#else
	4685	int32_t textOffset;
	4686
	4687	if (strsrch->search->isOverlap) {
	4688	if (strsrch->search->matchedIndex != USEARCH_DONE) {
	4689	textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
	4690	} else {
	4691	// move the start position at the end of possible match
	4692	initializePatternPCETable(strsrch, status);
	4693	if (!initTextProcessedIter(strsrch, status)) {
	4694	setMatchNotFound(strsrch);
	4695	return FALSE;
	4696	}
	4697	for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
	4698	int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
	4699	if (pce == UCOL_PROCESSED_NULLORDER) {
	4700	// at the end of the text
	4701	break;
	4702	}
	4703	}
	4704	if (U_FAILURE(*status)) {
	4705	setMatchNotFound(strsrch);
	4706	return FALSE;
	4707	}
	4708	textOffset = ucol_getOffset(strsrch->textIter);
	4709	}
	4710	} else {
	4711	textOffset = ucol_getOffset(strsrch->textIter);
	4712	}
	4713
	4714	int32_t start = -1;
	4715	int32_t end = -1;
	4716
	4717	if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
	4718	strsrch->search->matchedIndex = start;
	4719	strsrch->search->matchedLength = end - start;
	4720	return TRUE;
	4721	} else {
	4722	setMatchNotFound(strsrch);
	4723	return FALSE;
	4724	}
	4725	#endif
	4726	}
	4727
	4728	UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
	4729	UErrorCode *status)
	4730	{
	4731	if (U_FAILURE(*status)) {
	4732	setMatchNotFound(strsrch);
	4733	return FALSE;
	4734	}
	4735
	4736	#if BOYER_MOORE
	4737	UCollationElements *coleiter = strsrch->textIter;
	4738	int32_t *patternce = strsrch->pattern.ces;
	4739	int32_t patterncelength = strsrch->pattern.cesLength;
	4740	int32_t textoffset = ucol_getOffset(coleiter);
	4741	UBool hasPatternAccents =
	4742	strsrch->pattern.hasSuffixAccents \|\| strsrch->pattern.hasPrefixAccents;
	4743
	4744	// shifting it check for setting offset
	4745	// if setOffset is called previously or there was no previous match, we
	4746	// leave the offset as it is.
	4747	if (strsrch->search->matchedIndex != USEARCH_DONE) {
	4748	textoffset = strsrch->search->matchedIndex;
	4749	}
	4750
	4751	textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
	4752	patterncelength);
	4753	strsrch->canonicalPrefixAccents[0] = 0;
	4754	strsrch->canonicalSuffixAccents[0] = 0;
	4755
	4756	while (textoffset >= 0)
	4757	{
	4758	int32_t patternceindex = 1;
	4759	int32_t targetce;
	4760	UBool found = FALSE;
	4761	int32_t firstce = UCOL_NULLORDER;
	4762
	4763	setColEIterOffset(coleiter, textoffset);
	4764	for (;;) {
	4765	// finding the first pattern ce match, imagine composite
	4766	// characters. for example: search for pattern \u0300 in text
	4767	// \u00C0, we'll have to skip A first before we get to
	4768	// \u0300 the grave accent
	4769	targetce = ucol_next(coleiter, status);
	4770	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4771	found = FALSE;
	4772	break;
	4773	}
	4774	targetce = getCE(strsrch, targetce);
	4775	if (firstce == UCOL_NULLORDER \|\| firstce == UCOL_IGNORABLE) {
	4776	firstce = targetce;
	4777	}
	4778
	4779	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4780	if (targetce == patternce[0]) {
	4781	// the first ce can be a contraction
	4782	found = TRUE;
	4783	break;
	4784	}
	4785	if (!hasExpansion(coleiter)) {
	4786	// checking for accents in composite character
	4787	found = FALSE;
	4788	break;
	4789	}
	4790	}
	4791
	4792	targetce = firstce;
	4793
	4794	while (found && patternceindex < patterncelength) {
	4795	targetce = ucol_next(coleiter, status);
	4796	if (U_FAILURE(*status) \|\| targetce == UCOL_NULLORDER) {
	4797	found = FALSE;
	4798	break;
	4799	}
	4800	targetce = getCE(strsrch, targetce);
	4801	if (targetce == UCOL_IGNORABLE) {
	4802	continue;
	4803	}
	4804
	4805	// TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
	4806	found = found && targetce == patternce[patternceindex];
	4807	patternceindex ++;
	4808	}
	4809
	4810	// initializing the rearranged accent array
	4811	if (hasPatternAccents && !found) {
	4812	strsrch->canonicalPrefixAccents[0] = 0;
	4813	strsrch->canonicalSuffixAccents[0] = 0;
	4814	if (U_FAILURE(*status)) {
	4815	break;
	4816	}
	4817	found = doPreviousCanonicalMatch(strsrch, textoffset, status);
	4818	}
	4819
	4820	if (!found) {
	4821	if (U_FAILURE(*status)) {
	4822	break;
	4823	}
	4824	textoffset = reverseShift(strsrch, textoffset, targetce,
	4825	patternceindex);
	4826	patternceindex = 0;
	4827	continue;
	4828	}
	4829
	4830	if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) {
	4831	setColEIterOffset(coleiter, textoffset);
	4832	return TRUE;
	4833	}
	4834	}
	4835	setMatchNotFound(strsrch);
	4836	return FALSE;
	4837	#else
	4838	int32_t textOffset;
	4839
	4840	if (strsrch->search->isOverlap) {
	4841	if (strsrch->search->matchedIndex != USEARCH_DONE) {
	4842	textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
	4843	} else {
	4844	// move the start position at the end of possible match
	4845	initializePatternPCETable(strsrch, status);
	4846	if (!initTextProcessedIter(strsrch, status)) {
	4847	setMatchNotFound(strsrch);
	4848	return FALSE;
	4849	}
	4850	for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
	4851	int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
	4852	if (pce == UCOL_PROCESSED_NULLORDER) {
	4853	// at the end of the text
	4854	break;
	4855	}
	4856	}
	4857	if (U_FAILURE(*status)) {
	4858	setMatchNotFound(strsrch);
	4859	return FALSE;
	4860	}
	4861	textOffset = ucol_getOffset(strsrch->textIter);
	4862	}
	4863	} else {
	4864	textOffset = ucol_getOffset(strsrch->textIter);
	4865	}
	4866
	4867	int32_t start = -1;
	4868	int32_t end = -1;
	4869
	4870	if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
	4871	strsrch->search->matchedIndex = start;
	4872	strsrch->search->matchedLength = end - start;
	4873	return TRUE;
	4874	} else {
	4875	setMatchNotFound(strsrch);
	4876	return FALSE;
	4877	}
	4878	#endif
	4879	}
	4880
	4881	#endif /* #if !UCONFIG_NO_COLLATION */