git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/alphaindex.cpp

Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
4388f060 A	3	/*
4388f060 A	4	*******************************************************************************
57a6839d	5	* Copyright (C) 2009-2014, International Business Machines Corporation and
51004dcb	6	* others. All Rights Reserved.
4388f060 A	7	*******************************************************************************
	8	*/
	9
4388f060 A	10	#include "unicode/utypes.h"
4388f060 A	11
57a6839d	12	#if !UCONFIG_NO_COLLATION
4388f060 A	13
	14	#include "unicode/alphaindex.h"
	15	#include "unicode/coll.h"
51004dcb	16	#include "unicode/localpointer.h"
4388f060	17	#include "unicode/normalizer2.h"
4388f060	18	#include "unicode/tblcoll.h"
57a6839d	19	#include "unicode/uchar.h"
4388f060 A	20	#include "unicode/ulocdata.h"
	21	#include "unicode/uniset.h"
	22	#include "unicode/uobject.h"
4388f060	23	#include "unicode/usetiter.h"
4388f060 A	24	#include "unicode/utf16.h"
4388f060 A	25
51004dcb	26	#include "cmemory.h"
4388f060	27	#include "cstring.h"
4388f060	28	#include "uassert.h"
4388f060	29	#include "uvector.h"
57a6839d	30	#include "uvectr64.h"
4388f060 A	31
	32	//#include <string>
	33	//#include <iostream>
51004dcb	34
4388f060 A	35	U_NAMESPACE_BEGIN
4388f060 A	36
51004dcb	37	namespace {
4388f060	38
51004dcb A	39	/**
	40	* Prefix string for Chinese index buckets.
	41	* See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
	42	*/
	43	const UChar BASE[1] = { 0xFDD0 };
	44	const int32_t BASE_LENGTH = 1;
	45
	46	UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
	47	const UnicodeString &one, const UnicodeString &other);
	48
	49	} // namespace
4388f060 A	50
4388f060 A	51	static int32_t U_CALLCONV
51004dcb	52	collatorComparator(const void context, const void left, const void *right);
4388f060 A	53
	54	static int32_t U_CALLCONV
	55	recordCompareFn(const void context, const void left, const void *right);
	56
4388f060 A	57	// UVector<Record *> support function, delete a Record.
	58	static void U_CALLCONV
	59	alphaIndex_deleteRecord(void *obj) {
	60	delete static_cast<AlphabeticIndex::Record *>(obj);
	61	}
	62
51004dcb	63	namespace {
4388f060	64
51004dcb A	65	UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
	66	UErrorCode &errorCode) {
	67	if (U_FAILURE(errorCode)) { return NULL; }
	68	if (owned.isValid()) {
	69	return owned.orphan();
	70	}
	71	UnicodeString *p = new UnicodeString(s);
	72	if (p == NULL) {
	73	errorCode = U_MEMORY_ALLOCATION_ERROR;
	74	}
	75	return p;
	76	}
4388f060	77
51004dcb A	78	inline UnicodeString *getString(const UVector &list, int32_t i) {
	79	return static_cast<UnicodeString *>(list[i]);
	80	}
4388f060	81
51004dcb A	82	inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
	83	return static_cast<AlphabeticIndex::Bucket *>(list[i]);
	84	}
4388f060	85
51004dcb A	86	inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
	87	return static_cast<AlphabeticIndex::Record *>(list[i]);
	88	}
	89
	90	/**
	91	* Like Java Collections.binarySearch(List, String, Comparator).
	92	*
	93	* @return the index>=0 where the item was found,
	94	* or the index<0 for inserting the string at ~index in sorted order
	95	*/
	96	int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
	97	if (list.size() == 0) { return ~0; }
	98	int32_t start = 0;
	99	int32_t limit = list.size();
	100	for (;;) {
	101	int32_t i = (start + limit) / 2;
	102	const UnicodeString si = static_cast<UnicodeString >(list.elementAt(i));
	103	UErrorCode errorCode = U_ZERO_ERROR;
	104	UCollationResult cmp = coll.compare(s, *si, errorCode);
	105	if (cmp == UCOL_EQUAL) {
	106	return i;
	107	} else if (cmp < 0) {
	108	if (i == start) {
	109	return ~start; // insert s before *si
	110	}
	111	limit = i;
	112	} else {
	113	if (i == start) {
	114	return ~(start + 1); // insert s after *si
	115	}
	116	start = i;
	117	}
4388f060 A	118	}
	119	}
	120
51004dcb A	121	} // namespace
	122
	123	// The BucketList is not in the anonymous namespace because only Clang
	124	// seems to support its use in other classes from there.
	125	// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
	126	class BucketList : public UObject {
	127	public:
	128	BucketList(UVector bucketList, UVector publicBucketList)
	129	: bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
	130	int32_t displayIndex = 0;
	131	for (int32_t i = 0; i < publicBucketList->size(); ++i) {
	132	getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
	133	}
	134	}
4388f060	135
51004dcb A	136	// The virtual destructor must not be inline.
	137	// See ticket #8454 for details.
	138	virtual ~BucketList();
	139
	140	int32_t getBucketCount() const {
	141	return immutableVisibleList_->size();
4388f060	142	}
4388f060	143
51004dcb A	144	int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
	145	UErrorCode &errorCode) {
	146	// binary search
	147	int32_t start = 0;
	148	int32_t limit = bucketList_->size();
	149	while ((start + 1) < limit) {
	150	int32_t i = (start + limit) / 2;
	151	const AlphabeticIndex::Bucket bucket = getBucket(bucketList_, i);
	152	UCollationResult nameVsBucket =
	153	collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
	154	if (nameVsBucket < 0) {
	155	limit = i;
	156	} else {
	157	start = i;
	158	}
	159	}
	160	const AlphabeticIndex::Bucket bucket = getBucket(bucketList_, start);
	161	if (bucket->displayBucket_ != NULL) {
	162	bucket = bucket->displayBucket_;
	163	}
	164	return bucket->displayIndex_;
4388f060	165	}
51004dcb A	166
	167	/** All of the buckets, visible and invisible. */
	168	UVector *bucketList_;
	169	/** Just the visible buckets. */
	170	UVector *immutableVisibleList_;
	171	};
	172
	173	BucketList::~BucketList() {
	174	delete bucketList_;
	175	if (immutableVisibleList_ != bucketList_) {
	176	delete immutableVisibleList_;
4388f060	177	}
51004dcb A	178	}
	179
	180	AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
	181	delete buckets_;
	182	delete collatorPrimaryOnly_;
	183	}
	184
	185	int32_t
	186	AlphabeticIndex::ImmutableIndex::getBucketCount() const {
	187	return buckets_->getBucketCount();
	188	}
	189
	190	int32_t
	191	AlphabeticIndex::ImmutableIndex::getBucketIndex(
	192	const UnicodeString &name, UErrorCode &errorCode) const {
	193	return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
	194	}
	195
	196	const AlphabeticIndex::Bucket *
	197	AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
	198	if (0 <= index && index < buckets_->getBucketCount()) {
	199	return icu::getBucket(*buckets_->immutableVisibleList_, index);
	200	} else {
	201	return NULL;
4388f060	202	}
4388f060 A	203	}
4388f060 A	204
51004dcb A	205	AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
	206	: inputList_(NULL),
	207	labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
	208	maxLabelCount_(99),
	209	initialLabels_(NULL), firstCharsInScripts_(NULL),
	210	collator_(NULL), collatorPrimaryOnly_(NULL),
	211	buckets_(NULL) {
	212	init(&locale, status);
	213	}
	214
	215
	216	AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
	217	: inputList_(NULL),
	218	labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
	219	maxLabelCount_(99),
	220	initialLabels_(NULL), firstCharsInScripts_(NULL),
	221	collator_(collator), collatorPrimaryOnly_(NULL),
	222	buckets_(NULL) {
	223	init(NULL, status);
	224	}
	225
	226
4388f060 A	227
4388f060 A	228	AlphabeticIndex::~AlphabeticIndex() {
4388f060 A	229	delete collator_;
4388f060 A	230	delete collatorPrimaryOnly_;
51004dcb A	231	delete firstCharsInScripts_;
	232	delete buckets_;
	233	delete inputList_;
4388f060 A	234	delete initialLabels_;
	235	}
	236
	237
	238	AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
	239	if (U_FAILURE(status)) {
	240	return *this;
	241	}
	242	initialLabels_->addAll(additions);
51004dcb	243	clearBuckets();
4388f060 A	244	return *this;
	245	}
	246
	247
	248	AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
51004dcb A	249	addIndexExemplars(locale, status);
51004dcb A	250	clearBuckets();
4388f060 A	251	return *this;
	252	}
	253
	254
51004dcb A	255	AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
	256	if (U_FAILURE(errorCode)) { return NULL; }
	257	// In C++, the ImmutableIndex must own its copy of the BucketList,
	258	// even if it contains no records, for proper memory management.
	259	// We could clone the buckets_ if they are not NULL,
	260	// but that would be worth it only if this method is called multiple times,
	261	// or called after using the old-style bucket iterator API.
	262	LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
340931cb	263	LocalPointer<RuleBasedCollator> coll(collatorPrimaryOnly_->clone());
51004dcb A	264	if (immutableBucketList.isNull() \|\| coll.isNull()) {
	265	errorCode = U_MEMORY_ALLOCATION_ERROR;
	266	return NULL;
	267	}
	268	ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
	269	if (immIndex == NULL) {
	270	errorCode = U_MEMORY_ALLOCATION_ERROR;
	271	return NULL;
	272	}
	273	// The ImmutableIndex adopted its parameter objects.
	274	immutableBucketList.orphan();
	275	coll.orphan();
	276	return immIndex;
	277	}
	278
4388f060	279	int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
51004dcb	280	initBuckets(status);
4388f060 A	281	if (U_FAILURE(status)) {
	282	return 0;
	283	}
51004dcb	284	return buckets_->getBucketCount();
4388f060 A	285	}
	286
	287
	288	int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
51004dcb	289	if (U_FAILURE(status) \|\| inputList_ == NULL) {
4388f060 A	290	return 0;
4388f060 A	291	}
51004dcb	292	return inputList_->size();
4388f060 A	293	}
4388f060 A	294
51004dcb A	295	void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
	296	const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
	297	if (U_FAILURE(errorCode)) { return; }
	298
	299	const UnicodeString &firstScriptBoundary = getString(firstCharsInScripts_, 0);
	300	const UnicodeString &overflowBoundary =
	301	getString(firstCharsInScripts_, firstCharsInScripts_->size() - 1);
	302
	303	// We make a sorted array of elements.
	304	// Some of the input may be redundant.
	305	// That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
	306	// We filter out those cases.
	307	UnicodeSetIterator iter(*initialLabels_);
	308	while (iter.next()) {
	309	const UnicodeString *item = &iter.getString();
	310	LocalPointer<UnicodeString> ownedItem;
	311	UBool checkDistinct;
	312	int32_t itemLength = item->length();
	313	if (!item->hasMoreChar32Than(0, itemLength, 1)) {
	314	checkDistinct = FALSE;
	315	} else if(item->charAt(itemLength - 1) == 0x2a && // '*'
	316	item->charAt(itemLength - 2) != 0x2a) {
	317	// Use a label if it is marked with one trailing star,
	318	// even if the label string sorts the same when all contractions are suppressed.
	319	ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
	320	item = ownedItem.getAlias();
	321	if (item == NULL) {
	322	errorCode = U_MEMORY_ALLOCATION_ERROR;
	323	return;
4388f060	324	}
51004dcb	325	checkDistinct = FALSE;
4388f060	326	} else {
51004dcb A	327	checkDistinct = TRUE;
	328	}
	329	if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
	330	// Ignore a primary-ignorable or non-alphabetic index character.
	331	} else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
57a6839d	332	// Ignore an index character that will land in the overflow bucket.
51004dcb A	333	} else if (checkDistinct &&
	334	collatorPrimaryOnly_->compare(item, separated(item), errorCode) == 0) {
	335	// Ignore a multi-code point index character that does not sort distinctly
	336	// from the sequence of its separate characters.
	337	} else {
	338	int32_t insertionPoint = binarySearch(indexCharacters, item, collatorPrimaryOnly_);
	339	if (insertionPoint < 0) {
	340	indexCharacters.insertElementAt(
	341	ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
	342	} else {
	343	const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
	344	if (isOneLabelBetterThanOther(nfkdNormalizer, item, itemAlreadyIn)) {
	345	indexCharacters.setElementAt(
	346	ownedString(*item, ownedItem, errorCode), insertionPoint);
	347	}
	348	}
4388f060 A	349	}
4388f060 A	350	}
51004dcb	351	if (U_FAILURE(errorCode)) { return; }
4388f060	352
b331163b	353	// if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
4388f060	354
51004dcb	355	int32_t size = indexCharacters.size() - 1;
4388f060	356	if (size > maxLabelCount_) {
4388f060 A	357	int32_t count = 0;
4388f060 A	358	int32_t old = -1;
51004dcb	359	for (int32_t i = 0; i < indexCharacters.size();) {
4388f060	360	++count;
51004dcb	361	int32_t bump = count * maxLabelCount_ / size;
4388f060	362	if (bump == old) {
51004dcb	363	indexCharacters.removeElementAt(i);
4388f060	364	} else {
4388f060	365	old = bump;
51004dcb	366	++i;
4388f060 A	367	}
4388f060 A	368	}
4388f060	369	}
51004dcb	370	}
4388f060	371
51004dcb A	372	namespace {
	373
	374	const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
	375	if (!current.startsWith(BASE, BASE_LENGTH)) {
	376	return current;
	377	}
	378	UChar rest = current.charAt(BASE_LENGTH);
	379	if (0x2800 < rest && rest <= 0x28FF) { // stroke count
	380	int32_t count = rest-0x2800;
	381	temp.setTo((UChar)(0x30 + count % 10));
	382	if (count >= 10) {
	383	count /= 10;
	384	temp.insert(0, (UChar)(0x30 + count % 10));
	385	if (count >= 10) {
	386	count /= 10;
	387	temp.insert(0, (UChar)(0x30 + count));
	388	}
	389	}
	390	return temp.append((UChar)0x5283);
	391	}
	392	return temp.setTo(current, BASE_LENGTH);
4388f060 A	393	}
4388f060 A	394
51004dcb	395	UBool hasMultiplePrimaryWeights(
57a6839d A	396	const RuleBasedCollator &coll, uint32_t variableTop,
	397	const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
	398	ces.removeAllElements();
	399	coll.internalGetCEs(s, ces, errorCode);
	400	if (U_FAILURE(errorCode)) { return FALSE; }
51004dcb	401	UBool seenPrimary = FALSE;
57a6839d A	402	for (int32_t i = 0; i < ces.size(); ++i) {
	403	int64_t ce = ces.elementAti(i);
	404	uint32_t p = (uint32_t)(ce >> 32);
	405	if (p > variableTop) {
	406	// not primary ignorable
51004dcb A	407	if (seenPrimary) {
51004dcb A	408	return TRUE;
4388f060	409	}
51004dcb	410	seenPrimary = TRUE;
4388f060	411	}
51004dcb A	412	}
51004dcb A	413	return FALSE;
4388f060 A	414	}
4388f060 A	415
51004dcb	416	} // namespace
4388f060	417
51004dcb A	418	BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
	419	// Initialize indexCharacters.
	420	UVector indexCharacters(errorCode);
	421	indexCharacters.setDeleter(uprv_deleteUObject);
	422	initLabels(indexCharacters, errorCode);
	423	if (U_FAILURE(errorCode)) { return NULL; }
	424
	425	// Variables for hasMultiplePrimaryWeights().
57a6839d A	426	UVector64 ces(errorCode);
57a6839d A	427	uint32_t variableTop;
51004dcb	428	if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
57a6839d	429	variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
51004dcb A	430	} else {
	431	variableTop = 0;
	432	}
	433	UBool hasInvisibleBuckets = FALSE;
	434
	435	// Helper arrays for Chinese Pinyin collation.
	436	Bucket *asciiBuckets[26] = {
	437	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	438	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
	439	};
	440	Bucket *pinyinBuckets[26] = {
	441	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	442	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
	443	};
	444	UBool hasPinyin = FALSE;
	445
b331163b A	446	LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
b331163b A	447	if (U_FAILURE(errorCode)) {
51004dcb A	448	return NULL;
	449	}
	450	bucketList->setDeleter(uprv_deleteUObject);
	451
	452	// underflow bucket
	453	Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
	454	if (bucket == NULL) {
	455	errorCode = U_MEMORY_ALLOCATION_ERROR;
	456	return NULL;
	457	}
	458	bucketList->addElement(bucket, errorCode);
	459	if (U_FAILURE(errorCode)) { return NULL; }
	460
	461	UnicodeString temp;
	462
	463	// fix up the list, adding underflow, additions, overflow
	464	// Insert inflow labels as needed.
	465	int32_t scriptIndex = -1;
	466	const UnicodeString *scriptUpperBoundary = &emptyString_;
	467	for (int32_t i = 0; i < indexCharacters.size(); ++i) {
	468	UnicodeString &current = *getString(indexCharacters, i);
	469	if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
	470	// We crossed the script boundary into a new script.
	471	const UnicodeString &inflowBoundary = *scriptUpperBoundary;
	472	UBool skippedScript = FALSE;
	473	for (;;) {
	474	scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
	475	if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
	476	break;
	477	}
	478	skippedScript = TRUE;
	479	}
	480	if (skippedScript && bucketList->size() > 1) {
	481	// We are skipping one or more scripts,
	482	// and we are not just getting out of the underflow label.
	483	bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
	484	if (bucket == NULL) {
	485	errorCode = U_MEMORY_ALLOCATION_ERROR;
	486	return NULL;
	487	}
	488	bucketList->addElement(bucket, errorCode);
	489	}
	490	}
	491	// Add a bucket with the current label.
	492	bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
	493	if (bucket == NULL) {
	494	errorCode = U_MEMORY_ALLOCATION_ERROR;
	495	return NULL;
	496	}
	497	bucketList->addElement(bucket, errorCode);
	498	// Remember ASCII and Pinyin buckets for Pinyin redirects.
	499	UChar c;
	500	if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
	501	asciiBuckets[c - 0x41] = bucket;
	502	} else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
	503	0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
	504	pinyinBuckets[c - 0x41] = bucket;
	505	hasPinyin = TRUE;
	506	}
	507	// Check for multiple primary weights.
	508	if (!current.startsWith(BASE, BASE_LENGTH) &&
57a6839d A	509	hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
57a6839d A	510	ces, errorCode) &&
51004dcb A	511	current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
51004dcb A	512	// "AE-ligature" or "Sch" etc.
3d1f044b A	513	for (int32_t j = bucketList->size() - 2;; --j) {
3d1f044b A	514	Bucket singleBucket = getBucket(bucketList, j);
51004dcb A	515	if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
	516	// There is no single-character bucket since the last
	517	// underflow or inflow label.
	518	break;
	519	}
	520	if (singleBucket->displayBucket_ == NULL &&
57a6839d A	521	!hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
	522	singleBucket->lowerBoundary_,
	523	ces, errorCode)) {
51004dcb A	524	// Add an invisible bucket that redirects strings greater than the expansion
	525	// to the previous single-character bucket.
	526	// For example, after ... Q R S Sch we add Sch\uFFFF->S
	527	// and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
	528	bucket = new Bucket(emptyString_,
	529	UnicodeString(current).append((UChar)0xFFFF),
	530	U_ALPHAINDEX_NORMAL);
	531	if (bucket == NULL) {
	532	errorCode = U_MEMORY_ALLOCATION_ERROR;
	533	return NULL;
	534	}
	535	bucket->displayBucket_ = singleBucket;
	536	bucketList->addElement(bucket, errorCode);
	537	hasInvisibleBuckets = TRUE;
	538	break;
	539	}
	540	}
	541	}
	542	}
	543	if (U_FAILURE(errorCode)) { return NULL; }
	544	if (bucketList->size() == 1) {
	545	// No real labels, show only the underflow label.
	546	BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
	547	if (bl == NULL) {
	548	errorCode = U_MEMORY_ALLOCATION_ERROR;
	549	return NULL;
	550	}
	551	bucketList.orphan();
	552	return bl;
	553	}
	554	// overflow bucket
	555	bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
	556	if (bucket == NULL) {
	557	errorCode = U_MEMORY_ALLOCATION_ERROR;
	558	return NULL;
	559	}
	560	bucketList->addElement(bucket, errorCode); // final
	561
	562	if (hasPinyin) {
	563	// Redirect Pinyin buckets.
	564	Bucket *asciiBucket = NULL;
	565	for (int32_t i = 0; i < 26; ++i) {
	566	if (asciiBuckets[i] != NULL) {
	567	asciiBucket = asciiBuckets[i];
	568	}
	569	if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
	570	pinyinBuckets[i]->displayBucket_ = asciiBucket;
	571	hasInvisibleBuckets = TRUE;
	572	}
	573	}
	574	}
	575
	576	if (U_FAILURE(errorCode)) { return NULL; }
	577	if (!hasInvisibleBuckets) {
	578	BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
	579	if (bl == NULL) {
	580	errorCode = U_MEMORY_ALLOCATION_ERROR;
	581	return NULL;
	582	}
	583	bucketList.orphan();
	584	return bl;
	585	}
	586	// Merge inflow buckets that are visually adjacent.
	587	// Iterate backwards: Merge inflow into overflow rather than the other way around.
588	int32_t i = bucketList->size() - 1;
589	Bucket nextBucket = getBucket(bucketList, i);
590	while (--i > 0) {
591	bucket = getBucket(*bucketList, i);
592	if (bucket->displayBucket_ != NULL) {
593	continue; // skip invisible buckets
594	}
595	if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
596	if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
597	bucket->displayBucket_ = nextBucket;
598	continue;
599	}
600	}
601	nextBucket = bucket;
602	}
603
b331163b A	604	LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
b331163b A	605	if (U_FAILURE(errorCode)) {
51004dcb A	606	return NULL;
	607	}
	608	// Do not call publicBucketList->setDeleter():
	609	// This vector shares its objects with the bucketList.
3d1f044b A	610	for (int32_t j = 0; j < bucketList->size(); ++j) {
3d1f044b A	611	bucket = getBucket(*bucketList, j);
51004dcb A	612	if (bucket->displayBucket_ == NULL) {
	613	publicBucketList->addElement(bucket, errorCode);
	614	}
	615	}
	616	if (U_FAILURE(errorCode)) { return NULL; }
	617	BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
	618	if (bl == NULL) {
	619	errorCode = U_MEMORY_ALLOCATION_ERROR;
	620	return NULL;
	621	}
	622	bucketList.orphan();
	623	publicBucketList.orphan();
	624	return bl;
	625	}
	626
	627	/**
	628	* Creates an index, and buckets and sorts the list of records into the index.
	629	*/
	630	void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
	631	if (U_FAILURE(errorCode) \|\| buckets_ != NULL) {
	632	return;
	633	}
	634	buckets_ = createBucketList(errorCode);
	635	if (U_FAILURE(errorCode) \|\| inputList_ == NULL \|\| inputList_->isEmpty()) {
4388f060 A	636	return;
	637	}
	638
51004dcb A	639	// Sort the records by name.
	640	// Stable sort preserves input order of collation duplicates.
	641	inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
	642
	643	// Now, we traverse all of the input, which is now sorted.
	644	// If the item doesn't go in the current bucket, we find the next bucket that contains it.
	645	// This makes the process order n*log(n), since we just sort the list and then do a linear process.
	646	// However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
	647	// we need to improve it for that case.
	648
	649	Bucket currentBucket = getBucket(buckets_->bucketList_, 0);
	650	int32_t bucketIndex = 1;
	651	Bucket *nextBucket;
	652	const UnicodeString *upperBoundary;
	653	if (bucketIndex < buckets_->bucketList_->size()) {
	654	nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
	655	upperBoundary = &nextBucket->lowerBoundary_;
	656	} else {
	657	nextBucket = NULL;
	658	upperBoundary = NULL;
	659	}
	660	for (int32_t i = 0; i < inputList_->size(); ++i) {
	661	Record r = getRecord(inputList_, i);
	662	// if the current bucket isn't the right one, find the one that is
	663	// We have a special flag for the last bucket so that we don't look any further
	664	while (upperBoundary != NULL &&
	665	collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
	666	currentBucket = nextBucket;
	667	// now reset the boundary that we compare against
	668	if (bucketIndex < buckets_->bucketList_->size()) {
	669	nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
	670	upperBoundary = &nextBucket->lowerBoundary_;
4388f060	671	} else {
51004dcb	672	upperBoundary = NULL;
4388f060	673	}
4388f060	674	}
51004dcb A	675	// now put the record into the bucket.
	676	Bucket *bucket = currentBucket;
	677	if (bucket->displayBucket_ != NULL) {
	678	bucket = bucket->displayBucket_;
	679	}
	680	if (bucket->records_ == NULL) {
	681	bucket->records_ = new UVector(errorCode);
	682	if (bucket->records_ == NULL) {
	683	errorCode = U_MEMORY_ALLOCATION_ERROR;
	684	return;
	685	}
	686	}
	687	bucket->records_->addElement(r, errorCode);
4388f060	688	}
51004dcb	689	}
4388f060	690
51004dcb A	691	void AlphabeticIndex::clearBuckets() {
	692	if (buckets_ != NULL) {
	693	delete buckets_;
	694	buckets_ = NULL;
	695	internalResetBucketIterator();
	696	}
4388f060 A	697	}
4388f060 A	698
51004dcb A	699	void AlphabeticIndex::internalResetBucketIterator() {
	700	labelsIterIndex_ = -1;
	701	currentBucket_ = NULL;
	702	}
4388f060	703
51004dcb A	704
51004dcb A	705	void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
51004dcb	706	LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
4388f060 A	707	if (U_FAILURE(status)) {
	708	return;
	709	}
	710
4388f060 A	711	UnicodeSet exemplars;
	712	ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
	713	if (U_SUCCESS(status)) {
51004dcb	714	initialLabels_->addAll(exemplars);
4388f060 A	715	return;
	716	}
	717	status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
	718
51004dcb	719	// The locale data did not include explicit Index characters.
4388f060	720	// Synthesize a set of them from the locale's standard exemplar characters.
4388f060 A	721	ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
	722	if (U_FAILURE(status)) {
	723	return;
	724	}
	725
51004dcb	726	// question: should we add auxiliary exemplars?
0f5d89e8	727	if (exemplars.containsSome(0x61, 0x7A) /* a-z */ \|\| exemplars.isEmpty()) {
51004dcb A	728	exemplars.add(0x61, 0x7A);
	729	}
	730	if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
	731	// cut down to small list
	732	exemplars.remove(0xAC00, 0xD7A3).
	733	add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
	734	add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
	735	add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
	736	add(0xD30C).add(0xD558);
	737	}
	738	if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
	739	// cut down to small list
	740	// make use of the fact that Ethiopic is allocated in 8's, where
	741	// the base is 0 mod 8.
0f5d89e8 A	742	UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
	743	ethiopic.retainAll(exemplars);
	744	exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
51004dcb A	745	}
51004dcb A	746
4388f060 A	747	// Upper-case any that aren't already so.
4388f060 A	748	// (We only do this for synthesized index characters.)
4388f060 A	749	UnicodeSetIterator it(exemplars);
4388f060 A	750	UnicodeString upperC;
4388f060 A	751	while (it.next()) {
	752	const UnicodeString &exemplarC = it.getString();
	753	upperC = exemplarC;
	754	upperC.toUpper(locale);
51004dcb	755	initialLabels_->add(upperC);
4388f060	756	}
51004dcb	757	}
4388f060	758
51004dcb A	759	UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
51004dcb A	760	UnicodeSet contractions;
57a6839d A	761	collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
	762	if (U_FAILURE(errorCode) \|\| contractions.isEmpty()) { return FALSE; }
	763	initialLabels_->addAll(contractions);
51004dcb A	764	UnicodeSetIterator iter(contractions);
	765	while (iter.next()) {
	766	const UnicodeString &s = iter.getString();
57a6839d A	767	U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
	768	UChar c = s.charAt(s.length() - 1);
	769	if (0x41 <= c && c <= 0x5A) { // A-Z
	770	// There are Pinyin labels, add ASCII A-Z labels as well.
	771	initialLabels_->add(0x41, 0x5A); // A-Z
	772	break;
4388f060 A	773	}
4388f060 A	774	}
57a6839d	775	return TRUE;
4388f060 A	776	}
	777
	778
	779	/*
	780	* Return the string with interspersed CGJs. Input must have more than 2 codepoints.
	781	*/
51004dcb	782	static const UChar CGJ = 0x034F;
4388f060 A	783	UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
	784	UnicodeString result;
	785	if (item.length() == 0) {
	786	return result;
	787	}
	788	int32_t i = 0;
	789	for (;;) {
	790	UChar32 cp = item.char32At(i);
	791	result.append(cp);
	792	i = item.moveIndex32(i, 1);
	793	if (i >= item.length()) {
	794	break;
	795	}
	796	result.append(CGJ);
	797	}
	798	return result;
	799	}
	800
	801
	802	UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
	803	return FALSE;
	804	}
	805
	806
	807	UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
	808	return FALSE;
	809	}
	810
	811
	812	const RuleBasedCollator &AlphabeticIndex::getCollator() const {
57a6839d	813	return *collator_;
4388f060 A	814	}
	815
	816
	817	const UnicodeString &AlphabeticIndex::getInflowLabel() const {
	818	return inflowLabel_;
	819	}
	820
	821	const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
	822	return overflowLabel_;
	823	}
	824
	825
	826	const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
	827	return underflowLabel_;
	828	}
	829
	830
	831	AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/status/) {
	832	inflowLabel_ = label;
51004dcb	833	clearBuckets();
4388f060 A	834	return *this;
	835	}
	836
	837
	838	AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/status/) {
	839	overflowLabel_ = label;
51004dcb	840	clearBuckets();
4388f060 A	841	return *this;
	842	}
	843
	844
	845	AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/status/) {
	846	underflowLabel_ = label;
51004dcb	847	clearBuckets();
4388f060 A	848	return *this;
	849	}
	850
	851
	852	int32_t AlphabeticIndex::getMaxLabelCount() const {
	853	return maxLabelCount_;
	854	}
	855
	856
	857	AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
	858	if (U_FAILURE(status)) {
	859	return *this;
	860	}
	861	if (maxLabelCount <= 0) {
	862	status = U_ILLEGAL_ARGUMENT_ERROR;
	863	return *this;
	864	}
	865	maxLabelCount_ = maxLabelCount;
51004dcb	866	clearBuckets();
4388f060 A	867	return *this;
	868	}
	869
	870
4388f060 A	871	//
	872	// init() - Common code for constructors.
	873	//
	874
51004dcb A	875	void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
	876	if (U_FAILURE(status)) { return; }
	877	if (locale == NULL && collator_ == NULL) {
	878	status = U_ILLEGAL_ARGUMENT_ERROR;
4388f060 A	879	return;
4388f060 A	880	}
51004dcb	881
4388f060	882	initialLabels_ = new UnicodeSet();
51004dcb A	883	if (initialLabels_ == NULL) {
	884	status = U_MEMORY_ALLOCATION_ERROR;
	885	return;
	886	}
4388f060	887
51004dcb	888	inflowLabel_.setTo((UChar)0x2026); // Ellipsis
4388f060 A	889	overflowLabel_ = inflowLabel_;
	890	underflowLabel_ = inflowLabel_;
	891
51004dcb	892	if (collator_ == NULL) {
57a6839d A	893	Collator coll = Collator::createInstance(locale, status);
	894	if (U_FAILURE(status)) {
	895	delete coll;
	896	return;
	897	}
	898	if (coll == NULL) {
51004dcb A	899	status = U_MEMORY_ALLOCATION_ERROR;
	900	return;
	901	}
57a6839d A	902	collator_ = dynamic_cast<RuleBasedCollator *>(coll);
	903	if (collator_ == NULL) {
	904	delete coll;
	905	status = U_UNSUPPORTED_ERROR;
	906	return;
	907	}
51004dcb	908	}
340931cb	909	collatorPrimaryOnly_ = collator_->clone();
51004dcb A	910	if (collatorPrimaryOnly_ == NULL) {
51004dcb A	911	status = U_MEMORY_ALLOCATION_ERROR;
4388f060 A	912	return;
4388f060 A	913	}
51004dcb A	914	collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
	915	firstCharsInScripts_ = firstStringsInScript(status);
	916	if (U_FAILURE(status)) { return; }
	917	firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
51004dcb A	918	// Guard against a degenerate collator where
	919	// some script boundary strings are primary ignorable.
	920	for (;;) {
	921	if (U_FAILURE(status)) { return; }
	922	if (firstCharsInScripts_->isEmpty()) {
	923	// AlphabeticIndex requires some non-ignorable script boundary strings.
	924	status = U_ILLEGAL_ARGUMENT_ERROR;
	925	return;
4388f060	926	}
51004dcb A	927	if (collatorPrimaryOnly_->compare(
	928	static_cast<UnicodeString >(firstCharsInScripts_->elementAt(0)),
	929	emptyString_, status) == UCOL_EQUAL) {
	930	firstCharsInScripts_->removeElementAt(0);
	931	} else {
	932	break;
4388f060 A	933	}
4388f060 A	934	}
4388f060	935
57a6839d A	936	// Chinese index characters, which are specific to each of the several Chinese tailorings,
	937	// take precedence over the single locale data exemplar set per language.
	938	if (!addChineseIndexCharacters(status) && locale != NULL) {
51004dcb	939	addIndexExemplars(*locale, status);
4388f060	940	}
4388f060 A	941	}
	942
	943
	944	//
	945	// Comparison function for UVector<UnicodeString *> sorting with a collator.
	946	//
	947	static int32_t U_CALLCONV
51004dcb	948	collatorComparator(const void context, const void left, const void *right) {
4388f060 A	949	const UElement leftElement = static_cast<const UElement >(left);
	950	const UElement rightElement = static_cast<const UElement >(right);
	951	const UnicodeString leftString = static_cast<const UnicodeString >(leftElement->pointer);
	952	const UnicodeString rightString = static_cast<const UnicodeString >(rightElement->pointer);
4388f060 A	953
	954	if (leftString == rightString) {
	955	// Catches case where both are NULL
	956	return 0;
	957	}
	958	if (leftString == NULL) {
	959	return 1;
340931cb	960	}
4388f060 A	961	if (rightString == NULL) {
	962	return -1;
	963	}
51004dcb A	964	const Collator col = static_cast<const Collator >(context);
	965	UErrorCode errorCode = U_ZERO_ERROR;
	966	return col->compare(leftString, rightString, errorCode);
4388f060 A	967	}
	968
	969	//
	970	// Comparison function for UVector<Record *> sorting with a collator.
	971	//
	972	static int32_t U_CALLCONV
	973	recordCompareFn(const void context, const void left, const void *right) {
	974	const UElement leftElement = static_cast<const UElement >(left);
	975	const UElement rightElement = static_cast<const UElement >(right);
	976	const AlphabeticIndex::Record leftRec = static_cast<const AlphabeticIndex::Record >(leftElement->pointer);
	977	const AlphabeticIndex::Record rightRec = static_cast<const AlphabeticIndex::Record >(rightElement->pointer);
	978	const Collator col = static_cast<const Collator >(context);
51004dcb A	979	UErrorCode errorCode = U_ZERO_ERROR;
51004dcb A	980	return col->compare(leftRec->name_, rightRec->name_, errorCode);
4388f060 A	981	}
4388f060 A	982
4388f060 A	983	UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
	984	if (U_FAILURE(status)) {
	985	return NULL;
	986	}
b331163b A	987	LocalPointer<UVector> dest(new UVector(status), status);
b331163b A	988	if (U_FAILURE(status)) {
4388f060 A	989	return NULL;
	990	}
	991	dest->setDeleter(uprv_deleteUObject);
57a6839d A	992	// Fetch the script-first-primary contractions which are defined in the root collator.
	993	// They all start with U+FDD1.
	994	UnicodeSet set;
	995	collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
	996	if (U_FAILURE(status)) {
	997	return NULL;
	998	}
	999	if (set.isEmpty()) {
	1000	status = U_UNSUPPORTED_ERROR;
	1001	return NULL;
	1002	}
	1003	UnicodeSetIterator iter(set);
	1004	while (iter.next()) {
	1005	const UnicodeString &boundary = iter.getString();
	1006	uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
	1007	if ((gcMask & (U_GC_L_MASK \| U_GC_CN_MASK)) == 0) {
	1008	// Ignore boundaries for the special reordering groups.
	1009	// Take only those for "real scripts" (where the sample character is a Letter,
	1010	// and the one for unassigned implicit weights (Cn).
	1011	continue;
4388f060	1012	}
57a6839d A	1013	UnicodeString *s = new UnicodeString(boundary);
57a6839d A	1014	if (s == NULL) {
4388f060	1015	status = U_MEMORY_ALLOCATION_ERROR;
57a6839d	1016	return NULL;
4388f060	1017	}
57a6839d A	1018	dest->addElement(s, status);
	1019	}
	1020	return dest.orphan();
4388f060 A	1021	}
	1022
	1023
51004dcb	1024	namespace {
4388f060 A	1025
4388f060 A	1026	/**
51004dcb A	1027	* Returns true if one index character string is "better" than the other.
	1028	* Shorter NFKD is better, and otherwise NFKD-binary-less-than is
	1029	* better, and otherwise binary-less-than is better.
4388f060	1030	*/
51004dcb A	1031	UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
	1032	const UnicodeString &one, const UnicodeString &other) {
	1033	// This is called with primary-equal strings, but never with one.equals(other).
	1034	UErrorCode status = U_ZERO_ERROR;
	1035	UnicodeString n1 = nfkdNormalizer.normalize(one, status);
	1036	UnicodeString n2 = nfkdNormalizer.normalize(other, status);
	1037	if (U_FAILURE(status)) { return FALSE; }
	1038	int32_t result = n1.countChar32() - n2.countChar32();
4388f060	1039	if (result != 0) {
51004dcb	1040	return result < 0;
4388f060	1041	}
4388f060 A	1042	result = n1.compareCodePointOrder(n2);
4388f060 A	1043	if (result != 0) {
51004dcb	1044	return result < 0;
4388f060	1045	}
51004dcb	1046	return one.compareCodePointOrder(other) < 0;
4388f060 A	1047	}
4388f060 A	1048
51004dcb	1049	} // namespace
4388f060 A	1050
	1051	//
	1052	// Constructor & Destructor for AlphabeticIndex::Record
	1053	//
	1054	// Records are internal only, instances are not directly surfaced in the public API.
	1055	// This class is mostly struct-like, with all public fields.
	1056
51004dcb A	1057	AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
	1058	: name_(name), data_(data) {}
	1059
4388f060 A	1060	AlphabeticIndex::Record::~Record() {
	1061	}
	1062
	1063
	1064	AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
	1065	if (U_FAILURE(status)) {
	1066	return *this;
	1067	}
51004dcb A	1068	if (inputList_ == NULL) {
	1069	inputList_ = new UVector(status);
	1070	if (inputList_ == NULL) {
	1071	status = U_MEMORY_ALLOCATION_ERROR;
	1072	return *this;
	1073	}
	1074	inputList_->setDeleter(alphaIndex_deleteRecord);
	1075	}
	1076	Record *r = new Record(name, data);
	1077	if (r == NULL) {
	1078	status = U_MEMORY_ALLOCATION_ERROR;
	1079	return *this;
	1080	}
	1081	inputList_->addElement(r, status);
	1082	clearBuckets();
4388f060 A	1083	//std::string ss;
	1084	//std::string ss2;
	1085	//std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
	1086	// " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
	1087	return *this;
	1088	}
	1089
	1090
	1091	AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
51004dcb A	1092	if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
	1093	inputList_->removeAllElements();
	1094	clearBuckets();
4388f060	1095	}
4388f060 A	1096	return *this;
	1097	}
	1098
4388f060	1099	int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
51004dcb	1100	initBuckets(status);
4388f060 A	1101	if (U_FAILURE(status)) {
	1102	return 0;
	1103	}
51004dcb	1104	return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
4388f060 A	1105	}
	1106
	1107
	1108	int32_t AlphabeticIndex::getBucketIndex() const {
	1109	return labelsIterIndex_;
	1110	}
	1111
	1112
	1113	UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
	1114	if (U_FAILURE(status)) {
	1115	return FALSE;
	1116	}
51004dcb	1117	if (buckets_ == NULL && currentBucket_ != NULL) {
4388f060 A	1118	status = U_ENUM_OUT_OF_SYNC_ERROR;
	1119	return FALSE;
	1120	}
51004dcb	1121	initBuckets(status);
4388f060 A	1122	if (U_FAILURE(status)) {
	1123	return FALSE;
	1124	}
	1125	++labelsIterIndex_;
51004dcb A	1126	if (labelsIterIndex_ >= buckets_->getBucketCount()) {
51004dcb A	1127	labelsIterIndex_ = buckets_->getBucketCount();
4388f060 A	1128	return FALSE;
4388f060 A	1129	}
51004dcb	1130	currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
4388f060 A	1131	resetRecordIterator();
	1132	return TRUE;
	1133	}
	1134
	1135	const UnicodeString &AlphabeticIndex::getBucketLabel() const {
	1136	if (currentBucket_ != NULL) {
	1137	return currentBucket_->label_;
	1138	} else {
51004dcb	1139	return emptyString_;
4388f060 A	1140	}
	1141	}
	1142
	1143
	1144	UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
	1145	if (currentBucket_ != NULL) {
	1146	return currentBucket_->labelType_;
	1147	} else {
	1148	return U_ALPHAINDEX_NORMAL;
	1149	}
	1150	}
	1151
	1152
	1153	int32_t AlphabeticIndex::getBucketRecordCount() const {
51004dcb	1154	if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
4388f060 A	1155	return currentBucket_->records_->size();
	1156	} else {
	1157	return 0;
	1158	}
	1159	}
	1160
	1161
	1162	AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
	1163	if (U_FAILURE(status)) {
	1164	return *this;
	1165	}
51004dcb	1166	internalResetBucketIterator();
4388f060 A	1167	return *this;
	1168	}
	1169
	1170
	1171	UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
	1172	if (U_FAILURE(status)) {
	1173	return FALSE;
	1174	}
	1175	if (currentBucket_ == NULL) {
	1176	// We are trying to iterate over the items in a bucket, but there is no
	1177	// current bucket from the enumeration of buckets.
	1178	status = U_INVALID_STATE_ERROR;
	1179	return FALSE;
	1180	}
51004dcb	1181	if (buckets_ == NULL) {
4388f060 A	1182	status = U_ENUM_OUT_OF_SYNC_ERROR;
	1183	return FALSE;
	1184	}
51004dcb A	1185	if (currentBucket_->records_ == NULL) {
	1186	return FALSE;
	1187	}
4388f060 A	1188	++itemsIterIndex_;
	1189	if (itemsIterIndex_ >= currentBucket_->records_->size()) {
	1190	itemsIterIndex_ = currentBucket_->records_->size();
	1191	return FALSE;
	1192	}
	1193	return TRUE;
	1194	}
	1195
	1196
	1197	const UnicodeString &AlphabeticIndex::getRecordName() const {
51004dcb A	1198	const UnicodeString *retStr = &emptyString_;
51004dcb A	1199	if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060 A	1200	itemsIterIndex_ >= 0 &&
	1201	itemsIterIndex_ < currentBucket_->records_->size()) {
	1202	Record item = static_cast<Record >(currentBucket_->records_->elementAt(itemsIterIndex_));
	1203	retStr = &item->name_;
	1204	}
	1205	return *retStr;
	1206	}
	1207
	1208	const void *AlphabeticIndex::getRecordData() const {
	1209	const void *retPtr = NULL;
51004dcb	1210	if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060 A	1211	itemsIterIndex_ >= 0 &&
	1212	itemsIterIndex_ < currentBucket_->records_->size()) {
	1213	Record item = static_cast<Record >(currentBucket_->records_->elementAt(itemsIterIndex_));
	1214	retPtr = item->data_;
	1215	}
	1216	return retPtr;
	1217	}
	1218
	1219
	1220	AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
	1221	itemsIterIndex_ = -1;
	1222	return *this;
	1223	}
	1224
	1225
	1226
	1227	AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
	1228	const UnicodeString &lowerBoundary,
51004dcb A	1229	UAlphabeticIndexLabelType type)
	1230	: label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
	1231	displayBucket_(NULL), displayIndex_(-1),
	1232	records_(NULL) {
4388f060 A	1233	}
	1234
	1235
	1236	AlphabeticIndex::Bucket::~Bucket() {
	1237	delete records_;
	1238	}
	1239
	1240	U_NAMESPACE_END
	1241
57a6839d	1242	#endif // !UCONFIG_NO_COLLATION