]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/alphaindex.cpp
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / i18n / alphaindex.cpp
CommitLineData
4388f060
A
1/*
2*******************************************************************************
57a6839d 3* Copyright (C) 2009-2014, International Business Machines Corporation and
51004dcb 4* others. All Rights Reserved.
4388f060
A
5*******************************************************************************
6*/
7
4388f060
A
8#include "unicode/utypes.h"
9
57a6839d 10#if !UCONFIG_NO_COLLATION
4388f060
A
11
12#include "unicode/alphaindex.h"
13#include "unicode/coll.h"
51004dcb 14#include "unicode/localpointer.h"
4388f060 15#include "unicode/normalizer2.h"
4388f060 16#include "unicode/tblcoll.h"
57a6839d 17#include "unicode/uchar.h"
4388f060
A
18#include "unicode/ulocdata.h"
19#include "unicode/uniset.h"
20#include "unicode/uobject.h"
4388f060 21#include "unicode/usetiter.h"
4388f060
A
22#include "unicode/utf16.h"
23
51004dcb 24#include "cmemory.h"
4388f060 25#include "cstring.h"
4388f060 26#include "uassert.h"
4388f060 27#include "uvector.h"
57a6839d 28#include "uvectr64.h"
4388f060
A
29
30//#include <string>
31//#include <iostream>
51004dcb 32
4388f060
A
33U_NAMESPACE_BEGIN
34
51004dcb 35namespace {
4388f060 36
51004dcb
A
37/**
38 * Prefix string for Chinese index buckets.
39 * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
40 */
41const UChar BASE[1] = { 0xFDD0 };
42const int32_t BASE_LENGTH = 1;
43
44UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
45 const UnicodeString &one, const UnicodeString &other);
46
47} // namespace
4388f060
A
48
49static int32_t U_CALLCONV
51004dcb 50collatorComparator(const void *context, const void *left, const void *right);
4388f060
A
51
52static int32_t U_CALLCONV
53recordCompareFn(const void *context, const void *left, const void *right);
54
4388f060
A
55// UVector<Record *> support function, delete a Record.
56static void U_CALLCONV
57alphaIndex_deleteRecord(void *obj) {
58 delete static_cast<AlphabeticIndex::Record *>(obj);
59}
60
51004dcb 61namespace {
4388f060 62
51004dcb
A
63UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
64 UErrorCode &errorCode) {
65 if (U_FAILURE(errorCode)) { return NULL; }
66 if (owned.isValid()) {
67 return owned.orphan();
68 }
69 UnicodeString *p = new UnicodeString(s);
70 if (p == NULL) {
71 errorCode = U_MEMORY_ALLOCATION_ERROR;
72 }
73 return p;
74}
4388f060 75
51004dcb
A
76inline UnicodeString *getString(const UVector &list, int32_t i) {
77 return static_cast<UnicodeString *>(list[i]);
78}
4388f060 79
51004dcb
A
80inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
81 return static_cast<AlphabeticIndex::Bucket *>(list[i]);
82}
4388f060 83
51004dcb
A
84inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
85 return static_cast<AlphabeticIndex::Record *>(list[i]);
86}
87
88/**
89 * Like Java Collections.binarySearch(List, String, Comparator).
90 *
91 * @return the index>=0 where the item was found,
92 * or the index<0 for inserting the string at ~index in sorted order
93 */
94int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
95 if (list.size() == 0) { return ~0; }
96 int32_t start = 0;
97 int32_t limit = list.size();
98 for (;;) {
99 int32_t i = (start + limit) / 2;
100 const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
101 UErrorCode errorCode = U_ZERO_ERROR;
102 UCollationResult cmp = coll.compare(s, *si, errorCode);
103 if (cmp == UCOL_EQUAL) {
104 return i;
105 } else if (cmp < 0) {
106 if (i == start) {
107 return ~start; // insert s before *si
108 }
109 limit = i;
110 } else {
111 if (i == start) {
112 return ~(start + 1); // insert s after *si
113 }
114 start = i;
115 }
4388f060
A
116 }
117}
118
51004dcb
A
119} // namespace
120
121// The BucketList is not in the anonymous namespace because only Clang
122// seems to support its use in other classes from there.
123// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
124class BucketList : public UObject {
125public:
126 BucketList(UVector *bucketList, UVector *publicBucketList)
127 : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
128 int32_t displayIndex = 0;
129 for (int32_t i = 0; i < publicBucketList->size(); ++i) {
130 getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
131 }
132 }
4388f060 133
51004dcb
A
134 // The virtual destructor must not be inline.
135 // See ticket #8454 for details.
136 virtual ~BucketList();
137
138 int32_t getBucketCount() const {
139 return immutableVisibleList_->size();
4388f060 140 }
4388f060 141
51004dcb
A
142 int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
143 UErrorCode &errorCode) {
144 // binary search
145 int32_t start = 0;
146 int32_t limit = bucketList_->size();
147 while ((start + 1) < limit) {
148 int32_t i = (start + limit) / 2;
149 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
150 UCollationResult nameVsBucket =
151 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
152 if (nameVsBucket < 0) {
153 limit = i;
154 } else {
155 start = i;
156 }
157 }
158 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
159 if (bucket->displayBucket_ != NULL) {
160 bucket = bucket->displayBucket_;
161 }
162 return bucket->displayIndex_;
4388f060 163 }
51004dcb
A
164
165 /** All of the buckets, visible and invisible. */
166 UVector *bucketList_;
167 /** Just the visible buckets. */
168 UVector *immutableVisibleList_;
169};
170
171BucketList::~BucketList() {
172 delete bucketList_;
173 if (immutableVisibleList_ != bucketList_) {
174 delete immutableVisibleList_;
4388f060 175 }
51004dcb
A
176}
177
178AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
179 delete buckets_;
180 delete collatorPrimaryOnly_;
181}
182
183int32_t
184AlphabeticIndex::ImmutableIndex::getBucketCount() const {
185 return buckets_->getBucketCount();
186}
187
188int32_t
189AlphabeticIndex::ImmutableIndex::getBucketIndex(
190 const UnicodeString &name, UErrorCode &errorCode) const {
191 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
192}
193
194const AlphabeticIndex::Bucket *
195AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
196 if (0 <= index && index < buckets_->getBucketCount()) {
197 return icu::getBucket(*buckets_->immutableVisibleList_, index);
198 } else {
199 return NULL;
4388f060 200 }
4388f060
A
201}
202
51004dcb
A
203AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
204 : inputList_(NULL),
205 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
206 maxLabelCount_(99),
207 initialLabels_(NULL), firstCharsInScripts_(NULL),
208 collator_(NULL), collatorPrimaryOnly_(NULL),
209 buckets_(NULL) {
210 init(&locale, status);
211}
212
213
214AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
215 : inputList_(NULL),
216 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
217 maxLabelCount_(99),
218 initialLabels_(NULL), firstCharsInScripts_(NULL),
219 collator_(collator), collatorPrimaryOnly_(NULL),
220 buckets_(NULL) {
221 init(NULL, status);
222}
223
224
4388f060
A
225
226AlphabeticIndex::~AlphabeticIndex() {
4388f060
A
227 delete collator_;
228 delete collatorPrimaryOnly_;
51004dcb
A
229 delete firstCharsInScripts_;
230 delete buckets_;
231 delete inputList_;
4388f060
A
232 delete initialLabels_;
233}
234
235
236AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
237 if (U_FAILURE(status)) {
238 return *this;
239 }
240 initialLabels_->addAll(additions);
51004dcb 241 clearBuckets();
4388f060
A
242 return *this;
243}
244
245
246AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
51004dcb
A
247 addIndexExemplars(locale, status);
248 clearBuckets();
4388f060
A
249 return *this;
250}
251
252
51004dcb
A
253AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
254 if (U_FAILURE(errorCode)) { return NULL; }
255 // In C++, the ImmutableIndex must own its copy of the BucketList,
256 // even if it contains no records, for proper memory management.
257 // We could clone the buckets_ if they are not NULL,
258 // but that would be worth it only if this method is called multiple times,
259 // or called after using the old-style bucket iterator API.
260 LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
261 LocalPointer<RuleBasedCollator> coll(
262 static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
263 if (immutableBucketList.isNull() || coll.isNull()) {
264 errorCode = U_MEMORY_ALLOCATION_ERROR;
265 return NULL;
266 }
267 ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
268 if (immIndex == NULL) {
269 errorCode = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272 // The ImmutableIndex adopted its parameter objects.
273 immutableBucketList.orphan();
274 coll.orphan();
275 return immIndex;
276}
277
4388f060 278int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
51004dcb 279 initBuckets(status);
4388f060
A
280 if (U_FAILURE(status)) {
281 return 0;
282 }
51004dcb 283 return buckets_->getBucketCount();
4388f060
A
284}
285
286
287int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
51004dcb 288 if (U_FAILURE(status) || inputList_ == NULL) {
4388f060
A
289 return 0;
290 }
51004dcb 291 return inputList_->size();
4388f060
A
292}
293
51004dcb
A
294void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
295 const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
296 if (U_FAILURE(errorCode)) { return; }
297
298 const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
299 const UnicodeString &overflowBoundary =
300 *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
301
302 // We make a sorted array of elements.
303 // Some of the input may be redundant.
304 // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
305 // We filter out those cases.
306 UnicodeSetIterator iter(*initialLabels_);
307 while (iter.next()) {
308 const UnicodeString *item = &iter.getString();
309 LocalPointer<UnicodeString> ownedItem;
310 UBool checkDistinct;
311 int32_t itemLength = item->length();
312 if (!item->hasMoreChar32Than(0, itemLength, 1)) {
313 checkDistinct = FALSE;
314 } else if(item->charAt(itemLength - 1) == 0x2a && // '*'
315 item->charAt(itemLength - 2) != 0x2a) {
316 // Use a label if it is marked with one trailing star,
317 // even if the label string sorts the same when all contractions are suppressed.
318 ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
319 item = ownedItem.getAlias();
320 if (item == NULL) {
321 errorCode = U_MEMORY_ALLOCATION_ERROR;
322 return;
4388f060 323 }
51004dcb 324 checkDistinct = FALSE;
4388f060 325 } else {
51004dcb
A
326 checkDistinct = TRUE;
327 }
328 if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
329 // Ignore a primary-ignorable or non-alphabetic index character.
330 } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
57a6839d 331 // Ignore an index character that will land in the overflow bucket.
51004dcb
A
332 } else if (checkDistinct &&
333 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
334 // Ignore a multi-code point index character that does not sort distinctly
335 // from the sequence of its separate characters.
336 } else {
337 int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
338 if (insertionPoint < 0) {
339 indexCharacters.insertElementAt(
340 ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
341 } else {
342 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
343 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
344 indexCharacters.setElementAt(
345 ownedString(*item, ownedItem, errorCode), insertionPoint);
346 }
347 }
4388f060
A
348 }
349 }
51004dcb 350 if (U_FAILURE(errorCode)) { return; }
4388f060 351
b331163b 352 // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
4388f060 353
51004dcb 354 int32_t size = indexCharacters.size() - 1;
4388f060 355 if (size > maxLabelCount_) {
4388f060
A
356 int32_t count = 0;
357 int32_t old = -1;
51004dcb 358 for (int32_t i = 0; i < indexCharacters.size();) {
4388f060 359 ++count;
51004dcb 360 int32_t bump = count * maxLabelCount_ / size;
4388f060 361 if (bump == old) {
51004dcb 362 indexCharacters.removeElementAt(i);
4388f060 363 } else {
4388f060 364 old = bump;
51004dcb 365 ++i;
4388f060
A
366 }
367 }
4388f060 368 }
51004dcb 369}
4388f060 370
51004dcb
A
371namespace {
372
373const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
374 if (!current.startsWith(BASE, BASE_LENGTH)) {
375 return current;
376 }
377 UChar rest = current.charAt(BASE_LENGTH);
378 if (0x2800 < rest && rest <= 0x28FF) { // stroke count
379 int32_t count = rest-0x2800;
380 temp.setTo((UChar)(0x30 + count % 10));
381 if (count >= 10) {
382 count /= 10;
383 temp.insert(0, (UChar)(0x30 + count % 10));
384 if (count >= 10) {
385 count /= 10;
386 temp.insert(0, (UChar)(0x30 + count));
387 }
388 }
389 return temp.append((UChar)0x5283);
390 }
391 return temp.setTo(current, BASE_LENGTH);
4388f060
A
392}
393
51004dcb 394UBool hasMultiplePrimaryWeights(
57a6839d
A
395 const RuleBasedCollator &coll, uint32_t variableTop,
396 const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
397 ces.removeAllElements();
398 coll.internalGetCEs(s, ces, errorCode);
399 if (U_FAILURE(errorCode)) { return FALSE; }
51004dcb 400 UBool seenPrimary = FALSE;
57a6839d
A
401 for (int32_t i = 0; i < ces.size(); ++i) {
402 int64_t ce = ces.elementAti(i);
403 uint32_t p = (uint32_t)(ce >> 32);
404 if (p > variableTop) {
405 // not primary ignorable
51004dcb
A
406 if (seenPrimary) {
407 return TRUE;
4388f060 408 }
51004dcb 409 seenPrimary = TRUE;
4388f060 410 }
51004dcb
A
411 }
412 return FALSE;
4388f060
A
413}
414
51004dcb 415} // namespace
4388f060 416
51004dcb
A
417BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
418 // Initialize indexCharacters.
419 UVector indexCharacters(errorCode);
420 indexCharacters.setDeleter(uprv_deleteUObject);
421 initLabels(indexCharacters, errorCode);
422 if (U_FAILURE(errorCode)) { return NULL; }
423
424 // Variables for hasMultiplePrimaryWeights().
57a6839d
A
425 UVector64 ces(errorCode);
426 uint32_t variableTop;
51004dcb 427 if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
57a6839d 428 variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
51004dcb
A
429 } else {
430 variableTop = 0;
431 }
432 UBool hasInvisibleBuckets = FALSE;
433
434 // Helper arrays for Chinese Pinyin collation.
435 Bucket *asciiBuckets[26] = {
436 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
437 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
438 };
439 Bucket *pinyinBuckets[26] = {
440 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
441 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
442 };
443 UBool hasPinyin = FALSE;
444
b331163b
A
445 LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
446 if (U_FAILURE(errorCode)) {
51004dcb
A
447 return NULL;
448 }
449 bucketList->setDeleter(uprv_deleteUObject);
450
451 // underflow bucket
452 Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
453 if (bucket == NULL) {
454 errorCode = U_MEMORY_ALLOCATION_ERROR;
455 return NULL;
456 }
457 bucketList->addElement(bucket, errorCode);
458 if (U_FAILURE(errorCode)) { return NULL; }
459
460 UnicodeString temp;
461
462 // fix up the list, adding underflow, additions, overflow
463 // Insert inflow labels as needed.
464 int32_t scriptIndex = -1;
465 const UnicodeString *scriptUpperBoundary = &emptyString_;
466 for (int32_t i = 0; i < indexCharacters.size(); ++i) {
467 UnicodeString &current = *getString(indexCharacters, i);
468 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
469 // We crossed the script boundary into a new script.
470 const UnicodeString &inflowBoundary = *scriptUpperBoundary;
471 UBool skippedScript = FALSE;
472 for (;;) {
473 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
474 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
475 break;
476 }
477 skippedScript = TRUE;
478 }
479 if (skippedScript && bucketList->size() > 1) {
480 // We are skipping one or more scripts,
481 // and we are not just getting out of the underflow label.
482 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
483 if (bucket == NULL) {
484 errorCode = U_MEMORY_ALLOCATION_ERROR;
485 return NULL;
486 }
487 bucketList->addElement(bucket, errorCode);
488 }
489 }
490 // Add a bucket with the current label.
491 bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
492 if (bucket == NULL) {
493 errorCode = U_MEMORY_ALLOCATION_ERROR;
494 return NULL;
495 }
496 bucketList->addElement(bucket, errorCode);
497 // Remember ASCII and Pinyin buckets for Pinyin redirects.
498 UChar c;
499 if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
500 asciiBuckets[c - 0x41] = bucket;
501 } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
502 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
503 pinyinBuckets[c - 0x41] = bucket;
504 hasPinyin = TRUE;
505 }
506 // Check for multiple primary weights.
507 if (!current.startsWith(BASE, BASE_LENGTH) &&
57a6839d
A
508 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
509 ces, errorCode) &&
51004dcb
A
510 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
511 // "AE-ligature" or "Sch" etc.
512 for (int32_t i = bucketList->size() - 2;; --i) {
513 Bucket *singleBucket = getBucket(*bucketList, i);
514 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
515 // There is no single-character bucket since the last
516 // underflow or inflow label.
517 break;
518 }
519 if (singleBucket->displayBucket_ == NULL &&
57a6839d
A
520 !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
521 singleBucket->lowerBoundary_,
522 ces, errorCode)) {
51004dcb
A
523 // Add an invisible bucket that redirects strings greater than the expansion
524 // to the previous single-character bucket.
525 // For example, after ... Q R S Sch we add Sch\uFFFF->S
526 // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
527 bucket = new Bucket(emptyString_,
528 UnicodeString(current).append((UChar)0xFFFF),
529 U_ALPHAINDEX_NORMAL);
530 if (bucket == NULL) {
531 errorCode = U_MEMORY_ALLOCATION_ERROR;
532 return NULL;
533 }
534 bucket->displayBucket_ = singleBucket;
535 bucketList->addElement(bucket, errorCode);
536 hasInvisibleBuckets = TRUE;
537 break;
538 }
539 }
540 }
541 }
542 if (U_FAILURE(errorCode)) { return NULL; }
543 if (bucketList->size() == 1) {
544 // No real labels, show only the underflow label.
545 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
546 if (bl == NULL) {
547 errorCode = U_MEMORY_ALLOCATION_ERROR;
548 return NULL;
549 }
550 bucketList.orphan();
551 return bl;
552 }
553 // overflow bucket
554 bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
555 if (bucket == NULL) {
556 errorCode = U_MEMORY_ALLOCATION_ERROR;
557 return NULL;
558 }
559 bucketList->addElement(bucket, errorCode); // final
560
561 if (hasPinyin) {
562 // Redirect Pinyin buckets.
563 Bucket *asciiBucket = NULL;
564 for (int32_t i = 0; i < 26; ++i) {
565 if (asciiBuckets[i] != NULL) {
566 asciiBucket = asciiBuckets[i];
567 }
568 if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
569 pinyinBuckets[i]->displayBucket_ = asciiBucket;
570 hasInvisibleBuckets = TRUE;
571 }
572 }
573 }
574
575 if (U_FAILURE(errorCode)) { return NULL; }
576 if (!hasInvisibleBuckets) {
577 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
578 if (bl == NULL) {
579 errorCode = U_MEMORY_ALLOCATION_ERROR;
580 return NULL;
581 }
582 bucketList.orphan();
583 return bl;
584 }
585 // Merge inflow buckets that are visually adjacent.
586 // Iterate backwards: Merge inflow into overflow rather than the other way around.
587 int32_t i = bucketList->size() - 1;
588 Bucket *nextBucket = getBucket(*bucketList, i);
589 while (--i > 0) {
590 bucket = getBucket(*bucketList, i);
591 if (bucket->displayBucket_ != NULL) {
592 continue; // skip invisible buckets
593 }
594 if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
595 if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
596 bucket->displayBucket_ = nextBucket;
597 continue;
598 }
599 }
600 nextBucket = bucket;
601 }
602
b331163b
A
603 LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
604 if (U_FAILURE(errorCode)) {
51004dcb
A
605 return NULL;
606 }
607 // Do not call publicBucketList->setDeleter():
608 // This vector shares its objects with the bucketList.
609 for (int32_t i = 0; i < bucketList->size(); ++i) {
610 bucket = getBucket(*bucketList, i);
611 if (bucket->displayBucket_ == NULL) {
612 publicBucketList->addElement(bucket, errorCode);
613 }
614 }
615 if (U_FAILURE(errorCode)) { return NULL; }
616 BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
617 if (bl == NULL) {
618 errorCode = U_MEMORY_ALLOCATION_ERROR;
619 return NULL;
620 }
621 bucketList.orphan();
622 publicBucketList.orphan();
623 return bl;
624}
625
626/**
627 * Creates an index, and buckets and sorts the list of records into the index.
628 */
629void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
630 if (U_FAILURE(errorCode) || buckets_ != NULL) {
631 return;
632 }
633 buckets_ = createBucketList(errorCode);
634 if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
4388f060
A
635 return;
636 }
637
51004dcb
A
638 // Sort the records by name.
639 // Stable sort preserves input order of collation duplicates.
640 inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
641
642 // Now, we traverse all of the input, which is now sorted.
643 // If the item doesn't go in the current bucket, we find the next bucket that contains it.
644 // This makes the process order n*log(n), since we just sort the list and then do a linear process.
645 // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
646 // we need to improve it for that case.
647
648 Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
649 int32_t bucketIndex = 1;
650 Bucket *nextBucket;
651 const UnicodeString *upperBoundary;
652 if (bucketIndex < buckets_->bucketList_->size()) {
653 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
654 upperBoundary = &nextBucket->lowerBoundary_;
655 } else {
656 nextBucket = NULL;
657 upperBoundary = NULL;
658 }
659 for (int32_t i = 0; i < inputList_->size(); ++i) {
660 Record *r = getRecord(*inputList_, i);
661 // if the current bucket isn't the right one, find the one that is
662 // We have a special flag for the last bucket so that we don't look any further
663 while (upperBoundary != NULL &&
664 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
665 currentBucket = nextBucket;
666 // now reset the boundary that we compare against
667 if (bucketIndex < buckets_->bucketList_->size()) {
668 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
669 upperBoundary = &nextBucket->lowerBoundary_;
4388f060 670 } else {
51004dcb 671 upperBoundary = NULL;
4388f060 672 }
4388f060 673 }
51004dcb
A
674 // now put the record into the bucket.
675 Bucket *bucket = currentBucket;
676 if (bucket->displayBucket_ != NULL) {
677 bucket = bucket->displayBucket_;
678 }
679 if (bucket->records_ == NULL) {
680 bucket->records_ = new UVector(errorCode);
681 if (bucket->records_ == NULL) {
682 errorCode = U_MEMORY_ALLOCATION_ERROR;
683 return;
684 }
685 }
686 bucket->records_->addElement(r, errorCode);
4388f060 687 }
51004dcb 688}
4388f060 689
51004dcb
A
690void AlphabeticIndex::clearBuckets() {
691 if (buckets_ != NULL) {
692 delete buckets_;
693 buckets_ = NULL;
694 internalResetBucketIterator();
695 }
4388f060
A
696}
697
51004dcb
A
698void AlphabeticIndex::internalResetBucketIterator() {
699 labelsIterIndex_ = -1;
700 currentBucket_ = NULL;
701}
4388f060 702
51004dcb
A
703
704void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
51004dcb 705 LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
4388f060
A
706 if (U_FAILURE(status)) {
707 return;
708 }
709
4388f060
A
710 UnicodeSet exemplars;
711 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
712 if (U_SUCCESS(status)) {
51004dcb 713 initialLabels_->addAll(exemplars);
4388f060
A
714 return;
715 }
716 status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
717
51004dcb 718 // The locale data did not include explicit Index characters.
4388f060 719 // Synthesize a set of them from the locale's standard exemplar characters.
4388f060
A
720 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
721 if (U_FAILURE(status)) {
722 return;
723 }
724
51004dcb
A
725 // question: should we add auxiliary exemplars?
726 if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
727 exemplars.add(0x61, 0x7A);
728 }
729 if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
730 // cut down to small list
731 exemplars.remove(0xAC00, 0xD7A3).
732 add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
733 add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
734 add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
735 add(0xD30C).add(0xD558);
736 }
737 if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
738 // cut down to small list
739 // make use of the fact that Ethiopic is allocated in 8's, where
740 // the base is 0 mod 8.
741 UnicodeSet ethiopic(
742 UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
743 UnicodeSetIterator it(ethiopic);
744 while (it.next() && !it.isString()) {
745 if ((it.getCodepoint() & 0x7) != 0) {
746 exemplars.remove(it.getCodepoint());
747 }
748 }
749 }
750
4388f060
A
751 // Upper-case any that aren't already so.
752 // (We only do this for synthesized index characters.)
4388f060
A
753 UnicodeSetIterator it(exemplars);
754 UnicodeString upperC;
4388f060
A
755 while (it.next()) {
756 const UnicodeString &exemplarC = it.getString();
757 upperC = exemplarC;
758 upperC.toUpper(locale);
51004dcb 759 initialLabels_->add(upperC);
4388f060 760 }
51004dcb 761}
4388f060 762
51004dcb
A
763UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
764 UnicodeSet contractions;
57a6839d
A
765 collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
766 if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
767 initialLabels_->addAll(contractions);
51004dcb
A
768 UnicodeSetIterator iter(contractions);
769 while (iter.next()) {
770 const UnicodeString &s = iter.getString();
57a6839d
A
771 U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
772 UChar c = s.charAt(s.length() - 1);
773 if (0x41 <= c && c <= 0x5A) { // A-Z
774 // There are Pinyin labels, add ASCII A-Z labels as well.
775 initialLabels_->add(0x41, 0x5A); // A-Z
776 break;
4388f060
A
777 }
778 }
57a6839d 779 return TRUE;
4388f060
A
780}
781
782
783/*
784 * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
785 */
51004dcb 786static const UChar CGJ = 0x034F;
4388f060
A
787UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
788 UnicodeString result;
789 if (item.length() == 0) {
790 return result;
791 }
792 int32_t i = 0;
793 for (;;) {
794 UChar32 cp = item.char32At(i);
795 result.append(cp);
796 i = item.moveIndex32(i, 1);
797 if (i >= item.length()) {
798 break;
799 }
800 result.append(CGJ);
801 }
802 return result;
803}
804
805
806UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
807 return FALSE;
808}
809
810
811UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
812 return FALSE;
813}
814
815
816const RuleBasedCollator &AlphabeticIndex::getCollator() const {
57a6839d 817 return *collator_;
4388f060
A
818}
819
820
821const UnicodeString &AlphabeticIndex::getInflowLabel() const {
822 return inflowLabel_;
823}
824
825const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
826 return overflowLabel_;
827}
828
829
830const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
831 return underflowLabel_;
832}
833
834
835AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
836 inflowLabel_ = label;
51004dcb 837 clearBuckets();
4388f060
A
838 return *this;
839}
840
841
842AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
843 overflowLabel_ = label;
51004dcb 844 clearBuckets();
4388f060
A
845 return *this;
846}
847
848
849AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
850 underflowLabel_ = label;
51004dcb 851 clearBuckets();
4388f060
A
852 return *this;
853}
854
855
856int32_t AlphabeticIndex::getMaxLabelCount() const {
857 return maxLabelCount_;
858}
859
860
861AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
862 if (U_FAILURE(status)) {
863 return *this;
864 }
865 if (maxLabelCount <= 0) {
866 status = U_ILLEGAL_ARGUMENT_ERROR;
867 return *this;
868 }
869 maxLabelCount_ = maxLabelCount;
51004dcb 870 clearBuckets();
4388f060
A
871 return *this;
872}
873
874
4388f060
A
875//
876// init() - Common code for constructors.
877//
878
51004dcb
A
879void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
880 if (U_FAILURE(status)) { return; }
881 if (locale == NULL && collator_ == NULL) {
882 status = U_ILLEGAL_ARGUMENT_ERROR;
4388f060
A
883 return;
884 }
51004dcb 885
4388f060 886 initialLabels_ = new UnicodeSet();
51004dcb
A
887 if (initialLabels_ == NULL) {
888 status = U_MEMORY_ALLOCATION_ERROR;
889 return;
890 }
4388f060 891
51004dcb 892 inflowLabel_.setTo((UChar)0x2026); // Ellipsis
4388f060
A
893 overflowLabel_ = inflowLabel_;
894 underflowLabel_ = inflowLabel_;
895
51004dcb 896 if (collator_ == NULL) {
57a6839d
A
897 Collator *coll = Collator::createInstance(*locale, status);
898 if (U_FAILURE(status)) {
899 delete coll;
900 return;
901 }
902 if (coll == NULL) {
51004dcb
A
903 status = U_MEMORY_ALLOCATION_ERROR;
904 return;
905 }
57a6839d
A
906 collator_ = dynamic_cast<RuleBasedCollator *>(coll);
907 if (collator_ == NULL) {
908 delete coll;
909 status = U_UNSUPPORTED_ERROR;
910 return;
911 }
51004dcb
A
912 }
913 collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
914 if (collatorPrimaryOnly_ == NULL) {
915 status = U_MEMORY_ALLOCATION_ERROR;
4388f060
A
916 return;
917 }
51004dcb
A
918 collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
919 firstCharsInScripts_ = firstStringsInScript(status);
920 if (U_FAILURE(status)) { return; }
921 firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
51004dcb
A
922 // Guard against a degenerate collator where
923 // some script boundary strings are primary ignorable.
924 for (;;) {
925 if (U_FAILURE(status)) { return; }
926 if (firstCharsInScripts_->isEmpty()) {
927 // AlphabeticIndex requires some non-ignorable script boundary strings.
928 status = U_ILLEGAL_ARGUMENT_ERROR;
929 return;
4388f060 930 }
51004dcb
A
931 if (collatorPrimaryOnly_->compare(
932 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
933 emptyString_, status) == UCOL_EQUAL) {
934 firstCharsInScripts_->removeElementAt(0);
935 } else {
936 break;
4388f060
A
937 }
938 }
4388f060 939
57a6839d
A
940 // Chinese index characters, which are specific to each of the several Chinese tailorings,
941 // take precedence over the single locale data exemplar set per language.
942 if (!addChineseIndexCharacters(status) && locale != NULL) {
51004dcb 943 addIndexExemplars(*locale, status);
4388f060 944 }
4388f060
A
945}
946
947
948//
949// Comparison function for UVector<UnicodeString *> sorting with a collator.
950//
951static int32_t U_CALLCONV
51004dcb 952collatorComparator(const void *context, const void *left, const void *right) {
4388f060
A
953 const UElement *leftElement = static_cast<const UElement *>(left);
954 const UElement *rightElement = static_cast<const UElement *>(right);
955 const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer);
956 const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
4388f060
A
957
958 if (leftString == rightString) {
959 // Catches case where both are NULL
960 return 0;
961 }
962 if (leftString == NULL) {
963 return 1;
964 };
965 if (rightString == NULL) {
966 return -1;
967 }
51004dcb
A
968 const Collator *col = static_cast<const Collator *>(context);
969 UErrorCode errorCode = U_ZERO_ERROR;
970 return col->compare(*leftString, *rightString, errorCode);
4388f060
A
971}
972
973//
974// Comparison function for UVector<Record *> sorting with a collator.
975//
976static int32_t U_CALLCONV
977recordCompareFn(const void *context, const void *left, const void *right) {
978 const UElement *leftElement = static_cast<const UElement *>(left);
979 const UElement *rightElement = static_cast<const UElement *>(right);
980 const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
981 const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
982 const Collator *col = static_cast<const Collator *>(context);
51004dcb
A
983 UErrorCode errorCode = U_ZERO_ERROR;
984 return col->compare(leftRec->name_, rightRec->name_, errorCode);
4388f060
A
985}
986
4388f060
A
987UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
988 if (U_FAILURE(status)) {
989 return NULL;
990 }
b331163b
A
991 LocalPointer<UVector> dest(new UVector(status), status);
992 if (U_FAILURE(status)) {
4388f060
A
993 return NULL;
994 }
995 dest->setDeleter(uprv_deleteUObject);
57a6839d
A
996 // Fetch the script-first-primary contractions which are defined in the root collator.
997 // They all start with U+FDD1.
998 UnicodeSet set;
999 collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1000 if (U_FAILURE(status)) {
1001 return NULL;
1002 }
1003 if (set.isEmpty()) {
1004 status = U_UNSUPPORTED_ERROR;
1005 return NULL;
1006 }
1007 UnicodeSetIterator iter(set);
1008 while (iter.next()) {
1009 const UnicodeString &boundary = iter.getString();
1010 uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1011 if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1012 // Ignore boundaries for the special reordering groups.
1013 // Take only those for "real scripts" (where the sample character is a Letter,
1014 // and the one for unassigned implicit weights (Cn).
1015 continue;
4388f060 1016 }
57a6839d
A
1017 UnicodeString *s = new UnicodeString(boundary);
1018 if (s == NULL) {
4388f060 1019 status = U_MEMORY_ALLOCATION_ERROR;
57a6839d 1020 return NULL;
4388f060 1021 }
57a6839d
A
1022 dest->addElement(s, status);
1023 }
1024 return dest.orphan();
4388f060
A
1025}
1026
1027
51004dcb 1028namespace {
4388f060
A
1029
1030/**
51004dcb
A
1031 * Returns true if one index character string is "better" than the other.
1032 * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1033 * better, and otherwise binary-less-than is better.
4388f060 1034 */
51004dcb
A
1035UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1036 const UnicodeString &one, const UnicodeString &other) {
1037 // This is called with primary-equal strings, but never with one.equals(other).
1038 UErrorCode status = U_ZERO_ERROR;
1039 UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1040 UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1041 if (U_FAILURE(status)) { return FALSE; }
1042 int32_t result = n1.countChar32() - n2.countChar32();
4388f060 1043 if (result != 0) {
51004dcb 1044 return result < 0;
4388f060 1045 }
4388f060
A
1046 result = n1.compareCodePointOrder(n2);
1047 if (result != 0) {
51004dcb 1048 return result < 0;
4388f060 1049 }
51004dcb 1050 return one.compareCodePointOrder(other) < 0;
4388f060
A
1051}
1052
51004dcb 1053} // namespace
4388f060
A
1054
1055//
1056// Constructor & Destructor for AlphabeticIndex::Record
1057//
1058// Records are internal only, instances are not directly surfaced in the public API.
1059// This class is mostly struct-like, with all public fields.
1060
51004dcb
A
1061AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1062 : name_(name), data_(data) {}
1063
4388f060
A
1064AlphabeticIndex::Record::~Record() {
1065}
1066
1067
1068AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1069 if (U_FAILURE(status)) {
1070 return *this;
1071 }
51004dcb
A
1072 if (inputList_ == NULL) {
1073 inputList_ = new UVector(status);
1074 if (inputList_ == NULL) {
1075 status = U_MEMORY_ALLOCATION_ERROR;
1076 return *this;
1077 }
1078 inputList_->setDeleter(alphaIndex_deleteRecord);
1079 }
1080 Record *r = new Record(name, data);
1081 if (r == NULL) {
1082 status = U_MEMORY_ALLOCATION_ERROR;
1083 return *this;
1084 }
1085 inputList_->addElement(r, status);
1086 clearBuckets();
4388f060
A
1087 //std::string ss;
1088 //std::string ss2;
1089 //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1090 // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1091 return *this;
1092}
1093
1094
1095AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
51004dcb
A
1096 if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1097 inputList_->removeAllElements();
1098 clearBuckets();
4388f060 1099 }
4388f060
A
1100 return *this;
1101}
1102
4388f060 1103int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
51004dcb 1104 initBuckets(status);
4388f060
A
1105 if (U_FAILURE(status)) {
1106 return 0;
1107 }
51004dcb 1108 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
4388f060
A
1109}
1110
1111
1112int32_t AlphabeticIndex::getBucketIndex() const {
1113 return labelsIterIndex_;
1114}
1115
1116
1117UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1118 if (U_FAILURE(status)) {
1119 return FALSE;
1120 }
51004dcb 1121 if (buckets_ == NULL && currentBucket_ != NULL) {
4388f060
A
1122 status = U_ENUM_OUT_OF_SYNC_ERROR;
1123 return FALSE;
1124 }
51004dcb 1125 initBuckets(status);
4388f060
A
1126 if (U_FAILURE(status)) {
1127 return FALSE;
1128 }
1129 ++labelsIterIndex_;
51004dcb
A
1130 if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1131 labelsIterIndex_ = buckets_->getBucketCount();
4388f060
A
1132 return FALSE;
1133 }
51004dcb 1134 currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
4388f060
A
1135 resetRecordIterator();
1136 return TRUE;
1137}
1138
1139const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1140 if (currentBucket_ != NULL) {
1141 return currentBucket_->label_;
1142 } else {
51004dcb 1143 return emptyString_;
4388f060
A
1144 }
1145}
1146
1147
1148UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1149 if (currentBucket_ != NULL) {
1150 return currentBucket_->labelType_;
1151 } else {
1152 return U_ALPHAINDEX_NORMAL;
1153 }
1154}
1155
1156
1157int32_t AlphabeticIndex::getBucketRecordCount() const {
51004dcb 1158 if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
4388f060
A
1159 return currentBucket_->records_->size();
1160 } else {
1161 return 0;
1162 }
1163}
1164
1165
1166AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1167 if (U_FAILURE(status)) {
1168 return *this;
1169 }
51004dcb 1170 internalResetBucketIterator();
4388f060
A
1171 return *this;
1172}
1173
1174
1175UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1176 if (U_FAILURE(status)) {
1177 return FALSE;
1178 }
1179 if (currentBucket_ == NULL) {
1180 // We are trying to iterate over the items in a bucket, but there is no
1181 // current bucket from the enumeration of buckets.
1182 status = U_INVALID_STATE_ERROR;
1183 return FALSE;
1184 }
51004dcb 1185 if (buckets_ == NULL) {
4388f060
A
1186 status = U_ENUM_OUT_OF_SYNC_ERROR;
1187 return FALSE;
1188 }
51004dcb
A
1189 if (currentBucket_->records_ == NULL) {
1190 return FALSE;
1191 }
4388f060
A
1192 ++itemsIterIndex_;
1193 if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1194 itemsIterIndex_ = currentBucket_->records_->size();
1195 return FALSE;
1196 }
1197 return TRUE;
1198}
1199
1200
1201const UnicodeString &AlphabeticIndex::getRecordName() const {
51004dcb
A
1202 const UnicodeString *retStr = &emptyString_;
1203 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060
A
1204 itemsIterIndex_ >= 0 &&
1205 itemsIterIndex_ < currentBucket_->records_->size()) {
1206 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1207 retStr = &item->name_;
1208 }
1209 return *retStr;
1210}
1211
1212const void *AlphabeticIndex::getRecordData() const {
1213 const void *retPtr = NULL;
51004dcb 1214 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060
A
1215 itemsIterIndex_ >= 0 &&
1216 itemsIterIndex_ < currentBucket_->records_->size()) {
1217 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1218 retPtr = item->data_;
1219 }
1220 return retPtr;
1221}
1222
1223
1224AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1225 itemsIterIndex_ = -1;
1226 return *this;
1227}
1228
1229
1230
1231AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1232 const UnicodeString &lowerBoundary,
51004dcb
A
1233 UAlphabeticIndexLabelType type)
1234 : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1235 displayBucket_(NULL), displayIndex_(-1),
1236 records_(NULL) {
4388f060
A
1237}
1238
1239
1240AlphabeticIndex::Bucket::~Bucket() {
1241 delete records_;
1242}
1243
1244U_NAMESPACE_END
1245
57a6839d 1246#endif // !UCONFIG_NO_COLLATION