icuSources/i18n/alphaindex.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 * Copyright (C) 2009-2014, International Business Machines Corporation and
   6 * others. All Rights Reserved.
   7 *******************************************************************************
   8 */
   9
  10 #include "unicode/utypes.h"
  11
  12 #if !UCONFIG_NO_COLLATION
  13
  14 #include "unicode/alphaindex.h"
  15 #include "unicode/coll.h"
  16 #include "unicode/localpointer.h"
  17 #include "unicode/normalizer2.h"
  18 #include "unicode/tblcoll.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/ulocdata.h"
  21 #include "unicode/uniset.h"
  22 #include "unicode/uobject.h"
  23 #include "unicode/usetiter.h"
  24 #include "unicode/utf16.h"
  25
  26 #include "cmemory.h"
  27 #include "cstring.h"
  28 #include "uassert.h"
  29 #include "uvector.h"
  30 #include "uvectr64.h"
  31
  32 //#include <string>
  33 //#include <iostream>
  34
  35 U_NAMESPACE_BEGIN
  36
  37 namespace {
  38
  39 /**
  40  * Prefix string for Chinese index buckets.
  41  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
  42  */
  43 const UChar BASE[1] = { 0xFDD0 };
  44 const int32_t BASE_LENGTH = 1;
  45
  46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
  47                                 const UnicodeString &one, const UnicodeString &other);
  48
  49 }  // namespace
  50
  51 static int32_t U_CALLCONV
  52 collatorComparator(const void *context, const void *left, const void *right);
  53
  54 static int32_t U_CALLCONV
  55 recordCompareFn(const void *context, const void *left, const void *right);
  56
  57 //  UVector<Record *> support function, delete a Record.
  58 static void U_CALLCONV
  59 alphaIndex_deleteRecord(void *obj) {
  60     delete static_cast<AlphabeticIndex::Record *>(obj);
  61 }
  62
  63 namespace {
  64
  65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
  66                            UErrorCode &errorCode) {
  67     if (U_FAILURE(errorCode)) { return NULL; }
  68     if (owned.isValid()) {
  69         return owned.orphan();
  70     }
  71     UnicodeString *p = new UnicodeString(s);
  72     if (p == NULL) {
  73         errorCode = U_MEMORY_ALLOCATION_ERROR;
  74     }
  75     return p;
  76 }
  77
  78 inline UnicodeString *getString(const UVector &list, int32_t i) {
  79     return static_cast<UnicodeString *>(list[i]);
  80 }
  81
  82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
  83     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
  84 }
  85
  86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
  87     return static_cast<AlphabeticIndex::Record *>(list[i]);
  88 }
  89
  90 /**
  91  * Like Java Collections.binarySearch(List, String, Comparator).
  92  *
  93  * @return the index>=0 where the item was found,
  94  *         or the index<0 for inserting the string at ~index in sorted order
  95  */
  96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
  97     if (list.size() == 0) { return ~0; }
  98     int32_t start = 0;
  99     int32_t limit = list.size();
 100     for (;;) {
 101         int32_t i = (start + limit) / 2;
 102         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
 103         UErrorCode errorCode = U_ZERO_ERROR;
 104         UCollationResult cmp = coll.compare(s, *si, errorCode);
 105         if (cmp == UCOL_EQUAL) {
 106             return i;
 107         } else if (cmp < 0) {
 108             if (i == start) {
 109                 return ~start;  // insert s before *si
 110             }
 111             limit = i;
 112         } else {
 113             if (i == start) {
 114                 return ~(start + 1);  // insert s after *si
 115             }
 116             start = i;
 117         }
 118     }
 119 }
 120
 121 }  // namespace
 122
 123 // The BucketList is not in the anonymous namespace because only Clang
 124 // seems to support its use in other classes from there.
 125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
 126 class BucketList : public UObject {
 127 public:
 128     BucketList(UVector *bucketList, UVector *publicBucketList)
 129             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
 130         int32_t displayIndex = 0;
 131         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
 132             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
 133         }
 134     }
 135
 136     // The virtual destructor must not be inline.
 137     // See ticket #8454 for details.
 138     virtual ~BucketList();
 139
 140     int32_t getBucketCount() const {
 141         return immutableVisibleList_->size();
 142     }
 143
 144     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
 145                            UErrorCode &errorCode) {
 146         // binary search
 147         int32_t start = 0;
 148         int32_t limit = bucketList_->size();
 149         while ((start + 1) < limit) {
 150             int32_t i = (start + limit) / 2;
 151             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
 152             UCollationResult nameVsBucket =
 153                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
 154             if (nameVsBucket < 0) {
 155                 limit = i;
 156             } else {
 157                 start = i;
 158             }
 159         }
 160         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
 161         if (bucket->displayBucket_ != NULL) {
 162             bucket = bucket->displayBucket_;
 163         }
 164         return bucket->displayIndex_;
 165     }
 166
 167     /** All of the buckets, visible and invisible. */
 168     UVector *bucketList_;
 169     /** Just the visible buckets. */
 170     UVector *immutableVisibleList_;
 171 };
 172
 173 BucketList::~BucketList() {
 174     delete bucketList_;
 175     if (immutableVisibleList_ != bucketList_) {
 176         delete immutableVisibleList_;
 177     }
 178 }
 179
 180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
 181     delete buckets_;
 182     delete collatorPrimaryOnly_;
 183 }
 184
 185 int32_t
 186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
 187     return buckets_->getBucketCount();
 188 }
 189
 190 int32_t
 191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
 192         const UnicodeString &name, UErrorCode &errorCode) const {
 193     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
 194 }
 195
 196 const AlphabeticIndex::Bucket *
 197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
 198     if (0 <= index && index < buckets_->getBucketCount()) {
 199         return icu::getBucket(*buckets_->immutableVisibleList_, index);
 200     } else {
 201         return NULL;
 202     }
 203 }
 204
 205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
 206         : inputList_(NULL),
 207           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 208           maxLabelCount_(99),
 209           initialLabels_(NULL), firstCharsInScripts_(NULL),
 210           collator_(NULL), collatorPrimaryOnly_(NULL),
 211           buckets_(NULL) {
 212     init(&locale, status);
 213 }
 214
 215
 216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
 217         : inputList_(NULL),
 218           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 219           maxLabelCount_(99),
 220           initialLabels_(NULL), firstCharsInScripts_(NULL),
 221           collator_(collator), collatorPrimaryOnly_(NULL),
 222           buckets_(NULL) {
 223     init(NULL, status);
 224 }
 225
 226
 227
 228 AlphabeticIndex::~AlphabeticIndex() {
 229     delete collator_;
 230     delete collatorPrimaryOnly_;
 231     delete firstCharsInScripts_;
 232     delete buckets_;
 233     delete inputList_;
 234     delete initialLabels_;
 235 }
 236
 237
 238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
 239     if (U_FAILURE(status)) {
 240         return *this;
 241     }
 242     initialLabels_->addAll(additions);
 243     clearBuckets();
 244     return *this;
 245 }
 246
 247
 248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
 249     addIndexExemplars(locale, status);
 250     clearBuckets();
 251     return *this;
 252 }
 253
 254
 255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
 256     if (U_FAILURE(errorCode)) { return NULL; }
 257     // In C++, the ImmutableIndex must own its copy of the BucketList,
 258     // even if it contains no records, for proper memory management.
 259     // We could clone the buckets_ if they are not NULL,
 260     // but that would be worth it only if this method is called multiple times,
 261     // or called after using the old-style bucket iterator API.
 262     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
 263     LocalPointer<RuleBasedCollator> coll(
 264         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
 265     if (immutableBucketList.isNull() || coll.isNull()) {
 266         errorCode = U_MEMORY_ALLOCATION_ERROR;
 267         return NULL;
 268     }
 269     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
 270     if (immIndex == NULL) {
 271         errorCode = U_MEMORY_ALLOCATION_ERROR;
 272         return NULL;
 273     }
 274     // The ImmutableIndex adopted its parameter objects.
 275     immutableBucketList.orphan();
 276     coll.orphan();
 277     return immIndex;
 278 }
 279
 280 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
 281     initBuckets(status);
 282     if (U_FAILURE(status)) {
 283         return 0;
 284     }
 285     return buckets_->getBucketCount();
 286 }
 287
 288
 289 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
 290     if (U_FAILURE(status) || inputList_ == NULL) {
 291         return 0;
 292     }
 293     return inputList_->size();
 294 }
 295
 296 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
 297     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
 298     if (U_FAILURE(errorCode)) { return; }
 299
 300     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
 301     const UnicodeString &overflowBoundary =
 302         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
 303
 304     // We make a sorted array of elements.
 305     // Some of the input may be redundant.
 306     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
 307     // We filter out those cases.
 308     UnicodeSetIterator iter(*initialLabels_);
 309     while (iter.next()) {
 310         const UnicodeString *item = &iter.getString();
 311         LocalPointer<UnicodeString> ownedItem;
 312         UBool checkDistinct;
 313         int32_t itemLength = item->length();
 314         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
 315             checkDistinct = FALSE;
 316         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
 317                 item->charAt(itemLength - 2) != 0x2a) {
 318             // Use a label if it is marked with one trailing star,
 319             // even if the label string sorts the same when all contractions are suppressed.
 320             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
 321             item = ownedItem.getAlias();
 322             if (item == NULL) {
 323                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 324                 return;
 325             }
 326             checkDistinct = FALSE;
 327         } else {
 328             checkDistinct = TRUE;
 329         }
 330         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
 331             // Ignore a primary-ignorable or non-alphabetic index character.
 332         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
 333             // Ignore an index character that will land in the overflow bucket.
 334         } else if (checkDistinct &&
 335                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
 336             // Ignore a multi-code point index character that does not sort distinctly
 337             // from the sequence of its separate characters.
 338         } else {
 339             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
 340             if (insertionPoint < 0) {
 341                 indexCharacters.insertElementAt(
 342                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
 343             } else {
 344                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
 345                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
 346                     indexCharacters.setElementAt(
 347                         ownedString(*item, ownedItem, errorCode), insertionPoint);
 348                 }
 349             }
 350         }
 351     }
 352     if (U_FAILURE(errorCode)) { return; }
 353
 354     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
 355
 356     int32_t size = indexCharacters.size() - 1;
 357     if (size > maxLabelCount_) {
 358         int32_t count = 0;
 359         int32_t old = -1;
 360         for (int32_t i = 0; i < indexCharacters.size();) {
 361             ++count;
 362             int32_t bump = count * maxLabelCount_ / size;
 363             if (bump == old) {
 364                 indexCharacters.removeElementAt(i);
 365             } else {
 366                 old = bump;
 367                 ++i;
 368             }
 369         }
 370     }
 371 }
 372
 373 namespace {
 374
 375 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
 376     if (!current.startsWith(BASE, BASE_LENGTH)) {
 377         return current;
 378     }
 379     UChar rest = current.charAt(BASE_LENGTH);
 380     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
 381         int32_t count = rest-0x2800;
 382         temp.setTo((UChar)(0x30 + count % 10));
 383         if (count >= 10) {
 384             count /= 10;
 385             temp.insert(0, (UChar)(0x30 + count % 10));
 386             if (count >= 10) {
 387                 count /= 10;
 388                 temp.insert(0, (UChar)(0x30 + count));
 389             }
 390         }
 391         return temp.append((UChar)0x5283);
 392     }
 393     return temp.setTo(current, BASE_LENGTH);
 394 }
 395
 396 UBool hasMultiplePrimaryWeights(
 397         const RuleBasedCollator &coll, uint32_t variableTop,
 398         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
 399     ces.removeAllElements();
 400     coll.internalGetCEs(s, ces, errorCode);
 401     if (U_FAILURE(errorCode)) { return FALSE; }
 402     UBool seenPrimary = FALSE;
 403     for (int32_t i = 0; i < ces.size(); ++i) {
 404         int64_t ce = ces.elementAti(i);
 405         uint32_t p = (uint32_t)(ce >> 32);
 406         if (p > variableTop) {
 407             // not primary ignorable
 408             if (seenPrimary) {
 409                 return TRUE;
 410             }
 411             seenPrimary = TRUE;
 412         }
 413     }
 414     return FALSE;
 415 }
 416
 417 }  // namespace
 418
 419 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
 420     // Initialize indexCharacters.
 421     UVector indexCharacters(errorCode);
 422     indexCharacters.setDeleter(uprv_deleteUObject);
 423     initLabels(indexCharacters, errorCode);
 424     if (U_FAILURE(errorCode)) { return NULL; }
 425
 426     // Variables for hasMultiplePrimaryWeights().
 427     UVector64 ces(errorCode);
 428     uint32_t variableTop;
 429     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
 430         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
 431     } else {
 432         variableTop = 0;
 433     }
 434     UBool hasInvisibleBuckets = FALSE;
 435
 436     // Helper arrays for Chinese Pinyin collation.
 437     Bucket *asciiBuckets[26] = {
 438         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 439         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 440     };
 441     Bucket *pinyinBuckets[26] = {
 442         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 443         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 444     };
 445     UBool hasPinyin = FALSE;
 446
 447     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
 448     if (U_FAILURE(errorCode)) {
 449         return NULL;
 450     }
 451     bucketList->setDeleter(uprv_deleteUObject);
 452
 453     // underflow bucket
 454     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
 455     if (bucket == NULL) {
 456         errorCode = U_MEMORY_ALLOCATION_ERROR;
 457         return NULL;
 458     }
 459     bucketList->addElement(bucket, errorCode);
 460     if (U_FAILURE(errorCode)) { return NULL; }
 461
 462     UnicodeString temp;
 463
 464     // fix up the list, adding underflow, additions, overflow
 465     // Insert inflow labels as needed.
 466     int32_t scriptIndex = -1;
 467     const UnicodeString *scriptUpperBoundary = &emptyString_;
 468     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
 469         UnicodeString &current = *getString(indexCharacters, i);
 470         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
 471             // We crossed the script boundary into a new script.
 472             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
 473             UBool skippedScript = FALSE;
 474             for (;;) {
 475                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
 476                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
 477                     break;
 478                 }
 479                 skippedScript = TRUE;
 480             }
 481             if (skippedScript && bucketList->size() > 1) {
 482                 // We are skipping one or more scripts,
 483                 // and we are not just getting out of the underflow label.
 484                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
 485                 if (bucket == NULL) {
 486                     errorCode = U_MEMORY_ALLOCATION_ERROR;
 487                     return NULL;
 488                 }
 489                 bucketList->addElement(bucket, errorCode);
 490             }
 491         }
 492         // Add a bucket with the current label.
 493         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
 494         if (bucket == NULL) {
 495             errorCode = U_MEMORY_ALLOCATION_ERROR;
 496             return NULL;
 497         }
 498         bucketList->addElement(bucket, errorCode);
 499         // Remember ASCII and Pinyin buckets for Pinyin redirects.
 500         UChar c;
 501         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
 502             asciiBuckets[c - 0x41] = bucket;
 503         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
 504                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
 505             pinyinBuckets[c - 0x41] = bucket;
 506             hasPinyin = TRUE;
 507         }
 508         // Check for multiple primary weights.
 509         if (!current.startsWith(BASE, BASE_LENGTH) &&
 510                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
 511                                           ces, errorCode) &&
 512                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
 513             // "AE-ligature" or "Sch" etc.
 514             for (int32_t i = bucketList->size() - 2;; --i) {
 515                 Bucket *singleBucket = getBucket(*bucketList, i);
 516                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 517                     // There is no single-character bucket since the last
 518                     // underflow or inflow label.
 519                     break;
 520                 }
 521                 if (singleBucket->displayBucket_ == NULL &&
 522                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
 523                                                    singleBucket->lowerBoundary_,
 524                                                    ces, errorCode)) {
 525                     // Add an invisible bucket that redirects strings greater than the expansion
 526                     // to the previous single-character bucket.
 527                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
 528                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
 529                     bucket = new Bucket(emptyString_,
 530                         UnicodeString(current).append((UChar)0xFFFF),
 531                         U_ALPHAINDEX_NORMAL);
 532                     if (bucket == NULL) {
 533                         errorCode = U_MEMORY_ALLOCATION_ERROR;
 534                         return NULL;
 535                     }
 536                     bucket->displayBucket_ = singleBucket;
 537                     bucketList->addElement(bucket, errorCode);
 538                     hasInvisibleBuckets = TRUE;
 539                     break;
 540                 }
 541             }
 542         }
 543     }
 544     if (U_FAILURE(errorCode)) { return NULL; }
 545     if (bucketList->size() == 1) {
 546         // No real labels, show only the underflow label.
 547         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 548         if (bl == NULL) {
 549             errorCode = U_MEMORY_ALLOCATION_ERROR;
 550             return NULL;
 551         }
 552         bucketList.orphan();
 553         return bl;
 554     }
 555     // overflow bucket
 556     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
 557     if (bucket == NULL) {
 558         errorCode = U_MEMORY_ALLOCATION_ERROR;
 559         return NULL;
 560     }
 561     bucketList->addElement(bucket, errorCode); // final
 562
 563     if (hasPinyin) {
 564         // Redirect Pinyin buckets.
 565         Bucket *asciiBucket = NULL;
 566         for (int32_t i = 0; i < 26; ++i) {
 567             if (asciiBuckets[i] != NULL) {
 568                 asciiBucket = asciiBuckets[i];
 569             }
 570             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
 571                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
 572                 hasInvisibleBuckets = TRUE;
 573             }
 574         }
 575     }
 576
 577     if (U_FAILURE(errorCode)) { return NULL; }
 578     if (!hasInvisibleBuckets) {
 579         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 580         if (bl == NULL) {
 581             errorCode = U_MEMORY_ALLOCATION_ERROR;
 582             return NULL;
 583         }
 584         bucketList.orphan();
 585         return bl;
 586     }
 587     // Merge inflow buckets that are visually adjacent.
 588     // Iterate backwards: Merge inflow into overflow rather than the other way around.
 589     int32_t i = bucketList->size() - 1;
 590     Bucket *nextBucket = getBucket(*bucketList, i);
 591     while (--i > 0) {
 592         bucket = getBucket(*bucketList, i);
 593         if (bucket->displayBucket_ != NULL) {
 594             continue;  // skip invisible buckets
 595         }
 596         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
 597             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 598                 bucket->displayBucket_ = nextBucket;
 599                 continue;
 600             }
 601         }
 602         nextBucket = bucket;
 603     }
 604
 605     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
 606     if (U_FAILURE(errorCode)) {
 607         return NULL;
 608     }
 609     // Do not call publicBucketList->setDeleter():
 610     // This vector shares its objects with the bucketList.
 611     for (int32_t i = 0; i < bucketList->size(); ++i) {
 612         bucket = getBucket(*bucketList, i);
 613         if (bucket->displayBucket_ == NULL) {
 614             publicBucketList->addElement(bucket, errorCode);
 615         }
 616     }
 617     if (U_FAILURE(errorCode)) { return NULL; }
 618     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
 619     if (bl == NULL) {
 620         errorCode = U_MEMORY_ALLOCATION_ERROR;
 621         return NULL;
 622     }
 623     bucketList.orphan();
 624     publicBucketList.orphan();
 625     return bl;
 626 }
 627
 628 /**
 629  * Creates an index, and buckets and sorts the list of records into the index.
 630  */
 631 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
 632     if (U_FAILURE(errorCode) || buckets_ != NULL) {
 633         return;
 634     }
 635     buckets_ = createBucketList(errorCode);
 636     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
 637         return;
 638     }
 639
 640     // Sort the records by name.
 641     // Stable sort preserves input order of collation duplicates.
 642     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
 643
 644     // Now, we traverse all of the input, which is now sorted.
 645     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
 646     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
 647     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
 648     // we need to improve it for that case.
 649
 650     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
 651     int32_t bucketIndex = 1;
 652     Bucket *nextBucket;
 653     const UnicodeString *upperBoundary;
 654     if (bucketIndex < buckets_->bucketList_->size()) {
 655         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 656         upperBoundary = &nextBucket->lowerBoundary_;
 657     } else {
 658         nextBucket = NULL;
 659         upperBoundary = NULL;
 660     }
 661     for (int32_t i = 0; i < inputList_->size(); ++i) {
 662         Record *r = getRecord(*inputList_, i);
 663         // if the current bucket isn't the right one, find the one that is
 664         // We have a special flag for the last bucket so that we don't look any further
 665         while (upperBoundary != NULL &&
 666                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
 667             currentBucket = nextBucket;
 668             // now reset the boundary that we compare against
 669             if (bucketIndex < buckets_->bucketList_->size()) {
 670                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 671                 upperBoundary = &nextBucket->lowerBoundary_;
 672             } else {
 673                 upperBoundary = NULL;
 674             }
 675         }
 676         // now put the record into the bucket.
 677         Bucket *bucket = currentBucket;
 678         if (bucket->displayBucket_ != NULL) {
 679             bucket = bucket->displayBucket_;
 680         }
 681         if (bucket->records_ == NULL) {
 682             bucket->records_ = new UVector(errorCode);
 683             if (bucket->records_ == NULL) {
 684                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 685                 return;
 686             }
 687         }
 688         bucket->records_->addElement(r, errorCode);
 689     }
 690 }
 691
 692 void AlphabeticIndex::clearBuckets() {
 693     if (buckets_ != NULL) {
 694         delete buckets_;
 695         buckets_ = NULL;
 696         internalResetBucketIterator();
 697     }
 698 }
 699
 700 void AlphabeticIndex::internalResetBucketIterator() {
 701     labelsIterIndex_ = -1;
 702     currentBucket_ = NULL;
 703 }
 704
 705
 706 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
 707     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
 708     if (U_FAILURE(status)) {
 709         return;
 710     }
 711
 712     UnicodeSet exemplars;
 713     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
 714     if (U_SUCCESS(status)) {
 715         initialLabels_->addAll(exemplars);
 716         return;
 717     }
 718     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
 719
 720     // The locale data did not include explicit Index characters.
 721     // Synthesize a set of them from the locale's standard exemplar characters.
 722     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
 723     if (U_FAILURE(status)) {
 724         return;
 725     }
 726
 727     // question: should we add auxiliary exemplars?
 728     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
 729         exemplars.add(0x61, 0x7A);
 730     }
 731     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
 732         // cut down to small list
 733         exemplars.remove(0xAC00, 0xD7A3).
 734             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
 735             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
 736             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
 737             add(0xD30C).add(0xD558);
 738     }
 739     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
 740         // cut down to small list
 741         // make use of the fact that Ethiopic is allocated in 8's, where
 742         // the base is 0 mod 8.
 743         UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
 744         ethiopic.retainAll(exemplars);
 745         exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
 746     }
 747
 748     // Upper-case any that aren't already so.
 749     //   (We only do this for synthesized index characters.)
 750     UnicodeSetIterator it(exemplars);
 751     UnicodeString upperC;
 752     while (it.next()) {
 753         const UnicodeString &exemplarC = it.getString();
 754         upperC = exemplarC;
 755         upperC.toUpper(locale);
 756         initialLabels_->add(upperC);
 757     }
 758 }
 759
 760 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
 761     UnicodeSet contractions;
 762     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
 763     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
 764     initialLabels_->addAll(contractions);
 765     UnicodeSetIterator iter(contractions);
 766     while (iter.next()) {
 767         const UnicodeString &s = iter.getString();
 768         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
 769         UChar c = s.charAt(s.length() - 1);
 770         if (0x41 <= c && c <= 0x5A) {  // A-Z
 771             // There are Pinyin labels, add ASCII A-Z labels as well.
 772             initialLabels_->add(0x41, 0x5A);  // A-Z
 773             break;
 774         }
 775     }
 776     return TRUE;
 777 }
 778
 779
 780 /*
 781  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
 782  */
 783 static const UChar CGJ = 0x034F;
 784 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
 785     UnicodeString result;
 786     if (item.length() == 0) {
 787         return result;
 788     }
 789     int32_t i = 0;
 790     for (;;) {
 791         UChar32  cp = item.char32At(i);
 792         result.append(cp);
 793         i = item.moveIndex32(i, 1);
 794         if (i >= item.length()) {
 795             break;
 796         }
 797         result.append(CGJ);
 798     }
 799     return result;
 800 }
 801
 802
 803 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
 804     return FALSE;
 805 }
 806
 807
 808 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
 809     return FALSE;
 810 }
 811
 812
 813 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
 814     return *collator_;
 815 }
 816
 817
 818 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
 819     return inflowLabel_;
 820 }
 821
 822 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
 823     return overflowLabel_;
 824 }
 825
 826
 827 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
 828     return underflowLabel_;
 829 }
 830
 831
 832 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 833     inflowLabel_ = label;
 834     clearBuckets();
 835     return *this;
 836 }
 837
 838
 839 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 840     overflowLabel_ = label;
 841     clearBuckets();
 842     return *this;
 843 }
 844
 845
 846 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 847     underflowLabel_ = label;
 848     clearBuckets();
 849     return *this;
 850 }
 851
 852
 853 int32_t AlphabeticIndex::getMaxLabelCount() const {
 854     return maxLabelCount_;
 855 }
 856
 857
 858 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
 859     if (U_FAILURE(status)) {
 860         return *this;
 861     }
 862     if (maxLabelCount <= 0) {
 863         status = U_ILLEGAL_ARGUMENT_ERROR;
 864         return *this;
 865     }
 866     maxLabelCount_ = maxLabelCount;
 867     clearBuckets();
 868     return *this;
 869 }
 870
 871
 872 //
 873 //  init() - Common code for constructors.
 874 //
 875
 876 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
 877     if (U_FAILURE(status)) { return; }
 878     if (locale == NULL && collator_ == NULL) {
 879         status = U_ILLEGAL_ARGUMENT_ERROR;
 880         return;
 881     }
 882
 883     initialLabels_         = new UnicodeSet();
 884     if (initialLabels_ == NULL) {
 885         status = U_MEMORY_ALLOCATION_ERROR;
 886         return;
 887     }
 888
 889     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
 890     overflowLabel_ = inflowLabel_;
 891     underflowLabel_ = inflowLabel_;
 892
 893     if (collator_ == NULL) {
 894         Collator *coll = Collator::createInstance(*locale, status);
 895         if (U_FAILURE(status)) {
 896             delete coll;
 897             return;
 898         }
 899         if (coll == NULL) {
 900             status = U_MEMORY_ALLOCATION_ERROR;
 901             return;
 902         }
 903         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
 904         if (collator_ == NULL) {
 905             delete coll;
 906             status = U_UNSUPPORTED_ERROR;
 907             return;
 908         }
 909     }
 910     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
 911     if (collatorPrimaryOnly_ == NULL) {
 912         status = U_MEMORY_ALLOCATION_ERROR;
 913         return;
 914     }
 915     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
 916     firstCharsInScripts_ = firstStringsInScript(status);
 917     if (U_FAILURE(status)) { return; }
 918     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
 919     // Guard against a degenerate collator where
 920     // some script boundary strings are primary ignorable.
 921     for (;;) {
 922         if (U_FAILURE(status)) { return; }
 923         if (firstCharsInScripts_->isEmpty()) {
 924             // AlphabeticIndex requires some non-ignorable script boundary strings.
 925             status = U_ILLEGAL_ARGUMENT_ERROR;
 926             return;
 927         }
 928         if (collatorPrimaryOnly_->compare(
 929                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
 930                 emptyString_, status) == UCOL_EQUAL) {
 931             firstCharsInScripts_->removeElementAt(0);
 932         } else {
 933             break;
 934         }
 935     }
 936
 937     // Chinese index characters, which are specific to each of the several Chinese tailorings,
 938     // take precedence over the single locale data exemplar set per language.
 939     if (!addChineseIndexCharacters(status) && locale != NULL) {
 940         addIndexExemplars(*locale, status);
 941     }
 942 }
 943
 944
 945 //
 946 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
 947 //
 948 static int32_t U_CALLCONV
 949 collatorComparator(const void *context, const void *left, const void *right) {
 950     const UElement *leftElement = static_cast<const UElement *>(left);
 951     const UElement *rightElement = static_cast<const UElement *>(right);
 952     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
 953     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
 954
 955     if (leftString == rightString) {
 956         // Catches case where both are NULL
 957         return 0;
 958     }
 959     if (leftString == NULL) {
 960         return 1;
 961     };
 962     if (rightString == NULL) {
 963         return -1;
 964     }
 965     const Collator *col = static_cast<const Collator *>(context);
 966     UErrorCode errorCode = U_ZERO_ERROR;
 967     return col->compare(*leftString, *rightString, errorCode);
 968 }
 969
 970 //
 971 //  Comparison function for UVector<Record *> sorting with a collator.
 972 //
 973 static int32_t U_CALLCONV
 974 recordCompareFn(const void *context, const void *left, const void *right) {
 975     const UElement *leftElement = static_cast<const UElement *>(left);
 976     const UElement *rightElement = static_cast<const UElement *>(right);
 977     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
 978     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
 979     const Collator *col = static_cast<const Collator *>(context);
 980     UErrorCode errorCode = U_ZERO_ERROR;
 981     return col->compare(leftRec->name_, rightRec->name_, errorCode);
 982 }
 983
 984 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
 985     if (U_FAILURE(status)) {
 986         return NULL;
 987     }
 988     LocalPointer<UVector> dest(new UVector(status), status);
 989     if (U_FAILURE(status)) {
 990         return NULL;
 991     }
 992     dest->setDeleter(uprv_deleteUObject);
 993     // Fetch the script-first-primary contractions which are defined in the root collator.
 994     // They all start with U+FDD1.
 995     UnicodeSet set;
 996     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
 997     if (U_FAILURE(status)) {
 998         return NULL;
 999     }
1000     if (set.isEmpty()) {
1001         status = U_UNSUPPORTED_ERROR;
1002         return NULL;
1003     }
1004     UnicodeSetIterator iter(set);
1005     while (iter.next()) {
1006         const UnicodeString &boundary = iter.getString();
1007         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1008         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1009             // Ignore boundaries for the special reordering groups.
1010             // Take only those for "real scripts" (where the sample character is a Letter,
1011             // and the one for unassigned implicit weights (Cn).
1012             continue;
1013         }
1014         UnicodeString *s = new UnicodeString(boundary);
1015         if (s == NULL) {
1016             status = U_MEMORY_ALLOCATION_ERROR;
1017             return NULL;
1018         }
1019         dest->addElement(s, status);
1020     }
1021     return dest.orphan();
1022 }
1023
1024
1025 namespace {
1026
1027 /**
1028  * Returns true if one index character string is "better" than the other.
1029  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1030  * better, and otherwise binary-less-than is better.
1031  */
1032 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1033                                 const UnicodeString &one, const UnicodeString &other) {
1034     // This is called with primary-equal strings, but never with one.equals(other).
1035     UErrorCode status = U_ZERO_ERROR;
1036     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1037     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1038     if (U_FAILURE(status)) { return FALSE; }
1039     int32_t result = n1.countChar32() - n2.countChar32();
1040     if (result != 0) {
1041         return result < 0;
1042     }
1043     result = n1.compareCodePointOrder(n2);
1044     if (result != 0) {
1045         return result < 0;
1046     }
1047     return one.compareCodePointOrder(other) < 0;
1048 }
1049
1050 }  // namespace
1051
1052 //
1053 //  Constructor & Destructor for AlphabeticIndex::Record
1054 //
1055 //     Records are internal only, instances are not directly surfaced in the public API.
1056 //     This class is mostly struct-like, with all public fields.
1057
1058 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1059         : name_(name), data_(data) {}
1060
1061 AlphabeticIndex::Record::~Record() {
1062 }
1063
1064
1065 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1066     if (U_FAILURE(status)) {
1067         return *this;
1068     }
1069     if (inputList_ == NULL) {
1070         inputList_ = new UVector(status);
1071         if (inputList_ == NULL) {
1072             status = U_MEMORY_ALLOCATION_ERROR;
1073             return *this;
1074         }
1075         inputList_->setDeleter(alphaIndex_deleteRecord);
1076     }
1077     Record *r = new Record(name, data);
1078     if (r == NULL) {
1079         status = U_MEMORY_ALLOCATION_ERROR;
1080         return *this;
1081     }
1082     inputList_->addElement(r, status);
1083     clearBuckets();
1084     //std::string ss;
1085     //std::string ss2;
1086     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1087     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1088     return *this;
1089 }
1090
1091
1092 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1093     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1094         inputList_->removeAllElements();
1095         clearBuckets();
1096     }
1097     return *this;
1098 }
1099
1100 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1101     initBuckets(status);
1102     if (U_FAILURE(status)) {
1103         return 0;
1104     }
1105     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1106 }
1107
1108
1109 int32_t AlphabeticIndex::getBucketIndex() const {
1110     return labelsIterIndex_;
1111 }
1112
1113
1114 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1115     if (U_FAILURE(status)) {
1116         return FALSE;
1117     }
1118     if (buckets_ == NULL && currentBucket_ != NULL) {
1119         status = U_ENUM_OUT_OF_SYNC_ERROR;
1120         return FALSE;
1121     }
1122     initBuckets(status);
1123     if (U_FAILURE(status)) {
1124         return FALSE;
1125     }
1126     ++labelsIterIndex_;
1127     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1128         labelsIterIndex_ = buckets_->getBucketCount();
1129         return FALSE;
1130     }
1131     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1132     resetRecordIterator();
1133     return TRUE;
1134 }
1135
1136 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1137     if (currentBucket_ != NULL) {
1138         return currentBucket_->label_;
1139     } else {
1140         return emptyString_;
1141     }
1142 }
1143
1144
1145 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1146     if (currentBucket_ != NULL) {
1147         return currentBucket_->labelType_;
1148     } else {
1149         return U_ALPHAINDEX_NORMAL;
1150     }
1151 }
1152
1153
1154 int32_t AlphabeticIndex::getBucketRecordCount() const {
1155     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1156         return currentBucket_->records_->size();
1157     } else {
1158         return 0;
1159     }
1160 }
1161
1162
1163 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1164     if (U_FAILURE(status)) {
1165         return *this;
1166     }
1167     internalResetBucketIterator();
1168     return *this;
1169 }
1170
1171
1172 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1173     if (U_FAILURE(status)) {
1174         return FALSE;
1175     }
1176     if (currentBucket_ == NULL) {
1177         // We are trying to iterate over the items in a bucket, but there is no
1178         // current bucket from the enumeration of buckets.
1179         status = U_INVALID_STATE_ERROR;
1180         return FALSE;
1181     }
1182     if (buckets_ == NULL) {
1183         status = U_ENUM_OUT_OF_SYNC_ERROR;
1184         return FALSE;
1185     }
1186     if (currentBucket_->records_ == NULL) {
1187         return FALSE;
1188     }
1189     ++itemsIterIndex_;
1190     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1191         itemsIterIndex_  = currentBucket_->records_->size();
1192         return FALSE;
1193     }
1194     return TRUE;
1195 }
1196
1197
1198 const UnicodeString &AlphabeticIndex::getRecordName() const {
1199     const UnicodeString *retStr = &emptyString_;
1200     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1201         itemsIterIndex_ >= 0 &&
1202         itemsIterIndex_ < currentBucket_->records_->size()) {
1203             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1204             retStr = &item->name_;
1205     }
1206     return *retStr;
1207 }
1208
1209 const void *AlphabeticIndex::getRecordData() const {
1210     const void *retPtr = NULL;
1211     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1212         itemsIterIndex_ >= 0 &&
1213         itemsIterIndex_ < currentBucket_->records_->size()) {
1214             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1215             retPtr = item->data_;
1216     }
1217     return retPtr;
1218 }
1219
1220
1221 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1222     itemsIterIndex_ = -1;
1223     return *this;
1224 }
1225
1226
1227
1228 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1229                                 const UnicodeString &lowerBoundary,
1230                                 UAlphabeticIndexLabelType type)
1231         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1232           displayBucket_(NULL), displayIndex_(-1),
1233           records_(NULL) {
1234 }
1235
1236
1237 AlphabeticIndex::Bucket::~Bucket() {
1238     delete records_;
1239 }
1240
1241 U_NAMESPACE_END
1242
1243 #endif  // !UCONFIG_NO_COLLATION