icuSources/i18n/alphaindex.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 2009-2014, International Business Machines Corporation and
   4 * others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8 #include "unicode/utypes.h"
   9
  10 #if !UCONFIG_NO_COLLATION
  11
  12 #include "unicode/alphaindex.h"
  13 #include "unicode/coll.h"
  14 #include "unicode/localpointer.h"
  15 #include "unicode/normalizer2.h"
  16 #include "unicode/tblcoll.h"
  17 #include "unicode/uchar.h"
  18 #include "unicode/ulocdata.h"
  19 #include "unicode/uniset.h"
  20 #include "unicode/uobject.h"
  21 #include "unicode/usetiter.h"
  22 #include "unicode/utf16.h"
  23
  24 #include "cmemory.h"
  25 #include "cstring.h"
  26 #include "uassert.h"
  27 #include "uvector.h"
  28 #include "uvectr64.h"
  29
  30 //#include <string>
  31 //#include <iostream>
  32
  33 U_NAMESPACE_BEGIN
  34
  35 namespace {
  36
  37 /**
  38  * Prefix string for Chinese index buckets.
  39  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
  40  */
  41 const UChar BASE[1] = { 0xFDD0 };
  42 const int32_t BASE_LENGTH = 1;
  43
  44 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
  45                                 const UnicodeString &one, const UnicodeString &other);
  46
  47 }  // namespace
  48
  49 static int32_t U_CALLCONV
  50 collatorComparator(const void *context, const void *left, const void *right);
  51
  52 static int32_t U_CALLCONV
  53 recordCompareFn(const void *context, const void *left, const void *right);
  54
  55 //  UVector<Record *> support function, delete a Record.
  56 static void U_CALLCONV
  57 alphaIndex_deleteRecord(void *obj) {
  58     delete static_cast<AlphabeticIndex::Record *>(obj);
  59 }
  60
  61 namespace {
  62
  63 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
  64                            UErrorCode &errorCode) {
  65     if (U_FAILURE(errorCode)) { return NULL; }
  66     if (owned.isValid()) {
  67         return owned.orphan();
  68     }
  69     UnicodeString *p = new UnicodeString(s);
  70     if (p == NULL) {
  71         errorCode = U_MEMORY_ALLOCATION_ERROR;
  72     }
  73     return p;
  74 }
  75
  76 inline UnicodeString *getString(const UVector &list, int32_t i) {
  77     return static_cast<UnicodeString *>(list[i]);
  78 }
  79
  80 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
  81     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
  82 }
  83
  84 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
  85     return static_cast<AlphabeticIndex::Record *>(list[i]);
  86 }
  87
  88 /**
  89  * Like Java Collections.binarySearch(List, String, Comparator).
  90  *
  91  * @return the index>=0 where the item was found,
  92  *         or the index<0 for inserting the string at ~index in sorted order
  93  */
  94 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
  95     if (list.size() == 0) { return ~0; }
  96     int32_t start = 0;
  97     int32_t limit = list.size();
  98     for (;;) {
  99         int32_t i = (start + limit) / 2;
 100         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
 101         UErrorCode errorCode = U_ZERO_ERROR;
 102         UCollationResult cmp = coll.compare(s, *si, errorCode);
 103         if (cmp == UCOL_EQUAL) {
 104             return i;
 105         } else if (cmp < 0) {
 106             if (i == start) {
 107                 return ~start;  // insert s before *si
 108             }
 109             limit = i;
 110         } else {
 111             if (i == start) {
 112                 return ~(start + 1);  // insert s after *si
 113             }
 114             start = i;
 115         }
 116     }
 117 }
 118
 119 }  // namespace
 120
 121 // The BucketList is not in the anonymous namespace because only Clang
 122 // seems to support its use in other classes from there.
 123 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
 124 class BucketList : public UObject {
 125 public:
 126     BucketList(UVector *bucketList, UVector *publicBucketList)
 127             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
 128         int32_t displayIndex = 0;
 129         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
 130             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
 131         }
 132     }
 133
 134     // The virtual destructor must not be inline.
 135     // See ticket #8454 for details.
 136     virtual ~BucketList();
 137
 138     int32_t getBucketCount() const {
 139         return immutableVisibleList_->size();
 140     }
 141
 142     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
 143                            UErrorCode &errorCode) {
 144         // binary search
 145         int32_t start = 0;
 146         int32_t limit = bucketList_->size();
 147         while ((start + 1) < limit) {
 148             int32_t i = (start + limit) / 2;
 149             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
 150             UCollationResult nameVsBucket =
 151                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
 152             if (nameVsBucket < 0) {
 153                 limit = i;
 154             } else {
 155                 start = i;
 156             }
 157         }
 158         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
 159         if (bucket->displayBucket_ != NULL) {
 160             bucket = bucket->displayBucket_;
 161         }
 162         return bucket->displayIndex_;
 163     }
 164
 165     /** All of the buckets, visible and invisible. */
 166     UVector *bucketList_;
 167     /** Just the visible buckets. */
 168     UVector *immutableVisibleList_;
 169 };
 170
 171 BucketList::~BucketList() {
 172     delete bucketList_;
 173     if (immutableVisibleList_ != bucketList_) {
 174         delete immutableVisibleList_;
 175     }
 176 }
 177
 178 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
 179     delete buckets_;
 180     delete collatorPrimaryOnly_;
 181 }
 182
 183 int32_t
 184 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
 185     return buckets_->getBucketCount();
 186 }
 187
 188 int32_t
 189 AlphabeticIndex::ImmutableIndex::getBucketIndex(
 190         const UnicodeString &name, UErrorCode &errorCode) const {
 191     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
 192 }
 193
 194 const AlphabeticIndex::Bucket *
 195 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
 196     if (0 <= index && index < buckets_->getBucketCount()) {
 197         return icu::getBucket(*buckets_->immutableVisibleList_, index);
 198     } else {
 199         return NULL;
 200     }
 201 }
 202
 203 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
 204         : inputList_(NULL),
 205           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 206           maxLabelCount_(99),
 207           initialLabels_(NULL), firstCharsInScripts_(NULL),
 208           collator_(NULL), collatorPrimaryOnly_(NULL),
 209           buckets_(NULL) {
 210     init(&locale, status);
 211 }
 212
 213
 214 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
 215         : inputList_(NULL),
 216           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
 217           maxLabelCount_(99),
 218           initialLabels_(NULL), firstCharsInScripts_(NULL),
 219           collator_(collator), collatorPrimaryOnly_(NULL),
 220           buckets_(NULL) {
 221     init(NULL, status);
 222 }
 223
 224
 225
 226 AlphabeticIndex::~AlphabeticIndex() {
 227     delete collator_;
 228     delete collatorPrimaryOnly_;
 229     delete firstCharsInScripts_;
 230     delete buckets_;
 231     delete inputList_;
 232     delete initialLabels_;
 233 }
 234
 235
 236 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
 237     if (U_FAILURE(status)) {
 238         return *this;
 239     }
 240     initialLabels_->addAll(additions);
 241     clearBuckets();
 242     return *this;
 243 }
 244
 245
 246 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
 247     addIndexExemplars(locale, status);
 248     clearBuckets();
 249     return *this;
 250 }
 251
 252
 253 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
 254     if (U_FAILURE(errorCode)) { return NULL; }
 255     // In C++, the ImmutableIndex must own its copy of the BucketList,
 256     // even if it contains no records, for proper memory management.
 257     // We could clone the buckets_ if they are not NULL,
 258     // but that would be worth it only if this method is called multiple times,
 259     // or called after using the old-style bucket iterator API.
 260     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
 261     LocalPointer<RuleBasedCollator> coll(
 262         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
 263     if (immutableBucketList.isNull() || coll.isNull()) {
 264         errorCode = U_MEMORY_ALLOCATION_ERROR;
 265         return NULL;
 266     }
 267     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
 268     if (immIndex == NULL) {
 269         errorCode = U_MEMORY_ALLOCATION_ERROR;
 270         return NULL;
 271     }
 272     // The ImmutableIndex adopted its parameter objects.
 273     immutableBucketList.orphan();
 274     coll.orphan();
 275     return immIndex;
 276 }
 277
 278 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
 279     initBuckets(status);
 280     if (U_FAILURE(status)) {
 281         return 0;
 282     }
 283     return buckets_->getBucketCount();
 284 }
 285
 286
 287 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
 288     if (U_FAILURE(status) || inputList_ == NULL) {
 289         return 0;
 290     }
 291     return inputList_->size();
 292 }
 293
 294 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
 295     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
 296     if (U_FAILURE(errorCode)) { return; }
 297
 298     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
 299     const UnicodeString &overflowBoundary =
 300         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
 301
 302     // We make a sorted array of elements.
 303     // Some of the input may be redundant.
 304     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
 305     // We filter out those cases.
 306     UnicodeSetIterator iter(*initialLabels_);
 307     while (iter.next()) {
 308         const UnicodeString *item = &iter.getString();
 309         LocalPointer<UnicodeString> ownedItem;
 310         UBool checkDistinct;
 311         int32_t itemLength = item->length();
 312         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
 313             checkDistinct = FALSE;
 314         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
 315                 item->charAt(itemLength - 2) != 0x2a) {
 316             // Use a label if it is marked with one trailing star,
 317             // even if the label string sorts the same when all contractions are suppressed.
 318             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
 319             item = ownedItem.getAlias();
 320             if (item == NULL) {
 321                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 322                 return;
 323             }
 324             checkDistinct = FALSE;
 325         } else {
 326             checkDistinct = TRUE;
 327         }
 328         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
 329             // Ignore a primary-ignorable or non-alphabetic index character.
 330         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
 331             // Ignore an index character that will land in the overflow bucket.
 332         } else if (checkDistinct &&
 333                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
 334             // Ignore a multi-code point index character that does not sort distinctly
 335             // from the sequence of its separate characters.
 336         } else {
 337             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
 338             if (insertionPoint < 0) {
 339                 indexCharacters.insertElementAt(
 340                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
 341             } else {
 342                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
 343                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
 344                     indexCharacters.setElementAt(
 345                         ownedString(*item, ownedItem, errorCode), insertionPoint);
 346                 }
 347             }
 348         }
 349     }
 350     if (U_FAILURE(errorCode)) { return; }
 351
 352     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
 353
 354     int32_t size = indexCharacters.size() - 1;
 355     if (size > maxLabelCount_) {
 356         int32_t count = 0;
 357         int32_t old = -1;
 358         for (int32_t i = 0; i < indexCharacters.size();) {
 359             ++count;
 360             int32_t bump = count * maxLabelCount_ / size;
 361             if (bump == old) {
 362                 indexCharacters.removeElementAt(i);
 363             } else {
 364                 old = bump;
 365                 ++i;
 366             }
 367         }
 368     }
 369 }
 370
 371 namespace {
 372
 373 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
 374     if (!current.startsWith(BASE, BASE_LENGTH)) {
 375         return current;
 376     }
 377     UChar rest = current.charAt(BASE_LENGTH);
 378     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
 379         int32_t count = rest-0x2800;
 380         temp.setTo((UChar)(0x30 + count % 10));
 381         if (count >= 10) {
 382             count /= 10;
 383             temp.insert(0, (UChar)(0x30 + count % 10));
 384             if (count >= 10) {
 385                 count /= 10;
 386                 temp.insert(0, (UChar)(0x30 + count));
 387             }
 388         }
 389         return temp.append((UChar)0x5283);
 390     }
 391     return temp.setTo(current, BASE_LENGTH);
 392 }
 393
 394 UBool hasMultiplePrimaryWeights(
 395         const RuleBasedCollator &coll, uint32_t variableTop,
 396         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
 397     ces.removeAllElements();
 398     coll.internalGetCEs(s, ces, errorCode);
 399     if (U_FAILURE(errorCode)) { return FALSE; }
 400     UBool seenPrimary = FALSE;
 401     for (int32_t i = 0; i < ces.size(); ++i) {
 402         int64_t ce = ces.elementAti(i);
 403         uint32_t p = (uint32_t)(ce >> 32);
 404         if (p > variableTop) {
 405             // not primary ignorable
 406             if (seenPrimary) {
 407                 return TRUE;
 408             }
 409             seenPrimary = TRUE;
 410         }
 411     }
 412     return FALSE;
 413 }
 414
 415 }  // namespace
 416
 417 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
 418     // Initialize indexCharacters.
 419     UVector indexCharacters(errorCode);
 420     indexCharacters.setDeleter(uprv_deleteUObject);
 421     initLabels(indexCharacters, errorCode);
 422     if (U_FAILURE(errorCode)) { return NULL; }
 423
 424     // Variables for hasMultiplePrimaryWeights().
 425     UVector64 ces(errorCode);
 426     uint32_t variableTop;
 427     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
 428         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
 429     } else {
 430         variableTop = 0;
 431     }
 432     UBool hasInvisibleBuckets = FALSE;
 433
 434     // Helper arrays for Chinese Pinyin collation.
 435     Bucket *asciiBuckets[26] = {
 436         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 437         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 438     };
 439     Bucket *pinyinBuckets[26] = {
 440         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 441         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
 442     };
 443     UBool hasPinyin = FALSE;
 444
 445     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
 446     if (U_FAILURE(errorCode)) {
 447         return NULL;
 448     }
 449     bucketList->setDeleter(uprv_deleteUObject);
 450
 451     // underflow bucket
 452     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
 453     if (bucket == NULL) {
 454         errorCode = U_MEMORY_ALLOCATION_ERROR;
 455         return NULL;
 456     }
 457     bucketList->addElement(bucket, errorCode);
 458     if (U_FAILURE(errorCode)) { return NULL; }
 459
 460     UnicodeString temp;
 461
 462     // fix up the list, adding underflow, additions, overflow
 463     // Insert inflow labels as needed.
 464     int32_t scriptIndex = -1;
 465     const UnicodeString *scriptUpperBoundary = &emptyString_;
 466     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
 467         UnicodeString &current = *getString(indexCharacters, i);
 468         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
 469             // We crossed the script boundary into a new script.
 470             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
 471             UBool skippedScript = FALSE;
 472             for (;;) {
 473                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
 474                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
 475                     break;
 476                 }
 477                 skippedScript = TRUE;
 478             }
 479             if (skippedScript && bucketList->size() > 1) {
 480                 // We are skipping one or more scripts,
 481                 // and we are not just getting out of the underflow label.
 482                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
 483                 if (bucket == NULL) {
 484                     errorCode = U_MEMORY_ALLOCATION_ERROR;
 485                     return NULL;
 486                 }
 487                 bucketList->addElement(bucket, errorCode);
 488             }
 489         }
 490         // Add a bucket with the current label.
 491         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
 492         if (bucket == NULL) {
 493             errorCode = U_MEMORY_ALLOCATION_ERROR;
 494             return NULL;
 495         }
 496         bucketList->addElement(bucket, errorCode);
 497         // Remember ASCII and Pinyin buckets for Pinyin redirects.
 498         UChar c;
 499         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
 500             asciiBuckets[c - 0x41] = bucket;
 501         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
 502                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
 503             pinyinBuckets[c - 0x41] = bucket;
 504             hasPinyin = TRUE;
 505         }
 506         // Check for multiple primary weights.
 507         if (!current.startsWith(BASE, BASE_LENGTH) &&
 508                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
 509                                           ces, errorCode) &&
 510                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
 511             // "AE-ligature" or "Sch" etc.
 512             for (int32_t i = bucketList->size() - 2;; --i) {
 513                 Bucket *singleBucket = getBucket(*bucketList, i);
 514                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 515                     // There is no single-character bucket since the last
 516                     // underflow or inflow label.
 517                     break;
 518                 }
 519                 if (singleBucket->displayBucket_ == NULL &&
 520                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
 521                                                    singleBucket->lowerBoundary_,
 522                                                    ces, errorCode)) {
 523                     // Add an invisible bucket that redirects strings greater than the expansion
 524                     // to the previous single-character bucket.
 525                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
 526                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
 527                     bucket = new Bucket(emptyString_,
 528                         UnicodeString(current).append((UChar)0xFFFF),
 529                         U_ALPHAINDEX_NORMAL);
 530                     if (bucket == NULL) {
 531                         errorCode = U_MEMORY_ALLOCATION_ERROR;
 532                         return NULL;
 533                     }
 534                     bucket->displayBucket_ = singleBucket;
 535                     bucketList->addElement(bucket, errorCode);
 536                     hasInvisibleBuckets = TRUE;
 537                     break;
 538                 }
 539             }
 540         }
 541     }
 542     if (U_FAILURE(errorCode)) { return NULL; }
 543     if (bucketList->size() == 1) {
 544         // No real labels, show only the underflow label.
 545         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 546         if (bl == NULL) {
 547             errorCode = U_MEMORY_ALLOCATION_ERROR;
 548             return NULL;
 549         }
 550         bucketList.orphan();
 551         return bl;
 552     }
 553     // overflow bucket
 554     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
 555     if (bucket == NULL) {
 556         errorCode = U_MEMORY_ALLOCATION_ERROR;
 557         return NULL;
 558     }
 559     bucketList->addElement(bucket, errorCode); // final
 560
 561     if (hasPinyin) {
 562         // Redirect Pinyin buckets.
 563         Bucket *asciiBucket = NULL;
 564         for (int32_t i = 0; i < 26; ++i) {
 565             if (asciiBuckets[i] != NULL) {
 566                 asciiBucket = asciiBuckets[i];
 567             }
 568             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
 569                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
 570                 hasInvisibleBuckets = TRUE;
 571             }
 572         }
 573     }
 574
 575     if (U_FAILURE(errorCode)) { return NULL; }
 576     if (!hasInvisibleBuckets) {
 577         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
 578         if (bl == NULL) {
 579             errorCode = U_MEMORY_ALLOCATION_ERROR;
 580             return NULL;
 581         }
 582         bucketList.orphan();
 583         return bl;
 584     }
 585     // Merge inflow buckets that are visually adjacent.
 586     // Iterate backwards: Merge inflow into overflow rather than the other way around.
 587     int32_t i = bucketList->size() - 1;
 588     Bucket *nextBucket = getBucket(*bucketList, i);
 589     while (--i > 0) {
 590         bucket = getBucket(*bucketList, i);
 591         if (bucket->displayBucket_ != NULL) {
 592             continue;  // skip invisible buckets
 593         }
 594         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
 595             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
 596                 bucket->displayBucket_ = nextBucket;
 597                 continue;
 598             }
 599         }
 600         nextBucket = bucket;
 601     }
 602
 603     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
 604     if (U_FAILURE(errorCode)) {
 605         return NULL;
 606     }
 607     // Do not call publicBucketList->setDeleter():
 608     // This vector shares its objects with the bucketList.
 609     for (int32_t i = 0; i < bucketList->size(); ++i) {
 610         bucket = getBucket(*bucketList, i);
 611         if (bucket->displayBucket_ == NULL) {
 612             publicBucketList->addElement(bucket, errorCode);
 613         }
 614     }
 615     if (U_FAILURE(errorCode)) { return NULL; }
 616     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
 617     if (bl == NULL) {
 618         errorCode = U_MEMORY_ALLOCATION_ERROR;
 619         return NULL;
 620     }
 621     bucketList.orphan();
 622     publicBucketList.orphan();
 623     return bl;
 624 }
 625
 626 /**
 627  * Creates an index, and buckets and sorts the list of records into the index.
 628  */
 629 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
 630     if (U_FAILURE(errorCode) || buckets_ != NULL) {
 631         return;
 632     }
 633     buckets_ = createBucketList(errorCode);
 634     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
 635         return;
 636     }
 637
 638     // Sort the records by name.
 639     // Stable sort preserves input order of collation duplicates.
 640     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
 641
 642     // Now, we traverse all of the input, which is now sorted.
 643     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
 644     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
 645     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
 646     // we need to improve it for that case.
 647
 648     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
 649     int32_t bucketIndex = 1;
 650     Bucket *nextBucket;
 651     const UnicodeString *upperBoundary;
 652     if (bucketIndex < buckets_->bucketList_->size()) {
 653         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 654         upperBoundary = &nextBucket->lowerBoundary_;
 655     } else {
 656         nextBucket = NULL;
 657         upperBoundary = NULL;
 658     }
 659     for (int32_t i = 0; i < inputList_->size(); ++i) {
 660         Record *r = getRecord(*inputList_, i);
 661         // if the current bucket isn't the right one, find the one that is
 662         // We have a special flag for the last bucket so that we don't look any further
 663         while (upperBoundary != NULL &&
 664                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
 665             currentBucket = nextBucket;
 666             // now reset the boundary that we compare against
 667             if (bucketIndex < buckets_->bucketList_->size()) {
 668                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
 669                 upperBoundary = &nextBucket->lowerBoundary_;
 670             } else {
 671                 upperBoundary = NULL;
 672             }
 673         }
 674         // now put the record into the bucket.
 675         Bucket *bucket = currentBucket;
 676         if (bucket->displayBucket_ != NULL) {
 677             bucket = bucket->displayBucket_;
 678         }
 679         if (bucket->records_ == NULL) {
 680             bucket->records_ = new UVector(errorCode);
 681             if (bucket->records_ == NULL) {
 682                 errorCode = U_MEMORY_ALLOCATION_ERROR;
 683                 return;
 684             }
 685         }
 686         bucket->records_->addElement(r, errorCode);
 687     }
 688 }
 689
 690 void AlphabeticIndex::clearBuckets() {
 691     if (buckets_ != NULL) {
 692         delete buckets_;
 693         buckets_ = NULL;
 694         internalResetBucketIterator();
 695     }
 696 }
 697
 698 void AlphabeticIndex::internalResetBucketIterator() {
 699     labelsIterIndex_ = -1;
 700     currentBucket_ = NULL;
 701 }
 702
 703
 704 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
 705     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
 706     if (U_FAILURE(status)) {
 707         return;
 708     }
 709
 710     UnicodeSet exemplars;
 711     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
 712     if (U_SUCCESS(status)) {
 713         initialLabels_->addAll(exemplars);
 714         return;
 715     }
 716     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
 717
 718     // The locale data did not include explicit Index characters.
 719     // Synthesize a set of them from the locale's standard exemplar characters.
 720     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
 721     if (U_FAILURE(status)) {
 722         return;
 723     }
 724
 725     // question: should we add auxiliary exemplars?
 726     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
 727         exemplars.add(0x61, 0x7A);
 728     }
 729     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
 730         // cut down to small list
 731         exemplars.remove(0xAC00, 0xD7A3).
 732             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
 733             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
 734             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
 735             add(0xD30C).add(0xD558);
 736     }
 737     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
 738         // cut down to small list
 739         // make use of the fact that Ethiopic is allocated in 8's, where
 740         // the base is 0 mod 8.
 741         UnicodeSet ethiopic(
 742             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
 743         UnicodeSetIterator it(ethiopic);
 744         while (it.next() && !it.isString()) {
 745             if ((it.getCodepoint() & 0x7) != 0) {
 746                 exemplars.remove(it.getCodepoint());
 747             }
 748         }
 749     }
 750
 751     // Upper-case any that aren't already so.
 752     //   (We only do this for synthesized index characters.)
 753     UnicodeSetIterator it(exemplars);
 754     UnicodeString upperC;
 755     while (it.next()) {
 756         const UnicodeString &exemplarC = it.getString();
 757         upperC = exemplarC;
 758         upperC.toUpper(locale);
 759         initialLabels_->add(upperC);
 760     }
 761 }
 762
 763 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
 764     UnicodeSet contractions;
 765     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
 766     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
 767     initialLabels_->addAll(contractions);
 768     UnicodeSetIterator iter(contractions);
 769     while (iter.next()) {
 770         const UnicodeString &s = iter.getString();
 771         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
 772         UChar c = s.charAt(s.length() - 1);
 773         if (0x41 <= c && c <= 0x5A) {  // A-Z
 774             // There are Pinyin labels, add ASCII A-Z labels as well.
 775             initialLabels_->add(0x41, 0x5A);  // A-Z
 776             break;
 777         }
 778     }
 779     return TRUE;
 780 }
 781
 782
 783 /*
 784  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
 785  */
 786 static const UChar CGJ = 0x034F;
 787 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
 788     UnicodeString result;
 789     if (item.length() == 0) {
 790         return result;
 791     }
 792     int32_t i = 0;
 793     for (;;) {
 794         UChar32  cp = item.char32At(i);
 795         result.append(cp);
 796         i = item.moveIndex32(i, 1);
 797         if (i >= item.length()) {
 798             break;
 799         }
 800         result.append(CGJ);
 801     }
 802     return result;
 803 }
 804
 805
 806 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
 807     return FALSE;
 808 }
 809
 810
 811 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
 812     return FALSE;
 813 }
 814
 815
 816 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
 817     return *collator_;
 818 }
 819
 820
 821 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
 822     return inflowLabel_;
 823 }
 824
 825 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
 826     return overflowLabel_;
 827 }
 828
 829
 830 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
 831     return underflowLabel_;
 832 }
 833
 834
 835 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 836     inflowLabel_ = label;
 837     clearBuckets();
 838     return *this;
 839 }
 840
 841
 842 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 843     overflowLabel_ = label;
 844     clearBuckets();
 845     return *this;
 846 }
 847
 848
 849 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
 850     underflowLabel_ = label;
 851     clearBuckets();
 852     return *this;
 853 }
 854
 855
 856 int32_t AlphabeticIndex::getMaxLabelCount() const {
 857     return maxLabelCount_;
 858 }
 859
 860
 861 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
 862     if (U_FAILURE(status)) {
 863         return *this;
 864     }
 865     if (maxLabelCount <= 0) {
 866         status = U_ILLEGAL_ARGUMENT_ERROR;
 867         return *this;
 868     }
 869     maxLabelCount_ = maxLabelCount;
 870     clearBuckets();
 871     return *this;
 872 }
 873
 874
 875 //
 876 //  init() - Common code for constructors.
 877 //
 878
 879 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
 880     if (U_FAILURE(status)) { return; }
 881     if (locale == NULL && collator_ == NULL) {
 882         status = U_ILLEGAL_ARGUMENT_ERROR;
 883         return;
 884     }
 885
 886     initialLabels_         = new UnicodeSet();
 887     if (initialLabels_ == NULL) {
 888         status = U_MEMORY_ALLOCATION_ERROR;
 889         return;
 890     }
 891
 892     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
 893     overflowLabel_ = inflowLabel_;
 894     underflowLabel_ = inflowLabel_;
 895
 896     if (collator_ == NULL) {
 897         Collator *coll = Collator::createInstance(*locale, status);
 898         if (U_FAILURE(status)) {
 899             delete coll;
 900             return;
 901         }
 902         if (coll == NULL) {
 903             status = U_MEMORY_ALLOCATION_ERROR;
 904             return;
 905         }
 906         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
 907         if (collator_ == NULL) {
 908             delete coll;
 909             status = U_UNSUPPORTED_ERROR;
 910             return;
 911         }
 912     }
 913     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
 914     if (collatorPrimaryOnly_ == NULL) {
 915         status = U_MEMORY_ALLOCATION_ERROR;
 916         return;
 917     }
 918     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
 919     firstCharsInScripts_ = firstStringsInScript(status);
 920     if (U_FAILURE(status)) { return; }
 921     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
 922     // Guard against a degenerate collator where
 923     // some script boundary strings are primary ignorable.
 924     for (;;) {
 925         if (U_FAILURE(status)) { return; }
 926         if (firstCharsInScripts_->isEmpty()) {
 927             // AlphabeticIndex requires some non-ignorable script boundary strings.
 928             status = U_ILLEGAL_ARGUMENT_ERROR;
 929             return;
 930         }
 931         if (collatorPrimaryOnly_->compare(
 932                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
 933                 emptyString_, status) == UCOL_EQUAL) {
 934             firstCharsInScripts_->removeElementAt(0);
 935         } else {
 936             break;
 937         }
 938     }
 939
 940     // Chinese index characters, which are specific to each of the several Chinese tailorings,
 941     // take precedence over the single locale data exemplar set per language.
 942     if (!addChineseIndexCharacters(status) && locale != NULL) {
 943         addIndexExemplars(*locale, status);
 944     }
 945 }
 946
 947
 948 //
 949 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
 950 //
 951 static int32_t U_CALLCONV
 952 collatorComparator(const void *context, const void *left, const void *right) {
 953     const UElement *leftElement = static_cast<const UElement *>(left);
 954     const UElement *rightElement = static_cast<const UElement *>(right);
 955     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
 956     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
 957
 958     if (leftString == rightString) {
 959         // Catches case where both are NULL
 960         return 0;
 961     }
 962     if (leftString == NULL) {
 963         return 1;
 964     };
 965     if (rightString == NULL) {
 966         return -1;
 967     }
 968     const Collator *col = static_cast<const Collator *>(context);
 969     UErrorCode errorCode = U_ZERO_ERROR;
 970     return col->compare(*leftString, *rightString, errorCode);
 971 }
 972
 973 //
 974 //  Comparison function for UVector<Record *> sorting with a collator.
 975 //
 976 static int32_t U_CALLCONV
 977 recordCompareFn(const void *context, const void *left, const void *right) {
 978     const UElement *leftElement = static_cast<const UElement *>(left);
 979     const UElement *rightElement = static_cast<const UElement *>(right);
 980     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
 981     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
 982     const Collator *col = static_cast<const Collator *>(context);
 983     UErrorCode errorCode = U_ZERO_ERROR;
 984     return col->compare(leftRec->name_, rightRec->name_, errorCode);
 985 }
 986
 987 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
 988     if (U_FAILURE(status)) {
 989         return NULL;
 990     }
 991     LocalPointer<UVector> dest(new UVector(status), status);
 992     if (U_FAILURE(status)) {
 993         return NULL;
 994     }
 995     dest->setDeleter(uprv_deleteUObject);
 996     // Fetch the script-first-primary contractions which are defined in the root collator.
 997     // They all start with U+FDD1.
 998     UnicodeSet set;
 999     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1000     if (U_FAILURE(status)) {
1001         return NULL;
1002     }
1003     if (set.isEmpty()) {
1004         status = U_UNSUPPORTED_ERROR;
1005         return NULL;
1006     }
1007     UnicodeSetIterator iter(set);
1008     while (iter.next()) {
1009         const UnicodeString &boundary = iter.getString();
1010         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1011         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1012             // Ignore boundaries for the special reordering groups.
1013             // Take only those for "real scripts" (where the sample character is a Letter,
1014             // and the one for unassigned implicit weights (Cn).
1015             continue;
1016         }
1017         UnicodeString *s = new UnicodeString(boundary);
1018         if (s == NULL) {
1019             status = U_MEMORY_ALLOCATION_ERROR;
1020             return NULL;
1021         }
1022         dest->addElement(s, status);
1023     }
1024     return dest.orphan();
1025 }
1026
1027
1028 namespace {
1029
1030 /**
1031  * Returns true if one index character string is "better" than the other.
1032  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1033  * better, and otherwise binary-less-than is better.
1034  */
1035 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1036                                 const UnicodeString &one, const UnicodeString &other) {
1037     // This is called with primary-equal strings, but never with one.equals(other).
1038     UErrorCode status = U_ZERO_ERROR;
1039     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1040     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1041     if (U_FAILURE(status)) { return FALSE; }
1042     int32_t result = n1.countChar32() - n2.countChar32();
1043     if (result != 0) {
1044         return result < 0;
1045     }
1046     result = n1.compareCodePointOrder(n2);
1047     if (result != 0) {
1048         return result < 0;
1049     }
1050     return one.compareCodePointOrder(other) < 0;
1051 }
1052
1053 }  // namespace
1054
1055 //
1056 //  Constructor & Destructor for AlphabeticIndex::Record
1057 //
1058 //     Records are internal only, instances are not directly surfaced in the public API.
1059 //     This class is mostly struct-like, with all public fields.
1060
1061 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1062         : name_(name), data_(data) {}
1063
1064 AlphabeticIndex::Record::~Record() {
1065 }
1066
1067
1068 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1069     if (U_FAILURE(status)) {
1070         return *this;
1071     }
1072     if (inputList_ == NULL) {
1073         inputList_ = new UVector(status);
1074         if (inputList_ == NULL) {
1075             status = U_MEMORY_ALLOCATION_ERROR;
1076             return *this;
1077         }
1078         inputList_->setDeleter(alphaIndex_deleteRecord);
1079     }
1080     Record *r = new Record(name, data);
1081     if (r == NULL) {
1082         status = U_MEMORY_ALLOCATION_ERROR;
1083         return *this;
1084     }
1085     inputList_->addElement(r, status);
1086     clearBuckets();
1087     //std::string ss;
1088     //std::string ss2;
1089     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1090     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1091     return *this;
1092 }
1093
1094
1095 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1096     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1097         inputList_->removeAllElements();
1098         clearBuckets();
1099     }
1100     return *this;
1101 }
1102
1103 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1104     initBuckets(status);
1105     if (U_FAILURE(status)) {
1106         return 0;
1107     }
1108     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1109 }
1110
1111
1112 int32_t AlphabeticIndex::getBucketIndex() const {
1113     return labelsIterIndex_;
1114 }
1115
1116
1117 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1118     if (U_FAILURE(status)) {
1119         return FALSE;
1120     }
1121     if (buckets_ == NULL && currentBucket_ != NULL) {
1122         status = U_ENUM_OUT_OF_SYNC_ERROR;
1123         return FALSE;
1124     }
1125     initBuckets(status);
1126     if (U_FAILURE(status)) {
1127         return FALSE;
1128     }
1129     ++labelsIterIndex_;
1130     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1131         labelsIterIndex_ = buckets_->getBucketCount();
1132         return FALSE;
1133     }
1134     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1135     resetRecordIterator();
1136     return TRUE;
1137 }
1138
1139 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1140     if (currentBucket_ != NULL) {
1141         return currentBucket_->label_;
1142     } else {
1143         return emptyString_;
1144     }
1145 }
1146
1147
1148 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1149     if (currentBucket_ != NULL) {
1150         return currentBucket_->labelType_;
1151     } else {
1152         return U_ALPHAINDEX_NORMAL;
1153     }
1154 }
1155
1156
1157 int32_t AlphabeticIndex::getBucketRecordCount() const {
1158     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1159         return currentBucket_->records_->size();
1160     } else {
1161         return 0;
1162     }
1163 }
1164
1165
1166 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1167     if (U_FAILURE(status)) {
1168         return *this;
1169     }
1170     internalResetBucketIterator();
1171     return *this;
1172 }
1173
1174
1175 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1176     if (U_FAILURE(status)) {
1177         return FALSE;
1178     }
1179     if (currentBucket_ == NULL) {
1180         // We are trying to iterate over the items in a bucket, but there is no
1181         // current bucket from the enumeration of buckets.
1182         status = U_INVALID_STATE_ERROR;
1183         return FALSE;
1184     }
1185     if (buckets_ == NULL) {
1186         status = U_ENUM_OUT_OF_SYNC_ERROR;
1187         return FALSE;
1188     }
1189     if (currentBucket_->records_ == NULL) {
1190         return FALSE;
1191     }
1192     ++itemsIterIndex_;
1193     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1194         itemsIterIndex_  = currentBucket_->records_->size();
1195         return FALSE;
1196     }
1197     return TRUE;
1198 }
1199
1200
1201 const UnicodeString &AlphabeticIndex::getRecordName() const {
1202     const UnicodeString *retStr = &emptyString_;
1203     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1204         itemsIterIndex_ >= 0 &&
1205         itemsIterIndex_ < currentBucket_->records_->size()) {
1206             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1207             retStr = &item->name_;
1208     }
1209     return *retStr;
1210 }
1211
1212 const void *AlphabeticIndex::getRecordData() const {
1213     const void *retPtr = NULL;
1214     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1215         itemsIterIndex_ >= 0 &&
1216         itemsIterIndex_ < currentBucket_->records_->size()) {
1217             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1218             retPtr = item->data_;
1219     }
1220     return retPtr;
1221 }
1222
1223
1224 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1225     itemsIterIndex_ = -1;
1226     return *this;
1227 }
1228
1229
1230
1231 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1232                                 const UnicodeString &lowerBoundary,
1233                                 UAlphabeticIndexLabelType type)
1234         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1235           displayBucket_(NULL), displayIndex_(-1),
1236           records_(NULL) {
1237 }
1238
1239
1240 AlphabeticIndex::Bucket::~Bucket() {
1241     delete records_;
1242 }
1243
1244 U_NAMESPACE_END
1245
1246 #endif  // !UCONFIG_NO_COLLATION