]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/alphaindex.cpp
ICU-531.31.tar.gz
[apple/icu.git] / icuSources / i18n / alphaindex.cpp
CommitLineData
4388f060
A
1/*
2*******************************************************************************
57a6839d 3* Copyright (C) 2009-2014, International Business Machines Corporation and
51004dcb 4* others. All Rights Reserved.
4388f060
A
5*******************************************************************************
6*/
7
4388f060
A
8#include "unicode/utypes.h"
9
57a6839d 10#if !UCONFIG_NO_COLLATION
4388f060
A
11
12#include "unicode/alphaindex.h"
13#include "unicode/coll.h"
51004dcb 14#include "unicode/localpointer.h"
4388f060 15#include "unicode/normalizer2.h"
4388f060 16#include "unicode/tblcoll.h"
57a6839d 17#include "unicode/uchar.h"
4388f060
A
18#include "unicode/ulocdata.h"
19#include "unicode/uniset.h"
20#include "unicode/uobject.h"
4388f060 21#include "unicode/usetiter.h"
4388f060
A
22#include "unicode/utf16.h"
23
51004dcb 24#include "cmemory.h"
4388f060 25#include "cstring.h"
4388f060 26#include "uassert.h"
4388f060 27#include "uvector.h"
57a6839d 28#include "uvectr64.h"
4388f060
A
29
30//#include <string>
31//#include <iostream>
51004dcb
A
32
33#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
34
4388f060
A
35U_NAMESPACE_BEGIN
36
51004dcb 37namespace {
4388f060 38
51004dcb
A
39/**
40 * Prefix string for Chinese index buckets.
41 * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42 */
43const UChar BASE[1] = { 0xFDD0 };
44const int32_t BASE_LENGTH = 1;
45
46UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47 const UnicodeString &one, const UnicodeString &other);
48
49} // namespace
4388f060
A
50
51static int32_t U_CALLCONV
51004dcb 52collatorComparator(const void *context, const void *left, const void *right);
4388f060
A
53
54static int32_t U_CALLCONV
55recordCompareFn(const void *context, const void *left, const void *right);
56
4388f060
A
57// UVector<Record *> support function, delete a Record.
58static void U_CALLCONV
59alphaIndex_deleteRecord(void *obj) {
60 delete static_cast<AlphabeticIndex::Record *>(obj);
61}
62
51004dcb 63namespace {
4388f060 64
51004dcb
A
65UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66 UErrorCode &errorCode) {
67 if (U_FAILURE(errorCode)) { return NULL; }
68 if (owned.isValid()) {
69 return owned.orphan();
70 }
71 UnicodeString *p = new UnicodeString(s);
72 if (p == NULL) {
73 errorCode = U_MEMORY_ALLOCATION_ERROR;
74 }
75 return p;
76}
4388f060 77
51004dcb
A
78inline UnicodeString *getString(const UVector &list, int32_t i) {
79 return static_cast<UnicodeString *>(list[i]);
80}
4388f060 81
51004dcb
A
82inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83 return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84}
4388f060 85
51004dcb
A
86inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87 return static_cast<AlphabeticIndex::Record *>(list[i]);
88}
89
90/**
91 * Like Java Collections.binarySearch(List, String, Comparator).
92 *
93 * @return the index>=0 where the item was found,
94 * or the index<0 for inserting the string at ~index in sorted order
95 */
96int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97 if (list.size() == 0) { return ~0; }
98 int32_t start = 0;
99 int32_t limit = list.size();
100 for (;;) {
101 int32_t i = (start + limit) / 2;
102 const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103 UErrorCode errorCode = U_ZERO_ERROR;
104 UCollationResult cmp = coll.compare(s, *si, errorCode);
105 if (cmp == UCOL_EQUAL) {
106 return i;
107 } else if (cmp < 0) {
108 if (i == start) {
109 return ~start; // insert s before *si
110 }
111 limit = i;
112 } else {
113 if (i == start) {
114 return ~(start + 1); // insert s after *si
115 }
116 start = i;
117 }
4388f060
A
118 }
119}
120
51004dcb
A
121} // namespace
122
123// The BucketList is not in the anonymous namespace because only Clang
124// seems to support its use in other classes from there.
125// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126class BucketList : public UObject {
127public:
128 BucketList(UVector *bucketList, UVector *publicBucketList)
129 : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130 int32_t displayIndex = 0;
131 for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132 getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133 }
134 }
4388f060 135
51004dcb
A
136 // The virtual destructor must not be inline.
137 // See ticket #8454 for details.
138 virtual ~BucketList();
139
140 int32_t getBucketCount() const {
141 return immutableVisibleList_->size();
4388f060 142 }
4388f060 143
51004dcb
A
144 int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145 UErrorCode &errorCode) {
146 // binary search
147 int32_t start = 0;
148 int32_t limit = bucketList_->size();
149 while ((start + 1) < limit) {
150 int32_t i = (start + limit) / 2;
151 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152 UCollationResult nameVsBucket =
153 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154 if (nameVsBucket < 0) {
155 limit = i;
156 } else {
157 start = i;
158 }
159 }
160 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161 if (bucket->displayBucket_ != NULL) {
162 bucket = bucket->displayBucket_;
163 }
164 return bucket->displayIndex_;
4388f060 165 }
51004dcb
A
166
167 /** All of the buckets, visible and invisible. */
168 UVector *bucketList_;
169 /** Just the visible buckets. */
170 UVector *immutableVisibleList_;
171};
172
173BucketList::~BucketList() {
174 delete bucketList_;
175 if (immutableVisibleList_ != bucketList_) {
176 delete immutableVisibleList_;
4388f060 177 }
51004dcb
A
178}
179
180AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181 delete buckets_;
182 delete collatorPrimaryOnly_;
183}
184
185int32_t
186AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187 return buckets_->getBucketCount();
188}
189
190int32_t
191AlphabeticIndex::ImmutableIndex::getBucketIndex(
192 const UnicodeString &name, UErrorCode &errorCode) const {
193 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194}
195
196const AlphabeticIndex::Bucket *
197AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198 if (0 <= index && index < buckets_->getBucketCount()) {
199 return icu::getBucket(*buckets_->immutableVisibleList_, index);
200 } else {
201 return NULL;
4388f060 202 }
4388f060
A
203}
204
51004dcb
A
205AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206 : inputList_(NULL),
207 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208 maxLabelCount_(99),
209 initialLabels_(NULL), firstCharsInScripts_(NULL),
210 collator_(NULL), collatorPrimaryOnly_(NULL),
211 buckets_(NULL) {
212 init(&locale, status);
213}
214
215
216AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217 : inputList_(NULL),
218 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219 maxLabelCount_(99),
220 initialLabels_(NULL), firstCharsInScripts_(NULL),
221 collator_(collator), collatorPrimaryOnly_(NULL),
222 buckets_(NULL) {
223 init(NULL, status);
224}
225
226
4388f060
A
227
228AlphabeticIndex::~AlphabeticIndex() {
4388f060
A
229 delete collator_;
230 delete collatorPrimaryOnly_;
51004dcb
A
231 delete firstCharsInScripts_;
232 delete buckets_;
233 delete inputList_;
4388f060
A
234 delete initialLabels_;
235}
236
237
238AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239 if (U_FAILURE(status)) {
240 return *this;
241 }
242 initialLabels_->addAll(additions);
51004dcb 243 clearBuckets();
4388f060
A
244 return *this;
245}
246
247
248AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
51004dcb
A
249 addIndexExemplars(locale, status);
250 clearBuckets();
4388f060
A
251 return *this;
252}
253
254
51004dcb
A
255AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256 if (U_FAILURE(errorCode)) { return NULL; }
257 // In C++, the ImmutableIndex must own its copy of the BucketList,
258 // even if it contains no records, for proper memory management.
259 // We could clone the buckets_ if they are not NULL,
260 // but that would be worth it only if this method is called multiple times,
261 // or called after using the old-style bucket iterator API.
262 LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263 LocalPointer<RuleBasedCollator> coll(
264 static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265 if (immutableBucketList.isNull() || coll.isNull()) {
266 errorCode = U_MEMORY_ALLOCATION_ERROR;
267 return NULL;
268 }
269 ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270 if (immIndex == NULL) {
271 errorCode = U_MEMORY_ALLOCATION_ERROR;
272 return NULL;
273 }
274 // The ImmutableIndex adopted its parameter objects.
275 immutableBucketList.orphan();
276 coll.orphan();
277 return immIndex;
278}
279
4388f060 280int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
51004dcb 281 initBuckets(status);
4388f060
A
282 if (U_FAILURE(status)) {
283 return 0;
284 }
51004dcb 285 return buckets_->getBucketCount();
4388f060
A
286}
287
288
289int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
51004dcb 290 if (U_FAILURE(status) || inputList_ == NULL) {
4388f060
A
291 return 0;
292 }
51004dcb 293 return inputList_->size();
4388f060
A
294}
295
51004dcb
A
296void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297 const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298 if (U_FAILURE(errorCode)) { return; }
299
300 const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301 const UnicodeString &overflowBoundary =
302 *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303
304 // We make a sorted array of elements.
305 // Some of the input may be redundant.
306 // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307 // We filter out those cases.
308 UnicodeSetIterator iter(*initialLabels_);
309 while (iter.next()) {
310 const UnicodeString *item = &iter.getString();
311 LocalPointer<UnicodeString> ownedItem;
312 UBool checkDistinct;
313 int32_t itemLength = item->length();
314 if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315 checkDistinct = FALSE;
316 } else if(item->charAt(itemLength - 1) == 0x2a && // '*'
317 item->charAt(itemLength - 2) != 0x2a) {
318 // Use a label if it is marked with one trailing star,
319 // even if the label string sorts the same when all contractions are suppressed.
320 ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321 item = ownedItem.getAlias();
322 if (item == NULL) {
323 errorCode = U_MEMORY_ALLOCATION_ERROR;
324 return;
4388f060 325 }
51004dcb 326 checkDistinct = FALSE;
4388f060 327 } else {
51004dcb
A
328 checkDistinct = TRUE;
329 }
330 if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331 // Ignore a primary-ignorable or non-alphabetic index character.
332 } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
57a6839d 333 // Ignore an index character that will land in the overflow bucket.
51004dcb
A
334 } else if (checkDistinct &&
335 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336 // Ignore a multi-code point index character that does not sort distinctly
337 // from the sequence of its separate characters.
338 } else {
339 int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340 if (insertionPoint < 0) {
341 indexCharacters.insertElementAt(
342 ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343 } else {
344 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346 indexCharacters.setElementAt(
347 ownedString(*item, ownedItem, errorCode), insertionPoint);
348 }
349 }
4388f060
A
350 }
351 }
51004dcb 352 if (U_FAILURE(errorCode)) { return; }
4388f060 353
51004dcb 354 // if the result is still too large, cut down to maxCount elements, by removing every nth element
4388f060 355
51004dcb 356 int32_t size = indexCharacters.size() - 1;
4388f060 357 if (size > maxLabelCount_) {
4388f060
A
358 int32_t count = 0;
359 int32_t old = -1;
51004dcb 360 for (int32_t i = 0; i < indexCharacters.size();) {
4388f060 361 ++count;
51004dcb 362 int32_t bump = count * maxLabelCount_ / size;
4388f060 363 if (bump == old) {
51004dcb 364 indexCharacters.removeElementAt(i);
4388f060 365 } else {
4388f060 366 old = bump;
51004dcb 367 ++i;
4388f060
A
368 }
369 }
4388f060 370 }
51004dcb 371}
4388f060 372
51004dcb
A
373namespace {
374
375const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
376 if (!current.startsWith(BASE, BASE_LENGTH)) {
377 return current;
378 }
379 UChar rest = current.charAt(BASE_LENGTH);
380 if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381 int32_t count = rest-0x2800;
382 temp.setTo((UChar)(0x30 + count % 10));
383 if (count >= 10) {
384 count /= 10;
385 temp.insert(0, (UChar)(0x30 + count % 10));
386 if (count >= 10) {
387 count /= 10;
388 temp.insert(0, (UChar)(0x30 + count));
389 }
390 }
391 return temp.append((UChar)0x5283);
392 }
393 return temp.setTo(current, BASE_LENGTH);
4388f060
A
394}
395
51004dcb 396UBool hasMultiplePrimaryWeights(
57a6839d
A
397 const RuleBasedCollator &coll, uint32_t variableTop,
398 const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399 ces.removeAllElements();
400 coll.internalGetCEs(s, ces, errorCode);
401 if (U_FAILURE(errorCode)) { return FALSE; }
51004dcb 402 UBool seenPrimary = FALSE;
57a6839d
A
403 for (int32_t i = 0; i < ces.size(); ++i) {
404 int64_t ce = ces.elementAti(i);
405 uint32_t p = (uint32_t)(ce >> 32);
406 if (p > variableTop) {
407 // not primary ignorable
51004dcb
A
408 if (seenPrimary) {
409 return TRUE;
4388f060 410 }
51004dcb 411 seenPrimary = TRUE;
4388f060 412 }
51004dcb
A
413 }
414 return FALSE;
4388f060
A
415}
416
51004dcb 417} // namespace
4388f060 418
51004dcb
A
419BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420 // Initialize indexCharacters.
421 UVector indexCharacters(errorCode);
422 indexCharacters.setDeleter(uprv_deleteUObject);
423 initLabels(indexCharacters, errorCode);
424 if (U_FAILURE(errorCode)) { return NULL; }
425
426 // Variables for hasMultiplePrimaryWeights().
57a6839d
A
427 UVector64 ces(errorCode);
428 uint32_t variableTop;
51004dcb 429 if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
57a6839d 430 variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
51004dcb
A
431 } else {
432 variableTop = 0;
433 }
434 UBool hasInvisibleBuckets = FALSE;
435
436 // Helper arrays for Chinese Pinyin collation.
437 Bucket *asciiBuckets[26] = {
438 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440 };
441 Bucket *pinyinBuckets[26] = {
442 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444 };
445 UBool hasPinyin = FALSE;
446
447 LocalPointer<UVector> bucketList(new UVector(errorCode));
448 if (bucketList.isNull()) {
449 errorCode = U_MEMORY_ALLOCATION_ERROR;
450 return NULL;
451 }
452 bucketList->setDeleter(uprv_deleteUObject);
453
454 // underflow bucket
455 Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
456 if (bucket == NULL) {
457 errorCode = U_MEMORY_ALLOCATION_ERROR;
458 return NULL;
459 }
460 bucketList->addElement(bucket, errorCode);
461 if (U_FAILURE(errorCode)) { return NULL; }
462
463 UnicodeString temp;
464
465 // fix up the list, adding underflow, additions, overflow
466 // Insert inflow labels as needed.
467 int32_t scriptIndex = -1;
468 const UnicodeString *scriptUpperBoundary = &emptyString_;
469 for (int32_t i = 0; i < indexCharacters.size(); ++i) {
470 UnicodeString &current = *getString(indexCharacters, i);
471 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
472 // We crossed the script boundary into a new script.
473 const UnicodeString &inflowBoundary = *scriptUpperBoundary;
474 UBool skippedScript = FALSE;
475 for (;;) {
476 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
477 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
478 break;
479 }
480 skippedScript = TRUE;
481 }
482 if (skippedScript && bucketList->size() > 1) {
483 // We are skipping one or more scripts,
484 // and we are not just getting out of the underflow label.
485 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
486 if (bucket == NULL) {
487 errorCode = U_MEMORY_ALLOCATION_ERROR;
488 return NULL;
489 }
490 bucketList->addElement(bucket, errorCode);
491 }
492 }
493 // Add a bucket with the current label.
494 bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
495 if (bucket == NULL) {
496 errorCode = U_MEMORY_ALLOCATION_ERROR;
497 return NULL;
498 }
499 bucketList->addElement(bucket, errorCode);
500 // Remember ASCII and Pinyin buckets for Pinyin redirects.
501 UChar c;
502 if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
503 asciiBuckets[c - 0x41] = bucket;
504 } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
505 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
506 pinyinBuckets[c - 0x41] = bucket;
507 hasPinyin = TRUE;
508 }
509 // Check for multiple primary weights.
510 if (!current.startsWith(BASE, BASE_LENGTH) &&
57a6839d
A
511 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
512 ces, errorCode) &&
51004dcb
A
513 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
514 // "AE-ligature" or "Sch" etc.
515 for (int32_t i = bucketList->size() - 2;; --i) {
516 Bucket *singleBucket = getBucket(*bucketList, i);
517 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
518 // There is no single-character bucket since the last
519 // underflow or inflow label.
520 break;
521 }
522 if (singleBucket->displayBucket_ == NULL &&
57a6839d
A
523 !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
524 singleBucket->lowerBoundary_,
525 ces, errorCode)) {
51004dcb
A
526 // Add an invisible bucket that redirects strings greater than the expansion
527 // to the previous single-character bucket.
528 // For example, after ... Q R S Sch we add Sch\uFFFF->S
529 // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
530 bucket = new Bucket(emptyString_,
531 UnicodeString(current).append((UChar)0xFFFF),
532 U_ALPHAINDEX_NORMAL);
533 if (bucket == NULL) {
534 errorCode = U_MEMORY_ALLOCATION_ERROR;
535 return NULL;
536 }
537 bucket->displayBucket_ = singleBucket;
538 bucketList->addElement(bucket, errorCode);
539 hasInvisibleBuckets = TRUE;
540 break;
541 }
542 }
543 }
544 }
545 if (U_FAILURE(errorCode)) { return NULL; }
546 if (bucketList->size() == 1) {
547 // No real labels, show only the underflow label.
548 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
549 if (bl == NULL) {
550 errorCode = U_MEMORY_ALLOCATION_ERROR;
551 return NULL;
552 }
553 bucketList.orphan();
554 return bl;
555 }
556 // overflow bucket
557 bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
558 if (bucket == NULL) {
559 errorCode = U_MEMORY_ALLOCATION_ERROR;
560 return NULL;
561 }
562 bucketList->addElement(bucket, errorCode); // final
563
564 if (hasPinyin) {
565 // Redirect Pinyin buckets.
566 Bucket *asciiBucket = NULL;
567 for (int32_t i = 0; i < 26; ++i) {
568 if (asciiBuckets[i] != NULL) {
569 asciiBucket = asciiBuckets[i];
570 }
571 if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
572 pinyinBuckets[i]->displayBucket_ = asciiBucket;
573 hasInvisibleBuckets = TRUE;
574 }
575 }
576 }
577
578 if (U_FAILURE(errorCode)) { return NULL; }
579 if (!hasInvisibleBuckets) {
580 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
581 if (bl == NULL) {
582 errorCode = U_MEMORY_ALLOCATION_ERROR;
583 return NULL;
584 }
585 bucketList.orphan();
586 return bl;
587 }
588 // Merge inflow buckets that are visually adjacent.
589 // Iterate backwards: Merge inflow into overflow rather than the other way around.
590 int32_t i = bucketList->size() - 1;
591 Bucket *nextBucket = getBucket(*bucketList, i);
592 while (--i > 0) {
593 bucket = getBucket(*bucketList, i);
594 if (bucket->displayBucket_ != NULL) {
595 continue; // skip invisible buckets
596 }
597 if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
598 if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
599 bucket->displayBucket_ = nextBucket;
600 continue;
601 }
602 }
603 nextBucket = bucket;
604 }
605
606 LocalPointer<UVector> publicBucketList(new UVector(errorCode));
607 if (bucketList.isNull()) {
608 errorCode = U_MEMORY_ALLOCATION_ERROR;
609 return NULL;
610 }
611 // Do not call publicBucketList->setDeleter():
612 // This vector shares its objects with the bucketList.
613 for (int32_t i = 0; i < bucketList->size(); ++i) {
614 bucket = getBucket(*bucketList, i);
615 if (bucket->displayBucket_ == NULL) {
616 publicBucketList->addElement(bucket, errorCode);
617 }
618 }
619 if (U_FAILURE(errorCode)) { return NULL; }
620 BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
621 if (bl == NULL) {
622 errorCode = U_MEMORY_ALLOCATION_ERROR;
623 return NULL;
624 }
625 bucketList.orphan();
626 publicBucketList.orphan();
627 return bl;
628}
629
630/**
631 * Creates an index, and buckets and sorts the list of records into the index.
632 */
633void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
634 if (U_FAILURE(errorCode) || buckets_ != NULL) {
635 return;
636 }
637 buckets_ = createBucketList(errorCode);
638 if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
4388f060
A
639 return;
640 }
641
51004dcb
A
642 // Sort the records by name.
643 // Stable sort preserves input order of collation duplicates.
644 inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
645
646 // Now, we traverse all of the input, which is now sorted.
647 // If the item doesn't go in the current bucket, we find the next bucket that contains it.
648 // This makes the process order n*log(n), since we just sort the list and then do a linear process.
649 // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
650 // we need to improve it for that case.
651
652 Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
653 int32_t bucketIndex = 1;
654 Bucket *nextBucket;
655 const UnicodeString *upperBoundary;
656 if (bucketIndex < buckets_->bucketList_->size()) {
657 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
658 upperBoundary = &nextBucket->lowerBoundary_;
659 } else {
660 nextBucket = NULL;
661 upperBoundary = NULL;
662 }
663 for (int32_t i = 0; i < inputList_->size(); ++i) {
664 Record *r = getRecord(*inputList_, i);
665 // if the current bucket isn't the right one, find the one that is
666 // We have a special flag for the last bucket so that we don't look any further
667 while (upperBoundary != NULL &&
668 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
669 currentBucket = nextBucket;
670 // now reset the boundary that we compare against
671 if (bucketIndex < buckets_->bucketList_->size()) {
672 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
673 upperBoundary = &nextBucket->lowerBoundary_;
4388f060 674 } else {
51004dcb 675 upperBoundary = NULL;
4388f060 676 }
4388f060 677 }
51004dcb
A
678 // now put the record into the bucket.
679 Bucket *bucket = currentBucket;
680 if (bucket->displayBucket_ != NULL) {
681 bucket = bucket->displayBucket_;
682 }
683 if (bucket->records_ == NULL) {
684 bucket->records_ = new UVector(errorCode);
685 if (bucket->records_ == NULL) {
686 errorCode = U_MEMORY_ALLOCATION_ERROR;
687 return;
688 }
689 }
690 bucket->records_->addElement(r, errorCode);
4388f060 691 }
51004dcb 692}
4388f060 693
51004dcb
A
694void AlphabeticIndex::clearBuckets() {
695 if (buckets_ != NULL) {
696 delete buckets_;
697 buckets_ = NULL;
698 internalResetBucketIterator();
699 }
4388f060
A
700}
701
51004dcb
A
702void AlphabeticIndex::internalResetBucketIterator() {
703 labelsIterIndex_ = -1;
704 currentBucket_ = NULL;
705}
4388f060 706
51004dcb
A
707
708void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
51004dcb 709 LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
4388f060
A
710 if (U_FAILURE(status)) {
711 return;
712 }
713
4388f060
A
714 UnicodeSet exemplars;
715 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
716 if (U_SUCCESS(status)) {
51004dcb 717 initialLabels_->addAll(exemplars);
4388f060
A
718 return;
719 }
720 status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
721
51004dcb 722 // The locale data did not include explicit Index characters.
4388f060 723 // Synthesize a set of them from the locale's standard exemplar characters.
4388f060
A
724 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
725 if (U_FAILURE(status)) {
726 return;
727 }
728
51004dcb
A
729 // question: should we add auxiliary exemplars?
730 if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
731 exemplars.add(0x61, 0x7A);
732 }
733 if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
734 // cut down to small list
735 exemplars.remove(0xAC00, 0xD7A3).
736 add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
737 add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
738 add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
739 add(0xD30C).add(0xD558);
740 }
741 if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
742 // cut down to small list
743 // make use of the fact that Ethiopic is allocated in 8's, where
744 // the base is 0 mod 8.
745 UnicodeSet ethiopic(
746 UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
747 UnicodeSetIterator it(ethiopic);
748 while (it.next() && !it.isString()) {
749 if ((it.getCodepoint() & 0x7) != 0) {
750 exemplars.remove(it.getCodepoint());
751 }
752 }
753 }
754
4388f060
A
755 // Upper-case any that aren't already so.
756 // (We only do this for synthesized index characters.)
4388f060
A
757 UnicodeSetIterator it(exemplars);
758 UnicodeString upperC;
4388f060
A
759 while (it.next()) {
760 const UnicodeString &exemplarC = it.getString();
761 upperC = exemplarC;
762 upperC.toUpper(locale);
51004dcb 763 initialLabels_->add(upperC);
4388f060 764 }
51004dcb 765}
4388f060 766
51004dcb
A
767UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
768 UnicodeSet contractions;
57a6839d
A
769 collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
770 if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
771 initialLabels_->addAll(contractions);
51004dcb
A
772 UnicodeSetIterator iter(contractions);
773 while (iter.next()) {
774 const UnicodeString &s = iter.getString();
57a6839d
A
775 U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
776 UChar c = s.charAt(s.length() - 1);
777 if (0x41 <= c && c <= 0x5A) { // A-Z
778 // There are Pinyin labels, add ASCII A-Z labels as well.
779 initialLabels_->add(0x41, 0x5A); // A-Z
780 break;
4388f060
A
781 }
782 }
57a6839d 783 return TRUE;
4388f060
A
784}
785
786
787/*
788 * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
789 */
51004dcb 790static const UChar CGJ = 0x034F;
4388f060
A
791UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
792 UnicodeString result;
793 if (item.length() == 0) {
794 return result;
795 }
796 int32_t i = 0;
797 for (;;) {
798 UChar32 cp = item.char32At(i);
799 result.append(cp);
800 i = item.moveIndex32(i, 1);
801 if (i >= item.length()) {
802 break;
803 }
804 result.append(CGJ);
805 }
806 return result;
807}
808
809
810UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
811 return FALSE;
812}
813
814
815UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
816 return FALSE;
817}
818
819
820const RuleBasedCollator &AlphabeticIndex::getCollator() const {
57a6839d 821 return *collator_;
4388f060
A
822}
823
824
825const UnicodeString &AlphabeticIndex::getInflowLabel() const {
826 return inflowLabel_;
827}
828
829const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
830 return overflowLabel_;
831}
832
833
834const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
835 return underflowLabel_;
836}
837
838
839AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
840 inflowLabel_ = label;
51004dcb 841 clearBuckets();
4388f060
A
842 return *this;
843}
844
845
846AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
847 overflowLabel_ = label;
51004dcb 848 clearBuckets();
4388f060
A
849 return *this;
850}
851
852
853AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
854 underflowLabel_ = label;
51004dcb 855 clearBuckets();
4388f060
A
856 return *this;
857}
858
859
860int32_t AlphabeticIndex::getMaxLabelCount() const {
861 return maxLabelCount_;
862}
863
864
865AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
866 if (U_FAILURE(status)) {
867 return *this;
868 }
869 if (maxLabelCount <= 0) {
870 status = U_ILLEGAL_ARGUMENT_ERROR;
871 return *this;
872 }
873 maxLabelCount_ = maxLabelCount;
51004dcb 874 clearBuckets();
4388f060
A
875 return *this;
876}
877
878
4388f060
A
879//
880// init() - Common code for constructors.
881//
882
51004dcb
A
883void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
884 if (U_FAILURE(status)) { return; }
885 if (locale == NULL && collator_ == NULL) {
886 status = U_ILLEGAL_ARGUMENT_ERROR;
4388f060
A
887 return;
888 }
51004dcb 889
4388f060 890 initialLabels_ = new UnicodeSet();
51004dcb
A
891 if (initialLabels_ == NULL) {
892 status = U_MEMORY_ALLOCATION_ERROR;
893 return;
894 }
4388f060 895
51004dcb 896 inflowLabel_.setTo((UChar)0x2026); // Ellipsis
4388f060
A
897 overflowLabel_ = inflowLabel_;
898 underflowLabel_ = inflowLabel_;
899
51004dcb 900 if (collator_ == NULL) {
57a6839d
A
901 Collator *coll = Collator::createInstance(*locale, status);
902 if (U_FAILURE(status)) {
903 delete coll;
904 return;
905 }
906 if (coll == NULL) {
51004dcb
A
907 status = U_MEMORY_ALLOCATION_ERROR;
908 return;
909 }
57a6839d
A
910 collator_ = dynamic_cast<RuleBasedCollator *>(coll);
911 if (collator_ == NULL) {
912 delete coll;
913 status = U_UNSUPPORTED_ERROR;
914 return;
915 }
51004dcb
A
916 }
917 collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
918 if (collatorPrimaryOnly_ == NULL) {
919 status = U_MEMORY_ALLOCATION_ERROR;
4388f060
A
920 return;
921 }
51004dcb
A
922 collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
923 firstCharsInScripts_ = firstStringsInScript(status);
924 if (U_FAILURE(status)) { return; }
925 firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
51004dcb
A
926 // Guard against a degenerate collator where
927 // some script boundary strings are primary ignorable.
928 for (;;) {
929 if (U_FAILURE(status)) { return; }
930 if (firstCharsInScripts_->isEmpty()) {
931 // AlphabeticIndex requires some non-ignorable script boundary strings.
932 status = U_ILLEGAL_ARGUMENT_ERROR;
933 return;
4388f060 934 }
51004dcb
A
935 if (collatorPrimaryOnly_->compare(
936 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
937 emptyString_, status) == UCOL_EQUAL) {
938 firstCharsInScripts_->removeElementAt(0);
939 } else {
940 break;
4388f060
A
941 }
942 }
4388f060 943
57a6839d
A
944 // Chinese index characters, which are specific to each of the several Chinese tailorings,
945 // take precedence over the single locale data exemplar set per language.
946 if (!addChineseIndexCharacters(status) && locale != NULL) {
51004dcb 947 addIndexExemplars(*locale, status);
4388f060 948 }
4388f060
A
949}
950
951
952//
953// Comparison function for UVector<UnicodeString *> sorting with a collator.
954//
955static int32_t U_CALLCONV
51004dcb 956collatorComparator(const void *context, const void *left, const void *right) {
4388f060
A
957 const UElement *leftElement = static_cast<const UElement *>(left);
958 const UElement *rightElement = static_cast<const UElement *>(right);
959 const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer);
960 const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
4388f060
A
961
962 if (leftString == rightString) {
963 // Catches case where both are NULL
964 return 0;
965 }
966 if (leftString == NULL) {
967 return 1;
968 };
969 if (rightString == NULL) {
970 return -1;
971 }
51004dcb
A
972 const Collator *col = static_cast<const Collator *>(context);
973 UErrorCode errorCode = U_ZERO_ERROR;
974 return col->compare(*leftString, *rightString, errorCode);
4388f060
A
975}
976
977//
978// Comparison function for UVector<Record *> sorting with a collator.
979//
980static int32_t U_CALLCONV
981recordCompareFn(const void *context, const void *left, const void *right) {
982 const UElement *leftElement = static_cast<const UElement *>(left);
983 const UElement *rightElement = static_cast<const UElement *>(right);
984 const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
985 const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
986 const Collator *col = static_cast<const Collator *>(context);
51004dcb
A
987 UErrorCode errorCode = U_ZERO_ERROR;
988 return col->compare(leftRec->name_, rightRec->name_, errorCode);
4388f060
A
989}
990
4388f060
A
991UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
992 if (U_FAILURE(status)) {
993 return NULL;
994 }
57a6839d
A
995 LocalPointer<UVector> dest(new UVector(status));
996 if (dest.isNull()) {
51004dcb 997 status = U_MEMORY_ALLOCATION_ERROR;
4388f060
A
998 return NULL;
999 }
1000 dest->setDeleter(uprv_deleteUObject);
57a6839d
A
1001 // Fetch the script-first-primary contractions which are defined in the root collator.
1002 // They all start with U+FDD1.
1003 UnicodeSet set;
1004 collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1005 if (U_FAILURE(status)) {
1006 return NULL;
1007 }
1008 if (set.isEmpty()) {
1009 status = U_UNSUPPORTED_ERROR;
1010 return NULL;
1011 }
1012 UnicodeSetIterator iter(set);
1013 while (iter.next()) {
1014 const UnicodeString &boundary = iter.getString();
1015 uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1016 if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1017 // Ignore boundaries for the special reordering groups.
1018 // Take only those for "real scripts" (where the sample character is a Letter,
1019 // and the one for unassigned implicit weights (Cn).
1020 continue;
4388f060 1021 }
57a6839d
A
1022 UnicodeString *s = new UnicodeString(boundary);
1023 if (s == NULL) {
4388f060 1024 status = U_MEMORY_ALLOCATION_ERROR;
57a6839d 1025 return NULL;
4388f060 1026 }
57a6839d
A
1027 dest->addElement(s, status);
1028 }
1029 return dest.orphan();
4388f060
A
1030}
1031
1032
51004dcb 1033namespace {
4388f060
A
1034
1035/**
51004dcb
A
1036 * Returns true if one index character string is "better" than the other.
1037 * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1038 * better, and otherwise binary-less-than is better.
4388f060 1039 */
51004dcb
A
1040UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1041 const UnicodeString &one, const UnicodeString &other) {
1042 // This is called with primary-equal strings, but never with one.equals(other).
1043 UErrorCode status = U_ZERO_ERROR;
1044 UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1045 UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1046 if (U_FAILURE(status)) { return FALSE; }
1047 int32_t result = n1.countChar32() - n2.countChar32();
4388f060 1048 if (result != 0) {
51004dcb 1049 return result < 0;
4388f060 1050 }
4388f060
A
1051 result = n1.compareCodePointOrder(n2);
1052 if (result != 0) {
51004dcb 1053 return result < 0;
4388f060 1054 }
51004dcb 1055 return one.compareCodePointOrder(other) < 0;
4388f060
A
1056}
1057
51004dcb 1058} // namespace
4388f060
A
1059
1060//
1061// Constructor & Destructor for AlphabeticIndex::Record
1062//
1063// Records are internal only, instances are not directly surfaced in the public API.
1064// This class is mostly struct-like, with all public fields.
1065
51004dcb
A
1066AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1067 : name_(name), data_(data) {}
1068
4388f060
A
1069AlphabeticIndex::Record::~Record() {
1070}
1071
1072
1073AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1074 if (U_FAILURE(status)) {
1075 return *this;
1076 }
51004dcb
A
1077 if (inputList_ == NULL) {
1078 inputList_ = new UVector(status);
1079 if (inputList_ == NULL) {
1080 status = U_MEMORY_ALLOCATION_ERROR;
1081 return *this;
1082 }
1083 inputList_->setDeleter(alphaIndex_deleteRecord);
1084 }
1085 Record *r = new Record(name, data);
1086 if (r == NULL) {
1087 status = U_MEMORY_ALLOCATION_ERROR;
1088 return *this;
1089 }
1090 inputList_->addElement(r, status);
1091 clearBuckets();
4388f060
A
1092 //std::string ss;
1093 //std::string ss2;
1094 //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1095 // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1096 return *this;
1097}
1098
1099
1100AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
51004dcb
A
1101 if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1102 inputList_->removeAllElements();
1103 clearBuckets();
4388f060 1104 }
4388f060
A
1105 return *this;
1106}
1107
4388f060 1108int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
51004dcb 1109 initBuckets(status);
4388f060
A
1110 if (U_FAILURE(status)) {
1111 return 0;
1112 }
51004dcb 1113 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
4388f060
A
1114}
1115
1116
1117int32_t AlphabeticIndex::getBucketIndex() const {
1118 return labelsIterIndex_;
1119}
1120
1121
1122UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1123 if (U_FAILURE(status)) {
1124 return FALSE;
1125 }
51004dcb 1126 if (buckets_ == NULL && currentBucket_ != NULL) {
4388f060
A
1127 status = U_ENUM_OUT_OF_SYNC_ERROR;
1128 return FALSE;
1129 }
51004dcb 1130 initBuckets(status);
4388f060
A
1131 if (U_FAILURE(status)) {
1132 return FALSE;
1133 }
1134 ++labelsIterIndex_;
51004dcb
A
1135 if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1136 labelsIterIndex_ = buckets_->getBucketCount();
4388f060
A
1137 return FALSE;
1138 }
51004dcb 1139 currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
4388f060
A
1140 resetRecordIterator();
1141 return TRUE;
1142}
1143
1144const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1145 if (currentBucket_ != NULL) {
1146 return currentBucket_->label_;
1147 } else {
51004dcb 1148 return emptyString_;
4388f060
A
1149 }
1150}
1151
1152
1153UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1154 if (currentBucket_ != NULL) {
1155 return currentBucket_->labelType_;
1156 } else {
1157 return U_ALPHAINDEX_NORMAL;
1158 }
1159}
1160
1161
1162int32_t AlphabeticIndex::getBucketRecordCount() const {
51004dcb 1163 if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
4388f060
A
1164 return currentBucket_->records_->size();
1165 } else {
1166 return 0;
1167 }
1168}
1169
1170
1171AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1172 if (U_FAILURE(status)) {
1173 return *this;
1174 }
51004dcb 1175 internalResetBucketIterator();
4388f060
A
1176 return *this;
1177}
1178
1179
1180UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1181 if (U_FAILURE(status)) {
1182 return FALSE;
1183 }
1184 if (currentBucket_ == NULL) {
1185 // We are trying to iterate over the items in a bucket, but there is no
1186 // current bucket from the enumeration of buckets.
1187 status = U_INVALID_STATE_ERROR;
1188 return FALSE;
1189 }
51004dcb 1190 if (buckets_ == NULL) {
4388f060
A
1191 status = U_ENUM_OUT_OF_SYNC_ERROR;
1192 return FALSE;
1193 }
51004dcb
A
1194 if (currentBucket_->records_ == NULL) {
1195 return FALSE;
1196 }
4388f060
A
1197 ++itemsIterIndex_;
1198 if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1199 itemsIterIndex_ = currentBucket_->records_->size();
1200 return FALSE;
1201 }
1202 return TRUE;
1203}
1204
1205
1206const UnicodeString &AlphabeticIndex::getRecordName() const {
51004dcb
A
1207 const UnicodeString *retStr = &emptyString_;
1208 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060
A
1209 itemsIterIndex_ >= 0 &&
1210 itemsIterIndex_ < currentBucket_->records_->size()) {
1211 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1212 retStr = &item->name_;
1213 }
1214 return *retStr;
1215}
1216
1217const void *AlphabeticIndex::getRecordData() const {
1218 const void *retPtr = NULL;
51004dcb 1219 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
4388f060
A
1220 itemsIterIndex_ >= 0 &&
1221 itemsIterIndex_ < currentBucket_->records_->size()) {
1222 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1223 retPtr = item->data_;
1224 }
1225 return retPtr;
1226}
1227
1228
1229AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1230 itemsIterIndex_ = -1;
1231 return *this;
1232}
1233
1234
1235
1236AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1237 const UnicodeString &lowerBoundary,
51004dcb
A
1238 UAlphabeticIndexLabelType type)
1239 : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1240 displayBucket_(NULL), displayIndex_(-1),
1241 records_(NULL) {
4388f060
A
1242}
1243
1244
1245AlphabeticIndex::Bucket::~Bucket() {
1246 delete records_;
1247}
1248
1249U_NAMESPACE_END
1250
57a6839d 1251#endif // !UCONFIG_NO_COLLATION