2 *******************************************************************************
3 * Copyright (C) 2009-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
10 * \brief C API: AlphabeticIndex class
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
17 #include "unicode/alphaindex.h"
18 #include "unicode/coll.h"
19 #include "unicode/normalizer2.h"
20 #include "unicode/strenum.h"
21 #include "unicode/tblcoll.h"
22 #include "unicode/ulocdata.h"
23 #include "unicode/uniset.h"
24 #include "unicode/uobject.h"
25 #include "unicode/uscript.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/ustring.h"
28 #include "unicode/utf16.h"
41 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex
)
43 // Forward Declarations
44 static int32_t U_CALLCONV
45 PreferenceComparator(const void *context
, const void *left
, const void *right
);
47 static int32_t U_CALLCONV
48 sortCollateComparator(const void *context
, const void *left
, const void *right
);
50 static int32_t U_CALLCONV
51 recordCompareFn(const void *context
, const void *left
, const void *right
);
53 // UVector<Bucket *> support function, delete a Bucket.
54 static void U_CALLCONV
55 alphaIndex_deleteBucket(void *obj
) {
56 delete static_cast<AlphabeticIndex::Bucket
*>(obj
);
59 // UVector<Record *> support function, delete a Record.
60 static void U_CALLCONV
61 alphaIndex_deleteRecord(void *obj
) {
62 delete static_cast<AlphabeticIndex::Record
*>(obj
);
67 static const Normalizer2
*nfkdNormalizer
;
70 // Append the contents of a UnicodeSet to a UVector of UnicodeStrings.
71 // Append everything - individual characters are handled as strings of length 1.
72 // The destination vector owns the appended strings.
74 static void appendUnicodeSetToUVector(UVector
&dest
, const UnicodeSet
&source
, UErrorCode
&status
) {
75 UnicodeSetIterator
setIter(source
);
76 while (setIter
.next()) {
77 const UnicodeString
&str
= setIter
.getString();
78 dest
.addElement(str
.clone(), status
);
83 AlphabeticIndex::AlphabeticIndex(const Locale
&locale
, UErrorCode
&status
) {
85 if (U_FAILURE(status
)) {
89 langType_
= langTypeFromLocale(locale_
);
91 collator_
= Collator::createInstance(locale
, status
);
92 if (collator_
!= NULL
) {
93 collatorPrimaryOnly_
= collator_
->clone();
95 if (collatorPrimaryOnly_
!= NULL
) {
96 collatorPrimaryOnly_
->setStrength(Collator::PRIMARY
);
98 getIndexExemplars(*initialLabels_
, locale
, status
);
99 indexBuildRequired_
= TRUE
;
100 if ((collator_
== NULL
|| collatorPrimaryOnly_
== NULL
) && U_SUCCESS(status
)) {
101 status
= U_MEMORY_ALLOCATION_ERROR
;
103 firstScriptCharacters_
= firstStringsInScript(status
);
107 AlphabeticIndex::~AlphabeticIndex() {
108 uhash_close(alreadyIn_
);
111 delete collatorPrimaryOnly_
;
112 delete firstScriptCharacters_
;
114 delete inputRecords_
;
115 delete noDistinctSorting_
;
116 delete notAlphabetic_
;
117 delete initialLabels_
;
121 AlphabeticIndex
&AlphabeticIndex::addLabels(const UnicodeSet
&additions
, UErrorCode
&status
) {
122 if (U_FAILURE(status
)) {
125 initialLabels_
->addAll(additions
);
130 AlphabeticIndex
&AlphabeticIndex::addLabels(const Locale
&locale
, UErrorCode
&status
) {
131 if (U_FAILURE(status
)) {
134 UnicodeSet additions
;
135 getIndexExemplars(additions
, locale
, status
);
136 initialLabels_
->addAll(additions
);
141 int32_t AlphabeticIndex::getBucketCount(UErrorCode
&status
) {
143 if (U_FAILURE(status
)) {
146 return bucketList_
->size();
150 int32_t AlphabeticIndex::getRecordCount(UErrorCode
&status
) {
151 if (U_FAILURE(status
)) {
154 return inputRecords_
->size();
158 void AlphabeticIndex::buildIndex(UErrorCode
&status
) {
159 if (U_FAILURE(status
)) {
162 if (!indexBuildRequired_
) {
166 // Discard any already-built data.
167 // This is important when the user builds and uses an index, then subsequently modifies it,
168 // necessitating a rebuild.
170 bucketList_
->removeAllElements();
171 labels_
->removeAllElements();
172 uhash_removeAll(alreadyIn_
);
173 noDistinctSorting_
->clear();
174 notAlphabetic_
->clear();
176 // first sort the incoming Labels, with a "best" ordering among items
177 // that are the same according to the collator
179 UVector
preferenceSorting(status
); // Vector of UnicodeStrings; owned by the vector.
180 preferenceSorting
.setDeleter(uprv_deleteUObject
);
181 appendUnicodeSetToUVector(preferenceSorting
, *initialLabels_
, status
);
182 preferenceSorting
.sortWithUComparator(PreferenceComparator
, &status
, status
);
184 // We now make a set of Labels.
185 // Some of the input may, however, be redundant.
186 // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
187 // So we make a pass through, filtering out those cases.
188 // TODO: filtering these out would seem to be at odds with the eventual goal
189 // of being able to split buckets that contain too many items.
192 for (int32_t psIndex
=0; psIndex
<preferenceSorting
.size(); psIndex
++) {
193 UnicodeString item
= *static_cast<const UnicodeString
*>(preferenceSorting
.elementAt(psIndex
));
194 // TODO: Since preferenceSorting was originally populated from the contents of a UnicodeSet,
195 // is it even possible for duplicates to show up in this check?
196 if (labelSet
.contains(item
)) {
197 UnicodeSetIterator
itemAlreadyInIter(labelSet
);
198 while (itemAlreadyInIter
.next()) {
199 const UnicodeString
&itemAlreadyIn
= itemAlreadyInIter
.getString();
200 if (collatorPrimaryOnly_
->compare(item
, itemAlreadyIn
) == 0) {
201 UnicodeSet
*targets
= static_cast<UnicodeSet
*>(uhash_get(alreadyIn_
, &itemAlreadyIn
));
202 if (targets
== NULL
) {
203 // alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
204 targets
= new UnicodeSet();
205 uhash_put(alreadyIn_
, itemAlreadyIn
.clone(), targets
, &status
);
211 } else if (item
.moveIndex32(0, 1) < item
.length() && // Label contains more than one code point.
212 collatorPrimaryOnly_
->compare(item
, separated(item
)) == 0) {
213 noDistinctSorting_
->add(item
);
214 } else if (!ALPHABETIC
->containsSome(item
)) {
215 notAlphabetic_
->add(item
);
221 // If we have no labels, hard-code a fallback default set of [A-Z]
222 // This case can occur with locales that don't have exemplar character data, including root.
223 // A no-labels situation will cause other problems; it needs to be avoided.
224 if (labelSet
.isEmpty()) {
225 labelSet
.add((UChar32
)0x41, (UChar32
)0x5A);
228 // Move the set of Labels from the set into a vector, and sort
229 // according to the collator.
231 appendUnicodeSetToUVector(*labels_
, labelSet
, status
);
232 labels_
->sortWithUComparator(sortCollateComparator
, collatorPrimaryOnly_
, status
);
234 // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
235 // Implemented by copying the elements to be retained to a new UVector.
237 const int32_t size
= labelSet
.size() - 1;
238 if (size
> maxLabelCount_
) {
239 UVector
*newLabels
= new UVector(status
);
240 newLabels
->setDeleter(uprv_deleteUObject
);
243 for (int32_t srcIndex
=0; srcIndex
<labels_
->size(); srcIndex
++) {
244 const UnicodeString
*str
= static_cast<const UnicodeString
*>(labels_
->elementAt(srcIndex
));
246 const int32_t bump
= count
* maxLabelCount_
/ size
;
250 newLabels
->addElement(str
->clone(), status
);
258 // We now know the list of labels.
259 // Create a corresponding list of buckets, one per label.
261 buildBucketList(status
); // Corresponds to Java BucketList constructor.
263 // Bin the Records into the Buckets.
264 bucketRecords(status
);
266 indexBuildRequired_
= FALSE
;
267 resetBucketIterator(status
);
271 // buildBucketList() Corresponds to the BucketList constructor in the Java version.
273 void AlphabeticIndex::buildBucketList(UErrorCode
&status
) {
274 UnicodeString labelStr
= getUnderflowLabel();
275 Bucket
*b
= new Bucket(labelStr
, *EMPTY_STRING
, U_ALPHAINDEX_UNDERFLOW
, status
);
276 bucketList_
->addElement(b
, status
);
278 // Build up the list, adding underflow, additions, overflow
279 // insert infix labels as needed, using \uFFFF.
280 const UnicodeString
*last
= static_cast<UnicodeString
*>(labels_
->elementAt(0));
281 b
= new Bucket(*last
, *last
, U_ALPHAINDEX_NORMAL
, status
);
282 bucketList_
->addElement(b
, status
);
286 AlphabeticIndex::getScriptSet(lastSet
, *last
, status
);
287 lastSet
.removeAll(*IGNORE_SCRIPTS
);
289 for (int i
= 1; i
< labels_
->size(); ++i
) {
290 UnicodeString
*current
= static_cast<UnicodeString
*>(labels_
->elementAt(i
));
291 getScriptSet(set
, *current
, status
);
292 set
.removeAll(*IGNORE_SCRIPTS
);
293 if (lastSet
.containsNone(set
)) {
294 // check for adjacent
295 const UnicodeString
&overflowComparisonString
= getOverflowComparisonString(*last
, status
);
296 if (collatorPrimaryOnly_
->compare(overflowComparisonString
, *current
) < 0) {
297 labelStr
= getInflowLabel();
298 b
= new Bucket(labelStr
, overflowComparisonString
, U_ALPHAINDEX_INFLOW
, status
);
299 bucketList_
->addElement(b
, status
);
304 b
= new Bucket(*current
, *current
, U_ALPHAINDEX_NORMAL
, status
);
305 bucketList_
->addElement(b
, status
);
309 const UnicodeString
&limitString
= getOverflowComparisonString(*last
, status
);
310 b
= new Bucket(getOverflowLabel(), limitString
, U_ALPHAINDEX_OVERFLOW
, status
);
311 bucketList_
->addElement(b
, status
);
312 // final overflow bucket
317 // Place all of the raw input records into the correct bucket.
319 // Begin by sorting the input records; this lets us bin them in a single pass.
321 // Note on storage management: The input records are owned by the
322 // inputRecords_ vector, and will (eventually) be auto-deleted by it.
323 // The Bucket objects have pointers to the Record objects, but do not own them.
325 void AlphabeticIndex::bucketRecords(UErrorCode
&status
) {
326 if (U_FAILURE(status
)) {
330 inputRecords_
->sortWithUComparator(recordCompareFn
, collator_
, status
);
331 U_ASSERT(bucketList_
->size() > 0); // Should always have at least an overflow
332 // bucket, even if no user labels.
333 int32_t bucketIndex
= 0;
334 Bucket
*destBucket
= static_cast<Bucket
*>(bucketList_
->elementAt(bucketIndex
));
335 Bucket
*nextBucket
= NULL
;
336 if (bucketIndex
+1 < bucketList_
->size()) {
337 nextBucket
= static_cast<Bucket
*>(bucketList_
->elementAt(bucketIndex
+1));
339 int32_t recordIndex
= 0;
340 Record
*r
= static_cast<Record
*>(inputRecords_
->elementAt(recordIndex
));
341 while (recordIndex
< inputRecords_
->size()) {
342 if (nextBucket
== NULL
||
343 collatorPrimaryOnly_
->compare(r
->sortingName_
, nextBucket
->lowerBoundary_
) < 0) {
344 // Record goes in current bucket. Advance to next record,
345 // stay on current bucket.
346 destBucket
->records_
->addElement(r
, status
);
348 r
= static_cast<Record
*>(inputRecords_
->elementAt(recordIndex
));
350 // Advance to the next bucket, stay on current record.
352 destBucket
= nextBucket
;
353 if (bucketIndex
+1 < bucketList_
->size()) {
354 nextBucket
= static_cast<Bucket
*>(bucketList_
->elementAt(bucketIndex
+1));
358 U_ASSERT(destBucket
!= NULL
);
365 void AlphabeticIndex::getIndexExemplars(UnicodeSet
&dest
, const Locale
&locale
, UErrorCode
&status
) {
366 if (U_FAILURE(status
)) {
370 LocalULocaleDataPointer
uld(ulocdata_open(locale
.getName(), &status
));
371 UnicodeSet exemplars
;
372 ulocdata_getExemplarSet(uld
.getAlias(), exemplars
.toUSet(), 0, ULOCDATA_ES_INDEX
, &status
);
373 if (U_SUCCESS(status
)) {
374 dest
.addAll(exemplars
);
377 status
= U_ZERO_ERROR
; // Clear out U_MISSING_RESOURCE_ERROR
379 // Locale data did not include explicit Index characters.
380 // Synthesize a set of them from the locale's standard exemplar characters.
382 ulocdata_getExemplarSet(uld
.getAlias(), exemplars
.toUSet(), 0, ULOCDATA_ES_STANDARD
, &status
);
383 if (U_FAILURE(status
)) {
387 // Upper-case any that aren't already so.
388 // (We only do this for synthesized index characters.)
390 UnicodeSetIterator
it(exemplars
);
391 UnicodeString upperC
;
392 UnicodeSet lowersToRemove
;
393 UnicodeSet uppersToAdd
;
395 const UnicodeString
&exemplarC
= it
.getString();
397 upperC
.toUpper(locale
);
398 if (exemplarC
!= upperC
) {
399 lowersToRemove
.add(exemplarC
);
400 uppersToAdd
.add(upperC
);
403 exemplars
.removeAll(lowersToRemove
);
404 exemplars
.addAll(uppersToAdd
);
406 // get the exemplars, and handle special cases
408 // question: should we add auxiliary exemplars?
409 if (exemplars
.containsSome(*CORE_LATIN
)) {
410 exemplars
.addAll(*CORE_LATIN
);
412 if (exemplars
.containsSome(*HANGUL
)) {
413 // cut down to small list
414 UnicodeSet
BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status
);
415 exemplars
.removeAll(BLOCK_HANGUL_SYLLABLES
);
416 exemplars
.addAll(*HANGUL
);
418 if (exemplars
.containsSome(*ETHIOPIC
)) {
419 // cut down to small list
420 // make use of the fact that Ethiopic is allocated in 8's, where
421 // the base is 0 mod 8.
422 UnicodeSetIterator
it(*ETHIOPIC
);
423 while (it
.next() && !it
.isString()) {
424 if ((it
.getCodepoint() & 0x7) != 0) {
425 exemplars
.remove(it
.getCodepoint());
429 dest
.addAll(exemplars
);
434 * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
436 static const UChar32 CGJ
= (UChar
)0x034F;
437 UnicodeString
AlphabeticIndex::separated(const UnicodeString
&item
) {
438 UnicodeString result
;
439 if (item
.length() == 0) {
444 UChar32 cp
= item
.char32At(i
);
446 i
= item
.moveIndex32(i
, 1);
447 if (i
>= item
.length()) {
// Equality operator. The parameter is left unnamed, so the result cannot
// depend on the other object.
// NOTE(review): the function body is not visible in this listing; confirm the
// value it returns before relying on it.
456 UBool
AlphabeticIndex::operator==(const AlphabeticIndex
& /* other */) const {
// Inequality operator. The parameter is left unnamed, so the result cannot
// depend on the other object.
// NOTE(review): the function body is not visible in this listing; confirm the
// value it returns before relying on it.
461 UBool
AlphabeticIndex::operator!=(const AlphabeticIndex
& /* other */) const {
466 const RuleBasedCollator
&AlphabeticIndex::getCollator() const {
467 // There are no known non-RuleBasedCollator collators, and none ever expected.
468 // But, in case that changes, better a null pointer than a wrong type.
469 return *dynamic_cast<RuleBasedCollator
*>(collator_
);
473 const UnicodeString
&AlphabeticIndex::getInflowLabel() const {
477 const UnicodeString
&AlphabeticIndex::getOverflowLabel() const {
478 return overflowLabel_
;
482 const UnicodeString
&AlphabeticIndex::getUnderflowLabel() const {
483 return underflowLabel_
;
487 AlphabeticIndex
&AlphabeticIndex::setInflowLabel(const UnicodeString
&label
, UErrorCode
&/*status*/) {
488 inflowLabel_
= label
;
489 indexBuildRequired_
= TRUE
;
494 AlphabeticIndex
&AlphabeticIndex::setOverflowLabel(const UnicodeString
&label
, UErrorCode
&/*status*/) {
495 overflowLabel_
= label
;
496 indexBuildRequired_
= TRUE
;
501 AlphabeticIndex
&AlphabeticIndex::setUnderflowLabel(const UnicodeString
&label
, UErrorCode
&/*status*/) {
502 underflowLabel_
= label
;
503 indexBuildRequired_
= TRUE
;
508 int32_t AlphabeticIndex::getMaxLabelCount() const {
509 return maxLabelCount_
;
513 AlphabeticIndex
&AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount
, UErrorCode
&status
) {
514 if (U_FAILURE(status
)) {
517 if (maxLabelCount
<= 0) {
518 status
= U_ILLEGAL_ARGUMENT_ERROR
;
521 maxLabelCount_
= maxLabelCount
;
522 if (maxLabelCount
< bucketList_
->size()) {
523 indexBuildRequired_
= TRUE
;
529 const UnicodeString
&AlphabeticIndex::getOverflowComparisonString(const UnicodeString
&lowerLimit
, UErrorCode
&/*status*/) {
530 for (int32_t i
=0; i
<firstScriptCharacters_
->size(); i
++) {
531 const UnicodeString
*s
=
532 static_cast<const UnicodeString
*>(firstScriptCharacters_
->elementAt(i
));
533 if (collator_
->compare(*s
, lowerLimit
) > 0) {
537 return *EMPTY_STRING
;
540 UnicodeSet
*AlphabeticIndex::getScriptSet(UnicodeSet
&dest
, const UnicodeString
&codePoint
, UErrorCode
&status
) {
541 if (U_FAILURE(status
)) {
544 UChar32 cp
= codePoint
.char32At(0);
545 UScriptCode scriptCode
= uscript_getScript(cp
, &status
);
546 dest
.applyIntPropertyValue(UCHAR_SCRIPT
, scriptCode
, status
);
551 // init() - Common code for constructors.
554 void AlphabeticIndex::init(UErrorCode
&status
) {
555 // Initialize statics if needed.
556 AlphabeticIndex::staticInit(status
);
558 // Put the object into a known state so that the destructor will function.
563 collatorPrimaryOnly_
= NULL
;
564 currentBucket_
= NULL
;
565 firstScriptCharacters_
= NULL
;
566 initialLabels_
= NULL
;
567 indexBuildRequired_
= TRUE
;
568 inputRecords_
= NULL
;
571 labelsIterIndex_
= 0;
573 noDistinctSorting_
= NULL
;
574 notAlphabetic_
= NULL
;
577 if (U_FAILURE(status
)) {
580 alreadyIn_
= uhash_open(uhash_hashUnicodeString
, // Key Hash,
581 uhash_compareUnicodeString
, // key Comparator,
582 NULL
, // value Comparator
584 uhash_setKeyDeleter(alreadyIn_
, uprv_deleteUObject
);
585 uhash_setValueDeleter(alreadyIn_
, uprv_deleteUObject
);
587 bucketList_
= new UVector(status
);
588 bucketList_
->setDeleter(alphaIndex_deleteBucket
);
589 labels_
= new UVector(status
);
590 labels_
->setDeleter(uprv_deleteUObject
);
591 labels_
->setComparer(uhash_compareUnicodeString
);
592 inputRecords_
= new UVector(status
);
593 inputRecords_
->setDeleter(alphaIndex_deleteRecord
);
595 noDistinctSorting_
= new UnicodeSet();
596 notAlphabetic_
= new UnicodeSet();
597 initialLabels_
= new UnicodeSet();
599 inflowLabel_
.remove();
600 inflowLabel_
.append((UChar
)0x2026); // Ellipsis
601 overflowLabel_
= inflowLabel_
;
602 underflowLabel_
= inflowLabel_
;
604 // TODO: check for memory allocation failures.
608 static UBool indexCharactersAreInitialized
= FALSE
;
610 // Index Characters Clean up function. Delete statically allocated constant stuff.
612 static UBool U_CALLCONV
indexCharacters_cleanup(void) {
613 AlphabeticIndex::staticCleanup();
618 void AlphabeticIndex::staticCleanup() {
627 delete IGNORE_SCRIPTS
;
628 IGNORE_SCRIPTS
= NULL
;
635 nfkdNormalizer
= NULL
; // ref to a singleton. Do not delete.
636 indexCharactersAreInitialized
= FALSE
;
640 UnicodeSet
*AlphabeticIndex::ALPHABETIC
;
641 UnicodeSet
*AlphabeticIndex::HANGUL
;
642 UnicodeSet
*AlphabeticIndex::ETHIOPIC
;
643 UnicodeSet
*AlphabeticIndex::CORE_LATIN
;
644 UnicodeSet
*AlphabeticIndex::IGNORE_SCRIPTS
;
645 UnicodeSet
*AlphabeticIndex::TO_TRY
;
646 UnicodeSet
*AlphabeticIndex::UNIHAN
;
647 const UnicodeString
*AlphabeticIndex::EMPTY_STRING
;
650 // staticInit() One-time initialization of constants.
651 // Thread safe. Called from constructors.
652 // Mutex overhead is not a concern. AlphabeticIndex constructors are
653 // sufficiently heavy that the cost of the mutex check is not significant.
655 void AlphabeticIndex::staticInit(UErrorCode
&status
) {
656 static UMTX IndexCharsInitMutex
;
658 Mutex
mutex(&IndexCharsInitMutex
);
659 if (indexCharactersAreInitialized
|| U_FAILURE(status
)) {
662 UBool finishedInit
= FALSE
;
665 UnicodeString alphaString
= UNICODE_STRING_SIMPLE("[[:alphabetic:]-[:mark:]]");
666 ALPHABETIC
= new UnicodeSet(alphaString
, status
);
667 if (ALPHABETIC
== NULL
) {
671 HANGUL
= new UnicodeSet();
672 HANGUL
->add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).add(0xB9C8).add(0xBC14).add(0xC0AC).
673 add(0xC544).add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).add(0xD30C).add(0xD558);
679 UnicodeString EthiopicStr
= UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
680 ETHIOPIC
= new UnicodeSet(EthiopicStr
, status
);
681 if (ETHIOPIC
== NULL
) {
685 CORE_LATIN
= new UnicodeSet((UChar32
)0x61, (UChar32
)0x7a); // ('a', 'z');
686 if (CORE_LATIN
== NULL
) {
690 UnicodeString IgnoreStr
= UNICODE_STRING_SIMPLE(
691 "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]");
692 IGNORE_SCRIPTS
= new UnicodeSet(IgnoreStr
, status
);
693 IGNORE_SCRIPTS
->freeze();
694 if (IGNORE_SCRIPTS
== NULL
) {
698 UnicodeString nfcqcStr
= UNICODE_STRING_SIMPLE("[:^nfcqc=no:]");
699 TO_TRY
= new UnicodeSet(nfcqcStr
, status
);
700 if (TO_TRY
== NULL
) {
704 UnicodeString unihanStr
= UNICODE_STRING_SIMPLE("[:script=Hani:]");
705 UNIHAN
= new UnicodeSet(unihanStr
, status
);
706 if (UNIHAN
== NULL
) {
710 EMPTY_STRING
= new UnicodeString();
712 nfkdNormalizer
= Normalizer2::getNFKDInstance(status
);
713 if (nfkdNormalizer
== NULL
) {
720 if (!finishedInit
&& U_SUCCESS(status
)) {
721 status
= U_MEMORY_ALLOCATION_ERROR
;
723 if (U_FAILURE(status
)) {
724 indexCharacters_cleanup();
727 ucln_i18n_registerCleanup(UCLN_I18N_INDEX_CHARACTERS
, indexCharacters_cleanup
);
728 indexCharactersAreInitialized
= TRUE
;
// Comparator callback for sorting a vector of UnicodeString pointers.
//   context: cast below to a const Collator pointer.
//   left, right: cast below to UElement pointers whose `pointer` fields are
//                read as const UnicodeString pointers.
// NOTE(review): the return statements are elided in this listing, so the
// ordering of a null string relative to a non-null one cannot be confirmed
// from here.
733 // Comparison function for UVector<UnicodeString *> sorting with a collator.
735 static int32_t U_CALLCONV
736 sortCollateComparator(const void *context
, const void *left
, const void *right
) {
737 const UElement
*leftElement
= static_cast<const UElement
*>(left
);
738 const UElement
*rightElement
= static_cast<const UElement
*>(right
);
739 const UnicodeString
*leftString
= static_cast<const UnicodeString
*>(leftElement
->pointer
);
740 const UnicodeString
*rightString
= static_cast<const UnicodeString
*>(rightElement
->pointer
);
741 const Collator
*col
= static_cast<const Collator
*>(context
);
// Identical pointers (including two nulls) are handled before any dereference.
743 if (leftString
== rightString
) {
744 // Catches case where both are NULL
// Exactly one side can be null past this point; each case is tested separately.
747 if (leftString
== NULL
) {
750 if (rightString
== NULL
) {
// Both strings are non-null here; the collator decides their order.
753 Collator::EComparisonResult r
= col
->compare(*leftString
, *rightString
);
758 // Comparison function for UVector<Record *> sorting with a collator.
760 static int32_t U_CALLCONV
761 recordCompareFn(const void *context
, const void *left
, const void *right
) {
762 const UElement
*leftElement
= static_cast<const UElement
*>(left
);
763 const UElement
*rightElement
= static_cast<const UElement
*>(right
);
764 const AlphabeticIndex::Record
*leftRec
= static_cast<const AlphabeticIndex::Record
*>(leftElement
->pointer
);
765 const AlphabeticIndex::Record
*rightRec
= static_cast<const AlphabeticIndex::Record
*>(rightElement
->pointer
);
766 const Collator
*col
= static_cast<const Collator
*>(context
);
768 Collator::EComparisonResult r
= col
->compare(leftRec
->sortingName_
, rightRec
->sortingName_
);
769 if (r
== Collator::EQUAL
) {
770 if (leftRec
->serialNumber_
< rightRec
->serialNumber_
) {
772 } else if (leftRec
->serialNumber_
> rightRec
->serialNumber_
) {
773 r
= Collator::GREATER
;
782 // First characters in scripts.
783 // Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
784 // The vector is sorted according to this index's collation.
786 // This code is too slow to use, so for now hard code the data.
787 // Hard coded implementation is follows.
789 UVector
*AlphabeticIndex::firstStringsInScript(Collator
*ruleBasedCollator
, UErrorCode
&status
) {
791 if (U_FAILURE(status
)) {
795 UnicodeString results
[USCRIPT_CODE_LIMIT
];
796 UnicodeString LOWER_A
= UNICODE_STRING_SIMPLE("a");
798 UnicodeSetIterator
siter(*TO_TRY
);
799 while (siter
.next()) {
800 const UnicodeString
¤t
= siter
.getString();
801 Collator::EComparisonResult r
= ruleBasedCollator
->compare(current
, LOWER_A
);
802 if (r
< 0) { // TODO fix; we only want "real" script characters, not
807 int script
= uscript_getScript(current
.char32At(0), &status
);
808 if (results
[script
].length() == 0) {
809 results
[script
] = current
;
811 else if (ruleBasedCollator
->compare(current
, results
[script
]) < 0) {
812 results
[script
] = current
;
817 UnicodeSet expansions
;
818 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(ruleBasedCollator
);
819 const UCollator
*uRuleBasedCollator
= rbc
->getUCollator();
820 ucol_getContractionsAndExpansions(uRuleBasedCollator
, extras
.toUSet(), expansions
.toUSet(), true, &status
);
821 extras
.addAll(expansions
).removeAll(*TO_TRY
);
822 if (extras
.size() != 0) {
823 const Normalizer2
*normalizer
= Normalizer2::getNFKCInstance(status
);
824 UnicodeSetIterator
extrasIter(extras
);
825 while (extrasIter
.next()) {
826 const UnicodeString
¤t
= extrasIter
.next();
827 if (!TO_TRY
->containsAll(current
))
829 if (!normalizer
->isNormalized(current
, status
) ||
830 ruleBasedCollator
->compare(current
, LOWER_A
) < 0) {
833 int script
= uscript_getScript(current
.char32At(0), &status
);
834 if (results
[script
].length() == 0) {
835 results
[script
] = current
;
836 } else if (ruleBasedCollator
->compare(current
, results
[script
]) < 0) {
837 results
[script
] = current
;
842 UVector
*dest
= new UVector(status
);
843 dest
->setDeleter(uprv_deleteUObject
);
844 for (uint32_t i
= 0; i
< sizeof(results
) / sizeof(results
[0]); ++i
) {
845 if (results
[i
].length() > 0) {
846 dest
->addElement(results
[i
].clone(), status
);
849 dest
->sortWithUComparator(sortCollateComparator
, ruleBasedCollator
, status
);
// Table layout: a flat UTF-16 array of NUL-terminated strings, one per script;
// supplementary characters appear as a lead/trail surrogate pair before the
// terminating 0. firstStringsInScript(UErrorCode&) walks the array one string
// at a time.
// NOTE(review): the embedded line numbers skip in several places inside the
// initializer and the closing of the array is not visible, so rows may be
// missing from this listing; verify against the ICU4J declaration named below
// before editing the data.
856 // First characters in scripts.
857 // Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
858 // The vector is sorted according to this index's collation.
860 // It takes too much time to compute this from character properties, so hard code it for now.
861 // Character constants copied from corresponding declaration in ICU4J.
862 // See main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
864 static UChar HACK_FIRST_CHARS_IN_SCRIPTS
[] = { 0x61, 0, 0x03B1, 0,
865 0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
866 0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
867 0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0,
868 0xAAF2, 0, // Meetei Mayek
869 0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0,
870 U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0, // Sharada
871 U16_LEAD(0x11680), U16_TRAIL(0x11680), 0, // Takri
873 0xD802, 0xDE00, 0, 0x0E01, 0,
875 0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
876 0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0,
877 U16_LEAD(0x11103), U16_TRAIL(0x11103), 0, // Chakma
878 0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
879 0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
880 0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0,
881 U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0, // Miao
883 0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
885 U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0, // Sora Sompeng
886 0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
887 0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0,
888 U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0, // Meroitic Cursive
889 U16_LEAD(0x10980), U16_TRAIL(0x10980), 0, // Meroitic Hieroglyphs
892 UVector
*AlphabeticIndex::firstStringsInScript(UErrorCode
&status
) {
893 if (U_FAILURE(status
)) {
896 UVector
*dest
= new UVector(status
);
898 if (U_SUCCESS(status
)) {
899 status
= U_MEMORY_ALLOCATION_ERROR
;
903 dest
->setDeleter(uprv_deleteUObject
);
904 const UChar
*src
= HACK_FIRST_CHARS_IN_SCRIPTS
;
905 const UChar
*limit
= src
+ sizeof(HACK_FIRST_CHARS_IN_SCRIPTS
) / sizeof(HACK_FIRST_CHARS_IN_SCRIPTS
[0]);
907 if (U_FAILURE(status
)) {
910 UnicodeString
*str
= new UnicodeString(src
, -1);
912 status
= U_MEMORY_ALLOCATION_ERROR
;
914 dest
->addElement(str
, status
);
915 src
+= str
->length() + 1;
917 } while (src
< limit
);
918 dest
->sortWithUComparator(sortCollateComparator
, collator_
, status
);
// Classify a locale into an ELangType by inspecting, in this order, its
// language (compared with "zh"), its script (compared with "Hant") and its
// country (compared with "TW").
// NOTE(review): the return statement of each branch is elided in this listing;
// confirm which enumerator each test selects before changing the logic.
923 AlphabeticIndex::ELangType
AlphabeticIndex::langTypeFromLocale(const Locale
&loc
) {
924 const char *lang
= loc
.getLanguage();
// Taken for every language other than "zh".
925 if (uprv_strcmp(lang
, "zh") != 0) {
928 const char *script
= loc
.getScript();
// Taken when the locale's script is exactly "Hant".
929 if (uprv_strcmp(script
, "Hant") == 0) {
932 const char *country
= loc
.getCountry();
// Taken when the locale's country is exactly "TW".
933 if (uprv_strcmp(country
, "TW") == 0) {
941 // Pinyin Hacks. Direct port from Java.
// Code point looked up in the collator's tailored set by initPinyinBounds() to
// decide whether the "long" Pinyin tables apply.
944 static const UChar32 probeCharInLong
= 0x28EAD;
// One character per Pinyin bucket. hackName() reads this table (through
// PINYIN_LOWER_BOUNDS) with the index it found in the matching lookup table.
947 static const UChar PINYIN_LOWER_BOUNDS_SHORT
[] = { // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"
948 0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
949 /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};
952 // Pinyin lookup tables copied, pasted (and reformatted) from the ICU4J code.
954 AlphabeticIndex::PinyinLookup
AlphabeticIndex::HACK_PINYIN_LOOKUP_SHORT
= {
955 {(UChar
)0, (UChar
)0, (UChar
)0}, // A
956 {(UChar
)0x516B, (UChar
)0, (UChar
)0}, // B
957 {(UChar
)0x5693, (UChar
)0, (UChar
)0}, // C
958 {(UChar
)0x5491, (UChar
)0, (UChar
)0}, // D
959 {(UChar
)0x59B8, (UChar
)0, (UChar
)0}, // E
960 {(UChar
)0x53D1, (UChar
)0, (UChar
)0}, // F
961 {(UChar
)0x65EE, (UChar
)0, (UChar
)0}, // G
962 {(UChar
)0x54C8, (UChar
)0, (UChar
)0}, // H
963 {(UChar
)0x4E0C, (UChar
)0, (UChar
)0}, // J
964 {(UChar
)0x5494, (UChar
)0, (UChar
)0}, // K
965 {(UChar
)0x5783, (UChar
)0, (UChar
)0}, // L
966 {(UChar
)0x5452, (UChar
)0, (UChar
)0}, // M
967 {(UChar
)0x5514, (UChar
)0, (UChar
)0}, // N
968 {(UChar
)0x5594, (UChar
)0, (UChar
)0}, // O
969 {(UChar
)0x5991, (UChar
)0, (UChar
)0}, // P
970 {(UChar
)0x4E03, (UChar
)0, (UChar
)0}, // Q
971 {(UChar
)0x513F, (UChar
)0, (UChar
)0}, // R
972 {(UChar
)0x4EE8, (UChar
)0, (UChar
)0}, // S
973 {(UChar
)0x4ED6, (UChar
)0, (UChar
)0}, // T
974 {(UChar
)0x7A75, (UChar
)0, (UChar
)0}, // W
975 {(UChar
)0x5915, (UChar
)0, (UChar
)0}, // X
976 {(UChar
)0x4E2B, (UChar
)0, (UChar
)0}, // Y
977 {(UChar
)0x5E00, (UChar
)0, (UChar
)0}, // Z
978 {(UChar
)0xFFFF, (UChar
)0, (UChar
)0}, // mark end of array
// Bucket characters paired with HACK_PINYIN_LOOKUP_LONG; initPinyinBounds()
// selects this table when the long Pinyin tables are in use. The values listed
// here are the same as those of PINYIN_LOWER_BOUNDS_SHORT.
981 static const UChar PINYIN_LOWER_BOUNDS_LONG
[] = { // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz";
982 0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
983 /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};
985 AlphabeticIndex::PinyinLookup
AlphabeticIndex::HACK_PINYIN_LOOKUP_LONG
= {
986 {(UChar
)0, (UChar
)0, (UChar
)0}, // A
987 {(UChar
)0x516B, (UChar
)0, (UChar
)0}, // b
988 {(UChar
)0xD863, (UChar
)0xDEAD, (UChar
)0}, // c
989 {(UChar
)0xD844, (UChar
)0xDE51, (UChar
)0}, // d
990 {(UChar
)0x59B8, (UChar
)0, (UChar
)0}, // e
991 {(UChar
)0x53D1, (UChar
)0, (UChar
)0}, // f
992 {(UChar
)0xD844, (UChar
)0xDE45, (UChar
)0}, // g
993 {(UChar
)0x54C8, (UChar
)0, (UChar
)0}, // h
994 {(UChar
)0x4E0C, (UChar
)0, (UChar
)0}, // j
995 {(UChar
)0x5494, (UChar
)0, (UChar
)0}, // k
996 {(UChar
)0x3547, (UChar
)0, (UChar
)0}, // l
997 {(UChar
)0x5452, (UChar
)0, (UChar
)0}, // m
998 {(UChar
)0x5514, (UChar
)0, (UChar
)0}, // n
999 {(UChar
)0x5594, (UChar
)0, (UChar
)0}, // o
1000 {(UChar
)0xD84F, (UChar
)0xDC7A, (UChar
)0}, // p
1001 {(UChar
)0x4E03, (UChar
)0, (UChar
)0}, // q
1002 {(UChar
)0x513F, (UChar
)0, (UChar
)0}, // r
1003 {(UChar
)0x4EE8, (UChar
)0, (UChar
)0}, // s
1004 {(UChar
)0x4ED6, (UChar
)0, (UChar
)0}, // t
1005 {(UChar
)0x7A75, (UChar
)0, (UChar
)0}, // w
1006 {(UChar
)0x5915, (UChar
)0, (UChar
)0}, // x
1007 {(UChar
)0x4E2B, (UChar
)0, (UChar
)0}, // y
1008 {(UChar
)0x5E00, (UChar
)0, (UChar
)0}, // z
1009 {(UChar
)0xFFFF, (UChar
)0, (UChar
)0}, // mark end of array
1014 // Probe the collation data, and decide which Pinyin tables should be used
1016 // ICU can be built with a choice between two Chinese collations.
1017 // The hack Pinyin tables to use depend on which one is in use.
1018 // We can assume that any given copy of ICU will have only one of the collations available,
1019 // and that there is no way, in a given process, to create two alphabetic indexes using
1020 // different Chinese collations. Which means the probe can be done once
1021 // and the results cached.
1023 // This whole arrangement is temporary.
1025 AlphabeticIndex::PinyinLookup
*AlphabeticIndex::HACK_PINYIN_LOOKUP
= NULL
;
1026 const UChar
*AlphabeticIndex::PINYIN_LOWER_BOUNDS
= NULL
;
// Choose the Pinyin table pair (LONG or SHORT) that matches the Chinese collation
// data behind 'col', and publish it through the class-static pointers
// PINYIN_LOWER_BOUNDS and HACK_PINYIN_LOOKUP.
//
// @param col    Collator whose tailored set is probed for probeCharInLong.
// @param status ICU error code. Becomes U_MEMORY_ALLOCATION_ERROR when
//               getTailoredSet() returns NULL without having reported a failure.
1028 void AlphabeticIndex::initPinyinBounds(const Collator
*col
, UErrorCode
&status
) {
// A non-NULL PINYIN_LOWER_BOUNDS means an earlier call already chose the tables.
// NOTE(review): the statement guarded by this test is not visible here -
// presumably an early return; confirm. Also confirm how concurrent first calls
// are synchronized, since the statics are shared by all instances.
1031 if (PINYIN_LOWER_BOUNDS
!= NULL
) {
// NOTE(review): ICU documents the set returned by getTailoredSet() as owned by
// the caller. No release of colSet is visible in these lines - confirm it is
// deleted on both the failure path and the success path.
1035 UnicodeSet
*colSet
= col
->getTailoredSet(status
);
1036 if (U_FAILURE(status
) || colSet
== NULL
) {
// A NULL set with a still-successful status is reported as an allocation failure.
1038 if (U_SUCCESS(status
)) {
1039 status
= U_MEMORY_ALLOCATION_ERROR
;
// The probe itself: the long tables are chosen only if the collator's tailoring
// contains probeCharInLong.
1043 UBool useLongTables
= colSet
->contains(probeCharInLong
);
// Both statics are switched as a pair; hackName() uses one index into both tables.
1047 if (useLongTables
) {
1048 PINYIN_LOWER_BOUNDS
= PINYIN_LOWER_BOUNDS_LONG
;
1049 HACK_PINYIN_LOOKUP
= &HACK_PINYIN_LOOKUP_LONG
;
// NOTE(review): the line separating the two assignment pairs is not visible;
// presumably the SHORT pair below is the else branch - confirm.
1051 PINYIN_LOWER_BOUNDS
= PINYIN_LOWER_BOUNDS_SHORT
;
1052 HACK_PINYIN_LOOKUP
= &HACK_PINYIN_LOOKUP_SHORT
;
1058 // Modify a Chinese name by prepending a Latin letter. The modified name is used
1059 // when putting records (names) into buckets, to put the name under a Latin index heading.
// Build the sorting form of 'name' used for bucket placement.
//
// @param dest Output string supplied by the caller (Record passes sortingName_).
//             NOTE(review): the statements that write dest are not visible in
//             these lines - confirm what it holds on each exit path.
// @param name The record name as supplied by the user.
// @param col  Collator used both to probe for the Pinyin tables and to compare
//             'name' against the table entries.
1061 void AlphabeticIndex::hackName(UnicodeString
&dest
, const UnicodeString
&name
, const Collator
*col
) {
// Only simplified-Chinese indexes, and only names whose first code point is in
// the UNIHAN set, go through the Pinyin lookup below.
1063 if (langType_
!= kSimplified
|| !UNIHAN
->contains(name
.char32At(0))) {
// A local status is used because this function has no error parameter.
// NOTE(review): what happens after a failed probe is not visible here.
1068 UErrorCode status
= U_ZERO_ERROR
;
1069 initPinyinBounds(col
, status
);
1070 if (U_FAILURE(status
)) {
1074 // TODO: use binary search
// Linear scan of the lookup table; the loop has no condition of its own and
// relies on the checks inside the body to stop.
// NOTE(review): the declaration of 'index' is not visible in these lines.
1076 for (index
=0; ; index
++) {
// An entry whose first UChar is 0xffff is the end-of-table marker.
1077 if ((*HACK_PINYIN_LOOKUP
)[index
][0] == (UChar
)0xffff) {
// Compare the name against the current table entry. The UnicodeString is built
// with (TRUE, ptr, -1), i.e. from the NUL-terminated UChar array of the entry.
1081 int32_t compareResult
= col
->compare(name
, UnicodeString(TRUE
, (*HACK_PINYIN_LOOKUP
)[index
], -1));
// NOTE(review): the bodies of the next two tests are not visible here; the
// "< 0" case is handled before the more general "<= 0" case.
1082 if (compareResult
< 0) {
1085 if (compareResult
<= 0) {
// The index found by the scan selects the entry of the lower-bounds table.
1089 UChar c
= PINYIN_LOWER_BOUNDS
[index
];
1098 * Comparator that returns "better" items first, where shorter NFKD is better, and otherwise NFKD binary order is
1099 * better, and otherwise binary order is better.
1101 * For use with array sort or UVector.
1102 * @param context A UErrorCode pointer.
1103 * @param left A UElement pointer, which must refer to a UnicodeString *
1104 * @param right A UElement pointer, which must refer to a UnicodeString *
// Comparator callback over two UElements that each hold a UnicodeString pointer.
// 'context' carries a UErrorCode * through which normalization errors are reported.
// NOTE(review): the branches that choose between the three comparisons below are
// not visible in these lines; the visible order is NFKD length, then NFKD code
// point order, then code point order of the original strings.
1107 static int32_t U_CALLCONV
1108 PreferenceComparator(const void *context
, const void *left
, const void *right
) {
// Unwrap the two UElements to the strings they point at.
1109 const UElement
*leftElement
= static_cast<const UElement
*>(left
);
1110 const UElement
*rightElement
= static_cast<const UElement
*>(right
);
1111 const UnicodeString
*s1
= static_cast<const UnicodeString
*>(leftElement
->pointer
);
1112 const UnicodeString
*s2
= static_cast<const UnicodeString
*>(rightElement
->pointer
);
// The caller's error code is reached through the const void * context.
1113 UErrorCode
&status
= *(UErrorCode
*)(context
); // Cast off both static and const.
// NOTE(review): nfkdNormalizer is a file-level static pointer and is used here
// without a NULL check - confirm it is always set before a sort can run.
1118 UnicodeString n1
= nfkdNormalizer
->normalize(*s1
, status
);
1119 UnicodeString n2
= nfkdNormalizer
->normalize(*s2
, status
);
// First criterion: difference of the normalized lengths (negative when s1's
// normalized form is shorter).
1120 int32_t result
= n1
.length() - n2
.length();
// Second criterion: code point order of the normalized forms.
1125 result
= n1
.compareCodePointOrder(n2
);
// Last criterion: code point order of the original strings.
1129 return s1
->compareCodePointOrder(*s2
);
1134 // Constructor & Destructor for AlphabeticIndex::Record
1136 // Records are internal only, instances are not directly surfaced in the public API.
1137 // This class is mostly struct-like, with all public fields.
1139 AlphabeticIndex::Record::Record(AlphabeticIndex
*alphaIndex
, const UnicodeString
&name
, const void *data
):
1140 alphaIndex_(alphaIndex
), name_(name
), data_(data
)
1142 UnicodeString prefixedName
;
1143 alphaIndex
->hackName(sortingName_
, name_
, alphaIndex
->collatorPrimaryOnly_
);
1144 serialNumber_
= ++alphaIndex
->recordCounter_
;
// Record destructor. NOTE(review): no body statements are visible here; data_ is
// stored as a const void * handed in by the caller - confirm it is not owned.
1147 AlphabeticIndex::Record::~Record() {
// Append one (name, data) record to the pending input list and flag the index
// as needing a rebuild.
//
// @param name   Record name; handed to the Record constructor.
// @param data   Opaque caller pointer stored with the record.
// @param status ICU error code; also receives any error from addElement().
// @return       Declared as AlphabeticIndex &. The return statements are not
//               visible in these lines (presumably *this - confirm).
1151 AlphabeticIndex
& AlphabeticIndex::addRecord(const UnicodeString
&name
, const void *data
, UErrorCode
&status
) {
// NOTE(review): the statement guarded by this failure check is not visible.
1152 if (U_FAILURE(status
)) {
// NOTE(review): the result of 'new' is passed on without a NULL check, and the
// outcome of addElement() is not examined in the visible lines - confirm how
// allocation failure and a failed insert are handled.
1155 Record
*r
= new Record(this, name
, data
);
1156 inputRecords_
->addElement(r
, status
);
// Any change to the input records invalidates a previously built index.
1157 indexBuildRequired_
= TRUE
;
// Disabled debug output, kept for reference.
1160 //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1161 // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
// Remove every pending input record and flag the index as needing a rebuild.
//
// @param status ICU error code; tested on entry.
// @return       Declared as AlphabeticIndex &. The return statements are not
//               visible in these lines (presumably *this - confirm).
1166 AlphabeticIndex
&AlphabeticIndex::clearRecords(UErrorCode
&status
) {
// NOTE(review): the statement guarded by this failure check is not visible.
1167 if (U_FAILURE(status
)) {
1170 inputRecords_
->removeAllElements();
// The bucket contents no longer match the (now empty) input list.
1171 indexBuildRequired_
= TRUE
;
// Find the index of the bucket that 'name' belongs to.
//
// @param name   The name to place.
// @param status ICU error code.
// @return       A bucket index; the final fall-through value is the index of the
//               last bucket (bucketList_->size() - 1).
// NOTE(review): a statement before the status check, the value returned on
// failure, and the test applied to 'comp' inside the loop are not visible here.
1176 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString
&name
, UErrorCode
&status
) {
1178 if (U_FAILURE(status
)) {
1182 // For simplified Chinese prepend a prefix to the name.
1183 // For non-Chinese locales or non-Chinese names, the name is not modified.
1185 UnicodeString prefixedName
;
1186 hackName(prefixedName
, name
, collatorPrimaryOnly_
);
1188 // TODO: use a binary search.
// Walk the buckets in order, comparing the prefixed name with each bucket's
// lower boundary using the primary-strength collator.
1189 for (int32_t i
= 0; i
< bucketList_
->size(); ++i
) {
1190 Bucket
*bucket
= static_cast<Bucket
*>(bucketList_
->elementAt(i
));
1191 Collator::EComparisonResult comp
= collatorPrimaryOnly_
->compare(prefixedName
, bucket
->lowerBoundary_
);
1196 // Loop runs until we find the bucket following the one that would hold prefixedName.
1197 // If the prefixedName belongs in the last bucket the loop will drop out the bottom rather
1198 // than returning from the middle.
// NOTE(review): with an empty bucketList_ this expression evaluates to -1.
1200 return bucketList_
->size() - 1;
1204 int32_t AlphabeticIndex::getBucketIndex() const {
1205 return labelsIterIndex_
;
// Advance the bucket iteration and make the next bucket the current one.
//
// @param status ICU error code. Set to U_ENUM_OUT_OF_SYNC_ERROR when records
//               changed (indexBuildRequired_) while a bucket was current.
// @return       UBool. NOTE(review): the return statements, and the statement
//               that advances labelsIterIndex_, are not visible in these lines.
1209 UBool
AlphabeticIndex::nextBucket(UErrorCode
&status
) {
1210 if (U_FAILURE(status
)) {
// A pending rebuild is only an error in mid-iteration, i.e. once a bucket has
// been made current.
1213 if (indexBuildRequired_
&& currentBucket_
!= NULL
) {
1214 status
= U_ENUM_OUT_OF_SYNC_ERROR
;
// Second status check. NOTE(review): the statement between the two checks that
// could change status is not visible here.
1218 if (U_FAILURE(status
)) {
// Past the last bucket: clamp the index to size() so that repeated calls do not
// keep moving it.
1222 if (labelsIterIndex_
>= bucketList_
->size()) {
1223 labelsIterIndex_
= bucketList_
->size();
// Make the bucket current and restart the record iteration inside it.
1226 currentBucket_
= static_cast<Bucket
*>(bucketList_
->elementAt(labelsIterIndex_
));
1227 resetRecordIterator();
1231 const UnicodeString
&AlphabeticIndex::getBucketLabel() const {
1232 if (currentBucket_
!= NULL
) {
1233 return currentBucket_
->label_
;
1235 return *EMPTY_STRING
;
1240 UAlphabeticIndexLabelType
AlphabeticIndex::getBucketLabelType() const {
1241 if (currentBucket_
!= NULL
) {
1242 return currentBucket_
->labelType_
;
1244 return U_ALPHAINDEX_NORMAL
;
// Number of records in the bucket the bucket iterator is positioned on.
//
// @return The size of the current bucket's records_ vector.
// NOTE(review): the value returned when there is no current bucket is not
// visible in these lines.
1249 int32_t AlphabeticIndex::getBucketRecordCount() const {
1250 if (currentBucket_
!= NULL
) {
1251 return currentBucket_
->records_
->size();
// Put the bucket iterator back before the first bucket.
//
// @param status ICU error code; tested on entry.
// @return       Declared as AlphabeticIndex &. The return statements are not
//               visible in these lines (presumably *this - confirm).
1258 AlphabeticIndex
&AlphabeticIndex::resetBucketIterator(UErrorCode
&status
) {
// NOTE(review): the statement guarded by this failure check, and any statement
// between it and the resets below, are not visible here.
1259 if (U_FAILURE(status
)) {
// -1 is the "before the first bucket" position; there is no current bucket.
1263 labelsIterIndex_
= -1;
1264 currentBucket_
= NULL
;
// Advance the record iteration inside the current bucket.
//
// @param status ICU error code. Set to U_INVALID_STATE_ERROR when there is no
//               current bucket, and to U_ENUM_OUT_OF_SYNC_ERROR when the index
//               needs rebuilding.
// @return       UBool. NOTE(review): the return statements, and the statement
//               that advances itemsIterIndex_, are not visible in these lines.
1269 UBool
AlphabeticIndex::nextRecord(UErrorCode
&status
) {
1270 if (U_FAILURE(status
)) {
1273 if (currentBucket_
== NULL
) {
1274 // We are trying to iterate over the items in a bucket, but there is no
1275 // current bucket from the enumeration of buckets.
1276 status
= U_INVALID_STATE_ERROR
;
// Records were added or cleared since the buckets were built.
1279 if (indexBuildRequired_
) {
1280 status
= U_ENUM_OUT_OF_SYNC_ERROR
;
// Past the last record: clamp the index to size() so that repeated calls do not
// keep moving it.
1284 if (itemsIterIndex_
>= currentBucket_
->records_
->size()) {
1285 itemsIterIndex_
= currentBucket_
->records_
->size();
1292 const UnicodeString
&AlphabeticIndex::getRecordName() const {
1293 const UnicodeString
*retStr
= EMPTY_STRING
;
1294 if (currentBucket_
!= NULL
&&
1295 itemsIterIndex_
>= 0 &&
1296 itemsIterIndex_
< currentBucket_
->records_
->size()) {
1297 Record
*item
= static_cast<Record
*>(currentBucket_
->records_
->elementAt(itemsIterIndex_
));
1298 retStr
= &item
->name_
;
1303 const void *AlphabeticIndex::getRecordData() const {
1304 const void *retPtr
= NULL
;
1305 if (currentBucket_
!= NULL
&&
1306 itemsIterIndex_
>= 0 &&
1307 itemsIterIndex_
< currentBucket_
->records_
->size()) {
1308 Record
*item
= static_cast<Record
*>(currentBucket_
->records_
->elementAt(itemsIterIndex_
));
1309 retPtr
= item
->data_
;
// Put the record iterator back before the first record of the current bucket.
//
// @return Declared as AlphabeticIndex &. The return statement is not visible in
//         these lines (presumably *this - confirm).
1315 AlphabeticIndex
& AlphabeticIndex::resetRecordIterator() {
// -1 is the "before the first record" position.
1316 itemsIterIndex_
= -1;
// Construct a bucket with its label, lower boundary and label type, and allocate
// the vector that holds its records.
//
// @param label         Copied into label_.
// @param lowerBoundary Copied into lowerBoundary_.
// @param type          Stored in labelType_.
// @param status        ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when the
//                      records vector cannot be allocated.
1322 AlphabeticIndex::Bucket::Bucket(const UnicodeString
&label
,
1323 const UnicodeString
&lowerBoundary
,
1324 UAlphabeticIndexLabelType type
,
1325 UErrorCode
&status
):
// records_ starts as NULL so that it has a defined value if allocation is skipped.
1326 label_(label
), lowerBoundary_(lowerBoundary
), labelType_(type
), records_(NULL
) {
// NOTE(review): the statement guarded by this failure check is not visible here;
// presumably an early return that leaves records_ NULL - confirm.
1327 if (U_FAILURE(status
)) {
1330 records_
= new UVector(status
);
// The UVector constructor reports its own errors through status; a NULL result
// from 'new' with status still successful is turned into an allocation error.
1331 if (records_
== NULL
&& U_SUCCESS(status
)) {
1332 status
= U_MEMORY_ALLOCATION_ERROR
;
1337 AlphabeticIndex::Bucket::~Bucket() {