2 ******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
11 * Created by: Helena Shih
13 * Modification History:
15 * Date Name Description
16 * 2/5/97 aliu Modified createDefault to load collation data from
17 * binary files when possible. Added related methods
18 * createCollationFromFile, chopLocale, createPathName.
19 * 2/11/97 aliu Added methods addToCache, findInCache, which implement
20 * a Collation cache. Modified createDefault to look in
21 * cache first, and also to store newly created Collation
22 * objects in the cache. Modified to not use gLocPath.
23 * 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
24 * Moved cache out of Collation class.
25 * 2/13/97 aliu Moved several methods out of this class and into
26 * RuleBasedCollator, with modifications. Modified
27 * createDefault() to call new RuleBasedCollator(Locale&)
28 * constructor. General clean up and documentation.
29 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy constructor.
31 * 05/06/97 helena Added memory allocation error detection.
32 * 05/08/97 helena Added createInstance().
33 * 6/20/97 helena Java class name change.
34 * 04/23/99 stephen Removed EDecompositionMode, merged with
36 * 11/23/9 srl Inlining of some critical functions
37 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
38 * 2012-2014 markus Rewritten in C++ again.
41 #include "utypeinfo.h" // for 'typeid' to work
43 #include "unicode/utypes.h"
45 #if !UCONFIG_NO_COLLATION
47 #include "unicode/coll.h"
48 #include "unicode/tblcoll.h"
49 #include "collationdata.h"
50 #include "collationroot.h"
51 #include "collationtailoring.h"
62 static icu::Locale
* availableLocaleList
= NULL
;
63 static int32_t availableLocaleListCount
;
64 static icu::ICULocaleService
* gService
= NULL
;
65 static icu::UInitOnce gServiceInitOnce
= U_INITONCE_INITIALIZER
;
66 static icu::UInitOnce gAvailableLocaleListInitOnce
;
69 * Release all static memory held by collator.
72 static UBool U_CALLCONV
collator_cleanup(void) {
73 #if !UCONFIG_NO_SERVICE
78 gServiceInitOnce
.reset();
80 if (availableLocaleList
) {
81 delete []availableLocaleList
;
82 availableLocaleList
= NULL
;
84 availableLocaleListCount
= 0;
85 gAvailableLocaleListInitOnce
.reset();
93 #if !UCONFIG_NO_SERVICE
95 // ------------------------------------------
100 //-------------------------------------------
102 CollatorFactory::~CollatorFactory() {}
104 //-------------------------------------------
107 CollatorFactory::visible(void) const {
111 //-------------------------------------------
114 CollatorFactory::getDisplayName(const Locale
& objectLocale
,
115 const Locale
& displayLocale
,
116 UnicodeString
& result
)
118 return objectLocale
.getDisplayName(displayLocale
, result
);
121 // -------------------------------------
123 class ICUCollatorFactory
: public ICUResourceBundleFactory
{
125 ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL
, -1, US_INV
)) { }
126 virtual ~ICUCollatorFactory();
128 virtual UObject
* create(const ICUServiceKey
& key
, const ICUService
* service
, UErrorCode
& status
) const;
131 ICUCollatorFactory::~ICUCollatorFactory() {}
134 ICUCollatorFactory::create(const ICUServiceKey
& key
, const ICUService
* /* service */, UErrorCode
& status
) const {
135 if (handlesKey(key
, status
)) {
136 const LocaleKey
& lkey
= (const LocaleKey
&)key
;
138 // make sure the requested locale is correct
139 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
140 // but for ICU rb resources we use the actual one since it will fallback again
141 lkey
.canonicalLocale(loc
);
143 return Collator::makeInstance(loc
, status
);
148 // -------------------------------------
150 class ICUCollatorService
: public ICULocaleService
{
153 : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
155 UErrorCode status
= U_ZERO_ERROR
;
156 registerFactory(new ICUCollatorFactory(), status
);
159 virtual ~ICUCollatorService();
161 virtual UObject
* cloneInstance(UObject
* instance
) const {
162 return ((Collator
*)instance
)->clone();
165 virtual UObject
* handleDefault(const ICUServiceKey
& key
, UnicodeString
* actualID
, UErrorCode
& status
) const {
166 LocaleKey
& lkey
= (LocaleKey
&)key
;
168 // Ugly Hack Alert! We return an empty actualID to signal
169 // to callers that this is a default object, not a "real"
170 // service-created object. (TODO remove in 3.0) [aliu]
171 actualID
->truncate(0);
174 lkey
.canonicalLocale(loc
);
175 return Collator::makeInstance(loc
, status
);
178 virtual UObject
* getKey(ICUServiceKey
& key
, UnicodeString
* actualReturn
, UErrorCode
& status
) const {
180 if (actualReturn
== NULL
) {
183 return (Collator
*)ICULocaleService::getKey(key
, actualReturn
, status
);
186 virtual UBool
isDefault() const {
187 return countFactories() == 1;
191 ICUCollatorService::~ICUCollatorService() {}
193 // -------------------------------------
195 static void U_CALLCONV
initService() {
196 gService
= new ICUCollatorService();
197 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR
, collator_cleanup
);
201 static ICULocaleService
*
204 umtx_initOnce(gServiceInitOnce
, &initService
);
208 // -------------------------------------
213 UBool retVal
= !gServiceInitOnce
.isReset() && (getService() != NULL
);
217 #endif /* UCONFIG_NO_SERVICE */
219 static void U_CALLCONV
220 initAvailableLocaleList(UErrorCode
&status
) {
221 U_ASSERT(availableLocaleListCount
== 0);
222 U_ASSERT(availableLocaleList
== NULL
);
223 // for now, there is a hardcoded list, so just walk through that list and set it up.
224 UResourceBundle
*index
= NULL
;
225 UResourceBundle installed
;
228 ures_initStackObject(&installed
);
229 index
= ures_openDirect(U_ICUDATA_COLL
, "res_index", &status
);
230 ures_getByKey(index
, "InstalledLocales", &installed
, &status
);
232 if(U_SUCCESS(status
)) {
233 availableLocaleListCount
= ures_getSize(&installed
);
234 availableLocaleList
= new Locale
[availableLocaleListCount
];
236 if (availableLocaleList
!= NULL
) {
237 ures_resetIterator(&installed
);
238 while(ures_hasNext(&installed
)) {
239 const char *tempKey
= NULL
;
240 ures_getNextString(&installed
, NULL
, &tempKey
, &status
);
241 availableLocaleList
[i
++] = Locale(tempKey
);
244 U_ASSERT(availableLocaleListCount
== i
);
245 ures_close(&installed
);
248 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR
, collator_cleanup
);
251 static UBool
isAvailableLocaleListInitialized(UErrorCode
&status
) {
252 umtx_initOnce(gAvailableLocaleListInitOnce
, &initAvailableLocaleList
, status
);
253 return U_SUCCESS(status
);
257 // Collator public methods -----------------------------------------------
259 Collator
* U_EXPORT2
Collator::createInstance(UErrorCode
& success
)
261 return createInstance(Locale::getDefault(), success
);
264 Collator
* U_EXPORT2
Collator::createInstance(const Locale
& desiredLocale
,
267 if (U_FAILURE(status
))
270 #if !UCONFIG_NO_SERVICE
273 return (Collator
*)gService
->get(desiredLocale
, &actualLoc
, status
);
276 return makeInstance(desiredLocale
, status
);
280 Collator
* Collator::makeInstance(const Locale
& desiredLocale
,
283 Locale
validLocale("");
284 const CollationTailoring
*t
=
285 CollationLoader::loadTailoring(desiredLocale
, validLocale
, status
);
286 if (U_SUCCESS(status
)) {
287 Collator
*result
= new RuleBasedCollator(t
, validLocale
);
288 if (result
!= NULL
) {
291 status
= U_MEMORY_ALLOCATION_ERROR
;
294 t
->deleteIfZeroRefCount();
300 Collator::safeClone() const {
304 // implement deprecated, previously abstract method
305 Collator::EComparisonResult
Collator::compare(const UnicodeString
& source
,
306 const UnicodeString
& target
) const
308 UErrorCode ec
= U_ZERO_ERROR
;
309 return (EComparisonResult
)compare(source
, target
, ec
);
312 // implement deprecated, previously abstract method
313 Collator::EComparisonResult
Collator::compare(const UnicodeString
& source
,
314 const UnicodeString
& target
,
315 int32_t length
) const
317 UErrorCode ec
= U_ZERO_ERROR
;
318 return (EComparisonResult
)compare(source
, target
, length
, ec
);
321 // implement deprecated, previously abstract method
322 Collator::EComparisonResult
Collator::compare(const UChar
* source
, int32_t sourceLength
,
323 const UChar
* target
, int32_t targetLength
)
326 UErrorCode ec
= U_ZERO_ERROR
;
327 return (EComparisonResult
)compare(source
, sourceLength
, target
, targetLength
, ec
);
330 UCollationResult
Collator::compare(UCharIterator
&/*sIter*/,
331 UCharIterator
&/*tIter*/,
332 UErrorCode
&status
) const {
333 if(U_SUCCESS(status
)) {
334 // Not implemented in the base class.
335 status
= U_UNSUPPORTED_ERROR
;
340 UCollationResult
Collator::compareUTF8(const StringPiece
&source
,
341 const StringPiece
&target
,
342 UErrorCode
&status
) const {
343 if(U_FAILURE(status
)) {
346 UCharIterator sIter
, tIter
;
347 uiter_setUTF8(&sIter
, source
.data(), source
.length());
348 uiter_setUTF8(&tIter
, target
.data(), target
.length());
349 return compare(sIter
, tIter
, status
);
352 UBool
Collator::equals(const UnicodeString
& source
,
353 const UnicodeString
& target
) const
355 UErrorCode ec
= U_ZERO_ERROR
;
356 return (compare(source
, target
, ec
) == UCOL_EQUAL
);
359 UBool
Collator::greaterOrEqual(const UnicodeString
& source
,
360 const UnicodeString
& target
) const
362 UErrorCode ec
= U_ZERO_ERROR
;
363 return (compare(source
, target
, ec
) != UCOL_LESS
);
366 UBool
Collator::greater(const UnicodeString
& source
,
367 const UnicodeString
& target
) const
369 UErrorCode ec
= U_ZERO_ERROR
;
370 return (compare(source
, target
, ec
) == UCOL_GREATER
);
373 // this API ignores registered collators, since it returns an
374 // array of indefinite lifetime
375 const Locale
* U_EXPORT2
Collator::getAvailableLocales(int32_t& count
)
377 UErrorCode status
= U_ZERO_ERROR
;
378 Locale
*result
= NULL
;
380 if (isAvailableLocaleListInitialized(status
))
382 result
= availableLocaleList
;
383 count
= availableLocaleListCount
;
388 UnicodeString
& U_EXPORT2
Collator::getDisplayName(const Locale
& objectLocale
,
389 const Locale
& displayLocale
,
392 #if !UCONFIG_NO_SERVICE
394 UnicodeString locNameStr
;
395 LocaleUtility::initNameFromLocale(objectLocale
, locNameStr
);
396 return gService
->getDisplayName(locNameStr
, name
, displayLocale
);
399 return objectLocale
.getDisplayName(displayLocale
, name
);
402 UnicodeString
& U_EXPORT2
Collator::getDisplayName(const Locale
& objectLocale
,
405 return getDisplayName(objectLocale
, Locale::getDefault(), name
);
408 /* This is useless information */
409 /*void Collator::getVersion(UVersionInfo versionInfo) const
411 if (versionInfo!=NULL)
412 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
416 // UCollator protected constructor destructor ----------------------------
419 * Default constructor.
420 * Constructor is different from the old default Collator constructor.
421 * The task for determining the default collation strength and normalization mode
422 * is left to the child class.
431 * Empty constructor, does not handle the arguments.
432 * This constructor is done for backward compatibility with 1.7 and 1.8.
433 * The task for handling the argument collation strength and normalization
434 * mode is left to the child class.
435 * @param collationStrength collation strength
436 * @param decompositionMode
437 * @deprecated 2.4 use the default constructor instead
439 Collator::Collator(UCollationStrength
, UNormalizationMode
)
444 Collator::~Collator()
448 Collator::Collator(const Collator
&other
)
453 UBool
Collator::operator==(const Collator
& other
) const
455 // Subclasses: Call this method and then add more specific checks.
456 return typeid(*this) == typeid(other
);
459 UBool
Collator::operator!=(const Collator
& other
) const
461 return (UBool
)!(*this == other
);
464 int32_t U_EXPORT2
Collator::getBound(const uint8_t *source
,
465 int32_t sourceLength
,
466 UColBoundMode boundType
,
469 int32_t resultLength
,
472 return ucol_getBound(source
, sourceLength
, boundType
, noOfLevels
, result
, resultLength
, &status
);
476 Collator::setLocales(const Locale
& /* requestedLocale */, const Locale
& /* validLocale */, const Locale
& /*actualLocale*/) {
479 UnicodeSet
*Collator::getTailoredSet(UErrorCode
&status
) const
481 if(U_FAILURE(status
)) {
484 // everything can be changed
485 return new UnicodeSet(0, 0x10FFFF);
488 // -------------------------------------
490 #if !UCONFIG_NO_SERVICE
491 URegistryKey U_EXPORT2
492 Collator::registerInstance(Collator
* toAdopt
, const Locale
& locale
, UErrorCode
& status
)
494 if (U_SUCCESS(status
)) {
495 // Set the collator locales while registering so that createInstance()
496 // need not guess whether the collator's locales are already set properly
497 // (as they are by the data loader).
498 toAdopt
->setLocales(locale
, locale
, locale
);
499 return getService()->registerInstance(toAdopt
, locale
, status
);
504 // -------------------------------------
506 class CFactory
: public LocaleKeyFactory
{
508 CollatorFactory
* _delegate
;
512 CFactory(CollatorFactory
* delegate
, UErrorCode
& status
)
513 : LocaleKeyFactory(delegate
->visible() ? VISIBLE
: INVISIBLE
)
514 , _delegate(delegate
)
517 if (U_SUCCESS(status
)) {
519 _ids
= new Hashtable(status
);
521 const UnicodeString
* idlist
= _delegate
->getSupportedIDs(count
, status
);
522 for (int i
= 0; i
< count
; ++i
) {
523 _ids
->put(idlist
[i
], (void*)this, status
);
524 if (U_FAILURE(status
)) {
531 status
= U_MEMORY_ALLOCATION_ERROR
;
538 virtual UObject
* create(const ICUServiceKey
& key
, const ICUService
* service
, UErrorCode
& status
) const;
541 virtual const Hashtable
* getSupportedIDs(UErrorCode
& status
) const
543 if (U_SUCCESS(status
)) {
549 virtual UnicodeString
&
550 getDisplayName(const UnicodeString
& id
, const Locale
& locale
, UnicodeString
& result
) const;
553 CFactory::~CFactory()
560 CFactory::create(const ICUServiceKey
& key
, const ICUService
* /* service */, UErrorCode
& status
) const
562 if (handlesKey(key
, status
)) {
563 const LocaleKey
& lkey
= (const LocaleKey
&)key
;
565 lkey
.currentLocale(validLoc
);
566 return _delegate
->createCollator(validLoc
);
572 CFactory::getDisplayName(const UnicodeString
& id
, const Locale
& locale
, UnicodeString
& result
) const
574 if ((_coverage
& 0x1) == 0) {
575 UErrorCode status
= U_ZERO_ERROR
;
576 const Hashtable
* ids
= getSupportedIDs(status
);
577 if (ids
&& (ids
->get(id
) != NULL
)) {
579 LocaleUtility::initLocaleFromName(id
, loc
);
580 return _delegate
->getDisplayName(loc
, locale
, result
);
587 URegistryKey U_EXPORT2
588 Collator::registerFactory(CollatorFactory
* toAdopt
, UErrorCode
& status
)
590 if (U_SUCCESS(status
)) {
591 CFactory
* f
= new CFactory(toAdopt
, status
);
593 return getService()->registerFactory(f
, status
);
595 status
= U_MEMORY_ALLOCATION_ERROR
;
600 // -------------------------------------
603 Collator::unregister(URegistryKey key
, UErrorCode
& status
)
605 if (U_SUCCESS(status
)) {
607 return gService
->unregister(key
, status
);
609 status
= U_ILLEGAL_ARGUMENT_ERROR
;
613 #endif /* UCONFIG_NO_SERVICE */
615 class CollationLocaleListEnumeration
: public StringEnumeration
{
619 static UClassID U_EXPORT2
getStaticClassID(void);
620 virtual UClassID
getDynamicClassID(void) const;
622 CollationLocaleListEnumeration()
625 // The global variables should already be initialized.
626 //isAvailableLocaleListInitialized(status);
629 virtual ~CollationLocaleListEnumeration();
631 virtual StringEnumeration
* clone() const
633 CollationLocaleListEnumeration
*result
= new CollationLocaleListEnumeration();
635 result
->index
= index
;
640 virtual int32_t count(UErrorCode
&/*status*/) const {
641 return availableLocaleListCount
;
644 virtual const char* next(int32_t* resultLength
, UErrorCode
& /*status*/) {
646 if(index
< availableLocaleListCount
) {
647 result
= availableLocaleList
[index
++].getName();
648 if(resultLength
!= NULL
) {
649 *resultLength
= (int32_t)uprv_strlen(result
);
652 if(resultLength
!= NULL
) {
660 virtual const UnicodeString
* snext(UErrorCode
& status
) {
661 int32_t resultLength
= 0;
662 const char *s
= next(&resultLength
, status
);
663 return setChars(s
, resultLength
, status
);
666 virtual void reset(UErrorCode
& /*status*/) {
671 CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
673 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration
)
676 // -------------------------------------
678 StringEnumeration
* U_EXPORT2
679 Collator::getAvailableLocales(void)
681 #if !UCONFIG_NO_SERVICE
683 return getService()->getAvailableLocales();
685 #endif /* UCONFIG_NO_SERVICE */
686 UErrorCode status
= U_ZERO_ERROR
;
687 if (isAvailableLocaleListInitialized(status
)) {
688 return new CollationLocaleListEnumeration();
693 StringEnumeration
* U_EXPORT2
694 Collator::getKeywords(UErrorCode
& status
) {
695 // This is a wrapper over ucol_getKeywords
696 UEnumeration
* uenum
= ucol_getKeywords(&status
);
697 if (U_FAILURE(status
)) {
701 return new UStringEnumeration(uenum
);
704 StringEnumeration
* U_EXPORT2
705 Collator::getKeywordValues(const char *keyword
, UErrorCode
& status
) {
706 // This is a wrapper over ucol_getKeywordValues
707 UEnumeration
* uenum
= ucol_getKeywordValues(keyword
, &status
);
708 if (U_FAILURE(status
)) {
712 return new UStringEnumeration(uenum
);
715 StringEnumeration
* U_EXPORT2
716 Collator::getKeywordValuesForLocale(const char* key
, const Locale
& locale
,
717 UBool commonlyUsed
, UErrorCode
& status
) {
718 // This is a wrapper over ucol_getKeywordValuesForLocale
719 UEnumeration
*uenum
= ucol_getKeywordValuesForLocale(key
, locale
.getName(),
720 commonlyUsed
, &status
);
721 if (U_FAILURE(status
)) {
725 return new UStringEnumeration(uenum
);
729 Collator::getFunctionalEquivalent(const char* keyword
, const Locale
& locale
,
730 UBool
& isAvailable
, UErrorCode
& status
) {
731 // This is a wrapper over ucol_getFunctionalEquivalent
732 char loc
[ULOC_FULLNAME_CAPACITY
];
733 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc
, sizeof(loc
),
734 keyword
, locale
.getName(), &isAvailable
, &status
);
735 if (U_FAILURE(status
)) {
738 return Locale::createFromName(loc
);
741 Collator::ECollationStrength
742 Collator::getStrength(void) const {
743 UErrorCode intStatus
= U_ZERO_ERROR
;
744 return (ECollationStrength
)getAttribute(UCOL_STRENGTH
, intStatus
);
748 Collator::setStrength(ECollationStrength newStrength
) {
749 UErrorCode intStatus
= U_ZERO_ERROR
;
750 setAttribute(UCOL_STRENGTH
, (UColAttributeValue
)newStrength
, intStatus
);
754 Collator::setMaxVariable(UColReorderCode
/*group*/, UErrorCode
&errorCode
) {
755 if (U_SUCCESS(errorCode
)) {
756 errorCode
= U_UNSUPPORTED_ERROR
;
762 Collator::getMaxVariable() const {
763 return UCOL_REORDER_CODE_PUNCTUATION
;
767 Collator::getReorderCodes(int32_t* /* dest*/,
768 int32_t /* destCapacity*/,
769 UErrorCode
& status
) const
771 if (U_SUCCESS(status
)) {
772 status
= U_UNSUPPORTED_ERROR
;
778 Collator::setReorderCodes(const int32_t* /* reorderCodes */,
779 int32_t /* reorderCodesLength */,
782 if (U_SUCCESS(status
)) {
783 status
= U_UNSUPPORTED_ERROR
;
788 Collator::getEquivalentReorderCodes(int32_t reorderCode
,
789 int32_t *dest
, int32_t capacity
,
790 UErrorCode
&errorCode
) {
791 if(U_FAILURE(errorCode
)) { return 0; }
792 if(capacity
< 0 || (dest
== NULL
&& capacity
> 0)) {
793 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
796 const CollationData
*baseData
= CollationRoot::getData(errorCode
);
797 if(U_FAILURE(errorCode
)) { return 0; }
798 return baseData
->getEquivalentScripts(reorderCode
, dest
, capacity
, errorCode
);
802 Collator::internalGetShortDefinitionString(const char * /*locale*/,
804 int32_t /*capacity*/,
805 UErrorCode
&status
) const {
806 if(U_SUCCESS(status
)) {
807 status
= U_UNSUPPORTED_ERROR
; /* Shouldn't happen, internal function */
813 Collator::internalCompareUTF8(const char *left
, int32_t leftLength
,
814 const char *right
, int32_t rightLength
,
815 UErrorCode
&errorCode
) const {
816 if(U_FAILURE(errorCode
)) { return UCOL_EQUAL
; }
817 if((left
== NULL
&& leftLength
!= 0) || (right
== NULL
&& rightLength
!= 0)) {
818 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
822 StringPiece(left
, (leftLength
< 0) ? uprv_strlen(left
) : leftLength
),
823 StringPiece(right
, (rightLength
< 0) ? uprv_strlen(right
) : rightLength
),
828 Collator::internalNextSortKeyPart(UCharIterator
* /*iter*/, uint32_t /*state*/[2],
829 uint8_t * /*dest*/, int32_t /*count*/, UErrorCode
&errorCode
) const {
830 if (U_SUCCESS(errorCode
)) {
831 errorCode
= U_UNSUPPORTED_ERROR
;
836 // UCollator private data members ----------------------------------------
838 /* This is useless information */
839 /*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
841 // -------------------------------------
845 #endif /* #if !UCONFIG_NO_COLLATION */