]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coll.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / i18n / coll.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
73c04bcf 3* Copyright (C) 1996-2005, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5******************************************************************************
6*/
7
8/**
9* File coll.cpp
10*
11* Created by: Helena Shih
12*
13* Modification History:
14*
15* Date Name Description
16* 2/5/97 aliu Modified createDefault to load collation data from
17* binary files when possible. Added related methods
18* createCollationFromFile, chopLocale, createPathName.
19* 2/11/97 aliu Added methods addToCache, findInCache, which implement
20* a Collation cache. Modified createDefault to look in
21* cache first, and also to store newly created Collation
22* objects in the cache. Modified to not use gLocPath.
23* 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
24* Moved cache out of Collation class.
25* 2/13/97 aliu Moved several methods out of this class and into
26* RuleBasedCollator, with modifications. Modified
27* createDefault() to call new RuleBasedCollator(Locale&)
28* constructor. General clean up and documentation.
29* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
30* constructor.
31* 05/06/97 helena Added memory allocation error detection.
32* 05/08/97 helena Added createInstance().
33* 6/20/97 helena Java class name change.
34* 04/23/99 stephen Removed EDecompositionMode, merged with
35* Normalizer::EMode
36* 11/23/9 srl Inlining of some critical functions
37* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
38*/
39
40#include "unicode/utypes.h"
41
42#if !UCONFIG_NO_COLLATION
43
44#include "unicode/coll.h"
45#include "unicode/tblcoll.h"
374ca955 46#include "ucol_imp.h"
b75a7d8f
A
47#include "cmemory.h"
48#include "mutex.h"
73c04bcf 49#include "servloc.h"
374ca955 50#include "ustrenum.h"
b75a7d8f
A
51#include "ucln_in.h"
52
374ca955
A
53U_NAMESPACE_BEGIN
54#if !UCONFIG_NO_SERVICE
55U_NAMESPACE_END
56
57static ICULocaleService* gService = NULL;
58/**
59 * Release all static memory held by collator.
60 */
61U_CDECL_BEGIN
62static UBool U_CALLCONV collator_cleanup(void) {
63 if (gService) {
64 delete gService;
65 gService = NULL;
66 }
67 return TRUE;
68}
69U_CDECL_END
70
b75a7d8f
A
71U_NAMESPACE_BEGIN
72
73// ------------------------------------------
74//
75// Registration
76//
77
78//-------------------------------------------
79
374ca955
A
80CollatorFactory::~CollatorFactory() {}
81
82//-------------------------------------------
83
b75a7d8f
A
84UBool
85CollatorFactory::visible(void) const {
374ca955 86 return TRUE;
b75a7d8f
A
87}
88
89//-------------------------------------------
90
91UnicodeString&
92CollatorFactory::getDisplayName(const Locale& objectLocale,
93 const Locale& displayLocale,
94 UnicodeString& result)
95{
96 return objectLocale.getDisplayName(displayLocale, result);
97}
98
99// -------------------------------------
100
101class ICUCollatorFactory : public ICUResourceBundleFactory {
374ca955 102 public:
73c04bcf 103 ICUCollatorFactory(): ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
374ca955
A
104 protected:
105 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
b75a7d8f
A
106};
107
108UObject*
109ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
110 if (handlesKey(key, status)) {
111 const LocaleKey& lkey = (const LocaleKey&)key;
112 Locale loc;
113 // make sure the requested locale is correct
114 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
115 // but for ICU rb resources we use the actual one since it will fallback again
116 lkey.canonicalLocale(loc);
374ca955 117
b75a7d8f
A
118 return Collator::makeInstance(loc, status);
119 }
120 return NULL;
121}
122
123// -------------------------------------
124
125class ICUCollatorService : public ICULocaleService {
374ca955 126public:
b75a7d8f 127 ICUCollatorService()
73c04bcf 128 : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
b75a7d8f
A
129 {
130 UErrorCode status = U_ZERO_ERROR;
131 registerFactory(new ICUCollatorFactory(), status);
132 }
374ca955 133
b75a7d8f
A
134 virtual UObject* cloneInstance(UObject* instance) const {
135 return ((Collator*)instance)->clone();
136 }
374ca955 137
b75a7d8f
A
138 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
139 LocaleKey& lkey = (LocaleKey&)key;
374ca955
A
140 if (actualID) {
141 // Ugly Hack Alert! We return an empty actualID to signal
142 // to callers that this is a default object, not a "real"
143 // service-created object. (TODO remove in 3.0) [aliu]
144 actualID->truncate(0);
145 }
146 Locale loc("");
b75a7d8f
A
147 lkey.canonicalLocale(loc);
148 return Collator::makeInstance(loc, status);
149 }
374ca955
A
150
151 virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
152 UnicodeString ar;
153 if (actualReturn == NULL) {
154 actualReturn = &ar;
155 }
156 Collator* result = (Collator*)ICULocaleService::getKey(key, actualReturn, status);
157 // Ugly Hack Alert! If the actualReturn length is zero, this
158 // means we got a default object, not a "real" service-created
159 // object. We don't call setLocales() on a default object,
160 // because that will overwrite its correct built-in locale
161 // metadata (valid & actual) with our incorrect data (all we
162 // have is the requested locale). (TODO remove in 3.0) [aliu]
163 if (result && actualReturn->length() > 0) {
164 const LocaleKey& lkey = (const LocaleKey&)key;
165 Locale canonicalLocale("");
166 Locale currentLocale("");
167
168 result->setLocales(lkey.canonicalLocale(canonicalLocale),
169 LocaleUtility::initLocaleFromName(*actualReturn, currentLocale));
170 }
171 return result;
172 }
b75a7d8f
A
173
174 virtual UBool isDefault() const {
175 return countFactories() == 1;
176 }
177};
178
179// -------------------------------------
180
181class ICUCollatorService;
182
b75a7d8f
A
183static ICULocaleService*
184getService(void)
185{
374ca955
A
186 UBool needInit;
187 {
188 Mutex mutex;
189 needInit = (UBool)(gService == NULL);
b75a7d8f 190 }
374ca955
A
191 if(needInit) {
192 ICULocaleService *newservice = new ICUCollatorService();
193 if(newservice) {
194 Mutex mutex;
195 if(gService == NULL) {
196 gService = newservice;
197 newservice = NULL;
198 }
199 }
200 if(newservice) {
201 delete newservice;
202 }
203 else {
204#if !UCONFIG_NO_SERVICE
205 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
206#endif
207 }
b75a7d8f 208 }
374ca955 209 return gService;
b75a7d8f
A
210}
211
212// -------------------------------------
213
214static UBool
215hasService(void)
216{
374ca955
A
217 Mutex mutex;
218 return gService != NULL;
b75a7d8f
A
219}
220
221// -------------------------------------
222
223UCollator*
224Collator::createUCollator(const char *loc,
374ca955 225 UErrorCode *status)
b75a7d8f 226{
374ca955
A
227 UCollator *result = 0;
228 if (status && U_SUCCESS(*status) && hasService()) {
229 Locale desiredLocale(loc);
230 Collator *col = (Collator*)gService->get(desiredLocale, *status);
231 if (col && col->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
232 RuleBasedCollator *rbc = (RuleBasedCollator *)col;
233 if (!rbc->dataIsOwned) {
234 result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
235 } else {
236 result = rbc->ucollator;
237 rbc->ucollator = NULL; // to prevent free on delete
238 }
239 }
240 delete col;
241 }
242 return result;
b75a7d8f 243}
374ca955 244#endif /* UCONFIG_NO_SERVICE */
b75a7d8f
A
245
246// Collator public methods -----------------------------------------------
247
374ca955 248Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
b75a7d8f 249{
374ca955 250 return createInstance(Locale::getDefault(), success);
b75a7d8f
A
251}
252
374ca955 253Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
b75a7d8f
A
254 UErrorCode& status)
255{
374ca955
A
256 if (U_FAILURE(status))
257 return 0;
258
259#if !UCONFIG_NO_SERVICE
260 if (hasService()) {
261 Locale actualLoc;
262 Collator *result =
263 (Collator*)gService->get(desiredLocale, &actualLoc, status);
264 // Ugly Hack Alert! If the returned locale is empty (not root,
265 // but empty -- getName() == "") then that means the service
266 // returned a default object, not a "real" service object. In
267 // that case, the locale metadata (valid & actual) is setup
268 // correctly already, and we don't want to overwrite it. (TODO
269 // remove in 3.0) [aliu]
270 if (*actualLoc.getName() != 0) {
271 result->setLocales(desiredLocale, actualLoc);
272 }
273 return result;
274 }
275#endif
276 return makeInstance(desiredLocale, status);
b75a7d8f
A
277}
278
279
280Collator* Collator::makeInstance(const Locale& desiredLocale,
281 UErrorCode& status)
282{
374ca955
A
283 // A bit of explanation is required here. Although in the current
284 // implementation
285 // Collator::createInstance() is just turning around and calling
286 // RuleBasedCollator(Locale&), this will not necessarily always be the
287 // case. For example, suppose we modify this code to handle a
288 // non-table-based Collator, such as that for Thai. In this case,
289 // createInstance() will have to be modified to somehow determine this fact
290 // (perhaps a field in the resource bundle). Then it can construct the
291 // non-table-based Collator in some other way, when it sees that it needs
292 // to.
293 // The specific caution is this: RuleBasedCollator(Locale&) will ALWAYS
294 // return a valid collation object, if the system if functioning properly.
295 // The reason is that it will fall back, use the default locale, and even
296 // use the built-in default collation rules. THEREFORE, createInstance()
297 // should in general ONLY CALL RuleBasedCollator(Locale&) IF IT KNOWS IN
298 // ADVANCE that the given locale's collation is properly implemented as a
299 // RuleBasedCollator.
300 // Currently, we don't do this...we always return a RuleBasedCollator,
301 // whether it is strictly correct to do so or not, without checking, because
302 // we currently have no way of checking.
303
304 RuleBasedCollator* collation = new RuleBasedCollator(desiredLocale,
305 status);
306 /* test for NULL */
307 if (collation == 0) {
308 status = U_MEMORY_ALLOCATION_ERROR;
309 return 0;
310 }
311 if (U_FAILURE(status))
312 {
313 delete collation;
314 collation = 0;
315 }
316 return collation;
b75a7d8f
A
317}
318
374ca955 319#ifdef U_USE_COLLATION_OBSOLETE_2_6
b75a7d8f
A
320// !!! dlf the following is obsolete, ignore registration for this
321
322Collator *
323Collator::createInstance(const Locale &loc,
324 UVersionInfo version,
374ca955
A
325 UErrorCode &status)
326{
327 Collator *collator;
328 UVersionInfo info;
329
330 collator=new RuleBasedCollator(loc, status);
331 /* test for NULL */
332 if (collator == 0) {
333 status = U_MEMORY_ALLOCATION_ERROR;
334 return 0;
b75a7d8f 335 }
374ca955
A
336
337 if(U_SUCCESS(status)) {
338 collator->getVersion(info);
339 if(0!=uprv_memcmp(version, info, sizeof(UVersionInfo))) {
340 delete collator;
341 status=U_MISSING_RESOURCE_ERROR;
342 return 0;
343 }
344 }
345 return collator;
b75a7d8f 346}
374ca955 347#endif
b75a7d8f
A
348
349// implement deprecated, previously abstract method
350Collator::EComparisonResult Collator::compare(const UnicodeString& source,
351 const UnicodeString& target) const
352{
374ca955
A
353 UErrorCode ec = U_ZERO_ERROR;
354 return (Collator::EComparisonResult)compare(source, target, ec);
b75a7d8f
A
355}
356
357// implement deprecated, previously abstract method
358Collator::EComparisonResult Collator::compare(const UnicodeString& source,
359 const UnicodeString& target,
360 int32_t length) const
361{
374ca955
A
362 UErrorCode ec = U_ZERO_ERROR;
363 return (Collator::EComparisonResult)compare(source, target, length, ec);
b75a7d8f
A
364}
365
366// implement deprecated, previously abstract method
367Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
368 const UChar* target, int32_t targetLength)
369 const
370{
374ca955
A
371 UErrorCode ec = U_ZERO_ERROR;
372 return (Collator::EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
b75a7d8f
A
373}
374
375UBool Collator::equals(const UnicodeString& source,
374ca955 376 const UnicodeString& target) const
b75a7d8f 377{
374ca955
A
378 UErrorCode ec = U_ZERO_ERROR;
379 return (compare(source, target, ec) == UCOL_EQUAL);
b75a7d8f
A
380}
381
382UBool Collator::greaterOrEqual(const UnicodeString& source,
374ca955 383 const UnicodeString& target) const
b75a7d8f 384{
374ca955
A
385 UErrorCode ec = U_ZERO_ERROR;
386 return (compare(source, target, ec) != UCOL_LESS);
b75a7d8f
A
387}
388
389UBool Collator::greater(const UnicodeString& source,
374ca955 390 const UnicodeString& target) const
b75a7d8f 391{
374ca955
A
392 UErrorCode ec = U_ZERO_ERROR;
393 return (compare(source, target, ec) == UCOL_GREATER);
b75a7d8f
A
394}
395
396// this API ignores registered collators, since it returns an
397// array of indefinite lifetime
374ca955 398const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
b75a7d8f 399{
374ca955 400 return Locale::getAvailableLocales(count);
b75a7d8f
A
401}
402
374ca955
A
403UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
404 const Locale& displayLocale,
405 UnicodeString& name)
b75a7d8f 406{
374ca955
A
407#if !UCONFIG_NO_SERVICE
408 if (hasService()) {
73c04bcf
A
409 UnicodeString locNameStr;
410 LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
411 return gService->getDisplayName(locNameStr, name, displayLocale);
374ca955
A
412 }
413#endif
414 return objectLocale.getDisplayName(displayLocale, name);
b75a7d8f
A
415}
416
374ca955
A
417UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
418 UnicodeString& name)
b75a7d8f 419{
374ca955 420 return getDisplayName(objectLocale, Locale::getDefault(), name);
b75a7d8f
A
421}
422
423/* This is useless information */
424/*void Collator::getVersion(UVersionInfo versionInfo) const
425{
426 if (versionInfo!=NULL)
427 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
428}
429*/
430
431// UCollator protected constructor destructor ----------------------------
432
433/**
434* Default constructor.
435* Constructor is different from the old default Collator constructor.
436* The task for determing the default collation strength and normalization mode
437* is left to the child class.
438*/
439Collator::Collator()
374ca955 440: UObject()
b75a7d8f
A
441{
442}
443
444/**
445* Constructor.
446* Empty constructor, does not handle the arguments.
447* This constructor is done for backward compatibility with 1.7 and 1.8.
448* The task for handling the argument collation strength and normalization
449* mode is left to the child class.
450* @param collationStrength collation strength
451* @param decompositionMode
452* @deprecated 2.4 use the default constructor instead
453*/
454Collator::Collator(UCollationStrength, UNormalizationMode )
374ca955 455: UObject()
b75a7d8f
A
456{
457}
458
459Collator::~Collator()
460{
461}
462
463Collator::Collator(const Collator &other)
464 : UObject(other)
465{
466}
467
374ca955
A
468UBool Collator::operator==(const Collator& other) const
469{
470 return (UBool)(this == &other);
471}
472
473UBool Collator::operator!=(const Collator& other) const
474{
475 return (UBool)!(*this == other);
476}
477
478int32_t U_EXPORT2 Collator::getBound(const uint8_t *source,
479 int32_t sourceLength,
480 UColBoundMode boundType,
481 uint32_t noOfLevels,
482 uint8_t *result,
483 int32_t resultLength,
484 UErrorCode &status)
485{
486 return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
b75a7d8f
A
487}
488
489void
490Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */) {
491}
492
374ca955
A
493UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
494{
495 if(U_FAILURE(status)) {
496 return NULL;
497 }
498 // everything can be changed
499 return new UnicodeSet(0, 0x10FFFF);
500}
501
b75a7d8f
A
502// -------------------------------------
503
374ca955
A
504#if !UCONFIG_NO_SERVICE
505URegistryKey U_EXPORT2
b75a7d8f
A
506Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
507{
508 if (U_SUCCESS(status)) {
509 return getService()->registerInstance(toAdopt, locale, status);
510 }
511 return NULL;
512}
513
514// -------------------------------------
515
516class CFactory : public LocaleKeyFactory {
517private:
518 CollatorFactory* _delegate;
519 Hashtable* _ids;
374ca955 520
b75a7d8f
A
521public:
522 CFactory(CollatorFactory* delegate, UErrorCode& status)
523 : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
524 , _delegate(delegate)
525 , _ids(NULL)
526 {
374ca955
A
527 if (U_SUCCESS(status)) {
528 int32_t count = 0;
529 _ids = new Hashtable(status);
530 if (_ids) {
531 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
532 for (int i = 0; i < count; ++i) {
533 _ids->put(idlist[i], (void*)this, status);
534 if (U_FAILURE(status)) {
535 delete _ids;
536 _ids = NULL;
537 return;
538 }
539 }
540 } else {
541 status = U_MEMORY_ALLOCATION_ERROR;
542 }
b75a7d8f
A
543 }
544 }
374ca955 545
b75a7d8f
A
546 virtual ~CFactory()
547 {
548 delete _delegate;
549 delete _ids;
550 }
374ca955 551
b75a7d8f 552 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
374ca955
A
553
554protected:
b75a7d8f
A
555 virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
556 {
557 if (U_SUCCESS(status)) {
558 return _ids;
559 }
560 return NULL;
561 }
374ca955 562
b75a7d8f
A
563 virtual UnicodeString&
564 getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
565};
566
567UObject*
568CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
569{
570 if (handlesKey(key, status)) {
571 const LocaleKey& lkey = (const LocaleKey&)key;
572 Locale validLoc;
573 lkey.currentLocale(validLoc);
374ca955 574 return _delegate->createCollator(validLoc);
b75a7d8f
A
575 }
576 return NULL;
577}
578
579UnicodeString&
580CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
581{
582 if ((_coverage & 0x1) == 0) {
583 UErrorCode status = U_ZERO_ERROR;
584 const Hashtable* ids = getSupportedIDs(status);
585 if (ids && (ids->get(id) != NULL)) {
586 Locale loc;
587 LocaleUtility::initLocaleFromName(id, loc);
588 return _delegate->getDisplayName(loc, locale, result);
589 }
590 }
591 result.setToBogus();
592 return result;
593}
594
374ca955 595URegistryKey U_EXPORT2
b75a7d8f
A
596Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
597{
598 if (U_SUCCESS(status)) {
599 CFactory* f = new CFactory(toAdopt, status);
600 if (f) {
601 return getService()->registerFactory(f, status);
602 }
603 status = U_MEMORY_ALLOCATION_ERROR;
604 }
605 return NULL;
606}
607
608// -------------------------------------
609
374ca955 610UBool U_EXPORT2
b75a7d8f
A
611Collator::unregister(URegistryKey key, UErrorCode& status)
612{
374ca955
A
613 if (U_SUCCESS(status)) {
614 if (hasService()) {
615 return gService->unregister(key, status);
616 }
617 status = U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f 618 }
374ca955 619 return FALSE;
b75a7d8f
A
620}
621
622// -------------------------------------
623
374ca955 624StringEnumeration* U_EXPORT2
b75a7d8f
A
625Collator::getAvailableLocales(void)
626{
374ca955
A
627 return getService()->getAvailableLocales();
628}
629#endif /* UCONFIG_NO_SERVICE */
630
631StringEnumeration* U_EXPORT2
632Collator::getKeywords(UErrorCode& status) {
633 // This is a wrapper over ucol_getKeywords
634 UEnumeration* uenum = ucol_getKeywords(&status);
635 if (U_FAILURE(status)) {
636 uenum_close(uenum);
637 return NULL;
638 }
639 return new UStringEnumeration(uenum);
640}
641
642StringEnumeration* U_EXPORT2
643Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
644 // This is a wrapper over ucol_getKeywordValues
645 UEnumeration* uenum = ucol_getKeywordValues(keyword, &status);
646 if (U_FAILURE(status)) {
647 uenum_close(uenum);
648 return NULL;
649 }
650 return new UStringEnumeration(uenum);
651}
652
653Locale U_EXPORT2
654Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
655 UBool& isAvailable, UErrorCode& status) {
656 // This is a wrapper over ucol_getFunctionalEquivalent
657 char loc[ULOC_FULLNAME_CAPACITY];
658 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
659 keyword, locale.getName(), &isAvailable, &status);
660 if (U_FAILURE(status)) {
661 *loc = 0; // root
662 }
663 return Locale::createFromName(loc);
b75a7d8f
A
664}
665
666// UCollator private data members ----------------------------------------
667
668/* This is useless information */
669/*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
670
671// -------------------------------------
672
673U_NAMESPACE_END
674
b75a7d8f
A
675#endif /* #if !UCONFIG_NO_COLLATION */
676
677/* eof */