ICU-6.2.14.tar.gz
[apple/icu.git] / icuSources / i18n / coll.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
374ca955 3* Copyright (C) 1996-2004, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5******************************************************************************
6*/
7
8/**
9* File coll.cpp
10*
11* Created by: Helena Shih
12*
13* Modification History:
14*
15* Date Name Description
16* 2/5/97 aliu Modified createDefault to load collation data from
17* binary files when possible. Added related methods
18* createCollationFromFile, chopLocale, createPathName.
19* 2/11/97 aliu Added methods addToCache, findInCache, which implement
20* a Collation cache. Modified createDefault to look in
21* cache first, and also to store newly created Collation
22* objects in the cache. Modified to not use gLocPath.
23* 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
24* Moved cache out of Collation class.
25* 2/13/97 aliu Moved several methods out of this class and into
26* RuleBasedCollator, with modifications. Modified
27* createDefault() to call new RuleBasedCollator(Locale&)
28* constructor. General clean up and documentation.
29* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
30* constructor.
31* 05/06/97 helena Added memory allocation error detection.
32* 05/08/97 helena Added createInstance().
33* 6/20/97 helena Java class name change.
34* 04/23/99 stephen Removed EDecompositionMode, merged with
35* Normalizer::EMode
36* 11/23/9 srl Inlining of some critical functions
37* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
38*/
39
40#include "unicode/utypes.h"
41
42#if !UCONFIG_NO_COLLATION
43
44#include "unicode/coll.h"
45#include "unicode/tblcoll.h"
374ca955 46#include "ucol_imp.h"
b75a7d8f
A
47#include "cmemory.h"
48#include "mutex.h"
49#include "iculserv.h"
374ca955 50#include "ustrenum.h"
b75a7d8f
A
51#include "ucln_in.h"
52
374ca955
A
53U_NAMESPACE_BEGIN
54#if !UCONFIG_NO_SERVICE
55U_NAMESPACE_END
56
57static ICULocaleService* gService = NULL;
58/**
59 * Release all static memory held by collator.
60 */
61U_CDECL_BEGIN
62static UBool U_CALLCONV collator_cleanup(void) {
63 if (gService) {
64 delete gService;
65 gService = NULL;
66 }
67 return TRUE;
68}
69U_CDECL_END
70
b75a7d8f
A
71U_NAMESPACE_BEGIN
72
73// ------------------------------------------
74//
75// Registration
76//
77
78//-------------------------------------------
79
374ca955
A
80CollatorFactory::~CollatorFactory() {}
81
82//-------------------------------------------
83
b75a7d8f
A
84UBool
85CollatorFactory::visible(void) const {
374ca955 86 return TRUE;
b75a7d8f
A
87}
88
89//-------------------------------------------
90
91UnicodeString&
92CollatorFactory::getDisplayName(const Locale& objectLocale,
93 const Locale& displayLocale,
94 UnicodeString& result)
95{
96 return objectLocale.getDisplayName(displayLocale, result);
97}
98
99// -------------------------------------
100
101class ICUCollatorFactory : public ICUResourceBundleFactory {
374ca955
A
102 public:
103 ICUCollatorFactory(): ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, (char*)NULL)) { }
104 protected:
105 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
b75a7d8f
A
106};
107
108UObject*
109ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
110 if (handlesKey(key, status)) {
111 const LocaleKey& lkey = (const LocaleKey&)key;
112 Locale loc;
113 // make sure the requested locale is correct
114 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
115 // but for ICU rb resources we use the actual one since it will fallback again
116 lkey.canonicalLocale(loc);
374ca955 117
b75a7d8f
A
118 return Collator::makeInstance(loc, status);
119 }
120 return NULL;
121}
122
123// -------------------------------------
124
125class ICUCollatorService : public ICULocaleService {
374ca955 126public:
b75a7d8f
A
127 ICUCollatorService()
128 : ICULocaleService("Collator")
129 {
130 UErrorCode status = U_ZERO_ERROR;
131 registerFactory(new ICUCollatorFactory(), status);
132 }
374ca955 133
b75a7d8f
A
134 virtual UObject* cloneInstance(UObject* instance) const {
135 return ((Collator*)instance)->clone();
136 }
374ca955 137
b75a7d8f
A
138 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
139 LocaleKey& lkey = (LocaleKey&)key;
374ca955
A
140 if (actualID) {
141 // Ugly Hack Alert! We return an empty actualID to signal
142 // to callers that this is a default object, not a "real"
143 // service-created object. (TODO remove in 3.0) [aliu]
144 actualID->truncate(0);
145 }
146 Locale loc("");
b75a7d8f
A
147 lkey.canonicalLocale(loc);
148 return Collator::makeInstance(loc, status);
149 }
374ca955
A
150
151 virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
152 UnicodeString ar;
153 if (actualReturn == NULL) {
154 actualReturn = &ar;
155 }
156 Collator* result = (Collator*)ICULocaleService::getKey(key, actualReturn, status);
157 // Ugly Hack Alert! If the actualReturn length is zero, this
158 // means we got a default object, not a "real" service-created
159 // object. We don't call setLocales() on a default object,
160 // because that will overwrite its correct built-in locale
161 // metadata (valid & actual) with our incorrect data (all we
162 // have is the requested locale). (TODO remove in 3.0) [aliu]
163 if (result && actualReturn->length() > 0) {
164 const LocaleKey& lkey = (const LocaleKey&)key;
165 Locale canonicalLocale("");
166 Locale currentLocale("");
167
168 result->setLocales(lkey.canonicalLocale(canonicalLocale),
169 LocaleUtility::initLocaleFromName(*actualReturn, currentLocale));
170 }
171 return result;
172 }
b75a7d8f
A
173
174 virtual UBool isDefault() const {
175 return countFactories() == 1;
176 }
177};
178
179// -------------------------------------
180
181class ICUCollatorService;
182
b75a7d8f
A
183static ICULocaleService*
184getService(void)
185{
374ca955
A
186 UBool needInit;
187 {
188 Mutex mutex;
189 needInit = (UBool)(gService == NULL);
b75a7d8f 190 }
374ca955
A
191 if(needInit) {
192 ICULocaleService *newservice = new ICUCollatorService();
193 if(newservice) {
194 Mutex mutex;
195 if(gService == NULL) {
196 gService = newservice;
197 newservice = NULL;
198 }
199 }
200 if(newservice) {
201 delete newservice;
202 }
203 else {
204#if !UCONFIG_NO_SERVICE
205 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
206#endif
207 }
b75a7d8f 208 }
374ca955 209 return gService;
b75a7d8f
A
210}
211
212// -------------------------------------
213
214static UBool
215hasService(void)
216{
374ca955
A
217 Mutex mutex;
218 return gService != NULL;
b75a7d8f
A
219}
220
221// -------------------------------------
222
223UCollator*
224Collator::createUCollator(const char *loc,
374ca955 225 UErrorCode *status)
b75a7d8f 226{
374ca955
A
227 UCollator *result = 0;
228 if (status && U_SUCCESS(*status) && hasService()) {
229 Locale desiredLocale(loc);
230 Collator *col = (Collator*)gService->get(desiredLocale, *status);
231 if (col && col->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
232 RuleBasedCollator *rbc = (RuleBasedCollator *)col;
233 if (!rbc->dataIsOwned) {
234 result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
235 } else {
236 result = rbc->ucollator;
237 rbc->ucollator = NULL; // to prevent free on delete
238 }
239 }
240 delete col;
241 }
242 return result;
b75a7d8f 243}
374ca955 244#endif /* UCONFIG_NO_SERVICE */
b75a7d8f
A
245
246// Collator public methods -----------------------------------------------
247
374ca955 248Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
b75a7d8f 249{
374ca955 250 return createInstance(Locale::getDefault(), success);
b75a7d8f
A
251}
252
374ca955 253Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
b75a7d8f
A
254 UErrorCode& status)
255{
374ca955
A
256 if (U_FAILURE(status))
257 return 0;
258
259#if !UCONFIG_NO_SERVICE
260 if (hasService()) {
261 Locale actualLoc;
262 Collator *result =
263 (Collator*)gService->get(desiredLocale, &actualLoc, status);
264 // Ugly Hack Alert! If the returned locale is empty (not root,
265 // but empty -- getName() == "") then that means the service
266 // returned a default object, not a "real" service object. In
267 // that case, the locale metadata (valid & actual) is setup
268 // correctly already, and we don't want to overwrite it. (TODO
269 // remove in 3.0) [aliu]
270 if (*actualLoc.getName() != 0) {
271 result->setLocales(desiredLocale, actualLoc);
272 }
273 return result;
274 }
275#endif
276 return makeInstance(desiredLocale, status);
b75a7d8f
A
277}
278
279
280Collator* Collator::makeInstance(const Locale& desiredLocale,
281 UErrorCode& status)
282{
374ca955
A
283 // A bit of explanation is required here. Although in the current
284 // implementation
285 // Collator::createInstance() is just turning around and calling
286 // RuleBasedCollator(Locale&), this will not necessarily always be the
287 // case. For example, suppose we modify this code to handle a
288 // non-table-based Collator, such as that for Thai. In this case,
289 // createInstance() will have to be modified to somehow determine this fact
290 // (perhaps a field in the resource bundle). Then it can construct the
291 // non-table-based Collator in some other way, when it sees that it needs
292 // to.
293 // The specific caution is this: RuleBasedCollator(Locale&) will ALWAYS
294 // return a valid collation object, if the system if functioning properly.
295 // The reason is that it will fall back, use the default locale, and even
296 // use the built-in default collation rules. THEREFORE, createInstance()
297 // should in general ONLY CALL RuleBasedCollator(Locale&) IF IT KNOWS IN
298 // ADVANCE that the given locale's collation is properly implemented as a
299 // RuleBasedCollator.
300 // Currently, we don't do this...we always return a RuleBasedCollator,
301 // whether it is strictly correct to do so or not, without checking, because
302 // we currently have no way of checking.
303
304 RuleBasedCollator* collation = new RuleBasedCollator(desiredLocale,
305 status);
306 /* test for NULL */
307 if (collation == 0) {
308 status = U_MEMORY_ALLOCATION_ERROR;
309 return 0;
310 }
311 if (U_FAILURE(status))
312 {
313 delete collation;
314 collation = 0;
315 }
316 return collation;
b75a7d8f
A
317}
318
374ca955 319#ifdef U_USE_COLLATION_OBSOLETE_2_6
b75a7d8f
A
320// !!! dlf the following is obsolete, ignore registration for this
321
322Collator *
323Collator::createInstance(const Locale &loc,
324 UVersionInfo version,
374ca955
A
325 UErrorCode &status)
326{
327 Collator *collator;
328 UVersionInfo info;
329
330 collator=new RuleBasedCollator(loc, status);
331 /* test for NULL */
332 if (collator == 0) {
333 status = U_MEMORY_ALLOCATION_ERROR;
334 return 0;
b75a7d8f 335 }
374ca955
A
336
337 if(U_SUCCESS(status)) {
338 collator->getVersion(info);
339 if(0!=uprv_memcmp(version, info, sizeof(UVersionInfo))) {
340 delete collator;
341 status=U_MISSING_RESOURCE_ERROR;
342 return 0;
343 }
344 }
345 return collator;
b75a7d8f 346}
374ca955 347#endif
b75a7d8f
A
348
349// implement deprecated, previously abstract method
350Collator::EComparisonResult Collator::compare(const UnicodeString& source,
351 const UnicodeString& target) const
352{
374ca955
A
353 UErrorCode ec = U_ZERO_ERROR;
354 return (Collator::EComparisonResult)compare(source, target, ec);
b75a7d8f
A
355}
356
357// implement deprecated, previously abstract method
358Collator::EComparisonResult Collator::compare(const UnicodeString& source,
359 const UnicodeString& target,
360 int32_t length) const
361{
374ca955
A
362 UErrorCode ec = U_ZERO_ERROR;
363 return (Collator::EComparisonResult)compare(source, target, length, ec);
b75a7d8f
A
364}
365
366// implement deprecated, previously abstract method
367Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
368 const UChar* target, int32_t targetLength)
369 const
370{
374ca955
A
371 UErrorCode ec = U_ZERO_ERROR;
372 return (Collator::EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
b75a7d8f
A
373}
374
375UBool Collator::equals(const UnicodeString& source,
374ca955 376 const UnicodeString& target) const
b75a7d8f 377{
374ca955
A
378 UErrorCode ec = U_ZERO_ERROR;
379 return (compare(source, target, ec) == UCOL_EQUAL);
b75a7d8f
A
380}
381
382UBool Collator::greaterOrEqual(const UnicodeString& source,
374ca955 383 const UnicodeString& target) const
b75a7d8f 384{
374ca955
A
385 UErrorCode ec = U_ZERO_ERROR;
386 return (compare(source, target, ec) != UCOL_LESS);
b75a7d8f
A
387}
388
389UBool Collator::greater(const UnicodeString& source,
374ca955 390 const UnicodeString& target) const
b75a7d8f 391{
374ca955
A
392 UErrorCode ec = U_ZERO_ERROR;
393 return (compare(source, target, ec) == UCOL_GREATER);
b75a7d8f
A
394}
395
396// this API ignores registered collators, since it returns an
397// array of indefinite lifetime
374ca955 398const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
b75a7d8f 399{
374ca955 400 return Locale::getAvailableLocales(count);
b75a7d8f
A
401}
402
374ca955
A
403UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
404 const Locale& displayLocale,
405 UnicodeString& name)
b75a7d8f 406{
374ca955
A
407#if !UCONFIG_NO_SERVICE
408 if (hasService()) {
409 return gService->getDisplayName(objectLocale.getName(), name, displayLocale);
410 }
411#endif
412 return objectLocale.getDisplayName(displayLocale, name);
b75a7d8f
A
413}
414
374ca955
A
415UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
416 UnicodeString& name)
b75a7d8f 417{
374ca955 418 return getDisplayName(objectLocale, Locale::getDefault(), name);
b75a7d8f
A
419}
420
421/* This is useless information */
422/*void Collator::getVersion(UVersionInfo versionInfo) const
423{
424 if (versionInfo!=NULL)
425 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
426}
427*/
428
429// UCollator protected constructor destructor ----------------------------
430
431/**
432* Default constructor.
433* Constructor is different from the old default Collator constructor.
434* The task for determining the default collation strength and normalization mode
435* is left to the child class.
436*/
437Collator::Collator()
374ca955 438: UObject()
b75a7d8f
A
439{
440}
441
442/**
443* Constructor.
444* Empty constructor, does not handle the arguments.
445* This constructor is done for backward compatibility with 1.7 and 1.8.
446* The task for handling the argument collation strength and normalization
447* mode is left to the child class.
448* @param collationStrength collation strength
449* @param decompositionMode
450* @deprecated 2.4 use the default constructor instead
451*/
452Collator::Collator(UCollationStrength, UNormalizationMode )
374ca955 453: UObject()
b75a7d8f
A
454{
455}
456
457Collator::~Collator()
458{
459}
460
461Collator::Collator(const Collator &other)
462 : UObject(other)
463{
464}
465
374ca955
A
466UBool Collator::operator==(const Collator& other) const
467{
468 return (UBool)(this == &other);
469}
470
471UBool Collator::operator!=(const Collator& other) const
472{
473 return (UBool)!(*this == other);
474}
475
476int32_t U_EXPORT2 Collator::getBound(const uint8_t *source,
477 int32_t sourceLength,
478 UColBoundMode boundType,
479 uint32_t noOfLevels,
480 uint8_t *result,
481 int32_t resultLength,
482 UErrorCode &status)
483{
484 return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
b75a7d8f
A
485}
486
487void
488Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */) {
489}
490
374ca955
A
491UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
492{
493 if(U_FAILURE(status)) {
494 return NULL;
495 }
496 // everything can be changed
497 return new UnicodeSet(0, 0x10FFFF);
498}
499
b75a7d8f
A
500// -------------------------------------
501
374ca955
A
502#if !UCONFIG_NO_SERVICE
503URegistryKey U_EXPORT2
b75a7d8f
A
504Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
505{
506 if (U_SUCCESS(status)) {
507 return getService()->registerInstance(toAdopt, locale, status);
508 }
509 return NULL;
510}
511
512// -------------------------------------
513
514class CFactory : public LocaleKeyFactory {
515private:
516 CollatorFactory* _delegate;
517 Hashtable* _ids;
374ca955 518
b75a7d8f
A
519public:
520 CFactory(CollatorFactory* delegate, UErrorCode& status)
521 : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
522 , _delegate(delegate)
523 , _ids(NULL)
524 {
374ca955
A
525 if (U_SUCCESS(status)) {
526 int32_t count = 0;
527 _ids = new Hashtable(status);
528 if (_ids) {
529 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
530 for (int i = 0; i < count; ++i) {
531 _ids->put(idlist[i], (void*)this, status);
532 if (U_FAILURE(status)) {
533 delete _ids;
534 _ids = NULL;
535 return;
536 }
537 }
538 } else {
539 status = U_MEMORY_ALLOCATION_ERROR;
540 }
b75a7d8f
A
541 }
542 }
374ca955 543
b75a7d8f
A
544 virtual ~CFactory()
545 {
546 delete _delegate;
547 delete _ids;
548 }
374ca955 549
b75a7d8f 550 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
374ca955
A
551
552protected:
b75a7d8f
A
553 virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
554 {
555 if (U_SUCCESS(status)) {
556 return _ids;
557 }
558 return NULL;
559 }
374ca955 560
b75a7d8f
A
561 virtual UnicodeString&
562 getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
563};
564
565UObject*
566CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
567{
568 if (handlesKey(key, status)) {
569 const LocaleKey& lkey = (const LocaleKey&)key;
570 Locale validLoc;
571 lkey.currentLocale(validLoc);
374ca955 572 return _delegate->createCollator(validLoc);
b75a7d8f
A
573 }
574 return NULL;
575}
576
577UnicodeString&
578CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
579{
580 if ((_coverage & 0x1) == 0) {
581 UErrorCode status = U_ZERO_ERROR;
582 const Hashtable* ids = getSupportedIDs(status);
583 if (ids && (ids->get(id) != NULL)) {
584 Locale loc;
585 LocaleUtility::initLocaleFromName(id, loc);
586 return _delegate->getDisplayName(loc, locale, result);
587 }
588 }
589 result.setToBogus();
590 return result;
591}
592
374ca955 593URegistryKey U_EXPORT2
b75a7d8f
A
594Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
595{
596 if (U_SUCCESS(status)) {
597 CFactory* f = new CFactory(toAdopt, status);
598 if (f) {
599 return getService()->registerFactory(f, status);
600 }
601 status = U_MEMORY_ALLOCATION_ERROR;
602 }
603 return NULL;
604}
605
606// -------------------------------------
607
374ca955 608UBool U_EXPORT2
b75a7d8f
A
609Collator::unregister(URegistryKey key, UErrorCode& status)
610{
374ca955
A
611 if (U_SUCCESS(status)) {
612 if (hasService()) {
613 return gService->unregister(key, status);
614 }
615 status = U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f 616 }
374ca955 617 return FALSE;
b75a7d8f
A
618}
619
620// -------------------------------------
621
374ca955 622StringEnumeration* U_EXPORT2
b75a7d8f
A
623Collator::getAvailableLocales(void)
624{
374ca955
A
625 return getService()->getAvailableLocales();
626}
627#endif /* UCONFIG_NO_SERVICE */
628
629StringEnumeration* U_EXPORT2
630Collator::getKeywords(UErrorCode& status) {
631 // This is a wrapper over ucol_getKeywords
632 UEnumeration* uenum = ucol_getKeywords(&status);
633 if (U_FAILURE(status)) {
634 uenum_close(uenum);
635 return NULL;
636 }
637 return new UStringEnumeration(uenum);
638}
639
640StringEnumeration* U_EXPORT2
641Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
642 // This is a wrapper over ucol_getKeywordValues
643 UEnumeration* uenum = ucol_getKeywordValues(keyword, &status);
644 if (U_FAILURE(status)) {
645 uenum_close(uenum);
646 return NULL;
647 }
648 return new UStringEnumeration(uenum);
649}
650
651Locale U_EXPORT2
652Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
653 UBool& isAvailable, UErrorCode& status) {
654 // This is a wrapper over ucol_getFunctionalEquivalent
655 char loc[ULOC_FULLNAME_CAPACITY];
656 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
657 keyword, locale.getName(), &isAvailable, &status);
658 if (U_FAILURE(status)) {
659 *loc = 0; // root
660 }
661 return Locale::createFromName(loc);
b75a7d8f
A
662}
663
664// UCollator private data members ----------------------------------------
665
666/* This is useless information */
667/*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
668
669// -------------------------------------
670
671U_NAMESPACE_END
672
b75a7d8f
A
673#endif /* #if !UCONFIG_NO_COLLATION */
674
675/* eof */