]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coll.cpp
ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / coll.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/*
46f4442e 4 ******************************************************************************
57a6839d 5 * Copyright (C) 1996-2014, International Business Machines Corporation and
729e4ab9 6 * others. All Rights Reserved.
46f4442e
A
7 ******************************************************************************
8 */
b75a7d8f
A
9
10/**
46f4442e
A
11 * File coll.cpp
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 * 2/5/97 aliu Modified createDefault to load collation data from
19 * binary files when possible. Added related methods
20 * createCollationFromFile, chopLocale, createPathName.
21 * 2/11/97 aliu Added methods addToCache, findInCache, which implement
22 * a Collation cache. Modified createDefault to look in
23 * cache first, and also to store newly created Collation
24 * objects in the cache. Modified to not use gLocPath.
25 * 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
26 * Moved cache out of Collation class.
27 * 2/13/97 aliu Moved several methods out of this class and into
28 * RuleBasedCollator, with modifications. Modified
29 * createDefault() to call new RuleBasedCollator(Locale&)
30 * constructor. General clean up and documentation.
31 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
32 * constructor.
33 * 05/06/97 helena Added memory allocation error detection.
34 * 05/08/97 helena Added createInstance().
35 * 6/20/97 helena Java class name change.
36 * 04/23/99 stephen Removed EDecompositionMode, merged with
37 * Normalizer::EMode
38 * 11/23/9 srl Inlining of some critical functions
39 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
57a6839d 40 * 2012-2014 markus Rewritten in C++ again.
46f4442e 41 */
b75a7d8f 42
57a6839d 43#include "utypeinfo.h" // for 'typeid' to work
51004dcb 44
b75a7d8f
A
45#include "unicode/utypes.h"
46
47#if !UCONFIG_NO_COLLATION
48
49#include "unicode/coll.h"
50#include "unicode/tblcoll.h"
57a6839d
A
51#include "collationdata.h"
52#include "collationroot.h"
53#include "collationtailoring.h"
374ca955 54#include "ucol_imp.h"
46f4442e 55#include "cstring.h"
b75a7d8f 56#include "cmemory.h"
46f4442e 57#include "umutex.h"
73c04bcf 58#include "servloc.h"
57a6839d 59#include "uassert.h"
374ca955 60#include "ustrenum.h"
46f4442e 61#include "uresimp.h"
b75a7d8f
A
62#include "ucln_in.h"
63
4388f060 64static icu::Locale* availableLocaleList = NULL;
46f4442e 65static int32_t availableLocaleListCount;
4388f060 66static icu::ICULocaleService* gService = NULL;
57a6839d
A
67static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
68static icu::UInitOnce gAvailableLocaleListInitOnce;
374ca955 69
374ca955
A
70/**
71 * Release all static memory held by collator.
72 */
73U_CDECL_BEGIN
74static UBool U_CALLCONV collator_cleanup(void) {
46f4442e 75#if !UCONFIG_NO_SERVICE
374ca955
A
76 if (gService) {
77 delete gService;
78 gService = NULL;
79 }
57a6839d 80 gServiceInitOnce.reset();
46f4442e
A
81#endif
82 if (availableLocaleList) {
83 delete []availableLocaleList;
84 availableLocaleList = NULL;
85 }
86 availableLocaleListCount = 0;
57a6839d 87 gAvailableLocaleListInitOnce.reset();
374ca955
A
88 return TRUE;
89}
46f4442e 90
374ca955
A
91U_CDECL_END
92
b75a7d8f
A
93U_NAMESPACE_BEGIN
94
46f4442e
A
95#if !UCONFIG_NO_SERVICE
96
b75a7d8f
A
97// ------------------------------------------
98//
99// Registration
100//
101
102//-------------------------------------------
103
374ca955
A
104CollatorFactory::~CollatorFactory() {}
105
106//-------------------------------------------
107
b75a7d8f
A
108UBool
109CollatorFactory::visible(void) const {
374ca955 110 return TRUE;
b75a7d8f
A
111}
112
113//-------------------------------------------
114
115UnicodeString&
116CollatorFactory::getDisplayName(const Locale& objectLocale,
117 const Locale& displayLocale,
118 UnicodeString& result)
119{
120 return objectLocale.getDisplayName(displayLocale, result);
121}
122
123// -------------------------------------
124
125class ICUCollatorFactory : public ICUResourceBundleFactory {
374ca955 126 public:
4388f060
A
127 ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
128 virtual ~ICUCollatorFactory();
374ca955
A
129 protected:
130 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
b75a7d8f
A
131};
132
4388f060
A
133ICUCollatorFactory::~ICUCollatorFactory() {}
134
b75a7d8f
A
135UObject*
136ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
137 if (handlesKey(key, status)) {
138 const LocaleKey& lkey = (const LocaleKey&)key;
139 Locale loc;
140 // make sure the requested locale is correct
141 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
142 // but for ICU rb resources we use the actual one since it will fallback again
143 lkey.canonicalLocale(loc);
374ca955 144
b75a7d8f
A
145 return Collator::makeInstance(loc, status);
146 }
147 return NULL;
148}
149
150// -------------------------------------
151
152class ICUCollatorService : public ICULocaleService {
374ca955 153public:
b75a7d8f 154 ICUCollatorService()
73c04bcf 155 : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
b75a7d8f
A
156 {
157 UErrorCode status = U_ZERO_ERROR;
158 registerFactory(new ICUCollatorFactory(), status);
159 }
4388f060
A
160
161 virtual ~ICUCollatorService();
162
b75a7d8f
A
163 virtual UObject* cloneInstance(UObject* instance) const {
164 return ((Collator*)instance)->clone();
165 }
374ca955 166
b75a7d8f
A
167 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
168 LocaleKey& lkey = (LocaleKey&)key;
374ca955
A
169 if (actualID) {
170 // Ugly Hack Alert! We return an empty actualID to signal
171 // to callers that this is a default object, not a "real"
172 // service-created object. (TODO remove in 3.0) [aliu]
173 actualID->truncate(0);
174 }
175 Locale loc("");
b75a7d8f
A
176 lkey.canonicalLocale(loc);
177 return Collator::makeInstance(loc, status);
178 }
374ca955
A
179
180 virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
181 UnicodeString ar;
182 if (actualReturn == NULL) {
183 actualReturn = &ar;
184 }
57a6839d 185 return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
374ca955 186 }
b75a7d8f
A
187
188 virtual UBool isDefault() const {
189 return countFactories() == 1;
190 }
191};
192
4388f060
A
193ICUCollatorService::~ICUCollatorService() {}
194
b75a7d8f
A
195// -------------------------------------
196
57a6839d
A
197static void U_CALLCONV initService() {
198 gService = new ICUCollatorService();
199 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
200}
201
202
b75a7d8f
A
203static ICULocaleService*
204getService(void)
205{
57a6839d 206 umtx_initOnce(gServiceInitOnce, &initService);
374ca955 207 return gService;
b75a7d8f
A
208}
209
210// -------------------------------------
211
46f4442e 212static inline UBool
b75a7d8f
A
213hasService(void)
214{
57a6839d 215 UBool retVal = !gServiceInitOnce.isReset() && (getService() != NULL);
46f4442e 216 return retVal;
b75a7d8f
A
217}
218
374ca955 219#endif /* UCONFIG_NO_SERVICE */
b75a7d8f 220
57a6839d
A
221static void U_CALLCONV
222initAvailableLocaleList(UErrorCode &status) {
223 U_ASSERT(availableLocaleListCount == 0);
224 U_ASSERT(availableLocaleList == NULL);
46f4442e 225 // for now, there is a hardcoded list, so just walk through that list and set it up.
57a6839d
A
226 UResourceBundle *index = NULL;
227 UResourceBundle installed;
228 int32_t i = 0;
229
230 ures_initStackObject(&installed);
231 index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
232 ures_getByKey(index, "InstalledLocales", &installed, &status);
233
234 if(U_SUCCESS(status)) {
235 availableLocaleListCount = ures_getSize(&installed);
236 availableLocaleList = new Locale[availableLocaleListCount];
46f4442e 237
57a6839d
A
238 if (availableLocaleList != NULL) {
239 ures_resetIterator(&installed);
240 while(ures_hasNext(&installed)) {
241 const char *tempKey = NULL;
242 ures_getNextString(&installed, NULL, &tempKey, &status);
243 availableLocaleList[i++] = Locale(tempKey);
46f4442e 244 }
46f4442e 245 }
57a6839d
A
246 U_ASSERT(availableLocaleListCount == i);
247 ures_close(&installed);
46f4442e 248 }
57a6839d
A
249 ures_close(index);
250 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
46f4442e
A
251}
252
57a6839d
A
253static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
254 umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
255 return U_SUCCESS(status);
256}
257
258
b75a7d8f
A
259// Collator public methods -----------------------------------------------
260
b331163b
A
261namespace {
262
263static const struct {
264 const char *name;
265 UColAttribute attr;
266} collAttributes[] = {
267 { "colStrength", UCOL_STRENGTH },
268 { "colBackwards", UCOL_FRENCH_COLLATION },
269 { "colCaseLevel", UCOL_CASE_LEVEL },
270 { "colCaseFirst", UCOL_CASE_FIRST },
271 { "colAlternate", UCOL_ALTERNATE_HANDLING },
272 { "colNormalization", UCOL_NORMALIZATION_MODE },
273 { "colNumeric", UCOL_NUMERIC_COLLATION }
274};
275
276static const struct {
277 const char *name;
278 UColAttributeValue value;
279} collAttributeValues[] = {
280 { "primary", UCOL_PRIMARY },
281 { "secondary", UCOL_SECONDARY },
282 { "tertiary", UCOL_TERTIARY },
283 { "quaternary", UCOL_QUATERNARY },
284 // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
285 { "identical", UCOL_IDENTICAL },
286 { "no", UCOL_OFF },
287 { "yes", UCOL_ON },
288 { "shifted", UCOL_SHIFTED },
289 { "non-ignorable", UCOL_NON_IGNORABLE },
290 { "lower", UCOL_LOWER_FIRST },
291 { "upper", UCOL_UPPER_FIRST }
292};
293
294static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
295 "space", "punct", "symbol", "currency", "digit"
296};
297
298int32_t getReorderCode(const char *s) {
299 for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
300 if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
301 return UCOL_REORDER_CODE_FIRST + i;
302 }
303 }
304 // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
305 // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
306 // Avoid introducing synonyms/aliases.
307 return -1;
308}
309
310/**
311 * Sets collation attributes according to locale keywords. See
312 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
313 *
314 * Using "alias" keywords and values where defined:
315 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
316 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
317 */
318void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
319 if (U_FAILURE(errorCode)) {
320 return;
321 }
322 if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
323 // No keywords.
324 return;
325 }
326 char value[1024]; // The reordering value could be long.
327 // Check for collation keywords that were already deprecated
328 // before any were supported in createInstance() (except for "collation").
329 int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
330 if (U_FAILURE(errorCode)) {
331 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
332 return;
333 }
334 if (length != 0) {
335 errorCode = U_UNSUPPORTED_ERROR;
336 return;
337 }
338 length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
339 if (U_FAILURE(errorCode)) {
340 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
341 return;
342 }
343 if (length != 0) {
344 errorCode = U_UNSUPPORTED_ERROR;
345 return;
346 }
347 // Parse known collation keywords, ignore others.
348 if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
349 errorCode = U_ZERO_ERROR;
350 }
351 for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
352 length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
353 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
354 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
355 return;
356 }
357 if (length == 0) { continue; }
358 for (int32_t j = 0;; ++j) {
359 if (j == UPRV_LENGTHOF(collAttributeValues)) {
360 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
361 return;
362 }
363 if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
364 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
365 break;
366 }
367 }
368 }
369 length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
370 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
371 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
372 return;
373 }
374 if (length != 0) {
375 int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
376 int32_t codesLength = 0;
377 char *scriptName = value;
378 for (;;) {
379 if (codesLength == UPRV_LENGTHOF(codes)) {
380 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
381 return;
382 }
383 char *limit = scriptName;
384 char c;
385 while ((c = *limit) != 0 && c != '-') { ++limit; }
386 *limit = 0;
387 int32_t code;
388 if ((limit - scriptName) == 4) {
389 // Strict parsing, accept only 4-letter script codes, not long names.
390 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
391 } else {
392 code = getReorderCode(scriptName);
393 }
394 if (code < 0) {
395 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
396 return;
397 }
398 codes[codesLength++] = code;
399 if (c == 0) { break; }
400 scriptName = limit + 1;
401 }
402 coll.setReorderCodes(codes, codesLength, errorCode);
403 }
404 length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
405 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
406 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
407 return;
408 }
409 if (length != 0) {
410 int32_t code = getReorderCode(value);
411 if (code < 0) {
412 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
413 return;
414 }
415 coll.setMaxVariable((UColReorderCode)code, errorCode);
416 }
417 if (U_FAILURE(errorCode)) {
418 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
419 }
420}
421
422} // namespace
423
374ca955 424Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
b75a7d8f 425{
374ca955 426 return createInstance(Locale::getDefault(), success);
b75a7d8f
A
427}
428
374ca955 429Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
b75a7d8f
A
430 UErrorCode& status)
431{
374ca955
A
432 if (U_FAILURE(status))
433 return 0;
b331163b
A
434 if (desiredLocale.isBogus()) {
435 // Locale constructed from malformed locale ID or language tag.
436 status = U_ILLEGAL_ARGUMENT_ERROR;
437 return NULL;
438 }
439
440 Collator* coll;
374ca955
A
441#if !UCONFIG_NO_SERVICE
442 if (hasService()) {
443 Locale actualLoc;
b331163b
A
444 coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
445 } else
374ca955 446#endif
b331163b
A
447 {
448 coll = makeInstance(desiredLocale, status);
449 }
450 setAttributesFromKeywords(desiredLocale, *coll, status);
451 if (U_FAILURE(status)) {
452 delete coll;
453 return NULL;
454 }
455 return coll;
b75a7d8f
A
456}
457
458
b331163b
A
459Collator* Collator::makeInstance(const Locale& desiredLocale, UErrorCode& status) {
460 const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
57a6839d 461 if (U_SUCCESS(status)) {
b331163b 462 Collator *result = new RuleBasedCollator(entry);
57a6839d 463 if (result != NULL) {
b331163b
A
464 // Both the unified cache's get() and the RBC constructor
465 // did addRef(). Undo one of them.
466 entry->removeRef();
57a6839d
A
467 return result;
468 }
374ca955 469 status = U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 470 }
b331163b
A
471 if (entry != NULL) {
472 // Undo the addRef() from the cache.get().
473 entry->removeRef();
374ca955 474 }
57a6839d 475 return NULL;
b75a7d8f
A
476}
477
51004dcb
A
478Collator *
479Collator::safeClone() const {
480 return clone();
481}
482
b75a7d8f
A
483// implement deprecated, previously abstract method
484Collator::EComparisonResult Collator::compare(const UnicodeString& source,
485 const UnicodeString& target) const
486{
374ca955 487 UErrorCode ec = U_ZERO_ERROR;
51004dcb 488 return (EComparisonResult)compare(source, target, ec);
b75a7d8f
A
489}
490
491// implement deprecated, previously abstract method
492Collator::EComparisonResult Collator::compare(const UnicodeString& source,
493 const UnicodeString& target,
494 int32_t length) const
495{
374ca955 496 UErrorCode ec = U_ZERO_ERROR;
51004dcb 497 return (EComparisonResult)compare(source, target, length, ec);
b75a7d8f
A
498}
499
500// implement deprecated, previously abstract method
501Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
502 const UChar* target, int32_t targetLength)
503 const
504{
374ca955 505 UErrorCode ec = U_ZERO_ERROR;
51004dcb 506 return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
b75a7d8f
A
507}
508
729e4ab9
A
509UCollationResult Collator::compare(UCharIterator &/*sIter*/,
510 UCharIterator &/*tIter*/,
511 UErrorCode &status) const {
512 if(U_SUCCESS(status)) {
513 // Not implemented in the base class.
514 status = U_UNSUPPORTED_ERROR;
515 }
516 return UCOL_EQUAL;
517}
518
519UCollationResult Collator::compareUTF8(const StringPiece &source,
520 const StringPiece &target,
521 UErrorCode &status) const {
522 if(U_FAILURE(status)) {
523 return UCOL_EQUAL;
524 }
525 UCharIterator sIter, tIter;
526 uiter_setUTF8(&sIter, source.data(), source.length());
527 uiter_setUTF8(&tIter, target.data(), target.length());
528 return compare(sIter, tIter, status);
529}
530
b75a7d8f 531UBool Collator::equals(const UnicodeString& source,
374ca955 532 const UnicodeString& target) const
b75a7d8f 533{
374ca955
A
534 UErrorCode ec = U_ZERO_ERROR;
535 return (compare(source, target, ec) == UCOL_EQUAL);
b75a7d8f
A
536}
537
538UBool Collator::greaterOrEqual(const UnicodeString& source,
374ca955 539 const UnicodeString& target) const
b75a7d8f 540{
374ca955
A
541 UErrorCode ec = U_ZERO_ERROR;
542 return (compare(source, target, ec) != UCOL_LESS);
b75a7d8f
A
543}
544
545UBool Collator::greater(const UnicodeString& source,
374ca955 546 const UnicodeString& target) const
b75a7d8f 547{
374ca955
A
548 UErrorCode ec = U_ZERO_ERROR;
549 return (compare(source, target, ec) == UCOL_GREATER);
b75a7d8f
A
550}
551
552// this API ignores registered collators, since it returns an
553// array of indefinite lifetime
374ca955 554const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
b75a7d8f 555{
46f4442e
A
556 UErrorCode status = U_ZERO_ERROR;
557 Locale *result = NULL;
558 count = 0;
559 if (isAvailableLocaleListInitialized(status))
560 {
561 result = availableLocaleList;
562 count = availableLocaleListCount;
563 }
564 return result;
b75a7d8f
A
565}
566
374ca955
A
567UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
568 const Locale& displayLocale,
569 UnicodeString& name)
b75a7d8f 570{
374ca955
A
571#if !UCONFIG_NO_SERVICE
572 if (hasService()) {
73c04bcf
A
573 UnicodeString locNameStr;
574 LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
575 return gService->getDisplayName(locNameStr, name, displayLocale);
374ca955
A
576 }
577#endif
578 return objectLocale.getDisplayName(displayLocale, name);
b75a7d8f
A
579}
580
374ca955
A
581UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
582 UnicodeString& name)
b75a7d8f 583{
374ca955 584 return getDisplayName(objectLocale, Locale::getDefault(), name);
b75a7d8f
A
585}
586
587/* This is useless information */
588/*void Collator::getVersion(UVersionInfo versionInfo) const
589{
590 if (versionInfo!=NULL)
591 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
592}
593*/
594
595// UCollator protected constructor destructor ----------------------------
596
597/**
598* Default constructor.
599* Constructor is different from the old default Collator constructor.
600* The task for determining the default collation strength and normalization mode
601* is left to the child class.
602*/
603Collator::Collator()
374ca955 604: UObject()
b75a7d8f
A
605{
606}
607
608/**
609* Constructor.
610* Empty constructor, does not handle the arguments.
611* This constructor is done for backward compatibility with 1.7 and 1.8.
612* The task for handling the argument collation strength and normalization
613* mode is left to the child class.
614* @param collationStrength collation strength
615* @param decompositionMode
616* @deprecated 2.4 use the default constructor instead
617*/
618Collator::Collator(UCollationStrength, UNormalizationMode )
374ca955 619: UObject()
b75a7d8f
A
620{
621}
622
623Collator::~Collator()
624{
625}
626
627Collator::Collator(const Collator &other)
628 : UObject(other)
629{
630}
631
374ca955
A
632UBool Collator::operator==(const Collator& other) const
633{
51004dcb
A
634 // Subclasses: Call this method and then add more specific checks.
635 return typeid(*this) == typeid(other);
374ca955
A
636}
637
638UBool Collator::operator!=(const Collator& other) const
639{
640 return (UBool)!(*this == other);
641}
642
643int32_t U_EXPORT2 Collator::getBound(const uint8_t *source,
644 int32_t sourceLength,
645 UColBoundMode boundType,
646 uint32_t noOfLevels,
647 uint8_t *result,
648 int32_t resultLength,
649 UErrorCode &status)
650{
651 return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
b75a7d8f
A
652}
653
654void
46f4442e 655Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
b75a7d8f
A
656}
657
374ca955
A
658UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
659{
660 if(U_FAILURE(status)) {
661 return NULL;
662 }
663 // everything can be changed
664 return new UnicodeSet(0, 0x10FFFF);
665}
666
b75a7d8f
A
667// -------------------------------------
668
374ca955
A
669#if !UCONFIG_NO_SERVICE
670URegistryKey U_EXPORT2
b75a7d8f
A
671Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
672{
673 if (U_SUCCESS(status)) {
57a6839d
A
674 // Set the collator locales while registering so that createInstance()
675 // need not guess whether the collator's locales are already set properly
676 // (as they are by the data loader).
677 toAdopt->setLocales(locale, locale, locale);
b75a7d8f
A
678 return getService()->registerInstance(toAdopt, locale, status);
679 }
680 return NULL;
681}
682
683// -------------------------------------
684
685class CFactory : public LocaleKeyFactory {
686private:
687 CollatorFactory* _delegate;
688 Hashtable* _ids;
374ca955 689
b75a7d8f
A
690public:
691 CFactory(CollatorFactory* delegate, UErrorCode& status)
692 : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
693 , _delegate(delegate)
694 , _ids(NULL)
695 {
374ca955
A
696 if (U_SUCCESS(status)) {
697 int32_t count = 0;
698 _ids = new Hashtable(status);
699 if (_ids) {
700 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
701 for (int i = 0; i < count; ++i) {
702 _ids->put(idlist[i], (void*)this, status);
703 if (U_FAILURE(status)) {
704 delete _ids;
705 _ids = NULL;
706 return;
707 }
708 }
709 } else {
710 status = U_MEMORY_ALLOCATION_ERROR;
711 }
b75a7d8f
A
712 }
713 }
4388f060
A
714
715 virtual ~CFactory();
716
b75a7d8f 717 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
374ca955
A
718
719protected:
b75a7d8f
A
720 virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
721 {
722 if (U_SUCCESS(status)) {
723 return _ids;
724 }
725 return NULL;
726 }
374ca955 727
b75a7d8f
A
728 virtual UnicodeString&
729 getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
730};
731
4388f060
A
732CFactory::~CFactory()
733{
734 delete _delegate;
735 delete _ids;
736}
737
b75a7d8f
A
738UObject*
739CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
740{
741 if (handlesKey(key, status)) {
742 const LocaleKey& lkey = (const LocaleKey&)key;
743 Locale validLoc;
744 lkey.currentLocale(validLoc);
374ca955 745 return _delegate->createCollator(validLoc);
b75a7d8f
A
746 }
747 return NULL;
748}
749
750UnicodeString&
751CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
752{
753 if ((_coverage & 0x1) == 0) {
754 UErrorCode status = U_ZERO_ERROR;
755 const Hashtable* ids = getSupportedIDs(status);
756 if (ids && (ids->get(id) != NULL)) {
757 Locale loc;
758 LocaleUtility::initLocaleFromName(id, loc);
759 return _delegate->getDisplayName(loc, locale, result);
760 }
761 }
762 result.setToBogus();
763 return result;
764}
765
374ca955 766URegistryKey U_EXPORT2
b75a7d8f
A
767Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
768{
769 if (U_SUCCESS(status)) {
770 CFactory* f = new CFactory(toAdopt, status);
771 if (f) {
772 return getService()->registerFactory(f, status);
773 }
774 status = U_MEMORY_ALLOCATION_ERROR;
775 }
776 return NULL;
777}
778
779// -------------------------------------
780
374ca955 781UBool U_EXPORT2
b75a7d8f
A
782Collator::unregister(URegistryKey key, UErrorCode& status)
783{
374ca955
A
784 if (U_SUCCESS(status)) {
785 if (hasService()) {
786 return gService->unregister(key, status);
787 }
788 status = U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f 789 }
374ca955 790 return FALSE;
b75a7d8f 791}
46f4442e
A
792#endif /* UCONFIG_NO_SERVICE */
793
794class CollationLocaleListEnumeration : public StringEnumeration {
795private:
796 int32_t index;
797public:
798 static UClassID U_EXPORT2 getStaticClassID(void);
799 virtual UClassID getDynamicClassID(void) const;
800public:
801 CollationLocaleListEnumeration()
802 : index(0)
803 {
804 // The global variables should already be initialized.
805 //isAvailableLocaleListInitialized(status);
806 }
807
4388f060 808 virtual ~CollationLocaleListEnumeration();
46f4442e
A
809
810 virtual StringEnumeration * clone() const
811 {
812 CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
813 if (result) {
814 result->index = index;
815 }
816 return result;
817 }
818
819 virtual int32_t count(UErrorCode &/*status*/) const {
820 return availableLocaleListCount;
821 }
822
823 virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) {
824 const char* result;
825 if(index < availableLocaleListCount) {
826 result = availableLocaleList[index++].getName();
827 if(resultLength != NULL) {
729e4ab9 828 *resultLength = (int32_t)uprv_strlen(result);
46f4442e
A
829 }
830 } else {
831 if(resultLength != NULL) {
832 *resultLength = 0;
833 }
834 result = NULL;
835 }
836 return result;
837 }
838
839 virtual const UnicodeString* snext(UErrorCode& status) {
840 int32_t resultLength = 0;
841 const char *s = next(&resultLength, status);
842 return setChars(s, resultLength, status);
843 }
844
845 virtual void reset(UErrorCode& /*status*/) {
846 index = 0;
847 }
848};
849
4388f060
A
850CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
851
46f4442e
A
852UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
853
b75a7d8f
A
854
855// -------------------------------------
856
374ca955 857StringEnumeration* U_EXPORT2
b75a7d8f
A
858Collator::getAvailableLocales(void)
859{
46f4442e
A
860#if !UCONFIG_NO_SERVICE
861 if (hasService()) {
862 return getService()->getAvailableLocales();
863 }
374ca955 864#endif /* UCONFIG_NO_SERVICE */
46f4442e
A
865 UErrorCode status = U_ZERO_ERROR;
866 if (isAvailableLocaleListInitialized(status)) {
867 return new CollationLocaleListEnumeration();
868 }
869 return NULL;
870}
374ca955
A
871
872StringEnumeration* U_EXPORT2
873Collator::getKeywords(UErrorCode& status) {
b331163b
A
874 return UStringEnumeration::fromUEnumeration(
875 ucol_getKeywords(&status), status);
374ca955
A
876}
877
878StringEnumeration* U_EXPORT2
879Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
b331163b
A
880 return UStringEnumeration::fromUEnumeration(
881 ucol_getKeywordValues(keyword, &status), status);
374ca955
A
882}
883
729e4ab9
A
884StringEnumeration* U_EXPORT2
885Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
886 UBool commonlyUsed, UErrorCode& status) {
b331163b
A
887 return UStringEnumeration::fromUEnumeration(
888 ucol_getKeywordValuesForLocale(
889 key, locale.getName(), commonlyUsed, &status),
890 status);
729e4ab9
A
891}
892
374ca955
A
893Locale U_EXPORT2
894Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
895 UBool& isAvailable, UErrorCode& status) {
896 // This is a wrapper over ucol_getFunctionalEquivalent
897 char loc[ULOC_FULLNAME_CAPACITY];
898 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
899 keyword, locale.getName(), &isAvailable, &status);
900 if (U_FAILURE(status)) {
901 *loc = 0; // root
902 }
903 return Locale::createFromName(loc);
b75a7d8f
A
904}
905
51004dcb
A
906Collator::ECollationStrength
907Collator::getStrength(void) const {
908 UErrorCode intStatus = U_ZERO_ERROR;
909 return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
910}
911
912void
913Collator::setStrength(ECollationStrength newStrength) {
914 UErrorCode intStatus = U_ZERO_ERROR;
915 setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
916}
917
57a6839d
A
918Collator &
919Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
920 if (U_SUCCESS(errorCode)) {
921 errorCode = U_UNSUPPORTED_ERROR;
922 }
923 return *this;
924}
925
926UColReorderCode
927Collator::getMaxVariable() const {
928 return UCOL_REORDER_CODE_PUNCTUATION;
929}
930
51004dcb 931int32_t
4388f060
A
932Collator::getReorderCodes(int32_t* /* dest*/,
933 int32_t /* destCapacity*/,
934 UErrorCode& status) const
729e4ab9
A
935{
936 if (U_SUCCESS(status)) {
937 status = U_UNSUPPORTED_ERROR;
938 }
939 return 0;
940}
941
51004dcb 942void
4388f060
A
943Collator::setReorderCodes(const int32_t* /* reorderCodes */,
944 int32_t /* reorderCodesLength */,
945 UErrorCode& status)
729e4ab9
A
946{
947 if (U_SUCCESS(status)) {
948 status = U_UNSUPPORTED_ERROR;
949 }
950}
951
57a6839d
A
952int32_t
953Collator::getEquivalentReorderCodes(int32_t reorderCode,
954 int32_t *dest, int32_t capacity,
955 UErrorCode &errorCode) {
956 if(U_FAILURE(errorCode)) { return 0; }
957 if(capacity < 0 || (dest == NULL && capacity > 0)) {
958 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
959 return 0;
4388f060 960 }
57a6839d
A
961 const CollationData *baseData = CollationRoot::getData(errorCode);
962 if(U_FAILURE(errorCode)) { return 0; }
963 return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
4388f060
A
964}
965
966int32_t
967Collator::internalGetShortDefinitionString(const char * /*locale*/,
968 char * /*buffer*/,
969 int32_t /*capacity*/,
970 UErrorCode &status) const {
971 if(U_SUCCESS(status)) {
972 status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
973 }
974 return 0;
975}
976
57a6839d
A
977UCollationResult
978Collator::internalCompareUTF8(const char *left, int32_t leftLength,
979 const char *right, int32_t rightLength,
980 UErrorCode &errorCode) const {
981 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
982 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
983 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
984 return UCOL_EQUAL;
985 }
986 return compareUTF8(
987 StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength),
988 StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength),
989 errorCode);
990}
991
992int32_t
993Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
994 uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
995 if (U_SUCCESS(errorCode)) {
996 errorCode = U_UNSUPPORTED_ERROR;
997 }
998 return 0;
999}
1000
b75a7d8f
A
1001// UCollator private data members ----------------------------------------
1002
1003/* This is useless information */
1004/*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1005
1006// -------------------------------------
1007
1008U_NAMESPACE_END
1009
b75a7d8f
A
1010#endif /* #if !UCONFIG_NO_COLLATION */
1011
1012/* eof */