]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/tblcoll.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / i18n / tblcoll.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
73c04bcf 3* Copyright (C) 1996-2006, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5******************************************************************************
6*/
7
8/**
9* File tblcoll.cpp
10*
11* Created by: Helena Shih
12*
13* Modification History:
14*
15* Date Name Description
16* 2/5/97 aliu Added streamIn and streamOut methods. Added
17* constructor which reads RuleBasedCollator object from
18* a binary file. Added writeToFile method which streams
19* RuleBasedCollator out to a binary file. The streamIn
20* and streamOut methods use istream and ostream objects
21* in binary mode.
22* 2/11/97 aliu Moved declarations out of for loop initializer.
23* Added Mac compatibility #ifdef for ios::nocreate.
24* 2/12/97 aliu Modified to use TableCollationData sub-object to
25* hold invariant data.
26* 2/13/97 aliu Moved several methods into this class from Collation.
27* Added a private RuleBasedCollator(Locale&) constructor,
28* to be used by Collator::getInstance(). General
29* clean up. Made use of UErrorCode variables consistent.
30* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
31* constructor and getDynamicClassID.
32* 3/5/97 aliu Changed compaction cycle to improve performance. We
33* use the maximum allowable value which is kBlockCount.
34* Modified getRules() to load rules dynamically. Changed
35* constructFromFile() call to accommodate this (added
36* parameter to specify whether binary loading is to
37* take place).
38* 05/06/97 helena Added memory allocation error check.
39* 6/20/97 helena Java class name change.
40* 6/23/97 helena Adding comments to make code more readable.
41* 09/03/97 helena Added createCollationKeyValues().
42* 06/26/98 erm Changes for CollationKeys using byte arrays.
43* 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
44* 04/23/99 stephen Removed EDecompositionMode, merged with
45* Normalizer::EMode
46* 06/14/99 stephen Removed kResourceBundleSuffix
47* 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
48* files are no longer used.
49* 11/02/99 helena Collator performance enhancements. Special case
50* for NO_OP situations.
51* 11/17/99 srl More performance enhancements. Inlined some internal functions.
52* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
53* to implementation file.
54* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
55*/
56
57#include "unicode/utypes.h"
58
59#if !UCONFIG_NO_COLLATION
60
61#include "unicode/tblcoll.h"
62#include "unicode/coleitr.h"
374ca955 63#include "unicode/ures.h"
b75a7d8f
A
64#include "unicode/uset.h"
65#include "ucol_imp.h"
66#include "uresimp.h"
67#include "uhash.h"
68#include "cmemory.h"
69#include "cstring.h"
374ca955 70#include "putilimp.h"
b75a7d8f
A
71
72/* public RuleBasedCollator constructor ---------------------------------- */
73
74U_NAMESPACE_BEGIN
75
76/**
77* Copy constructor, aliasing, not write-through
78*/
374ca955
A
79RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
80: Collator(that)
81, dataIsOwned(FALSE)
82, isWriteThroughAlias(FALSE)
73c04bcf 83, ucollator(NULL)
b75a7d8f 84{
73c04bcf 85 RuleBasedCollator::operator=(that);
b75a7d8f
A
86}
87
88RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
89 UErrorCode& status) :
90dataIsOwned(FALSE)
b75a7d8f 91{
374ca955
A
92 construct(rules,
93 UCOL_DEFAULT_STRENGTH,
94 UCOL_DEFAULT,
95 status);
b75a7d8f
A
96}
97
98RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
99 ECollationStrength collationStrength,
100 UErrorCode& status) : dataIsOwned(FALSE)
b75a7d8f 101{
374ca955
A
102 construct(rules,
103 getUCollationStrength(collationStrength),
104 UCOL_DEFAULT,
105 status);
b75a7d8f
A
106}
107
108RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
109 UColAttributeValue decompositionMode,
110 UErrorCode& status) :
374ca955 111dataIsOwned(FALSE)
b75a7d8f 112{
374ca955
A
113 construct(rules,
114 UCOL_DEFAULT_STRENGTH,
115 decompositionMode,
116 status);
b75a7d8f
A
117}
118
119RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
120 ECollationStrength collationStrength,
121 UColAttributeValue decompositionMode,
122 UErrorCode& status) : dataIsOwned(FALSE)
b75a7d8f 123{
374ca955
A
124 construct(rules,
125 getUCollationStrength(collationStrength),
126 decompositionMode,
127 status);
b75a7d8f 128}
73c04bcf
A
129RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
130 const RuleBasedCollator *base,
131 UErrorCode &status) :
132dataIsOwned(TRUE),
133isWriteThroughAlias(FALSE)
134{
135 ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
136}
b75a7d8f
A
137
138void
73c04bcf 139RuleBasedCollator::setRuleStringFromCollator()
b75a7d8f 140{
73c04bcf
A
141 int32_t length;
142 const UChar *r = ucol_getRules(ucollator, &length);
374ca955 143
73c04bcf
A
144 if (r && length > 0) {
145 // alias the rules string
146 urulestring.setTo(TRUE, r, length);
147 }
148 else {
149 urulestring.truncate(0); // Clear string.
b75a7d8f 150 }
b75a7d8f
A
151}
152
153// not aliasing, not write-through
154void
155RuleBasedCollator::construct(const UnicodeString& rules,
156 UColAttributeValue collationStrength,
157 UColAttributeValue decompositionMode,
158 UErrorCode& status)
159{
374ca955
A
160 ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
161 decompositionMode, collationStrength,
162 NULL, &status);
b75a7d8f 163
374ca955
A
164 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
165 isWriteThroughAlias = FALSE;
b75a7d8f 166
73c04bcf
A
167 if(ucollator == NULL) {
168 if(U_SUCCESS(status)) {
169 status = U_MEMORY_ALLOCATION_ERROR;
170 }
171 return; // Failure
172 }
173
174 setRuleStringFromCollator();
b75a7d8f
A
175}
176
177/* RuleBasedCollator public destructor ----------------------------------- */
178
179RuleBasedCollator::~RuleBasedCollator()
180{
374ca955
A
181 if (dataIsOwned)
182 {
183 ucol_close(ucollator);
374ca955
A
184 }
185 ucollator = 0;
b75a7d8f
A
186}
187
188/* RuleBasedCollator public methods -------------------------------------- */
189
190UBool RuleBasedCollator::operator==(const Collator& that) const
191{
192 /* only checks for address equals here */
193 if (Collator::operator==(that))
194 return TRUE;
195
196 if (getDynamicClassID() != that.getDynamicClassID())
197 return FALSE; /* not the same class */
198
199 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
200
201 // weiv: use C function, commented code below is wrong
202 return ucol_equals(this->ucollator, thatAlias.ucollator);
203 /*
204 synwee : orginal code does not check for data compatibility
205 */
206 /*
207 if (ucollator != thatAlias.ucollator)
208 return FALSE;
209
210 return TRUE;
211 */
212}
213
374ca955
A
214UBool RuleBasedCollator::operator!=(const Collator& other) const
215{
216 return !(*this == other);
217}
218
b75a7d8f
A
219// aliasing, not write-through
220RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
221{
374ca955 222 if (this != &that)
b75a7d8f 223 {
374ca955
A
224 if (dataIsOwned)
225 {
226 ucol_close(ucollator);
374ca955 227 }
b75a7d8f 228
73c04bcf
A
229 urulestring.truncate(0); // empty the rule string
230 dataIsOwned = TRUE;
374ca955 231 isWriteThroughAlias = FALSE;
73c04bcf
A
232
233 UErrorCode intStatus = U_ZERO_ERROR;
234 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
235 ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
236 &intStatus);
237 if (U_SUCCESS(intStatus)) {
238 setRuleStringFromCollator();
239 }
374ca955
A
240 }
241 return *this;
b75a7d8f
A
242}
243
244// aliasing, not write-through
245Collator* RuleBasedCollator::clone() const
246{
73c04bcf 247 return new RuleBasedCollator(*this);
b75a7d8f
A
248}
249
250CollationElementIterator* RuleBasedCollator::createCollationElementIterator
251 (const UnicodeString& source) const
252{
374ca955
A
253 UErrorCode status = U_ZERO_ERROR;
254 CollationElementIterator *result = new CollationElementIterator(source, this,
255 status);
256 if (U_FAILURE(status)) {
257 delete result;
258 return NULL;
259 }
b75a7d8f 260
374ca955 261 return result;
b75a7d8f
A
262}
263
264/**
265* Create a CollationElementIterator object that will iterate over the
266* elements in a string, using the collation rules defined in this
267* RuleBasedCollator
268*/
269CollationElementIterator* RuleBasedCollator::createCollationElementIterator
270 (const CharacterIterator& source) const
271{
374ca955
A
272 UErrorCode status = U_ZERO_ERROR;
273 CollationElementIterator *result = new CollationElementIterator(source, this,
274 status);
b75a7d8f 275
374ca955
A
276 if (U_FAILURE(status)) {
277 delete result;
278 return NULL;
279 }
b75a7d8f 280
374ca955 281 return result;
b75a7d8f
A
282}
283
284/**
285* Return a string representation of this collator's rules. The string can
286* later be passed to the constructor that takes a UnicodeString argument,
287* which will construct a collator that's functionally identical to this one.
288* You can also allow users to edit the string in order to change the collation
289* data, or you can print it out for inspection, or whatever.
290*/
291const UnicodeString& RuleBasedCollator::getRules() const
292{
73c04bcf 293 return urulestring;
b75a7d8f
A
294}
295
296void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
297{
298 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
299
300 if (rulesize > 0) {
301 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
302 if(rules != NULL) {
374ca955
A
303 ucol_getRulesEx(ucollator, delta, rules, rulesize);
304 buffer.setTo(rules, rulesize);
305 uprv_free(rules);
b75a7d8f 306 } else { // couldn't allocate
374ca955 307 buffer.remove();
b75a7d8f
A
308 }
309 }
310 else {
311 buffer.remove();
312 }
313}
314
315UnicodeSet *
316RuleBasedCollator::getTailoredSet(UErrorCode &status) const
317{
374ca955
A
318 if(U_FAILURE(status)) {
319 return NULL;
320 }
321 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
b75a7d8f
A
322}
323
324
325void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
326{
327 if (versionInfo!=NULL){
328 ucol_getVersion(ucollator, versionInfo);
329 }
330}
331
332Collator::EComparisonResult RuleBasedCollator::compare(
333 const UnicodeString& source,
334 const UnicodeString& target,
335 int32_t length) const
336{
374ca955
A
337 UErrorCode status = U_ZERO_ERROR;
338 return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
b75a7d8f
A
339}
340
341UCollationResult RuleBasedCollator::compare(
342 const UnicodeString& source,
343 const UnicodeString& target,
374ca955 344 int32_t length,
b75a7d8f
A
345 UErrorCode &status) const
346{
374ca955 347 return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
b75a7d8f
A
348}
349
350Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
351 int32_t sourceLength,
352 const UChar* target,
353 int32_t targetLength)
354 const
355{
374ca955
A
356 return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
357 target, targetLength));
b75a7d8f
A
358}
359
360UCollationResult RuleBasedCollator::compare(const UChar* source,
361 int32_t sourceLength,
362 const UChar* target,
374ca955 363 int32_t targetLength,
b75a7d8f
A
364 UErrorCode &status) const
365{
374ca955
A
366 if(U_SUCCESS(status)) {
367 return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
368 } else {
369 return UCOL_EQUAL;
370 }
b75a7d8f
A
371}
372
373/**
374* Compare two strings using this collator
375*/
376Collator::EComparisonResult RuleBasedCollator::compare(
377 const UnicodeString& source,
378 const UnicodeString& target) const
379{
374ca955
A
380 return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
381 target.getBuffer(), target.length()));
b75a7d8f
A
382}
383
384UCollationResult RuleBasedCollator::compare(
385 const UnicodeString& source,
374ca955 386 const UnicodeString& target,
b75a7d8f
A
387 UErrorCode &status) const
388{
374ca955
A
389 if(U_SUCCESS(status)) {
390 return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
391 target.getBuffer(), target.length());
392 } else {
393 return UCOL_EQUAL;
394 }
b75a7d8f
A
395}
396
397/**
398* Retrieve a collation key for the specified string. The key can be compared
399* with other collation keys using a bitwise comparison (e.g. memcmp) to find
400* the ordering of their respective source strings. This is handy when doing a
401* sort, where each sort key must be compared many times.
402*
403* The basic algorithm here is to find all of the collation elements for each
404* character in the source string, convert them to an ASCII representation, and
405* put them into the collation key. But it's trickier than that. Each
406* collation element in a string has three components: primary ('A' vs 'B'),
374ca955 407* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
b75a7d8f
A
408* at the end of a string takes precedence over a secondary or tertiary
409* difference earlier in the string.
410*
411* To account for this, we put all of the primary orders at the beginning of
412* the string, followed by the secondary and tertiary orders. Each set of
413* orders is terminated by nulls so that a key for a string which is a initial
414* substring of another key will compare less without any special case.
415*
416* Here's a hypothetical example, with the collation element represented as a
417* three-digit number, one digit for primary, one for secondary, etc.
418*
374ca955 419* String: A a B \u00C9
b75a7d8f
A
420* Collation Elements: 101 100 201 511
421* Collation Key: 1125<null>0001<null>1011<null>
422*
423* To make things even trickier, secondary differences (accent marks) are
424* compared starting at the *end* of the string in languages with French
425* secondary ordering. But when comparing the accent marks on a single base
426* character, they are compared from the beginning. To handle this, we reverse
427* all of the accents that belong to each base character, then we reverse the
428* entire string of secondary orderings at the end.
429*/
430CollationKey& RuleBasedCollator::getCollationKey(
431 const UnicodeString& source,
432 CollationKey& sortkey,
433 UErrorCode& status) const
434{
374ca955 435 return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
b75a7d8f
A
436}
437
438CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
439 int32_t sourceLen,
440 CollationKey& sortkey,
441 UErrorCode& status) const
442{
374ca955
A
443 if (U_FAILURE(status))
444 {
445 return sortkey.setToBogus();
446 }
b75a7d8f 447
374ca955
A
448 if ((!source) || (sourceLen == 0)) {
449 return sortkey.reset();
450 }
b75a7d8f 451
374ca955
A
452 uint8_t *result;
453 int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
454 source, sourceLen,
455 &result,
456 &status);
457 sortkey.adopt(result, resultLen);
458 return sortkey;
b75a7d8f
A
459}
460
461/**
462 * Return the maximum length of any expansion sequences that end with the
463 * specified comparison order.
464 * @param order a collation order returned by previous or next.
465 * @return the maximum length of any expansion seuences ending with the
466 * specified order or 1 if collation order does not occur at the end of any
467 * expansion sequence.
468 * @see CollationElementIterator#getMaxExpansion
469 */
470int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
471{
374ca955
A
472 uint8_t result;
473 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
474 return result;
b75a7d8f
A
475}
476
477uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
478 UErrorCode &status)
479{
374ca955 480 return ucol_cloneRuleData(ucollator, &length, &status);
b75a7d8f
A
481}
482
73c04bcf
A
483
484int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
485{
486 return ucol_cloneBinary(ucollator, buffer, capacity, &status);
487}
488
b75a7d8f
A
489void RuleBasedCollator::setAttribute(UColAttribute attr,
490 UColAttributeValue value,
491 UErrorCode &status)
492{
374ca955
A
493 if (U_FAILURE(status))
494 return;
495 checkOwned();
496 ucol_setAttribute(ucollator, attr, value, &status);
b75a7d8f
A
497}
498
499UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
500 UErrorCode &status)
501{
374ca955
A
502 if (U_FAILURE(status))
503 return UCOL_DEFAULT;
504 return ucol_getAttribute(ucollator, attr, &status);
b75a7d8f
A
505}
506
507uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
374ca955
A
508 checkOwned();
509 return ucol_setVariableTop(ucollator, varTop, len, &status);
b75a7d8f
A
510}
511
512uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
374ca955
A
513 checkOwned();
514 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
b75a7d8f
A
515}
516
517void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
374ca955
A
518 checkOwned();
519 ucol_restoreVariableTop(ucollator, varTop, &status);
b75a7d8f
A
520}
521
522uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
523 return ucol_getVariableTop(ucollator, &status);
524}
525
526Collator* RuleBasedCollator::safeClone(void)
527{
374ca955
A
528 UErrorCode intStatus = U_ZERO_ERROR;
529 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
530 UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
531 &intStatus);
532 if (U_FAILURE(intStatus)) {
533 return NULL;
534 }
b75a7d8f 535
73c04bcf
A
536 RuleBasedCollator *result = new RuleBasedCollator();
537 result->ucollator = ucol;
374ca955
A
538 result->dataIsOwned = TRUE;
539 result->isWriteThroughAlias = FALSE;
73c04bcf 540 setRuleStringFromCollator();
b75a7d8f 541
374ca955 542 return result;
b75a7d8f
A
543}
544
545
546int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
547 uint8_t *result, int32_t resultLength)
548 const
549{
374ca955 550 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
b75a7d8f
A
551}
552
553int32_t RuleBasedCollator::getSortKey(const UChar *source,
554 int32_t sourceLength, uint8_t *result,
555 int32_t resultLength) const
556{
374ca955 557 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
b75a7d8f
A
558}
559
560Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
561{
374ca955
A
562 UErrorCode intStatus = U_ZERO_ERROR;
563 return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
564 &intStatus));
b75a7d8f
A
565}
566
567void RuleBasedCollator::setStrength(ECollationStrength newStrength)
568{
374ca955
A
569 checkOwned();
570 UErrorCode intStatus = U_ZERO_ERROR;
571 UCollationStrength strength = getUCollationStrength(newStrength);
572 ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
b75a7d8f
A
573}
574
575/**
576* Create a hash code for this collation. Just hash the main rule table -- that
577* should be good enough for almost any use.
578*/
579int32_t RuleBasedCollator::hashCode() const
580{
374ca955
A
581 int32_t length;
582 const UChar *rules = ucol_getRules(ucollator, &length);
583 return uhash_hashUCharsN(rules, length);
b75a7d8f
A
584}
585
586/**
587* return the locale of this collator
588*/
589const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
374ca955
A
590 const char *result = ucol_getLocale(ucollator, type, &status);
591 if(result == NULL) {
592 Locale res("");
593 res.setToBogus();
594 return res;
595 } else {
596 return Locale(result);
597 }
b75a7d8f
A
598}
599
600void
601RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) {
374ca955 602 checkOwned();
b75a7d8f
A
603 size_t rlen = uprv_strlen(requestedLocale.getName());
604 char* rloc = (char *)uprv_malloc((rlen+1)*sizeof(char));
605 if (rloc) {
606 uprv_strcpy(rloc, requestedLocale.getName());
607 size_t vlen = uprv_strlen(validLocale.getName());
608 char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char));
609 if (vloc) {
610 uprv_strcpy(vloc, validLocale.getName());
611 ucol_setReqValidLocales(ucollator, rloc, vloc);
612 return;
613 }
614 uprv_free(rloc);
615 }
616}
617
618// RuleBasedCollator private constructors -------------------------------------
619
374ca955 620RuleBasedCollator::RuleBasedCollator()
73c04bcf 621 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
b75a7d8f 622{
b75a7d8f
A
623}
624
625RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
73c04bcf
A
626 UErrorCode& status)
627 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
b75a7d8f 628{
374ca955
A
629 if (U_FAILURE(status))
630 return;
b75a7d8f 631
374ca955
A
632 /*
633 Try to load, in order:
634 1. The desired locale's collation.
635 2. A fallback of the desired locale.
636 3. The default locale's collation.
637 4. A fallback of the default locale.
638 5. The default collation rules, which contains en_US collation rules.
639
640 To reiterate, we try:
641 Specific:
642 language+country+variant
643 language+country
644 language
645 Default:
646 language+country+variant
647 language+country
648 language
649 Root: (aka DEFAULTRULES)
650 steps 1-5 are handled by resource bundle fallback mechanism.
651 however, in a very unprobable situation that no resource bundle
652 data exists, step 5 is repeated with hardcoded default rules.
653 */
654
655 setUCollator(desiredLocale, status);
656
657 if (U_FAILURE(status))
658 {
659 status = U_ZERO_ERROR;
b75a7d8f 660
374ca955
A
661 setUCollator(kRootLocaleName, status);
662 if (status == U_ZERO_ERROR) {
663 status = U_USING_DEFAULT_WARNING;
664 }
b75a7d8f 665 }
b75a7d8f 666
374ca955
A
667 if (U_SUCCESS(status))
668 {
73c04bcf 669 setRuleStringFromCollator();
b75a7d8f 670 }
b75a7d8f
A
671}
672
673void
674RuleBasedCollator::setUCollator(const char *locale,
675 UErrorCode &status)
676{
374ca955
A
677 if (U_FAILURE(status))
678 return;
679 if (ucollator && dataIsOwned)
680 ucol_close(ucollator);
681 ucollator = ucol_open_internal(locale, &status);
682 dataIsOwned = TRUE;
683 isWriteThroughAlias = FALSE;
b75a7d8f
A
684}
685
686
687void
688RuleBasedCollator::checkOwned() {
374ca955
A
689 if (!(dataIsOwned || isWriteThroughAlias)) {
690 UErrorCode status = U_ZERO_ERROR;
691 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
73c04bcf 692 setRuleStringFromCollator();
374ca955
A
693 dataIsOwned = TRUE;
694 isWriteThroughAlias = FALSE;
695 }
b75a7d8f
A
696}
697
698/* RuleBasedCollator private data members -------------------------------- */
699
700/*
701 * TODO:
702 * These should probably be enums (<=0xffff) or #defines (>0xffff)
703 * for better performance.
704 * Include ucol_imp.h and use its constants if possible.
705 * Only used in coleitr.h?!
706 * Remove from here!
707 */
708
709/* need look up in .commit() */
710const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
711/* Expand index follows */
712const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
713/* contract indexes follows */
714const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
715/* unmapped character values */
716const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
717/* primary strength increment */
718const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
719/* secondary strength increment */
720const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
721/* tertiary strength increment */
722const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
723/* mask off anything but primary order */
724const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
725/* mask off anything but secondary order */
726const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
727/* mask off anything but tertiary order */
728const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
729/* mask off ignorable char order */
730const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
731/* use only the primary difference */
732const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
733/* use only the primary and secondary difference */
734const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
735/* primary order shift */
736const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
737/* secondary order shift */
738const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
739/* starting value for collation elements */
740const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
741/* testing mask for primary low element */
742const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
743/* reseting value for secondaries and tertiaries */
744const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
745/* reseting value for tertiaries */
746const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
747
748const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
749
374ca955 750UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
b75a7d8f
A
751
752U_NAMESPACE_END
753
754#endif /* #if !UCONFIG_NO_COLLATION */