]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/tblcoll.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / tblcoll.cpp
CommitLineData
b75a7d8f 1/*
46f4442e 2 ******************************************************************************
4388f060 3 * Copyright (C) 1996-2012, International Business Machines Corporation and
729e4ab9 4 * others. All Rights Reserved.
46f4442e
A
5 ******************************************************************************
6 */
b75a7d8f
A
7
8/**
46f4442e
A
9 * File tblcoll.cpp
10 *
11 * Created by: Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 2/5/97 aliu Added streamIn and streamOut methods. Added
17 * constructor which reads RuleBasedCollator object from
18 * a binary file. Added writeToFile method which streams
19 * RuleBasedCollator out to a binary file. The streamIn
20 * and streamOut methods use istream and ostream objects
21 * in binary mode.
22 * 2/11/97 aliu Moved declarations out of for loop initializer.
23 * Added Mac compatibility #ifdef for ios::nocreate.
24 * 2/12/97 aliu Modified to use TableCollationData sub-object to
25 * hold invariant data.
26 * 2/13/97 aliu Moved several methods into this class from Collation.
27 * Added a private RuleBasedCollator(Locale&) constructor,
28 * to be used by Collator::getInstance(). General
29 * clean up. Made use of UErrorCode variables consistent.
30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
31 * constructor and getDynamicClassID.
32 * 3/5/97 aliu Changed compaction cycle to improve performance. We
33 * use the maximum allowable value which is kBlockCount.
34 * Modified getRules() to load rules dynamically. Changed
35 * constructFromFile() call to accommodate this (added
36 * parameter to specify whether binary loading is to
37 * take place).
38 * 05/06/97 helena Added memory allocation error check.
39 * 6/20/97 helena Java class name change.
40 * 6/23/97 helena Adding comments to make code more readable.
41 * 09/03/97 helena Added createCollationKeyValues().
42 * 06/26/98 erm Changes for CollationKeys using byte arrays.
43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
44 * 04/23/99 stephen Removed EDecompositionMode, merged with
45 * Normalizer::EMode
46 * 06/14/99 stephen Removed kResourceBundleSuffix
47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
48 * files are no longer used.
49 * 11/02/99 helena Collator performance enhancements. Special case
50 * for NO_OP situations.
51 * 11/17/99 srl More performance enhancements. Inlined some internal functions.
52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
53 * to implementation file.
54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
55 */
b75a7d8f 56
729e4ab9
A
57#include <typeinfo> // for 'typeid' to work
58
b75a7d8f
A
59#include "unicode/utypes.h"
60
61#if !UCONFIG_NO_COLLATION
62
63#include "unicode/tblcoll.h"
64#include "unicode/coleitr.h"
374ca955 65#include "unicode/ures.h"
b75a7d8f
A
66#include "unicode/uset.h"
67#include "ucol_imp.h"
68#include "uresimp.h"
69#include "uhash.h"
70#include "cmemory.h"
71#include "cstring.h"
374ca955 72#include "putilimp.h"
4388f060 73#include "ustr_imp.h"
b75a7d8f
A
74
75/* public RuleBasedCollator constructor ---------------------------------- */
76
77U_NAMESPACE_BEGIN
78
79/**
80* Copy constructor, aliasing, not write-through
81*/
374ca955
A
82RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
83: Collator(that)
84, dataIsOwned(FALSE)
85, isWriteThroughAlias(FALSE)
73c04bcf 86, ucollator(NULL)
b75a7d8f 87{
73c04bcf 88 RuleBasedCollator::operator=(that);
b75a7d8f
A
89}
90
91RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
92 UErrorCode& status) :
93dataIsOwned(FALSE)
b75a7d8f 94{
374ca955
A
95 construct(rules,
96 UCOL_DEFAULT_STRENGTH,
97 UCOL_DEFAULT,
98 status);
b75a7d8f
A
99}
100
101RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
102 ECollationStrength collationStrength,
103 UErrorCode& status) : dataIsOwned(FALSE)
b75a7d8f 104{
374ca955
A
105 construct(rules,
106 getUCollationStrength(collationStrength),
107 UCOL_DEFAULT,
108 status);
b75a7d8f
A
109}
110
111RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
112 UColAttributeValue decompositionMode,
113 UErrorCode& status) :
374ca955 114dataIsOwned(FALSE)
b75a7d8f 115{
374ca955
A
116 construct(rules,
117 UCOL_DEFAULT_STRENGTH,
118 decompositionMode,
119 status);
b75a7d8f
A
120}
121
122RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
374ca955
A
123 ECollationStrength collationStrength,
124 UColAttributeValue decompositionMode,
125 UErrorCode& status) : dataIsOwned(FALSE)
b75a7d8f 126{
374ca955
A
127 construct(rules,
128 getUCollationStrength(collationStrength),
129 decompositionMode,
130 status);
b75a7d8f 131}
729e4ab9
A
132RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
133 const RuleBasedCollator *base,
73c04bcf
A
134 UErrorCode &status) :
135dataIsOwned(TRUE),
136isWriteThroughAlias(FALSE)
137{
138 ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
139}
b75a7d8f
A
140
141void
73c04bcf 142RuleBasedCollator::setRuleStringFromCollator()
b75a7d8f 143{
73c04bcf
A
144 int32_t length;
145 const UChar *r = ucol_getRules(ucollator, &length);
374ca955 146
73c04bcf
A
147 if (r && length > 0) {
148 // alias the rules string
149 urulestring.setTo(TRUE, r, length);
150 }
151 else {
152 urulestring.truncate(0); // Clear string.
b75a7d8f 153 }
b75a7d8f
A
154}
155
156// not aliasing, not write-through
157void
158RuleBasedCollator::construct(const UnicodeString& rules,
159 UColAttributeValue collationStrength,
160 UColAttributeValue decompositionMode,
161 UErrorCode& status)
162{
374ca955
A
163 ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
164 decompositionMode, collationStrength,
165 NULL, &status);
b75a7d8f 166
374ca955
A
167 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
168 isWriteThroughAlias = FALSE;
b75a7d8f 169
73c04bcf
A
170 if(ucollator == NULL) {
171 if(U_SUCCESS(status)) {
172 status = U_MEMORY_ALLOCATION_ERROR;
173 }
174 return; // Failure
175 }
176
177 setRuleStringFromCollator();
b75a7d8f
A
178}
179
180/* RuleBasedCollator public destructor ----------------------------------- */
181
182RuleBasedCollator::~RuleBasedCollator()
183{
374ca955
A
184 if (dataIsOwned)
185 {
186 ucol_close(ucollator);
374ca955
A
187 }
188 ucollator = 0;
b75a7d8f
A
189}
190
191/* RuleBasedCollator public methods -------------------------------------- */
192
193UBool RuleBasedCollator::operator==(const Collator& that) const
194{
195 /* only checks for address equals here */
196 if (Collator::operator==(that))
197 return TRUE;
198
729e4ab9 199 if (typeid(*this) != typeid(that))
b75a7d8f
A
200 return FALSE; /* not the same class */
201
202 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
203
204 // weiv: use C function, commented code below is wrong
205 return ucol_equals(this->ucollator, thatAlias.ucollator);
206 /*
207 synwee : orginal code does not check for data compatibility
208 */
209 /*
210 if (ucollator != thatAlias.ucollator)
211 return FALSE;
212
213 return TRUE;
214 */
215}
216
374ca955
A
217UBool RuleBasedCollator::operator!=(const Collator& other) const
218{
219 return !(*this == other);
220}
221
b75a7d8f
A
222// aliasing, not write-through
223RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
224{
374ca955 225 if (this != &that)
b75a7d8f 226 {
374ca955
A
227 if (dataIsOwned)
228 {
229 ucol_close(ucollator);
374ca955 230 }
b75a7d8f 231
73c04bcf
A
232 urulestring.truncate(0); // empty the rule string
233 dataIsOwned = TRUE;
374ca955 234 isWriteThroughAlias = FALSE;
73c04bcf
A
235
236 UErrorCode intStatus = U_ZERO_ERROR;
237 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
238 ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
239 &intStatus);
240 if (U_SUCCESS(intStatus)) {
241 setRuleStringFromCollator();
242 }
374ca955
A
243 }
244 return *this;
b75a7d8f
A
245}
246
247// aliasing, not write-through
248Collator* RuleBasedCollator::clone() const
249{
73c04bcf 250 return new RuleBasedCollator(*this);
b75a7d8f
A
251}
252
4388f060 253
b75a7d8f
A
254CollationElementIterator* RuleBasedCollator::createCollationElementIterator
255 (const UnicodeString& source) const
256{
374ca955
A
257 UErrorCode status = U_ZERO_ERROR;
258 CollationElementIterator *result = new CollationElementIterator(source, this,
259 status);
260 if (U_FAILURE(status)) {
261 delete result;
262 return NULL;
263 }
b75a7d8f 264
374ca955 265 return result;
b75a7d8f
A
266}
267
268/**
269* Create a CollationElementIterator object that will iterate over the
270* elements in a string, using the collation rules defined in this
271* RuleBasedCollator
272*/
273CollationElementIterator* RuleBasedCollator::createCollationElementIterator
274 (const CharacterIterator& source) const
275{
374ca955
A
276 UErrorCode status = U_ZERO_ERROR;
277 CollationElementIterator *result = new CollationElementIterator(source, this,
278 status);
b75a7d8f 279
374ca955
A
280 if (U_FAILURE(status)) {
281 delete result;
282 return NULL;
283 }
b75a7d8f 284
374ca955 285 return result;
b75a7d8f
A
286}
287
288/**
289* Return a string representation of this collator's rules. The string can
290* later be passed to the constructor that takes a UnicodeString argument,
291* which will construct a collator that's functionally identical to this one.
292* You can also allow users to edit the string in order to change the collation
293* data, or you can print it out for inspection, or whatever.
294*/
295const UnicodeString& RuleBasedCollator::getRules() const
296{
73c04bcf 297 return urulestring;
b75a7d8f
A
298}
299
300void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
301{
302 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
303
304 if (rulesize > 0) {
305 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
306 if(rules != NULL) {
374ca955
A
307 ucol_getRulesEx(ucollator, delta, rules, rulesize);
308 buffer.setTo(rules, rulesize);
309 uprv_free(rules);
729e4ab9 310 } else { // couldn't allocate
374ca955 311 buffer.remove();
b75a7d8f
A
312 }
313 }
314 else {
315 buffer.remove();
316 }
317}
318
319UnicodeSet *
320RuleBasedCollator::getTailoredSet(UErrorCode &status) const
321{
374ca955
A
322 if(U_FAILURE(status)) {
323 return NULL;
324 }
325 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
b75a7d8f
A
326}
327
328
329void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
330{
331 if (versionInfo!=NULL){
332 ucol_getVersion(ucollator, versionInfo);
333 }
334}
335
336Collator::EComparisonResult RuleBasedCollator::compare(
337 const UnicodeString& source,
338 const UnicodeString& target,
339 int32_t length) const
340{
374ca955
A
341 UErrorCode status = U_ZERO_ERROR;
342 return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
b75a7d8f
A
343}
344
345UCollationResult RuleBasedCollator::compare(
346 const UnicodeString& source,
347 const UnicodeString& target,
374ca955 348 int32_t length,
b75a7d8f
A
349 UErrorCode &status) const
350{
374ca955 351 return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
b75a7d8f
A
352}
353
354Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
355 int32_t sourceLength,
356 const UChar* target,
357 int32_t targetLength)
358 const
359{
374ca955
A
360 return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
361 target, targetLength));
b75a7d8f
A
362}
363
364UCollationResult RuleBasedCollator::compare(const UChar* source,
365 int32_t sourceLength,
366 const UChar* target,
374ca955 367 int32_t targetLength,
b75a7d8f
A
368 UErrorCode &status) const
369{
374ca955
A
370 if(U_SUCCESS(status)) {
371 return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
372 } else {
373 return UCOL_EQUAL;
374 }
b75a7d8f
A
375}
376
377/**
378* Compare two strings using this collator
379*/
380Collator::EComparisonResult RuleBasedCollator::compare(
381 const UnicodeString& source,
382 const UnicodeString& target) const
383{
374ca955
A
384 return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
385 target.getBuffer(), target.length()));
b75a7d8f
A
386}
387
388UCollationResult RuleBasedCollator::compare(
389 const UnicodeString& source,
374ca955 390 const UnicodeString& target,
b75a7d8f
A
391 UErrorCode &status) const
392{
374ca955
A
393 if(U_SUCCESS(status)) {
394 return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
395 target.getBuffer(), target.length());
396 } else {
397 return UCOL_EQUAL;
398 }
b75a7d8f
A
399}
400
729e4ab9
A
401UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
402 UCharIterator &tIter,
403 UErrorCode &status) const {
404 if(U_SUCCESS(status)) {
405 return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
406 } else {
407 return UCOL_EQUAL;
408 }
409}
410
b75a7d8f
A
411/**
412* Retrieve a collation key for the specified string. The key can be compared
413* with other collation keys using a bitwise comparison (e.g. memcmp) to find
414* the ordering of their respective source strings. This is handy when doing a
415* sort, where each sort key must be compared many times.
416*
417* The basic algorithm here is to find all of the collation elements for each
418* character in the source string, convert them to an ASCII representation, and
419* put them into the collation key. But it's trickier than that. Each
420* collation element in a string has three components: primary ('A' vs 'B'),
374ca955 421* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
b75a7d8f
A
422* at the end of a string takes precedence over a secondary or tertiary
423* difference earlier in the string.
424*
425* To account for this, we put all of the primary orders at the beginning of
426* the string, followed by the secondary and tertiary orders. Each set of
427* orders is terminated by nulls so that a key for a string which is a initial
428* substring of another key will compare less without any special case.
429*
430* Here's a hypothetical example, with the collation element represented as a
431* three-digit number, one digit for primary, one for secondary, etc.
432*
374ca955 433* String: A a B \u00C9
b75a7d8f
A
434* Collation Elements: 101 100 201 511
435* Collation Key: 1125<null>0001<null>1011<null>
436*
437* To make things even trickier, secondary differences (accent marks) are
438* compared starting at the *end* of the string in languages with French
439* secondary ordering. But when comparing the accent marks on a single base
440* character, they are compared from the beginning. To handle this, we reverse
441* all of the accents that belong to each base character, then we reverse the
442* entire string of secondary orderings at the end.
443*/
444CollationKey& RuleBasedCollator::getCollationKey(
445 const UnicodeString& source,
446 CollationKey& sortkey,
447 UErrorCode& status) const
448{
374ca955 449 return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
b75a7d8f
A
450}
451
452CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
453 int32_t sourceLen,
454 CollationKey& sortkey,
455 UErrorCode& status) const
456{
4388f060
A
457 if (U_FAILURE(status)) {
458 return sortkey.setToBogus();
459 }
460 if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
461 status = U_ILLEGAL_ARGUMENT_ERROR;
374ca955
A
462 return sortkey.setToBogus();
463 }
b75a7d8f 464
4388f060
A
465 if (sourceLen < 0) {
466 sourceLen = u_strlen(source);
467 }
468 if (sourceLen == 0) {
374ca955
A
469 return sortkey.reset();
470 }
b75a7d8f 471
374ca955 472 uint8_t *result;
4388f060
A
473 int32_t resultCapacity;
474 if (sortkey.fCapacity >= (sourceLen * 3)) {
475 // Try to reuse the CollationKey.fBytes.
476 result = sortkey.fBytes;
477 resultCapacity = sortkey.fCapacity;
478 } else {
479 result = NULL;
480 resultCapacity = 0;
481 }
482 int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
483 result, resultCapacity, &status);
484
485 if (U_SUCCESS(status)) {
486 if (result == sortkey.fBytes) {
487 sortkey.setLength(resultLen);
488 } else {
489 sortkey.adopt(result, resultCapacity, resultLen);
490 }
491 } else {
492 if (result != sortkey.fBytes) {
493 uprv_free(result);
494 }
495 sortkey.setToBogus();
496 }
374ca955 497 return sortkey;
b75a7d8f
A
498}
499
500/**
501 * Return the maximum length of any expansion sequences that end with the
502 * specified comparison order.
503 * @param order a collation order returned by previous or next.
504 * @return the maximum length of any expansion seuences ending with the
505 * specified order or 1 if collation order does not occur at the end of any
506 * expansion sequence.
507 * @see CollationElementIterator#getMaxExpansion
508 */
509int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
510{
374ca955
A
511 uint8_t result;
512 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
513 return result;
b75a7d8f
A
514}
515
516uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
517 UErrorCode &status)
518{
374ca955 519 return ucol_cloneRuleData(ucollator, &length, &status);
b75a7d8f
A
520}
521
73c04bcf
A
522
523int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
524{
525 return ucol_cloneBinary(ucollator, buffer, capacity, &status);
526}
527
b75a7d8f
A
528void RuleBasedCollator::setAttribute(UColAttribute attr,
529 UColAttributeValue value,
530 UErrorCode &status)
531{
374ca955
A
532 if (U_FAILURE(status))
533 return;
534 checkOwned();
535 ucol_setAttribute(ucollator, attr, value, &status);
b75a7d8f
A
536}
537
538UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
539 UErrorCode &status)
540{
374ca955
A
541 if (U_FAILURE(status))
542 return UCOL_DEFAULT;
543 return ucol_getAttribute(ucollator, attr, &status);
b75a7d8f
A
544}
545
546uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
374ca955
A
547 checkOwned();
548 return ucol_setVariableTop(ucollator, varTop, len, &status);
b75a7d8f
A
549}
550
551uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
374ca955
A
552 checkOwned();
553 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
b75a7d8f
A
554}
555
556void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
374ca955
A
557 checkOwned();
558 ucol_restoreVariableTop(ucollator, varTop, &status);
b75a7d8f
A
559}
560
561uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
562 return ucol_getVariableTop(ucollator, &status);
563}
564
565Collator* RuleBasedCollator::safeClone(void)
566{
374ca955
A
567 UErrorCode intStatus = U_ZERO_ERROR;
568 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
569 UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
570 &intStatus);
571 if (U_FAILURE(intStatus)) {
572 return NULL;
573 }
b75a7d8f 574
73c04bcf 575 RuleBasedCollator *result = new RuleBasedCollator();
46f4442e
A
576 // Null pointer check
577 if (result != NULL) {
578 result->ucollator = ucol;
579 result->dataIsOwned = TRUE;
580 result->isWriteThroughAlias = FALSE;
581 setRuleStringFromCollator();
582 }
b75a7d8f 583
374ca955 584 return result;
b75a7d8f
A
585}
586
587
588int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
589 uint8_t *result, int32_t resultLength)
590 const
591{
374ca955 592 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
b75a7d8f
A
593}
594
595int32_t RuleBasedCollator::getSortKey(const UChar *source,
596 int32_t sourceLength, uint8_t *result,
597 int32_t resultLength) const
598{
374ca955 599 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
b75a7d8f
A
600}
601
602Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
603{
374ca955
A
604 UErrorCode intStatus = U_ZERO_ERROR;
605 return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
606 &intStatus));
b75a7d8f
A
607}
608
609void RuleBasedCollator::setStrength(ECollationStrength newStrength)
610{
374ca955
A
611 checkOwned();
612 UErrorCode intStatus = U_ZERO_ERROR;
613 UCollationStrength strength = getUCollationStrength(newStrength);
614 ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
b75a7d8f
A
615}
616
729e4ab9
A
617int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
618 int32_t destCapacity,
619 UErrorCode& status) const
620{
621 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
622}
623
624void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
625 int32_t reorderCodesLength,
626 UErrorCode& status)
627{
4388f060 628 checkOwned();
729e4ab9
A
629 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
630}
631
4388f060
A
632int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
633 int32_t* dest,
634 int32_t destCapacity,
635 UErrorCode& status)
636{
637 return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
638}
729e4ab9 639
b75a7d8f
A
640/**
641* Create a hash code for this collation. Just hash the main rule table -- that
642* should be good enough for almost any use.
643*/
644int32_t RuleBasedCollator::hashCode() const
645{
374ca955
A
646 int32_t length;
647 const UChar *rules = ucol_getRules(ucollator, &length);
4388f060 648 return ustr_hashUCharsN(rules, length);
b75a7d8f
A
649}
650
651/**
652* return the locale of this collator
653*/
654const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
729e4ab9 655 const char *result = ucol_getLocaleByType(ucollator, type, &status);
374ca955
A
656 if(result == NULL) {
657 Locale res("");
658 res.setToBogus();
659 return res;
660 } else {
661 return Locale(result);
662 }
b75a7d8f
A
663}
664
665void
46f4442e 666RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
374ca955 667 checkOwned();
46f4442e 668 char* rloc = uprv_strdup(requestedLocale.getName());
b75a7d8f 669 if (rloc) {
46f4442e 670 char* vloc = uprv_strdup(validLocale.getName());
b75a7d8f 671 if (vloc) {
46f4442e
A
672 char* aloc = uprv_strdup(actualLocale.getName());
673 if (aloc) {
674 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
675 return;
676 }
677 uprv_free(vloc);
b75a7d8f
A
678 }
679 uprv_free(rloc);
680 }
681}
682
683// RuleBaseCollatorNew private constructor ----------------------------------
684
374ca955 685RuleBasedCollator::RuleBasedCollator()
73c04bcf 686 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
b75a7d8f 687{
b75a7d8f
A
688}
689
690RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
73c04bcf
A
691 UErrorCode& status)
692 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
b75a7d8f 693{
374ca955
A
694 if (U_FAILURE(status))
695 return;
b75a7d8f 696
374ca955
A
697 /*
698 Try to load, in order:
699 1. The desired locale's collation.
700 2. A fallback of the desired locale.
701 3. The default locale's collation.
702 4. A fallback of the default locale.
703 5. The default collation rules, which contains en_US collation rules.
704
705 To reiterate, we try:
706 Specific:
707 language+country+variant
708 language+country
709 language
710 Default:
711 language+country+variant
712 language+country
713 language
714 Root: (aka DEFAULTRULES)
715 steps 1-5 are handled by resource bundle fallback mechanism.
716 however, in a very unprobable situation that no resource bundle
717 data exists, step 5 is repeated with hardcoded default rules.
718 */
719
720 setUCollator(desiredLocale, status);
721
722 if (U_FAILURE(status))
723 {
724 status = U_ZERO_ERROR;
b75a7d8f 725
374ca955
A
726 setUCollator(kRootLocaleName, status);
727 if (status == U_ZERO_ERROR) {
728 status = U_USING_DEFAULT_WARNING;
729 }
b75a7d8f 730 }
b75a7d8f 731
374ca955
A
732 if (U_SUCCESS(status))
733 {
73c04bcf 734 setRuleStringFromCollator();
b75a7d8f 735 }
b75a7d8f
A
736}
737
729e4ab9 738void
b75a7d8f
A
739RuleBasedCollator::setUCollator(const char *locale,
740 UErrorCode &status)
741{
4388f060 742 if (U_FAILURE(status)) {
374ca955 743 return;
4388f060 744 }
374ca955
A
745 if (ucollator && dataIsOwned)
746 ucol_close(ucollator);
747 ucollator = ucol_open_internal(locale, &status);
748 dataIsOwned = TRUE;
749 isWriteThroughAlias = FALSE;
b75a7d8f
A
750}
751
752
753void
754RuleBasedCollator::checkOwned() {
374ca955
A
755 if (!(dataIsOwned || isWriteThroughAlias)) {
756 UErrorCode status = U_ZERO_ERROR;
757 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
73c04bcf 758 setRuleStringFromCollator();
374ca955
A
759 dataIsOwned = TRUE;
760 isWriteThroughAlias = FALSE;
761 }
b75a7d8f
A
762}
763
4388f060
A
764
765int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
766 char *buffer,
767 int32_t capacity,
768 UErrorCode &status) const {
769 /* simply delegate */
770 return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
771}
772
773
374ca955 774UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
b75a7d8f
A
775
776U_NAMESPACE_END
777
778#endif /* #if !UCONFIG_NO_COLLATION */