]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
46f4442e A |
2 | ****************************************************************************** |
3 | * Copyright (C) 1996-2008, International Business Machines Corporation and * | |
4 | * others. All Rights Reserved. * | |
5 | ****************************************************************************** | |
6 | */ | |
b75a7d8f A |
7 | |
8 | /** | |
46f4442e A |
9 | * File tblcoll.cpp |
10 | * | |
11 | * Created by: Helena Shih | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
17 | * constructor which reads RuleBasedCollator object from | |
18 | * a binary file. Added writeToFile method which streams | |
19 | * RuleBasedCollator out to a binary file. The streamIn | |
20 | * and streamOut methods use istream and ostream objects | |
21 | * in binary mode. | |
22 | * 2/11/97 aliu Moved declarations out of for loop initializer. | |
23 | * Added Mac compatibility #ifdef for ios::nocreate. | |
24 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
25 | * hold invariant data. | |
26 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
27 | * Added a private RuleBasedCollator(Locale&) constructor, | |
28 | * to be used by Collator::getInstance(). General | |
29 | * clean up. Made use of UErrorCode variables consistent. | |
30 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
31 | * constructor and getDynamicClassID. | |
32 | * 3/5/97 aliu Changed compaction cycle to improve performance. We | |
33 | * use the maximum allowable value which is kBlockCount. | |
34 | * Modified getRules() to load rules dynamically. Changed | |
35 | * constructFromFile() call to accommodate this (added | |
36 | * parameter to specify whether binary loading is to | |
37 | * take place). | |
38 | * 05/06/97 helena Added memory allocation error check. | |
39 | * 6/20/97 helena Java class name change. | |
40 | * 6/23/97 helena Adding comments to make code more readable. | |
41 | * 09/03/97 helena Added createCollationKeyValues(). | |
42 | * 06/26/98 erm Changes for CollationKeys using byte arrays. | |
43 | * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
44 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
45 | * Normalizer::EMode | |
46 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
47 | * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx | |
48 | * files are no longer used. | |
49 | * 11/02/99 helena Collator performance enhancements. Special case | |
50 | * for NO_OP situations. | |
51 | * 11/17/99 srl More performance enhancements. Inlined some internal functions. | |
52 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
53 | * to implementation file. | |
54 | * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) | |
55 | */ | |
b75a7d8f A |
56 | |
57 | #include "unicode/utypes.h" | |
58 | ||
59 | #if !UCONFIG_NO_COLLATION | |
60 | ||
61 | #include "unicode/tblcoll.h" | |
62 | #include "unicode/coleitr.h" | |
374ca955 | 63 | #include "unicode/ures.h" |
b75a7d8f A |
64 | #include "unicode/uset.h" |
65 | #include "ucol_imp.h" | |
66 | #include "uresimp.h" | |
67 | #include "uhash.h" | |
68 | #include "cmemory.h" | |
69 | #include "cstring.h" | |
374ca955 | 70 | #include "putilimp.h" |
b75a7d8f A |
71 | |
72 | /* public RuleBasedCollator constructor ---------------------------------- */ | |
73 | ||
74 | U_NAMESPACE_BEGIN | |
75 | ||
76 | /** | |
77 | * Copy constructor, aliasing, not write-through | |
78 | */ | |
374ca955 A |
79 | RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
80 | : Collator(that) | |
81 | , dataIsOwned(FALSE) | |
82 | , isWriteThroughAlias(FALSE) | |
73c04bcf | 83 | , ucollator(NULL) |
b75a7d8f | 84 | { |
73c04bcf | 85 | RuleBasedCollator::operator=(that); |
b75a7d8f A |
86 | } |
87 | ||
88 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
89 | UErrorCode& status) : |
90 | dataIsOwned(FALSE) | |
b75a7d8f | 91 | { |
374ca955 A |
92 | construct(rules, |
93 | UCOL_DEFAULT_STRENGTH, | |
94 | UCOL_DEFAULT, | |
95 | status); | |
b75a7d8f A |
96 | } |
97 | ||
98 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
99 | ECollationStrength collationStrength, |
100 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 101 | { |
374ca955 A |
102 | construct(rules, |
103 | getUCollationStrength(collationStrength), | |
104 | UCOL_DEFAULT, | |
105 | status); | |
b75a7d8f A |
106 | } |
107 | ||
108 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
109 | UColAttributeValue decompositionMode, | |
110 | UErrorCode& status) : | |
374ca955 | 111 | dataIsOwned(FALSE) |
b75a7d8f | 112 | { |
374ca955 A |
113 | construct(rules, |
114 | UCOL_DEFAULT_STRENGTH, | |
115 | decompositionMode, | |
116 | status); | |
b75a7d8f A |
117 | } |
118 | ||
119 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
120 | ECollationStrength collationStrength, |
121 | UColAttributeValue decompositionMode, | |
122 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 123 | { |
374ca955 A |
124 | construct(rules, |
125 | getUCollationStrength(collationStrength), | |
126 | decompositionMode, | |
127 | status); | |
b75a7d8f | 128 | } |
73c04bcf A |
129 | RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
130 | const RuleBasedCollator *base, | |
131 | UErrorCode &status) : | |
132 | dataIsOwned(TRUE), | |
133 | isWriteThroughAlias(FALSE) | |
134 | { | |
135 | ucollator = ucol_openBinary(bin, length, base->ucollator, &status); | |
136 | } | |
b75a7d8f A |
137 | |
138 | void | |
73c04bcf | 139 | RuleBasedCollator::setRuleStringFromCollator() |
b75a7d8f | 140 | { |
73c04bcf A |
141 | int32_t length; |
142 | const UChar *r = ucol_getRules(ucollator, &length); | |
374ca955 | 143 | |
73c04bcf A |
144 | if (r && length > 0) { |
145 | // alias the rules string | |
146 | urulestring.setTo(TRUE, r, length); | |
147 | } | |
148 | else { | |
149 | urulestring.truncate(0); // Clear string. | |
b75a7d8f | 150 | } |
b75a7d8f A |
151 | } |
152 | ||
153 | // not aliasing, not write-through | |
154 | void | |
155 | RuleBasedCollator::construct(const UnicodeString& rules, | |
156 | UColAttributeValue collationStrength, | |
157 | UColAttributeValue decompositionMode, | |
158 | UErrorCode& status) | |
159 | { | |
374ca955 A |
160 | ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
161 | decompositionMode, collationStrength, | |
162 | NULL, &status); | |
b75a7d8f | 163 | |
374ca955 A |
164 | dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
165 | isWriteThroughAlias = FALSE; | |
b75a7d8f | 166 | |
73c04bcf A |
167 | if(ucollator == NULL) { |
168 | if(U_SUCCESS(status)) { | |
169 | status = U_MEMORY_ALLOCATION_ERROR; | |
170 | } | |
171 | return; // Failure | |
172 | } | |
173 | ||
174 | setRuleStringFromCollator(); | |
b75a7d8f A |
175 | } |
176 | ||
177 | /* RuleBasedCollator public destructor ----------------------------------- */ | |
178 | ||
179 | RuleBasedCollator::~RuleBasedCollator() | |
180 | { | |
374ca955 A |
181 | if (dataIsOwned) |
182 | { | |
183 | ucol_close(ucollator); | |
374ca955 A |
184 | } |
185 | ucollator = 0; | |
b75a7d8f A |
186 | } |
187 | ||
188 | /* RuleBaseCollator public methods --------------------------------------- */ | |
189 | ||
190 | UBool RuleBasedCollator::operator==(const Collator& that) const | |
191 | { | |
192 | /* only checks for address equals here */ | |
193 | if (Collator::operator==(that)) | |
194 | return TRUE; | |
195 | ||
196 | if (getDynamicClassID() != that.getDynamicClassID()) | |
197 | return FALSE; /* not the same class */ | |
198 | ||
199 | RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | |
200 | ||
201 | // weiv: use C function, commented code below is wrong | |
202 | return ucol_equals(this->ucollator, thatAlias.ucollator); | |
203 | /* | |
204 | synwee : orginal code does not check for data compatibility | |
205 | */ | |
206 | /* | |
207 | if (ucollator != thatAlias.ucollator) | |
208 | return FALSE; | |
209 | ||
210 | return TRUE; | |
211 | */ | |
212 | } | |
213 | ||
374ca955 A |
214 | UBool RuleBasedCollator::operator!=(const Collator& other) const |
215 | { | |
216 | return !(*this == other); | |
217 | } | |
218 | ||
b75a7d8f A |
219 | // aliasing, not write-through |
220 | RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) | |
221 | { | |
374ca955 | 222 | if (this != &that) |
b75a7d8f | 223 | { |
374ca955 A |
224 | if (dataIsOwned) |
225 | { | |
226 | ucol_close(ucollator); | |
374ca955 | 227 | } |
b75a7d8f | 228 | |
73c04bcf A |
229 | urulestring.truncate(0); // empty the rule string |
230 | dataIsOwned = TRUE; | |
374ca955 | 231 | isWriteThroughAlias = FALSE; |
73c04bcf A |
232 | |
233 | UErrorCode intStatus = U_ZERO_ERROR; | |
234 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
235 | ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize, | |
236 | &intStatus); | |
237 | if (U_SUCCESS(intStatus)) { | |
238 | setRuleStringFromCollator(); | |
239 | } | |
374ca955 A |
240 | } |
241 | return *this; | |
b75a7d8f A |
242 | } |
243 | ||
244 | // aliasing, not write-through | |
245 | Collator* RuleBasedCollator::clone() const | |
246 | { | |
73c04bcf | 247 | return new RuleBasedCollator(*this); |
b75a7d8f A |
248 | } |
249 | ||
250 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
251 | (const UnicodeString& source) const | |
252 | { | |
374ca955 A |
253 | UErrorCode status = U_ZERO_ERROR; |
254 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
255 | status); | |
256 | if (U_FAILURE(status)) { | |
257 | delete result; | |
258 | return NULL; | |
259 | } | |
b75a7d8f | 260 | |
374ca955 | 261 | return result; |
b75a7d8f A |
262 | } |
263 | ||
264 | /** | |
265 | * Create a CollationElementIterator object that will iterate over the | |
266 | * elements in a string, using the collation rules defined in this | |
267 | * RuleBasedCollator | |
268 | */ | |
269 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
270 | (const CharacterIterator& source) const | |
271 | { | |
374ca955 A |
272 | UErrorCode status = U_ZERO_ERROR; |
273 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
274 | status); | |
b75a7d8f | 275 | |
374ca955 A |
276 | if (U_FAILURE(status)) { |
277 | delete result; | |
278 | return NULL; | |
279 | } | |
b75a7d8f | 280 | |
374ca955 | 281 | return result; |
b75a7d8f A |
282 | } |
283 | ||
284 | /** | |
285 | * Return a string representation of this collator's rules. The string can | |
286 | * later be passed to the constructor that takes a UnicodeString argument, | |
287 | * which will construct a collator that's functionally identical to this one. | |
288 | * You can also allow users to edit the string in order to change the collation | |
289 | * data, or you can print it out for inspection, or whatever. | |
290 | */ | |
291 | const UnicodeString& RuleBasedCollator::getRules() const | |
292 | { | |
73c04bcf | 293 | return urulestring; |
b75a7d8f A |
294 | } |
295 | ||
296 | void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) | |
297 | { | |
298 | int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); | |
299 | ||
300 | if (rulesize > 0) { | |
301 | UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); | |
302 | if(rules != NULL) { | |
374ca955 A |
303 | ucol_getRulesEx(ucollator, delta, rules, rulesize); |
304 | buffer.setTo(rules, rulesize); | |
305 | uprv_free(rules); | |
b75a7d8f | 306 | } else { // couldn't allocate |
374ca955 | 307 | buffer.remove(); |
b75a7d8f A |
308 | } |
309 | } | |
310 | else { | |
311 | buffer.remove(); | |
312 | } | |
313 | } | |
314 | ||
315 | UnicodeSet * | |
316 | RuleBasedCollator::getTailoredSet(UErrorCode &status) const | |
317 | { | |
374ca955 A |
318 | if(U_FAILURE(status)) { |
319 | return NULL; | |
320 | } | |
321 | return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); | |
b75a7d8f A |
322 | } |
323 | ||
324 | ||
325 | void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const | |
326 | { | |
327 | if (versionInfo!=NULL){ | |
328 | ucol_getVersion(ucollator, versionInfo); | |
329 | } | |
330 | } | |
331 | ||
332 | Collator::EComparisonResult RuleBasedCollator::compare( | |
333 | const UnicodeString& source, | |
334 | const UnicodeString& target, | |
335 | int32_t length) const | |
336 | { | |
374ca955 A |
337 | UErrorCode status = U_ZERO_ERROR; |
338 | return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status)); | |
b75a7d8f A |
339 | } |
340 | ||
341 | UCollationResult RuleBasedCollator::compare( | |
342 | const UnicodeString& source, | |
343 | const UnicodeString& target, | |
374ca955 | 344 | int32_t length, |
b75a7d8f A |
345 | UErrorCode &status) const |
346 | { | |
374ca955 | 347 | return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
b75a7d8f A |
348 | } |
349 | ||
350 | Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source, | |
351 | int32_t sourceLength, | |
352 | const UChar* target, | |
353 | int32_t targetLength) | |
354 | const | |
355 | { | |
374ca955 A |
356 | return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength, |
357 | target, targetLength)); | |
b75a7d8f A |
358 | } |
359 | ||
360 | UCollationResult RuleBasedCollator::compare(const UChar* source, | |
361 | int32_t sourceLength, | |
362 | const UChar* target, | |
374ca955 | 363 | int32_t targetLength, |
b75a7d8f A |
364 | UErrorCode &status) const |
365 | { | |
374ca955 A |
366 | if(U_SUCCESS(status)) { |
367 | return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); | |
368 | } else { | |
369 | return UCOL_EQUAL; | |
370 | } | |
b75a7d8f A |
371 | } |
372 | ||
373 | /** | |
374 | * Compare two strings using this collator | |
375 | */ | |
376 | Collator::EComparisonResult RuleBasedCollator::compare( | |
377 | const UnicodeString& source, | |
378 | const UnicodeString& target) const | |
379 | { | |
374ca955 A |
380 | return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
381 | target.getBuffer(), target.length())); | |
b75a7d8f A |
382 | } |
383 | ||
384 | UCollationResult RuleBasedCollator::compare( | |
385 | const UnicodeString& source, | |
374ca955 | 386 | const UnicodeString& target, |
b75a7d8f A |
387 | UErrorCode &status) const |
388 | { | |
374ca955 A |
389 | if(U_SUCCESS(status)) { |
390 | return ucol_strcoll(ucollator, source.getBuffer(), source.length(), | |
391 | target.getBuffer(), target.length()); | |
392 | } else { | |
393 | return UCOL_EQUAL; | |
394 | } | |
b75a7d8f A |
395 | } |
396 | ||
397 | /** | |
398 | * Retrieve a collation key for the specified string. The key can be compared | |
399 | * with other collation keys using a bitwise comparison (e.g. memcmp) to find | |
400 | * the ordering of their respective source strings. This is handy when doing a | |
401 | * sort, where each sort key must be compared many times. | |
402 | * | |
403 | * The basic algorithm here is to find all of the collation elements for each | |
404 | * character in the source string, convert them to an ASCII representation, and | |
405 | * put them into the collation key. But it's trickier than that. Each | |
406 | * collation element in a string has three components: primary ('A' vs 'B'), | |
374ca955 | 407 | * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
b75a7d8f A |
408 | * at the end of a string takes precedence over a secondary or tertiary |
409 | * difference earlier in the string. | |
410 | * | |
411 | * To account for this, we put all of the primary orders at the beginning of | |
412 | * the string, followed by the secondary and tertiary orders. Each set of | |
413 | * orders is terminated by nulls so that a key for a string which is an initial | |
414 | * substring of another key will compare less without any special case. | |
415 | * | |
416 | * Here's a hypothetical example, with the collation element represented as a | |
417 | * three-digit number, one digit for primary, one for secondary, etc. | |
418 | * | |
374ca955 | 419 | * String: A a B \u00C9 |
b75a7d8f A |
420 | * Collation Elements: 101 100 201 511 |
421 | * Collation Key: 1125<null>0001<null>1011<null> | |
422 | * | |
423 | * To make things even trickier, secondary differences (accent marks) are | |
424 | * compared starting at the *end* of the string in languages with French | |
425 | * secondary ordering. But when comparing the accent marks on a single base | |
426 | * character, they are compared from the beginning. To handle this, we reverse | |
427 | * all of the accents that belong to each base character, then we reverse the | |
428 | * entire string of secondary orderings at the end. | |
429 | */ | |
430 | CollationKey& RuleBasedCollator::getCollationKey( | |
431 | const UnicodeString& source, | |
432 | CollationKey& sortkey, | |
433 | UErrorCode& status) const | |
434 | { | |
374ca955 | 435 | return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
b75a7d8f A |
436 | } |
437 | ||
438 | CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, | |
439 | int32_t sourceLen, | |
440 | CollationKey& sortkey, | |
441 | UErrorCode& status) const | |
442 | { | |
374ca955 A |
443 | if (U_FAILURE(status)) |
444 | { | |
445 | return sortkey.setToBogus(); | |
446 | } | |
b75a7d8f | 447 | |
374ca955 A |
448 | if ((!source) || (sourceLen == 0)) { |
449 | return sortkey.reset(); | |
450 | } | |
b75a7d8f | 451 | |
374ca955 A |
452 | uint8_t *result; |
453 | int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, | |
454 | source, sourceLen, | |
455 | &result, | |
456 | &status); | |
457 | sortkey.adopt(result, resultLen); | |
458 | return sortkey; | |
b75a7d8f A |
459 | } |
460 | ||
461 | /** | |
462 | * Return the maximum length of any expansion sequences that end with the | |
463 | * specified comparison order. | |
464 | * @param order a collation order returned by previous or next. | |
465 | * @return the maximum length of any expansion seuences ending with the | |
466 | * specified order or 1 if collation order does not occur at the end of any | |
467 | * expansion sequence. | |
468 | * @see CollationElementIterator#getMaxExpansion | |
469 | */ | |
470 | int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | |
471 | { | |
374ca955 A |
472 | uint8_t result; |
473 | UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); | |
474 | return result; | |
b75a7d8f A |
475 | } |
476 | ||
477 | uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, | |
478 | UErrorCode &status) | |
479 | { | |
374ca955 | 480 | return ucol_cloneRuleData(ucollator, &length, &status); |
b75a7d8f A |
481 | } |
482 | ||
73c04bcf A |
483 | |
484 | int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) | |
485 | { | |
486 | return ucol_cloneBinary(ucollator, buffer, capacity, &status); | |
487 | } | |
488 | ||
b75a7d8f A |
489 | void RuleBasedCollator::setAttribute(UColAttribute attr, |
490 | UColAttributeValue value, | |
491 | UErrorCode &status) | |
492 | { | |
374ca955 A |
493 | if (U_FAILURE(status)) |
494 | return; | |
495 | checkOwned(); | |
496 | ucol_setAttribute(ucollator, attr, value, &status); | |
b75a7d8f A |
497 | } |
498 | ||
499 | UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, | |
500 | UErrorCode &status) | |
501 | { | |
374ca955 A |
502 | if (U_FAILURE(status)) |
503 | return UCOL_DEFAULT; | |
504 | return ucol_getAttribute(ucollator, attr, &status); | |
b75a7d8f A |
505 | } |
506 | ||
507 | uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { | |
374ca955 A |
508 | checkOwned(); |
509 | return ucol_setVariableTop(ucollator, varTop, len, &status); | |
b75a7d8f A |
510 | } |
511 | ||
512 | uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) { | |
374ca955 A |
513 | checkOwned(); |
514 | return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); | |
b75a7d8f A |
515 | } |
516 | ||
517 | void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) { | |
374ca955 A |
518 | checkOwned(); |
519 | ucol_restoreVariableTop(ucollator, varTop, &status); | |
b75a7d8f A |
520 | } |
521 | ||
522 | uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { | |
523 | return ucol_getVariableTop(ucollator, &status); | |
524 | } | |
525 | ||
526 | Collator* RuleBasedCollator::safeClone(void) | |
527 | { | |
374ca955 A |
528 | UErrorCode intStatus = U_ZERO_ERROR; |
529 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
530 | UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, | |
531 | &intStatus); | |
532 | if (U_FAILURE(intStatus)) { | |
533 | return NULL; | |
534 | } | |
b75a7d8f | 535 | |
73c04bcf | 536 | RuleBasedCollator *result = new RuleBasedCollator(); |
46f4442e A |
537 | // Null pointer check |
538 | if (result != NULL) { | |
539 | result->ucollator = ucol; | |
540 | result->dataIsOwned = TRUE; | |
541 | result->isWriteThroughAlias = FALSE; | |
542 | setRuleStringFromCollator(); | |
543 | } | |
b75a7d8f | 544 | |
374ca955 | 545 | return result; |
b75a7d8f A |
546 | } |
547 | ||
548 | ||
549 | int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, | |
550 | uint8_t *result, int32_t resultLength) | |
551 | const | |
552 | { | |
374ca955 | 553 | return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
b75a7d8f A |
554 | } |
555 | ||
556 | int32_t RuleBasedCollator::getSortKey(const UChar *source, | |
557 | int32_t sourceLength, uint8_t *result, | |
558 | int32_t resultLength) const | |
559 | { | |
374ca955 | 560 | return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
b75a7d8f A |
561 | } |
562 | ||
563 | Collator::ECollationStrength RuleBasedCollator::getStrength(void) const | |
564 | { | |
374ca955 A |
565 | UErrorCode intStatus = U_ZERO_ERROR; |
566 | return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH, | |
567 | &intStatus)); | |
b75a7d8f A |
568 | } |
569 | ||
570 | void RuleBasedCollator::setStrength(ECollationStrength newStrength) | |
571 | { | |
374ca955 A |
572 | checkOwned(); |
573 | UErrorCode intStatus = U_ZERO_ERROR; | |
574 | UCollationStrength strength = getUCollationStrength(newStrength); | |
575 | ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus); | |
b75a7d8f A |
576 | } |
577 | ||
578 | /** | |
579 | * Create a hash code for this collation. Just hash the main rule table -- that | |
580 | * should be good enough for almost any use. | |
581 | */ | |
582 | int32_t RuleBasedCollator::hashCode() const | |
583 | { | |
374ca955 A |
584 | int32_t length; |
585 | const UChar *rules = ucol_getRules(ucollator, &length); | |
586 | return uhash_hashUCharsN(rules, length); | |
b75a7d8f A |
587 | } |
588 | ||
589 | /** | |
590 | * return the locale of this collator | |
591 | */ | |
592 | const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { | |
374ca955 A |
593 | const char *result = ucol_getLocale(ucollator, type, &status); |
594 | if(result == NULL) { | |
595 | Locale res(""); | |
596 | res.setToBogus(); | |
597 | return res; | |
598 | } else { | |
599 | return Locale(result); | |
600 | } | |
b75a7d8f A |
601 | } |
602 | ||
603 | void | |
46f4442e | 604 | RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { |
374ca955 | 605 | checkOwned(); |
46f4442e | 606 | char* rloc = uprv_strdup(requestedLocale.getName()); |
b75a7d8f | 607 | if (rloc) { |
46f4442e | 608 | char* vloc = uprv_strdup(validLocale.getName()); |
b75a7d8f | 609 | if (vloc) { |
46f4442e A |
610 | char* aloc = uprv_strdup(actualLocale.getName()); |
611 | if (aloc) { | |
612 | ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); | |
613 | return; | |
614 | } | |
615 | uprv_free(vloc); | |
b75a7d8f A |
616 | } |
617 | uprv_free(rloc); | |
618 | } | |
619 | } | |
620 | ||
621 | // RuleBaseCollatorNew private constructor ---------------------------------- | |
622 | ||
374ca955 | 623 | RuleBasedCollator::RuleBasedCollator() |
73c04bcf | 624 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
b75a7d8f | 625 | { |
b75a7d8f A |
626 | } |
627 | ||
628 | RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, | |
73c04bcf A |
629 | UErrorCode& status) |
630 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) | |
b75a7d8f | 631 | { |
374ca955 A |
632 | if (U_FAILURE(status)) |
633 | return; | |
b75a7d8f | 634 | |
374ca955 A |
635 | /* |
636 | Try to load, in order: | |
637 | 1. The desired locale's collation. | |
638 | 2. A fallback of the desired locale. | |
639 | 3. The default locale's collation. | |
640 | 4. A fallback of the default locale. | |
641 | 5. The default collation rules, which contains en_US collation rules. | |
642 | ||
643 | To reiterate, we try: | |
644 | Specific: | |
645 | language+country+variant | |
646 | language+country | |
647 | language | |
648 | Default: | |
649 | language+country+variant | |
650 | language+country | |
651 | language | |
652 | Root: (aka DEFAULTRULES) | |
653 | steps 1-5 are handled by resource bundle fallback mechanism. | |
654 | however, in a very unprobable situation that no resource bundle | |
655 | data exists, step 5 is repeated with hardcoded default rules. | |
656 | */ | |
657 | ||
658 | setUCollator(desiredLocale, status); | |
659 | ||
660 | if (U_FAILURE(status)) | |
661 | { | |
662 | status = U_ZERO_ERROR; | |
b75a7d8f | 663 | |
374ca955 A |
664 | setUCollator(kRootLocaleName, status); |
665 | if (status == U_ZERO_ERROR) { | |
666 | status = U_USING_DEFAULT_WARNING; | |
667 | } | |
b75a7d8f | 668 | } |
b75a7d8f | 669 | |
374ca955 A |
670 | if (U_SUCCESS(status)) |
671 | { | |
73c04bcf | 672 | setRuleStringFromCollator(); |
b75a7d8f | 673 | } |
b75a7d8f A |
674 | } |
675 | ||
676 | void | |
677 | RuleBasedCollator::setUCollator(const char *locale, | |
678 | UErrorCode &status) | |
679 | { | |
374ca955 A |
680 | if (U_FAILURE(status)) |
681 | return; | |
682 | if (ucollator && dataIsOwned) | |
683 | ucol_close(ucollator); | |
684 | ucollator = ucol_open_internal(locale, &status); | |
685 | dataIsOwned = TRUE; | |
686 | isWriteThroughAlias = FALSE; | |
b75a7d8f A |
687 | } |
688 | ||
689 | ||
690 | void | |
691 | RuleBasedCollator::checkOwned() { | |
374ca955 A |
692 | if (!(dataIsOwned || isWriteThroughAlias)) { |
693 | UErrorCode status = U_ZERO_ERROR; | |
694 | ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); | |
73c04bcf | 695 | setRuleStringFromCollator(); |
374ca955 A |
696 | dataIsOwned = TRUE; |
697 | isWriteThroughAlias = FALSE; | |
698 | } | |
b75a7d8f A |
699 | } |
700 | ||
374ca955 | 701 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
b75a7d8f A |
702 | |
703 | U_NAMESPACE_END | |
704 | ||
705 | #endif /* #if !UCONFIG_NO_COLLATION */ |