]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
46f4442e | 2 | ****************************************************************************** |
729e4ab9 A |
3 | * Copyright (C) 1996-2010, International Business Machines Corporation and |
4 | * others. All Rights Reserved. | |
46f4442e A |
5 | ****************************************************************************** |
6 | */ | |
b75a7d8f A |
7 | |
8 | /** | |
46f4442e A |
9 | * File tblcoll.cpp |
10 | * | |
11 | * Created by: Helena Shih | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
17 | * constructor which reads RuleBasedCollator object from | |
18 | * a binary file. Added writeToFile method which streams | |
19 | * RuleBasedCollator out to a binary file. The streamIn | |
20 | * and streamOut methods use istream and ostream objects | |
21 | * in binary mode. | |
22 | * 2/11/97 aliu Moved declarations out of for loop initializer. | |
23 | * Added Mac compatibility #ifdef for ios::nocreate. | |
24 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
25 | * hold invariant data. | |
26 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
27 | * Added a private RuleBasedCollator(Locale&) constructor, | |
28 | * to be used by Collator::getInstance(). General | |
29 | * clean up. Made use of UErrorCode variables consistent. | |
30 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
31 | * constructor and getDynamicClassID. | |
32 | * 3/5/97 aliu Changed compaction cycle to improve performance. We | |
33 | * use the maximum allowable value which is kBlockCount. | |
34 | * Modified getRules() to load rules dynamically. Changed | |
35 | * constructFromFile() call to accommodate this (added | |
36 | * parameter to specify whether binary loading is to | |
37 | * take place). | |
38 | * 05/06/97 helena Added memory allocation error check. | |
39 | * 6/20/97 helena Java class name change. | |
40 | * 6/23/97 helena Adding comments to make code more readable. | |
41 | * 09/03/97 helena Added createCollationKeyValues(). | |
42 | * 06/26/98 erm Changes for CollationKeys using byte arrays. | |
43 | * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
44 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
45 | * Normalizer::EMode | |
46 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
47 | * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx | |
48 | * files are no longer used. | |
49 | * 11/02/99 helena Collator performance enhancements. Special case | |
50 | * for NO_OP situations. | |
51 | * 11/17/99 srl More performance enhancements. Inlined some internal functions. | |
52 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
53 | * to implementation file. | |
54 | * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) | |
55 | */ | |
b75a7d8f | 56 | |
729e4ab9 A |
57 | #include <typeinfo> // for 'typeid' to work |
58 | ||
b75a7d8f A |
59 | #include "unicode/utypes.h" |
60 | ||
61 | #if !UCONFIG_NO_COLLATION | |
62 | ||
63 | #include "unicode/tblcoll.h" | |
64 | #include "unicode/coleitr.h" | |
374ca955 | 65 | #include "unicode/ures.h" |
b75a7d8f A |
66 | #include "unicode/uset.h" |
67 | #include "ucol_imp.h" | |
68 | #include "uresimp.h" | |
69 | #include "uhash.h" | |
70 | #include "cmemory.h" | |
71 | #include "cstring.h" | |
374ca955 | 72 | #include "putilimp.h" |
b75a7d8f A |
73 | |
74 | /* public RuleBasedCollator constructor ---------------------------------- */ | |
75 | ||
76 | U_NAMESPACE_BEGIN | |
77 | ||
78 | /** | |
79 | * Copy constructor, aliasing, not write-through | |
80 | */ | |
374ca955 A |
81 | RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
82 | : Collator(that) | |
83 | , dataIsOwned(FALSE) | |
84 | , isWriteThroughAlias(FALSE) | |
73c04bcf | 85 | , ucollator(NULL) |
b75a7d8f | 86 | { |
73c04bcf | 87 | RuleBasedCollator::operator=(that); |
b75a7d8f A |
88 | } |
89 | ||
90 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
91 | UErrorCode& status) : |
92 | dataIsOwned(FALSE) | |
b75a7d8f | 93 | { |
374ca955 A |
94 | construct(rules, |
95 | UCOL_DEFAULT_STRENGTH, | |
96 | UCOL_DEFAULT, | |
97 | status); | |
b75a7d8f A |
98 | } |
99 | ||
100 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
101 | ECollationStrength collationStrength, |
102 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 103 | { |
374ca955 A |
104 | construct(rules, |
105 | getUCollationStrength(collationStrength), | |
106 | UCOL_DEFAULT, | |
107 | status); | |
b75a7d8f A |
108 | } |
109 | ||
110 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
111 | UColAttributeValue decompositionMode, | |
112 | UErrorCode& status) : | |
374ca955 | 113 | dataIsOwned(FALSE) |
b75a7d8f | 114 | { |
374ca955 A |
115 | construct(rules, |
116 | UCOL_DEFAULT_STRENGTH, | |
117 | decompositionMode, | |
118 | status); | |
b75a7d8f A |
119 | } |
120 | ||
121 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
122 | ECollationStrength collationStrength, |
123 | UColAttributeValue decompositionMode, | |
124 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 125 | { |
374ca955 A |
126 | construct(rules, |
127 | getUCollationStrength(collationStrength), | |
128 | decompositionMode, | |
129 | status); | |
b75a7d8f | 130 | } |
729e4ab9 A |
131 | RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
132 | const RuleBasedCollator *base, | |
73c04bcf A |
133 | UErrorCode &status) : |
134 | dataIsOwned(TRUE), | |
135 | isWriteThroughAlias(FALSE) | |
136 | { | |
137 | ucollator = ucol_openBinary(bin, length, base->ucollator, &status); | |
138 | } | |
b75a7d8f A |
139 | |
140 | void | |
73c04bcf | 141 | RuleBasedCollator::setRuleStringFromCollator() |
b75a7d8f | 142 | { |
73c04bcf A |
143 | int32_t length; |
144 | const UChar *r = ucol_getRules(ucollator, &length); | |
374ca955 | 145 | |
73c04bcf A |
146 | if (r && length > 0) { |
147 | // alias the rules string | |
148 | urulestring.setTo(TRUE, r, length); | |
149 | } | |
150 | else { | |
151 | urulestring.truncate(0); // Clear string. | |
b75a7d8f | 152 | } |
b75a7d8f A |
153 | } |
154 | ||
155 | // not aliasing, not write-through | |
156 | void | |
157 | RuleBasedCollator::construct(const UnicodeString& rules, | |
158 | UColAttributeValue collationStrength, | |
159 | UColAttributeValue decompositionMode, | |
160 | UErrorCode& status) | |
161 | { | |
374ca955 A |
162 | ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
163 | decompositionMode, collationStrength, | |
164 | NULL, &status); | |
b75a7d8f | 165 | |
374ca955 A |
166 | dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
167 | isWriteThroughAlias = FALSE; | |
b75a7d8f | 168 | |
73c04bcf A |
169 | if(ucollator == NULL) { |
170 | if(U_SUCCESS(status)) { | |
171 | status = U_MEMORY_ALLOCATION_ERROR; | |
172 | } | |
173 | return; // Failure | |
174 | } | |
175 | ||
176 | setRuleStringFromCollator(); | |
b75a7d8f A |
177 | } |
178 | ||
179 | /* RuleBasedCollator public destructor ----------------------------------- */ | |
180 | ||
181 | RuleBasedCollator::~RuleBasedCollator() | |
182 | { | |
374ca955 A |
183 | if (dataIsOwned) |
184 | { | |
185 | ucol_close(ucollator); | |
374ca955 A |
186 | } |
187 | ucollator = 0; | |
b75a7d8f A |
188 | } |
189 | ||
190 | /* RuleBasedCollator public methods -------------------------------------- */ | |
191 | ||
192 | UBool RuleBasedCollator::operator==(const Collator& that) const | |
193 | { | |
194 | /* only checks for address equals here */ | |
195 | if (Collator::operator==(that)) | |
196 | return TRUE; | |
197 | ||
729e4ab9 | 198 | if (typeid(*this) != typeid(that)) |
b75a7d8f A |
199 | return FALSE; /* not the same class */ |
200 | ||
201 | RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | |
202 | ||
203 | // weiv: use C function, commented code below is wrong | |
204 | return ucol_equals(this->ucollator, thatAlias.ucollator); | |
205 | /* | |
206 | synwee : orginal code does not check for data compatibility | |
207 | */ | |
208 | /* | |
209 | if (ucollator != thatAlias.ucollator) | |
210 | return FALSE; | |
211 | ||
212 | return TRUE; | |
213 | */ | |
214 | } | |
215 | ||
374ca955 A |
216 | UBool RuleBasedCollator::operator!=(const Collator& other) const |
217 | { | |
218 | return !(*this == other); | |
219 | } | |
220 | ||
b75a7d8f A |
221 | // aliasing, not write-through |
222 | RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) | |
223 | { | |
374ca955 | 224 | if (this != &that) |
b75a7d8f | 225 | { |
374ca955 A |
226 | if (dataIsOwned) |
227 | { | |
228 | ucol_close(ucollator); | |
374ca955 | 229 | } |
b75a7d8f | 230 | |
73c04bcf A |
231 | urulestring.truncate(0); // empty the rule string |
232 | dataIsOwned = TRUE; | |
374ca955 | 233 | isWriteThroughAlias = FALSE; |
73c04bcf A |
234 | |
235 | UErrorCode intStatus = U_ZERO_ERROR; | |
236 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
237 | ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize, | |
238 | &intStatus); | |
239 | if (U_SUCCESS(intStatus)) { | |
240 | setRuleStringFromCollator(); | |
241 | } | |
374ca955 A |
242 | } |
243 | return *this; | |
b75a7d8f A |
244 | } |
245 | ||
246 | // aliasing, not write-through | |
247 | Collator* RuleBasedCollator::clone() const | |
248 | { | |
73c04bcf | 249 | return new RuleBasedCollator(*this); |
b75a7d8f A |
250 | } |
251 | ||
252 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
253 | (const UnicodeString& source) const | |
254 | { | |
374ca955 A |
255 | UErrorCode status = U_ZERO_ERROR; |
256 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
257 | status); | |
258 | if (U_FAILURE(status)) { | |
259 | delete result; | |
260 | return NULL; | |
261 | } | |
b75a7d8f | 262 | |
374ca955 | 263 | return result; |
b75a7d8f A |
264 | } |
265 | ||
266 | /** | |
267 | * Create a CollationElementIterator object that will iterate over the | |
268 | * elements in a string, using the collation rules defined in this | |
269 | * RuleBasedCollator | |
270 | */ | |
271 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
272 | (const CharacterIterator& source) const | |
273 | { | |
374ca955 A |
274 | UErrorCode status = U_ZERO_ERROR; |
275 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
276 | status); | |
b75a7d8f | 277 | |
374ca955 A |
278 | if (U_FAILURE(status)) { |
279 | delete result; | |
280 | return NULL; | |
281 | } | |
b75a7d8f | 282 | |
374ca955 | 283 | return result; |
b75a7d8f A |
284 | } |
285 | ||
286 | /** | |
287 | * Return a string representation of this collator's rules. The string can | |
288 | * later be passed to the constructor that takes a UnicodeString argument, | |
289 | * which will construct a collator that's functionally identical to this one. | |
290 | * You can also allow users to edit the string in order to change the collation | |
291 | * data, or you can print it out for inspection, or whatever. | |
292 | */ | |
293 | const UnicodeString& RuleBasedCollator::getRules() const | |
294 | { | |
73c04bcf | 295 | return urulestring; |
b75a7d8f A |
296 | } |
297 | ||
298 | void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) | |
299 | { | |
300 | int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); | |
301 | ||
302 | if (rulesize > 0) { | |
303 | UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); | |
304 | if(rules != NULL) { | |
374ca955 A |
305 | ucol_getRulesEx(ucollator, delta, rules, rulesize); |
306 | buffer.setTo(rules, rulesize); | |
307 | uprv_free(rules); | |
729e4ab9 | 308 | } else { // couldn't allocate |
374ca955 | 309 | buffer.remove(); |
b75a7d8f A |
310 | } |
311 | } | |
312 | else { | |
313 | buffer.remove(); | |
314 | } | |
315 | } | |
316 | ||
317 | UnicodeSet * | |
318 | RuleBasedCollator::getTailoredSet(UErrorCode &status) const | |
319 | { | |
374ca955 A |
320 | if(U_FAILURE(status)) { |
321 | return NULL; | |
322 | } | |
323 | return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); | |
b75a7d8f A |
324 | } |
325 | ||
326 | ||
327 | void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const | |
328 | { | |
329 | if (versionInfo!=NULL){ | |
330 | ucol_getVersion(ucollator, versionInfo); | |
331 | } | |
332 | } | |
333 | ||
334 | Collator::EComparisonResult RuleBasedCollator::compare( | |
335 | const UnicodeString& source, | |
336 | const UnicodeString& target, | |
337 | int32_t length) const | |
338 | { | |
374ca955 A |
339 | UErrorCode status = U_ZERO_ERROR; |
340 | return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status)); | |
b75a7d8f A |
341 | } |
342 | ||
343 | UCollationResult RuleBasedCollator::compare( | |
344 | const UnicodeString& source, | |
345 | const UnicodeString& target, | |
374ca955 | 346 | int32_t length, |
b75a7d8f A |
347 | UErrorCode &status) const |
348 | { | |
374ca955 | 349 | return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
b75a7d8f A |
350 | } |
351 | ||
352 | Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source, | |
353 | int32_t sourceLength, | |
354 | const UChar* target, | |
355 | int32_t targetLength) | |
356 | const | |
357 | { | |
374ca955 A |
358 | return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength, |
359 | target, targetLength)); | |
b75a7d8f A |
360 | } |
361 | ||
362 | UCollationResult RuleBasedCollator::compare(const UChar* source, | |
363 | int32_t sourceLength, | |
364 | const UChar* target, | |
374ca955 | 365 | int32_t targetLength, |
b75a7d8f A |
366 | UErrorCode &status) const |
367 | { | |
374ca955 A |
368 | if(U_SUCCESS(status)) { |
369 | return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); | |
370 | } else { | |
371 | return UCOL_EQUAL; | |
372 | } | |
b75a7d8f A |
373 | } |
374 | ||
375 | /** | |
376 | * Compare two strings using this collator | |
377 | */ | |
378 | Collator::EComparisonResult RuleBasedCollator::compare( | |
379 | const UnicodeString& source, | |
380 | const UnicodeString& target) const | |
381 | { | |
374ca955 A |
382 | return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
383 | target.getBuffer(), target.length())); | |
b75a7d8f A |
384 | } |
385 | ||
386 | UCollationResult RuleBasedCollator::compare( | |
387 | const UnicodeString& source, | |
374ca955 | 388 | const UnicodeString& target, |
b75a7d8f A |
389 | UErrorCode &status) const |
390 | { | |
374ca955 A |
391 | if(U_SUCCESS(status)) { |
392 | return ucol_strcoll(ucollator, source.getBuffer(), source.length(), | |
393 | target.getBuffer(), target.length()); | |
394 | } else { | |
395 | return UCOL_EQUAL; | |
396 | } | |
b75a7d8f A |
397 | } |
398 | ||
729e4ab9 A |
399 | UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, |
400 | UCharIterator &tIter, | |
401 | UErrorCode &status) const { | |
402 | if(U_SUCCESS(status)) { | |
403 | return ucol_strcollIter(ucollator, &sIter, &tIter, &status); | |
404 | } else { | |
405 | return UCOL_EQUAL; | |
406 | } | |
407 | } | |
408 | ||
b75a7d8f A |
409 | /** |
410 | * Retrieve a collation key for the specified string. The key can be compared | |
411 | * with other collation keys using a bitwise comparison (e.g. memcmp) to find | |
412 | * the ordering of their respective source strings. This is handy when doing a | |
413 | * sort, where each sort key must be compared many times. | |
414 | * | |
415 | * The basic algorithm here is to find all of the collation elements for each | |
416 | * character in the source string, convert them to an ASCII representation, and | |
417 | * put them into the collation key. But it's trickier than that. Each | |
418 | * collation element in a string has three components: primary ('A' vs 'B'), | |
374ca955 | 419 | * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
b75a7d8f A |
420 | * at the end of a string takes precedence over a secondary or tertiary |
421 | * difference earlier in the string. | |
422 | * | |
423 | * To account for this, we put all of the primary orders at the beginning of | |
424 | * the string, followed by the secondary and tertiary orders. Each set of | |
425 | * orders is terminated by nulls so that a key for a string which is an initial | |
426 | * substring of another key will compare less without any special case. | |
427 | * | |
428 | * Here's a hypothetical example, with the collation element represented as a | |
429 | * three-digit number, one digit for primary, one for secondary, etc. | |
430 | * | |
374ca955 | 431 | * String: A a B \u00C9 |
b75a7d8f A |
432 | * Collation Elements: 101 100 201 511 |
433 | * Collation Key: 1125<null>0001<null>1011<null> | |
434 | * | |
435 | * To make things even trickier, secondary differences (accent marks) are | |
436 | * compared starting at the *end* of the string in languages with French | |
437 | * secondary ordering. But when comparing the accent marks on a single base | |
438 | * character, they are compared from the beginning. To handle this, we reverse | |
439 | * all of the accents that belong to each base character, then we reverse the | |
440 | * entire string of secondary orderings at the end. | |
441 | */ | |
442 | CollationKey& RuleBasedCollator::getCollationKey( | |
443 | const UnicodeString& source, | |
444 | CollationKey& sortkey, | |
445 | UErrorCode& status) const | |
446 | { | |
374ca955 | 447 | return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
b75a7d8f A |
448 | } |
449 | ||
450 | CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, | |
451 | int32_t sourceLen, | |
452 | CollationKey& sortkey, | |
453 | UErrorCode& status) const | |
454 | { | |
374ca955 A |
455 | if (U_FAILURE(status)) |
456 | { | |
457 | return sortkey.setToBogus(); | |
458 | } | |
b75a7d8f | 459 | |
374ca955 A |
460 | if ((!source) || (sourceLen == 0)) { |
461 | return sortkey.reset(); | |
462 | } | |
b75a7d8f | 463 | |
374ca955 A |
464 | uint8_t *result; |
465 | int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, | |
466 | source, sourceLen, | |
467 | &result, | |
468 | &status); | |
469 | sortkey.adopt(result, resultLen); | |
470 | return sortkey; | |
b75a7d8f A |
471 | } |
472 | ||
473 | /** | |
474 | * Return the maximum length of any expansion sequences that end with the | |
475 | * specified comparison order. | |
476 | * @param order a collation order returned by previous or next. | |
477 | * @return the maximum length of any expansion seuences ending with the | |
478 | * specified order or 1 if collation order does not occur at the end of any | |
479 | * expansion sequence. | |
480 | * @see CollationElementIterator#getMaxExpansion | |
481 | */ | |
482 | int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | |
483 | { | |
374ca955 A |
484 | uint8_t result; |
485 | UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); | |
486 | return result; | |
b75a7d8f A |
487 | } |
488 | ||
489 | uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, | |
490 | UErrorCode &status) | |
491 | { | |
374ca955 | 492 | return ucol_cloneRuleData(ucollator, &length, &status); |
b75a7d8f A |
493 | } |
494 | ||
73c04bcf A |
495 | |
496 | int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) | |
497 | { | |
498 | return ucol_cloneBinary(ucollator, buffer, capacity, &status); | |
499 | } | |
500 | ||
b75a7d8f A |
501 | void RuleBasedCollator::setAttribute(UColAttribute attr, |
502 | UColAttributeValue value, | |
503 | UErrorCode &status) | |
504 | { | |
374ca955 A |
505 | if (U_FAILURE(status)) |
506 | return; | |
507 | checkOwned(); | |
508 | ucol_setAttribute(ucollator, attr, value, &status); | |
b75a7d8f A |
509 | } |
510 | ||
511 | UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, | |
512 | UErrorCode &status) | |
513 | { | |
374ca955 A |
514 | if (U_FAILURE(status)) |
515 | return UCOL_DEFAULT; | |
516 | return ucol_getAttribute(ucollator, attr, &status); | |
b75a7d8f A |
517 | } |
518 | ||
519 | uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { | |
374ca955 A |
520 | checkOwned(); |
521 | return ucol_setVariableTop(ucollator, varTop, len, &status); | |
b75a7d8f A |
522 | } |
523 | ||
524 | uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) { | |
374ca955 A |
525 | checkOwned(); |
526 | return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); | |
b75a7d8f A |
527 | } |
528 | ||
529 | void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) { | |
374ca955 A |
530 | checkOwned(); |
531 | ucol_restoreVariableTop(ucollator, varTop, &status); | |
b75a7d8f A |
532 | } |
533 | ||
534 | uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { | |
535 | return ucol_getVariableTop(ucollator, &status); | |
536 | } | |
537 | ||
538 | Collator* RuleBasedCollator::safeClone(void) | |
539 | { | |
374ca955 A |
540 | UErrorCode intStatus = U_ZERO_ERROR; |
541 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
542 | UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, | |
543 | &intStatus); | |
544 | if (U_FAILURE(intStatus)) { | |
545 | return NULL; | |
546 | } | |
b75a7d8f | 547 | |
73c04bcf | 548 | RuleBasedCollator *result = new RuleBasedCollator(); |
46f4442e A |
549 | // Null pointer check |
550 | if (result != NULL) { | |
551 | result->ucollator = ucol; | |
552 | result->dataIsOwned = TRUE; | |
553 | result->isWriteThroughAlias = FALSE; | |
554 | setRuleStringFromCollator(); | |
555 | } | |
b75a7d8f | 556 | |
374ca955 | 557 | return result; |
b75a7d8f A |
558 | } |
559 | ||
560 | ||
561 | int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, | |
562 | uint8_t *result, int32_t resultLength) | |
563 | const | |
564 | { | |
374ca955 | 565 | return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
b75a7d8f A |
566 | } |
567 | ||
568 | int32_t RuleBasedCollator::getSortKey(const UChar *source, | |
569 | int32_t sourceLength, uint8_t *result, | |
570 | int32_t resultLength) const | |
571 | { | |
374ca955 | 572 | return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
b75a7d8f A |
573 | } |
574 | ||
575 | Collator::ECollationStrength RuleBasedCollator::getStrength(void) const | |
576 | { | |
374ca955 A |
577 | UErrorCode intStatus = U_ZERO_ERROR; |
578 | return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH, | |
579 | &intStatus)); | |
b75a7d8f A |
580 | } |
581 | ||
582 | void RuleBasedCollator::setStrength(ECollationStrength newStrength) | |
583 | { | |
374ca955 A |
584 | checkOwned(); |
585 | UErrorCode intStatus = U_ZERO_ERROR; | |
586 | UCollationStrength strength = getUCollationStrength(newStrength); | |
587 | ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus); | |
b75a7d8f A |
588 | } |
589 | ||
729e4ab9 A |
590 | int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, |
591 | int32_t destCapacity, | |
592 | UErrorCode& status) const | |
593 | { | |
594 | return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); | |
595 | } | |
596 | ||
597 | void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, | |
598 | int32_t reorderCodesLength, | |
599 | UErrorCode& status) | |
600 | { | |
601 | ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); | |
602 | } | |
603 | ||
604 | ||
b75a7d8f A |
605 | /** |
606 | * Create a hash code for this collation. Just hash the main rule table -- that | |
607 | * should be good enough for almost any use. | |
608 | */ | |
609 | int32_t RuleBasedCollator::hashCode() const | |
610 | { | |
374ca955 A |
611 | int32_t length; |
612 | const UChar *rules = ucol_getRules(ucollator, &length); | |
613 | return uhash_hashUCharsN(rules, length); | |
b75a7d8f A |
614 | } |
615 | ||
616 | /** | |
617 | * return the locale of this collator | |
618 | */ | |
619 | const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { | |
729e4ab9 | 620 | const char *result = ucol_getLocaleByType(ucollator, type, &status); |
374ca955 A |
621 | if(result == NULL) { |
622 | Locale res(""); | |
623 | res.setToBogus(); | |
624 | return res; | |
625 | } else { | |
626 | return Locale(result); | |
627 | } | |
b75a7d8f A |
628 | } |
629 | ||
630 | void | |
46f4442e | 631 | RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { |
374ca955 | 632 | checkOwned(); |
46f4442e | 633 | char* rloc = uprv_strdup(requestedLocale.getName()); |
b75a7d8f | 634 | if (rloc) { |
46f4442e | 635 | char* vloc = uprv_strdup(validLocale.getName()); |
b75a7d8f | 636 | if (vloc) { |
46f4442e A |
637 | char* aloc = uprv_strdup(actualLocale.getName()); |
638 | if (aloc) { | |
639 | ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); | |
640 | return; | |
641 | } | |
642 | uprv_free(vloc); | |
b75a7d8f A |
643 | } |
644 | uprv_free(rloc); | |
645 | } | |
646 | } | |
647 | ||
648 | // RuleBaseCollatorNew private constructor ---------------------------------- | |
649 | ||
374ca955 | 650 | RuleBasedCollator::RuleBasedCollator() |
73c04bcf | 651 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
b75a7d8f | 652 | { |
b75a7d8f A |
653 | } |
654 | ||
655 | RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, | |
73c04bcf A |
656 | UErrorCode& status) |
657 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) | |
b75a7d8f | 658 | { |
374ca955 A |
659 | if (U_FAILURE(status)) |
660 | return; | |
b75a7d8f | 661 | |
374ca955 A |
662 | /* |
663 | Try to load, in order: | |
664 | 1. The desired locale's collation. | |
665 | 2. A fallback of the desired locale. | |
666 | 3. The default locale's collation. | |
667 | 4. A fallback of the default locale. | |
668 | 5. The default collation rules, which contains en_US collation rules. | |
669 | ||
670 | To reiterate, we try: | |
671 | Specific: | |
672 | language+country+variant | |
673 | language+country | |
674 | language | |
675 | Default: | |
676 | language+country+variant | |
677 | language+country | |
678 | language | |
679 | Root: (aka DEFAULTRULES) | |
680 | steps 1-5 are handled by resource bundle fallback mechanism. | |
681 | however, in a very unprobable situation that no resource bundle | |
682 | data exists, step 5 is repeated with hardcoded default rules. | |
683 | */ | |
684 | ||
685 | setUCollator(desiredLocale, status); | |
686 | ||
687 | if (U_FAILURE(status)) | |
688 | { | |
689 | status = U_ZERO_ERROR; | |
b75a7d8f | 690 | |
374ca955 A |
691 | setUCollator(kRootLocaleName, status); |
692 | if (status == U_ZERO_ERROR) { | |
693 | status = U_USING_DEFAULT_WARNING; | |
694 | } | |
b75a7d8f | 695 | } |
b75a7d8f | 696 | |
374ca955 A |
697 | if (U_SUCCESS(status)) |
698 | { | |
73c04bcf | 699 | setRuleStringFromCollator(); |
b75a7d8f | 700 | } |
b75a7d8f A |
701 | } |
702 | ||
729e4ab9 | 703 | void |
b75a7d8f A |
704 | RuleBasedCollator::setUCollator(const char *locale, |
705 | UErrorCode &status) | |
706 | { | |
374ca955 A |
707 | if (U_FAILURE(status)) |
708 | return; | |
709 | if (ucollator && dataIsOwned) | |
710 | ucol_close(ucollator); | |
711 | ucollator = ucol_open_internal(locale, &status); | |
712 | dataIsOwned = TRUE; | |
713 | isWriteThroughAlias = FALSE; | |
b75a7d8f A |
714 | } |
715 | ||
716 | ||
717 | void | |
718 | RuleBasedCollator::checkOwned() { | |
374ca955 A |
719 | if (!(dataIsOwned || isWriteThroughAlias)) { |
720 | UErrorCode status = U_ZERO_ERROR; | |
721 | ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); | |
73c04bcf | 722 | setRuleStringFromCollator(); |
374ca955 A |
723 | dataIsOwned = TRUE; |
724 | isWriteThroughAlias = FALSE; | |
725 | } | |
b75a7d8f A |
726 | } |
727 | ||
374ca955 | 728 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
b75a7d8f A |
729 | |
730 | U_NAMESPACE_END | |
731 | ||
732 | #endif /* #if !UCONFIG_NO_COLLATION */ |