]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
46f4442e | 2 | ****************************************************************************** |
4388f060 | 3 | * Copyright (C) 1996-2012, International Business Machines Corporation and |
729e4ab9 | 4 | * others. All Rights Reserved. |
46f4442e A |
5 | ****************************************************************************** |
6 | */ | |
b75a7d8f A |
7 | |
8 | /** | |
46f4442e A |
9 | * File tblcoll.cpp |
10 | * | |
11 | * Created by: Helena Shih | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
17 | * constructor which reads RuleBasedCollator object from | |
18 | * a binary file. Added writeToFile method which streams | |
19 | * RuleBasedCollator out to a binary file. The streamIn | |
20 | * and streamOut methods use istream and ostream objects | |
21 | * in binary mode. | |
22 | * 2/11/97 aliu Moved declarations out of for loop initializer. | |
23 | * Added Mac compatibility #ifdef for ios::nocreate. | |
24 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
25 | * hold invariant data. | |
26 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
27 | * Added a private RuleBasedCollator(Locale&) constructor, | |
28 | * to be used by Collator::getInstance(). General | |
29 | * clean up. Made use of UErrorCode variables consistent. | |
30 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
31 | * constructor and getDynamicClassID. | |
32 | * 3/5/97 aliu Changed compaction cycle to improve performance. We | |
33 | * use the maximum allowable value which is kBlockCount. | |
34 | * Modified getRules() to load rules dynamically. Changed | |
35 | * constructFromFile() call to accommodate this (added | |
36 | * parameter to specify whether binary loading is to | |
37 | * take place). | |
38 | * 05/06/97 helena Added memory allocation error check. | |
39 | * 6/20/97 helena Java class name change. | |
40 | * 6/23/97 helena Adding comments to make code more readable. | |
41 | * 09/03/97 helena Added createCollationKeyValues(). | |
42 | * 06/26/98 erm Changes for CollationKeys using byte arrays. | |
43 | * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
44 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
45 | * Normalizer::EMode | |
46 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
47 | * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx | |
48 | * files are no longer used. | |
49 | * 11/02/99 helena Collator performance enhancements. Special case | |
50 | * for NO_OP situations. | |
51 | * 11/17/99 srl More performance enhancements. Inlined some internal functions. | |
52 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
53 | * to implementation file. | |
54 | * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) | |
55 | */ | |
b75a7d8f | 56 | |
729e4ab9 A |
57 | #include <typeinfo> // for 'typeid' to work |
58 | ||
b75a7d8f A |
59 | #include "unicode/utypes.h" |
60 | ||
61 | #if !UCONFIG_NO_COLLATION | |
62 | ||
63 | #include "unicode/tblcoll.h" | |
64 | #include "unicode/coleitr.h" | |
374ca955 | 65 | #include "unicode/ures.h" |
b75a7d8f A |
66 | #include "unicode/uset.h" |
67 | #include "ucol_imp.h" | |
68 | #include "uresimp.h" | |
69 | #include "uhash.h" | |
70 | #include "cmemory.h" | |
71 | #include "cstring.h" | |
374ca955 | 72 | #include "putilimp.h" |
4388f060 | 73 | #include "ustr_imp.h" |
b75a7d8f A |
74 | |
75 | /* public RuleBasedCollator constructor ---------------------------------- */ | |
76 | ||
77 | U_NAMESPACE_BEGIN | |
78 | ||
79 | /** | |
80 | * Copy constructor, aliasing, not write-through | |
81 | */ | |
374ca955 A |
82 | RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
83 | : Collator(that) | |
84 | , dataIsOwned(FALSE) | |
85 | , isWriteThroughAlias(FALSE) | |
73c04bcf | 86 | , ucollator(NULL) |
b75a7d8f | 87 | { |
73c04bcf | 88 | RuleBasedCollator::operator=(that); |
b75a7d8f A |
89 | } |
90 | ||
91 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
92 | UErrorCode& status) : |
93 | dataIsOwned(FALSE) | |
b75a7d8f | 94 | { |
374ca955 A |
95 | construct(rules, |
96 | UCOL_DEFAULT_STRENGTH, | |
97 | UCOL_DEFAULT, | |
98 | status); | |
b75a7d8f A |
99 | } |
100 | ||
101 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
102 | ECollationStrength collationStrength, |
103 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 104 | { |
374ca955 A |
105 | construct(rules, |
106 | getUCollationStrength(collationStrength), | |
107 | UCOL_DEFAULT, | |
108 | status); | |
b75a7d8f A |
109 | } |
110 | ||
111 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
112 | UColAttributeValue decompositionMode, | |
113 | UErrorCode& status) : | |
374ca955 | 114 | dataIsOwned(FALSE) |
b75a7d8f | 115 | { |
374ca955 A |
116 | construct(rules, |
117 | UCOL_DEFAULT_STRENGTH, | |
118 | decompositionMode, | |
119 | status); | |
b75a7d8f A |
120 | } |
121 | ||
122 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
123 | ECollationStrength collationStrength, |
124 | UColAttributeValue decompositionMode, | |
125 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 126 | { |
374ca955 A |
127 | construct(rules, |
128 | getUCollationStrength(collationStrength), | |
129 | decompositionMode, | |
130 | status); | |
b75a7d8f | 131 | } |
729e4ab9 A |
132 | RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
133 | const RuleBasedCollator *base, | |
73c04bcf A |
134 | UErrorCode &status) : |
135 | dataIsOwned(TRUE), | |
136 | isWriteThroughAlias(FALSE) | |
137 | { | |
138 | ucollator = ucol_openBinary(bin, length, base->ucollator, &status); | |
139 | } | |
b75a7d8f A |
140 | |
141 | void | |
73c04bcf | 142 | RuleBasedCollator::setRuleStringFromCollator() |
b75a7d8f | 143 | { |
73c04bcf A |
144 | int32_t length; |
145 | const UChar *r = ucol_getRules(ucollator, &length); | |
374ca955 | 146 | |
73c04bcf A |
147 | if (r && length > 0) { |
148 | // alias the rules string | |
149 | urulestring.setTo(TRUE, r, length); | |
150 | } | |
151 | else { | |
152 | urulestring.truncate(0); // Clear string. | |
b75a7d8f | 153 | } |
b75a7d8f A |
154 | } |
155 | ||
156 | // not aliasing, not write-through | |
157 | void | |
158 | RuleBasedCollator::construct(const UnicodeString& rules, | |
159 | UColAttributeValue collationStrength, | |
160 | UColAttributeValue decompositionMode, | |
161 | UErrorCode& status) | |
162 | { | |
374ca955 A |
163 | ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
164 | decompositionMode, collationStrength, | |
165 | NULL, &status); | |
b75a7d8f | 166 | |
374ca955 A |
167 | dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
168 | isWriteThroughAlias = FALSE; | |
b75a7d8f | 169 | |
73c04bcf A |
170 | if(ucollator == NULL) { |
171 | if(U_SUCCESS(status)) { | |
172 | status = U_MEMORY_ALLOCATION_ERROR; | |
173 | } | |
174 | return; // Failure | |
175 | } | |
176 | ||
177 | setRuleStringFromCollator(); | |
b75a7d8f A |
178 | } |
179 | ||
180 | /* RuleBasedCollator public destructor ----------------------------------- */ | |
181 | ||
182 | RuleBasedCollator::~RuleBasedCollator() | |
183 | { | |
374ca955 A |
184 | if (dataIsOwned) |
185 | { | |
186 | ucol_close(ucollator); | |
374ca955 A |
187 | } |
188 | ucollator = 0; | |
b75a7d8f A |
189 | } |
190 | ||
191 | /* RuleBasedCollator public methods -------------------------------------- */ | |
192 | ||
193 | UBool RuleBasedCollator::operator==(const Collator& that) const | |
194 | { | |
195 | /* only checks for address equals here */ | |
196 | if (Collator::operator==(that)) | |
197 | return TRUE; | |
198 | ||
729e4ab9 | 199 | if (typeid(*this) != typeid(that)) |
b75a7d8f A |
200 | return FALSE; /* not the same class */ |
201 | ||
202 | RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | |
203 | ||
204 | // weiv: use C function, commented code below is wrong | |
205 | return ucol_equals(this->ucollator, thatAlias.ucollator); | |
206 | /* | |
207 | synwee : orginal code does not check for data compatibility | |
208 | */ | |
209 | /* | |
210 | if (ucollator != thatAlias.ucollator) | |
211 | return FALSE; | |
212 | ||
213 | return TRUE; | |
214 | */ | |
215 | } | |
216 | ||
374ca955 A |
217 | UBool RuleBasedCollator::operator!=(const Collator& other) const |
218 | { | |
219 | return !(*this == other); | |
220 | } | |
221 | ||
b75a7d8f A |
222 | // not aliasing, not write-through: clones that.ucollator and owns the clone | |
223 | RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) | |
224 | { | |
374ca955 | 225 | if (this != &that) |
b75a7d8f | 226 | { |
374ca955 A |
227 | if (dataIsOwned) |
228 | { | |
229 | ucol_close(ucollator); | |
374ca955 | 230 | } |
b75a7d8f | 231 | |
73c04bcf A |
232 | urulestring.truncate(0); // empty the rule string |
233 | dataIsOwned = TRUE; | |
374ca955 | 234 | isWriteThroughAlias = FALSE; |
73c04bcf A |
235 | |
236 | UErrorCode intStatus = U_ZERO_ERROR; | |
237 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
238 | ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize, | |
239 | &intStatus); | |
240 | if (U_SUCCESS(intStatus)) { | |
241 | setRuleStringFromCollator(); | |
242 | } | |
374ca955 A |
243 | } |
244 | return *this; | |
b75a7d8f A |
245 | } |
246 | ||
247 | // aliasing, not write-through | |
248 | Collator* RuleBasedCollator::clone() const | |
249 | { | |
73c04bcf | 250 | return new RuleBasedCollator(*this); |
b75a7d8f A |
251 | } |
252 | ||
4388f060 | 253 | |
b75a7d8f A |
254 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
255 | (const UnicodeString& source) const | |
256 | { | |
374ca955 A |
257 | UErrorCode status = U_ZERO_ERROR; |
258 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
259 | status); | |
260 | if (U_FAILURE(status)) { | |
261 | delete result; | |
262 | return NULL; | |
263 | } | |
b75a7d8f | 264 | |
374ca955 | 265 | return result; |
b75a7d8f A |
266 | } |
267 | ||
268 | /** | |
269 | * Create a CollationElementIterator object that will iterate over the | |
270 | * elements in a string, using the collation rules defined in this | |
271 | * RuleBasedCollator | |
272 | */ | |
273 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
274 | (const CharacterIterator& source) const | |
275 | { | |
374ca955 A |
276 | UErrorCode status = U_ZERO_ERROR; |
277 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
278 | status); | |
b75a7d8f | 279 | |
374ca955 A |
280 | if (U_FAILURE(status)) { |
281 | delete result; | |
282 | return NULL; | |
283 | } | |
b75a7d8f | 284 | |
374ca955 | 285 | return result; |
b75a7d8f A |
286 | } |
287 | ||
288 | /** | |
289 | * Return a string representation of this collator's rules. The string can | |
290 | * later be passed to the constructor that takes a UnicodeString argument, | |
291 | * which will construct a collator that's functionally identical to this one. | |
292 | * You can also allow users to edit the string in order to change the collation | |
293 | * data, or you can print it out for inspection, or whatever. | |
294 | */ | |
295 | const UnicodeString& RuleBasedCollator::getRules() const | |
296 | { | |
73c04bcf | 297 | return urulestring; |
b75a7d8f A |
298 | } |
299 | ||
300 | void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) | |
301 | { | |
302 | int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); | |
303 | ||
304 | if (rulesize > 0) { | |
305 | UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); | |
306 | if(rules != NULL) { | |
374ca955 A |
307 | ucol_getRulesEx(ucollator, delta, rules, rulesize); |
308 | buffer.setTo(rules, rulesize); | |
309 | uprv_free(rules); | |
729e4ab9 | 310 | } else { // couldn't allocate |
374ca955 | 311 | buffer.remove(); |
b75a7d8f A |
312 | } |
313 | } | |
314 | else { | |
315 | buffer.remove(); | |
316 | } | |
317 | } | |
318 | ||
319 | UnicodeSet * | |
320 | RuleBasedCollator::getTailoredSet(UErrorCode &status) const | |
321 | { | |
374ca955 A |
322 | if(U_FAILURE(status)) { |
323 | return NULL; | |
324 | } | |
325 | return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); | |
b75a7d8f A |
326 | } |
327 | ||
328 | ||
329 | void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const | |
330 | { | |
331 | if (versionInfo!=NULL){ | |
332 | ucol_getVersion(ucollator, versionInfo); | |
333 | } | |
334 | } | |
335 | ||
336 | Collator::EComparisonResult RuleBasedCollator::compare( | |
337 | const UnicodeString& source, | |
338 | const UnicodeString& target, | |
339 | int32_t length) const | |
340 | { | |
374ca955 A |
341 | UErrorCode status = U_ZERO_ERROR; |
342 | return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status)); | |
b75a7d8f A |
343 | } |
344 | ||
345 | UCollationResult RuleBasedCollator::compare( | |
346 | const UnicodeString& source, | |
347 | const UnicodeString& target, | |
374ca955 | 348 | int32_t length, |
b75a7d8f A |
349 | UErrorCode &status) const |
350 | { | |
374ca955 | 351 | return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
b75a7d8f A |
352 | } |
353 | ||
354 | Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source, | |
355 | int32_t sourceLength, | |
356 | const UChar* target, | |
357 | int32_t targetLength) | |
358 | const | |
359 | { | |
374ca955 A |
360 | return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength, |
361 | target, targetLength)); | |
b75a7d8f A |
362 | } |
363 | ||
364 | UCollationResult RuleBasedCollator::compare(const UChar* source, | |
365 | int32_t sourceLength, | |
366 | const UChar* target, | |
374ca955 | 367 | int32_t targetLength, |
b75a7d8f A |
368 | UErrorCode &status) const |
369 | { | |
374ca955 A |
370 | if(U_SUCCESS(status)) { |
371 | return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); | |
372 | } else { | |
373 | return UCOL_EQUAL; | |
374 | } | |
b75a7d8f A |
375 | } |
376 | ||
377 | /** | |
378 | * Compare two strings using this collator | |
379 | */ | |
380 | Collator::EComparisonResult RuleBasedCollator::compare( | |
381 | const UnicodeString& source, | |
382 | const UnicodeString& target) const | |
383 | { | |
374ca955 A |
384 | return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
385 | target.getBuffer(), target.length())); | |
b75a7d8f A |
386 | } |
387 | ||
388 | UCollationResult RuleBasedCollator::compare( | |
389 | const UnicodeString& source, | |
374ca955 | 390 | const UnicodeString& target, |
b75a7d8f A |
391 | UErrorCode &status) const |
392 | { | |
374ca955 A |
393 | if(U_SUCCESS(status)) { |
394 | return ucol_strcoll(ucollator, source.getBuffer(), source.length(), | |
395 | target.getBuffer(), target.length()); | |
396 | } else { | |
397 | return UCOL_EQUAL; | |
398 | } | |
b75a7d8f A |
399 | } |
400 | ||
729e4ab9 A |
401 | UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, |
402 | UCharIterator &tIter, | |
403 | UErrorCode &status) const { | |
404 | if(U_SUCCESS(status)) { | |
405 | return ucol_strcollIter(ucollator, &sIter, &tIter, &status); | |
406 | } else { | |
407 | return UCOL_EQUAL; | |
408 | } | |
409 | } | |
410 | ||
b75a7d8f A |
411 | /** |
412 | * Retrieve a collation key for the specified string. The key can be compared | |
413 | * with other collation keys using a bitwise comparison (e.g. memcmp) to find | |
414 | * the ordering of their respective source strings. This is handy when doing a | |
415 | * sort, where each sort key must be compared many times. | |
416 | * | |
417 | * The basic algorithm here is to find all of the collation elements for each | |
418 | * character in the source string, convert them to an ASCII representation, and | |
419 | * put them into the collation key. But it's trickier than that. Each | |
420 | * collation element in a string has three components: primary ('A' vs 'B'), | |
374ca955 | 421 | * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
b75a7d8f A |
422 | * at the end of a string takes precedence over a secondary or tertiary |
423 | * difference earlier in the string. | |
424 | * | |
425 | * To account for this, we put all of the primary orders at the beginning of | |
426 | * the string, followed by the secondary and tertiary orders. Each set of | |
427 | * orders is terminated by nulls so that a key for a string which is a initial | |
428 | * substring of another key will compare less without any special case. | |
429 | * | |
430 | * Here's a hypothetical example, with the collation element represented as a | |
431 | * three-digit number, one digit for primary, one for secondary, etc. | |
432 | * | |
374ca955 | 433 | * String: A a B \u00C9 |
b75a7d8f A |
434 | * Collation Elements: 101 100 201 511 |
435 | * Collation Key: 1125<null>0001<null>1011<null> | |
436 | * | |
437 | * To make things even trickier, secondary differences (accent marks) are | |
438 | * compared starting at the *end* of the string in languages with French | |
439 | * secondary ordering. But when comparing the accent marks on a single base | |
440 | * character, they are compared from the beginning. To handle this, we reverse | |
441 | * all of the accents that belong to each base character, then we reverse the | |
442 | * entire string of secondary orderings at the end. | |
443 | */ | |
444 | CollationKey& RuleBasedCollator::getCollationKey( | |
445 | const UnicodeString& source, | |
446 | CollationKey& sortkey, | |
447 | UErrorCode& status) const | |
448 | { | |
374ca955 | 449 | return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
b75a7d8f A |
450 | } |
451 | ||
452 | CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, | |
453 | int32_t sourceLen, | |
454 | CollationKey& sortkey, | |
455 | UErrorCode& status) const | |
456 | { | |
4388f060 A |
457 | if (U_FAILURE(status)) { |
458 | return sortkey.setToBogus(); | |
459 | } | |
460 | if (sourceLen < -1 || (source == NULL && sourceLen != 0)) { | |
461 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
374ca955 A |
462 | return sortkey.setToBogus(); |
463 | } | |
b75a7d8f | 464 | |
4388f060 A |
465 | if (sourceLen < 0) { |
466 | sourceLen = u_strlen(source); | |
467 | } | |
468 | if (sourceLen == 0) { | |
374ca955 A |
469 | return sortkey.reset(); |
470 | } | |
b75a7d8f | 471 | |
374ca955 | 472 | uint8_t *result; |
4388f060 A |
473 | int32_t resultCapacity; |
474 | if (sortkey.fCapacity >= (sourceLen * 3)) { | |
475 | // Try to reuse the CollationKey.fBytes. | |
476 | result = sortkey.fBytes; | |
477 | resultCapacity = sortkey.fCapacity; | |
478 | } else { | |
479 | result = NULL; | |
480 | resultCapacity = 0; | |
481 | } | |
482 | int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen, | |
483 | result, resultCapacity, &status); | |
484 | ||
485 | if (U_SUCCESS(status)) { | |
486 | if (result == sortkey.fBytes) { | |
487 | sortkey.setLength(resultLen); | |
488 | } else { | |
489 | sortkey.adopt(result, resultCapacity, resultLen); | |
490 | } | |
491 | } else { | |
492 | if (result != sortkey.fBytes) { | |
493 | uprv_free(result); | |
494 | } | |
495 | sortkey.setToBogus(); | |
496 | } | |
374ca955 | 497 | return sortkey; |
b75a7d8f A |
498 | } |
499 | ||
500 | /** | |
501 | * Return the maximum length of any expansion sequences that end with the | |
502 | * specified comparison order. | |
503 | * @param order a collation order returned by previous or next. | |
504 | * @return the maximum length of any expansion sequences ending with the | |
505 | * specified order or 1 if collation order does not occur at the end of any | |
506 | * expansion sequence. | |
507 | * @see CollationElementIterator#getMaxExpansion | |
508 | */ | |
509 | int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | |
510 | { | |
374ca955 A |
511 | uint8_t result; |
512 | UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); | |
513 | return result; | |
b75a7d8f A |
514 | } |
515 | ||
516 | uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, | |
517 | UErrorCode &status) | |
518 | { | |
374ca955 | 519 | return ucol_cloneRuleData(ucollator, &length, &status); |
b75a7d8f A |
520 | } |
521 | ||
73c04bcf A |
522 | |
523 | int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) | |
524 | { | |
525 | return ucol_cloneBinary(ucollator, buffer, capacity, &status); | |
526 | } | |
527 | ||
b75a7d8f A |
528 | void RuleBasedCollator::setAttribute(UColAttribute attr, |
529 | UColAttributeValue value, | |
530 | UErrorCode &status) | |
531 | { | |
374ca955 A |
532 | if (U_FAILURE(status)) |
533 | return; | |
534 | checkOwned(); | |
535 | ucol_setAttribute(ucollator, attr, value, &status); | |
b75a7d8f A |
536 | } |
537 | ||
538 | UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, | |
539 | UErrorCode &status) | |
540 | { | |
374ca955 A |
541 | if (U_FAILURE(status)) |
542 | return UCOL_DEFAULT; | |
543 | return ucol_getAttribute(ucollator, attr, &status); | |
b75a7d8f A |
544 | } |
545 | ||
546 | uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { | |
374ca955 A |
547 | checkOwned(); |
548 | return ucol_setVariableTop(ucollator, varTop, len, &status); | |
b75a7d8f A |
549 | } |
550 | ||
551 | uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) { | |
374ca955 A |
552 | checkOwned(); |
553 | return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); | |
b75a7d8f A |
554 | } |
555 | ||
556 | void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) { | |
374ca955 A |
557 | checkOwned(); |
558 | ucol_restoreVariableTop(ucollator, varTop, &status); | |
b75a7d8f A |
559 | } |
560 | ||
561 | uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { | |
562 | return ucol_getVariableTop(ucollator, &status); | |
563 | } | |
564 | ||
565 | Collator* RuleBasedCollator::safeClone(void) | |
566 | { | |
374ca955 A |
567 | UErrorCode intStatus = U_ZERO_ERROR; |
568 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
569 | UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, | |
570 | &intStatus); | |
571 | if (U_FAILURE(intStatus)) { | |
572 | return NULL; | |
573 | } | |
b75a7d8f | 574 | |
73c04bcf | 575 | RuleBasedCollator *result = new RuleBasedCollator(); |
46f4442e A |
576 | // Null pointer check |
577 | if (result != NULL) { | |
578 | result->ucollator = ucol; | |
579 | result->dataIsOwned = TRUE; | |
580 | result->isWriteThroughAlias = FALSE; | |
581 | setRuleStringFromCollator(); | |
582 | } | |
b75a7d8f | 583 | |
374ca955 | 584 | return result; |
b75a7d8f A |
585 | } |
586 | ||
587 | ||
588 | int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, | |
589 | uint8_t *result, int32_t resultLength) | |
590 | const | |
591 | { | |
374ca955 | 592 | return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
b75a7d8f A |
593 | } |
594 | ||
595 | int32_t RuleBasedCollator::getSortKey(const UChar *source, | |
596 | int32_t sourceLength, uint8_t *result, | |
597 | int32_t resultLength) const | |
598 | { | |
374ca955 | 599 | return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
b75a7d8f A |
600 | } |
601 | ||
602 | Collator::ECollationStrength RuleBasedCollator::getStrength(void) const | |
603 | { | |
374ca955 A |
604 | UErrorCode intStatus = U_ZERO_ERROR; |
605 | return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH, | |
606 | &intStatus)); | |
b75a7d8f A |
607 | } |
608 | ||
609 | void RuleBasedCollator::setStrength(ECollationStrength newStrength) | |
610 | { | |
374ca955 A |
611 | checkOwned(); |
612 | UErrorCode intStatus = U_ZERO_ERROR; | |
613 | UCollationStrength strength = getUCollationStrength(newStrength); | |
614 | ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus); | |
b75a7d8f A |
615 | } |
616 | ||
729e4ab9 A |
617 | int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, |
618 | int32_t destCapacity, | |
619 | UErrorCode& status) const | |
620 | { | |
621 | return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); | |
622 | } | |
623 | ||
624 | void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, | |
625 | int32_t reorderCodesLength, | |
626 | UErrorCode& status) | |
627 | { | |
4388f060 | 628 | checkOwned(); |
729e4ab9 A |
629 | ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); |
630 | } | |
631 | ||
4388f060 A |
632 | int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode, |
633 | int32_t* dest, | |
634 | int32_t destCapacity, | |
635 | UErrorCode& status) | |
636 | { | |
637 | return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status); | |
638 | } | |
729e4ab9 | 639 | |
b75a7d8f A |
640 | /** |
641 | * Create a hash code for this collation. Just hash the main rule table -- that | |
642 | * should be good enough for almost any use. | |
643 | */ | |
644 | int32_t RuleBasedCollator::hashCode() const | |
645 | { | |
374ca955 A |
646 | int32_t length; |
647 | const UChar *rules = ucol_getRules(ucollator, &length); | |
4388f060 | 648 | return ustr_hashUCharsN(rules, length); |
b75a7d8f A |
649 | } |
650 | ||
651 | /** | |
652 | * return the locale of this collator | |
653 | */ | |
654 | const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { | |
729e4ab9 | 655 | const char *result = ucol_getLocaleByType(ucollator, type, &status); |
374ca955 A |
656 | if(result == NULL) { |
657 | Locale res(""); | |
658 | res.setToBogus(); | |
659 | return res; | |
660 | } else { | |
661 | return Locale(result); | |
662 | } | |
b75a7d8f A |
663 | } |
664 | ||
665 | void | |
46f4442e | 666 | RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { |
374ca955 | 667 | checkOwned(); |
46f4442e | 668 | char* rloc = uprv_strdup(requestedLocale.getName()); |
b75a7d8f | 669 | if (rloc) { |
46f4442e | 670 | char* vloc = uprv_strdup(validLocale.getName()); |
b75a7d8f | 671 | if (vloc) { |
46f4442e A |
672 | char* aloc = uprv_strdup(actualLocale.getName()); |
673 | if (aloc) { | |
674 | ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); | |
675 | return; | |
676 | } | |
677 | uprv_free(vloc); | |
b75a7d8f A |
678 | } |
679 | uprv_free(rloc); | |
680 | } | |
681 | } | |
682 | ||
683 | // RuleBaseCollatorNew private constructor ---------------------------------- | |
684 | ||
374ca955 | 685 | RuleBasedCollator::RuleBasedCollator() |
73c04bcf | 686 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
b75a7d8f | 687 | { |
b75a7d8f A |
688 | } |
689 | ||
690 | RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, | |
73c04bcf A |
691 | UErrorCode& status) |
692 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) | |
b75a7d8f | 693 | { |
374ca955 A |
694 | if (U_FAILURE(status)) |
695 | return; | |
b75a7d8f | 696 | |
374ca955 A |
697 | /* |
698 | Try to load, in order: | |
699 | 1. The desired locale's collation. | |
700 | 2. A fallback of the desired locale. | |
701 | 3. The default locale's collation. | |
702 | 4. A fallback of the default locale. | |
703 | 5. The default collation rules, which contains en_US collation rules. | |
704 | ||
705 | To reiterate, we try: | |
706 | Specific: | |
707 | language+country+variant | |
708 | language+country | |
709 | language | |
710 | Default: | |
711 | language+country+variant | |
712 | language+country | |
713 | language | |
714 | Root: (aka DEFAULTRULES) | |
715 | steps 1-5 are handled by resource bundle fallback mechanism. | |
716 | however, in a very unprobable situation that no resource bundle | |
717 | data exists, step 5 is repeated with hardcoded default rules. | |
718 | */ | |
719 | ||
720 | setUCollator(desiredLocale, status); | |
721 | ||
722 | if (U_FAILURE(status)) | |
723 | { | |
724 | status = U_ZERO_ERROR; | |
b75a7d8f | 725 | |
374ca955 A |
726 | setUCollator(kRootLocaleName, status); |
727 | if (status == U_ZERO_ERROR) { | |
728 | status = U_USING_DEFAULT_WARNING; | |
729 | } | |
b75a7d8f | 730 | } |
b75a7d8f | 731 | |
374ca955 A |
732 | if (U_SUCCESS(status)) |
733 | { | |
73c04bcf | 734 | setRuleStringFromCollator(); |
b75a7d8f | 735 | } |
b75a7d8f A |
736 | } |
737 | ||
729e4ab9 | 738 | void |
b75a7d8f A |
739 | RuleBasedCollator::setUCollator(const char *locale, |
740 | UErrorCode &status) | |
741 | { | |
4388f060 | 742 | if (U_FAILURE(status)) { |
374ca955 | 743 | return; |
4388f060 | 744 | } |
374ca955 A |
745 | if (ucollator && dataIsOwned) |
746 | ucol_close(ucollator); | |
747 | ucollator = ucol_open_internal(locale, &status); | |
748 | dataIsOwned = TRUE; | |
749 | isWriteThroughAlias = FALSE; | |
b75a7d8f A |
750 | } |
751 | ||
752 | ||
753 | void | |
754 | RuleBasedCollator::checkOwned() { | |
374ca955 A |
755 | if (!(dataIsOwned || isWriteThroughAlias)) { |
756 | UErrorCode status = U_ZERO_ERROR; | |
757 | ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); | |
73c04bcf | 758 | setRuleStringFromCollator(); |
374ca955 A |
759 | dataIsOwned = TRUE; |
760 | isWriteThroughAlias = FALSE; | |
761 | } | |
b75a7d8f A |
762 | } |
763 | ||
4388f060 A |
764 | |
765 | int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale, | |
766 | char *buffer, | |
767 | int32_t capacity, | |
768 | UErrorCode &status) const { | |
769 | /* simply delegate */ | |
770 | return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status); | |
771 | } | |
772 | ||
773 | ||
374ca955 | 774 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
b75a7d8f A |
775 | |
776 | U_NAMESPACE_END | |
777 | ||
778 | #endif /* #if !UCONFIG_NO_COLLATION */ |