]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
374ca955 | 3 | * Copyright (C) 1996-2004, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ****************************************************************************** | |
6 | */ | |
7 | ||
8 | /** | |
9 | * File tblcoll.cpp | |
10 | * | |
11 | * Created by: Helena Shih | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
17 | * constructor which reads RuleBasedCollator object from | |
18 | * a binary file. Added writeToFile method which streams | |
19 | * RuleBasedCollator out to a binary file. The streamIn | |
20 | * and streamOut methods use istream and ostream objects | |
21 | * in binary mode. | |
22 | * 2/11/97 aliu Moved declarations out of for loop initializer. | |
23 | * Added Mac compatibility #ifdef for ios::nocreate. | |
24 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
25 | * hold invariant data. | |
26 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
27 | * Added a private RuleBasedCollator(Locale&) constructor, | |
28 | * to be used by Collator::getInstance(). General | |
29 | * clean up. Made use of UErrorCode variables consistent. | |
30 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
31 | * constructor and getDynamicClassID. | |
32 | * 3/5/97 aliu Changed compaction cycle to improve performance. We | |
33 | * use the maximum allowable value which is kBlockCount. | |
34 | * Modified getRules() to load rules dynamically. Changed | |
35 | * constructFromFile() call to accommodate this (added | |
36 | * parameter to specify whether binary loading is to | |
37 | * take place). | |
38 | * 05/06/97 helena Added memory allocation error check. | |
39 | * 6/20/97 helena Java class name change. | |
40 | * 6/23/97 helena Adding comments to make code more readable. | |
41 | * 09/03/97 helena Added createCollationKeyValues(). | |
42 | * 06/26/98 erm Changes for CollationKeys using byte arrays. | |
43 | * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
44 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
45 | * Normalizer::EMode | |
46 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
47 | * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx | |
48 | * files are no longer used. | |
49 | * 11/02/99 helena Collator performance enhancements. Special case | |
50 | * for NO_OP situations. | |
51 | * 11/17/99 srl More performance enhancements. Inlined some internal functions. | |
52 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
53 | * to implementation file. | |
54 | * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) | |
55 | */ | |
56 | ||
57 | #include "unicode/utypes.h" | |
58 | ||
59 | #if !UCONFIG_NO_COLLATION | |
60 | ||
61 | #include "unicode/tblcoll.h" | |
62 | #include "unicode/coleitr.h" | |
374ca955 | 63 | #include "unicode/ures.h" |
b75a7d8f A |
64 | #include "unicode/uset.h" |
65 | #include "ucol_imp.h" | |
66 | #include "uresimp.h" | |
67 | #include "uhash.h" | |
68 | #include "cmemory.h" | |
69 | #include "cstring.h" | |
374ca955 | 70 | #include "putilimp.h" |
b75a7d8f A |
71 | |
72 | /* public RuleBasedCollator constructor ---------------------------------- */ | |
73 | ||
74 | U_NAMESPACE_BEGIN | |
75 | ||
76 | /** | |
77 | * Copy constructor, aliasing, not write-through | |
78 | */ | |
374ca955 A |
79 | RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
80 | : Collator(that) | |
81 | , dataIsOwned(FALSE) | |
82 | , isWriteThroughAlias(FALSE) | |
83 | , ucollator(that.ucollator) | |
84 | , urulestring(that.urulestring) | |
b75a7d8f A |
85 | { |
86 | } | |
87 | ||
88 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
89 | UErrorCode& status) : |
90 | dataIsOwned(FALSE) | |
b75a7d8f | 91 | { |
374ca955 A |
92 | construct(rules, |
93 | UCOL_DEFAULT_STRENGTH, | |
94 | UCOL_DEFAULT, | |
95 | status); | |
b75a7d8f A |
96 | } |
97 | ||
98 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
99 | ECollationStrength collationStrength, |
100 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 101 | { |
374ca955 A |
102 | construct(rules, |
103 | getUCollationStrength(collationStrength), | |
104 | UCOL_DEFAULT, | |
105 | status); | |
b75a7d8f A |
106 | } |
107 | ||
108 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
109 | UColAttributeValue decompositionMode, | |
110 | UErrorCode& status) : | |
374ca955 | 111 | dataIsOwned(FALSE) |
b75a7d8f | 112 | { |
374ca955 A |
113 | construct(rules, |
114 | UCOL_DEFAULT_STRENGTH, | |
115 | decompositionMode, | |
116 | status); | |
b75a7d8f A |
117 | } |
118 | ||
119 | RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
374ca955 A |
120 | ECollationStrength collationStrength, |
121 | UColAttributeValue decompositionMode, | |
122 | UErrorCode& status) : dataIsOwned(FALSE) | |
b75a7d8f | 123 | { |
374ca955 A |
124 | construct(rules, |
125 | getUCollationStrength(collationStrength), | |
126 | decompositionMode, | |
127 | status); | |
b75a7d8f A |
128 | } |
129 | ||
130 | void | |
131 | RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status) | |
132 | { | |
374ca955 A |
133 | urulestring = NULL; |
134 | if (U_SUCCESS(status)) | |
135 | { | |
136 | int32_t length; | |
137 | const UChar *r = ucol_getRules(ucollator, &length); | |
138 | ||
139 | if (length > 0) { | |
140 | // alias the rules string | |
141 | urulestring = new UnicodeString(TRUE, r, length); | |
142 | } | |
143 | else { | |
144 | urulestring = new UnicodeString(); | |
145 | } | |
146 | /* test for NULL */ | |
147 | if (urulestring == 0) { | |
148 | status = U_MEMORY_ALLOCATION_ERROR; | |
149 | return; | |
150 | } | |
b75a7d8f | 151 | } |
b75a7d8f A |
152 | } |
153 | ||
154 | // not aliasing, not write-through | |
155 | void | |
156 | RuleBasedCollator::construct(const UnicodeString& rules, | |
157 | UColAttributeValue collationStrength, | |
158 | UColAttributeValue decompositionMode, | |
159 | UErrorCode& status) | |
160 | { | |
374ca955 A |
161 | urulestring = 0; |
162 | ucollator = ucol_openRules(rules.getBuffer(), rules.length(), | |
163 | decompositionMode, collationStrength, | |
164 | NULL, &status); | |
b75a7d8f | 165 | |
374ca955 A |
166 | dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
167 | isWriteThroughAlias = FALSE; | |
b75a7d8f | 168 | |
374ca955 | 169 | setRuleStringFromCollator(status); |
b75a7d8f A |
170 | } |
171 | ||
172 | /* RuleBasedCollator public destructor ----------------------------------- */ | |
173 | ||
174 | RuleBasedCollator::~RuleBasedCollator() | |
175 | { | |
374ca955 A |
176 | if (dataIsOwned) |
177 | { | |
178 | ucol_close(ucollator); | |
179 | delete urulestring; | |
180 | } | |
181 | ucollator = 0; | |
182 | urulestring = 0; | |
b75a7d8f A |
183 | } |
184 | ||
185 | /* RuleBaseCollator public methods --------------------------------------- */ | |
186 | ||
187 | UBool RuleBasedCollator::operator==(const Collator& that) const | |
188 | { | |
189 | /* only checks for address equals here */ | |
190 | if (Collator::operator==(that)) | |
191 | return TRUE; | |
192 | ||
193 | if (getDynamicClassID() != that.getDynamicClassID()) | |
194 | return FALSE; /* not the same class */ | |
195 | ||
196 | RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | |
197 | ||
198 | // weiv: use C function, commented code below is wrong | |
199 | return ucol_equals(this->ucollator, thatAlias.ucollator); | |
200 | /* | |
201 | synwee : orginal code does not check for data compatibility | |
202 | */ | |
203 | /* | |
204 | if (ucollator != thatAlias.ucollator) | |
205 | return FALSE; | |
206 | ||
207 | return TRUE; | |
208 | */ | |
209 | } | |
210 | ||
374ca955 A |
211 | UBool RuleBasedCollator::operator!=(const Collator& other) const |
212 | { | |
213 | return !(*this == other); | |
214 | } | |
215 | ||
b75a7d8f A |
216 | // aliasing, not write-through |
217 | RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) | |
218 | { | |
374ca955 | 219 | if (this != &that) |
b75a7d8f | 220 | { |
374ca955 A |
221 | if (dataIsOwned) |
222 | { | |
223 | ucol_close(ucollator); | |
224 | ucollator = NULL; | |
225 | delete urulestring; | |
226 | } | |
b75a7d8f | 227 | |
374ca955 A |
228 | dataIsOwned = FALSE; |
229 | isWriteThroughAlias = FALSE; | |
230 | ucollator = that.ucollator; | |
231 | urulestring = that.urulestring; | |
232 | } | |
233 | return *this; | |
b75a7d8f A |
234 | } |
235 | ||
236 | // aliasing, not write-through | |
237 | Collator* RuleBasedCollator::clone() const | |
238 | { | |
239 | return new RuleBasedCollator(*this); | |
240 | } | |
241 | ||
242 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
243 | (const UnicodeString& source) const | |
244 | { | |
374ca955 A |
245 | UErrorCode status = U_ZERO_ERROR; |
246 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
247 | status); | |
248 | if (U_FAILURE(status)) { | |
249 | delete result; | |
250 | return NULL; | |
251 | } | |
b75a7d8f | 252 | |
374ca955 | 253 | return result; |
b75a7d8f A |
254 | } |
255 | ||
256 | /** | |
257 | * Create a CollationElementIterator object that will iterate over the | |
258 | * elements in a string, using the collation rules defined in this | |
259 | * RuleBasedCollator | |
260 | */ | |
261 | CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
262 | (const CharacterIterator& source) const | |
263 | { | |
374ca955 A |
264 | UErrorCode status = U_ZERO_ERROR; |
265 | CollationElementIterator *result = new CollationElementIterator(source, this, | |
266 | status); | |
b75a7d8f | 267 | |
374ca955 A |
268 | if (U_FAILURE(status)) { |
269 | delete result; | |
270 | return NULL; | |
271 | } | |
b75a7d8f | 272 | |
374ca955 | 273 | return result; |
b75a7d8f A |
274 | } |
275 | ||
276 | /** | |
277 | * Return a string representation of this collator's rules. The string can | |
278 | * later be passed to the constructor that takes a UnicodeString argument, | |
279 | * which will construct a collator that's functionally identical to this one. | |
280 | * You can also allow users to edit the string in order to change the collation | |
281 | * data, or you can print it out for inspection, or whatever. | |
282 | */ | |
283 | const UnicodeString& RuleBasedCollator::getRules() const | |
284 | { | |
285 | return (*urulestring); | |
286 | } | |
287 | ||
288 | void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) | |
289 | { | |
290 | int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); | |
291 | ||
292 | if (rulesize > 0) { | |
293 | UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); | |
294 | if(rules != NULL) { | |
374ca955 A |
295 | ucol_getRulesEx(ucollator, delta, rules, rulesize); |
296 | buffer.setTo(rules, rulesize); | |
297 | uprv_free(rules); | |
b75a7d8f | 298 | } else { // couldn't allocate |
374ca955 | 299 | buffer.remove(); |
b75a7d8f A |
300 | } |
301 | } | |
302 | else { | |
303 | buffer.remove(); | |
304 | } | |
305 | } | |
306 | ||
307 | UnicodeSet * | |
308 | RuleBasedCollator::getTailoredSet(UErrorCode &status) const | |
309 | { | |
374ca955 A |
310 | if(U_FAILURE(status)) { |
311 | return NULL; | |
312 | } | |
313 | return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); | |
b75a7d8f A |
314 | } |
315 | ||
316 | ||
317 | void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const | |
318 | { | |
319 | if (versionInfo!=NULL){ | |
320 | ucol_getVersion(ucollator, versionInfo); | |
321 | } | |
322 | } | |
323 | ||
324 | Collator::EComparisonResult RuleBasedCollator::compare( | |
325 | const UnicodeString& source, | |
326 | const UnicodeString& target, | |
327 | int32_t length) const | |
328 | { | |
374ca955 A |
329 | UErrorCode status = U_ZERO_ERROR; |
330 | return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status)); | |
b75a7d8f A |
331 | } |
332 | ||
333 | UCollationResult RuleBasedCollator::compare( | |
334 | const UnicodeString& source, | |
335 | const UnicodeString& target, | |
374ca955 | 336 | int32_t length, |
b75a7d8f A |
337 | UErrorCode &status) const |
338 | { | |
374ca955 | 339 | return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
b75a7d8f A |
340 | } |
341 | ||
342 | Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source, | |
343 | int32_t sourceLength, | |
344 | const UChar* target, | |
345 | int32_t targetLength) | |
346 | const | |
347 | { | |
374ca955 A |
348 | return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength, |
349 | target, targetLength)); | |
b75a7d8f A |
350 | } |
351 | ||
352 | UCollationResult RuleBasedCollator::compare(const UChar* source, | |
353 | int32_t sourceLength, | |
354 | const UChar* target, | |
374ca955 | 355 | int32_t targetLength, |
b75a7d8f A |
356 | UErrorCode &status) const |
357 | { | |
374ca955 A |
358 | if(U_SUCCESS(status)) { |
359 | return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); | |
360 | } else { | |
361 | return UCOL_EQUAL; | |
362 | } | |
b75a7d8f A |
363 | } |
364 | ||
365 | /** | |
366 | * Compare two strings using this collator | |
367 | */ | |
368 | Collator::EComparisonResult RuleBasedCollator::compare( | |
369 | const UnicodeString& source, | |
370 | const UnicodeString& target) const | |
371 | { | |
374ca955 A |
372 | return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
373 | target.getBuffer(), target.length())); | |
b75a7d8f A |
374 | } |
375 | ||
376 | UCollationResult RuleBasedCollator::compare( | |
377 | const UnicodeString& source, | |
374ca955 | 378 | const UnicodeString& target, |
b75a7d8f A |
379 | UErrorCode &status) const |
380 | { | |
374ca955 A |
381 | if(U_SUCCESS(status)) { |
382 | return ucol_strcoll(ucollator, source.getBuffer(), source.length(), | |
383 | target.getBuffer(), target.length()); | |
384 | } else { | |
385 | return UCOL_EQUAL; | |
386 | } | |
b75a7d8f A |
387 | } |
388 | ||
389 | /** | |
390 | * Retrieve a collation key for the specified string. The key can be compared | |
391 | * with other collation keys using a bitwise comparison (e.g. memcmp) to find | |
392 | * the ordering of their respective source strings. This is handy when doing a | |
393 | * sort, where each sort key must be compared many times. | |
394 | * | |
395 | * The basic algorithm here is to find all of the collation elements for each | |
396 | * character in the source string, convert them to an ASCII representation, and | |
397 | * put them into the collation key. But it's trickier than that. Each | |
398 | * collation element in a string has three components: primary ('A' vs 'B'), | |
374ca955 | 399 | * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
b75a7d8f A |
400 | * at the end of a string takes precedence over a secondary or tertiary |
401 | * difference earlier in the string. | |
402 | * | |
403 | * To account for this, we put all of the primary orders at the beginning of | |
404 | * the string, followed by the secondary and tertiary orders. Each set of | |
405 | * orders is terminated by nulls so that a key for a string which is an initial | |
406 | * substring of another key will compare less without any special case. | |
407 | * | |
408 | * Here's a hypothetical example, with the collation element represented as a | |
409 | * three-digit number, one digit for primary, one for secondary, etc. | |
410 | * | |
374ca955 | 411 | * String: A a B \u00C9 |
b75a7d8f A |
412 | * Collation Elements: 101 100 201 511 |
413 | * Collation Key: 1125<null>0001<null>1011<null> | |
414 | * | |
415 | * To make things even trickier, secondary differences (accent marks) are | |
416 | * compared starting at the *end* of the string in languages with French | |
417 | * secondary ordering. But when comparing the accent marks on a single base | |
418 | * character, they are compared from the beginning. To handle this, we reverse | |
419 | * all of the accents that belong to each base character, then we reverse the | |
420 | * entire string of secondary orderings at the end. | |
421 | */ | |
422 | CollationKey& RuleBasedCollator::getCollationKey( | |
423 | const UnicodeString& source, | |
424 | CollationKey& sortkey, | |
425 | UErrorCode& status) const | |
426 | { | |
374ca955 | 427 | return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
b75a7d8f A |
428 | } |
429 | ||
430 | CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, | |
431 | int32_t sourceLen, | |
432 | CollationKey& sortkey, | |
433 | UErrorCode& status) const | |
434 | { | |
374ca955 A |
435 | if (U_FAILURE(status)) |
436 | { | |
437 | return sortkey.setToBogus(); | |
438 | } | |
b75a7d8f | 439 | |
374ca955 A |
440 | if ((!source) || (sourceLen == 0)) { |
441 | return sortkey.reset(); | |
442 | } | |
b75a7d8f | 443 | |
374ca955 A |
444 | uint8_t *result; |
445 | int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, | |
446 | source, sourceLen, | |
447 | &result, | |
448 | &status); | |
449 | sortkey.adopt(result, resultLen); | |
450 | return sortkey; | |
b75a7d8f A |
451 | } |
452 | ||
453 | /** | |
454 | * Return the maximum length of any expansion sequences that end with the | |
455 | * specified comparison order. | |
456 | * @param order a collation order returned by previous or next. | |
457 | * @return the maximum length of any expansion seuences ending with the | |
458 | * specified order or 1 if collation order does not occur at the end of any | |
459 | * expansion sequence. | |
460 | * @see CollationElementIterator#getMaxExpansion | |
461 | */ | |
462 | int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | |
463 | { | |
374ca955 A |
464 | uint8_t result; |
465 | UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); | |
466 | return result; | |
b75a7d8f A |
467 | } |
468 | ||
469 | uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, | |
470 | UErrorCode &status) | |
471 | { | |
374ca955 | 472 | return ucol_cloneRuleData(ucollator, &length, &status); |
b75a7d8f A |
473 | } |
474 | ||
475 | void RuleBasedCollator::setAttribute(UColAttribute attr, | |
476 | UColAttributeValue value, | |
477 | UErrorCode &status) | |
478 | { | |
374ca955 A |
479 | if (U_FAILURE(status)) |
480 | return; | |
481 | checkOwned(); | |
482 | ucol_setAttribute(ucollator, attr, value, &status); | |
b75a7d8f A |
483 | } |
484 | ||
485 | UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, | |
486 | UErrorCode &status) | |
487 | { | |
374ca955 A |
488 | if (U_FAILURE(status)) |
489 | return UCOL_DEFAULT; | |
490 | return ucol_getAttribute(ucollator, attr, &status); | |
b75a7d8f A |
491 | } |
492 | ||
493 | uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { | |
374ca955 A |
494 | checkOwned(); |
495 | return ucol_setVariableTop(ucollator, varTop, len, &status); | |
b75a7d8f A |
496 | } |
497 | ||
498 | uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) { | |
374ca955 A |
499 | checkOwned(); |
500 | return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); | |
b75a7d8f A |
501 | } |
502 | ||
503 | void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) { | |
374ca955 A |
504 | checkOwned(); |
505 | ucol_restoreVariableTop(ucollator, varTop, &status); | |
b75a7d8f A |
506 | } |
507 | ||
508 | uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { | |
509 | return ucol_getVariableTop(ucollator, &status); | |
510 | } | |
511 | ||
512 | Collator* RuleBasedCollator::safeClone(void) | |
513 | { | |
374ca955 A |
514 | UErrorCode intStatus = U_ZERO_ERROR; |
515 | int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; | |
516 | UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, | |
517 | &intStatus); | |
518 | if (U_FAILURE(intStatus)) { | |
519 | return NULL; | |
520 | } | |
b75a7d8f | 521 | |
374ca955 A |
522 | UnicodeString *r = new UnicodeString(*urulestring); |
523 | RuleBasedCollator *result = new RuleBasedCollator(ucol, r); | |
524 | result->dataIsOwned = TRUE; | |
525 | result->isWriteThroughAlias = FALSE; | |
b75a7d8f | 526 | |
374ca955 | 527 | return result; |
b75a7d8f A |
528 | } |
529 | ||
530 | ||
531 | int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, | |
532 | uint8_t *result, int32_t resultLength) | |
533 | const | |
534 | { | |
374ca955 | 535 | return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
b75a7d8f A |
536 | } |
537 | ||
538 | int32_t RuleBasedCollator::getSortKey(const UChar *source, | |
539 | int32_t sourceLength, uint8_t *result, | |
540 | int32_t resultLength) const | |
541 | { | |
374ca955 | 542 | return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
b75a7d8f A |
543 | } |
544 | ||
545 | Collator::ECollationStrength RuleBasedCollator::getStrength(void) const | |
546 | { | |
374ca955 A |
547 | UErrorCode intStatus = U_ZERO_ERROR; |
548 | return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH, | |
549 | &intStatus)); | |
b75a7d8f A |
550 | } |
551 | ||
552 | void RuleBasedCollator::setStrength(ECollationStrength newStrength) | |
553 | { | |
374ca955 A |
554 | checkOwned(); |
555 | UErrorCode intStatus = U_ZERO_ERROR; | |
556 | UCollationStrength strength = getUCollationStrength(newStrength); | |
557 | ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus); | |
b75a7d8f A |
558 | } |
559 | ||
560 | /** | |
561 | * Create a hash code for this collation. Just hash the main rule table -- that | |
562 | * should be good enough for almost any use. | |
563 | */ | |
564 | int32_t RuleBasedCollator::hashCode() const | |
565 | { | |
374ca955 A |
566 | int32_t length; |
567 | const UChar *rules = ucol_getRules(ucollator, &length); | |
568 | return uhash_hashUCharsN(rules, length); | |
b75a7d8f A |
569 | } |
570 | ||
571 | /** | |
572 | * return the locale of this collator | |
573 | */ | |
574 | const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { | |
374ca955 A |
575 | const char *result = ucol_getLocale(ucollator, type, &status); |
576 | if(result == NULL) { | |
577 | Locale res(""); | |
578 | res.setToBogus(); | |
579 | return res; | |
580 | } else { | |
581 | return Locale(result); | |
582 | } | |
b75a7d8f A |
583 | } |
584 | ||
585 | void | |
586 | RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale) { | |
374ca955 | 587 | checkOwned(); |
b75a7d8f A |
588 | size_t rlen = uprv_strlen(requestedLocale.getName()); |
589 | char* rloc = (char *)uprv_malloc((rlen+1)*sizeof(char)); | |
590 | if (rloc) { | |
591 | uprv_strcpy(rloc, requestedLocale.getName()); | |
592 | size_t vlen = uprv_strlen(validLocale.getName()); | |
593 | char* vloc = (char*)uprv_malloc((vlen+1)*sizeof(char)); | |
594 | if (vloc) { | |
595 | uprv_strcpy(vloc, validLocale.getName()); | |
596 | ucol_setReqValidLocales(ucollator, rloc, vloc); | |
597 | return; | |
598 | } | |
599 | uprv_free(rloc); | |
600 | } | |
601 | } | |
602 | ||
603 | // RuleBaseCollatorNew private constructor ---------------------------------- | |
604 | ||
374ca955 | 605 | RuleBasedCollator::RuleBasedCollator() |
b75a7d8f A |
606 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(0), urulestring(0) |
607 | { | |
608 | } | |
609 | ||
610 | RuleBasedCollator::RuleBasedCollator(UCollator *collator, | |
611 | UnicodeString *rule) | |
374ca955 | 612 | : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), urulestring(0) |
b75a7d8f | 613 | { |
374ca955 A |
614 | ucollator = collator; |
615 | urulestring = rule; | |
b75a7d8f A |
616 | } |
617 | ||
618 | RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, | |
619 | UErrorCode& status) : | |
620 | dataIsOwned(FALSE), ucollator(0), urulestring(0) | |
621 | { | |
374ca955 A |
622 | if (U_FAILURE(status)) |
623 | return; | |
b75a7d8f | 624 | |
374ca955 A |
625 | /* |
626 | Try to load, in order: | |
627 | 1. The desired locale's collation. | |
628 | 2. A fallback of the desired locale. | |
629 | 3. The default locale's collation. | |
630 | 4. A fallback of the default locale. | |
631 | 5. The default collation rules, which contains en_US collation rules. | |
632 | ||
633 | To reiterate, we try: | |
634 | Specific: | |
635 | language+country+variant | |
636 | language+country | |
637 | language | |
638 | Default: | |
639 | language+country+variant | |
640 | language+country | |
641 | language | |
642 | Root: (aka DEFAULTRULES) | |
643 | steps 1-5 are handled by resource bundle fallback mechanism. | |
644 | however, in a very unprobable situation that no resource bundle | |
645 | data exists, step 5 is repeated with hardcoded default rules. | |
646 | */ | |
647 | ||
648 | setUCollator(desiredLocale, status); | |
649 | ||
650 | if (U_FAILURE(status)) | |
651 | { | |
652 | status = U_ZERO_ERROR; | |
b75a7d8f | 653 | |
374ca955 A |
654 | setUCollator(kRootLocaleName, status); |
655 | if (status == U_ZERO_ERROR) { | |
656 | status = U_USING_DEFAULT_WARNING; | |
657 | } | |
b75a7d8f | 658 | } |
b75a7d8f | 659 | |
374ca955 A |
660 | if (U_SUCCESS(status)) |
661 | { | |
662 | int32_t length; | |
663 | const UChar *r = ucol_getRules(ucollator, &length); | |
664 | if (length > 0) { | |
665 | // alias the rules string | |
666 | urulestring = new UnicodeString(TRUE, r, length); | |
667 | } | |
668 | else { | |
669 | urulestring = new UnicodeString(); | |
670 | } | |
671 | /* test for NULL */ | |
672 | if (urulestring == 0) { | |
673 | status = U_MEMORY_ALLOCATION_ERROR; | |
674 | return; | |
675 | } | |
676 | dataIsOwned = TRUE; | |
677 | isWriteThroughAlias = FALSE; | |
b75a7d8f | 678 | } |
b75a7d8f A |
679 | } |
680 | ||
681 | void | |
682 | RuleBasedCollator::setUCollator(const char *locale, | |
683 | UErrorCode &status) | |
684 | { | |
374ca955 A |
685 | if (U_FAILURE(status)) |
686 | return; | |
687 | if (ucollator && dataIsOwned) | |
688 | ucol_close(ucollator); | |
689 | ucollator = ucol_open_internal(locale, &status); | |
690 | dataIsOwned = TRUE; | |
691 | isWriteThroughAlias = FALSE; | |
b75a7d8f A |
692 | } |
693 | ||
694 | ||
695 | void | |
696 | RuleBasedCollator::checkOwned() { | |
374ca955 A |
697 | if (!(dataIsOwned || isWriteThroughAlias)) { |
698 | UErrorCode status = U_ZERO_ERROR; | |
699 | ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); | |
700 | setRuleStringFromCollator(status); | |
701 | dataIsOwned = TRUE; | |
702 | isWriteThroughAlias = FALSE; | |
703 | } | |
b75a7d8f A |
704 | } |
705 | ||
706 | /* RuleBasedCollator private data members -------------------------------- */ | |
707 | ||
708 | /* | |
709 | * TODO: | |
710 | * These should probably be enums (<=0xffff) or #defines (>0xffff) | |
711 | * for better performance. | |
712 | * Include ucol_imp.h and use its constants if possible. | |
713 | * Only used in coleitr.h?! | |
714 | * Remove from here! | |
715 | */ | |
716 | ||
717 | /* need look up in .commit() */ | |
718 | const int32_t RuleBasedCollator::CHARINDEX = 0x70000000; | |
719 | /* Expand index follows */ | |
720 | const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000; | |
721 | /* contract indexes follows */ | |
722 | const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000; | |
723 | /* unmapped character values */ | |
724 | const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF; | |
725 | /* primary strength increment */ | |
726 | const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000; | |
727 | /* secondary strength increment */ | |
728 | const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100; | |
729 | /* tertiary strength increment */ | |
730 | const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001; | |
731 | /* mask off anything but primary order */ | |
732 | const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000; | |
733 | /* mask off anything but secondary order */ | |
734 | const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00; | |
735 | /* mask off anything but tertiary order */ | |
736 | const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff; | |
737 | /* mask off ignorable char order */ | |
738 | const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff; | |
739 | /* use only the primary difference */ | |
740 | const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000; | |
741 | /* use only the primary and secondary difference */ | |
742 | const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00; | |
743 | /* primary order shift */ | |
744 | const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16; | |
745 | /* secondary order shift */ | |
746 | const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8; | |
747 | /* starting value for collation elements */ | |
748 | const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202; | |
749 | /* testing mask for primary low element */ | |
750 | const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000; | |
751 | /* reseting value for secondaries and tertiaries */ | |
752 | const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202; | |
753 | /* reseting value for tertiaries */ | |
754 | const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002; | |
755 | ||
756 | const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202; | |
757 | ||
374ca955 | 758 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
b75a7d8f A |
759 | |
760 | U_NAMESPACE_END | |
761 | ||
762 | #endif /* #if !UCONFIG_NO_COLLATION */ |