]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
46f4442e | 3 | * Copyright (C) 1996-2008, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ****************************************************************************** | |
6 | */ | |
7 | ||
46f4442e A |
8 | /** |
9 | * \file | |
10 | * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator. | |
11 | */ | |
12 | ||
b75a7d8f A |
13 | /** |
14 | * File tblcoll.h | |
15 | * | |
16 | * Created by: Helena Shih | |
17 | * | |
18 | * Modification History: | |
19 | * | |
20 | * Date Name Description | |
21 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
22 | * constructor which reads RuleBasedCollator object from | |
23 | * a binary file. Added writeToFile method which streams | |
24 | * RuleBasedCollator out to a binary file. The streamIn | |
25 | * and streamOut methods use istream and ostream objects | |
26 | * in binary mode. | |
27 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
28 | * hold invariant data. | |
29 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
30 | * Added a private RuleBasedCollator(Locale&) constructor, | |
31 | * to be used by Collator::createDefault(). General | |
32 | * clean up. | |
33 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
34 | * constructor and getDynamicClassID. | |
35 | * 3/5/97 aliu Modified constructFromFile() to add parameter | |
36 | * specifying whether or not binary loading is to be | |
37 | * attempted. This is required for dynamic rule loading. | |
38 | * 05/07/97 helena Added memory allocation error detection. | |
39 | * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to | |
40 | * use MergeCollation::getPattern. | |
41 | * 6/20/97 helena Java class name change. | |
42 | * 8/18/97 helena Added internal API documentation. | |
43 | * 09/03/97 helena Added createCollationKeyValues(). | |
44 | * 02/10/98 damiba Added compare with "length" parameter | |
45 | * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
46 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
47 | * Normalizer::EMode | |
48 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
49 | * 11/02/99 helena Collator performance enhancements. Eliminates the | |
50 | * UnicodeString construction and special case for NO_OP. | |
51 | * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator | |
52 | * internal state management. | |
53 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
54 | * to implementation file. | |
55 | * 01/29/01 synwee Modified into a C++ wrapper which calls C API | |
56 | * (ucol.h) | |
57 | */ | |
58 | ||
59 | #ifndef TBLCOLL_H | |
60 | #define TBLCOLL_H | |
61 | ||
62 | #include "unicode/utypes.h" | |
63 | ||
73c04bcf | 64 | |
b75a7d8f A |
65 | #if !UCONFIG_NO_COLLATION |
66 | ||
67 | #include "unicode/coll.h" | |
68 | #include "unicode/ucol.h" | |
69 | #include "unicode/sortkey.h" | |
70 | #include "unicode/normlzr.h" | |
71 | ||
72 | U_NAMESPACE_BEGIN | |
73 | ||
74 | /** | |
75 | * @stable ICU 2.0 | |
76 | */ | |
77 | class StringSearch; | |
78 | /** | |
79 | * @stable ICU 2.0 | |
80 | */ | |
81 | class CollationElementIterator; | |
82 | ||
83 | /** | |
84 | * The RuleBasedCollator class provides the simple implementation of | |
85 | * Collator, using data-driven tables. The user can create a customized | |
86 | * table-based collation. | |
87 | * <P> | |
88 | * <em>Important: </em>The ICU collation service has been reimplemented | |
89 | * in order to achieve better performance and UCA compliance. | |
90 | * For details, see the | |
46f4442e | 91 | * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> |
b75a7d8f A |
92 | * collation design document</a>. |
93 | * <p> | |
94 | * RuleBasedCollator is a thin C++ wrapper over the C implementation. | |
95 | * <p> | |
96 | * For more information about the collation service see | |
46f4442e | 97 | * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. |
b75a7d8f A |
98 | * <p> |
99 | * Collation service provides correct sorting orders for most locales supported in ICU. | |
100 | * If specific data for a locale is not available, the order eventually falls back | |
101 | * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. | |
102 | * <p> | |
103 | * Sort ordering may be customized by providing your own set of rules. For more on | |
46f4442e | 104 | * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html"> |
b75a7d8f A |
105 | * Collation customization</a> section of the users guide. |
106 | * <p> | |
107 | * Note, RuleBasedCollator is not to be subclassed. | |
108 | * @see Collator | |
109 | * @version 2.0 11/15/2001 | |
110 | */ | |
111 | class U_I18N_API RuleBasedCollator : public Collator | |
112 | { | |
113 | public: | |
114 | ||
115 | // constructor ------------------------------------------------------------- | |
116 | ||
374ca955 A |
117 | /** |
118 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
119 | * collation table out of them. Please see RuleBasedCollator class | |
120 | * description for more details on the collation rule syntax. | |
121 | * @param rules the collation rules to build the collation table from. | |
122 | * @param status reporting a success or an error. | |
123 | * @see Locale | |
124 | * @stable ICU 2.0 | |
125 | */ | |
b75a7d8f A |
126 | RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); |
127 | ||
374ca955 A |
128 | /** |
129 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
130 | * collation table out of them. Please see RuleBasedCollator class | |
131 | * description for more details on the collation rule syntax. | |
132 | * @param rules the collation rules to build the collation table from. | |
133 | * @param collationStrength default strength for comparison | |
134 | * @param status reporting a success or an error. | |
135 | * @see Locale | |
136 | * @stable ICU 2.0 | |
137 | */ | |
138 | RuleBasedCollator(const UnicodeString& rules, | |
b75a7d8f A |
139 | ECollationStrength collationStrength, |
140 | UErrorCode& status); | |
141 | ||
374ca955 A |
142 | /** |
143 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
144 | * collation table out of them. Please see RuleBasedCollator class | |
145 | * description for more details on the collation rule syntax. | |
146 | * @param rules the collation rules to build the collation table from. | |
147 | * @param decompositionMode the normalisation mode | |
148 | * @param status reporting a success or an error. | |
149 | * @see Locale | |
150 | * @stable ICU 2.0 | |
151 | */ | |
152 | RuleBasedCollator(const UnicodeString& rules, | |
b75a7d8f A |
153 | UColAttributeValue decompositionMode, |
154 | UErrorCode& status); | |
155 | ||
374ca955 A |
156 | /** |
157 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
158 | * collation table out of them. Please see RuleBasedCollator class | |
159 | * description for more details on the collation rule syntax. | |
160 | * @param rules the collation rules to build the collation table from. | |
161 | * @param collationStrength default strength for comparison | |
162 | * @param decompositionMode the normalisation mode | |
163 | * @param status reporting a success or an error. | |
164 | * @see Locale | |
165 | * @stable ICU 2.0 | |
166 | */ | |
167 | RuleBasedCollator(const UnicodeString& rules, | |
b75a7d8f A |
168 | ECollationStrength collationStrength, |
169 | UColAttributeValue decompositionMode, | |
170 | UErrorCode& status); | |
171 | ||
374ca955 A |
172 | /** |
173 | * Copy constructor. | |
174 | * @param other the RuleBasedCollator object to be copied | |
175 | * @see Locale | |
176 | * @stable ICU 2.0 | |
177 | */ | |
b75a7d8f A |
178 | RuleBasedCollator(const RuleBasedCollator& other); |
179 | ||
73c04bcf A |
180 | |
181 | /** Opens a collator from a collator binary image created using | |
182 | * cloneBinary. Binary image used in instantiation of the | |
183 | * collator remains owned by the user and should stay around for | |
184 | * the lifetime of the collator. The API also takes a base collator | |
185 | * which usually should be UCA. | |
186 | * @param bin binary image owned by the user and required through the | |
187 | * lifetime of the collator | |
188 | * @param length size of the image. If negative, the API will try to | |
189 | * figure out the length of the image | |
190 | * @param base fallback collator, usually UCA. Base is required to be | |
191 | * present through the lifetime of the collator. Currently | |
192 | * it cannot be NULL. | |
193 | * @param status for catching errors | |
194 | * @return newly created collator | |
195 | * @see cloneBinary | |
46f4442e | 196 | * @stable ICU 3.4 |
73c04bcf A |
197 | */ |
198 | RuleBasedCollator(const uint8_t *bin, int32_t length, | |
199 | const RuleBasedCollator *base, | |
200 | UErrorCode &status); | |
374ca955 | 201 | // destructor -------------------------------------------------------------- |
b75a7d8f | 202 | |
374ca955 A |
203 | /** |
204 | * Destructor. | |
205 | * @stable ICU 2.0 | |
206 | */ | |
b75a7d8f A |
207 | virtual ~RuleBasedCollator(); |
208 | ||
374ca955 | 209 | // public methods ---------------------------------------------------------- |
b75a7d8f | 210 | |
374ca955 A |
211 | /** |
212 | * Assignment operator. | |
213 | * @param other the RuleBasedCollator object to be assigned from. | |
214 | * @stable ICU 2.0 | |
215 | */ | |
b75a7d8f A |
216 | RuleBasedCollator& operator=(const RuleBasedCollator& other); |
217 | ||
374ca955 A |
218 | /** |
219 | * Returns true if argument is the same as this object. | |
220 | * @param other Collator object to be compared. | |
221 | * @return true if the argument is the same as this object. | |
222 | * @stable ICU 2.0 | |
223 | */ | |
224 | virtual UBool operator==(const Collator& other) const; | |
225 | ||
226 | /** | |
227 | * Returns true if argument is not the same as this object. | |
228 | * @param other Collator object to be compared | |
229 | * @return returns true if argument is not the same as this object. | |
230 | * @stable ICU 2.0 | |
231 | */ | |
232 | virtual UBool operator!=(const Collator& other) const; | |
233 | ||
234 | /** | |
235 | * Makes a deep copy of the object. | |
236 | * The caller owns the returned object. | |
237 | * @return the cloned object. | |
238 | * @stable ICU 2.0 | |
239 | */ | |
240 | virtual Collator* clone(void) const; | |
241 | ||
242 | /** | |
243 | * Creates a collation element iterator for the source string. The caller of | |
244 | * this method is responsible for the memory management of the return | |
245 | * pointer. | |
246 | * @param source the string over which the CollationElementIterator will | |
247 | * iterate. | |
248 | * @return the collation element iterator of the source string using this as | |
249 | * the base Collator. | |
250 | * @stable ICU 2.2 | |
251 | */ | |
b75a7d8f A |
252 | virtual CollationElementIterator* createCollationElementIterator( |
253 | const UnicodeString& source) const; | |
254 | ||
374ca955 A |
255 | /** |
256 | * Creates a collation element iterator for the source. The caller of this | |
257 | * method is responsible for the memory management of the returned pointer. | |
258 | * @param source the CharacterIterator which produces the characters over | |
259 | * which the CollationElementIterator will iterate. | |
260 | * @return the collation element iterator of the source using this as the | |
261 | * base Collator. | |
262 | * @stable ICU 2.2 | |
263 | */ | |
264 | virtual CollationElementIterator* createCollationElementIterator( | |
265 | const CharacterIterator& source) const; | |
266 | ||
267 | /** | |
268 | * Compares a range of character data stored in two different strings based | |
269 | * on the collation rules. Returns information about whether a string is | |
270 | * less than, greater than or equal to another string in a language. | |
271 | * This can be overridden in a subclass. | |
272 | * @param source the source string. | |
273 | * @param target the target string to be compared with the source string. | |
274 | * @return the comparison result. GREATER if the source string is greater | |
275 | * than the target string, LESS if the source is less than the | |
276 | * target. Otherwise, returns EQUAL. | |
277 | * @deprecated ICU 2.6 Use overload with UErrorCode& | |
278 | */ | |
279 | virtual EComparisonResult compare(const UnicodeString& source, | |
280 | const UnicodeString& target) const; | |
281 | ||
282 | ||
283 | /** | |
284 | * The comparison function compares the character data stored in two | |
285 | * different strings. Returns information about whether a string is less | |
286 | * than, greater than or equal to another string. | |
287 | * @param source the source string to be compared with. | |
288 | * @param target the string that is to be compared with the source string. | |
289 | * @param status possible error code | |
290 | * @return Returns an enum value. UCOL_GREATER if source is greater | |
291 | * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
292 | * than target | |
293 | * @stable ICU 2.6 | |
294 | **/ | |
295 | virtual UCollationResult compare(const UnicodeString& source, | |
296 | const UnicodeString& target, | |
297 | UErrorCode &status) const; | |
298 | ||
299 | /** | |
300 | * Compares a range of character data stored in two different strings based | |
301 | * on the collation rules up to the specified length. Returns information | |
302 | * about whether a string is less than, greater than or equal to another | |
303 | * string in a language. This can be overridden in a subclass. | |
304 | * @param source the source string. | |
305 | * @param target the target string to be compared with the source string. | |
306 | * @param length compares up to the specified length | |
307 | * @return the comparison result. GREATER if the source string is greater | |
308 | * than the target string, LESS if the source is less than the | |
309 | * target. Otherwise, returns EQUAL. | |
310 | * @deprecated ICU 2.6 Use overload with UErrorCode& | |
311 | */ | |
312 | virtual EComparisonResult compare(const UnicodeString& source, | |
313 | const UnicodeString& target, | |
314 | int32_t length) const; | |
315 | ||
316 | /** | |
317 | * Does the same thing as compare but limits the comparison to a specified | |
318 | * length | |
319 | * @param source the source string to be compared with. | |
320 | * @param target the string that is to be compared with the source string. | |
321 | * @param length the length the comparison is limited to | |
322 | * @param status possible error code | |
323 | * @return Returns an enum value. UCOL_GREATER if source (up to the specified | |
324 | * length) is greater than target; UCOL_EQUAL if source (up to specified | |
325 | * length) is equal to target; UCOL_LESS if source (up to the specified | |
326 | * length) is less than target. | |
327 | * @stable ICU 2.6 | |
328 | */ | |
329 | virtual UCollationResult compare(const UnicodeString& source, | |
330 | const UnicodeString& target, | |
331 | int32_t length, | |
332 | UErrorCode &status) const; | |
333 | ||
334 | /** | |
335 | * The comparison function compares the character data stored in two | |
336 | * different string arrays. Returns information about whether a string array | |
337 | * is less than, greater than or equal to another string array. | |
338 | * <p>Example of use: | |
339 | * <pre> | |
340 | * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" | |
341 | * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" | |
342 | * . UErrorCode status = U_ZERO_ERROR; | |
343 | * . Collator *myCollation = | |
344 | * . Collator::createInstance(Locale::US, status); | |
345 | * . if (U_FAILURE(status)) return; | |
346 | * . myCollation->setStrength(Collator::PRIMARY); | |
347 | * . // result would be Collator::EQUAL ("abc" == "ABC") | |
348 | * . // (no primary difference between "abc" and "ABC") | |
349 | * . Collator::EComparisonResult result = | |
350 | * . myCollation->compare(abc, 3, ABC, 3); | |
351 | * . myCollation->setStrength(Collator::TERTIARY); | |
352 | * . // result would be Collator::LESS ("abc" <<< "ABC") | |
353 | * . // (with tertiary difference between "abc" and "ABC") | |
354 | * . result = myCollation->compare(abc, 3, ABC, 3); | |
355 | * </pre> | |
356 | * @param source the source string array to be compared with. | |
357 | * @param sourceLength the length of the source string array. If this value | |
358 | * is equal to -1, the string array is null-terminated. | |
359 | * @param target the string that is to be compared with the source string. | |
360 | * @param targetLength the length of the target string array. If this value | |
361 | * is equal to -1, the string array is null-terminated. | |
362 | * @return Returns a byte value. GREATER if source is greater than target; | |
363 | * EQUAL if source is equal to target; LESS if source is less than | |
364 | * target | |
365 | * @deprecated ICU 2.6 Use overload with UErrorCode& | |
366 | */ | |
367 | virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, | |
368 | const UChar* target, int32_t targetLength) | |
369 | const; | |
370 | ||
371 | /** | |
372 | * The comparison function compares the character data stored in two | |
373 | * different string arrays. Returns information about whether a string array | |
374 | * is less than, greater than or equal to another string array. | |
375 | * @param source the source string array to be compared with. | |
376 | * @param sourceLength the length of the source string array. If this value | |
377 | * is equal to -1, the string array is null-terminated. | |
378 | * @param target the string that is to be compared with the source string. | |
379 | * @param targetLength the length of the target string array. If this value | |
380 | * is equal to -1, the string array is null-terminated. | |
381 | * @param status possible error code | |
382 | * @return Returns an enum value. UCOL_GREATER if source is greater | |
383 | * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
384 | * than target | |
385 | * @stable ICU 2.6 | |
386 | */ | |
387 | virtual UCollationResult compare(const UChar* source, int32_t sourceLength, | |
388 | const UChar* target, int32_t targetLength, | |
389 | UErrorCode &status) const; | |
390 | ||
391 | /** | |
392 | * Transforms a specified region of the string into a series of characters | |
393 | * that can be compared with CollationKey.compare. Use a CollationKey when | |
394 | * you need to do repeated comparisons on the same string. For a single | |
395 | * comparison the compare method will be faster. | |
396 | * @param source the source string. | |
397 | * @param key the transformed key of the source string. | |
398 | * @param status the error code status. | |
399 | * @return the transformed key. | |
400 | * @see CollationKey | |
401 | * @deprecated ICU 2.8 Use getSortKey(...) instead | |
402 | */ | |
403 | virtual CollationKey& getCollationKey(const UnicodeString& source, | |
404 | CollationKey& key, | |
405 | UErrorCode& status) const; | |
406 | ||
407 | /** | |
408 | * Transforms a specified region of the string into a series of characters | |
409 | * that can be compared with CollationKey.compare. Use a CollationKey when | |
410 | * you need to do repeated comparisons on the same string. For a single | |
411 | * comparison the compare method will be faster. | |
412 | * @param source the source string. | |
413 | * @param sourceLength the length of the source string. | |
414 | * @param key the transformed key of the source string. | |
415 | * @param status the error code status. | |
416 | * @return the transformed key. | |
417 | * @see CollationKey | |
418 | * @deprecated ICU 2.8 Use getSortKey(...) instead | |
419 | */ | |
420 | virtual CollationKey& getCollationKey(const UChar *source, | |
421 | int32_t sourceLength, | |
422 | CollationKey& key, | |
423 | UErrorCode& status) const; | |
424 | ||
425 | /** | |
426 | * Generates the hash code for the rule-based collation object. | |
427 | * @return the hash code. | |
428 | * @stable ICU 2.0 | |
429 | */ | |
430 | virtual int32_t hashCode(void) const; | |
431 | ||
432 | /** | |
433 | * Gets the locale of the Collator | |
434 | * @param type can be either requested, valid or actual locale. For more | |
435 | * information see the definition of ULocDataLocaleType in | |
436 | * uloc.h | |
437 | * @param status the error code status. | |
438 | * @return locale where the collation data lives. If the collator | |
439 | * was instantiated from rules, locale is empty. | |
440 | * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback | |
441 | */ | |
442 | virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; | |
443 | ||
444 | /** | |
445 | * Gets the table-based rules for the collation object. | |
446 | * @return returns the collation rules that the table collation object was | |
447 | * created from. | |
448 | * @stable ICU 2.0 | |
449 | */ | |
450 | const UnicodeString& getRules(void) const; | |
451 | ||
452 | /** | |
453 | * Gets the version information for a Collator. | |
454 | * @param info the version # information, the result will be filled in | |
455 | * @stable ICU 2.0 | |
456 | */ | |
457 | virtual void getVersion(UVersionInfo info) const; | |
458 | ||
459 | /** | |
460 | * Return the maximum length of any expansion sequences that end with the | |
461 | * specified comparison order. | |
462 | * @param order a collation order returned by previous or next. | |
463 | * @return maximum size of the expansion sequences ending with the collation | |
464 | * element or 1 if collation element does not occur at the end of | |
465 | * any expansion sequence | |
466 | * @see CollationElementIterator#getMaxExpansion | |
467 | * @stable ICU 2.0 | |
468 | */ | |
469 | int32_t getMaxExpansion(int32_t order) const; | |
470 | ||
471 | /** | |
472 | * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This | |
473 | * method is to implement a simple version of RTTI, since not all C++ | |
474 | * compilers support genuine RTTI. Polymorphic operator==() and clone() | |
475 | * methods call this method. | |
476 | * @return The class ID for this object. All objects of a given class have | |
477 | * the same class ID. Objects of other classes have different class | |
478 | * IDs. | |
479 | * @stable ICU 2.0 | |
480 | */ | |
481 | virtual UClassID getDynamicClassID(void) const; | |
482 | ||
483 | /** | |
484 | * Returns the class ID for this class. This is useful only for comparing to | |
485 | * a return value from getDynamicClassID(). For example: | |
486 | * <pre> | |
487 | * Base* polymorphic_pointer = createPolymorphicObject(); | |
488 | * if (polymorphic_pointer->getDynamicClassID() == | |
489 | * Derived::getStaticClassID()) ... | |
490 | * </pre> | |
491 | * @return The class ID for all objects of this class. | |
492 | * @stable ICU 2.0 | |
493 | */ | |
494 | static UClassID U_EXPORT2 getStaticClassID(void); | |
495 | ||
496 | /** | |
497 | * Returns the binary format of the class's rules. The format is that of | |
498 | * .col files. | |
499 | * @param length Returns the length of the data, in bytes | |
500 | * @param status the error code status. | |
501 | * @return memory, owned by the caller, of size 'length' bytes. | |
502 | * @stable ICU 2.2 | |
503 | */ | |
504 | uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); | |
505 | ||
73c04bcf A |
506 | |
507 | /** Creates a binary image of a collator. This binary image can be stored and | |
508 | * later used to instantiate a collator using ucol_openBinary. | |
509 | * This API supports preflighting. | |
510 | * @param buffer a fill-in buffer to receive the binary image | |
511 | * @param capacity capacity of the destination buffer | |
512 | * @param status for catching errors | |
513 | * @return size of the image | |
514 | * @see ucol_openBinary | |
46f4442e | 515 | * @stable ICU 3.4 |
73c04bcf A |
516 | */ |
517 | int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status); | |
518 | ||
374ca955 A |
519 | /** |
520 | * Returns current rules. Delta defines whether full rules are returned or | |
521 | * just the tailoring. | |
522 | * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. | |
523 | * @param buffer UnicodeString to store the result rules | |
524 | * @stable ICU 2.2 | |
525 | */ | |
526 | void getRules(UColRuleOption delta, UnicodeString &buffer); | |
527 | ||
528 | /** | |
529 | * Universal attribute setter | |
530 | * @param attr attribute type | |
531 | * @param value attribute value | |
532 | * @param status to indicate whether the operation went on smoothly or there were errors | |
533 | * @stable ICU 2.2 | |
534 | */ | |
535 | virtual void setAttribute(UColAttribute attr, UColAttributeValue value, | |
536 | UErrorCode &status); | |
537 | ||
538 | /** | |
539 | * Universal attribute getter. | |
540 | * @param attr attribute type | |
541 | * @param status to indicate whether the operation went on smoothly or there were errors | |
542 | * @return attribute value | |
543 | * @stable ICU 2.2 | |
544 | */ | |
545 | virtual UColAttributeValue getAttribute(UColAttribute attr, | |
546 | UErrorCode &status); | |
547 | ||
548 | /** | |
549 | * Sets the variable top to a collation element value of a string supplied. | |
550 | * @param varTop one or more (if contraction) UChars to which the variable top should be set | |
551 | * @param len length of variable top string. If -1 it is considered to be zero terminated. | |
552 | * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
553 | * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
554 | * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes | |
555 | * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined | |
556 | * @stable ICU 2.0 | |
557 | */ | |
558 | virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); | |
559 | ||
560 | /** | |
561 | * Sets the variable top to a collation element value of a string supplied. | |
562 | * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set | |
563 | * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
564 | * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
565 | * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes | |
566 | * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined | |
567 | * @stable ICU 2.0 | |
568 | */ | |
569 | virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status); | |
570 | ||
571 | /** | |
572 | * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. | |
573 | * Lower 16 bits are ignored. | |
574 | * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop | |
575 | * @param status error code (not changed by function) | |
576 | * @stable ICU 2.0 | |
577 | */ | |
578 | virtual void setVariableTop(const uint32_t varTop, UErrorCode &status); | |
579 | ||
580 | /** | |
581 | * Gets the variable top value of a Collator. | |
582 | * Lower 16 bits are undefined and should be ignored. | |
583 | * @param status error code (not changed by function). If error code is set, the return value is undefined. | |
584 | * @stable ICU 2.0 | |
585 | */ | |
586 | virtual uint32_t getVariableTop(UErrorCode &status) const; | |
587 | ||
588 | /** | |
589 | * Get an UnicodeSet that contains all the characters and sequences tailored in | |
590 | * this collator. | |
591 | * @param status error code of the operation | |
592 | * @return a pointer to a UnicodeSet object containing all the | |
593 | * code points and sequences that may sort differently than | |
594 | * in the UCA. The object must be disposed of by using delete | |
595 | * @stable ICU 2.4 | |
596 | */ | |
597 | virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; | |
598 | ||
599 | /** | |
600 | * Thread safe cloning operation. | |
601 | * @return pointer to the new clone, user should remove it. | |
602 | * @stable ICU 2.2 | |
603 | */ | |
604 | virtual Collator* safeClone(void); | |
605 | ||
606 | /** | |
607 | * Get the sort key as an array of bytes from an UnicodeString. | |
608 | * @param source string to be processed. | |
609 | * @param result buffer to store result in. If NULL, number of bytes needed | |
610 | * will be returned. | |
611 | * @param resultLength length of the result buffer. If it is not enough, the | |
612 | * buffer will be filled to capacity. | |
613 | * @return Number of bytes needed for storing the sort key | |
614 | * @stable ICU 2.0 | |
615 | */ | |
616 | virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, | |
617 | int32_t resultLength) const; | |
618 | ||
619 | /** | |
620 | * Get the sort key as an array of bytes from an UChar buffer. | |
621 | * @param source string to be processed. | |
622 | * @param sourceLength length of string to be processed. If -1, the string | |
623 | * is 0 terminated and length will be decided by the function. | |
624 | * @param result buffer to store result in. If NULL, number of bytes needed | |
625 | * will be returned. | |
626 | * @param resultLength length of the result buffer. If it is not enough, the | |
627 | * buffer will be filled to capacity. | |
628 | * @return Number of bytes needed for storing the sort key | |
629 | * @stable ICU 2.2 | |
630 | */ | |
631 | virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, | |
632 | uint8_t *result, int32_t resultLength) const; | |
633 | ||
634 | /** | |
635 | * Determines the minimum strength that will be used in comparison or | |
636 | * transformation. | |
637 | * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored | |
638 | * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference | |
639 | * are ignored. | |
640 | * @return the current comparison level. | |
641 | * @see RuleBasedCollator#setStrength | |
642 | * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead | |
643 | */ | |
644 | virtual ECollationStrength getStrength(void) const; | |
645 | ||
646 | /** | |
647 | * Sets the minimum strength to be used in comparison or transformation. | |
648 | * @see RuleBasedCollator#getStrength | |
649 | * @param newStrength the new comparison level. | |
650 | * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead | |
651 | */ | |
652 | virtual void setStrength(ECollationStrength newStrength); | |
b75a7d8f A |
653 | |
654 | private: | |
655 | ||
374ca955 A |
656 | // private static constants ----------------------------------------------- |
657 | ||
46f4442e A |
658 | /* Unnamed enum of private constants: index tags, per-strength increments, masks, shifts and reset values, as described member by member below. */ enum { | |
659 | /* need look up in .commit() */ | |
660 | CHARINDEX = 0x70000000, | |
661 | /* expand index follows */ | |
662 | EXPANDCHARINDEX = 0x7E000000, | |
663 | /* contract indexes follow */ | |
664 | CONTRACTCHARINDEX = 0x7F000000, | |
665 | /* unmapped character values */ | |
666 | UNMAPPED = 0xFFFFFFFF, | |
667 | /* primary strength increment */ | |
668 | PRIMARYORDERINCREMENT = 0x00010000, | |
669 | /* secondary strength increment */ | |
670 | SECONDARYORDERINCREMENT = 0x00000100, | |
671 | /* tertiary strength increment */ | |
672 | TERTIARYORDERINCREMENT = 0x00000001, | |
673 | /* mask off anything but primary order */ | |
674 | PRIMARYORDERMASK = 0xffff0000, | |
675 | /* mask off anything but secondary order */ | |
676 | SECONDARYORDERMASK = 0x0000ff00, | |
677 | /* mask off anything but tertiary order */ | |
678 | TERTIARYORDERMASK = 0x000000ff, | |
679 | /* mask off ignorable char order */ | |
680 | IGNORABLEMASK = 0x0000ffff, | |
681 | /* use only the primary difference */ | |
682 | PRIMARYDIFFERENCEONLY = 0xffff0000, | |
683 | /* use only the primary and secondary difference */ | |
684 | SECONDARYDIFFERENCEONLY = 0xffffff00, | |
685 | /* primary order shift */ | |
686 | PRIMARYORDERSHIFT = 16, | |
687 | /* secondary order shift */ | |
688 | SECONDARYORDERSHIFT = 8, | |
689 | /* starting value for collation elements */ | |
690 | COLELEMENTSTART = 0x02020202, | |
691 | /* testing mask for primary low element */ | |
692 | PRIMARYLOWZEROMASK = 0x00FF0000, | |
693 | /* resetting value for secondaries and tertiaries */ | |
694 | RESETSECONDARYTERTIARY = 0x00000202, | |
695 | /* resetting value for tertiaries */ | |
696 | RESETTERTIARY = 0x00000002, | |
697 | /* NOTE(review): undocumented in this file; same numeric value as RESETSECONDARYTERTIARY -- presumably the primary-ignorable marker, confirm */ |
698 | PRIMIGNORABLE = 0x0202 | |
699 | }; |
374ca955 A |
700 | |
701 | // private data members --------------------------------------------------- | |
702 | ||
703 | /* TRUE when this object owns ucollator; setUCollator(UCollator*) closes the old collator only if this is set. */ UBool dataIsOwned; | |
704 | |
705 | /* Set to TRUE by setUCollator(UCollator*) when an external collator is installed; per the checkOwned() comment, the collator is copied only when it is neither owned nor a write-through alias. */ UBool isWriteThroughAlias; | |
706 | ||
707 | /** | |
708 | * c struct for collation. All initialisation for it has to be done through | |
709 | * setUCollator(). | |
710 | */ | |
711 | UCollator *ucollator; | |
712 | ||
713 | /** | |
714 | * Rule UnicodeString | |
715 | */ | |
73c04bcf | 716 | UnicodeString urulestring; |
374ca955 A |
717 | |
718 | // friend classes -------------------------------------------------------- | |
719 | ||
720 | /** | |
721 | * Used to iterate over collation elements in a character source. | |
722 | */ | |
723 | friend class CollationElementIterator; | |
724 | ||
725 | /** | |
726 | * Collator ONLY needs access to RuleBasedCollator(const Locale&, | |
727 | * UErrorCode&) | |
728 | */ | |
729 | friend class Collator; | |
730 | ||
731 | /** | |
732 | * Searching over collation elements in a character source | |
733 | */ | |
734 | friend class StringSearch; | |
735 | ||
736 | // private constructors -------------------------------------------------- | |
737 | ||
738 | /** | |
739 | * Default constructor | |
740 | */ | |
741 | RuleBasedCollator(); | |
742 | ||
374ca955 A |
743 | /** |
744 | * RuleBasedCollator constructor. This constructor takes a locale. The | |
745 | * only caller of this class should be Collator::createInstance(). If | |
746 | * createInstance() happens to know that the requested locale's collation is | |
747 | * implemented as a RuleBasedCollator, it can then call this constructor. | |
748 | * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID | |
749 | * COLLATION TABLE. It does this by falling back to defaults. | |
750 | * @param desiredLocale locale used | |
751 | * @param status error code status | |
752 | */ | |
753 | RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status); | |
754 | ||
755 | /** | |
756 | * common constructor implementation | |
757 | * | |
758 | * @param rules the collation rules to build the collation table from. | |
759 | * @param collationStrength default strength for comparison | |
760 | * @param decompositionMode the normalisation mode | |
761 | * @param status reporting a success or an error. | |
762 | */ | |
763 | void | |
764 | construct(const UnicodeString& rules, | |
765 | UColAttributeValue collationStrength, | |
766 | UColAttributeValue decompositionMode, | |
767 | UErrorCode& status); | |
768 | ||
769 | // private methods ------------------------------------------------------- | |
770 | ||
771 | /** | |
772 | * Creates the c struct for ucollator | |
773 | * @param locale desired locale | |
774 | * @param status error status | |
775 | */ | |
776 | void setUCollator(const Locale& locale, UErrorCode& status); | |
777 | ||
778 | /** | |
779 | * Creates the c struct for ucollator | |
780 | * @param locale desired locale name | |
781 | * @param status error status | |
782 | */ | |
783 | void setUCollator(const char* locale, UErrorCode& status); | |
784 | ||
785 | /** | |
786 | * Sets the c struct for ucollator to an externally supplied one. This is | |
787 | * used internally by StringSearch. Hence the responsibility of cleaning up | |
788 | * the ucollator is not taken by this RuleBasedCollator: the dataIsOwned flag | |
789 | * is set to FALSE, and the rule string is initialised from the new collator. | |
790 | * @param collator new ucollator data | |
791 | */ | |
73c04bcf | 792 | void setUCollator(UCollator *collator); |
b75a7d8f A |
793 | |
794 | public: | |
374ca955 A |
795 | /** |
796 | * Get UCollator data struct. Used only by StringSearch & intltest. | |
797 | * @return UCollator data struct | |
798 | * @internal | |
799 | */ | |
800 | const UCollator * getUCollator(); | |
b75a7d8f A |
801 | |
802 | protected: | |
374ca955 A |
803 | /** |
804 | * Used internally by registration to define the requested and valid locales. | |
805 | * @param requestedLocale the requested locale | |
806 | * @param validLocale the valid locale | |
46f4442e | 807 | * @param actualLocale the actual locale |
374ca955 A |
808 | * @internal |
809 | */ | |
46f4442e | 810 | virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); |
b75a7d8f A |
811 | |
812 | private: | |
813 | ||
374ca955 A |
814 | // if not owned and not a write through alias, copy the ucollator |
815 | void checkOwned(void); | |
816 | ||
817 | // utility to init rule string used by checkOwned and construct | |
73c04bcf | 818 | void setRuleStringFromCollator(); |
374ca955 A |
819 | |
820 | /** | |
821 | * Converts C's UCollationResult to EComparisonResult | |
822 | * @param result member of the enum UCollationResult | |
823 | * @return EComparisonResult equivalent of UCollationResult | |
824 | * @deprecated ICU 2.6. We will not need it. | |
825 | */ | |
826 | Collator::EComparisonResult getEComparisonResult( | |
827 | const UCollationResult &result) const; | |
828 | ||
829 | /** | |
830 | * Converts C's UCollationStrength to ECollationStrength | |
831 | * @param strength member of the enum UCollationStrength | |
832 | * @return ECollationStrength equivalent of UCollationStrength | |
833 | */ | |
834 | Collator::ECollationStrength getECollationStrength( | |
835 | const UCollationStrength &strength) const; | |
836 | ||
837 | /** | |
838 | * Converts C++'s ECollationStrength to UCollationStrength | |
839 | * @param strength member of the enum ECollationStrength | |
840 | * @return UCollationStrength equivalent of ECollationStrength | |
841 | */ | |
842 | UCollationStrength getUCollationStrength( | |
843 | const Collator::ECollationStrength &strength) const; | |
b75a7d8f A |
844 | }; |
845 | ||
846 | // inline method implementation --------------------------------------------- | |
847 | ||
b75a7d8f A |
848 | inline void RuleBasedCollator::setUCollator(const Locale &locale, |
849 | UErrorCode &status) | |
850 | { | |
374ca955 | 851 | setUCollator(locale.getName(), status); |
b75a7d8f A |
852 | } |
853 | ||
854 | ||
73c04bcf | 855 | inline void RuleBasedCollator::setUCollator(UCollator *collator) |
b75a7d8f | 856 | { |
73c04bcf | 857 | |
b75a7d8f A |
858 | if (ucollator && dataIsOwned) { |
859 | ucol_close(ucollator); | |
b75a7d8f A |
860 | } |
861 | ucollator = collator; | |
b75a7d8f | 862 | dataIsOwned = FALSE; |
374ca955 | 863 | isWriteThroughAlias = TRUE; |
46f4442e | 864 | setRuleStringFromCollator(); |
b75a7d8f A |
865 | } |
866 | ||
867 | inline const UCollator * RuleBasedCollator::getUCollator() | |
868 | { | |
869 | return ucollator; | |
870 | } | |
871 | ||
872 | inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult( | |
873 | const UCollationResult &result) const | |
874 | { | |
374ca955 A |
875 | switch (result) |
876 | { | |
877 | case UCOL_LESS : | |
878 | return Collator::LESS; | |
879 | case UCOL_EQUAL : | |
880 | return Collator::EQUAL; | |
881 | default : | |
882 | return Collator::GREATER; | |
883 | } | |
b75a7d8f A |
884 | } |
885 | ||
886 | inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength( | |
887 | const UCollationStrength &strength) const | |
888 | { | |
374ca955 A |
889 | switch (strength) |
890 | { | |
891 | case UCOL_PRIMARY : | |
892 | return Collator::PRIMARY; | |
893 | case UCOL_SECONDARY : | |
894 | return Collator::SECONDARY; | |
895 | case UCOL_TERTIARY : | |
896 | return Collator::TERTIARY; | |
897 | case UCOL_QUATERNARY : | |
898 | return Collator::QUATERNARY; | |
899 | default : | |
900 | return Collator::IDENTICAL; | |
901 | } | |
b75a7d8f A |
902 | } |
903 | ||
904 | inline UCollationStrength RuleBasedCollator::getUCollationStrength( | |
905 | const Collator::ECollationStrength &strength) const | |
906 | { | |
374ca955 A |
907 | switch (strength) |
908 | { | |
909 | case Collator::PRIMARY : | |
910 | return UCOL_PRIMARY; | |
911 | case Collator::SECONDARY : | |
912 | return UCOL_SECONDARY; | |
913 | case Collator::TERTIARY : | |
914 | return UCOL_TERTIARY; | |
915 | case Collator::QUATERNARY : | |
916 | return UCOL_QUATERNARY; | |
917 | default : | |
918 | return UCOL_IDENTICAL; | |
919 | } | |
b75a7d8f A |
920 | } |
921 | ||
922 | U_NAMESPACE_END | |
923 | ||
924 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
925 | ||
926 | #endif |