]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ****************************************************************************** | |
5 | * Copyright (C) 1996-2016, International Business Machines Corporation and | |
6 | * others. All Rights Reserved. | |
7 | ****************************************************************************** | |
8 | */ | |
9 | ||
10 | /** | |
11 | * \file | |
12 | * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class. | |
13 | */ | |
14 | ||
15 | /** | |
16 | * File tblcoll.h | |
17 | * | |
18 | * Created by: Helena Shih | |
19 | * | |
20 | * Modification History: | |
21 | * | |
22 | * Date Name Description | |
23 | * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
24 | * constructor which reads RuleBasedCollator object from | |
25 | * a binary file. Added writeToFile method which streams | |
26 | * RuleBasedCollator out to a binary file. The streamIn | |
27 | * and streamOut methods use istream and ostream objects | |
28 | * in binary mode. | |
29 | * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
30 | * hold invariant data. | |
31 | * 2/13/97 aliu Moved several methods into this class from Collation. | |
32 | * Added a private RuleBasedCollator(Locale&) constructor, | |
33 | * to be used by Collator::createDefault(). General | |
34 | * clean up. | |
35 | * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
36 | * constructor and getDynamicClassID. | |
37 | * 3/5/97 aliu Modified constructFromFile() to add parameter | |
38 | * specifying whether or not binary loading is to be | |
39 | * attempted. This is required for dynamic rule loading. | |
40 | * 05/07/97 helena Added memory allocation error detection. | |
41 | * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to | |
42 | * use MergeCollation::getPattern. | |
43 | * 6/20/97 helena Java class name change. | |
44 | * 8/18/97 helena Added internal API documentation. | |
45 | * 09/03/97 helena Added createCollationKeyValues(). | |
46 | * 02/10/98 damiba Added compare with "length" parameter | |
47 | * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
48 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
49 | * Normalizer::EMode | |
50 | * 06/14/99 stephen Removed kResourceBundleSuffix | |
51 | * 11/02/99 helena Collator performance enhancements. Eliminates the | |
52 | * UnicodeString construction and special case for NO_OP. | |
53 | * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator | |
54 | * internal state management. | |
55 | * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
56 | * to implementation file. | |
57 | * 01/29/01 synwee Modified into a C++ wrapper which calls C API | |
58 | * (ucol.h) | |
59 | * 2012-2014 markus Rewritten in C++ again. | |
60 | */ | |
61 | ||
62 | #ifndef TBLCOLL_H | |
63 | #define TBLCOLL_H | |
64 | ||
65 | #include "unicode/utypes.h" | |
66 | ||
67 | #if !UCONFIG_NO_COLLATION | |
68 | ||
69 | #include "unicode/coll.h" | |
70 | #include "unicode/locid.h" | |
71 | #include "unicode/uiter.h" | |
72 | #include "unicode/ucol.h" | |
73 | ||
74 | #if U_SHOW_CPLUSPLUS_API | |
75 | U_NAMESPACE_BEGIN | |
76 | ||
77 | struct CollationCacheEntry; | |
78 | struct CollationData; | |
79 | struct CollationSettings; | |
80 | struct CollationTailoring; | |
81 | /** | |
82 | * @stable ICU 2.0 | |
83 | */ | |
84 | class StringSearch; | |
85 | /** | |
86 | * @stable ICU 2.0 | |
87 | */ | |
88 | class CollationElementIterator; | |
89 | class CollationKey; | |
90 | class SortKeyByteSink; | |
91 | class UnicodeSet; | |
92 | class UnicodeString; | |
93 | class UVector64; | |
94 | ||
95 | /** | |
96 | * The RuleBasedCollator class provides the implementation of | |
97 | * Collator, using data-driven tables. The user can create a customized | |
98 | * table-based collation. | |
99 | * <p> | |
100 | * For more information about the collation service see | |
101 | * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. | |
102 | * <p> | |
103 | * Collation service provides correct sorting orders for most locales supported in ICU. | |
104 | * If specific data for a locale is not available, the order eventually falls back | |
105 | * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. | |
106 | * <p> | |
107 | * Sort ordering may be customized by providing your own set of rules. For more on | |
108 | * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> | |
109 | * Collation Customization</a> section of the User Guide. | |
110 | * <p> | |
111 | * Note, RuleBasedCollator is not to be subclassed. | |
112 | * @see Collator | |
113 | */ | |
114 | class U_I18N_API RuleBasedCollator : public Collator { | |
115 | public: | |
116 | /** | |
117 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
118 | * collation table out of them. Please see RuleBasedCollator class | |
119 | * description for more details on the collation rule syntax. | |
120 | * @param rules the collation rules to build the collation table from. | |
121 | * @param status reporting a success or an error. | |
122 | * @stable ICU 2.0 | |
123 | */ | |
124 | RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); | |
125 | ||
126 | /** | |
127 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
128 | * collation table out of them. Please see RuleBasedCollator class | |
129 | * description for more details on the collation rule syntax. | |
130 | * @param rules the collation rules to build the collation table from. | |
131 | * @param collationStrength strength for comparison | |
132 | * @param status reporting a success or an error. | |
133 | * @stable ICU 2.0 | |
134 | */ | |
135 | RuleBasedCollator(const UnicodeString& rules, | |
136 | ECollationStrength collationStrength, | |
137 | UErrorCode& status); | |
138 | ||
139 | /** | |
140 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
141 | * collation table out of them. Please see RuleBasedCollator class | |
142 | * description for more details on the collation rule syntax. | |
143 | * @param rules the collation rules to build the collation table from. | |
144 | * @param decompositionMode the normalisation mode | |
145 | * @param status reporting a success or an error. | |
146 | * @stable ICU 2.0 | |
147 | */ | |
148 | RuleBasedCollator(const UnicodeString& rules, | |
149 | UColAttributeValue decompositionMode, | |
150 | UErrorCode& status); | |
151 | ||
152 | /** | |
153 | * RuleBasedCollator constructor. This takes the table rules and builds a | |
154 | * collation table out of them. Please see RuleBasedCollator class | |
155 | * description for more details on the collation rule syntax. | |
156 | * @param rules the collation rules to build the collation table from. | |
157 | * @param collationStrength strength for comparison | |
158 | * @param decompositionMode the normalisation mode | |
159 | * @param status reporting a success or an error. | |
160 | * @stable ICU 2.0 | |
161 | */ | |
162 | RuleBasedCollator(const UnicodeString& rules, | |
163 | ECollationStrength collationStrength, | |
164 | UColAttributeValue decompositionMode, | |
165 | UErrorCode& status); | |
166 | ||
167 | #ifndef U_HIDE_INTERNAL_API | |
168 | /** | |
169 | * TODO: document & propose as public API | |
170 | * @internal | |
171 | */ | |
172 | RuleBasedCollator(const UnicodeString &rules, | |
173 | UParseError &parseError, UnicodeString &reason, | |
174 | UErrorCode &errorCode); | |
175 | #endif /* U_HIDE_INTERNAL_API */ | |
176 | ||
177 | /** | |
178 | * Copy constructor. | |
179 | * @param other the RuleBasedCollator object to be copied | |
180 | * @stable ICU 2.0 | |
181 | */ | |
182 | RuleBasedCollator(const RuleBasedCollator& other); | |
183 | ||
184 | ||
185 | /** Opens a collator from a collator binary image created using | |
186 | * cloneBinary. Binary image used in instantiation of the | |
187 | * collator remains owned by the user and should stay around for | |
188 | * the lifetime of the collator. The API also takes a base collator | |
189 | * which must be the root collator. | |
190 | * @param bin binary image owned by the user and required through the | |
191 | * lifetime of the collator | |
192 | * @param length size of the image. If negative, the API will try to | |
193 | * figure out the length of the image | |
194 | * @param base Base collator, for lookup of untailored characters. | |
195 | * Must be the root collator, must not be NULL. | |
196 | * The base is required to be present through the lifetime of the collator. | |
197 | * @param status for catching errors | |
198 | * @return newly created collator | |
199 | * @see cloneBinary | |
200 | * @stable ICU 3.4 | |
201 | */ | |
202 | RuleBasedCollator(const uint8_t *bin, int32_t length, | |
203 | const RuleBasedCollator *base, | |
204 | UErrorCode &status); | |
205 | ||
206 | /** | |
207 | * Destructor. | |
208 | * @stable ICU 2.0 | |
209 | */ | |
210 | virtual ~RuleBasedCollator(); | |
211 | ||
212 | /** | |
213 | * Assignment operator. | |
214 | * @param other other RuleBasedCollator object to copy from. | |
215 | * @stable ICU 2.0 | |
216 | */ | |
217 | RuleBasedCollator& operator=(const RuleBasedCollator& other); | |
218 | ||
219 | /** | |
220 | * Returns true if argument is the same as this object. | |
221 | * @param other Collator object to be compared. | |
222 | * @return true if the argument is the same as this object. | |
223 | * @stable ICU 2.0 | |
224 | */ | |
225 | virtual UBool operator==(const Collator& other) const; | |
226 | ||
227 | /** | |
228 | * Makes a copy of this object. | |
229 | * @return a copy of this object, owned by the caller | |
230 | * @stable ICU 2.0 | |
231 | */ | |
232 | virtual Collator* clone(void) const; | |
233 | ||
234 | /** | |
235 | * Creates a collation element iterator for the source string. The caller of | |
236 | * this method is responsible for the memory management of the return | |
237 | * pointer. | |
238 | * @param source the string over which the CollationElementIterator will | |
239 | * iterate. | |
240 | * @return the collation element iterator of the source string using this as | |
241 | * the base Collator. | |
242 | * @stable ICU 2.2 | |
243 | */ | |
244 | virtual CollationElementIterator* createCollationElementIterator( | |
245 | const UnicodeString& source) const; | |
246 | ||
247 | /** | |
248 | * Creates a collation element iterator for the source. The caller of this | |
249 | * method is responsible for the memory management of the returned pointer. | |
250 | * @param source the CharacterIterator which produces the characters over | |
251 | * which the CollationElementIterator will iterate. | |
252 | * @return the collation element iterator of the source using this as the | |
253 | * base Collator. | |
254 | * @stable ICU 2.2 | |
255 | */ | |
256 | virtual CollationElementIterator* createCollationElementIterator( | |
257 | const CharacterIterator& source) const; | |
258 | ||
259 | // Make deprecated versions of Collator::compare() visible. | |
260 | using Collator::compare; | |
261 | ||
262 | /** | |
263 | * The comparison function compares the character data stored in two | |
264 | * different strings. Returns information about whether a string is less | |
265 | * than, greater than or equal to another string. | |
266 | * @param source the source string to be compared with. | |
267 | * @param target the string that is to be compared with the source string. | |
268 | * @param status possible error code | |
269 | * @return Returns an enum value. UCOL_GREATER if source is greater | |
270 | * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
271 | * than target | |
272 | * @stable ICU 2.6 | |
273 | **/ | |
274 | virtual UCollationResult compare(const UnicodeString& source, | |
275 | const UnicodeString& target, | |
276 | UErrorCode &status) const; | |
277 | ||
278 | /** | |
279 | * Does the same thing as compare but limits the comparison to a specified | |
280 | * length | |
281 | * @param source the source string to be compared with. | |
282 | * @param target the string that is to be compared with the source string. | |
283 | * @param length the length the comparison is limited to | |
284 | * @param status possible error code | |
285 | * @return Returns an enum value. UCOL_GREATER if source (up to the specified | |
286 | * length) is greater than target; UCOL_EQUAL if source (up to specified | |
287 | * length) is equal to target; UCOL_LESS if source (up to the specified | |
288 | * length) is less than target. | |
289 | * @stable ICU 2.6 | |
290 | */ | |
291 | virtual UCollationResult compare(const UnicodeString& source, | |
292 | const UnicodeString& target, | |
293 | int32_t length, | |
294 | UErrorCode &status) const; | |
295 | ||
296 | /** | |
297 | * The comparison function compares the character data stored in two | |
298 | * different string arrays. Returns information about whether a string array | |
299 | * is less than, greater than or equal to another string array. | |
300 | * @param source the source string array to be compared with. | |
301 | * @param sourceLength the length of the source string array. If this value | |
302 | * is equal to -1, the string array is null-terminated. | |
303 | * @param target the string that is to be compared with the source string. | |
304 | * @param targetLength the length of the target string array. If this value | |
305 | * is equal to -1, the string array is null-terminated. | |
306 | * @param status possible error code | |
307 | * @return Returns an enum value. UCOL_GREATER if source is greater | |
308 | * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
309 | * than target | |
310 | * @stable ICU 2.6 | |
311 | */ | |
312 | virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, | |
313 | const char16_t* target, int32_t targetLength, | |
314 | UErrorCode &status) const; | |
315 | ||
316 | /** | |
317 | * Compares two strings using the Collator. | |
318 | * Returns whether the first one compares less than/equal to/greater than | |
319 | * the second one. | |
320 | * This version takes UCharIterator input. | |
321 | * @param sIter the first ("source") string iterator | |
322 | * @param tIter the second ("target") string iterator | |
323 | * @param status ICU status | |
324 | * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER | |
325 | * @stable ICU 4.2 | |
326 | */ | |
327 | virtual UCollationResult compare(UCharIterator &sIter, | |
328 | UCharIterator &tIter, | |
329 | UErrorCode &status) const; | |
330 | ||
331 | /** | |
332 | * Compares two UTF-8 strings using the Collator. | |
333 | * Returns whether the first one compares less than/equal to/greater than | |
334 | * the second one. | |
335 | * This version takes UTF-8 input. | |
336 | * Note that a StringPiece can be implicitly constructed | |
337 | * from a std::string or a NUL-terminated const char * string. | |
338 | * @param source the first UTF-8 string | |
339 | * @param target the second UTF-8 string | |
340 | * @param status ICU status | |
341 | * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER | |
342 | * @stable ICU 51 | |
343 | */ | |
344 | virtual UCollationResult compareUTF8(const StringPiece &source, | |
345 | const StringPiece &target, | |
346 | UErrorCode &status) const; | |
347 | ||
348 | /** | |
349 | * Transforms the string into a series of characters | |
350 | * that can be compared with CollationKey.compare(). | |
351 | * | |
352 | * Note that sort keys are often less efficient than simply doing comparison. | |
353 | * For more details, see the ICU User Guide. | |
354 | * | |
355 | * @param source the source string. | |
356 | * @param key the transformed key of the source string. | |
357 | * @param status the error code status. | |
358 | * @return the transformed key. | |
359 | * @see CollationKey | |
360 | * @stable ICU 2.0 | |
361 | */ | |
362 | virtual CollationKey& getCollationKey(const UnicodeString& source, | |
363 | CollationKey& key, | |
364 | UErrorCode& status) const; | |
365 | ||
366 | /** | |
367 | * Transforms a specified region of the string into a series of characters | |
368 | * that can be compared with CollationKey.compare. | |
369 | * | |
370 | * Note that sort keys are often less efficient than simply doing comparison. | |
371 | * For more details, see the ICU User Guide. | |
372 | * | |
373 | * @param source the source string. | |
374 | * @param sourceLength the length of the source string. | |
375 | * @param key the transformed key of the source string. | |
376 | * @param status the error code status. | |
377 | * @return the transformed key. | |
378 | * @see CollationKey | |
379 | * @stable ICU 2.0 | |
380 | */ | |
381 | virtual CollationKey& getCollationKey(const char16_t *source, | |
382 | int32_t sourceLength, | |
383 | CollationKey& key, | |
384 | UErrorCode& status) const; | |
385 | ||
386 | /** | |
387 | * Generates the hash code for the rule-based collation object. | |
388 | * @return the hash code. | |
389 | * @stable ICU 2.0 | |
390 | */ | |
391 | virtual int32_t hashCode() const; | |
392 | ||
393 | /** | |
394 | * Gets the locale of the Collator | |
395 | * @param type can be either requested, valid or actual locale. For more | |
396 | * information see the definition of ULocDataLocaleType in | |
397 | * uloc.h | |
398 | * @param status the error code status. | |
399 | * @return locale where the collation data lives. If the collator | |
400 | * was instantiated from rules, locale is empty. | |
401 | * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback | |
402 | */ | |
403 | virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; | |
404 | ||
405 | /** | |
406 | * Gets the tailoring rules for this collator. | |
407 | * @return the collation tailoring from which this collator was created | |
408 | * @stable ICU 2.0 | |
409 | */ | |
410 | const UnicodeString& getRules() const; | |
411 | ||
412 | /** | |
413 | * Gets the version information for a Collator. | |
414 | * @param info the version # information, the result will be filled in | |
415 | * @stable ICU 2.0 | |
416 | */ | |
417 | virtual void getVersion(UVersionInfo info) const; | |
418 | ||
419 | #ifndef U_HIDE_DEPRECATED_API | |
420 | /** | |
421 | * Returns the maximum length of any expansion sequences that end with the | |
422 | * specified comparison order. | |
423 | * | |
424 | * This is specific to the kind of collation element values and sequences | |
425 | * returned by the CollationElementIterator. | |
426 | * Call CollationElementIterator::getMaxExpansion() instead. | |
427 | * | |
428 | * @param order a collation order returned by CollationElementIterator::previous | |
429 | * or CollationElementIterator::next. | |
430 | * @return maximum size of the expansion sequences ending with the collation | |
431 | * element, or 1 if the collation element does not occur at the end of | |
432 | * any expansion sequence | |
433 | * @see CollationElementIterator#getMaxExpansion | |
434 | * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. | |
435 | */ | |
436 | int32_t getMaxExpansion(int32_t order) const; | |
437 | #endif /* U_HIDE_DEPRECATED_API */ | |
438 | ||
439 | /** | |
440 | * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This | |
441 | * method is to implement a simple version of RTTI, since not all C++ | |
442 | * compilers support genuine RTTI. Polymorphic operator==() and clone() | |
443 | * methods call this method. | |
444 | * @return The class ID for this object. All objects of a given class have | |
445 | * the same class ID. Objects of other classes have different class | |
446 | * IDs. | |
447 | * @stable ICU 2.0 | |
448 | */ | |
449 | virtual UClassID getDynamicClassID(void) const; | |
450 | ||
451 | /** | |
452 | * Returns the class ID for this class. This is useful only for comparing to | |
453 | * a return value from getDynamicClassID(). For example: | |
454 | * <pre> | |
455 | * Base* polymorphic_pointer = createPolymorphicObject(); | |
456 | * if (polymorphic_pointer->getDynamicClassID() == | |
457 | * Derived::getStaticClassID()) ... | |
458 | * </pre> | |
459 | * @return The class ID for all objects of this class. | |
460 | * @stable ICU 2.0 | |
461 | */ | |
462 | static UClassID U_EXPORT2 getStaticClassID(void); | |
463 | ||
464 | #ifndef U_HIDE_DEPRECATED_API | |
465 | /** | |
466 | * Do not use this method: The caller and the ICU library might use different heaps. | |
467 | * Use cloneBinary() instead which writes to caller-provided memory. | |
468 | * | |
469 | * Returns a binary format of this collator. | |
470 | * @param length Returns the length of the data, in bytes | |
471 | * @param status the error code status. | |
472 | * @return memory, owned by the caller, of size 'length' bytes. | |
473 | * @deprecated ICU 52. Use cloneBinary() instead. | |
474 | */ | |
475 | uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const; | |
476 | #endif /* U_HIDE_DEPRECATED_API */ | |
477 | ||
478 | /** Creates a binary image of a collator. This binary image can be stored and | |
479 | * later used to instantiate a collator using ucol_openBinary. | |
480 | * This API supports preflighting. | |
481 | * @param buffer a fill-in buffer to receive the binary image | |
482 | * @param capacity capacity of the destination buffer | |
483 | * @param status for catching errors | |
484 | * @return size of the image | |
485 | * @see ucol_openBinary | |
486 | * @stable ICU 3.4 | |
487 | */ | |
488 | int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const; | |
489 | ||
490 | /** | |
491 | * Returns current rules. Delta defines whether full rules are returned or | |
492 | * just the tailoring. | |
493 | * | |
494 | * getRules(void) should normally be used instead. | |
495 | * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales | |
496 | * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. | |
497 | * @param buffer UnicodeString to store the result rules | |
498 | * @stable ICU 2.2 | |
499 | * @see UCOL_FULL_RULES | |
500 | */ | |
501 | void getRules(UColRuleOption delta, UnicodeString &buffer) const; | |
502 | ||
503 | /** | |
504 | * Universal attribute setter | |
505 | * @param attr attribute type | |
506 | * @param value attribute value | |
507 | * @param status to indicate whether the operation went on smoothly or there were errors | |
508 | * @stable ICU 2.2 | |
509 | */ | |
510 | virtual void setAttribute(UColAttribute attr, UColAttributeValue value, | |
511 | UErrorCode &status); | |
512 | ||
513 | /** | |
514 | * Universal attribute getter. | |
515 | * @param attr attribute type | |
516 | * @param status to indicate whether the operation went on smoothly or there were errors | |
517 | * @return attribute value | |
518 | * @stable ICU 2.2 | |
519 | */ | |
520 | virtual UColAttributeValue getAttribute(UColAttribute attr, | |
521 | UErrorCode &status) const; | |
522 | ||
523 | /** | |
524 | * Sets the variable top to the top of the specified reordering group. | |
525 | * The variable top determines the highest-sorting character | |
526 | * which is affected by UCOL_ALTERNATE_HANDLING. | |
527 | * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. | |
528 | * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, | |
529 | * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; | |
530 | * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group | |
531 | * @param errorCode Standard ICU error code. Its input value must | |
532 | * pass the U_SUCCESS() test, or else the function returns | |
533 | * immediately. Check for U_FAILURE() on output or use with | |
534 | * function chaining. (See User Guide for details.) | |
535 | * @return *this | |
536 | * @see getMaxVariable | |
537 | * @stable ICU 53 | |
538 | */ | |
539 | virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); | |
540 | ||
541 | /** | |
542 | * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. | |
543 | * @return the maximum variable reordering group. | |
544 | * @see setMaxVariable | |
545 | * @stable ICU 53 | |
546 | */ | |
547 | virtual UColReorderCode getMaxVariable() const; | |
548 | ||
549 | /** | |
550 | * Sets the variable top to the primary weight of the specified string. | |
551 | * | |
552 | * Beginning with ICU 53, the variable top is pinned to | |
553 | * the top of one of the supported reordering groups, | |
554 | * and it must not be beyond the last of those groups. | |
555 | * See setMaxVariable(). | |
556 | * @param varTop one or more (if contraction) char16_ts to which the variable top should be set | |
557 | * @param len length of variable top string. If -1 it is considered to be zero terminated. | |
558 | * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
559 | * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
560 | * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond | |
561 | * the last reordering group supported by setMaxVariable() | |
562 | * @return variable top primary weight | |
563 | * @deprecated ICU 53 Call setMaxVariable() instead. | |
564 | */ | |
565 | virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); | |
566 | ||
567 | /** | |
568 | * Sets the variable top to the primary weight of the specified string. | |
569 | * | |
570 | * Beginning with ICU 53, the variable top is pinned to | |
571 | * the top of one of the supported reordering groups, | |
572 | * and it must not be beyond the last of those groups. | |
573 | * See setMaxVariable(). | |
574 | * @param varTop a UnicodeString of one or more (if contraction) char16_ts to which the variable top should be set | |
575 | * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
576 | * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
577 | * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond | |
578 | * the last reordering group supported by setMaxVariable() | |
579 | * @return variable top primary weight | |
580 | * @deprecated ICU 53 Call setMaxVariable() instead. | |
581 | */ | |
582 | virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); | |
583 | ||
584 | /** | |
585 | * Sets the variable top to the specified primary weight. | |
586 | * | |
587 | * Beginning with ICU 53, the variable top is pinned to | |
588 | * the top of one of the supported reordering groups, | |
589 | * and it must not be beyond the last of those groups. | |
590 | * See setMaxVariable(). | |
591 | * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop | |
592 | * @param status error code | |
593 | * @deprecated ICU 53 Call setMaxVariable() instead. | |
594 | */ | |
595 | virtual void setVariableTop(uint32_t varTop, UErrorCode &status); | |
596 | ||
597 | /** | |
598 | * Gets the variable top value of a Collator. | |
599 | * @param status error code (not changed by function). If error code is set, the return value is undefined. | |
600 | * @return the variable top primary weight | |
601 | * @see getMaxVariable | |
602 | * @stable ICU 2.0 | |
603 | */ | |
604 | virtual uint32_t getVariableTop(UErrorCode &status) const; | |
605 | ||
606 | /** | |
607 | * Get a UnicodeSet that contains all the characters and sequences tailored in | |
608 | * this collator. | |
609 | * @param status error code of the operation | |
610 | * @return a pointer to a UnicodeSet object containing all the | |
611 | * code points and sequences that may sort differently than | |
612 | * in the root collator. The object must be disposed of by using delete | |
613 | * @stable ICU 2.4 | |
614 | */ | |
615 | virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; | |
616 | ||
617 | /** | |
618 | * Get the sort key as an array of bytes from a UnicodeString. | |
619 | * | |
620 | * Note that sort keys are often less efficient than simply doing comparison. | |
621 | * For more details, see the ICU User Guide. | |
622 | * | |
623 | * @param source string to be processed. | |
624 | * @param result buffer to store result in. If NULL, number of bytes needed | |
625 | * will be returned. | |
626 | * @param resultLength length of the result buffer. If it is not large enough, the | |
627 | * buffer will be filled to capacity. | |
628 | * @return Number of bytes needed for storing the sort key | |
629 | * @stable ICU 2.0 | |
630 | */ | |
631 | virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, | |
632 | int32_t resultLength) const; | |
633 | ||
634 | /** | |
635 | * Get the sort key as an array of bytes from a char16_t buffer. | |
636 | * | |
637 | * Note that sort keys are often less efficient than simply doing comparison. | |
638 | * For more details, see the ICU User Guide. | |
639 | * | |
640 | * @param source string to be processed. | |
641 | * @param sourceLength length of string to be processed. If -1, the string | |
642 | * is 0 terminated and length will be decided by the function. | |
643 | * @param result buffer to store result in. If NULL, number of bytes needed | |
644 | * will be returned. | |
645 | * @param resultLength length of the result buffer. If it is not large enough, the | |
646 | * buffer will be filled to capacity. | |
647 | * @return Number of bytes needed for storing the sort key | |
648 | * @stable ICU 2.2 | |
649 | */ | |
650 | virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, | |
651 | uint8_t *result, int32_t resultLength) const; | |
652 | ||
653 | /** | |
654 | * Retrieves the reordering codes for this collator. | |
655 | * @param dest The array to fill with the script ordering. | |
656 | * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function | |
657 | * will only return the length of the result without writing any codes (pre-flighting). | |
658 | * @param status A reference to an error code value, which must not indicate | |
659 | * a failure before the function call. | |
660 | * @return The length of the script ordering array. | |
661 | * @see ucol_setReorderCodes | |
662 | * @see Collator#getEquivalentReorderCodes | |
663 | * @see Collator#setReorderCodes | |
664 | * @stable ICU 4.8 | |
665 | */ | |
666 | virtual int32_t getReorderCodes(int32_t *dest, | |
667 | int32_t destCapacity, | |
668 | UErrorCode& status) const; | |
669 | ||
670 | /** | |
671 | * Sets the ordering of scripts for this collator. | |
672 | * @param reorderCodes An array of script codes in the new order. This can be NULL if the | |
673 | * length is also set to 0. An empty array will clear any reordering codes on the collator. | |
674 | * @param reorderCodesLength The length of reorderCodes. | |
675 | * @param status error code | |
676 | * @see ucol_setReorderCodes | |
677 | * @see Collator#getReorderCodes | |
678 | * @see Collator#getEquivalentReorderCodes | |
679 | * @stable ICU 4.8 | |
680 | */ | |
681 | virtual void setReorderCodes(const int32_t* reorderCodes, | |
682 | int32_t reorderCodesLength, | |
683 | UErrorCode& status) ; | |
684 | ||
685 | /** | |
686 | * Implements ucol_strcollUTF8(): compares left with right, each passed as a (const char *, length) pair; errors are reported via errorCode. | |
687 | * @internal | |
688 | */ | |
689 | virtual UCollationResult internalCompareUTF8( | |
690 | const char *left, int32_t leftLength, | |
691 | const char *right, int32_t rightLength, | |
692 | UErrorCode &errorCode) const; | |
693 | ||
694 | /** Gets the short definition string for a collator. This internal API harvests the collator's | |
695 | * locale and attribute set and produces a string that can be used for opening | |
696 | * a collator with the same attributes using the ucol_openFromShortString API. | |
697 | * This string will be normalized. | |
698 | * The structure and the syntax of the string are defined in the "Naming collators" | |
699 | * section of the User Guide: | |
700 | * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme | |
701 | * This function supports preflighting. | |
702 | * | |
703 | * This is internal, and intended to be used with delegate converters. | |
704 | * | |
705 | * @param locale a locale that will appear as the collator's locale in the resulting | |
706 | * short string definition. If NULL, the locale will be harvested | |
707 | * from the collator. | |
708 | * @param buffer space to hold the resulting string | |
709 | * @param capacity capacity of the buffer | |
710 | * @param status for returning errors, including all the preflighting errors | |
711 | * @return length of the resulting string | |
712 | * @see ucol_openFromShortString | |
713 | * @see ucol_normalizeShortDefinitionString | |
714 | * @see ucol_getShortDefinitionString | |
715 | * @internal | |
716 | */ | |
717 | virtual int32_t internalGetShortDefinitionString(const char *locale, | |
718 | char *buffer, | |
719 | int32_t capacity, | |
720 | UErrorCode &status) const; | |
721 | ||
722 | /** | |
723 | * Implements ucol_nextSortKeyPart(). The state parameter is declared as uint32_t[2], i.e. callers pass a two-element array (the parameter itself is a plain pointer). | |
724 | * @internal | |
725 | */ | |
726 | virtual int32_t internalNextSortKeyPart( | |
727 | UCharIterator *iter, uint32_t state[2], | |
728 | uint8_t *dest, int32_t count, UErrorCode &errorCode) const; | |
729 | ||
730 | // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API; it is deliberately declared outside that guard. | |
731 | /** | |
732 | * Default constructor. Only for use in ucol_openRules(). | |
733 | * @internal | |
734 | */ | |
735 | RuleBasedCollator(); | |
736 | ||
737 | #ifndef U_HIDE_INTERNAL_API | |
738 | /** | |
739 | * Implements ucol_getLocaleByType(). | |
740 | * Needed because the lifetime of the returned locale ID string must match that of the collator. | |
741 | * getLocale() only returns a copy of a Locale, which has minimal lifetime in a C wrapper. | |
742 | * @internal | |
743 | */ | |
744 | const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const; | |
745 | ||
746 | /** | |
747 | * Implements ucol_getContractionsAndExpansions(). | |
748 | * Gets this collator's sets of contraction strings and/or of | |
749 | * characters and strings that map to multiple collation elements (expansions). | |
750 | * If addPrefixes is TRUE, then contractions that are expressed as | |
751 | * prefix/pre-context rules are included as well. | |
752 | * @param contractions if not NULL, the set to hold the contractions | |
753 | * @param expansions if not NULL, the set to hold the expansions | |
754 | * @param addPrefixes whether to include prefix contextual mappings | |
755 | * @param errorCode in/out ICU error code | |
756 | * @internal | |
757 | */ | |
758 | void internalGetContractionsAndExpansions( | |
759 | UnicodeSet *contractions, UnicodeSet *expansions, | |
760 | UBool addPrefixes, UErrorCode &errorCode) const; | |
761 | ||
762 | /** | |
763 | * Adds to the given set the contractions that start with the character c. | |
764 | * Prefixes are ignored. Used by AlphabeticIndex. | |
765 | * @internal | |
766 | */ | |
767 | void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const; | |
768 | ||
769 | /** | |
770 | * Implements the from-rules constructors and ucol_openRules(). | |
771 | * @internal | |
772 | */ | |
773 | void internalBuildTailoring( | |
774 | const UnicodeString &rules, | |
775 | int32_t strength, | |
776 | UColAttributeValue decompositionMode, | |
777 | UParseError *outParseError, UnicodeString *outReason, | |
778 | UErrorCode &errorCode); | |
779 | ||
780 | /** Converts a UCollator handle to a RuleBasedCollator pointer; the dynamic_cast yields a null pointer when the object is of another type. @internal */ | |
781 | static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) { | |
782 | auto *base = fromUCollator(uc); return dynamic_cast<RuleBasedCollator *>(base); | |
783 | } | |
784 | /** Const overload: converts a const UCollator handle to a const RuleBasedCollator pointer; the dynamic_cast yields a null pointer when the object is of another type. @internal */ | |
785 | static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { | |
786 | const auto *base = fromUCollator(uc); return dynamic_cast<const RuleBasedCollator *>(base); | |
787 | } | |
788 | ||
789 | /** | |
790 | * Appends the CEs (collation elements) for the string str to the vector ces. | |
791 | * @internal for tests & tools | |
792 | */ | |
793 | void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const; | |
794 | #endif // U_HIDE_INTERNAL_API | |
795 | ||
796 | protected: | |
797 | /** | |
798 | * Used internally by registration to define the requested, valid and actual locales. | |
799 | * @param requestedLocale the requested locale | |
800 | * @param validLocale the valid locale | |
801 | * @param actualLocale the actual locale | |
802 | * @internal | |
803 | */ | |
804 | virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); | |
805 | ||
806 | private: | |
807 | friend class CollationElementIterator; | |
808 | friend class Collator; | |
809 | ||
810 | RuleBasedCollator(const CollationCacheEntry *entry); /* NOTE(review): this single-argument constructor is not marked explicit; confirm that no caller relies on the implicit conversion before adding it. */ | |
811 | ||
812 | /** | |
813 | * Enumeration of attributes that are relevant for short definition strings | |
814 | * (e.g., ucol_getShortDefinitionString()). | |
815 | * Effectively extends UColAttribute: ATTR_VARIABLE_TOP takes the value UCOL_ATTRIBUTE_COUNT, and ATTR_LIMIT is the value right after it. | |
816 | */ | |
817 | enum Attributes { | |
818 | ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, | |
819 | ATTR_LIMIT | |
820 | }; | |
821 | ||
822 | void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); /* NOTE(review): "adopt" presumably means that ownership of t passes to this collator; confirm in the implementation. */ | |
823 | ||
824 | // Precondition for both overloads (char16_t and uint8_t input): leftLength and rightLength must both be <0 or else both must be >=0. | |
825 | UCollationResult doCompare(const char16_t *left, int32_t leftLength, | |
826 | const char16_t *right, int32_t rightLength, | |
827 | UErrorCode &errorCode) const; | |
828 | UCollationResult doCompare(const uint8_t *left, int32_t leftLength, | |
829 | const uint8_t *right, int32_t rightLength, | |
830 | UErrorCode &errorCode) const; | |
831 | ||
832 | void writeSortKey(const char16_t *s, int32_t length, | |
833 | SortKeyByteSink &sink, UErrorCode &errorCode) const; /* NOTE(review): presumably writes the sort key for s (with the given length) into sink; confirm in the implementation. */ | |
834 | ||
835 | void writeIdenticalLevel(const char16_t *s, const char16_t *limit, | |
836 | SortKeyByteSink &sink, UErrorCode &errorCode) const; /* Takes the text as a start pointer s plus a limit pointer instead of a length. NOTE(review): presumably writes the identical-level bytes into sink; confirm. */ | |
837 | ||
838 | const CollationSettings &getDefaultSettings() const; /* Returns a const reference. NOTE(review): presumably the default settings of the tailoring; confirm in the implementation. */ | |
839 | ||
840 | void setAttributeDefault(int32_t attribute) { /* Clears the bit for this attribute, so that it no longer counts as explicitly set. */ | |
841 | explicitlySetAttributes = explicitlySetAttributes & ~(uint32_t{1} << attribute); | |
842 | } | |
843 | void setAttributeExplicitly(int32_t attribute) { /* Sets the bit for this attribute, marking it as explicitly set. */ | |
844 | explicitlySetAttributes = explicitlySetAttributes | (uint32_t{1} << attribute); | |
845 | } | |
846 | UBool attributeHasBeenSetExplicitly(int32_t attribute) const { /* Tests whether the bit for this attribute is set. */ | |
847 | // assert(0 <= attribute && attribute < ATTR_LIMIT); | |
848 | return static_cast<UBool>(((explicitlySetAttributes >> attribute) & uint32_t{1}) != 0); | |
849 | } | |
850 | ||
851 | /** | |
852 | * Tests whether a character is "unsafe" for use as a collation starting point. | |
853 | * | |
854 | * @param c the code point or code unit to test | |
855 | * @return TRUE if c is unsafe | |
856 | * @see CollationElementIterator#setOffset(int) | |
857 | */ | |
858 | UBool isUnsafe(UChar32 c) const; | |
859 | ||
860 | static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode); /* Static and declared with U_CALLCONV. NOTE(review): presumably so that it can be passed as a plain callback; confirm at the call site. */ | |
861 | UBool initMaxExpansions(UErrorCode &errorCode) const; /* NOTE(review): presumably triggers computeMaxExpansions() on demand and returns whether that succeeded; confirm in the implementation. */ | |
862 | ||
863 | void setFastLatinOptions(CollationSettings &ownedSettings) const; /* const method; ownedSettings is passed by non-const reference. NOTE(review): presumably it is the object that gets updated; confirm. */ | |
864 | ||
865 | const CollationData *data; | |
866 | const CollationSettings *settings; // reference-counted | |
867 | const CollationTailoring *tailoring; // alias of cacheEntry->tailoring | |
868 | const CollationCacheEntry *cacheEntry; // reference-counted | |
869 | Locale validLocale; | |
870 | uint32_t explicitlySetAttributes; /* Bit set with one bit per attribute index; maintained by setAttributeExplicitly() and setAttributeDefault(). */ | |
871 | |
872 | UBool actualLocaleIsSameAsValid; | |
873 | }; | |
874 | ||
875 | U_NAMESPACE_END | |
876 | #endif // U_SHOW_CPLUSPLUS_API | |
877 | ||
878 | #endif // !UCONFIG_NO_COLLATION | |
879 | #endif // TBLCOLL_H |