1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
12 * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
18 * Created by: Helena Shih
20 * Modification History:
22 * Date Name Description
23 * 2/5/97 aliu Added streamIn and streamOut methods. Added
24 * constructor which reads RuleBasedCollator object from
25 * a binary file. Added writeToFile method which streams
26 * RuleBasedCollator out to a binary file. The streamIn
27 * and streamOut methods use istream and ostream objects
29 * 2/12/97 aliu Modified to use TableCollationData sub-object to
30 * hold invariant data.
31 * 2/13/97 aliu Moved several methods into this class from Collation.
32 * Added a private RuleBasedCollator(Locale&) constructor,
33 * to be used by Collator::createDefault(). General
35 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
36 * constructor and getDynamicClassID.
37 * 3/5/97 aliu Modified constructFromFile() to add parameter
38 * specifying whether or not binary loading is to be
39 * attempted. This is required for dynamic rule loading.
40 * 05/07/97 helena Added memory allocation error detection.
41 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to
42 * use MergeCollation::getPattern.
43 * 6/20/97 helena Java class name change.
44 * 8/18/97 helena Added internal API documentation.
45 * 09/03/97 helena Added createCollationKeyValues().
46 * 02/10/98 damiba Added compare with "length" parameter
47 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java
48 * 04/23/99 stephen Removed EDecompositionMode, merged with
50 * 06/14/99 stephen Removed kResourceBundleSuffix
51 * 11/02/99 helena Collator performance enhancements. Eliminates the
52 * UnicodeString construction and special case for NO_OP.
53 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator
54 * internal state management.
55 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
56 * to implementation file.
57 * 01/29/01 synwee Modified into a C++ wrapper which calls C API
59 * 2012-2014 markus Rewritten in C++ again.
65 #include "unicode/utypes.h"
67 #if U_SHOW_CPLUSPLUS_API
69 #if !UCONFIG_NO_COLLATION
71 #include "unicode/coll.h"
72 #include "unicode/locid.h"
73 #include "unicode/uiter.h"
74 #include "unicode/ucol.h"
78 struct CollationCacheEntry
;
80 struct CollationSettings
;
81 struct CollationTailoring
;
89 class CollationElementIterator
;
91 class SortKeyByteSink
;
97 * The RuleBasedCollator class provides the implementation of
98 * Collator, using data-driven tables. The user can create a customized
99 * table-based collation.
101 * For more information about the collation service see
102 * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
104 * Collation service provides correct sorting orders for most locales supported in ICU.
105 * If specific data for a locale is not available, the order eventually falls back
106 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
108 * Sort ordering may be customized by providing your own set of rules. For more on
109 * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
110 * Collation Customization</a> section of the User Guide.
112 * Note, RuleBasedCollator is not to be subclassed.
115 class U_I18N_API RuleBasedCollator
: public Collator
{
118 * RuleBasedCollator constructor. This takes the table rules and builds a
119 * collation table out of them. Please see RuleBasedCollator class
120 * description for more details on the collation rule syntax.
121 * @param rules the collation rules to build the collation table from.
122 * @param status reporting a success or an error.
125 RuleBasedCollator(const UnicodeString
& rules
, UErrorCode
& status
);
128 * RuleBasedCollator constructor. This takes the table rules and builds a
129 * collation table out of them. Please see RuleBasedCollator class
130 * description for more details on the collation rule syntax.
131 * @param rules the collation rules to build the collation table from.
132 * @param collationStrength strength for comparison
133 * @param status reporting a success or an error.
136 RuleBasedCollator(const UnicodeString
& rules
,
137 ECollationStrength collationStrength
,
141 * RuleBasedCollator constructor. This takes the table rules and builds a
142 * collation table out of them. Please see RuleBasedCollator class
143 * description for more details on the collation rule syntax.
144 * @param rules the collation rules to build the collation table from.
145 * @param decompositionMode the normalisation mode
146 * @param status reporting a success or an error.
149 RuleBasedCollator(const UnicodeString
& rules
,
150 UColAttributeValue decompositionMode
,
154 * RuleBasedCollator constructor. This takes the table rules and builds a
155 * collation table out of them. Please see RuleBasedCollator class
156 * description for more details on the collation rule syntax.
157 * @param rules the collation rules to build the collation table from.
158 * @param collationStrength strength for comparison
159 * @param decompositionMode the normalisation mode
160 * @param status reporting a success or an error.
163 RuleBasedCollator(const UnicodeString
& rules
,
164 ECollationStrength collationStrength
,
165 UColAttributeValue decompositionMode
,
168 #ifndef U_HIDE_INTERNAL_API
170 * TODO: document & propose as public API
173 RuleBasedCollator(const UnicodeString
&rules
,
174 UParseError
&parseError
, UnicodeString
&reason
,
175 UErrorCode
&errorCode
);
176 #endif /* U_HIDE_INTERNAL_API */
180 * @param other the RuleBasedCollator object to be copied
183 RuleBasedCollator(const RuleBasedCollator
& other
);
186 /** Opens a collator from a collator binary image created using
187 * cloneBinary. Binary image used in instantiation of the
188 * collator remains owned by the user and should stay around for
189 * the lifetime of the collator. The API also takes a base collator
190 * which must be the root collator.
191 * @param bin binary image owned by the user and required through the
192 * lifetime of the collator
193 * @param length size of the image. If negative, the API will try to
194 * figure out the length of the image
195 * @param base Base collator, for lookup of untailored characters.
196 * Must be the root collator, must not be NULL.
197 * The base is required to be present through the lifetime of the collator.
198 * @param status for catching errors
199 * @return newly created collator
203 RuleBasedCollator(const uint8_t *bin
, int32_t length
,
204 const RuleBasedCollator
*base
,
211 virtual ~RuleBasedCollator();
214 * Assignment operator.
215 * @param other other RuleBasedCollator object to copy from.
218 RuleBasedCollator
& operator=(const RuleBasedCollator
& other
);
221 * Returns true if argument is the same as this object.
222 * @param other Collator object to be compared.
223 * @return true if the argument is the same as this object.
226 virtual UBool
operator==(const Collator
& other
) const;
229 * Makes a copy of this object.
230 * @return a copy of this object, owned by the caller
233 virtual RuleBasedCollator
* clone() const;
236 * Creates a collation element iterator for the source string. The caller of
237 * this method is responsible for the memory management of the returned pointer.
239 * @param source the string over which the CollationElementIterator will iterate.
241 * @return the collation element iterator of the source string using this as
242 * the base Collator.
245 virtual CollationElementIterator
* createCollationElementIterator(
246 const UnicodeString
& source
) const;
249 * Creates a collation element iterator for the source. The caller of this
250 * method is responsible for the memory management of the returned pointer.
251 * @param source the CharacterIterator which produces the characters over
252 * which the CollationElementIterator will iterate.
253 * @return the collation element iterator of the source using this as the base Collator.
257 virtual CollationElementIterator
* createCollationElementIterator(
258 const CharacterIterator
& source
) const;
260 // Make deprecated versions of Collator::compare() visible.
261 using Collator::compare
;
264 * The comparison function compares the character data stored in two
265 * different strings. Returns information about whether a string is less
266 * than, greater than or equal to another string.
267 * @param source the source string to be compared with.
268 * @param target the string that is to be compared with the source string.
269 * @param status possible error code
270 * @return Returns an enum value. UCOL_GREATER if source is greater
271 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less than target.
275 virtual UCollationResult
compare(const UnicodeString
& source
,
276 const UnicodeString
& target
,
277 UErrorCode
&status
) const;
280 * Does the same thing as compare but limits the comparison to a specified length.
282 * @param source the source string to be compared with.
283 * @param target the string that is to be compared with the source string.
284 * @param length the length the comparison is limited to
285 * @param status possible error code
286 * @return Returns an enum value. UCOL_GREATER if source (up to the specified
287 * length) is greater than target; UCOL_EQUAL if source (up to specified
288 * length) is equal to target; UCOL_LESS if source (up to the specified
289 * length) is less than target.
292 virtual UCollationResult
compare(const UnicodeString
& source
,
293 const UnicodeString
& target
,
295 UErrorCode
&status
) const;
298 * The comparison function compares the character data stored in two
299 * different string arrays. Returns information about whether a string array
300 * is less than, greater than or equal to another string array.
301 * @param source the source string array to be compared with.
302 * @param sourceLength the length of the source string array. If this value
303 * is equal to -1, the string array is null-terminated.
304 * @param target the string that is to be compared with the source string.
305 * @param targetLength the length of the target string array. If this value
306 * is equal to -1, the string array is null-terminated.
307 * @param status possible error code
308 * @return Returns an enum value. UCOL_GREATER if source is greater
309 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less than target.
313 virtual UCollationResult
compare(const char16_t* source
, int32_t sourceLength
,
314 const char16_t* target
, int32_t targetLength
,
315 UErrorCode
&status
) const;
318 * Compares two strings using the Collator.
319 * Returns whether the first one compares less than/equal to/greater than the second one.
321 * This version takes UCharIterator input.
322 * @param sIter the first ("source") string iterator
323 * @param tIter the second ("target") string iterator
324 * @param status ICU status
325 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
328 virtual UCollationResult
compare(UCharIterator
&sIter
,
329 UCharIterator
&tIter
,
330 UErrorCode
&status
) const;
333 * Compares two UTF-8 strings using the Collator.
334 * Returns whether the first one compares less than/equal to/greater than the second one.
336 * This version takes UTF-8 input.
337 * Note that a StringPiece can be implicitly constructed
338 * from a std::string or a NUL-terminated const char * string.
339 * @param source the first UTF-8 string
340 * @param target the second UTF-8 string
341 * @param status ICU status
342 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
345 virtual UCollationResult
compareUTF8(const StringPiece
&source
,
346 const StringPiece
&target
,
347 UErrorCode
&status
) const;
350 * Transforms the string into a series of characters
351 * that can be compared with CollationKey.compare().
353 * Note that sort keys are often less efficient than simply doing comparison.
354 * For more details, see the ICU User Guide.
356 * @param source the source string.
357 * @param key the transformed key of the source string.
358 * @param status the error code status.
359 * @return the transformed key.
363 virtual CollationKey
& getCollationKey(const UnicodeString
& source
,
365 UErrorCode
& status
) const;
368 * Transforms a specified region of the string into a series of characters
369 * that can be compared with CollationKey.compare.
371 * Note that sort keys are often less efficient than simply doing comparison.
372 * For more details, see the ICU User Guide.
374 * @param source the source string.
375 * @param sourceLength the length of the source string.
376 * @param key the transformed key of the source string.
377 * @param status the error code status.
378 * @return the transformed key.
382 virtual CollationKey
& getCollationKey(const char16_t *source
,
383 int32_t sourceLength
,
385 UErrorCode
& status
) const;
388 * Generates the hash code for the rule-based collation object.
389 * @return the hash code.
392 virtual int32_t hashCode() const;
394 #ifndef U_FORCE_HIDE_DEPRECATED_API
396 * Gets the locale of the Collator
397 * @param type can be either requested, valid or actual locale. For more
398 * information see the definition of ULocDataLocaleType in
400 * @param status the error code status.
401 * @return locale where the collation data lives. If the collator
402 * was instantiated from rules, locale is empty.
403 * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
405 virtual Locale
getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const;
406 #endif // U_FORCE_HIDE_DEPRECATED_API
409 * Gets the tailoring rules for this collator.
410 * @return the collation tailoring from which this collator was created
413 const UnicodeString
& getRules() const;
416 * Gets the version information for a Collator.
417 * @param info the version # information, the result will be filled in
420 virtual void getVersion(UVersionInfo info
) const;
422 #ifndef U_HIDE_DEPRECATED_API
424 * Returns the maximum length of any expansion sequences that end with the
425 * specified comparison order.
427 * This is specific to the kind of collation element values and sequences
428 * returned by the CollationElementIterator.
429 * Call CollationElementIterator::getMaxExpansion() instead.
431 * @param order a collation order returned by CollationElementIterator::previous
432 * or CollationElementIterator::next.
433 * @return maximum size of the expansion sequences ending with the collation
434 * element, or 1 if the collation element does not occur at the end of
435 * any expansion sequence
436 * @see CollationElementIterator#getMaxExpansion
437 * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
439 int32_t getMaxExpansion(int32_t order
) const;
440 #endif /* U_HIDE_DEPRECATED_API */
443 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
444 * method is to implement a simple version of RTTI, since not all C++
445 * compilers support genuine RTTI. Polymorphic operator==() and clone()
446 * methods call this method.
447 * @return The class ID for this object. All objects of a given class have
448 * the same class ID. Objects of other classes have different class IDs.
452 virtual UClassID
getDynamicClassID(void) const;
455 * Returns the class ID for this class. This is useful only for comparing to
456 * a return value from getDynamicClassID(). For example:
458 * Base* polymorphic_pointer = createPolymorphicObject();
459 * if (polymorphic_pointer->getDynamicClassID() ==
460 * Derived::getStaticClassID()) ...
462 * @return The class ID for all objects of this class.
465 static UClassID U_EXPORT2
getStaticClassID(void);
467 #ifndef U_HIDE_DEPRECATED_API
469 * Do not use this method: The caller and the ICU library might use different heaps.
470 * Use cloneBinary() instead which writes to caller-provided memory.
472 * Returns a binary format of this collator.
473 * @param length Returns the length of the data, in bytes
474 * @param status the error code status.
475 * @return memory, owned by the caller, of size 'length' bytes.
476 * @deprecated ICU 52. Use cloneBinary() instead.
478 uint8_t *cloneRuleData(int32_t &length
, UErrorCode
&status
) const;
479 #endif /* U_HIDE_DEPRECATED_API */
481 /** Creates a binary image of a collator. This binary image can be stored and
482 * later used to instantiate a collator using ucol_openBinary.
483 * This API supports preflighting.
484 * @param buffer a fill-in buffer to receive the binary image
485 * @param capacity capacity of the destination buffer
486 * @param status for catching errors
487 * @return size of the image
488 * @see ucol_openBinary
491 int32_t cloneBinary(uint8_t *buffer
, int32_t capacity
, UErrorCode
&status
) const;
494 * Returns current rules. Delta defines whether full rules are returned or
495 * just the tailoring.
497 * getRules(void) should normally be used instead.
498 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
499 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
500 * @param buffer UnicodeString to store the result rules
502 * @see UCOL_FULL_RULES
504 void getRules(UColRuleOption delta
, UnicodeString
&buffer
) const;
507 * Universal attribute setter
508 * @param attr attribute type
509 * @param value attribute value
510 * @param status to indicate whether the operation went on smoothly or there were errors
513 virtual void setAttribute(UColAttribute attr
, UColAttributeValue value
,
517 * Universal attribute getter.
518 * @param attr attribute type
519 * @param status to indicate whether the operation went on smoothly or there were errors
520 * @return attribute value
523 virtual UColAttributeValue
getAttribute(UColAttribute attr
,
524 UErrorCode
&status
) const;
527 * Sets the variable top to the top of the specified reordering group.
528 * The variable top determines the highest-sorting character
529 * which is affected by UCOL_ALTERNATE_HANDLING.
530 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
531 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
532 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
533 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
534 * @param errorCode Standard ICU error code. Its input value must
535 * pass the U_SUCCESS() test, or else the function returns
536 * immediately. Check for U_FAILURE() on output or use with
537 * function chaining. (See User Guide for details.)
539 * @see getMaxVariable
542 virtual Collator
&setMaxVariable(UColReorderCode group
, UErrorCode
&errorCode
);
545 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
546 * @return the maximum variable reordering group.
547 * @see setMaxVariable
550 virtual UColReorderCode
getMaxVariable() const;
552 #ifndef U_FORCE_HIDE_DEPRECATED_API
554 * Sets the variable top to the primary weight of the specified string.
556 * Beginning with ICU 53, the variable top is pinned to
557 * the top of one of the supported reordering groups,
558 * and it must not be beyond the last of those groups.
559 * See setMaxVariable().
560 * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
561 * @param len length of variable top string. If -1 it is considered to be zero terminated.
562 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
563 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
564 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
565 * the last reordering group supported by setMaxVariable()
566 * @return variable top primary weight
567 * @deprecated ICU 53 Call setMaxVariable() instead.
569 virtual uint32_t setVariableTop(const char16_t *varTop
, int32_t len
, UErrorCode
&status
);
572 * Sets the variable top to the primary weight of the specified string.
574 * Beginning with ICU 53, the variable top is pinned to
575 * the top of one of the supported reordering groups,
576 * and it must not be beyond the last of those groups.
577 * See setMaxVariable().
578 * @param varTop a UnicodeString of one or more (if contraction) char16_ts to which the variable top should be set
579 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
580 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
581 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
582 * the last reordering group supported by setMaxVariable()
583 * @return variable top primary weight
584 * @deprecated ICU 53 Call setMaxVariable() instead.
586 virtual uint32_t setVariableTop(const UnicodeString
&varTop
, UErrorCode
&status
);
589 * Sets the variable top to the specified primary weight.
591 * Beginning with ICU 53, the variable top is pinned to
592 * the top of one of the supported reordering groups,
593 * and it must not be beyond the last of those groups.
594 * See setMaxVariable().
595 * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
596 * @param status error code
597 * @deprecated ICU 53 Call setMaxVariable() instead.
599 virtual void setVariableTop(uint32_t varTop
, UErrorCode
&status
);
600 #endif // U_FORCE_HIDE_DEPRECATED_API
603 * Gets the variable top value of a Collator.
604 * @param status error code (not changed by function). If error code is set, the return value is undefined.
605 * @return the variable top primary weight
606 * @see getMaxVariable
609 virtual uint32_t getVariableTop(UErrorCode
&status
) const;
612 * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
614 * @param status error code of the operation
615 * @return a pointer to a UnicodeSet object containing all the
616 * code points and sequences that may sort differently than
617 * in the root collator. The object must be disposed of by using delete
620 virtual UnicodeSet
*getTailoredSet(UErrorCode
&status
) const;
623 * Get the sort key as an array of bytes from a UnicodeString.
625 * Note that sort keys are often less efficient than simply doing comparison.
626 * For more details, see the ICU User Guide.
628 * @param source string to be processed.
629 * @param result buffer to store result in. If NULL, only the number of bytes needed is returned.
631 * @param resultLength length of the result buffer. If not enough, the
632 * buffer will be filled to capacity.
633 * @return Number of bytes needed for storing the sort key
636 virtual int32_t getSortKey(const UnicodeString
& source
, uint8_t *result
,
637 int32_t resultLength
) const;
640 * Get the sort key as an array of bytes from a char16_t buffer.
642 * Note that sort keys are often less efficient than simply doing comparison.
643 * For more details, see the ICU User Guide.
645 * @param source string to be processed.
646 * @param sourceLength length of string to be processed. If -1, the string
647 * is 0 terminated and length will be decided by the function.
648 * @param result buffer to store result in. If NULL, only the number of bytes needed is returned.
650 * @param resultLength length of the result buffer. If not enough, the
651 * buffer will be filled to capacity.
652 * @return Number of bytes needed for storing the sort key
655 virtual int32_t getSortKey(const char16_t *source
, int32_t sourceLength
,
656 uint8_t *result
, int32_t resultLength
) const;
659 * Retrieves the reordering codes for this collator.
660 * @param dest The array to fill with the script ordering.
661 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
662 * will only return the length of the result without writing any codes (pre-flighting).
663 * @param status A reference to an error code value, which must not indicate
664 * a failure before the function call.
665 * @return The length of the script ordering array.
666 * @see ucol_setReorderCodes
667 * @see Collator#getEquivalentReorderCodes
668 * @see Collator#setReorderCodes
671 virtual int32_t getReorderCodes(int32_t *dest
,
672 int32_t destCapacity
,
673 UErrorCode
& status
) const;
676 * Sets the ordering of scripts for this collator.
677 * @param reorderCodes An array of script codes in the new order. This can be NULL if the
678 * length is also set to 0. An empty array will clear any reordering codes on the collator.
679 * @param reorderCodesLength The length of reorderCodes.
680 * @param status error code
681 * @see ucol_setReorderCodes
682 * @see Collator#getReorderCodes
683 * @see Collator#getEquivalentReorderCodes
686 virtual void setReorderCodes(const int32_t* reorderCodes
,
687 int32_t reorderCodesLength
,
688 UErrorCode
& status
) ;
691 * Implements ucol_strcollUTF8().
694 virtual UCollationResult
internalCompareUTF8(
695 const char *left
, int32_t leftLength
,
696 const char *right
, int32_t rightLength
,
697 UErrorCode
&errorCode
) const;
699 /** Get the short definition string for a collator. This internal API harvests the collator's
700 * locale and the attribute set and produces a string that can be used for opening
701 * a collator with the same attributes using the ucol_openFromShortString API.
702 * This string will be normalized.
703 * The structure and the syntax of the string is defined in the "Naming collators"
704 * section of the users guide:
705 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
706 * This function supports preflighting.
708 * This is internal, and intended to be used with delegate converters.
710 * @param locale a locale that will appear as the collator's locale in the resulting
711 * short string definition. If NULL, the locale will be harvested from the collator.
713 * @param buffer space to hold the resulting string
714 * @param capacity capacity of the buffer
715 * @param status for returning errors. All the preflighting errors are featured
716 * @return length of the resulting string
717 * @see ucol_openFromShortString
718 * @see ucol_normalizeShortDefinitionString
719 * @see ucol_getShortDefinitionString
722 virtual int32_t internalGetShortDefinitionString(const char *locale
,
725 UErrorCode
&status
) const;
728 * Implements ucol_nextSortKeyPart().
731 virtual int32_t internalNextSortKeyPart(
732 UCharIterator
*iter
, uint32_t state
[2],
733 uint8_t *dest
, int32_t count
, UErrorCode
&errorCode
) const;
735 // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
737 * Only for use in ucol_openRules().
742 #ifndef U_HIDE_INTERNAL_API
744 * Implements ucol_getLocaleByType().
745 * Needed because the lifetime of the locale ID string must match that of the collator.
746 * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper.
749 const char *internalGetLocaleID(ULocDataLocaleType type
, UErrorCode
&errorCode
) const;
752 * Implements ucol_getContractionsAndExpansions().
753 * Gets this collator's sets of contraction strings and/or
754 * characters and strings that map to multiple collation elements (expansions).
755 * If addPrefixes is TRUE, then contractions that are expressed as
756 * prefix/pre-context rules are included.
757 * @param contractions if not NULL, the set to hold the contractions
758 * @param expansions if not NULL, the set to hold the expansions
759 * @param addPrefixes include prefix contextual mappings
760 * @param errorCode in/out ICU error code
763 void internalGetContractionsAndExpansions(
764 UnicodeSet
*contractions
, UnicodeSet
*expansions
,
765 UBool addPrefixes
, UErrorCode
&errorCode
) const;
768 * Adds the contractions that start with character c to the set.
769 * Ignores prefixes. Used by AlphabeticIndex.
772 void internalAddContractions(UChar32 c
, UnicodeSet
&set
, UErrorCode
&errorCode
) const;
775 * Implements from-rule constructors, and ucol_openRules().
778 void internalBuildTailoring(
779 const UnicodeString
&rules
,
781 UColAttributeValue decompositionMode
,
782 UParseError
*outParseError
, UnicodeString
*outReason
,
783 UErrorCode
&errorCode
);
// Converts a mutable UCollator pointer into a RuleBasedCollator pointer.
// The pointer is first passed through fromUCollator() and the result is then
// narrowed with dynamic_cast, so the return value is a null pointer when the
// underlying object is not a RuleBasedCollator.
// @param uc the UCollator pointer to convert
// @return the same object viewed as a RuleBasedCollator, or a null pointer
786 static inline RuleBasedCollator
*rbcFromUCollator(UCollator
*uc
) {
787 return dynamic_cast<RuleBasedCollator
*>(fromUCollator(uc
));
// Const overload: converts a const UCollator pointer into a const
// RuleBasedCollator pointer. The pointer is passed through fromUCollator()
// and narrowed with dynamic_cast, so the return value is a null pointer when
// the underlying object is not a RuleBasedCollator.
// @param uc the const UCollator pointer to convert
// @return the same object viewed as a const RuleBasedCollator, or a null pointer
790 static inline const RuleBasedCollator
*rbcFromUCollator(const UCollator
*uc
) {
791 return dynamic_cast<const RuleBasedCollator
*>(fromUCollator(uc
));
795 * Appends the CEs for the string to the vector.
796 * @internal for tests & tools
798 void internalGetCEs(const UnicodeString
&str
, UVector64
&ces
, UErrorCode
&errorCode
) const;
799 #endif // U_HIDE_INTERNAL_API
803 * Used internally by registration to define the requested and valid locales.
804 * @param requestedLocale the requested locale
805 * @param validLocale the valid locale
806 * @param actualLocale the actual locale
809 virtual void setLocales(const Locale
& requestedLocale
, const Locale
& validLocale
, const Locale
& actualLocale
);
812 friend class CollationElementIterator
;
813 friend class Collator
;
815 RuleBasedCollator(const CollationCacheEntry
*entry
);
818 * Enumeration of attributes that are relevant for short definition strings
819 * (e.g., ucol_getShortDefinitionString()).
820 * Effectively extends UColAttribute.
823 ATTR_VARIABLE_TOP
= UCOL_ATTRIBUTE_COUNT
,
827 void adoptTailoring(CollationTailoring
*t
, UErrorCode
&errorCode
);
829 // Both lengths must be <0 or else both must be >=0.
830 UCollationResult
doCompare(const char16_t *left
, int32_t leftLength
,
831 const char16_t *right
, int32_t rightLength
,
832 UErrorCode
&errorCode
) const;
833 UCollationResult
doCompare(const uint8_t *left
, int32_t leftLength
,
834 const uint8_t *right
, int32_t rightLength
,
835 UErrorCode
&errorCode
) const;
837 void writeSortKey(const char16_t *s
, int32_t length
,
838 SortKeyByteSink
&sink
, UErrorCode
&errorCode
) const;
840 void writeIdenticalLevel(const char16_t *s
, const char16_t *limit
,
841 SortKeyByteSink
&sink
, UErrorCode
&errorCode
) const;
843 const CollationSettings
&getDefaultSettings() const;
// Marks an attribute as not explicitly set by clearing bit number `attribute`
// in the explicitlySetAttributes bit mask.
// @param attribute bit index of the attribute; it is used directly as a shift
//        count on a uint32_t, so it must be in the range 0..31
//        (NOTE(review): no range check is performed here).
845 void setAttributeDefault(int32_t attribute
) {
846 explicitlySetAttributes
&= ~((uint32_t)1 << attribute
);
// Marks an attribute as explicitly set by setting bit number `attribute`
// in the explicitlySetAttributes bit mask.
// @param attribute bit index of the attribute; it is used directly as a shift
//        count on a uint32_t, so it must be in the range 0..31
//        (NOTE(review): no range check is performed here).
848 void setAttributeExplicitly(int32_t attribute
) {
849 explicitlySetAttributes
|= (uint32_t)1 << attribute
;
// Reports whether bit number `attribute` is set in the
// explicitlySetAttributes bit mask.
// @param attribute bit index of the attribute to query; the commented-out
//        assert below records the intended range (0 <= attribute < ATTR_LIMIT),
//        but nothing enforces it at run time.
// @return nonzero (as UBool) if the bit is set, zero otherwise
851 UBool
attributeHasBeenSetExplicitly(int32_t attribute
) const {
852 // assert(0 <= attribute < ATTR_LIMIT);
853 return (UBool
)((explicitlySetAttributes
& ((uint32_t)1 << attribute
)) != 0);
857 * Tests whether a character is "unsafe" for use as a collation starting point.
859 * @param c code point or code unit
860 * @return TRUE if c is unsafe
861 * @see CollationElementIterator#setOffset(int)
863 UBool
isUnsafe(UChar32 c
) const;
865 static void U_CALLCONV
computeMaxExpansions(const CollationTailoring
*t
, UErrorCode
&errorCode
);
866 UBool
initMaxExpansions(UErrorCode
&errorCode
) const;
868 void setFastLatinOptions(CollationSettings
&ownedSettings
) const;
870 const CollationData
*data
;
871 const CollationSettings
*settings
; // reference-counted
872 const CollationTailoring
*tailoring
; // alias of cacheEntry->tailoring
873 const CollationCacheEntry
*cacheEntry
; // reference-counted
875 uint32_t explicitlySetAttributes
;
877 UBool actualLocaleIsSameAsValid
;
882 #endif // !UCONFIG_NO_COLLATION
884 #endif /* U_SHOW_CPLUSPLUS_API */