]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/tblcoll.h
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / i18n / unicode / tblcoll.h
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7
8 /**
9 * File tblcoll.h
10 *
11 * Created by: Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 2/5/97 aliu Added streamIn and streamOut methods. Added
17 * constructor which reads RuleBasedCollator object from
18 * a binary file. Added writeToFile method which streams
19 * RuleBasedCollator out to a binary file. The streamIn
20 * and streamOut methods use istream and ostream objects
21 * in binary mode.
22 * 2/12/97 aliu Modified to use TableCollationData sub-object to
23 * hold invariant data.
24 * 2/13/97 aliu Moved several methods into this class from Collation.
25 * Added a private RuleBasedCollator(Locale&) constructor,
26 * to be used by Collator::createDefault(). General
27 * clean up.
28 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
29 * constructor and getDynamicClassID.
30 * 3/5/97 aliu Modified constructFromFile() to add parameter
31 * specifying whether or not binary loading is to be
32 * attempted. This is required for dynamic rule loading.
33 * 05/07/97 helena Added memory allocation error detection.
34 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to
35 * use MergeCollation::getPattern.
36 * 6/20/97 helena Java class name change.
37 * 8/18/97 helena Added internal API documentation.
38 * 09/03/97 helena Added createCollationKeyValues().
39 * 02/10/98 damiba Added compare with "length" parameter
40 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java
41 * 04/23/99 stephen Removed EDecompositionMode, merged with
42 * Normalizer::EMode
43 * 06/14/99 stephen Removed kResourceBundleSuffix
44 * 11/02/99 helena Collator performance enhancements. Eliminates the
45 * UnicodeString construction and special case for NO_OP.
46 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator
47 * internal state management.
48 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
49 * to implementation file.
50 * 01/29/01 synwee Modified into a C++ wrapper which calls C API
51 * (ucol.h)
52 */
53
54 #ifndef TBLCOLL_H
55 #define TBLCOLL_H
56
57 #include "unicode/utypes.h"
58
59 #if !UCONFIG_NO_COLLATION
60
61 #include "unicode/coll.h"
62 #include "unicode/ucol.h"
63 #include "unicode/sortkey.h"
64 #include "unicode/normlzr.h"
65
66 U_NAMESPACE_BEGIN
67
68 /**
69 * @stable ICU 2.0
70 */
71 class StringSearch;
72 /**
73 * @stable ICU 2.0
74 */
75 class CollationElementIterator;
76
77 /**
78 * The RuleBasedCollator class provides the simple implementation of
79 * Collator, using data-driven tables. The user can create a customized
80 * table-based collation.
81 * <P>
82 * <em>Important: </em>The ICU collation service has been reimplemented
83 * in order to achieve better performance and UCA compliance.
84 * For details, see the
85 * <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
86 * collation design document</a>.
87 * <p>
88 * RuleBasedCollator is a thin C++ wrapper over the C implementation.
89 * <p>
90 * For more information about the collation service see
91 * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Intro.html">the users guide</a>.
92 * <p>
93 * Collation service provides correct sorting orders for most locales supported in ICU.
94 * If specific data for a locale is not available, the order eventually falls back
95 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
96 * <p>
97 * Sort ordering may be customized by providing your own set of rules. For more on
98 * this subject see the <a href="http://oss.software.ibm.com/icu/userguide/Collate_Customization.html">
99 * Collation customization</a> section of the users guide.
100 * <p>
101 * Note, RuleBasedCollator is not to be subclassed.
102 * @see Collator
103 * @version 2.0 11/15/2001
104 */
105 class U_I18N_API RuleBasedCollator : public Collator
106 {
107 public:
108
109 // constructor -------------------------------------------------------------
110
111 /**
112 * RuleBasedCollator constructor. This takes the table rules and builds a
113 * collation table out of them. Please see RuleBasedCollator class
114 * description for more details on the collation rule syntax.
115 * @param rules the collation rules to build the collation table from.
116 * @param status reporting a success or an error.
117 * @see Locale
118 * @stable ICU 2.0
119 */
120 RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
121
122 /**
123 * RuleBasedCollator constructor. This takes the table rules and builds a
124 * collation table out of them. Please see RuleBasedCollator class
125 * description for more details on the collation rule syntax.
126 * @param rules the collation rules to build the collation table from.
127 * @param collationStrength default strength for comparison
128 * @param status reporting a success or an error.
129 * @see Locale
130 * @stable ICU 2.0
131 */
132 RuleBasedCollator(const UnicodeString& rules,
133 ECollationStrength collationStrength,
134 UErrorCode& status);
135
136 /**
137 * RuleBasedCollator constructor. This takes the table rules and builds a
138 * collation table out of them. Please see RuleBasedCollator class
139 * description for more details on the collation rule syntax.
140 * @param rules the collation rules to build the collation table from.
141 * @param decompositionMode the normalisation mode
142 * @param status reporting a success or an error.
143 * @see Locale
144 * @stable ICU 2.0
145 */
146 RuleBasedCollator(const UnicodeString& rules,
147 UColAttributeValue decompositionMode,
148 UErrorCode& status);
149
150 /**
151 * RuleBasedCollator constructor. This takes the table rules and builds a
152 * collation table out of them. Please see RuleBasedCollator class
153 * description for more details on the collation rule syntax.
154 * @param rules the collation rules to build the collation table from.
155 * @param collationStrength default strength for comparison
156 * @param decompositionMode the normalisation mode
157 * @param status reporting a success or an error.
158 * @see Locale
159 * @stable ICU 2.0
160 */
161 RuleBasedCollator(const UnicodeString& rules,
162 ECollationStrength collationStrength,
163 UColAttributeValue decompositionMode,
164 UErrorCode& status);
165
166 /**
167 * Copy constructor.
168 * @param other the RuleBasedCollator object to be copied
169 * @see Locale
170 * @stable ICU 2.0
171 */
172 RuleBasedCollator(const RuleBasedCollator& other);
173
174 // destructor --------------------------------------------------------------
175
176 /**
177 * Destructor.
178 * @stable ICU 2.0
179 */
180 virtual ~RuleBasedCollator();
181
182 // public methods ----------------------------------------------------------
183
184 /**
185 * Assignment operator.
186 * @param other the RuleBasedCollator object to copy from.
187 * @stable ICU 2.0
188 */
189 RuleBasedCollator& operator=(const RuleBasedCollator& other);
190
191 /**
192 * Returns true if argument is the same as this object.
193 * @param other Collator object to be compared.
194 * @return true if argument is the same as this object.
195 * @stable ICU 2.0
196 */
197 virtual UBool operator==(const Collator& other) const;
198
199 /**
200 * Returns true if argument is not the same as this object.
201 * @param other Collator object to be compared
202 * @return returns true if argument is not the same as this object.
203 * @stable ICU 2.0
204 */
205 virtual UBool operator!=(const Collator& other) const;
206
207 /**
208 * Makes a deep copy of the object.
209 * The caller owns the returned object.
210 * @return the cloned object.
211 * @stable ICU 2.0
212 */
213 virtual Collator* clone(void) const;
214
215 /**
216 * Creates a collation element iterator for the source string. The caller of
217 * this method is responsible for the memory management of the return
218 * pointer.
219 * @param source the string over which the CollationElementIterator will
220 * iterate.
221 * @return the collation element iterator of the source string using this as
222 * the based Collator.
223 * @stable ICU 2.2
224 */
225 virtual CollationElementIterator* createCollationElementIterator(
226 const UnicodeString& source) const;
227
228 /**
229 * Creates a collation element iterator for the source. The caller of this
230 * method is responsible for the memory management of the returned pointer.
231 * @param source the CharacterIterator which produces the characters over
232 * which the CollationElementIterator will iterate.
233 * @return the collation element iterator of the source using this as the
234 * based Collator.
235 * @stable ICU 2.2
236 */
237 virtual CollationElementIterator* createCollationElementIterator(
238 const CharacterIterator& source) const;
239
240 /**
241 * Compares a range of character data stored in two different strings based
242 * on the collation rules. Returns information about whether a string is
243 * less than, greater than or equal to another string in a language.
244 * This can be overridden in a subclass.
245 * @param source the source string.
246 * @param target the target string to be compared with the source string.
247 * @return the comparison result. GREATER if the source string is greater
248 * than the target string, LESS if the source is less than the
249 * target. Otherwise, returns EQUAL.
250 * @deprecated ICU 2.6 Use overload with UErrorCode&
251 */
252 virtual EComparisonResult compare(const UnicodeString& source,
253 const UnicodeString& target) const;
254
255
256 /**
257 * The comparison function compares the character data stored in two
258 * different strings. Returns information about whether a string is less
259 * than, greater than or equal to another string.
260 * @param source the source string to be compared with.
261 * @param target the string that is to be compared with the source string.
262 * @param status possible error code
263 * @return Returns an enum value. UCOL_GREATER if source is greater
264 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
265 * than target
266 * @stable ICU 2.6
267 **/
268 virtual UCollationResult compare(const UnicodeString& source,
269 const UnicodeString& target,
270 UErrorCode &status) const;
271
272 /**
273 * Compares a range of character data stored in two different strings based
274 * on the collation rules up to the specified length. Returns information
275 * about whether a string is less than, greater than or equal to another
276 * string in a language. This can be overridden in a subclass.
277 * @param source the source string.
278 * @param target the target string to be compared with the source string.
279 * @param length compares up to the specified length
280 * @return the comparison result. GREATER if the source string is greater
281 * than the target string, LESS if the source is less than the
282 * target. Otherwise, returns EQUAL.
283 * @deprecated ICU 2.6 Use overload with UErrorCode&
284 */
285 virtual EComparisonResult compare(const UnicodeString& source,
286 const UnicodeString& target,
287 int32_t length) const;
288
289 /**
290 * Does the same thing as compare but limits the comparison to a specified
291 * length
292 * @param source the source string to be compared with.
293 * @param target the string that is to be compared with the source string.
294 * @param length the length the comparison is limited to
295 * @param status possible error code
296 * @return Returns an enum value. UCOL_GREATER if source (up to the specified
297 * length) is greater than target; UCOL_EQUAL if source (up to specified
298 * length) is equal to target; UCOL_LESS if source (up to the specified
299 * length) is less than target.
300 * @stable ICU 2.6
301 */
302 virtual UCollationResult compare(const UnicodeString& source,
303 const UnicodeString& target,
304 int32_t length,
305 UErrorCode &status) const;
306
307 /**
308 * The comparison function compares the character data stored in two
309 * different string arrays. Returns information about whether a string array
310 * is less than, greater than or equal to another string array.
311 * <p>Example of use:
312 * <pre>
313 * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC"
314 * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc"
315 * . UErrorCode status = U_ZERO_ERROR;
316 * . Collator *myCollation =
317 * . Collator::createInstance(Locale::US, status);
318 * . if (U_FAILURE(status)) return;
319 * . myCollation->setStrength(Collator::PRIMARY);
320 * . // result would be Collator::EQUAL ("abc" == "ABC")
321 * . // (no primary difference between "abc" and "ABC")
322 * . Collator::EComparisonResult result =
323 * . myCollation->compare(abc, 3, ABC, 3);
324 * . myCollation->setStrength(Collator::TERTIARY);
325 * . // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
326 * . // (with tertiary difference between "abc" and "ABC")
327 * . result = myCollation->compare(abc, 3, ABC, 3);
328 * </pre>
329 * @param source the source string array to be compared with.
330 * @param sourceLength the length of the source string array. If this value
331 * is equal to -1, the string array is null-terminated.
332 * @param target the string that is to be compared with the source string.
333 * @param targetLength the length of the target string array. If this value
334 * is equal to -1, the string array is null-terminated.
335 * @return Returns an enum value. GREATER if source is greater than target;
336 * EQUAL if source is equal to target; LESS if source is less than
337 * target
338 * @deprecated ICU 2.6 Use overload with UErrorCode&
339 */
340 virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
341 const UChar* target, int32_t targetLength)
342 const;
343
344 /**
345 * The comparison function compares the character data stored in two
346 * different string arrays. Returns information about whether a string array
347 * is less than, greater than or equal to another string array.
348 * @param source the source string array to be compared with.
349 * @param sourceLength the length of the source string array. If this value
350 * is equal to -1, the string array is null-terminated.
351 * @param target the string that is to be compared with the source string.
352 * @param targetLength the length of the target string array. If this value
353 * is equal to -1, the string array is null-terminated.
354 * @param status possible error code
355 * @return Returns an enum value. UCOL_GREATER if source is greater
356 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
357 * than target
358 * @stable ICU 2.6
359 */
360 virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
361 const UChar* target, int32_t targetLength,
362 UErrorCode &status) const;
363
364 /**
365 * Transforms a specified region of the string into a series of characters
366 * that can be compared with CollationKey.compare. Use a CollationKey when
367 * you need to do repeated comparisons on the same string. For a single
368 * comparison the compare method will be faster.
369 * @param source the source string.
370 * @param key the transformed key of the source string.
371 * @param status the error code status.
372 * @return the transformed key.
373 * @see CollationKey
374 * @deprecated ICU 2.8 Use getSortKey(...) instead
375 */
376 virtual CollationKey& getCollationKey(const UnicodeString& source,
377 CollationKey& key,
378 UErrorCode& status) const;
379
380 /**
381 * Transforms a specified region of the string into a series of characters
382 * that can be compared with CollationKey.compare. Use a CollationKey when
383 * you need to do repeated comparisons on the same string. For a single
384 * comparison the compare method will be faster.
385 * @param source the source string.
386 * @param sourceLength the length of the source string.
387 * @param key the transformed key of the source string.
388 * @param status the error code status.
389 * @return the transformed key.
390 * @see CollationKey
391 * @deprecated ICU 2.8 Use getSortKey(...) instead
392 */
393 virtual CollationKey& getCollationKey(const UChar *source,
394 int32_t sourceLength,
395 CollationKey& key,
396 UErrorCode& status) const;
397
398 /**
399 * Generates the hash code for the rule-based collation object.
400 * @return the hash code.
401 * @stable ICU 2.0
402 */
403 virtual int32_t hashCode(void) const;
404
405 /**
406 * Gets the locale of the Collator
407 * @param type can be either requested, valid or actual locale. For more
408 * information see the definition of ULocDataLocaleType in
409 * uloc.h
410 * @param status the error code status.
411 * @return locale where the collation data lives. If the collator
412 * was instantiated from rules, locale is empty.
413 * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
414 */
415 virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
416
417 /**
418 * Gets the table-based rules for the collation object.
419 * @return returns the collation rules that the table collation object was
420 * created from.
421 * @stable ICU 2.0
422 */
423 const UnicodeString& getRules(void) const;
424
425 /**
426 * Gets the version information for a Collator.
427 * @param info the version # information, the result will be filled in
428 * @stable ICU 2.0
429 */
430 virtual void getVersion(UVersionInfo info) const;
431
432 /**
433 * Return the maximum length of any expansion sequences that end with the
434 * specified comparison order.
435 * @param order a collation order returned by previous or next.
436 * @return maximum size of the expansion sequences ending with the collation
437 * element or 1 if collation element does not occur at the end of
438 * any expansion sequence
439 * @see CollationElementIterator#getMaxExpansion
440 * @stable ICU 2.0
441 */
442 int32_t getMaxExpansion(int32_t order) const;
443
444 /**
445 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
446 * method is to implement a simple version of RTTI, since not all C++
447 * compilers support genuine RTTI. Polymorphic operator==() and clone()
448 * methods call this method.
449 * @return The class ID for this object. All objects of a given class have
450 * the same class ID. Objects of other classes have different class
451 * IDs.
452 * @stable ICU 2.0
453 */
454 virtual UClassID getDynamicClassID(void) const;
455
456 /**
457 * Returns the class ID for this class. This is useful only for comparing to
458 * a return value from getDynamicClassID(). For example:
459 * <pre>
460 * Base* polymorphic_pointer = createPolymorphicObject();
461 * if (polymorphic_pointer->getDynamicClassID() ==
462 * Derived::getStaticClassID()) ...
463 * </pre>
464 * @return The class ID for all objects of this class.
465 * @stable ICU 2.0
466 */
467 static UClassID U_EXPORT2 getStaticClassID(void);
468
469 /**
470 * Returns the binary format of the class's rules. The format is that of
471 * .col files.
472 * @param length Returns the length of the data, in bytes
473 * @param status the error code status.
474 * @return memory, owned by the caller, of size 'length' bytes.
475 * @stable ICU 2.2
476 */
477 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
478
479 /**
480 * Returns current rules. Delta defines whether full rules are returned or
481 * just the tailoring.
482 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
483 * @param buffer UnicodeString to store the result rules
484 * @stable ICU 2.2
485 */
486 void getRules(UColRuleOption delta, UnicodeString &buffer);
487
488 /**
489 * Universal attribute setter
490 * @param attr attribute type
491 * @param value attribute value
492 * @param status to indicate whether the operation went on smoothly or there were errors
493 * @stable ICU 2.2
494 */
495 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
496 UErrorCode &status);
497
498 /**
499 * Universal attribute getter.
500 * @param attr attribute type
501 * @param status to indicate whether the operation went on smoothly or there were errors
502 * @return attribute value
503 * @stable ICU 2.2
504 */
505 virtual UColAttributeValue getAttribute(UColAttribute attr,
506 UErrorCode &status);
507
508 /**
509 * Sets the variable top to a collation element value of a string supplied.
510 * @param varTop one or more (if contraction) UChars to which the variable top should be set
511 * @param len length of variable top string. If -1 it is considered to be zero terminated.
512 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
513 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
514 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
515 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
516 * @stable ICU 2.0
517 */
518 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
519
520 /**
521 * Sets the variable top to a collation element value of a string supplied.
522 * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
523 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
524 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
525 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
526 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
527 * @stable ICU 2.0
528 */
529 virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
530
531 /**
532 * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
533 * Lower 16 bits are ignored.
534 * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
535 * @param status error code (not changed by function)
536 * @stable ICU 2.0
537 */
538 virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
539
540 /**
541 * Gets the variable top value of a Collator.
542 * Lower 16 bits are undefined and should be ignored.
543 * @param status error code (not changed by function). If error code is set, the return value is undefined.
544 * @stable ICU 2.0
545 */
546 virtual uint32_t getVariableTop(UErrorCode &status) const;
547
548 /**
549 * Get an UnicodeSet that contains all the characters and sequences tailored in
550 * this collator.
551 * @param status error code of the operation
552 * @return a pointer to a UnicodeSet object containing all the
553 * code points and sequences that may sort differently than
554 * in the UCA. The object must be disposed of by using delete
555 * @stable ICU 2.4
556 */
557 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
558
559 /**
560 * Thread safe cloning operation.
561 * @return pointer to the new clone, user should remove it.
562 * @stable ICU 2.2
563 */
564 virtual Collator* safeClone(void);
565
566 /**
567 * Get the sort key as an array of bytes from an UnicodeString.
568 * @param source string to be processed.
569 * @param result buffer to store result in. If NULL, number of bytes needed
570 * will be returned.
571 * @param resultLength length of the result buffer. If it is not large enough, the
572 * buffer will be filled to capacity.
573 * @return Number of bytes needed for storing the sort key
574 * @stable ICU 2.0
575 */
576 virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
577 int32_t resultLength) const;
578
579 /**
580 * Get the sort key as an array of bytes from an UChar buffer.
581 * @param source string to be processed.
582 * @param sourceLength length of string to be processed. If -1, the string
583 * is 0 terminated and length will be decided by the function.
584 * @param result buffer to store result in. If NULL, number of bytes needed
585 * will be returned.
586 * @param resultLength length of the result buffer. If it is not large enough, the
587 * buffer will be filled to capacity.
588 * @return Number of bytes needed for storing the sort key
589 * @stable ICU 2.2
590 */
591 virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
592 uint8_t *result, int32_t resultLength) const;
593
594 /**
595 * Determines the minimum strength that will be used in comparison or
596 * transformation.
597 * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
598 * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
599 * are ignored.
600 * @return the current comparison level.
601 * @see RuleBasedCollator#setStrength
602 * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
603 */
604 virtual ECollationStrength getStrength(void) const;
605
606 /**
607 * Sets the minimum strength to be used in comparison or transformation.
608 * @see RuleBasedCollator#getStrength
609 * @param newStrength the new comparison level.
610 * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
611 */
612 virtual void setStrength(ECollationStrength newStrength);
613
614 private:
615
616 // private static constants -----------------------------------------------
617
618 static const int32_t UNMAPPED;
619 static const int32_t CHARINDEX; // need look up in .commit()
620 static const int32_t EXPANDCHARINDEX; // Expand index follows
621 static const int32_t CONTRACTCHARINDEX; // contract indexes follow
622
623 static const int32_t PRIMARYORDERINCREMENT;
624 static const int32_t SECONDARYORDERINCREMENT;
625 static const int32_t TERTIARYORDERINCREMENT;
626 static const int32_t PRIMARYORDERMASK;
627 static const int32_t SECONDARYORDERMASK;
628 static const int32_t TERTIARYORDERMASK;
629 static const int32_t IGNORABLEMASK;
630 static const int32_t PRIMARYDIFFERENCEONLY;
631 static const int32_t SECONDARYDIFFERENCEONLY;
632 static const int32_t PRIMARYORDERSHIFT;
633 static const int32_t SECONDARYORDERSHIFT;
634
635 static const int32_t COLELEMENTSTART;
636 static const int32_t PRIMARYLOWZEROMASK;
637 static const int32_t RESETSECONDARYTERTIARY;
638 static const int32_t RESETTERTIARY;
639
640 static const int32_t PRIMIGNORABLE;
641
642 // private data members ---------------------------------------------------
643
644 UBool dataIsOwned;
645
646 UBool isWriteThroughAlias;
647
648 /**
649 * c struct for collation. All initialisation for it has to be done through
650 * setUCollator().
651 */
652 UCollator *ucollator;
653
654 /**
655 * Rule UnicodeString
656 */
657 UnicodeString *urulestring;
658
659 // friend classes --------------------------------------------------------
660
661 /**
662 * Used to iterate over collation elements in a character source.
663 */
664 friend class CollationElementIterator;
665
666 /**
667 * Collator ONLY needs access to RuleBasedCollator(const Locale&,
668 * UErrorCode&)
669 */
670 friend class Collator;
671
672 /**
673 * Searching over collation elements in a character source
674 */
675 friend class StringSearch;
676
677 // private constructors --------------------------------------------------
678
679 /**
680 * Default constructor
681 */
682 RuleBasedCollator();
683
684 /**
685 * Constructor that takes in a UCollator struct
686 * @param collator UCollator struct
687 * @param rule the rule for the collator.
688 */
689 RuleBasedCollator(UCollator *collator, UnicodeString *rule);
690
691 /**
692 * RuleBasedCollator constructor. This constructor takes a locale. The
693 * only caller of this class should be Collator::createInstance(). If
694 * createInstance() happens to know that the requested locale's collation is
695 * implemented as a RuleBasedCollator, it can then call this constructor.
696 * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
697 * COLLATION TABLE. It does this by falling back to defaults.
698 * @param desiredLocale locale used
699 * @param status error code status
700 */
701 RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
702
703 /**
704 * common constructor implementation
705 *
706 * @param rules the collation rules to build the collation table from.
707 * @param collationStrength default strength for comparison
708 * @param decompositionMode the normalisation mode
709 * @param status reporting a success or an error.
710 */
711 void
712 construct(const UnicodeString& rules,
713 UColAttributeValue collationStrength,
714 UColAttributeValue decompositionMode,
715 UErrorCode& status);
716
717 // private methods -------------------------------------------------------
718
719 /**
720 * Creates the c struct for ucollator
721 * @param locale desired locale
722 * @param status error status
723 */
724 void setUCollator(const Locale& locale, UErrorCode& status);
725
726 /**
727 * Creates the c struct for ucollator
728 * @param locale desired locale name
729 * @param status error status
730 */
731 void setUCollator(const char* locale, UErrorCode& status);
732
733 /**
734 * Creates the c struct for ucollator. This used internally by StringSearch.
735 * Hence the responsibility of cleaning up the ucollator is not done by
736 * this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
737 * @param collator new ucollator data
738 * @param rules corresponding collation rules
739 */
740 void setUCollator(UCollator *collator, UnicodeString *rules);
741
742 public:
743 /**
744 * Get UCollator data struct. Used only by StringSearch & intltest.
745 * @return UCollator data struct
746 * @internal
747 */
748 const UCollator * getUCollator();
749
750 protected:
751 /**
752 * Used internally by registration to define the requested and valid locales.
753 * @param requestedLocale the requested locale
754 * @param validLocale the valid locale
755 * @internal
756 */
757 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale);
758
759 private:
760
761 // if not owned and not a write through alias, copy the ucollator
762 void checkOwned(void);
763
764 // utility to init rule string used by checkOwned and construct
765 void setRuleStringFromCollator(UErrorCode& status);
766
767 /**
768 * Converts C's UCollationResult to EComparisonResult
769 * @param result member of the enum UCollationResult
770 * @return EComparisonResult equivalent of UCollationResult
771 * @deprecated ICU 2.6. We will not need it.
772 */
773 Collator::EComparisonResult getEComparisonResult(
774 const UCollationResult &result) const;
775
776 /**
777 * Converts C's UCollationStrength to ECollationStrength
778 * @param strength member of the enum UCollationStrength
779 * @return ECollationStrength equivalent of UCollationStrength
780 */
781 Collator::ECollationStrength getECollationStrength(
782 const UCollationStrength &strength) const;
783
784 /**
785 * Converts C++'s ECollationStrength to UCollationStrength
786 * @param strength member of the enum ECollationStrength
787 * @return UCollationStrength equivalent of ECollationStrength
788 */
789 UCollationStrength getUCollationStrength(
790 const Collator::ECollationStrength &strength) const;
791 };
792
793 // inline method implementation ---------------------------------------------
794
795 inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
796 {
797     const char *localeName = locale.getName(); // delegate to the name-based overload
798     setUCollator(localeName, status);
799 }
800
801
802 inline void RuleBasedCollator::setUCollator(UCollator *collator, UnicodeString *rules)
803 {
804     // Release the current collator and rule string only when this object owns them.
805     if (dataIsOwned && ucollator) {
806         ucol_close(ucollator);
807         delete urulestring;
808     }
809     isWriteThroughAlias = TRUE; // the supplied objects are aliased, not owned
810     dataIsOwned = FALSE;
811     urulestring = rules;
812     ucollator = collator;
813 }
814
815 inline const UCollator * RuleBasedCollator::getUCollator()
816 {
817     return this->ucollator; // hand back the wrapped C collator struct as-is
818 }
819
820 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
821 const UCollationResult &result) const
822 {
823     // Map the C API result onto the C++ enum; anything that is neither
824     // UCOL_LESS nor UCOL_EQUAL is reported as GREATER.
825     if (result == UCOL_LESS) {
826         return Collator::LESS;
827     }
828     if (result == UCOL_EQUAL) {
829         return Collator::EQUAL;
830     }
831     return Collator::GREATER;
832 }
833
834 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
835 const UCollationStrength &strength) const
836 {
837     // Translate the C strength constant to its C++ counterpart; any
838     // value other than the four levels tested below is reported as
839     // Collator::IDENTICAL.
840     if (strength == UCOL_PRIMARY) {
841         return Collator::PRIMARY;
842     } else if (strength == UCOL_SECONDARY) {
843         return Collator::SECONDARY;
844     } else if (strength == UCOL_TERTIARY) {
845         return Collator::TERTIARY;
846     } else if (strength == UCOL_QUATERNARY) {
847         return Collator::QUATERNARY;
848     }
849     return Collator::IDENTICAL;
850 }
851
852 inline UCollationStrength RuleBasedCollator::getUCollationStrength(
853 const Collator::ECollationStrength &strength) const
854 {
855     // Translate the C++ strength constant to its C API counterpart; any
856     // value other than the four levels tested below is reported as
857     // UCOL_IDENTICAL.
858     if (strength == Collator::PRIMARY) {
859         return UCOL_PRIMARY;
860     } else if (strength == Collator::SECONDARY) {
861         return UCOL_SECONDARY;
862     } else if (strength == Collator::TERTIARY) {
863         return UCOL_TERTIARY;
864     } else if (strength == Collator::QUATERNARY) {
865         return UCOL_QUATERNARY;
866     }
867     return UCOL_IDENTICAL;
868 }
869
870 U_NAMESPACE_END
871
872 #endif /* #if !UCONFIG_NO_COLLATION */
873
874 #endif