]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
374ca955 | 3 | * Copyright (C) 1996-2004, International Business Machines * |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. * |
5 | ****************************************************************************** | |
6 | */ | |
7 | ||
8 | /** | |
9 | * File coll.h | |
374ca955 | 10 | * |
b75a7d8f A |
11 | * Created by: Helena Shih |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 02/5/97 aliu Modified createDefault to load collation data from | |
17 | * binary files when possible. Added related methods | |
18 | * createCollationFromFile, chopLocale, createPathName. | |
19 | * 02/11/97 aliu Added members addToCache, findInCache, and fgCache. | |
20 | * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. | |
21 | * Moved cache out of Collation class. | |
22 | * 02/13/97 aliu Moved several methods out of this class and into | |
23 | * RuleBasedCollator, with modifications. Modified | |
24 | * createDefault() to call new RuleBasedCollator(Locale&) | |
25 | * constructor. General clean up and documentation. | |
26 | * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy | |
27 | * constructor and getDynamicClassID. | |
28 | * 03/25/97 helena Updated with platform independent data types. | |
29 | * 05/06/97 helena Added memory allocation error detection. | |
30 | * 06/20/97 helena Java class name change. | |
31 | * 09/03/97 helena Added createCollationKeyValues(). | |
32 | * 02/10/98 damiba Added compare() with length as parameter. | |
33 | * 04/23/99 stephen Removed EDecompositionMode, merged with | |
34 | * Normalizer::EMode. | |
374ca955 | 35 | * 11/02/99 helena Collator performance enhancements. Eliminates the |
b75a7d8f A |
36 | * UnicodeString construction and special case for NO_OP. |
37 | * 11/23/99 srl More performance enhancements. Inlining of | |
38 | * critical accessors. | |
374ca955 A |
39 | * 05/15/00 helena Added version information API. |
40 | * 01/29/01 synwee Modified into a C++ wrapper which calls C apis | |
41 | * (ucoll.h). | |
b75a7d8f A |
42 | */ |
43 | ||
44 | #ifndef COLL_H | |
45 | #define COLL_H | |
46 | ||
47 | #include "unicode/utypes.h" | |
48 | ||
49 | #if !UCONFIG_NO_COLLATION | |
50 | ||
51 | #include "unicode/uobject.h" | |
52 | #include "unicode/ucol.h" | |
53 | #include "unicode/normlzr.h" | |
54 | #include "unicode/locid.h" | |
55 | #include "unicode/uniset.h" | |
56 | ||
57 | U_NAMESPACE_BEGIN | |
58 | ||
59 | class StringEnumeration; | |
60 | ||
374ca955 | 61 | #if !UCONFIG_NO_SERVICE |
b75a7d8f | 62 | /** |
374ca955 | 63 | * @stable ICU 2.6 |
b75a7d8f A |
64 | */ |
65 | typedef const void* URegistryKey; | |
66 | ||
67 | /** | |
374ca955 | 68 | * @stable ICU 2.6 |
b75a7d8f A |
69 | */ |
70 | class CollatorFactory; | |
374ca955 | 71 | #endif |
b75a7d8f A |
72 | |
73 | /** | |
74 | * @stable ICU 2.0 | |
75 | */ | |
76 | class CollationKey; | |
77 | ||
78 | /** | |
374ca955 | 79 | * The <code>Collator</code> class performs locale-sensitive string |
b75a7d8f | 80 | * comparison.<br> |
374ca955 | 81 | * You use this class to build searching and sorting routines for natural |
b75a7d8f | 82 | * language text.<br> |
374ca955 A |
83 | * <em>Important: </em>The ICU collation service has been reimplemented |
84 | * in order to achieve better performance and UCA compliance. | |
85 | * For details, see the | |
b75a7d8f A |
86 | * <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm"> |
87 | * collation design document</a>. | |
88 | * <p> | |
374ca955 A |
89 | * <code>Collator</code> is an abstract base class. Subclasses implement |
90 | * specific collation strategies. One subclass, | |
91 | * <code>RuleBasedCollator</code>, is currently provided and is applicable | |
92 | * to a wide set of languages. Other subclasses may be created to handle more | |
b75a7d8f A |
93 | * specialized needs. |
94 | * <p> | |
374ca955 A |
95 | * Like other locale-sensitive classes, you can use the static factory method, |
96 | * <code>createInstance</code>, to obtain the appropriate | |
97 | * <code>Collator</code> object for a given locale. You will only need to | |
98 | * look at the subclasses of <code>Collator</code> if you need to | |
99 | * understand the details of a particular collation strategy or if you need to | |
b75a7d8f A |
100 | * modify that strategy. |
101 | * <p> | |
374ca955 | 102 | * The following example shows how to compare two strings using the |
b75a7d8f | 103 | * <code>Collator</code> for the default locale. |
374ca955 | 104 | * \htmlonly<blockquote>\endhtmlonly |
b75a7d8f A |
105 | * <pre> |
106 | * \code | |
107 | * // Compare two strings in the default locale | |
108 | * UErrorCode success = U_ZERO_ERROR; | |
109 | * Collator* myCollator = Collator::createInstance(success); | |
110 | * if (myCollator->compare("abc", "ABC") < 0) | |
111 | * cout << "abc is less than ABC" << endl; | |
112 | * else | |
113 | * cout << "abc is greater than or equal to ABC" << endl; | |
114 | * \endcode | |
115 | * </pre> | |
374ca955 | 116 | * \htmlonly</blockquote>\endhtmlonly |
b75a7d8f | 117 | * <p> |
374ca955 A |
118 | * You can set a <code>Collator</code>'s <em>strength</em> property to |
119 | * determine the level of difference considered significant in comparisons. | |
120 | * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, | |
121 | * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>. | |
122 | * The exact assignment of strengths to language features is locale dependent. | |
123 | * For example, in Czech, "e" and "f" are considered primary differences, | |
124 | * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary | |
125 | * differences and "e" and "e" are identical. The following shows how both case | |
126 | * and accents could be ignored for US English. | |
127 | * \htmlonly<blockquote>\endhtmlonly | |
b75a7d8f A |
128 | * <pre> |
129 | * \code | |
374ca955 | 130 | * //Get the Collator for US English and set its strength to PRIMARY |
b75a7d8f | 131 | * UErrorCode success = U_ZERO_ERROR; |
374ca955 | 132 | * Collator* usCollator = Collator::createInstance(Locale::US, success); |
b75a7d8f A |
133 | * usCollator->setStrength(Collator::PRIMARY); |
134 | * if (usCollator->compare("abc", "ABC") == 0) | |
374ca955 | 135 | * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl; |
b75a7d8f A |
136 | * \endcode |
137 | * </pre> | |
374ca955 | 138 | * \htmlonly</blockquote>\endhtmlonly |
b75a7d8f | 139 | * <p> |
374ca955 A |
140 | * For comparing strings exactly once, the <code>compare</code> method |
141 | * provides the best performance. When sorting a list of strings however, it | |
142 | * is generally necessary to compare each string multiple times. In this case, | |
143 | * sort keys provide better performance. The <code>getSortKey</code> methods | |
144 | * convert a string to a series of bytes that can be compared bitwise against | |
145 | * other sort keys using <code>strcmp()</code>. Sort keys are written as | |
146 | * zero-terminated byte strings. They consist of several substrings, one for | |
b75a7d8f | 147 | * each collation strength level, that are delimited by 0x01 bytes. |
374ca955 A |
148 | * If the string code points are appended for UCOL_IDENTICAL, then they are |
149 | * processed for correct code point order comparison and may contain 0x01 | |
b75a7d8f A |
150 | * bytes but not zero bytes. |
151 | * </p> | |
152 | * <p> | |
374ca955 | 153 | * An older set of APIs returns a <code>CollationKey</code> object that wraps |
b75a7d8f | 154 | * the sort key bytes instead of returning the bytes themselves. |
374ca955 | 155 | * Its use is deprecated, but it is still available for compatibility with |
b75a7d8f A |
156 | * Java. |
157 | * </p> | |
158 | * <p> | |
159 | * <strong>Note:</strong> <code>Collator</code>s with different Locale | |
374ca955 A |
160 | * and CollationStrength settings will return different sort |
161 | * orders for the same set of strings. Locales have specific collation rules, | |
162 | * and the way in which secondary and tertiary differences are taken into | |
163 | * account, for example, will result in a different sorting order for the same | |
b75a7d8f A |
164 | * strings. |
165 | * </p> | |
166 | * @see RuleBasedCollator | |
167 | * @see CollationKey | |
168 | * @see CollationElementIterator | |
169 | * @see Locale | |
170 | * @see Normalizer | |
171 | * @version 2.0 11/15/01 | |
172 | */ | |
173 | ||
174 | class U_I18N_API Collator : public UObject { | |
175 | public: | |
176 | ||
374ca955 A |
177 | // Collator public enums ----------------------------------------------- |
178 | ||
179 | /** | |
180 | * Base letter represents a primary difference. Set comparison level to | |
181 | * PRIMARY to ignore secondary and tertiary differences.<br> | |
182 | * Use this to set the strength of a Collator object.<br> | |
183 | * Example of primary difference, "abc" < "abd" | |
184 | * | |
185 | * Diacritical differences on the same base letter represent a secondary | |
186 | * difference. Set comparison level to SECONDARY to ignore tertiary | |
187 | * differences. Use this to set the strength of a Collator object.<br> | |
188 |