]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
51004dcb A |
3 | * Copyright (C) 1996-2012, International Business Machines Corporation and |
4 | * others. All Rights Reserved. | |
b75a7d8f A |
5 | ******************************************************************************* |
6 | */ | |
7 | //=============================================================================== | |
8 | // | |
9 | // File sortkey.cpp | |
10 | // | |
11 | // | |
12 | // | |
13 | // Created by: Helena Shih | |
14 | // | |
15 | // Modification History: | |
16 | // | |
17 | // Date Name Description | |
18 | // | |
19 | // 6/20/97 helena Java class name change. | |
20 | // 6/23/97 helena Added comments to make code more readable. | |
21 | // 6/26/98 erm Changed to use byte arrays instead of UnicodeString | |
22 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, | |
23 | // Cleaned up operator= | |
24 | // 07/12/99 helena HPUX 11 CC port. | |
25 | // 03/06/01 synwee Modified compareTo, to handle the result of | |
26 | // 2 string similar in contents, but one is longer | |
27 | // than the other | |
28 | //=============================================================================== | |
29 | ||
30 | #include "unicode/utypes.h" | |
31 | ||
32 | #if !UCONFIG_NO_COLLATION | |
33 | ||
34 | #include "unicode/sortkey.h" | |
35 | #include "cmemory.h" | |
4388f060 A |
36 | #include "uelement.h" |
37 | #include "ustr_imp.h" | |
b75a7d8f A |
38 | |
39 | U_NAMESPACE_BEGIN | |
40 | ||
// Sentinel values kept in the cached hash code field.
//
// kInvalidHashCode: the hash code has not been computed yet (or was
//                   invalidated) and must be recomputed on demand.
// kEmptyHashCode:   the hash code of an empty key; also substituted for any
//                   computed hash code that would collide with one of the
//                   other sentinels.
// kBogusHashCode:   marks a "bogus" (invalid) key; it replaces a separate
//                   fBogus flag.
static const int32_t kInvalidHashCode = 0;
static const int32_t kEmptyHashCode = 1;
static const int32_t kBogusHashCode = 2;
b75a7d8f | 48 | |
// NOTE(review): presumably expands to the class-ID (RTTI) boilerplate that
// UObject subclasses provide for CollationKey -- confirm against the macro's
// definition in the UObject header.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
b75a7d8f A |
50 | |
51 | CollationKey::CollationKey() | |
51004dcb A |
52 | : UObject(), fFlagAndLength(0), |
53 | fHashCode(kEmptyHashCode) | |
b75a7d8f A |
54 | { |
55 | } | |
56 | ||
57 | // Create a collation key from a bit array. | |
58 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) | |
51004dcb | 59 | : UObject(), fFlagAndLength(count), |
b75a7d8f A |
60 | fHashCode(kInvalidHashCode) |
61 | { | |
51004dcb A |
62 | if (count < 0 || (newValues == NULL && count != 0) || |
63 | (count > getCapacity() && reallocate(count, 0) == NULL)) { | |
b75a7d8f A |
64 | setToBogus(); |
65 | return; | |
66 | } | |
67 | ||
51004dcb A |
68 | if (count > 0) { |
69 | uprv_memcpy(getBytes(), newValues, count); | |
70 | } | |
b75a7d8f A |
71 | } |
72 | ||
73 | CollationKey::CollationKey(const CollationKey& other) | |
51004dcb A |
74 | : UObject(other), fFlagAndLength(other.getLength()), |
75 | fHashCode(other.fHashCode) | |
b75a7d8f | 76 | { |
51004dcb | 77 | if (other.isBogus()) |
b75a7d8f A |
78 | { |
79 | setToBogus(); | |
80 | return; | |
81 | } | |
82 | ||
51004dcb A |
83 | int32_t length = fFlagAndLength; |
84 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
b75a7d8f A |
85 | setToBogus(); |
86 | return; | |
87 | } | |
88 | ||
51004dcb A |
89 | if (length > 0) { |
90 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
b75a7d8f A |
91 | } |
92 | } | |
93 | ||
94 | CollationKey::~CollationKey() | |
95 | { | |
51004dcb | 96 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
b75a7d8f A |
97 | } |
98 | ||
51004dcb A |
99 | uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { |
100 | uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); | |
101 | if(newBytes == NULL) { return NULL; } | |
102 | if(length > 0) { | |
103 | uprv_memcpy(newBytes, getBytes(), length); | |
b75a7d8f | 104 | } |
51004dcb A |
105 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
106 | fUnion.fFields.fBytes = newBytes; | |
107 | fUnion.fFields.fCapacity = newCapacity; | |
108 | fFlagAndLength |= 0x80000000; | |
109 | return newBytes; | |
4388f060 A |
110 | } |
111 | ||
112 | void CollationKey::setLength(int32_t newLength) { | |
51004dcb A |
113 | // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); |
114 | fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; | |
b75a7d8f A |
115 | fHashCode = kInvalidHashCode; |
116 | } | |
117 | ||
118 | // set the key to an empty state | |
119 | CollationKey& | |
120 | CollationKey::reset() | |
121 | { | |
51004dcb | 122 | fFlagAndLength &= 0x80000000; |
b75a7d8f A |
123 | fHashCode = kEmptyHashCode; |
124 | ||
125 | return *this; | |
126 | } | |
127 | ||
128 | // set the key to a "bogus" or invalid state | |
129 | CollationKey& | |
130 | CollationKey::setToBogus() | |
131 | { | |
51004dcb A |
132 | fFlagAndLength &= 0x80000000; |
133 | fHashCode = kBogusHashCode; | |
b75a7d8f A |
134 | |
135 | return *this; | |
136 | } | |
137 | ||
138 | UBool | |
139 | CollationKey::operator==(const CollationKey& source) const | |
140 | { | |
51004dcb A |
141 | return getLength() == source.getLength() && |
142 | (this == &source || | |
143 | uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); | |
b75a7d8f A |
144 | } |
145 | ||
146 | const CollationKey& | |
147 | CollationKey::operator=(const CollationKey& other) | |
148 | { | |
149 | if (this != &other) | |
150 | { | |
151 | if (other.isBogus()) | |
152 | { | |
153 | return setToBogus(); | |
154 | } | |
155 | ||
51004dcb A |
156 | int32_t length = other.getLength(); |
157 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
158 | return setToBogus(); | |
b75a7d8f | 159 | } |
51004dcb A |
160 | if (length > 0) { |
161 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
b75a7d8f | 162 | } |
51004dcb A |
163 | fFlagAndLength = (fFlagAndLength & 0x80000000) | length; |
164 | fHashCode = other.fHashCode; | |
b75a7d8f A |
165 | } |
166 | ||
167 | return *this; | |
168 | } | |
169 | ||
170 | // Bitwise comparison for the collation keys. | |
b75a7d8f A |
171 | Collator::EComparisonResult |
172 | CollationKey::compareTo(const CollationKey& target) const | |
173 | { | |
51004dcb A |
174 | UErrorCode errorCode = U_ZERO_ERROR; |
175 | return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); | |
b75a7d8f A |
176 | } |
177 | ||
178 | // Bitwise comparison for the collation keys. | |
179 | UCollationResult | |
180 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const | |
181 | { | |
182 | if(U_SUCCESS(status)) { | |
51004dcb A |
183 | const uint8_t *src = getBytes(); |
184 | const uint8_t *tgt = target.getBytes(); | |
b75a7d8f A |
185 | |
186 | // are we comparing the same string | |
187 | if (src == tgt) | |
188 | return UCOL_EQUAL; | |
189 | ||
b75a7d8f A |
190 | UCollationResult result; |
191 | ||
192 | // are we comparing different lengths? | |
51004dcb A |
193 | int32_t minLength = getLength(); |
194 | int32_t targetLength = target.getLength(); | |
195 | if (minLength < targetLength) { | |
196 | result = UCOL_LESS; | |
197 | } else if (minLength == targetLength) { | |
198 | result = UCOL_EQUAL; | |
199 | } else { | |
200 | minLength = targetLength; | |
201 | result = UCOL_GREATER; | |
b75a7d8f A |
202 | } |
203 | ||
204 | if (minLength > 0) { | |
205 | int diff = uprv_memcmp(src, tgt, minLength); | |
206 | if (diff > 0) { | |
207 | return UCOL_GREATER; | |
208 | } | |
209 | else | |
210 | if (diff < 0) { | |
211 | return UCOL_LESS; | |
212 | } | |
213 | } | |
214 | ||
215 | return result; | |
216 | } else { | |
217 | return UCOL_EQUAL; | |
218 | } | |
219 | } | |
220 | ||
b75a7d8f A |
221 | #ifdef U_USE_COLLATION_KEY_DEPRECATES |
222 | // Create a copy of the byte array. | |
223 | uint8_t* | |
224 | CollationKey::toByteArray(int32_t& count) const | |
225 | { | |
226 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); | |
227 | ||
228 | if (result == NULL) | |
229 | { | |
230 | count = 0; | |
231 | } | |
232 | else | |
233 | { | |
234 | count = fCount; | |
51004dcb A |
235 | if (count > 0) { |
236 | uprv_memcpy(result, fBytes, fCount); | |
237 | } | |
b75a7d8f A |
238 | } |
239 | ||
240 | return result; | |
241 | } | |
242 | #endif | |
243 | ||
51004dcb A |
244 | static int32_t |
245 | computeHashCode(const uint8_t *key, int32_t length) { | |
246 | const char *s = reinterpret_cast<const char *>(key); | |
247 | int32_t hash; | |
248 | if (s == NULL || length == 0) { | |
249 | hash = kEmptyHashCode; | |
250 | } else { | |
251 | hash = ustr_hashCharsN(s, length); | |
252 | if (hash == kInvalidHashCode || hash == kBogusHashCode) { | |
253 | hash = kEmptyHashCode; | |
254 | } | |
255 | } | |
256 | return hash; | |
257 | } | |
258 | ||
b75a7d8f A |
259 | int32_t |
260 | CollationKey::hashCode() const | |
261 | { | |
262 | // (Cribbed from UnicodeString) | |
263 | // We cache the hashCode; when it becomes invalid, due to any change to the | |
264 | // string, we note this by setting it to kInvalidHashCode. [LIU] | |
265 | ||
266 | // Note: This method is semantically const, but physically non-const. | |
267 | ||
268 | if (fHashCode == kInvalidHashCode) | |
269 | { | |
51004dcb | 270 | fHashCode = computeHashCode(getBytes(), getLength()); |
b75a7d8f A |
271 | } |
272 | ||
273 | return fHashCode; | |
274 | } | |
275 | ||
276 | U_NAMESPACE_END | |
277 | ||
73c04bcf A |
278 | U_CAPI int32_t U_EXPORT2 |
279 | ucol_keyHashCode(const uint8_t *key, | |
280 | int32_t length) | |
281 | { | |
51004dcb | 282 | return icu::computeHashCode(key, length); |
73c04bcf A |
283 | } |
284 | ||
b75a7d8f | 285 | #endif /* #if !UCONFIG_NO_COLLATION */ |