]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
4388f060 | 3 | * Copyright (C) 1996-2011, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | */ | |
7 | //=============================================================================== | |
8 | // | |
9 | // File sortkey.cpp | |
10 | // | |
11 | // | |
12 | // | |
13 | // Created by: Helena Shih | |
14 | // | |
15 | // Modification History: | |
16 | // | |
17 | // Date Name Description | |
18 | // | |
19 | // 6/20/97 helena Java class name change. | |
20 | // 6/23/97 helena Added comments to make code more readable. | |
21 | // 6/26/98 erm Canged to use byte arrays instead of UnicodeString | |
22 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, | |
23 | // Cleaned up operator= | |
24 | // 07/12/99 helena HPUX 11 CC port. | |
25 | // 03/06/01 synwee Modified compareTo, to handle the result of | |
26 | // 2 string similar in contents, but one is longer | |
27 | // than the other | |
28 | //=============================================================================== | |
29 | ||
30 | #include "unicode/utypes.h" | |
31 | ||
32 | #if !UCONFIG_NO_COLLATION | |
33 | ||
34 | #include "unicode/sortkey.h" | |
35 | #include "cmemory.h" | |
4388f060 A |
36 | #include "uelement.h" |
37 | #include "ustr_imp.h" | |
b75a7d8f A |
38 | |
39 | U_NAMESPACE_BEGIN | |
40 | ||
41 | // A hash code of kInvalidHashCode indicates that the has code needs | |
42 | // to be computed. A hash code of kEmptyHashCode is used for empty keys | |
43 | // and for any key whose computed hash code is kInvalidHashCode. | |
44 | #define kInvalidHashCode ((int32_t)0) | |
45 | #define kEmptyHashCode ((int32_t)1) | |
46 | ||
374ca955 | 47 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
b75a7d8f A |
48 | |
49 | CollationKey::CollationKey() | |
50 | : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), | |
51 | fHashCode(kEmptyHashCode), fBytes(NULL) | |
52 | { | |
53 | } | |
54 | ||
55 | // Create a collation key from a bit array. | |
56 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) | |
57 | : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), | |
58 | fHashCode(kInvalidHashCode) | |
59 | { | |
60 | fBytes = (uint8_t *)uprv_malloc(count); | |
61 | ||
62 | if (fBytes == NULL) | |
63 | { | |
64 | setToBogus(); | |
65 | return; | |
66 | } | |
67 | ||
68 | uprv_memcpy(fBytes, newValues, fCount); | |
69 | } | |
70 | ||
71 | CollationKey::CollationKey(const CollationKey& other) | |
72 | : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), | |
73 | fHashCode(other.fHashCode), fBytes(NULL) | |
74 | { | |
75 | if (other.fBogus) | |
76 | { | |
77 | setToBogus(); | |
78 | return; | |
79 | } | |
80 | ||
81 | fBytes = (uint8_t *)uprv_malloc(fCapacity); | |
82 | ||
83 | if (fBytes == NULL) | |
84 | { | |
85 | setToBogus(); | |
86 | return; | |
87 | } | |
88 | ||
89 | uprv_memcpy(fBytes, other.fBytes, other.fCount); | |
90 | if(fCapacity>fCount) { | |
91 | uprv_memset(fBytes+fCount, 0, fCapacity-fCount); | |
92 | } | |
93 | } | |
94 | ||
95 | CollationKey::~CollationKey() | |
96 | { | |
97 | uprv_free(fBytes); | |
98 | } | |
99 | ||
4388f060 | 100 | void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) { |
b75a7d8f A |
101 | if(fBytes != NULL) { |
102 | uprv_free(fBytes); | |
103 | } | |
b75a7d8f | 104 | fBytes = values; |
4388f060 A |
105 | fCapacity = capacity; |
106 | setLength(count); | |
107 | } | |
108 | ||
109 | void CollationKey::setLength(int32_t newLength) { | |
110 | fBogus = FALSE; | |
111 | fCount = newLength; | |
b75a7d8f A |
112 | fHashCode = kInvalidHashCode; |
113 | } | |
114 | ||
115 | // set the key to an empty state | |
116 | CollationKey& | |
117 | CollationKey::reset() | |
118 | { | |
119 | fCount = 0; | |
120 | fBogus = FALSE; | |
121 | fHashCode = kEmptyHashCode; | |
122 | ||
123 | return *this; | |
124 | } | |
125 | ||
126 | // set the key to a "bogus" or invalid state | |
127 | CollationKey& | |
128 | CollationKey::setToBogus() | |
129 | { | |
130 | uprv_free(fBytes); | |
131 | fBytes = NULL; | |
132 | ||
133 | fCapacity = 0; | |
134 | fCount = 0; | |
135 | fHashCode = kInvalidHashCode; | |
136 | ||
137 | return *this; | |
138 | } | |
139 | ||
140 | UBool | |
141 | CollationKey::operator==(const CollationKey& source) const | |
142 | { | |
143 | return (this->fCount == source.fCount && | |
144 | (this->fBytes == source.fBytes || | |
145 | uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); | |
146 | } | |
147 | ||
148 | const CollationKey& | |
149 | CollationKey::operator=(const CollationKey& other) | |
150 | { | |
151 | if (this != &other) | |
152 | { | |
153 | if (other.isBogus()) | |
154 | { | |
155 | return setToBogus(); | |
156 | } | |
157 | ||
158 | if (other.fBytes != NULL) | |
159 | { | |
160 | ensureCapacity(other.fCount); | |
161 | ||
162 | if (isBogus()) | |
163 | { | |
164 | return *this; | |
165 | } | |
166 | ||
167 | fHashCode = other.fHashCode; | |
168 | uprv_memcpy(fBytes, other.fBytes, fCount); | |
169 | } | |
170 | else | |
171 | { | |
172 | fCount = 0; | |
173 | fBogus = FALSE; | |
174 | fHashCode = kEmptyHashCode; | |
175 | } | |
176 | } | |
177 | ||
178 | return *this; | |
179 | } | |
180 | ||
181 | // Bitwise comparison for the collation keys. | |
182 | // NOTE: this is somewhat messy 'cause we can't count | |
183 | // on memcmp returning the exact values which match | |
184 | // Collator::EComparisonResult | |
185 | Collator::EComparisonResult | |
186 | CollationKey::compareTo(const CollationKey& target) const | |
187 | { | |
188 | uint8_t *src = this->fBytes; | |
189 | uint8_t *tgt = target.fBytes; | |
190 | ||
191 | // are we comparing the same string | |
192 | if (src == tgt) | |
193 | return Collator::EQUAL; | |
194 | ||
195 | /* | |
196 | int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; | |
197 | if (count == 0) | |
198 | { | |
199 | // If count is 0, at least one of the keys is empty. | |
200 | // An empty key is always LESS than a non-empty one | |
201 | // and EQUAL to another empty | |
202 | if (this->fCount < target.fCount) | |
203 | { | |
204 | return Collator::LESS; | |
205 | } | |
206 | ||
207 | if (this->fCount > target.fCount) | |
208 | { | |
209 | return Collator::GREATER; | |
210 | } | |
211 | return Collator::EQUAL; | |
212 | } | |
213 | */ | |
214 | ||
215 | int minLength; | |
216 | Collator::EComparisonResult result; | |
217 | ||
218 | // are we comparing different lengths? | |
219 | if (this->fCount != target.fCount) { | |
220 | if (this->fCount < target.fCount) { | |
221 | minLength = this->fCount; | |
222 | result = Collator::LESS; | |
223 | } | |
224 | else { | |
225 | minLength = target.fCount; | |
226 | result = Collator::GREATER; | |
227 | } | |
228 | } | |
229 | else { | |
230 | minLength = target.fCount; | |
231 | result = Collator::EQUAL; | |
232 | } | |
233 | ||
234 | if (minLength > 0) { | |
235 | int diff = uprv_memcmp(src, tgt, minLength); | |
236 | if (diff > 0) { | |
237 | return Collator::GREATER; | |
238 | } | |
239 | else | |
240 | if (diff < 0) { | |
241 | return Collator::LESS; | |
242 | } | |
243 | } | |
244 | ||
245 | return result; | |
246 | /* | |
247 | if (result < 0) | |
248 | { | |
249 | return Collator::LESS; | |
250 | } | |
251 | ||
252 | if (result > 0) | |
253 | { | |
254 | return Collator::GREATER; | |
255 | } | |
256 | return Collator::EQUAL; | |
257 | */ | |
258 | } | |
259 | ||
260 | // Bitwise comparison for the collation keys. | |
261 | UCollationResult | |
262 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const | |
263 | { | |
264 | if(U_SUCCESS(status)) { | |
265 | uint8_t *src = this->fBytes; | |
266 | uint8_t *tgt = target.fBytes; | |
267 | ||
268 | // are we comparing the same string | |
269 | if (src == tgt) | |
270 | return UCOL_EQUAL; | |
271 | ||
272 | int minLength; | |
273 | UCollationResult result; | |
274 | ||
275 | // are we comparing different lengths? | |
276 | if (this->fCount != target.fCount) { | |
277 | if (this->fCount < target.fCount) { | |
278 | minLength = this->fCount; | |
279 | result = UCOL_LESS; | |
280 | } | |
281 | else { | |
282 | minLength = target.fCount; | |
283 | result = UCOL_GREATER; | |
284 | } | |
285 | } | |
286 | else { | |
287 | minLength = target.fCount; | |
288 | result = UCOL_EQUAL; | |
289 | } | |
290 | ||
291 | if (minLength > 0) { | |
292 | int diff = uprv_memcmp(src, tgt, minLength); | |
293 | if (diff > 0) { | |
294 | return UCOL_GREATER; | |
295 | } | |
296 | else | |
297 | if (diff < 0) { | |
298 | return UCOL_LESS; | |
299 | } | |
300 | } | |
301 | ||
302 | return result; | |
303 | } else { | |
304 | return UCOL_EQUAL; | |
305 | } | |
306 | } | |
307 | ||
308 | CollationKey& | |
309 | CollationKey::ensureCapacity(int32_t newSize) | |
310 | { | |
311 | if (fCapacity < newSize) | |
312 | { | |
313 | uprv_free(fBytes); | |
314 | ||
315 | fBytes = (uint8_t *)uprv_malloc(newSize); | |
316 | ||
317 | if (fBytes == NULL) | |
318 | { | |
319 | return setToBogus(); | |
320 | } | |
321 | ||
322 | uprv_memset(fBytes, 0, fCapacity); | |
323 | fCapacity = newSize; | |
324 | } | |
325 | ||
326 | fBogus = FALSE; | |
327 | fCount = newSize; | |
328 | fHashCode = kInvalidHashCode; | |
329 | ||
330 | return *this; | |
331 | } | |
332 | ||
333 | #ifdef U_USE_COLLATION_KEY_DEPRECATES | |
334 | // Create a copy of the byte array. | |
335 | uint8_t* | |
336 | CollationKey::toByteArray(int32_t& count) const | |
337 | { | |
338 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); | |
339 | ||
340 | if (result == NULL) | |
341 | { | |
342 | count = 0; | |
343 | } | |
344 | else | |
345 | { | |
346 | count = fCount; | |
347 | uprv_memcpy(result, fBytes, fCount); | |
348 | } | |
349 | ||
350 | return result; | |
351 | } | |
352 | #endif | |
353 | ||
354 | int32_t | |
355 | CollationKey::hashCode() const | |
356 | { | |
357 | // (Cribbed from UnicodeString) | |
358 | // We cache the hashCode; when it becomes invalid, due to any change to the | |
359 | // string, we note this by setting it to kInvalidHashCode. [LIU] | |
360 | ||
361 | // Note: This method is semantically const, but physically non-const. | |
362 | ||
363 | if (fHashCode == kInvalidHashCode) | |
364 | { | |
4388f060 A |
365 | const char *s = reinterpret_cast<const char *>(fBytes); |
366 | ((CollationKey *)this)->fHashCode = s == NULL ? 0 : ustr_hashCharsN(s, fCount); | |
b75a7d8f A |
367 | #if 0 |
368 | // We compute the hash by iterating sparsely over 64 (at most) characters | |
369 | // spaced evenly through the string. For each character, we multiply the | |
370 | // previous hash value by a prime number and add the new character in, | |
371 | // in the manner of a additive linear congruential random number generator, | |
372 | // thus producing a pseudorandom deterministic value which should be well | |
373 | // distributed over the output range. [LIU] | |
374 | const uint8_t *p = fBytes, *limit = fBytes + fCount; | |
375 | int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); | |
376 | int32_t hash = 0; | |
377 | ||
378 | while (p < limit) | |
379 | { | |
380 | hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); | |
381 | p += inc; | |
382 | } | |
383 | ||
384 | // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode | |
385 | if (hash == kInvalidHashCode) | |
386 | { | |
387 | hash = kEmptyHashCode; | |
388 | } | |
389 | ||
390 | ((CollationKey *)this)->fHashCode = hash; // cast away const | |
391 | #endif | |
392 | } | |
393 | ||
394 | return fHashCode; | |
395 | } | |
396 | ||
397 | U_NAMESPACE_END | |
398 | ||
73c04bcf A |
399 | U_CAPI int32_t U_EXPORT2 |
400 | ucol_keyHashCode(const uint8_t *key, | |
401 | int32_t length) | |
402 | { | |
4388f060 | 403 | icu::CollationKey newKey(key, length); |
73c04bcf A |
404 | return newKey.hashCode(); |
405 | } | |
406 | ||
b75a7d8f | 407 | #endif /* #if !UCONFIG_NO_COLLATION */ |