]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
73c04bcf | 3 | * Copyright (C) 1996-2006, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | */ | |
7 | //=============================================================================== | |
8 | // | |
9 | // File sortkey.cpp | |
10 | // | |
11 | // | |
12 | // | |
13 | // Created by: Helena Shih | |
14 | // | |
15 | // Modification History: | |
16 | // | |
17 | // Date Name Description | |
18 | // | |
19 | // 6/20/97 helena Java class name change. | |
20 | // 6/23/97 helena Added comments to make code more readable. | |
21 | // 6/26/98 erm Canged to use byte arrays instead of UnicodeString | |
22 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, | |
23 | // Cleaned up operator= | |
24 | // 07/12/99 helena HPUX 11 CC port. | |
25 | // 03/06/01 synwee Modified compareTo, to handle the result of | |
26 | // 2 string similar in contents, but one is longer | |
27 | // than the other | |
28 | //=============================================================================== | |
29 | ||
30 | #include "unicode/utypes.h" | |
31 | ||
32 | #if !UCONFIG_NO_COLLATION | |
33 | ||
34 | #include "unicode/sortkey.h" | |
35 | #include "cmemory.h" | |
36 | #include "uhash.h" | |
37 | ||
38 | U_NAMESPACE_BEGIN | |
39 | ||
40 | // A hash code of kInvalidHashCode indicates that the has code needs | |
41 | // to be computed. A hash code of kEmptyHashCode is used for empty keys | |
42 | // and for any key whose computed hash code is kInvalidHashCode. | |
43 | #define kInvalidHashCode ((int32_t)0) | |
44 | #define kEmptyHashCode ((int32_t)1) | |
45 | ||
374ca955 | 46 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
b75a7d8f A |
47 | |
48 | CollationKey::CollationKey() | |
49 | : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), | |
50 | fHashCode(kEmptyHashCode), fBytes(NULL) | |
51 | { | |
52 | } | |
53 | ||
54 | // Create a collation key from a bit array. | |
55 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) | |
56 | : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), | |
57 | fHashCode(kInvalidHashCode) | |
58 | { | |
59 | fBytes = (uint8_t *)uprv_malloc(count); | |
60 | ||
61 | if (fBytes == NULL) | |
62 | { | |
63 | setToBogus(); | |
64 | return; | |
65 | } | |
66 | ||
67 | uprv_memcpy(fBytes, newValues, fCount); | |
68 | } | |
69 | ||
70 | CollationKey::CollationKey(const CollationKey& other) | |
71 | : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), | |
72 | fHashCode(other.fHashCode), fBytes(NULL) | |
73 | { | |
74 | if (other.fBogus) | |
75 | { | |
76 | setToBogus(); | |
77 | return; | |
78 | } | |
79 | ||
80 | fBytes = (uint8_t *)uprv_malloc(fCapacity); | |
81 | ||
82 | if (fBytes == NULL) | |
83 | { | |
84 | setToBogus(); | |
85 | return; | |
86 | } | |
87 | ||
88 | uprv_memcpy(fBytes, other.fBytes, other.fCount); | |
89 | if(fCapacity>fCount) { | |
90 | uprv_memset(fBytes+fCount, 0, fCapacity-fCount); | |
91 | } | |
92 | } | |
93 | ||
94 | CollationKey::~CollationKey() | |
95 | { | |
96 | uprv_free(fBytes); | |
97 | } | |
98 | ||
99 | void CollationKey::adopt(uint8_t *values, int32_t count) { | |
100 | if(fBytes != NULL) { | |
101 | uprv_free(fBytes); | |
102 | } | |
103 | fBogus = FALSE; | |
104 | fBytes = values; | |
105 | fCount = count; | |
106 | fCapacity = count; | |
107 | fHashCode = kInvalidHashCode; | |
108 | } | |
109 | ||
110 | // set the key to an empty state | |
111 | CollationKey& | |
112 | CollationKey::reset() | |
113 | { | |
114 | fCount = 0; | |
115 | fBogus = FALSE; | |
116 | fHashCode = kEmptyHashCode; | |
117 | ||
118 | return *this; | |
119 | } | |
120 | ||
121 | // set the key to a "bogus" or invalid state | |
122 | CollationKey& | |
123 | CollationKey::setToBogus() | |
124 | { | |
125 | uprv_free(fBytes); | |
126 | fBytes = NULL; | |
127 | ||
128 | fCapacity = 0; | |
129 | fCount = 0; | |
130 | fHashCode = kInvalidHashCode; | |
131 | ||
132 | return *this; | |
133 | } | |
134 | ||
135 | UBool | |
136 | CollationKey::operator==(const CollationKey& source) const | |
137 | { | |
138 | return (this->fCount == source.fCount && | |
139 | (this->fBytes == source.fBytes || | |
140 | uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); | |
141 | } | |
142 | ||
143 | const CollationKey& | |
144 | CollationKey::operator=(const CollationKey& other) | |
145 | { | |
146 | if (this != &other) | |
147 | { | |
148 | if (other.isBogus()) | |
149 | { | |
150 | return setToBogus(); | |
151 | } | |
152 | ||
153 | if (other.fBytes != NULL) | |
154 | { | |
155 | ensureCapacity(other.fCount); | |
156 | ||
157 | if (isBogus()) | |
158 | { | |
159 | return *this; | |
160 | } | |
161 | ||
162 | fHashCode = other.fHashCode; | |
163 | uprv_memcpy(fBytes, other.fBytes, fCount); | |
164 | } | |
165 | else | |
166 | { | |
167 | fCount = 0; | |
168 | fBogus = FALSE; | |
169 | fHashCode = kEmptyHashCode; | |
170 | } | |
171 | } | |
172 | ||
173 | return *this; | |
174 | } | |
175 | ||
176 | // Bitwise comparison for the collation keys. | |
177 | // NOTE: this is somewhat messy 'cause we can't count | |
178 | // on memcmp returning the exact values which match | |
179 | // Collator::EComparisonResult | |
180 | Collator::EComparisonResult | |
181 | CollationKey::compareTo(const CollationKey& target) const | |
182 | { | |
183 | uint8_t *src = this->fBytes; | |
184 | uint8_t *tgt = target.fBytes; | |
185 | ||
186 | // are we comparing the same string | |
187 | if (src == tgt) | |
188 | return Collator::EQUAL; | |
189 | ||
190 | /* | |
191 | int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; | |
192 | if (count == 0) | |
193 | { | |
194 | // If count is 0, at least one of the keys is empty. | |
195 | // An empty key is always LESS than a non-empty one | |
196 | // and EQUAL to another empty | |
197 | if (this->fCount < target.fCount) | |
198 | { | |
199 | return Collator::LESS; | |
200 | } | |
201 | ||
202 | if (this->fCount > target.fCount) | |
203 | { | |
204 | return Collator::GREATER; | |
205 | } | |
206 | return Collator::EQUAL; | |
207 | } | |
208 | */ | |
209 | ||
210 | int minLength; | |
211 | Collator::EComparisonResult result; | |
212 | ||
213 | // are we comparing different lengths? | |
214 | if (this->fCount != target.fCount) { | |
215 | if (this->fCount < target.fCount) { | |
216 | minLength = this->fCount; | |
217 | result = Collator::LESS; | |
218 | } | |
219 | else { | |
220 | minLength = target.fCount; | |
221 | result = Collator::GREATER; | |
222 | } | |
223 | } | |
224 | else { | |
225 | minLength = target.fCount; | |
226 | result = Collator::EQUAL; | |
227 | } | |
228 | ||
229 | if (minLength > 0) { | |
230 | int diff = uprv_memcmp(src, tgt, minLength); | |
231 | if (diff > 0) { | |
232 | return Collator::GREATER; | |
233 | } | |
234 | else | |
235 | if (diff < 0) { | |
236 | return Collator::LESS; | |
237 | } | |
238 | } | |
239 | ||
240 | return result; | |
241 | /* | |
242 | if (result < 0) | |
243 | { | |
244 | return Collator::LESS; | |
245 | } | |
246 | ||
247 | if (result > 0) | |
248 | { | |
249 | return Collator::GREATER; | |
250 | } | |
251 | return Collator::EQUAL; | |
252 | */ | |
253 | } | |
254 | ||
255 | // Bitwise comparison for the collation keys. | |
256 | UCollationResult | |
257 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const | |
258 | { | |
259 | if(U_SUCCESS(status)) { | |
260 | uint8_t *src = this->fBytes; | |
261 | uint8_t *tgt = target.fBytes; | |
262 | ||
263 | // are we comparing the same string | |
264 | if (src == tgt) | |
265 | return UCOL_EQUAL; | |
266 | ||
267 | int minLength; | |
268 | UCollationResult result; | |
269 | ||
270 | // are we comparing different lengths? | |
271 | if (this->fCount != target.fCount) { | |
272 | if (this->fCount < target.fCount) { | |
273 | minLength = this->fCount; | |
274 | result = UCOL_LESS; | |
275 | } | |
276 | else { | |
277 | minLength = target.fCount; | |
278 | result = UCOL_GREATER; | |
279 | } | |
280 | } | |
281 | else { | |
282 | minLength = target.fCount; | |
283 | result = UCOL_EQUAL; | |
284 | } | |
285 | ||
286 | if (minLength > 0) { | |
287 | int diff = uprv_memcmp(src, tgt, minLength); | |
288 | if (diff > 0) { | |
289 | return UCOL_GREATER; | |
290 | } | |
291 | else | |
292 | if (diff < 0) { | |
293 | return UCOL_LESS; | |
294 | } | |
295 | } | |
296 | ||
297 | return result; | |
298 | } else { | |
299 | return UCOL_EQUAL; | |
300 | } | |
301 | } | |
302 | ||
303 | CollationKey& | |
304 | CollationKey::ensureCapacity(int32_t newSize) | |
305 | { | |
306 | if (fCapacity < newSize) | |
307 | { | |
308 | uprv_free(fBytes); | |
309 | ||
310 | fBytes = (uint8_t *)uprv_malloc(newSize); | |
311 | ||
312 | if (fBytes == NULL) | |
313 | { | |
314 | return setToBogus(); | |
315 | } | |
316 | ||
317 | uprv_memset(fBytes, 0, fCapacity); | |
318 | fCapacity = newSize; | |
319 | } | |
320 | ||
321 | fBogus = FALSE; | |
322 | fCount = newSize; | |
323 | fHashCode = kInvalidHashCode; | |
324 | ||
325 | return *this; | |
326 | } | |
327 | ||
328 | #ifdef U_USE_COLLATION_KEY_DEPRECATES | |
329 | // Create a copy of the byte array. | |
330 | uint8_t* | |
331 | CollationKey::toByteArray(int32_t& count) const | |
332 | { | |
333 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); | |
334 | ||
335 | if (result == NULL) | |
336 | { | |
337 | count = 0; | |
338 | } | |
339 | else | |
340 | { | |
341 | count = fCount; | |
342 | uprv_memcpy(result, fBytes, fCount); | |
343 | } | |
344 | ||
345 | return result; | |
346 | } | |
347 | #endif | |
348 | ||
349 | int32_t | |
350 | CollationKey::hashCode() const | |
351 | { | |
352 | // (Cribbed from UnicodeString) | |
353 | // We cache the hashCode; when it becomes invalid, due to any change to the | |
354 | // string, we note this by setting it to kInvalidHashCode. [LIU] | |
355 | ||
356 | // Note: This method is semantically const, but physically non-const. | |
357 | ||
358 | if (fHashCode == kInvalidHashCode) | |
359 | { | |
360 | UHashTok key; | |
361 | key.pointer = fBytes; | |
362 | ((CollationKey *)this)->fHashCode = uhash_hashChars(key); | |
363 | #if 0 | |
364 | // We compute the hash by iterating sparsely over 64 (at most) characters | |
365 | // spaced evenly through the string. For each character, we multiply the | |
366 | // previous hash value by a prime number and add the new character in, | |
367 | // in the manner of a additive linear congruential random number generator, | |
368 | // thus producing a pseudorandom deterministic value which should be well | |
369 | // distributed over the output range. [LIU] | |
370 | const uint8_t *p = fBytes, *limit = fBytes + fCount; | |
371 | int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); | |
372 | int32_t hash = 0; | |
373 | ||
374 | while (p < limit) | |
375 | { | |
376 | hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); | |
377 | p += inc; | |
378 | } | |
379 | ||
380 | // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode | |
381 | if (hash == kInvalidHashCode) | |
382 | { | |
383 | hash = kEmptyHashCode; | |
384 | } | |
385 | ||
386 | ((CollationKey *)this)->fHashCode = hash; // cast away const | |
387 | #endif | |
388 | } | |
389 | ||
390 | return fHashCode; | |
391 | } | |
392 | ||
393 | U_NAMESPACE_END | |
394 | ||
73c04bcf A |
395 | U_CAPI int32_t U_EXPORT2 |
396 | ucol_keyHashCode(const uint8_t *key, | |
397 | int32_t length) | |
398 | { | |
46f4442e | 399 | U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); |
73c04bcf A |
400 | return newKey.hashCode(); |
401 | } | |
402 | ||
b75a7d8f | 403 | #endif /* #if !UCONFIG_NO_COLLATION */ |