]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/sortkey.cpp
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / i18n / sortkey.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 // Date Name Description
18 //
19 // 6/20/97 helena Java class name change.
20 // 6/23/97 helena Added comments to make code more readable.
21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23 // Cleaned up operator=
24 // 07/12/99 helena HPUX 11 CC port.
25 // 03/06/01 synwee Modified compareTo, to handle the result of
26 // 2 string similar in contents, but one is longer
27 // than the other
28 //===============================================================================
29
30 #include "unicode/utypes.h"
31
32 #if !UCONFIG_NO_COLLATION
33
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uhash.h"
37
38 U_NAMESPACE_BEGIN
39
40 // A hash code of kInvalidHashCode indicates that the has code needs
41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
42 // and for any key whose computed hash code is kInvalidHashCode.
43 #define kInvalidHashCode ((int32_t)0)
44 #define kEmptyHashCode ((int32_t)1)
45
46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47
48 CollationKey::CollationKey()
49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50 fHashCode(kEmptyHashCode), fBytes(NULL)
51 {
52 }
53
54 // Create a collation key from a bit array.
55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57 fHashCode(kInvalidHashCode)
58 {
59 fBytes = (uint8_t *)uprv_malloc(count);
60
61 if (fBytes == NULL)
62 {
63 setToBogus();
64 return;
65 }
66
67 uprv_memcpy(fBytes, newValues, fCount);
68 }
69
70 CollationKey::CollationKey(const CollationKey& other)
71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72 fHashCode(other.fHashCode), fBytes(NULL)
73 {
74 if (other.fBogus)
75 {
76 setToBogus();
77 return;
78 }
79
80 fBytes = (uint8_t *)uprv_malloc(fCapacity);
81
82 if (fBytes == NULL)
83 {
84 setToBogus();
85 return;
86 }
87
88 uprv_memcpy(fBytes, other.fBytes, other.fCount);
89 if(fCapacity>fCount) {
90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91 }
92 }
93
94 CollationKey::~CollationKey()
95 {
96 uprv_free(fBytes);
97 }
98
99 void CollationKey::adopt(uint8_t *values, int32_t count) {
100 if(fBytes != NULL) {
101 uprv_free(fBytes);
102 }
103 fBogus = FALSE;
104 fBytes = values;
105 fCount = count;
106 fCapacity = count;
107 fHashCode = kInvalidHashCode;
108 }
109
110 // set the key to an empty state
111 CollationKey&
112 CollationKey::reset()
113 {
114 fCount = 0;
115 fBogus = FALSE;
116 fHashCode = kEmptyHashCode;
117
118 return *this;
119 }
120
121 // set the key to a "bogus" or invalid state
122 CollationKey&
123 CollationKey::setToBogus()
124 {
125 uprv_free(fBytes);
126 fBytes = NULL;
127
128 fCapacity = 0;
129 fCount = 0;
130 fHashCode = kInvalidHashCode;
131
132 return *this;
133 }
134
135 UBool
136 CollationKey::operator==(const CollationKey& source) const
137 {
138 return (this->fCount == source.fCount &&
139 (this->fBytes == source.fBytes ||
140 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
141 }
142
143 const CollationKey&
144 CollationKey::operator=(const CollationKey& other)
145 {
146 if (this != &other)
147 {
148 if (other.isBogus())
149 {
150 return setToBogus();
151 }
152
153 if (other.fBytes != NULL)
154 {
155 ensureCapacity(other.fCount);
156
157 if (isBogus())
158 {
159 return *this;
160 }
161
162 fHashCode = other.fHashCode;
163 uprv_memcpy(fBytes, other.fBytes, fCount);
164 }
165 else
166 {
167 fCount = 0;
168 fBogus = FALSE;
169 fHashCode = kEmptyHashCode;
170 }
171 }
172
173 return *this;
174 }
175
176 // Bitwise comparison for the collation keys.
177 // NOTE: this is somewhat messy 'cause we can't count
178 // on memcmp returning the exact values which match
179 // Collator::EComparisonResult
180 Collator::EComparisonResult
181 CollationKey::compareTo(const CollationKey& target) const
182 {
183 uint8_t *src = this->fBytes;
184 uint8_t *tgt = target.fBytes;
185
186 // are we comparing the same string
187 if (src == tgt)
188 return Collator::EQUAL;
189
190 /*
191 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
192 if (count == 0)
193 {
194 // If count is 0, at least one of the keys is empty.
195 // An empty key is always LESS than a non-empty one
196 // and EQUAL to another empty
197 if (this->fCount < target.fCount)
198 {
199 return Collator::LESS;
200 }
201
202 if (this->fCount > target.fCount)
203 {
204 return Collator::GREATER;
205 }
206 return Collator::EQUAL;
207 }
208 */
209
210 int minLength;
211 Collator::EComparisonResult result;
212
213 // are we comparing different lengths?
214 if (this->fCount != target.fCount) {
215 if (this->fCount < target.fCount) {
216 minLength = this->fCount;
217 result = Collator::LESS;
218 }
219 else {
220 minLength = target.fCount;
221 result = Collator::GREATER;
222 }
223 }
224 else {
225 minLength = target.fCount;
226 result = Collator::EQUAL;
227 }
228
229 if (minLength > 0) {
230 int diff = uprv_memcmp(src, tgt, minLength);
231 if (diff > 0) {
232 return Collator::GREATER;
233 }
234 else
235 if (diff < 0) {
236 return Collator::LESS;
237 }
238 }
239
240 return result;
241 /*
242 if (result < 0)
243 {
244 return Collator::LESS;
245 }
246
247 if (result > 0)
248 {
249 return Collator::GREATER;
250 }
251 return Collator::EQUAL;
252 */
253 }
254
255 // Bitwise comparison for the collation keys.
256 UCollationResult
257 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
258 {
259 if(U_SUCCESS(status)) {
260 uint8_t *src = this->fBytes;
261 uint8_t *tgt = target.fBytes;
262
263 // are we comparing the same string
264 if (src == tgt)
265 return UCOL_EQUAL;
266
267 int minLength;
268 UCollationResult result;
269
270 // are we comparing different lengths?
271 if (this->fCount != target.fCount) {
272 if (this->fCount < target.fCount) {
273 minLength = this->fCount;
274 result = UCOL_LESS;
275 }
276 else {
277 minLength = target.fCount;
278 result = UCOL_GREATER;
279 }
280 }
281 else {
282 minLength = target.fCount;
283 result = UCOL_EQUAL;
284 }
285
286 if (minLength > 0) {
287 int diff = uprv_memcmp(src, tgt, minLength);
288 if (diff > 0) {
289 return UCOL_GREATER;
290 }
291 else
292 if (diff < 0) {
293 return UCOL_LESS;
294 }
295 }
296
297 return result;
298 } else {
299 return UCOL_EQUAL;
300 }
301 }
302
303 CollationKey&
304 CollationKey::ensureCapacity(int32_t newSize)
305 {
306 if (fCapacity < newSize)
307 {
308 uprv_free(fBytes);
309
310 fBytes = (uint8_t *)uprv_malloc(newSize);
311
312 if (fBytes == NULL)
313 {
314 return setToBogus();
315 }
316
317 uprv_memset(fBytes, 0, fCapacity);
318 fCapacity = newSize;
319 }
320
321 fBogus = FALSE;
322 fCount = newSize;
323 fHashCode = kInvalidHashCode;
324
325 return *this;
326 }
327
328 #ifdef U_USE_COLLATION_KEY_DEPRECATES
329 // Create a copy of the byte array.
330 uint8_t*
331 CollationKey::toByteArray(int32_t& count) const
332 {
333 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
334
335 if (result == NULL)
336 {
337 count = 0;
338 }
339 else
340 {
341 count = fCount;
342 uprv_memcpy(result, fBytes, fCount);
343 }
344
345 return result;
346 }
347 #endif
348
349 int32_t
350 CollationKey::hashCode() const
351 {
352 // (Cribbed from UnicodeString)
353 // We cache the hashCode; when it becomes invalid, due to any change to the
354 // string, we note this by setting it to kInvalidHashCode. [LIU]
355
356 // Note: This method is semantically const, but physically non-const.
357
358 if (fHashCode == kInvalidHashCode)
359 {
360 UHashTok key;
361 key.pointer = fBytes;
362 ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
363 #if 0
364 // We compute the hash by iterating sparsely over 64 (at most) characters
365 // spaced evenly through the string. For each character, we multiply the
366 // previous hash value by a prime number and add the new character in,
367 // in the manner of a additive linear congruential random number generator,
368 // thus producing a pseudorandom deterministic value which should be well
369 // distributed over the output range. [LIU]
370 const uint8_t *p = fBytes, *limit = fBytes + fCount;
371 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
372 int32_t hash = 0;
373
374 while (p < limit)
375 {
376 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
377 p += inc;
378 }
379
380 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381 if (hash == kInvalidHashCode)
382 {
383 hash = kEmptyHashCode;
384 }
385
386 ((CollationKey *)this)->fHashCode = hash; // cast away const
387 #endif
388 }
389
390 return fHashCode;
391 }
392
393 U_NAMESPACE_END
394
395 U_CAPI int32_t U_EXPORT2
396 ucol_keyHashCode(const uint8_t *key,
397 int32_t length)
398 {
399 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
400 return newKey.hashCode();
401 }
402
403 #endif /* #if !UCONFIG_NO_COLLATION */