]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/sortkey.cpp
2 *******************************************************************************
3 * Copyright (C) 1996-2003, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 //===============================================================================
13 // Created by: Helena Shih
15 // Modification History:
17 // Date Name Description
19 // 6/20/97 helena Java class name change.
20 // 6/23/97 helena Added comments to make code more readable.
21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23 // Cleaned up operator=
24 // 07/12/99 helena HPUX 11 CC port.
25 // 03/06/01 synwee Modified compareTo, to handle the result of
26 // 2 string similar in contents, but one is longer
28 //===============================================================================
30 #include "unicode/utypes.h"
32 #if !UCONFIG_NO_COLLATION
34 #include "unicode/sortkey.h"
40 // A hash code of kInvalidHashCode indicates that the has code needs
41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
42 // and for any key whose computed hash code is kInvalidHashCode.
43 #define kInvalidHashCode ((int32_t)0)
44 #define kEmptyHashCode ((int32_t)1)
46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey
)
48 CollationKey::CollationKey()
49 : UObject(), fBogus(FALSE
), fCount(0), fCapacity(0),
50 fHashCode(kEmptyHashCode
), fBytes(NULL
)
54 // Create a collation key from a bit array.
55 CollationKey::CollationKey(const uint8_t* newValues
, int32_t count
)
56 : UObject(), fBogus(FALSE
), fCount(count
), fCapacity(count
),
57 fHashCode(kInvalidHashCode
)
59 fBytes
= (uint8_t *)uprv_malloc(count
);
67 uprv_memcpy(fBytes
, newValues
, fCount
);
70 CollationKey::CollationKey(const CollationKey
& other
)
71 : UObject(other
), fBogus(FALSE
), fCount(other
.fCount
), fCapacity(other
.fCapacity
),
72 fHashCode(other
.fHashCode
), fBytes(NULL
)
80 fBytes
= (uint8_t *)uprv_malloc(fCapacity
);
88 uprv_memcpy(fBytes
, other
.fBytes
, other
.fCount
);
89 if(fCapacity
>fCount
) {
90 uprv_memset(fBytes
+fCount
, 0, fCapacity
-fCount
);
94 CollationKey::~CollationKey()
99 void CollationKey::adopt(uint8_t *values
, int32_t count
) {
107 fHashCode
= kInvalidHashCode
;
110 // set the key to an empty state
112 CollationKey::reset()
116 fHashCode
= kEmptyHashCode
;
121 // set the key to a "bogus" or invalid state
123 CollationKey::setToBogus()
130 fHashCode
= kInvalidHashCode
;
136 CollationKey::operator==(const CollationKey
& source
) const
138 return (this->fCount
== source
.fCount
&&
139 (this->fBytes
== source
.fBytes
||
140 uprv_memcmp(this->fBytes
, source
.fBytes
, this->fCount
) == 0));
144 CollationKey::operator=(const CollationKey
& other
)
153 if (other
.fBytes
!= NULL
)
155 ensureCapacity(other
.fCount
);
162 fHashCode
= other
.fHashCode
;
163 uprv_memcpy(fBytes
, other
.fBytes
, fCount
);
169 fHashCode
= kEmptyHashCode
;
176 // Bitwise comparison for the collation keys.
177 // NOTE: this is somewhat messy 'cause we can't count
178 // on memcmp returning the exact values which match
179 // Collator::EComparisonResult
180 Collator::EComparisonResult
181 CollationKey::compareTo(const CollationKey
& target
) const
183 uint8_t *src
= this->fBytes
;
184 uint8_t *tgt
= target
.fBytes
;
186 // are we comparing the same string
188 return Collator::EQUAL
;
191 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
194 // If count is 0, at least one of the keys is empty.
195 // An empty key is always LESS than a non-empty one
196 // and EQUAL to another empty
197 if (this->fCount < target.fCount)
199 return Collator::LESS;
202 if (this->fCount > target.fCount)
204 return Collator::GREATER;
206 return Collator::EQUAL;
211 Collator::EComparisonResult result
;
213 // are we comparing different lengths?
214 if (this->fCount
!= target
.fCount
) {
215 if (this->fCount
< target
.fCount
) {
216 minLength
= this->fCount
;
217 result
= Collator::LESS
;
220 minLength
= target
.fCount
;
221 result
= Collator::GREATER
;
225 minLength
= target
.fCount
;
226 result
= Collator::EQUAL
;
230 int diff
= uprv_memcmp(src
, tgt
, minLength
);
232 return Collator::GREATER
;
236 return Collator::LESS
;
244 return Collator::LESS;
249 return Collator::GREATER;
251 return Collator::EQUAL;
255 // Bitwise comparison for the collation keys.
257 CollationKey::compareTo(const CollationKey
& target
, UErrorCode
&status
) const
259 if(U_SUCCESS(status
)) {
260 uint8_t *src
= this->fBytes
;
261 uint8_t *tgt
= target
.fBytes
;
263 // are we comparing the same string
268 UCollationResult result
;
270 // are we comparing different lengths?
271 if (this->fCount
!= target
.fCount
) {
272 if (this->fCount
< target
.fCount
) {
273 minLength
= this->fCount
;
277 minLength
= target
.fCount
;
278 result
= UCOL_GREATER
;
282 minLength
= target
.fCount
;
287 int diff
= uprv_memcmp(src
, tgt
, minLength
);
304 CollationKey::ensureCapacity(int32_t newSize
)
306 if (fCapacity
< newSize
)
310 fBytes
= (uint8_t *)uprv_malloc(newSize
);
317 uprv_memset(fBytes
, 0, fCapacity
);
323 fHashCode
= kInvalidHashCode
;
328 #ifdef U_USE_COLLATION_KEY_DEPRECATES
329 // Create a copy of the byte array.
331 CollationKey::toByteArray(int32_t& count
) const
333 uint8_t *result
= (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount
);
342 uprv_memcpy(result
, fBytes
, fCount
);
350 CollationKey::hashCode() const
352 // (Cribbed from UnicodeString)
353 // We cache the hashCode; when it becomes invalid, due to any change to the
354 // string, we note this by setting it to kInvalidHashCode. [LIU]
356 // Note: This method is semantically const, but physically non-const.
358 if (fHashCode
== kInvalidHashCode
)
361 key
.pointer
= fBytes
;
362 ((CollationKey
*)this)->fHashCode
= uhash_hashChars(key
);
364 // We compute the hash by iterating sparsely over 64 (at most) characters
365 // spaced evenly through the string. For each character, we multiply the
366 // previous hash value by a prime number and add the new character in,
367 // in the manner of a additive linear congruential random number generator,
368 // thus producing a pseudorandom deterministic value which should be well
369 // distributed over the output range. [LIU]
370 const uint8_t *p
= fBytes
, *limit
= fBytes
+ fCount
;
371 int32_t inc
= (fCount
>= 256) ? fCount
/128 : 2; // inc = max(fSize/64, 1);
376 hash
= ( hash
* 37 ) + ((p
[0] << 8) + p
[1]);
380 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381 if (hash
== kInvalidHashCode
)
383 hash
= kEmptyHashCode
;
386 ((CollationKey
*)this)->fHashCode
= hash
; // cast away const
395 #endif /* #if !UCONFIG_NO_COLLATION */