]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/sortkey.cpp
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 //===============================================================================
13 // Created by: Helena Shih
15 // Modification History:
17 // Date Name Description
19 // 6/20/97 helena Java class name change.
20 // 6/23/97 helena Added comments to make code more readable.
21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23 // Cleaned up operator=
24 // 07/12/99 helena HPUX 11 CC port.
25 // 03/06/01 synwee Modified compareTo, to handle the result of
26 // 2 string similar in contents, but one is longer
28 //===============================================================================
30 #include "unicode/utypes.h"
32 #if !UCONFIG_NO_COLLATION
34 #include "unicode/sortkey.h"
41 // A hash code of kInvalidHashCode indicates that the has code needs
42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
43 // and for any key whose computed hash code is kInvalidHashCode.
44 #define kInvalidHashCode ((int32_t)0)
45 #define kEmptyHashCode ((int32_t)1)
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey
)
49 CollationKey::CollationKey()
50 : UObject(), fBogus(FALSE
), fCount(0), fCapacity(0),
51 fHashCode(kEmptyHashCode
), fBytes(NULL
)
55 // Create a collation key from a bit array.
56 CollationKey::CollationKey(const uint8_t* newValues
, int32_t count
)
57 : UObject(), fBogus(FALSE
), fCount(count
), fCapacity(count
),
58 fHashCode(kInvalidHashCode
)
60 fBytes
= (uint8_t *)uprv_malloc(count
);
68 uprv_memcpy(fBytes
, newValues
, fCount
);
71 CollationKey::CollationKey(const CollationKey
& other
)
72 : UObject(other
), fBogus(FALSE
), fCount(other
.fCount
), fCapacity(other
.fCapacity
),
73 fHashCode(other
.fHashCode
), fBytes(NULL
)
81 fBytes
= (uint8_t *)uprv_malloc(fCapacity
);
89 uprv_memcpy(fBytes
, other
.fBytes
, other
.fCount
);
90 if(fCapacity
>fCount
) {
91 uprv_memset(fBytes
+fCount
, 0, fCapacity
-fCount
);
95 CollationKey::~CollationKey()
100 void CollationKey::adopt(uint8_t *values
, int32_t capacity
, int32_t count
) {
105 fCapacity
= capacity
;
109 void CollationKey::setLength(int32_t newLength
) {
112 fHashCode
= kInvalidHashCode
;
115 // set the key to an empty state
117 CollationKey::reset()
121 fHashCode
= kEmptyHashCode
;
126 // set the key to a "bogus" or invalid state
128 CollationKey::setToBogus()
135 fHashCode
= kInvalidHashCode
;
141 CollationKey::operator==(const CollationKey
& source
) const
143 return (this->fCount
== source
.fCount
&&
144 (this->fBytes
== source
.fBytes
||
145 uprv_memcmp(this->fBytes
, source
.fBytes
, this->fCount
) == 0));
149 CollationKey::operator=(const CollationKey
& other
)
158 if (other
.fBytes
!= NULL
)
160 ensureCapacity(other
.fCount
);
167 fHashCode
= other
.fHashCode
;
168 uprv_memcpy(fBytes
, other
.fBytes
, fCount
);
174 fHashCode
= kEmptyHashCode
;
181 // Bitwise comparison for the collation keys.
182 // NOTE: this is somewhat messy 'cause we can't count
183 // on memcmp returning the exact values which match
184 // Collator::EComparisonResult
185 Collator::EComparisonResult
186 CollationKey::compareTo(const CollationKey
& target
) const
188 uint8_t *src
= this->fBytes
;
189 uint8_t *tgt
= target
.fBytes
;
191 // are we comparing the same string
193 return Collator::EQUAL
;
196 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
199 // If count is 0, at least one of the keys is empty.
200 // An empty key is always LESS than a non-empty one
201 // and EQUAL to another empty
202 if (this->fCount < target.fCount)
204 return Collator::LESS;
207 if (this->fCount > target.fCount)
209 return Collator::GREATER;
211 return Collator::EQUAL;
216 Collator::EComparisonResult result
;
218 // are we comparing different lengths?
219 if (this->fCount
!= target
.fCount
) {
220 if (this->fCount
< target
.fCount
) {
221 minLength
= this->fCount
;
222 result
= Collator::LESS
;
225 minLength
= target
.fCount
;
226 result
= Collator::GREATER
;
230 minLength
= target
.fCount
;
231 result
= Collator::EQUAL
;
235 int diff
= uprv_memcmp(src
, tgt
, minLength
);
237 return Collator::GREATER
;
241 return Collator::LESS
;
249 return Collator::LESS;
254 return Collator::GREATER;
256 return Collator::EQUAL;
260 // Bitwise comparison for the collation keys.
262 CollationKey::compareTo(const CollationKey
& target
, UErrorCode
&status
) const
264 if(U_SUCCESS(status
)) {
265 uint8_t *src
= this->fBytes
;
266 uint8_t *tgt
= target
.fBytes
;
268 // are we comparing the same string
273 UCollationResult result
;
275 // are we comparing different lengths?
276 if (this->fCount
!= target
.fCount
) {
277 if (this->fCount
< target
.fCount
) {
278 minLength
= this->fCount
;
282 minLength
= target
.fCount
;
283 result
= UCOL_GREATER
;
287 minLength
= target
.fCount
;
292 int diff
= uprv_memcmp(src
, tgt
, minLength
);
309 CollationKey::ensureCapacity(int32_t newSize
)
311 if (fCapacity
< newSize
)
315 fBytes
= (uint8_t *)uprv_malloc(newSize
);
322 uprv_memset(fBytes
, 0, fCapacity
);
328 fHashCode
= kInvalidHashCode
;
333 #ifdef U_USE_COLLATION_KEY_DEPRECATES
334 // Create a copy of the byte array.
336 CollationKey::toByteArray(int32_t& count
) const
338 uint8_t *result
= (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount
);
347 uprv_memcpy(result
, fBytes
, fCount
);
355 CollationKey::hashCode() const
357 // (Cribbed from UnicodeString)
358 // We cache the hashCode; when it becomes invalid, due to any change to the
359 // string, we note this by setting it to kInvalidHashCode. [LIU]
361 // Note: This method is semantically const, but physically non-const.
363 if (fHashCode
== kInvalidHashCode
)
365 const char *s
= reinterpret_cast<const char *>(fBytes
);
366 ((CollationKey
*)this)->fHashCode
= s
== NULL
? 0 : ustr_hashCharsN(s
, fCount
);
368 // We compute the hash by iterating sparsely over 64 (at most) characters
369 // spaced evenly through the string. For each character, we multiply the
370 // previous hash value by a prime number and add the new character in,
371 // in the manner of a additive linear congruential random number generator,
372 // thus producing a pseudorandom deterministic value which should be well
373 // distributed over the output range. [LIU]
374 const uint8_t *p
= fBytes
, *limit
= fBytes
+ fCount
;
375 int32_t inc
= (fCount
>= 256) ? fCount
/128 : 2; // inc = max(fSize/64, 1);
380 hash
= ( hash
* 37 ) + ((p
[0] << 8) + p
[1]);
384 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385 if (hash
== kInvalidHashCode
)
387 hash
= kEmptyHashCode
;
390 ((CollationKey
*)this)->fHashCode
= hash
; // cast away const
399 U_CAPI
int32_t U_EXPORT2
400 ucol_keyHashCode(const uint8_t *key
,
403 icu::CollationKey
newKey(key
, length
);
404 return newKey
.hashCode();
407 #endif /* #if !UCONFIG_NO_COLLATION */