]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/sortkey.cpp
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / i18n / sortkey.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
73c04bcf 3* Copyright (C) 1996-2006, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7//===============================================================================
8//
9// File sortkey.cpp
10//
11//
12//
13// Created by: Helena Shih
14//
15// Modification History:
16//
17// Date Name Description
18//
19// 6/20/97 helena Java class name change.
20// 6/23/97 helena Added comments to make code more readable.
21// 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23// Cleaned up operator=
24// 07/12/99 helena HPUX 11 CC port.
25// 03/06/01 synwee Modified compareTo, to handle the result of
26// 2 string similar in contents, but one is longer
27// than the other
28//===============================================================================
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_COLLATION
33
34#include "unicode/sortkey.h"
35#include "cmemory.h"
36#include "uhash.h"
37
38U_NAMESPACE_BEGIN
39
40// A hash code of kInvalidHashCode indicates that the has code needs
41// to be computed. A hash code of kEmptyHashCode is used for empty keys
42// and for any key whose computed hash code is kInvalidHashCode.
43#define kInvalidHashCode ((int32_t)0)
44#define kEmptyHashCode ((int32_t)1)
45
374ca955 46UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
b75a7d8f
A
47
48CollationKey::CollationKey()
49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50 fHashCode(kEmptyHashCode), fBytes(NULL)
51{
52}
53
54// Create a collation key from a bit array.
55CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57 fHashCode(kInvalidHashCode)
58{
59 fBytes = (uint8_t *)uprv_malloc(count);
60
61 if (fBytes == NULL)
62 {
63 setToBogus();
64 return;
65 }
66
67 uprv_memcpy(fBytes, newValues, fCount);
68}
69
70CollationKey::CollationKey(const CollationKey& other)
71: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72 fHashCode(other.fHashCode), fBytes(NULL)
73{
74 if (other.fBogus)
75 {
76 setToBogus();
77 return;
78 }
79
80 fBytes = (uint8_t *)uprv_malloc(fCapacity);
81
82 if (fBytes == NULL)
83 {
84 setToBogus();
85 return;
86 }
87
88 uprv_memcpy(fBytes, other.fBytes, other.fCount);
89 if(fCapacity>fCount) {
90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91 }
92}
93
94CollationKey::~CollationKey()
95{
96 uprv_free(fBytes);
97}
98
99void CollationKey::adopt(uint8_t *values, int32_t count) {
100 if(fBytes != NULL) {
101 uprv_free(fBytes);
102 }
103 fBogus = FALSE;
104 fBytes = values;
105 fCount = count;
106 fCapacity = count;
107 fHashCode = kInvalidHashCode;
108}
109
110// set the key to an empty state
111CollationKey&
112CollationKey::reset()
113{
114 fCount = 0;
115 fBogus = FALSE;
116 fHashCode = kEmptyHashCode;
117
118 return *this;
119}
120
121// set the key to a "bogus" or invalid state
122CollationKey&
123CollationKey::setToBogus()
124{
125 uprv_free(fBytes);
126 fBytes = NULL;
127
128 fCapacity = 0;
129 fCount = 0;
130 fHashCode = kInvalidHashCode;
131
132 return *this;
133}
134
135UBool
136CollationKey::operator==(const CollationKey& source) const
137{
138 return (this->fCount == source.fCount &&
139 (this->fBytes == source.fBytes ||
140 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
141}
142
143const CollationKey&
144CollationKey::operator=(const CollationKey& other)
145{
146 if (this != &other)
147 {
148 if (other.isBogus())
149 {
150 return setToBogus();
151 }
152
153 if (other.fBytes != NULL)
154 {
155 ensureCapacity(other.fCount);
156
157 if (isBogus())
158 {
159 return *this;
160 }
161
162 fHashCode = other.fHashCode;
163 uprv_memcpy(fBytes, other.fBytes, fCount);
164 }
165 else
166 {
167 fCount = 0;
168 fBogus = FALSE;
169 fHashCode = kEmptyHashCode;
170 }
171 }
172
173 return *this;
174}
175
176// Bitwise comparison for the collation keys.
177// NOTE: this is somewhat messy 'cause we can't count
178// on memcmp returning the exact values which match
179// Collator::EComparisonResult
180Collator::EComparisonResult
181CollationKey::compareTo(const CollationKey& target) const
182{
183 uint8_t *src = this->fBytes;
184 uint8_t *tgt = target.fBytes;
185
186 // are we comparing the same string
187 if (src == tgt)
188 return Collator::EQUAL;
189
190 /*
191 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
192 if (count == 0)
193 {
194 // If count is 0, at least one of the keys is empty.
195 // An empty key is always LESS than a non-empty one
196 // and EQUAL to another empty
197 if (this->fCount < target.fCount)
198 {
199 return Collator::LESS;
200 }
201
202 if (this->fCount > target.fCount)
203 {
204 return Collator::GREATER;
205 }
206 return Collator::EQUAL;
207 }
208 */
209
210 int minLength;
211 Collator::EComparisonResult result;
212
213 // are we comparing different lengths?
214 if (this->fCount != target.fCount) {
215 if (this->fCount < target.fCount) {
216 minLength = this->fCount;
217 result = Collator::LESS;
218 }
219 else {
220 minLength = target.fCount;
221 result = Collator::GREATER;
222 }
223 }
224 else {
225 minLength = target.fCount;
226 result = Collator::EQUAL;
227 }
228
229 if (minLength > 0) {
230 int diff = uprv_memcmp(src, tgt, minLength);
231 if (diff > 0) {
232 return Collator::GREATER;
233 }
234 else
235 if (diff < 0) {
236 return Collator::LESS;
237 }
238 }
239
240 return result;
241 /*
242 if (result < 0)
243 {
244 return Collator::LESS;
245 }
246
247 if (result > 0)
248 {
249 return Collator::GREATER;
250 }
251 return Collator::EQUAL;
252 */
253}
254
255// Bitwise comparison for the collation keys.
256UCollationResult
257CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
258{
259 if(U_SUCCESS(status)) {
260 uint8_t *src = this->fBytes;
261 uint8_t *tgt = target.fBytes;
262
263 // are we comparing the same string
264 if (src == tgt)
265 return UCOL_EQUAL;
266
267 int minLength;
268 UCollationResult result;
269
270 // are we comparing different lengths?
271 if (this->fCount != target.fCount) {
272 if (this->fCount < target.fCount) {
273 minLength = this->fCount;
274 result = UCOL_LESS;
275 }
276 else {
277 minLength = target.fCount;
278 result = UCOL_GREATER;
279 }
280 }
281 else {
282 minLength = target.fCount;
283 result = UCOL_EQUAL;
284 }
285
286 if (minLength > 0) {
287 int diff = uprv_memcmp(src, tgt, minLength);
288 if (diff > 0) {
289 return UCOL_GREATER;
290 }
291 else
292 if (diff < 0) {
293 return UCOL_LESS;
294 }
295 }
296
297 return result;
298 } else {
299 return UCOL_EQUAL;
300 }
301}
302
303CollationKey&
304CollationKey::ensureCapacity(int32_t newSize)
305{
306 if (fCapacity < newSize)
307 {
308 uprv_free(fBytes);
309
310 fBytes = (uint8_t *)uprv_malloc(newSize);
311
312 if (fBytes == NULL)
313 {
314 return setToBogus();
315 }
316
317 uprv_memset(fBytes, 0, fCapacity);
318 fCapacity = newSize;
319 }
320
321 fBogus = FALSE;
322 fCount = newSize;
323 fHashCode = kInvalidHashCode;
324
325 return *this;
326}
327
328#ifdef U_USE_COLLATION_KEY_DEPRECATES
329// Create a copy of the byte array.
330uint8_t*
331CollationKey::toByteArray(int32_t& count) const
332{
333 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
334
335 if (result == NULL)
336 {
337 count = 0;
338 }
339 else
340 {
341 count = fCount;
342 uprv_memcpy(result, fBytes, fCount);
343 }
344
345 return result;
346}
347#endif
348
349int32_t
350CollationKey::hashCode() const
351{
352 // (Cribbed from UnicodeString)
353 // We cache the hashCode; when it becomes invalid, due to any change to the
354 // string, we note this by setting it to kInvalidHashCode. [LIU]
355
356 // Note: This method is semantically const, but physically non-const.
357
358 if (fHashCode == kInvalidHashCode)
359 {
360 UHashTok key;
361 key.pointer = fBytes;
362 ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
363#if 0
364 // We compute the hash by iterating sparsely over 64 (at most) characters
365 // spaced evenly through the string. For each character, we multiply the
366 // previous hash value by a prime number and add the new character in,
367 // in the manner of a additive linear congruential random number generator,
368 // thus producing a pseudorandom deterministic value which should be well
369 // distributed over the output range. [LIU]
370 const uint8_t *p = fBytes, *limit = fBytes + fCount;
371 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
372 int32_t hash = 0;
373
374 while (p < limit)
375 {
376 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
377 p += inc;
378 }
379
380 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381 if (hash == kInvalidHashCode)
382 {
383 hash = kEmptyHashCode;
384 }
385
386 ((CollationKey *)this)->fHashCode = hash; // cast away const
387#endif
388 }
389
390 return fHashCode;
391}
392
393U_NAMESPACE_END
394
73c04bcf
A
395U_CAPI int32_t U_EXPORT2
396ucol_keyHashCode(const uint8_t *key,
397 int32_t length)
398{
46f4442e 399 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
73c04bcf
A
400 return newKey.hashCode();
401}
402
b75a7d8f 403#endif /* #if !UCONFIG_NO_COLLATION */