]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/sortkey.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / sortkey.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
4388f060 3* Copyright (C) 1996-2011, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7//===============================================================================
8//
9// File sortkey.cpp
10//
11//
12//
13// Created by: Helena Shih
14//
15// Modification History:
16//
17// Date Name Description
18//
19// 6/20/97 helena Java class name change.
20// 6/23/97 helena Added comments to make code more readable.
21// 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23// Cleaned up operator=
24// 07/12/99 helena HPUX 11 CC port.
25// 03/06/01 synwee Modified compareTo, to handle the result of
26// 2 string similar in contents, but one is longer
27// than the other
28//===============================================================================
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_COLLATION
33
34#include "unicode/sortkey.h"
35#include "cmemory.h"
4388f060
A
36#include "uelement.h"
37#include "ustr_imp.h"
b75a7d8f
A
38
39U_NAMESPACE_BEGIN
40
41// A hash code of kInvalidHashCode indicates that the has code needs
42// to be computed. A hash code of kEmptyHashCode is used for empty keys
43// and for any key whose computed hash code is kInvalidHashCode.
44#define kInvalidHashCode ((int32_t)0)
45#define kEmptyHashCode ((int32_t)1)
46
374ca955 47UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
b75a7d8f
A
48
49CollationKey::CollationKey()
50 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
51 fHashCode(kEmptyHashCode), fBytes(NULL)
52{
53}
54
55// Create a collation key from a bit array.
56CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
57 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
58 fHashCode(kInvalidHashCode)
59{
60 fBytes = (uint8_t *)uprv_malloc(count);
61
62 if (fBytes == NULL)
63 {
64 setToBogus();
65 return;
66 }
67
68 uprv_memcpy(fBytes, newValues, fCount);
69}
70
71CollationKey::CollationKey(const CollationKey& other)
72: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
73 fHashCode(other.fHashCode), fBytes(NULL)
74{
75 if (other.fBogus)
76 {
77 setToBogus();
78 return;
79 }
80
81 fBytes = (uint8_t *)uprv_malloc(fCapacity);
82
83 if (fBytes == NULL)
84 {
85 setToBogus();
86 return;
87 }
88
89 uprv_memcpy(fBytes, other.fBytes, other.fCount);
90 if(fCapacity>fCount) {
91 uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
92 }
93}
94
95CollationKey::~CollationKey()
96{
97 uprv_free(fBytes);
98}
99
4388f060 100void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
b75a7d8f
A
101 if(fBytes != NULL) {
102 uprv_free(fBytes);
103 }
b75a7d8f 104 fBytes = values;
4388f060
A
105 fCapacity = capacity;
106 setLength(count);
107}
108
109void CollationKey::setLength(int32_t newLength) {
110 fBogus = FALSE;
111 fCount = newLength;
b75a7d8f
A
112 fHashCode = kInvalidHashCode;
113}
114
115// set the key to an empty state
116CollationKey&
117CollationKey::reset()
118{
119 fCount = 0;
120 fBogus = FALSE;
121 fHashCode = kEmptyHashCode;
122
123 return *this;
124}
125
126// set the key to a "bogus" or invalid state
127CollationKey&
128CollationKey::setToBogus()
129{
130 uprv_free(fBytes);
131 fBytes = NULL;
132
133 fCapacity = 0;
134 fCount = 0;
135 fHashCode = kInvalidHashCode;
136
137 return *this;
138}
139
140UBool
141CollationKey::operator==(const CollationKey& source) const
142{
143 return (this->fCount == source.fCount &&
144 (this->fBytes == source.fBytes ||
145 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
146}
147
148const CollationKey&
149CollationKey::operator=(const CollationKey& other)
150{
151 if (this != &other)
152 {
153 if (other.isBogus())
154 {
155 return setToBogus();
156 }
157
158 if (other.fBytes != NULL)
159 {
160 ensureCapacity(other.fCount);
161
162 if (isBogus())
163 {
164 return *this;
165 }
166
167 fHashCode = other.fHashCode;
168 uprv_memcpy(fBytes, other.fBytes, fCount);
169 }
170 else
171 {
172 fCount = 0;
173 fBogus = FALSE;
174 fHashCode = kEmptyHashCode;
175 }
176 }
177
178 return *this;
179}
180
181// Bitwise comparison for the collation keys.
182// NOTE: this is somewhat messy 'cause we can't count
183// on memcmp returning the exact values which match
184// Collator::EComparisonResult
185Collator::EComparisonResult
186CollationKey::compareTo(const CollationKey& target) const
187{
188 uint8_t *src = this->fBytes;
189 uint8_t *tgt = target.fBytes;
190
191 // are we comparing the same string
192 if (src == tgt)
193 return Collator::EQUAL;
194
195 /*
196 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
197 if (count == 0)
198 {
199 // If count is 0, at least one of the keys is empty.
200 // An empty key is always LESS than a non-empty one
201 // and EQUAL to another empty
202 if (this->fCount < target.fCount)
203 {
204 return Collator::LESS;
205 }
206
207 if (this->fCount > target.fCount)
208 {
209 return Collator::GREATER;
210 }
211 return Collator::EQUAL;
212 }
213 */
214
215 int minLength;
216 Collator::EComparisonResult result;
217
218 // are we comparing different lengths?
219 if (this->fCount != target.fCount) {
220 if (this->fCount < target.fCount) {
221 minLength = this->fCount;
222 result = Collator::LESS;
223 }
224 else {
225 minLength = target.fCount;
226 result = Collator::GREATER;
227 }
228 }
229 else {
230 minLength = target.fCount;
231 result = Collator::EQUAL;
232 }
233
234 if (minLength > 0) {
235 int diff = uprv_memcmp(src, tgt, minLength);
236 if (diff > 0) {
237 return Collator::GREATER;
238 }
239 else
240 if (diff < 0) {
241 return Collator::LESS;
242 }
243 }
244
245 return result;
246 /*
247 if (result < 0)
248 {
249 return Collator::LESS;
250 }
251
252 if (result > 0)
253 {
254 return Collator::GREATER;
255 }
256 return Collator::EQUAL;
257 */
258}
259
260// Bitwise comparison for the collation keys.
261UCollationResult
262CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
263{
264 if(U_SUCCESS(status)) {
265 uint8_t *src = this->fBytes;
266 uint8_t *tgt = target.fBytes;
267
268 // are we comparing the same string
269 if (src == tgt)
270 return UCOL_EQUAL;
271
272 int minLength;
273 UCollationResult result;
274
275 // are we comparing different lengths?
276 if (this->fCount != target.fCount) {
277 if (this->fCount < target.fCount) {
278 minLength = this->fCount;
279 result = UCOL_LESS;
280 }
281 else {
282 minLength = target.fCount;
283 result = UCOL_GREATER;
284 }
285 }
286 else {
287 minLength = target.fCount;
288 result = UCOL_EQUAL;
289 }
290
291 if (minLength > 0) {
292 int diff = uprv_memcmp(src, tgt, minLength);
293 if (diff > 0) {
294 return UCOL_GREATER;
295 }
296 else
297 if (diff < 0) {
298 return UCOL_LESS;
299 }
300 }
301
302 return result;
303 } else {
304 return UCOL_EQUAL;
305 }
306}
307
308CollationKey&
309CollationKey::ensureCapacity(int32_t newSize)
310{
311 if (fCapacity < newSize)
312 {
313 uprv_free(fBytes);
314
315 fBytes = (uint8_t *)uprv_malloc(newSize);
316
317 if (fBytes == NULL)
318 {
319 return setToBogus();
320 }
321
322 uprv_memset(fBytes, 0, fCapacity);
323 fCapacity = newSize;
324 }
325
326 fBogus = FALSE;
327 fCount = newSize;
328 fHashCode = kInvalidHashCode;
329
330 return *this;
331}
332
333#ifdef U_USE_COLLATION_KEY_DEPRECATES
334// Create a copy of the byte array.
335uint8_t*
336CollationKey::toByteArray(int32_t& count) const
337{
338 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
339
340 if (result == NULL)
341 {
342 count = 0;
343 }
344 else
345 {
346 count = fCount;
347 uprv_memcpy(result, fBytes, fCount);
348 }
349
350 return result;
351}
352#endif
353
354int32_t
355CollationKey::hashCode() const
356{
357 // (Cribbed from UnicodeString)
358 // We cache the hashCode; when it becomes invalid, due to any change to the
359 // string, we note this by setting it to kInvalidHashCode. [LIU]
360
361 // Note: This method is semantically const, but physically non-const.
362
363 if (fHashCode == kInvalidHashCode)
364 {
4388f060
A
365 const char *s = reinterpret_cast<const char *>(fBytes);
366 ((CollationKey *)this)->fHashCode = s == NULL ? 0 : ustr_hashCharsN(s, fCount);
b75a7d8f
A
367#if 0
368 // We compute the hash by iterating sparsely over 64 (at most) characters
369 // spaced evenly through the string. For each character, we multiply the
370 // previous hash value by a prime number and add the new character in,
371 // in the manner of a additive linear congruential random number generator,
372 // thus producing a pseudorandom deterministic value which should be well
373 // distributed over the output range. [LIU]
374 const uint8_t *p = fBytes, *limit = fBytes + fCount;
375 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376 int32_t hash = 0;
377
378 while (p < limit)
379 {
380 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381 p += inc;
382 }
383
384 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385 if (hash == kInvalidHashCode)
386 {
387 hash = kEmptyHashCode;
388 }
389
390 ((CollationKey *)this)->fHashCode = hash; // cast away const
391#endif
392 }
393
394 return fHashCode;
395}
396
397U_NAMESPACE_END
398
73c04bcf
A
399U_CAPI int32_t U_EXPORT2
400ucol_keyHashCode(const uint8_t *key,
401 int32_t length)
402{
4388f060 403 icu::CollationKey newKey(key, length);
73c04bcf
A
404 return newKey.hashCode();
405}
406
b75a7d8f 407#endif /* #if !UCONFIG_NO_COLLATION */