]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/sortkey.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / sortkey.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 // Date Name Description
18 //
19 // 6/20/97 helena Java class name change.
20 // 6/23/97 helena Added comments to make code more readable.
21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23 // Cleaned up operator=
24 // 07/12/99 helena HPUX 11 CC port.
25 // 03/06/01 synwee Modified compareTo, to handle the result of
26 // 2 string similar in contents, but one is longer
27 // than the other
28 //===============================================================================
29
30 #include "unicode/utypes.h"
31
32 #if !UCONFIG_NO_COLLATION
33
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uelement.h"
37 #include "ustr_imp.h"
38
39 U_NAMESPACE_BEGIN
40
41 // A hash code of kInvalidHashCode indicates that the has code needs
42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
43 // and for any key whose computed hash code is kInvalidHashCode.
44 #define kInvalidHashCode ((int32_t)0)
45 #define kEmptyHashCode ((int32_t)1)
46
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
48
49 CollationKey::CollationKey()
50 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
51 fHashCode(kEmptyHashCode), fBytes(NULL)
52 {
53 }
54
55 // Create a collation key from a bit array.
56 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
57 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
58 fHashCode(kInvalidHashCode)
59 {
60 fBytes = (uint8_t *)uprv_malloc(count);
61
62 if (fBytes == NULL)
63 {
64 setToBogus();
65 return;
66 }
67
68 uprv_memcpy(fBytes, newValues, fCount);
69 }
70
71 CollationKey::CollationKey(const CollationKey& other)
72 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
73 fHashCode(other.fHashCode), fBytes(NULL)
74 {
75 if (other.fBogus)
76 {
77 setToBogus();
78 return;
79 }
80
81 fBytes = (uint8_t *)uprv_malloc(fCapacity);
82
83 if (fBytes == NULL)
84 {
85 setToBogus();
86 return;
87 }
88
89 uprv_memcpy(fBytes, other.fBytes, other.fCount);
90 if(fCapacity>fCount) {
91 uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
92 }
93 }
94
95 CollationKey::~CollationKey()
96 {
97 uprv_free(fBytes);
98 }
99
100 void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
101 if(fBytes != NULL) {
102 uprv_free(fBytes);
103 }
104 fBytes = values;
105 fCapacity = capacity;
106 setLength(count);
107 }
108
109 void CollationKey::setLength(int32_t newLength) {
110 fBogus = FALSE;
111 fCount = newLength;
112 fHashCode = kInvalidHashCode;
113 }
114
115 // set the key to an empty state
116 CollationKey&
117 CollationKey::reset()
118 {
119 fCount = 0;
120 fBogus = FALSE;
121 fHashCode = kEmptyHashCode;
122
123 return *this;
124 }
125
126 // set the key to a "bogus" or invalid state
127 CollationKey&
128 CollationKey::setToBogus()
129 {
130 uprv_free(fBytes);
131 fBytes = NULL;
132
133 fCapacity = 0;
134 fCount = 0;
135 fHashCode = kInvalidHashCode;
136
137 return *this;
138 }
139
140 UBool
141 CollationKey::operator==(const CollationKey& source) const
142 {
143 return (this->fCount == source.fCount &&
144 (this->fBytes == source.fBytes ||
145 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
146 }
147
148 const CollationKey&
149 CollationKey::operator=(const CollationKey& other)
150 {
151 if (this != &other)
152 {
153 if (other.isBogus())
154 {
155 return setToBogus();
156 }
157
158 if (other.fBytes != NULL)
159 {
160 ensureCapacity(other.fCount);
161
162 if (isBogus())
163 {
164 return *this;
165 }
166
167 fHashCode = other.fHashCode;
168 uprv_memcpy(fBytes, other.fBytes, fCount);
169 }
170 else
171 {
172 fCount = 0;
173 fBogus = FALSE;
174 fHashCode = kEmptyHashCode;
175 }
176 }
177
178 return *this;
179 }
180
181 // Bitwise comparison for the collation keys.
182 // NOTE: this is somewhat messy 'cause we can't count
183 // on memcmp returning the exact values which match
184 // Collator::EComparisonResult
185 Collator::EComparisonResult
186 CollationKey::compareTo(const CollationKey& target) const
187 {
188 uint8_t *src = this->fBytes;
189 uint8_t *tgt = target.fBytes;
190
191 // are we comparing the same string
192 if (src == tgt)
193 return Collator::EQUAL;
194
195 /*
196 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
197 if (count == 0)
198 {
199 // If count is 0, at least one of the keys is empty.
200 // An empty key is always LESS than a non-empty one
201 // and EQUAL to another empty
202 if (this->fCount < target.fCount)
203 {
204 return Collator::LESS;
205 }
206
207 if (this->fCount > target.fCount)
208 {
209 return Collator::GREATER;
210 }
211 return Collator::EQUAL;
212 }
213 */
214
215 int minLength;
216 Collator::EComparisonResult result;
217
218 // are we comparing different lengths?
219 if (this->fCount != target.fCount) {
220 if (this->fCount < target.fCount) {
221 minLength = this->fCount;
222 result = Collator::LESS;
223 }
224 else {
225 minLength = target.fCount;
226 result = Collator::GREATER;
227 }
228 }
229 else {
230 minLength = target.fCount;
231 result = Collator::EQUAL;
232 }
233
234 if (minLength > 0) {
235 int diff = uprv_memcmp(src, tgt, minLength);
236 if (diff > 0) {
237 return Collator::GREATER;
238 }
239 else
240 if (diff < 0) {
241 return Collator::LESS;
242 }
243 }
244
245 return result;
246 /*
247 if (result < 0)
248 {
249 return Collator::LESS;
250 }
251
252 if (result > 0)
253 {
254 return Collator::GREATER;
255 }
256 return Collator::EQUAL;
257 */
258 }
259
260 // Bitwise comparison for the collation keys.
261 UCollationResult
262 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
263 {
264 if(U_SUCCESS(status)) {
265 uint8_t *src = this->fBytes;
266 uint8_t *tgt = target.fBytes;
267
268 // are we comparing the same string
269 if (src == tgt)
270 return UCOL_EQUAL;
271
272 int minLength;
273 UCollationResult result;
274
275 // are we comparing different lengths?
276 if (this->fCount != target.fCount) {
277 if (this->fCount < target.fCount) {
278 minLength = this->fCount;
279 result = UCOL_LESS;
280 }
281 else {
282 minLength = target.fCount;
283 result = UCOL_GREATER;
284 }
285 }
286 else {
287 minLength = target.fCount;
288 result = UCOL_EQUAL;
289 }
290
291 if (minLength > 0) {
292 int diff = uprv_memcmp(src, tgt, minLength);
293 if (diff > 0) {
294 return UCOL_GREATER;
295 }
296 else
297 if (diff < 0) {
298 return UCOL_LESS;
299 }
300 }
301
302 return result;
303 } else {
304 return UCOL_EQUAL;
305 }
306 }
307
308 CollationKey&
309 CollationKey::ensureCapacity(int32_t newSize)
310 {
311 if (fCapacity < newSize)
312 {
313 uprv_free(fBytes);
314
315 fBytes = (uint8_t *)uprv_malloc(newSize);
316
317 if (fBytes == NULL)
318 {
319 return setToBogus();
320 }
321
322 uprv_memset(fBytes, 0, fCapacity);
323 fCapacity = newSize;
324 }
325
326 fBogus = FALSE;
327 fCount = newSize;
328 fHashCode = kInvalidHashCode;
329
330 return *this;
331 }
332
333 #ifdef U_USE_COLLATION_KEY_DEPRECATES
334 // Create a copy of the byte array.
335 uint8_t*
336 CollationKey::toByteArray(int32_t& count) const
337 {
338 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
339
340 if (result == NULL)
341 {
342 count = 0;
343 }
344 else
345 {
346 count = fCount;
347 uprv_memcpy(result, fBytes, fCount);
348 }
349
350 return result;
351 }
352 #endif
353
354 int32_t
355 CollationKey::hashCode() const
356 {
357 // (Cribbed from UnicodeString)
358 // We cache the hashCode; when it becomes invalid, due to any change to the
359 // string, we note this by setting it to kInvalidHashCode. [LIU]
360
361 // Note: This method is semantically const, but physically non-const.
362
363 if (fHashCode == kInvalidHashCode)
364 {
365 const char *s = reinterpret_cast<const char *>(fBytes);
366 ((CollationKey *)this)->fHashCode = s == NULL ? 0 : ustr_hashCharsN(s, fCount);
367 #if 0
368 // We compute the hash by iterating sparsely over 64 (at most) characters
369 // spaced evenly through the string. For each character, we multiply the
370 // previous hash value by a prime number and add the new character in,
371 // in the manner of a additive linear congruential random number generator,
372 // thus producing a pseudorandom deterministic value which should be well
373 // distributed over the output range. [LIU]
374 const uint8_t *p = fBytes, *limit = fBytes + fCount;
375 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376 int32_t hash = 0;
377
378 while (p < limit)
379 {
380 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381 p += inc;
382 }
383
384 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385 if (hash == kInvalidHashCode)
386 {
387 hash = kEmptyHashCode;
388 }
389
390 ((CollationKey *)this)->fHashCode = hash; // cast away const
391 #endif
392 }
393
394 return fHashCode;
395 }
396
397 U_NAMESPACE_END
398
399 U_CAPI int32_t U_EXPORT2
400 ucol_keyHashCode(const uint8_t *key,
401 int32_t length)
402 {
403 icu::CollationKey newKey(key, length);
404 return newKey.hashCode();
405 }
406
407 #endif /* #if !UCONFIG_NO_COLLATION */