]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
51004dcb A |
5 | * Copyright (C) 1996-2012, International Business Machines Corporation and |
6 | * others. All Rights Reserved. | |
b75a7d8f A |
7 | ******************************************************************************* |
8 | */ | |
9 | //=============================================================================== | |
10 | // | |
11 | // File sortkey.cpp | |
12 | // | |
13 | // | |
14 | // | |
15 | // Created by: Helena Shih | |
16 | // | |
17 | // Modification History: | |
18 | // | |
19 | // Date Name Description | |
20 | // | |
21 | // 6/20/97 helena Java class name change. | |
22 | // 6/23/97 helena Added comments to make code more readable. | |
23 | // 6/26/98 erm Canged to use byte arrays instead of UnicodeString | |
24 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, | |
25 | // Cleaned up operator= | |
26 | // 07/12/99 helena HPUX 11 CC port. | |
27 | // 03/06/01 synwee Modified compareTo, to handle the result of | |
28 | // 2 string similar in contents, but one is longer | |
29 | // than the other | |
30 | //=============================================================================== | |
31 | ||
32 | #include "unicode/utypes.h" | |
33 | ||
34 | #if !UCONFIG_NO_COLLATION | |
35 | ||
36 | #include "unicode/sortkey.h" | |
37 | #include "cmemory.h" | |
4388f060 A |
38 | #include "uelement.h" |
39 | #include "ustr_imp.h" | |
b75a7d8f A |
40 | |
41 | U_NAMESPACE_BEGIN | |
42 | ||
51004dcb | 43 | // A hash code of kInvalidHashCode indicates that the hash code needs |
b75a7d8f A |
44 | // to be computed. A hash code of kEmptyHashCode is used for empty keys |
45 | // and for any key whose computed hash code is kInvalidHashCode. | |
51004dcb A |
46 | static const int32_t kInvalidHashCode = 0; |
47 | static const int32_t kEmptyHashCode = 1; | |
48 | // The "bogus hash code" replaces a separate fBogus flag. | |
49 | static const int32_t kBogusHashCode = 2; | |
b75a7d8f | 50 | |
374ca955 | 51 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
b75a7d8f A |
52 | |
53 | CollationKey::CollationKey() | |
51004dcb A |
54 | : UObject(), fFlagAndLength(0), |
55 | fHashCode(kEmptyHashCode) | |
b75a7d8f A |
56 | { |
57 | } | |
58 | ||
59 | // Create a collation key from a bit array. | |
60 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) | |
51004dcb | 61 | : UObject(), fFlagAndLength(count), |
b75a7d8f A |
62 | fHashCode(kInvalidHashCode) |
63 | { | |
51004dcb A |
64 | if (count < 0 || (newValues == NULL && count != 0) || |
65 | (count > getCapacity() && reallocate(count, 0) == NULL)) { | |
b75a7d8f A |
66 | setToBogus(); |
67 | return; | |
68 | } | |
69 | ||
51004dcb A |
70 | if (count > 0) { |
71 | uprv_memcpy(getBytes(), newValues, count); | |
72 | } | |
b75a7d8f A |
73 | } |
74 | ||
75 | CollationKey::CollationKey(const CollationKey& other) | |
51004dcb A |
76 | : UObject(other), fFlagAndLength(other.getLength()), |
77 | fHashCode(other.fHashCode) | |
b75a7d8f | 78 | { |
51004dcb | 79 | if (other.isBogus()) |
b75a7d8f A |
80 | { |
81 | setToBogus(); | |
82 | return; | |
83 | } | |
84 | ||
51004dcb A |
85 | int32_t length = fFlagAndLength; |
86 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
b75a7d8f A |
87 | setToBogus(); |
88 | return; | |
89 | } | |
90 | ||
51004dcb A |
91 | if (length > 0) { |
92 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
b75a7d8f A |
93 | } |
94 | } | |
95 | ||
96 | CollationKey::~CollationKey() | |
97 | { | |
51004dcb | 98 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
b75a7d8f A |
99 | } |
100 | ||
51004dcb A |
101 | uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { |
102 | uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); | |
103 | if(newBytes == NULL) { return NULL; } | |
104 | if(length > 0) { | |
105 | uprv_memcpy(newBytes, getBytes(), length); | |
b75a7d8f | 106 | } |
51004dcb A |
107 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
108 | fUnion.fFields.fBytes = newBytes; | |
109 | fUnion.fFields.fCapacity = newCapacity; | |
110 | fFlagAndLength |= 0x80000000; | |
111 | return newBytes; | |
4388f060 A |
112 | } |
113 | ||
114 | void CollationKey::setLength(int32_t newLength) { | |
51004dcb A |
115 | // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); |
116 | fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; | |
b75a7d8f A |
117 | fHashCode = kInvalidHashCode; |
118 | } | |
119 | ||
120 | // set the key to an empty state | |
121 | CollationKey& | |
122 | CollationKey::reset() | |
123 | { | |
51004dcb | 124 | fFlagAndLength &= 0x80000000; |
b75a7d8f A |
125 | fHashCode = kEmptyHashCode; |
126 | ||
127 | return *this; | |
128 | } | |
129 | ||
130 | // set the key to a "bogus" or invalid state | |
131 | CollationKey& | |
132 | CollationKey::setToBogus() | |
133 | { | |
51004dcb A |
134 | fFlagAndLength &= 0x80000000; |
135 | fHashCode = kBogusHashCode; | |
b75a7d8f A |
136 | |
137 | return *this; | |
138 | } | |
139 | ||
140 | UBool | |
141 | CollationKey::operator==(const CollationKey& source) const | |
142 | { | |
51004dcb A |
143 | return getLength() == source.getLength() && |
144 | (this == &source || | |
145 | uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); | |
b75a7d8f A |
146 | } |
147 | ||
148 | const CollationKey& | |
149 | CollationKey::operator=(const CollationKey& other) | |
150 | { | |
151 | if (this != &other) | |
152 | { | |
153 | if (other.isBogus()) | |
154 | { | |
155 | return setToBogus(); | |
156 | } | |
157 | ||
51004dcb A |
158 | int32_t length = other.getLength(); |
159 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
160 | return setToBogus(); | |
b75a7d8f | 161 | } |
51004dcb A |
162 | if (length > 0) { |
163 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
b75a7d8f | 164 | } |
51004dcb A |
165 | fFlagAndLength = (fFlagAndLength & 0x80000000) | length; |
166 | fHashCode = other.fHashCode; | |
b75a7d8f A |
167 | } |
168 | ||
169 | return *this; | |
170 | } | |
171 | ||
172 | // Bitwise comparison for the collation keys. | |
b75a7d8f A |
173 | Collator::EComparisonResult |
174 | CollationKey::compareTo(const CollationKey& target) const | |
175 | { | |
51004dcb A |
176 | UErrorCode errorCode = U_ZERO_ERROR; |
177 | return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); | |
b75a7d8f A |
178 | } |
179 | ||
180 | // Bitwise comparison for the collation keys. | |
181 | UCollationResult | |
182 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const | |
183 | { | |
184 | if(U_SUCCESS(status)) { | |
51004dcb A |
185 | const uint8_t *src = getBytes(); |
186 | const uint8_t *tgt = target.getBytes(); | |
b75a7d8f A |
187 | |
188 | // are we comparing the same string | |
189 | if (src == tgt) | |
190 | return UCOL_EQUAL; | |
191 | ||
b75a7d8f A |
192 | UCollationResult result; |
193 | ||
194 | // are we comparing different lengths? | |
51004dcb A |
195 | int32_t minLength = getLength(); |
196 | int32_t targetLength = target.getLength(); | |
197 | if (minLength < targetLength) { | |
198 | result = UCOL_LESS; | |
199 | } else if (minLength == targetLength) { | |
200 | result = UCOL_EQUAL; | |
201 | } else { | |
202 | minLength = targetLength; | |
203 | result = UCOL_GREATER; | |
b75a7d8f A |
204 | } |
205 | ||
206 | if (minLength > 0) { | |
207 | int diff = uprv_memcmp(src, tgt, minLength); | |
208 | if (diff > 0) { | |
209 | return UCOL_GREATER; | |
210 | } | |
211 | else | |
212 | if (diff < 0) { | |
213 | return UCOL_LESS; | |
214 | } | |
215 | } | |
216 | ||
217 | return result; | |
218 | } else { | |
219 | return UCOL_EQUAL; | |
220 | } | |
221 | } | |
222 | ||
b75a7d8f A |
223 | #ifdef U_USE_COLLATION_KEY_DEPRECATES |
224 | // Create a copy of the byte array. | |
225 | uint8_t* | |
226 | CollationKey::toByteArray(int32_t& count) const | |
227 | { | |
228 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); | |
229 | ||
230 | if (result == NULL) | |
231 | { | |
232 | count = 0; | |
233 | } | |
234 | else | |
235 | { | |
236 | count = fCount; | |
51004dcb A |
237 | if (count > 0) { |
238 | uprv_memcpy(result, fBytes, fCount); | |
239 | } | |
b75a7d8f A |
240 | } |
241 | ||
242 | return result; | |
243 | } | |
244 | #endif | |
245 | ||
51004dcb A |
246 | static int32_t |
247 | computeHashCode(const uint8_t *key, int32_t length) { | |
248 | const char *s = reinterpret_cast<const char *>(key); | |
249 | int32_t hash; | |
250 | if (s == NULL || length == 0) { | |
251 | hash = kEmptyHashCode; | |
252 | } else { | |
253 | hash = ustr_hashCharsN(s, length); | |
254 | if (hash == kInvalidHashCode || hash == kBogusHashCode) { | |
255 | hash = kEmptyHashCode; | |
256 | } | |
257 | } | |
258 | return hash; | |
259 | } | |
260 | ||
b75a7d8f A |
261 | int32_t |
262 | CollationKey::hashCode() const | |
263 | { | |
264 | // (Cribbed from UnicodeString) | |
265 | // We cache the hashCode; when it becomes invalid, due to any change to the | |
266 | // string, we note this by setting it to kInvalidHashCode. [LIU] | |
267 | ||
268 | // Note: This method is semantically const, but physically non-const. | |
269 | ||
270 | if (fHashCode == kInvalidHashCode) | |
271 | { | |
51004dcb | 272 | fHashCode = computeHashCode(getBytes(), getLength()); |
b75a7d8f A |
273 | } |
274 | ||
275 | return fHashCode; | |
276 | } | |
277 | ||
278 | U_NAMESPACE_END | |
279 | ||
73c04bcf A |
280 | U_CAPI int32_t U_EXPORT2 |
281 | ucol_keyHashCode(const uint8_t *key, | |
282 | int32_t length) | |
283 | { | |
51004dcb | 284 | return icu::computeHashCode(key, length); |
73c04bcf A |
285 | } |
286 | ||
b75a7d8f | 287 | #endif /* #if !UCONFIG_NO_COLLATION */ |