]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * Copyright (C) 1996-2012, International Business Machines Corporation and | |
6 | * others. All Rights Reserved. | |
7 | ******************************************************************************* | |
8 | */ | |
9 | //=============================================================================== | |
10 | // | |
11 | // File sortkey.cpp | |
12 | // | |
13 | // | |
14 | // | |
15 | // Created by: Helena Shih | |
16 | // | |
17 | // Modification History: | |
18 | // | |
19 | // Date Name Description | |
20 | // | |
21 | // 6/20/97 helena Java class name change. | |
22 | // 6/23/97 helena Added comments to make code more readable. | |
23 | // 6/26/98 erm Canged to use byte arrays instead of UnicodeString | |
24 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, | |
25 | // Cleaned up operator= | |
26 | // 07/12/99 helena HPUX 11 CC port. | |
27 | // 03/06/01 synwee Modified compareTo, to handle the result of | |
28 | // 2 string similar in contents, but one is longer | |
29 | // than the other | |
30 | //=============================================================================== | |
31 | ||
32 | #include "unicode/utypes.h" | |
33 | ||
34 | #if !UCONFIG_NO_COLLATION | |
35 | ||
36 | #include "unicode/sortkey.h" | |
37 | #include "cmemory.h" | |
38 | #include "uelement.h" | |
39 | #include "ustr_imp.h" | |
40 | ||
41 | U_NAMESPACE_BEGIN | |
42 | ||
43 | // A hash code of kInvalidHashCode indicates that the hash code needs | |
44 | // to be computed. A hash code of kEmptyHashCode is used for empty keys | |
45 | // and for any key whose computed hash code is kInvalidHashCode. | |
46 | static const int32_t kInvalidHashCode = 0; | |
47 | static const int32_t kEmptyHashCode = 1; | |
48 | // The "bogus hash code" replaces a separate fBogus flag. | |
49 | static const int32_t kBogusHashCode = 2; | |
50 | ||
51 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) | |
52 | ||
53 | CollationKey::CollationKey() | |
54 | : UObject(), fFlagAndLength(0), | |
55 | fHashCode(kEmptyHashCode) | |
56 | { | |
57 | } | |
58 | ||
59 | // Create a collation key from a bit array. | |
60 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) | |
61 | : UObject(), fFlagAndLength(count), | |
62 | fHashCode(kInvalidHashCode) | |
63 | { | |
64 | if (count < 0 || (newValues == NULL && count != 0) || | |
65 | (count > getCapacity() && reallocate(count, 0) == NULL)) { | |
66 | setToBogus(); | |
67 | return; | |
68 | } | |
69 | ||
70 | if (count > 0) { | |
71 | uprv_memcpy(getBytes(), newValues, count); | |
72 | } | |
73 | } | |
74 | ||
75 | CollationKey::CollationKey(const CollationKey& other) | |
76 | : UObject(other), fFlagAndLength(other.getLength()), | |
77 | fHashCode(other.fHashCode) | |
78 | { | |
79 | if (other.isBogus()) | |
80 | { | |
81 | setToBogus(); | |
82 | return; | |
83 | } | |
84 | ||
85 | int32_t length = fFlagAndLength; | |
86 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
87 | setToBogus(); | |
88 | return; | |
89 | } | |
90 | ||
91 | if (length > 0) { | |
92 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
93 | } | |
94 | } | |
95 | ||
96 | CollationKey::~CollationKey() | |
97 | { | |
98 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } | |
99 | } | |
100 | ||
101 | uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { | |
102 | uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); | |
103 | if(newBytes == NULL) { return NULL; } | |
104 | if(length > 0) { | |
105 | uprv_memcpy(newBytes, getBytes(), length); | |
106 | } | |
107 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } | |
108 | fUnion.fFields.fBytes = newBytes; | |
109 | fUnion.fFields.fCapacity = newCapacity; | |
110 | fFlagAndLength |= 0x80000000; | |
111 | return newBytes; | |
112 | } | |
113 | ||
114 | void CollationKey::setLength(int32_t newLength) { | |
115 | // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); | |
116 | fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; | |
117 | fHashCode = kInvalidHashCode; | |
118 | } | |
119 | ||
120 | // set the key to an empty state | |
121 | CollationKey& | |
122 | CollationKey::reset() | |
123 | { | |
124 | fFlagAndLength &= 0x80000000; | |
125 | fHashCode = kEmptyHashCode; | |
126 | ||
127 | return *this; | |
128 | } | |
129 | ||
130 | // set the key to a "bogus" or invalid state | |
131 | CollationKey& | |
132 | CollationKey::setToBogus() | |
133 | { | |
134 | fFlagAndLength &= 0x80000000; | |
135 | fHashCode = kBogusHashCode; | |
136 | ||
137 | return *this; | |
138 | } | |
139 | ||
140 | UBool | |
141 | CollationKey::operator==(const CollationKey& source) const | |
142 | { | |
143 | return getLength() == source.getLength() && | |
144 | (this == &source || | |
145 | uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); | |
146 | } | |
147 | ||
148 | const CollationKey& | |
149 | CollationKey::operator=(const CollationKey& other) | |
150 | { | |
151 | if (this != &other) | |
152 | { | |
153 | if (other.isBogus()) | |
154 | { | |
155 | return setToBogus(); | |
156 | } | |
157 | ||
158 | int32_t length = other.getLength(); | |
159 | if (length > getCapacity() && reallocate(length, 0) == NULL) { | |
160 | return setToBogus(); | |
161 | } | |
162 | if (length > 0) { | |
163 | uprv_memcpy(getBytes(), other.getBytes(), length); | |
164 | } | |
165 | fFlagAndLength = (fFlagAndLength & 0x80000000) | length; | |
166 | fHashCode = other.fHashCode; | |
167 | } | |
168 | ||
169 | return *this; | |
170 | } | |
171 | ||
172 | // Bitwise comparison for the collation keys. | |
173 | Collator::EComparisonResult | |
174 | CollationKey::compareTo(const CollationKey& target) const | |
175 | { | |
176 | UErrorCode errorCode = U_ZERO_ERROR; | |
177 | return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); | |
178 | } | |
179 | ||
180 | // Bitwise comparison for the collation keys. | |
181 | UCollationResult | |
182 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const | |
183 | { | |
184 | if(U_SUCCESS(status)) { | |
185 | const uint8_t *src = getBytes(); | |
186 | const uint8_t *tgt = target.getBytes(); | |
187 | ||
188 | // are we comparing the same string | |
189 | if (src == tgt) | |
190 | return UCOL_EQUAL; | |
191 | ||
192 | UCollationResult result; | |
193 | ||
194 | // are we comparing different lengths? | |
195 | int32_t minLength = getLength(); | |
196 | int32_t targetLength = target.getLength(); | |
197 | if (minLength < targetLength) { | |
198 | result = UCOL_LESS; | |
199 | } else if (minLength == targetLength) { | |
200 | result = UCOL_EQUAL; | |
201 | } else { | |
202 | minLength = targetLength; | |
203 | result = UCOL_GREATER; | |
204 | } | |
205 | ||
206 | if (minLength > 0) { | |
207 | int diff = uprv_memcmp(src, tgt, minLength); | |
208 | if (diff > 0) { | |
209 | return UCOL_GREATER; | |
210 | } | |
211 | else | |
212 | if (diff < 0) { | |
213 | return UCOL_LESS; | |
214 | } | |
215 | } | |
216 | ||
217 | return result; | |
218 | } else { | |
219 | return UCOL_EQUAL; | |
220 | } | |
221 | } | |
222 | ||
223 | #ifdef U_USE_COLLATION_KEY_DEPRECATES | |
224 | // Create a copy of the byte array. | |
225 | uint8_t* | |
226 | CollationKey::toByteArray(int32_t& count) const | |
227 | { | |
228 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); | |
229 | ||
230 | if (result == NULL) | |
231 | { | |
232 | count = 0; | |
233 | } | |
234 | else | |
235 | { | |
236 | count = fCount; | |
237 | if (count > 0) { | |
238 | uprv_memcpy(result, fBytes, fCount); | |
239 | } | |
240 | } | |
241 | ||
242 | return result; | |
243 | } | |
244 | #endif | |
245 | ||
246 | static int32_t | |
247 | computeHashCode(const uint8_t *key, int32_t length) { | |
248 | const char *s = reinterpret_cast<const char *>(key); | |
249 | int32_t hash; | |
250 | if (s == NULL || length == 0) { | |
251 | hash = kEmptyHashCode; | |
252 | } else { | |
253 | hash = ustr_hashCharsN(s, length); | |
254 | if (hash == kInvalidHashCode || hash == kBogusHashCode) { | |
255 | hash = kEmptyHashCode; | |
256 | } | |
257 | } | |
258 | return hash; | |
259 | } | |
260 | ||
261 | int32_t | |
262 | CollationKey::hashCode() const | |
263 | { | |
264 | // (Cribbed from UnicodeString) | |
265 | // We cache the hashCode; when it becomes invalid, due to any change to the | |
266 | // string, we note this by setting it to kInvalidHashCode. [LIU] | |
267 | ||
268 | // Note: This method is semantically const, but physically non-const. | |
269 | ||
270 | if (fHashCode == kInvalidHashCode) | |
271 | { | |
272 | fHashCode = computeHashCode(getBytes(), getLength()); | |
273 | } | |
274 | ||
275 | return fHashCode; | |
276 | } | |
277 | ||
278 | U_NAMESPACE_END | |
279 | ||
280 | U_CAPI int32_t U_EXPORT2 | |
281 | ucol_keyHashCode(const uint8_t *key, | |
282 | int32_t length) | |
283 | { | |
284 | return icu::computeHashCode(key, length); | |
285 | } | |
286 | ||
287 | #endif /* #if !UCONFIG_NO_COLLATION */ |