]>
Commit | Line | Data |
---|---|---|
4e4e5a6f A |
1 | /* |
2 | * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved | |
3 | * Copyright (C) Research In Motion Limited 2009. All rights reserved. | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Library General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Library General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Library General Public License | |
16 | * along with this library; see the file COPYING.LIB. If not, write to | |
17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
18 | * Boston, MA 02110-1301, USA. | |
19 | * | |
20 | */ | |
21 | ||
22 | #ifndef StringHash_h | |
23 | #define StringHash_h | |
24 | ||
25 | #include "AtomicString.h" | |
26 | #include "WTFString.h" | |
27 | #include <wtf/HashTraits.h> | |
28 | #include <wtf/StringHashFunctions.h> | |
29 | #include <wtf/unicode/Unicode.h> | |
30 | ||
31 | // FIXME: This is a temporary layering violation while we move string code to WTF. | |
32 | // Landing the file moves in one patch, will follow on with patches to change the namespaces. | |
33 | namespace WebCore { | |
34 | ||
35 | // The hash() functions on StringHash and CaseFoldingHash do not support | |
36 | // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> | |
37 | // cause a null-pointer dereference when passed null strings. | |
38 | ||
39 | // FIXME: We should really figure out a way to put the computeHash function that's | |
40 | // currently a member function of StringImpl into this file so we can be a little | |
41 | // closer to having all the nearly-identical hash functions in one place. | |
42 | ||
43 | struct StringHash { | |
44 | static unsigned hash(StringImpl* key) { return key->hash(); } | |
45 | static bool equal(const StringImpl* a, const StringImpl* b) | |
46 | { | |
47 | if (a == b) | |
48 | return true; | |
49 | if (!a || !b) | |
50 | return false; | |
51 | ||
52 | unsigned aLength = a->length(); | |
53 | unsigned bLength = b->length(); | |
54 | if (aLength != bLength) | |
55 | return false; | |
56 | ||
57 | // FIXME: perhaps we should have a more abstract macro that indicates when | |
58 | // going 4 bytes at a time is unsafe | |
59 | #if CPU(ARM) || CPU(SH4) | |
60 | const UChar* aChars = a->characters(); | |
61 | const UChar* bChars = b->characters(); | |
62 | for (unsigned i = 0; i != aLength; ++i) { | |
63 | if (*aChars++ != *bChars++) | |
64 | return false; | |
65 | } | |
66 | return true; | |
67 | #else | |
68 | /* Do it 4-bytes-at-a-time on architectures where it's safe */ | |
69 | const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); | |
70 | const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); | |
71 | ||
72 | unsigned halfLength = aLength >> 1; | |
73 | for (unsigned i = 0; i != halfLength; ++i) | |
74 | if (*aChars++ != *bChars++) | |
75 | return false; | |
76 | ||
77 | if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) | |
78 | return false; | |
79 | ||
80 | return true; | |
81 | #endif | |
82 | } | |
83 | ||
84 | static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } | |
85 | static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) | |
86 | { | |
87 | return equal(a.get(), b.get()); | |
88 | } | |
89 | ||
90 | static unsigned hash(const String& key) { return key.impl()->hash(); } | |
91 | static bool equal(const String& a, const String& b) | |
92 | { | |
93 | return equal(a.impl(), b.impl()); | |
94 | } | |
95 | ||
96 | static const bool safeToCompareToEmptyOrDeleted = false; | |
97 | }; | |
98 | ||
99 | class CaseFoldingHash { | |
100 | public: | |
101 | // Paul Hsieh's SuperFastHash | |
102 | // http://www.azillionmonkeys.com/qed/hash.html | |
103 | static unsigned hash(const UChar* data, unsigned length) | |
104 | { | |
105 | unsigned l = length; | |
106 | const UChar* s = data; | |
107 | uint32_t hash = WTF::stringHashingStartValue; | |
108 | uint32_t tmp; | |
109 | ||
110 | int rem = l & 1; | |
111 | l >>= 1; | |
112 | ||
113 | // Main loop. | |
114 | for (; l > 0; l--) { | |
115 | hash += WTF::Unicode::foldCase(s[0]); | |
116 | tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; | |
117 | hash = (hash << 16) ^ tmp; | |
118 | s += 2; | |
119 | hash += hash >> 11; | |
120 | } | |
121 | ||
122 | // Handle end case. | |
123 | if (rem) { | |
124 | hash += WTF::Unicode::foldCase(s[0]); | |
125 | hash ^= hash << 11; | |
126 | hash += hash >> 17; | |
127 | } | |
128 | ||
129 | // Force "avalanching" of final 127 bits. | |
130 | hash ^= hash << 3; | |
131 | hash += hash >> 5; | |
132 | hash ^= hash << 2; | |
133 | hash += hash >> 15; | |
134 | hash ^= hash << 10; | |
135 | ||
136 | // This avoids ever returning a hash code of 0, since that is used to | |
137 | // signal "hash not computed yet", using a value that is likely to be | |
138 | // effectively the same as 0 when the low bits are masked. | |
139 | hash |= !hash << 31; | |
140 | ||
141 | return hash; | |
142 | } | |
143 | ||
144 | static unsigned hash(StringImpl* str) | |
145 | { | |
146 | return hash(str->characters(), str->length()); | |
147 | } | |
148 | ||
149 | static unsigned hash(const char* str, unsigned length) | |
150 | { | |
151 | // This hash is designed to work on 16-bit chunks at a time. But since the normal case | |
152 | // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they | |
153 | // were 16-bit chunks, which will give matching results. | |
154 | ||
155 | unsigned l = length; | |
156 | const char* s = str; | |
157 | uint32_t hash = WTF::stringHashingStartValue; | |
158 | uint32_t tmp; | |
159 | ||
160 | int rem = l & 1; | |
161 | l >>= 1; | |
162 | ||
163 | // Main loop | |
164 | for (; l > 0; l--) { | |
165 | hash += WTF::Unicode::foldCase(s[0]); | |
166 | tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; | |
167 | hash = (hash << 16) ^ tmp; | |
168 | s += 2; | |
169 | hash += hash >> 11; | |
170 | } | |
171 | ||
172 | // Handle end case | |
173 | if (rem) { | |
174 | hash += WTF::Unicode::foldCase(s[0]); | |
175 | hash ^= hash << 11; | |
176 | hash += hash >> 17; | |
177 | } | |
178 | ||
179 | // Force "avalanching" of final 127 bits | |
180 | hash ^= hash << 3; | |
181 | hash += hash >> 5; | |
182 | hash ^= hash << 2; | |
183 | hash += hash >> 15; | |
184 | hash ^= hash << 10; | |
185 | ||
186 | // this avoids ever returning a hash code of 0, since that is used to | |
187 | // signal "hash not computed yet", using a value that is likely to be | |
188 | // effectively the same as 0 when the low bits are masked | |
189 | hash |= !hash << 31; | |
190 | ||
191 | return hash; | |
192 | } | |
193 | ||
194 | static bool equal(const StringImpl* a, const StringImpl* b) | |
195 | { | |
196 | if (a == b) | |
197 | return true; | |
198 | if (!a || !b) | |
199 | return false; | |
200 | unsigned length = a->length(); | |
201 | if (length != b->length()) | |
202 | return false; | |
203 | return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; | |
204 | } | |
205 | ||
206 | static unsigned hash(const RefPtr<StringImpl>& key) | |
207 | { | |
208 | return hash(key.get()); | |
209 | } | |
210 | ||
211 | static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) | |
212 | { | |
213 | return equal(a.get(), b.get()); | |
214 | } | |
215 | ||
216 | static unsigned hash(const String& key) | |
217 | { | |
218 | return hash(key.impl()); | |
219 | } | |
220 | static unsigned hash(const AtomicString& key) | |
221 | { | |
222 | return hash(key.impl()); | |
223 | } | |
224 | static bool equal(const String& a, const String& b) | |
225 | { | |
226 | return equal(a.impl(), b.impl()); | |
227 | } | |
228 | static bool equal(const AtomicString& a, const AtomicString& b) | |
229 | { | |
230 | return (a == b) || equal(a.impl(), b.impl()); | |
231 | } | |
232 | ||
233 | static const bool safeToCompareToEmptyOrDeleted = false; | |
234 | }; | |
235 | ||
236 | // This hash can be used in cases where the key is a hash of a string, but we don't | |
237 | // want to store the string. It's not really specific to string hashing, but all our | |
238 | // current uses of it are for strings. | |
239 | struct AlreadyHashed : IntHash<unsigned> { | |
240 | static unsigned hash(unsigned key) { return key; } | |
241 | ||
242 | // To use a hash value as a key for a hash table, we need to eliminate the | |
243 | // "deleted" value, which is negative one. That could be done by changing | |
244 | // the string hash function to never generate negative one, but this works | |
245 | // and is still relatively efficient. | |
246 | static unsigned avoidDeletedValue(unsigned hash) | |
247 | { | |
248 | ASSERT(hash); | |
249 | unsigned newHash = hash | (!(hash + 1) << 31); | |
250 | ASSERT(newHash); | |
251 | ASSERT(newHash != 0xFFFFFFFF); | |
252 | return newHash; | |
253 | } | |
254 | }; | |
255 | ||
256 | } | |
257 | ||
258 | namespace WTF { | |
259 | ||
260 | template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> { | |
261 | static const bool emptyValueIsZero = true; | |
262 | static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); } | |
263 | static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); } | |
264 | }; | |
265 | ||
266 | } | |
267 | ||
268 | #endif |