]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved. | |
3 | * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Library General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Library General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Library General Public License | |
16 | * along with this library; see the file COPYING.LIB. If not, write to | |
17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
18 | * Boston, MA 02110-1301, USA. | |
19 | * | |
20 | */ | |
21 | #ifndef WTF_StringHasher_h | |
22 | #define WTF_StringHasher_h | |
23 | ||
24 | #include <wtf/unicode/Unicode.h> | |
25 | ||
26 | namespace WTF { | |
27 | ||
28 | // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's | |
29 | static const unsigned stringHashingStartValue = 0x9e3779b9U; | |
30 | ||
31 | // Paul Hsieh's SuperFastHash | |
32 | // http://www.azillionmonkeys.com/qed/hash.html | |
33 | // char* data is interpreted as latin-encoded (zero extended to 16 bits). | |
34 | class StringHasher { | |
35 | public: | |
36 | inline StringHasher() | |
37 | : m_hash(stringHashingStartValue) | |
38 | , m_hasPendingCharacter(false) | |
39 | , m_pendingCharacter(0) | |
40 | { | |
41 | } | |
42 | ||
43 | inline void addCharacters(UChar a, UChar b) | |
44 | { | |
45 | ASSERT(!m_hasPendingCharacter); | |
46 | addCharactersToHash(a, b); | |
47 | } | |
48 | ||
49 | inline void addCharacter(UChar ch) | |
50 | { | |
51 | if (m_hasPendingCharacter) { | |
52 | addCharactersToHash(m_pendingCharacter, ch); | |
53 | m_hasPendingCharacter = false; | |
54 | return; | |
55 | } | |
56 | ||
57 | m_pendingCharacter = ch; | |
58 | m_hasPendingCharacter = true; | |
59 | } | |
60 | ||
61 | inline unsigned hash() const | |
62 | { | |
63 | unsigned result = m_hash; | |
64 | ||
65 | // Handle end case. | |
66 | if (m_hasPendingCharacter) { | |
67 | result += m_pendingCharacter; | |
68 | result ^= result << 11; | |
69 | result += result >> 17; | |
70 | } | |
71 | ||
72 | // Force "avalanching" of final 31 bits. | |
73 | result ^= result << 3; | |
74 | result += result >> 5; | |
75 | result ^= result << 2; | |
76 | result += result >> 15; | |
77 | result ^= result << 10; | |
78 | ||
79 | // First bit is used in UStringImpl for m_isIdentifier. | |
80 | result &= 0x7fffffff; | |
81 | ||
82 | // This avoids ever returning a hash code of 0, since that is used to | |
83 | // signal "hash not computed yet", using a value that is likely to be | |
84 | // effectively the same as 0 when the low bits are masked. | |
85 | if (!result) | |
86 | return 0x40000000; | |
87 | ||
88 | return result; | |
89 | } | |
90 | ||
91 | template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data, unsigned length) | |
92 | { | |
93 | StringHasher hasher; | |
94 | bool rem = length & 1; | |
95 | length >>= 1; | |
96 | ||
97 | while (length--) { | |
98 | hasher.addCharacters(Converter(data[0]), Converter(data[1])); | |
99 | data += 2; | |
100 | } | |
101 | ||
102 | if (rem) | |
103 | hasher.addCharacter(Converter(*data)); | |
104 | ||
105 | return hasher.hash(); | |
106 | } | |
107 | ||
108 | template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data) | |
109 | { | |
110 | StringHasher hasher; | |
111 | ||
112 | while (true) { | |
113 | UChar b0 = Converter(*data++); | |
114 | if (!b0) | |
115 | break; | |
116 | UChar b1 = Converter(*data++); | |
117 | if (!b1) { | |
118 | hasher.addCharacter(b0); | |
119 | break; | |
120 | } | |
121 | ||
122 | hasher.addCharacters(b0, b1); | |
123 | } | |
124 | ||
125 | return hasher.hash(); | |
126 | } | |
127 | ||
128 | template<typename T> static inline unsigned computeHash(const T* data, unsigned length) | |
129 | { | |
130 | return computeHash<T, defaultCoverter>(data, length); | |
131 | } | |
132 | ||
133 | template<typename T> static inline unsigned computeHash(const T* data) | |
134 | { | |
135 | return computeHash<T, defaultCoverter>(data); | |
136 | } | |
137 | ||
138 | template<size_t length> static inline unsigned hashMemory(const void* data) | |
139 | { | |
140 | COMPILE_ASSERT(!(length % 4), length_must_be_a_multible_of_four); | |
141 | return computeHash<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar)); | |
142 | } | |
143 | ||
144 | static inline unsigned hashMemory(const void* data, unsigned size) | |
145 | { | |
146 | ASSERT(!(size % 2)); | |
147 | return computeHash<UChar>(static_cast<const UChar*>(data), size / sizeof(UChar)); | |
148 | } | |
149 | ||
150 | private: | |
151 | static inline UChar defaultCoverter(UChar ch) | |
152 | { | |
153 | return ch; | |
154 | } | |
155 | ||
156 | static inline UChar defaultCoverter(char ch) | |
157 | { | |
158 | return static_cast<unsigned char>(ch); | |
159 | } | |
160 | ||
161 | inline void addCharactersToHash(UChar a, UChar b) | |
162 | { | |
163 | m_hash += a; | |
164 | unsigned tmp = (b << 11) ^ m_hash; | |
165 | m_hash = (m_hash << 16) ^ tmp; | |
166 | m_hash += m_hash >> 11; | |
167 | } | |
168 | ||
169 | unsigned m_hash; | |
170 | bool m_hasPendingCharacter; | |
171 | UChar m_pendingCharacter; | |
172 | }; | |
173 | ||
174 | } // namespace WTF | |
175 | ||
176 | using WTF::StringHasher; | |
177 | ||
178 | #endif // WTF_StringHasher_h |