WebCore/StringHash.h

   1 /*
   2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Library General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public License
  15  * along with this library; see the file COPYING.LIB.  If not, write to
  16  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  17  * Boston, MA 02110-1301, USA.
  18  *
  19  */
  20
  21 #ifndef StringHash_h
  22 #define StringHash_h
  23
  24 #include "AtomicString.h"
  25 #include "PlatformString.h"
  26 #include <wtf/HashFunctions.h>
  27 #include <wtf/HashTraits.h>
  28 #include <wtf/unicode/Unicode.h>
  29
  30 namespace WebCore {
  31
  32     // FIXME: We should really figure out a way to put the computeHash function that's
  33     // currently a member function of StringImpl into this file so we can be a little
  34     // closer to having all the nearly-identical hash functions in one place.
  35
  36     struct StringHash {
  37         static unsigned hash(StringImpl* key) { return key->hash(); }
  38         static bool equal(StringImpl* a, StringImpl* b)
  39         {
  40             if (a == b)
  41                 return true;
  42             if (!a || !b)
  43                 return false;
  44
  45             unsigned aLength = a->length();
  46             unsigned bLength = b->length();
  47             if (aLength != bLength)
  48                 return false;
  49
  50             const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters());
  51             const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters());
  52
  53             unsigned halfLength = aLength >> 1;
  54             for (unsigned i = 0; i != halfLength; ++i)
  55                 if (*aChars++ != *bChars++)
  56                     return false;
  57
  58             if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
  59                 return false;
  60
  61             return true;
  62         }
  63
  64         static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
  65         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
  66         {
  67             return equal(a.get(), b.get());
  68         }
  69
  70         static unsigned hash(const String& key) { return key.impl()->hash(); }
  71         static bool equal(const String& a, const String& b)
  72         {
  73             return equal(a.impl(), b.impl());
  74         }
  75
  76         static const bool safeToCompareToEmptyOrDeleted = false;
  77     };
  78
  79     class CaseFoldingHash {
  80     public:
  81         // Paul Hsieh's SuperFastHash
  82         // http://www.azillionmonkeys.com/qed/hash.html
  83         static unsigned hash(const UChar* data, unsigned length)
  84         {
  85             unsigned l = length;
  86             const UChar* s = data;
  87             uint32_t hash = WTF::stringHashingStartValue;
  88             uint32_t tmp;
  89
  90             int rem = l & 1;
  91             l >>= 1;
  92
  93             // Main loop.
  94             for (; l > 0; l--) {
  95                 hash += WTF::Unicode::foldCase(s[0]);
  96                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
  97                 hash = (hash << 16) ^ tmp;
  98                 s += 2;
  99                 hash += hash >> 11;
 100             }
 101
 102             // Handle end case.
 103             if (rem) {
 104                 hash += WTF::Unicode::foldCase(s[0]);
 105                 hash ^= hash << 11;
 106                 hash += hash >> 17;
 107             }
 108
 109             // Force "avalanching" of final 127 bits.
 110             hash ^= hash << 3;
 111             hash += hash >> 5;
 112             hash ^= hash << 2;
 113             hash += hash >> 15;
 114             hash ^= hash << 10;
 115
 116             // This avoids ever returning a hash code of 0, since that is used to
 117             // signal "hash not computed yet", using a value that is likely to be
 118             // effectively the same as 0 when the low bits are masked.
 119             hash |= !hash << 31;
 120
 121             return hash;
 122         }
 123
 124         static unsigned hash(StringImpl* str)
 125         {
 126             return hash(str->characters(), str->length());
 127         }
 128
 129         static unsigned hash(const char* str, unsigned length)
 130         {
 131             // This hash is designed to work on 16-bit chunks at a time. But since the normal case
 132             // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
 133             // were 16-bit chunks, which will give matching results.
 134
 135             unsigned l = length;
 136             const char* s = str;
 137             uint32_t hash = WTF::stringHashingStartValue;
 138             uint32_t tmp;
 139
 140             int rem = l & 1;
 141             l >>= 1;
 142
 143             // Main loop
 144             for (; l > 0; l--) {
 145                 hash += WTF::Unicode::foldCase(s[0]);
 146                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
 147                 hash = (hash << 16) ^ tmp;
 148                 s += 2;
 149                 hash += hash >> 11;
 150             }
 151
 152             // Handle end case
 153             if (rem) {
 154                 hash += WTF::Unicode::foldCase(s[0]);
 155                 hash ^= hash << 11;
 156                 hash += hash >> 17;
 157             }
 158
 159             // Force "avalanching" of final 127 bits
 160             hash ^= hash << 3;
 161             hash += hash >> 5;
 162             hash ^= hash << 2;
 163             hash += hash >> 15;
 164             hash ^= hash << 10;
 165
 166             // this avoids ever returning a hash code of 0, since that is used to
 167             // signal "hash not computed yet", using a value that is likely to be
 168             // effectively the same as 0 when the low bits are masked
 169             hash |= !hash << 31;
 170
 171             return hash;
 172         }
 173
 174         static bool equal(StringImpl* a, StringImpl* b)
 175         {
 176             if (a == b)
 177                 return true;
 178             if (!a || !b)
 179                 return false;
 180             unsigned length = a->length();
 181             if (length != b->length())
 182                 return false;
 183             return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0;
 184         }
 185
 186         static unsigned hash(const RefPtr<StringImpl>& key)
 187         {
 188             return hash(key.get());
 189         }
 190
 191         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
 192         {
 193             return equal(a.get(), b.get());
 194         }
 195
 196         static unsigned hash(const String& key)
 197         {
 198             return hash(key.impl());
 199         }
 200         static unsigned hash(const AtomicString& key)
 201         {
 202             return hash(key.impl());
 203         }
 204         static bool equal(const String& a, const String& b)
 205         {
 206             return equal(a.impl(), b.impl());
 207         }
 208         static bool equal(const AtomicString& a, const AtomicString& b)
 209         {
 210             return (a == b) || equal(a.impl(), b.impl());
 211         }
 212
 213         static const bool safeToCompareToEmptyOrDeleted = false;
 214     };
 215
 216     // This hash can be used in cases where the key is a hash of a string, but we don't
 217     // want to store the string. It's not really specific to string hashing, but all our
 218     // current uses of it are for strings.
 219     struct AlreadyHashed : IntHash<unsigned> {
 220         static unsigned hash(unsigned key) { return key; }
 221
 222         // To use a hash value as a key for a hash table, we need to eliminate the
 223         // "deleted" value, which is negative one. That could be done by changing
 224         // the string hash function to never generate negative one, but this works
 225         // and is still relatively efficient.
 226         static unsigned avoidDeletedValue(unsigned hash)
 227         {
 228             ASSERT(hash);
 229             unsigned newHash = hash | (!(hash + 1) << 31);
 230             ASSERT(newHash);
 231             ASSERT(newHash != 0xFFFFFFFF);
 232             return newHash;
 233         }
 234     };
 235
 236 }
 237
 238 namespace WTF {
 239
 240     template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
 241         static const bool emptyValueIsZero = true;
 242         static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); }
 243 #if 0
 244         static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); }
 245 #endif
 246     };
 247
 248 }
 249
 250 #endif