[apple/javascriptcore.git] / wtf / StringHasher.h

/*
 * Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved.
 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
#ifndef WTF_StringHasher_h
#define WTF_StringHasher_h

#include <wtf/unicode/Unicode.h>

namespace WTF {

// Initial state of every StringHasher. The value is derived from the golden
// ratio; which constant is used is arbitrary, but it must be non-zero so that
// an input consisting only of 0's does not hash to all 0's.
static const unsigned stringHashingStartValue = 0x9e3779b9U;

// Paul Hsieh's SuperFastHash
// http://www.azillionmonkeys.com/qed/hash.html
// char* data is interpreted as latin-encoded (zero extended to 16 bits).
class StringHasher {
public:
    inline StringHasher()
        : m_hash(stringHashingStartValue)
        , m_hasPendingCharacter(false)
        , m_pendingCharacter(0)
    {
    }

    inline void addCharacters(UChar a, UChar b)
    {
        ASSERT(!m_hasPendingCharacter);
        addCharactersToHash(a, b);
    }

    inline void addCharacter(UChar ch)
    {
        if (m_hasPendingCharacter) {
            addCharactersToHash(m_pendingCharacter, ch);
            m_hasPendingCharacter = false;
            return;
        }

        m_pendingCharacter = ch;
        m_hasPendingCharacter = true;
    }

    inline unsigned hash() const
    {
        unsigned result = m_hash;

        // Handle end case.
        if (m_hasPendingCharacter) {
            result += m_pendingCharacter;
            result ^= result << 11;
            result += result >> 17;
        }

        // Force "avalanching" of final 31 bits.
        result ^= result << 3;
        result += result >> 5;
        result ^= result << 2;
        result += result >> 15;
        result ^= result << 10;

        // First bit is used in UStringImpl for m_isIdentifier.
        result &= 0x7fffffff;

        // This avoids ever returning a hash code of 0, since that is used to
        // signal "hash not computed yet", using a value that is likely to be
        // effectively the same as 0 when the low bits are masked.
        if (!result)
            return 0x40000000;

        return result;
    }

    template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data, unsigned length)
    {
        StringHasher hasher;
        bool rem = length & 1;
        length >>= 1;

        while (length--) {
            hasher.addCharacters(Converter(data[0]), Converter(data[1]));
            data += 2;
        }

        if (rem)
            hasher.addCharacter(Converter(*data));

        return hasher.hash();
    }

    template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data)
    {
        StringHasher hasher;

        while (true) {
            UChar b0 = Converter(*data++);
            if (!b0)
                break;
            UChar b1 = Converter(*data++);
            if (!b1) {
                hasher.addCharacter(b0);
                break;
            }

            hasher.addCharacters(b0, b1);
        }

        return hasher.hash();
    }

    template<typename T> static inline unsigned computeHash(const T* data, unsigned length)
    {
        return computeHash<T, defaultCoverter>(data, length);
    }

    template<typename T> static inline unsigned computeHash(const T* data)
    {
        return computeHash<T, defaultCoverter>(data);
    }

    template<size_t length> static inline unsigned hashMemory(const void* data)
    {
        COMPILE_ASSERT(!(length % 4), length_must_be_a_multible_of_four);
        return computeHash<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar));
    }

    static inline unsigned hashMemory(const void* data, unsigned size)
    {
        ASSERT(!(size % 2));
        return computeHash<UChar>(static_cast<const UChar*>(data), size / sizeof(UChar));
    }

private:
    static inline UChar defaultCoverter(UChar ch)
    {
        return ch;
    }

    static inline UChar defaultCoverter(char ch)
    {
        return static_cast<unsigned char>(ch);
    }

    inline void addCharactersToHash(UChar a, UChar b)
    {
        m_hash += a;
        unsigned tmp = (b << 11) ^ m_hash;
        m_hash = (m_hash << 16) ^ tmp;
        m_hash += m_hash >> 11;
    }

    unsigned m_hash;
    bool m_hasPendingCharacter;
    UChar m_pendingCharacter;
};

} // namespace WTF

// Make StringHasher usable without the WTF:: qualifier in files that include
// this header.
using WTF::StringHasher;

#endif // WTF_StringHasher_h
Commit	Line	Data
14957cd0 A	1	/*
	2	* Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved.
	3	* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
	4	*
	5	* This library is free software; you can redistribute it and/or
	6	* modify it under the terms of the GNU Library General Public
	7	* License as published by the Free Software Foundation; either
	8	* version 2 of the License, or (at your option) any later version.
	9	*
	10	* This library is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* Library General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU Library General Public License
	16	* along with this library; see the file COPYING.LIB. If not, write to
	17	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	18	* Boston, MA 02110-1301, USA.
	19	*
	20	*/
	21	#ifndef WTF_StringHasher_h
	22	#define WTF_StringHasher_h
	23
	24	#include <wtf/unicode/Unicode.h>
	25
	26	namespace WTF {
	27
	28	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
	29	static const unsigned stringHashingStartValue = 0x9e3779b9U;
	30
	31	// Paul Hsieh's SuperFastHash
	32	// http://www.azillionmonkeys.com/qed/hash.html
	33	// char* data is interpreted as latin-encoded (zero extended to 16 bits).
	34	class StringHasher {
	35	public:
	36	inline StringHasher()
	37	: m_hash(stringHashingStartValue)
	38	, m_hasPendingCharacter(false)
	39	, m_pendingCharacter(0)
	40	{
	41	}
	42
	43	inline void addCharacters(UChar a, UChar b)
	44	{
	45	ASSERT(!m_hasPendingCharacter);
	46	addCharactersToHash(a, b);
	47	}
	48
	49	inline void addCharacter(UChar ch)
	50	{
	51	if (m_hasPendingCharacter) {
	52	addCharactersToHash(m_pendingCharacter, ch);
	53	m_hasPendingCharacter = false;
	54	return;
	55	}
	56
	57	m_pendingCharacter = ch;
	58	m_hasPendingCharacter = true;
	59	}
	60
	61	inline unsigned hash() const
	62	{
	63	unsigned result = m_hash;
	64
65	// Handle end case.
66	if (m_hasPendingCharacter) {
67	result += m_pendingCharacter;
68	result ^= result << 11;
69	result += result >> 17;
70	}
71
72	// Force "avalanching" of final 31 bits.
73	result ^= result << 3;
74	result += result >> 5;
75	result ^= result << 2;
76	result += result >> 15;
77	result ^= result << 10;
78
79	// First bit is used in UStringImpl for m_isIdentifier.
80	result &= 0x7fffffff;
81
82	// This avoids ever returning a hash code of 0, since that is used to
83	// signal "hash not computed yet", using a value that is likely to be
84	// effectively the same as 0 when the low bits are masked.
85	if (!result)
86	return 0x40000000;
87
88	return result;
89	}
90
91	template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data, unsigned length)
92	{
93	StringHasher hasher;
94	bool rem = length & 1;
95	length >>= 1;
96
97	while (length--) {
98	hasher.addCharacters(Converter(data[0]), Converter(data[1]));
99	data += 2;
100	}
101
102	if (rem)
103	hasher.addCharacter(Converter(*data));
104
105	return hasher.hash();
106	}
107
108	template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data)
109	{
110	StringHasher hasher;
111
112	while (true) {
113	UChar b0 = Converter(*data++);
114	if (!b0)
115	break;
116	UChar b1 = Converter(*data++);
117	if (!b1) {
118	hasher.addCharacter(b0);
119	break;
120	}
121
122	hasher.addCharacters(b0, b1);
123	}
124
125	return hasher.hash();
126	}
127
128	template<typename T> static inline unsigned computeHash(const T* data, unsigned length)
129	{
130	return computeHash<T, defaultCoverter>(data, length);
131	}
132
133	template<typename T> static inline unsigned computeHash(const T* data)
134	{
135	return computeHash<T, defaultCoverter>(data);
136	}
137
138	template<size_t length> static inline unsigned hashMemory(const void* data)
139	{
140	COMPILE_ASSERT(!(length % 4), length_must_be_a_multible_of_four);
141	return computeHash<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar));
142	}
143
144	static inline unsigned hashMemory(const void* data, unsigned size)
145	{
146	ASSERT(!(size % 2));
147	return computeHash<UChar>(static_cast<const UChar*>(data), size / sizeof(UChar));
148	}
149
150	private:
151	static inline UChar defaultCoverter(UChar ch)
152	{
153	return ch;
154	}
155
156	static inline UChar defaultCoverter(char ch)
157	{
158	return static_cast<unsigned char>(ch);
159	}
160
161	inline void addCharactersToHash(UChar a, UChar b)
162	{
163	m_hash += a;
164	unsigned tmp = (b << 11) ^ m_hash;
165	m_hash = (m_hash << 16) ^ tmp;
166	m_hash += m_hash >> 11;
167	}
168
169	unsigned m_hash;
170	bool m_hasPendingCharacter;
171	UChar m_pendingCharacter;
172	};
173
174	} // namespace WTF
175
176	using WTF::StringHasher;
177
178	#endif // WTF_StringHasher_h