/*
 * util/storage/lookup3.c - Bob Jenkins' lookup3 hash function (public
 * domain), adapted for unbound. Original at
 * http://burtleburtle.net/bob/hash/index.html
 */
/*
  February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
  January 2012(Wouter) added randomised initial value, fallout from 28c3.
  March 2007(Wouter) adapted from lookup3.c original, add config.h include.
     added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
     added include of lookup3.h to check definitions match declarations.
     removed include of stdint - config.h takes care of platform independence.
  url http://burtleburtle.net/bob/hash/index.html.
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
45 /*#define SELF_TEST 1*/
48 #include "util/storage/lookup3.h"
49 #include <stdio.h> /* defines printf for tests */
50 #include <time.h> /* defines time_t for timings in the test */
51 /*#include <stdint.h> defines uint32_t etc (from config.h) */
52 #include <sys/param.h> /* attempt to define endianness */
53 #ifdef HAVE_SYS_TYPES_H
54 # include <sys/types.h> /* attempt to define endianness (solaris) */
56 #if defined(linux) || defined(__OpenBSD__)
58 # include <endian.h> /* attempt to define endianness */
60 # include <machine/endian.h> /* on older OpenBSD */
63 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
64 #include <sys/endian.h> /* attempt to define endianness */
/* random initial value, settable at startup so hash values differ per
 * program invocation and cannot be precomputed by an attacker */
static uint32_t raninit = (uint32_t)0xdeadbeef;

/** Set the random initial value that seeds all hash computations below. */
void
hash_set_raninit(uint32_t v)
{
	raninit = v;
}

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#elif defined(_MACHINE_ENDIAN_H_)
/* test for machine_endian_h protects failure if some are empty strings */
# if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 1
# endif
# if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
#  define HASH_LITTLE_ENDIAN 1
#  define HASH_BIG_ENDIAN 0
# endif /* _MACHINE_ENDIAN_H_ */
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

#ifdef SELF_TEST

/*
--------------------------------------------------------------------
 hashword() -- hash an array of uint32_t's.
 This works on all machines.  To be useful, it requires
 -- that the key be an array of uint32_t's, and
 -- that the length be the number of uint32_t's in the key

 The function hashword() is identical to hashlittle() on little-endian
 machines, and identical to hashbig() on big-endian machines,
 except that the length has to be measured in uint32_ts rather than in
 bytes.  hashlittle() is more complicated than hashword() only because
 hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
uint32_t hashword(
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,               /* the length of the key, in uint32_ts */
uint32_t        initval)         /* the previous hash, or an arbitrary value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = raninit + (((uint32_t)length)<<2) + initval;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
  case 2 : b+=k[1];
  case 1 : a+=k[0];
    final(a,b,c);
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  return c;
}

/*
--------------------------------------------------------------------
hashword2() -- same as hashword(), but take two seeds and return two
32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
both be initialized with seeds.  If you pass in (*pb)==0, the output
(*pc) will be the same as the return value from hashword().
--------------------------------------------------------------------
*/
void hashword2 (
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,               /* the length of the key, in uint32_ts */
uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
uint32_t       *pb)             /* IN: more seed OUT: secondary hash value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
  c += *pb;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
  case 2 : b+=k[1];
  case 1 : a+=k[0];
    final(a,b,c);
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  *pc=c; *pb=b;
}

#endif /* SELF_TEST */

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    {
      const uint8_t *k8 = (const uint8_t *)k;
      switch(length)
      {
      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
      case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
      case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
      case 9 : c+=k8[8];                   /* fall through */
      case 8 : b+=k[1]; a+=k[0]; break;
      case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
      case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
      case 5 : b+=k8[4];                   /* fall through */
      case 4 : a+=k[0]; break;
      case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
      case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
      case 1 : a+=k8[0]; break;
      case 0 : return c;
      }
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
    case 11: c+=((uint32_t)k[10])<<16;
    case 10: c+=((uint32_t)k[9])<<8;
    case 9 : c+=k[8];
    case 8 : b+=((uint32_t)k[7])<<24;
    case 7 : b+=((uint32_t)k[6])<<16;
    case 6 : b+=((uint32_t)k[5])<<8;
    case 5 : b+=k[4];
    case 4 : a+=((uint32_t)k[3])<<24;
    case 3 : a+=((uint32_t)k[2])<<16;
    case 2 : a+=((uint32_t)k[1])<<8;
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}

#ifdef SELF_TEST

/*
 * hashlittle2: return 2 32-bit hash values
 *
 * This is identical to hashlittle(), except it returns two 32-bit hash
 * values instead of just one.  This is good enough for hash table
 * lookup with 2^^64 buckets, or if you want a second hash if you're not
 * happy with the first, or if you want a probably-unique 64-bit ID for
 * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
 */
void hashlittle2(
  const void *key,       /* the key to hash */
  size_t      length,    /* length of the key */
  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + *pc;
  c += *pb;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    {
      const uint8_t *k8 = (const uint8_t *)k;
      switch(length)
      {
      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
      case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
      case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
      case 9 : c+=k8[8];                   /* fall through */
      case 8 : b+=k[1]; a+=k[0]; break;
      case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
      case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
      case 5 : b+=k8[4];                   /* fall through */
      case 4 : a+=k[0]; break;
      case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
      case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
      case 1 : a+=k8[0]; break;
      case 0 : *pc=c; *pb=b; return;
      }
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
    case 11: c+=((uint32_t)k[10])<<16;
    case 10: c+=((uint32_t)k[9])<<8;
    case 9 : c+=k[8];
    case 8 : b+=((uint32_t)k[7])<<24;
    case 7 : b+=((uint32_t)k[6])<<16;
    case 6 : b+=((uint32_t)k[5])<<8;
    case 5 : b+=k[4];
    case 4 : a+=((uint32_t)k[3])<<24;
    case 3 : a+=((uint32_t)k[2])<<16;
    case 2 : a+=((uint32_t)k[1])<<8;
    case 1 : a+=k[0];
             break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }
  }

  final(a,b,c);
  *pc=c; *pb=b;
}

#endif /* SELF_TEST */
#if 0 /* currently not used */

/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
uint32_t hashbig( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;
  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]<<8" actually reads beyond the end of the string, but
     * then shifts out the part it's not allowed to read.  Because the
     * string is aligned, the illegal read is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff00; break;
    case 2 : a+=k[0]&0xffff0000; break;
    case 1 : a+=k[0]&0xff000000; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else  /* make valgrind happy */

    {
      const uint8_t *k8 = (const uint8_t *)k;
      switch(length)                 /* all the case statements fall through */
      {
      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
      case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
      case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
      case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
      case 8 : b+=k[1]; a+=k[0]; break;
      case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
      case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
      case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
      case 4 : a+=k[0]; break;
      case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
      case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
      case 1 : a+=((uint32_t)k8[0])<<24; break;
      case 0 : return c;
      }
    }

#endif /* !VALGRIND */

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += ((uint32_t)k[0])<<24;
      a += ((uint32_t)k[1])<<16;
      a += ((uint32_t)k[2])<<8;
      a += ((uint32_t)k[3]);
      b += ((uint32_t)k[4])<<24;
      b += ((uint32_t)k[5])<<16;
      b += ((uint32_t)k[6])<<8;
      b += ((uint32_t)k[7]);
      c += ((uint32_t)k[8])<<24;
      c += ((uint32_t)k[9])<<16;
      c += ((uint32_t)k[10])<<8;
      c += ((uint32_t)k[11]);
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[11];
    case 11: c+=((uint32_t)k[10])<<8;
    case 10: c+=((uint32_t)k[9])<<16;
    case 9 : c+=((uint32_t)k[8])<<24;
    case 8 : b+=k[7];
    case 7 : b+=((uint32_t)k[6])<<8;
    case 6 : b+=((uint32_t)k[5])<<16;
    case 5 : b+=((uint32_t)k[4])<<24;
    case 4 : a+=k[3];
    case 3 : a+=((uint32_t)k[2])<<8;
    case 2 : a+=((uint32_t)k[1])<<16;
    case 1 : a+=((uint32_t)k[0])<<24;
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}

#endif /* 0 == currently not used */
#ifdef SELF_TEST

/* used for timings */
void driver1(void)
{
  uint8_t buf[256];
  uint32_t i;
  uint32_t h=0;
  time_t a,z;

  time(&a);
  for (i=0; i<256; ++i) buf[i] = 'x';
  for (i=0; i<1; ++i)
  {
    h = hashlittle(&buf[0],1,h);
  }
  time(&z);
  /* cast: time_t is not necessarily int-sized */
  if (z-a > 0) printf("time %d %.8x\n", (int)(z-a), h);
}

/* check that every input bit changes every output bit half the time */
#define HASHSTATE 1
#define HASHLEN   1
#define MAXPAIR 60
#define MAXLEN  70
void driver2(void)
{
  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
  uint32_t x[HASHSTATE],y[HASHSTATE];
  uint32_t hlen;

  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
  for (hlen=0; hlen < MAXLEN; ++hlen)
  {
    z=0;
    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
    {
      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
      {
	for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
	{
	  for (l=0; l<HASHSTATE; ++l)
	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);

	  /*---- check that every output bit is affected by that input bit */
	  for (k=0; k<MAXPAIR; k+=2)
	  {
	    uint32_t finished=1;
	    /* keys have one bit different */
	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
	    /* have a and b be two keys differing in only one bit */
	    a[i] ^= (k<<j);
	    a[i] ^= (k>>(8-j));
	    c[0] = hashlittle(a, hlen, m);
	    b[i] ^= ((k+1)<<j);
	    b[i] ^= ((k+1)>>(8-j));
	    d[0] = hashlittle(b, hlen, m);
	    /* check every bit is 1, 0, set, and not set at least once */
	    for (l=0; l<HASHSTATE; ++l)
	    {
	      e[l] &= (c[l]^d[l]);
	      f[l] &= ~(c[l]^d[l]);
	      g[l] &= c[l];
	      h[l] &= ~c[l];
	      x[l] &= d[l];
	      y[l] &= ~d[l];
	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
	    }
	    if (finished) break;
	  }
	  if (k>z) z=k;
	  if (k==MAXPAIR)
	  {
	     printf("Some bit didn't change: ");
	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
	            e[0],f[0],g[0],h[0],x[0],y[0]);
	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
	  }
	  if (z==MAXPAIR) goto done;
	}
      }
    }
   done:
    if (z < MAXPAIR)
    {
      printf("Mix success  %2d bytes  %2d initvals ",i,m);
      printf("required  %d  trials\n", z/2);
    }
  }
  printf("\n");
}

/* Check for reading beyond the end of the buffer and alignment problems */
void driver3(void)
{
  uint8_t buf[MAXLEN+20], *b;
  uint32_t len;
  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
  uint32_t h;
  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
  uint32_t i;
  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
  uint32_t j;
  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
  uint32_t ref,x,y;
  uint8_t *p;

  printf("Endianness.  These lines should all be the same (for values filled in):\n");
  printf("%.8x                            %.8x                            %.8x\n",
         hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
         hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
         hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
  p = q;
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qq[1];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqq[2];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqqq[3];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  printf("\n");

  /* check that hashlittle2 and hashlittle produce the same results */
  i=47; j=0;
  hashlittle2(q, sizeof(q), &i, &j);
  if (hashlittle(q, sizeof(q), 47) != i)
    printf("hashlittle2 and hashlittle mismatch\n");

  /* check that hashword2 and hashword produce the same results */
  len = 0xdeadbeef;
  i=47, j=0;
  hashword2(&len, 1, &i, &j);
  if (hashword(&len, 1, 47) != i)
    printf("hashword2 and hashword mismatch %x %x\n",
	   i, hashword(&len, 1, 47));

  /* check hashlittle doesn't read before or after the ends of the string */
  for (h=0, b=buf+1; h<8; ++h, ++b)
  {
    for (i=0; i<MAXLEN; ++i)
    {
      len = i;
      for (j=0; j<i; ++j) *(b+j)=0;

      /* these should all be equal */
      ref = hashlittle(b, len, (uint32_t)1);
      *(b+i)=(uint8_t)~0;
      *(b-1)=(uint8_t)~0;
      x = hashlittle(b, len, (uint32_t)1);
      y = hashlittle(b, len, (uint32_t)1);
      if ((ref != x) || (ref != y))
      {
	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
               h, i);
      }
    }
  }
}

/* check for problems with nulls */
void driver4(void)
{
  uint8_t buf[1];
  uint32_t h,i,state[HASHSTATE];

  buf[0] = ~0;
  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
  printf("These should all be different\n");
  for (i=0, h=0; i<8; ++i)
  {
    h = hashlittle(buf, 0, h);
    /* cast: %ld expects long, i is uint32_t */
    printf("%2ld  0-byte strings, hash is  %.8x\n", (long)i, h);
  }
}

int main(void)
{
  driver1();   /* test that the key is hashed: used for timings */
  driver2();   /* test that whole key is hashed thoroughly */
  driver3();   /* test that nothing but the key is hashed */
  driver4();   /* test hashing multiple buffers (all buffers are null) */
  return 1;
}

#endif  /* SELF_TEST */