lookup3.c

   1 /*
   2 -------------------------------------------------------------------------------
   3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
   4
   5 These are functions for producing 32-bit hashes for hash table lookup.
   6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
   7 are externally useful functions.  Routines to test the hash are included
   8 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
   9 the public domain.  It has no warranty.
  10
  11 You probably want to use hashlittle().  hashlittle() and hashbig()
  12 hash byte arrays.  hashlittle() is is faster than hashbig() on
  13 little-endian machines.  Intel and AMD are little-endian machines.
  14 On second thought, you probably want hashlittle2(), which is identical to
  15 hashlittle() except it returns two 32-bit hashes for the price of one.
  16 You could implement hashbig2() if you wanted but I haven't bothered here.
  17
  18 If you want to find a hash of, say, exactly 7 integers, do
  19   a = i1;  b = i2;  c = i3;
  20   mix(a,b,c);
  21   a += i4; b += i5; c += i6;
  22   mix(a,b,c);
  23   a += i7;
  24   final(a,b,c);
  25 then use c as the hash value.  If you have a variable length array of
  26 4-byte integers to hash, use hashword().  If you have a byte array (like
  27 a character string), use hashlittle().  If you have several byte arrays, or
  28 a mix of things, see the comments above hashlittle().
  29
  30 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
  31 then mix those integers.  This is fast (you can do a lot more thorough
  32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
  33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
  34 -------------------------------------------------------------------------------
  35 */
  36 #define VALGRIND
  37
  38 #include <stdio.h>      /* defines printf for tests */
  39 #include <time.h>       /* defines time_t for timings in the test */
  40 #include <stdint.h>     /* defines uint32_t etc */
  41 #include <sys/param.h>  /* attempt to define endianness */
  42 #ifdef linux
  43 # include <endian.h>    /* attempt to define endianness */
  44 #endif
  45
  46 /*
  47  * My best guess at if you are big-endian or little-endian.  This may
  48  * need adjustment.
  49  */
  50 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
  51      __BYTE_ORDER == __LITTLE_ENDIAN) || \
  52     (defined(i386) || defined(__i386__) || defined(__i486__) || \
  53      defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
  54 # define HASH_LITTLE_ENDIAN 1
  55 # define HASH_BIG_ENDIAN 0
  56 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
  57        __BYTE_ORDER == __BIG_ENDIAN) || \
  58       (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
  59 # define HASH_LITTLE_ENDIAN 0
  60 # define HASH_BIG_ENDIAN 1
  61 #else
  62 # define HASH_LITTLE_ENDIAN 0
  63 # define HASH_BIG_ENDIAN 0
  64 #endif
  65
  66 #define hashsize(n) ((uint32_t)1<<(n))
  67 #define hashmask(n) (hashsize(n)-1)
  68 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  69
  70 /*
  71 -------------------------------------------------------------------------------
  72 mix -- mix 3 32-bit values reversibly.
  73
  74 This is reversible, so any information in (a,b,c) before mix() is
  75 still in (a,b,c) after mix().
  76
  77 If four pairs of (a,b,c) inputs are run through mix(), or through
  78 mix() in reverse, there are at least 32 bits of the output that
  79 are sometimes the same for one pair and different for another pair.
  80 This was tested for:
  81 * pairs that differed by one bit, by two bits, in any combination
  82   of top bits of (a,b,c), or in any combination of bottom bits of
  83   (a,b,c).
  84 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  85   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  86   is commonly produced by subtraction) look like a single 1-bit
  87   difference.
  88 * the base values were pseudorandom, all zero but one bit set, or
  89   all zero plus a counter that starts at zero.
  90
  91 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
  92 satisfy this are
  93     4  6  8 16 19  4
  94     9 15  3 18 27 15
  95    14  9  3  7 17  3
  96 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
  97 for "differ" defined as + with a one-bit base and a two-bit delta.  I
  98 used http://burtleburtle.net/bob/hash/avalanche.html to choose
  99 the operations, constants, and arrangements of the variables.
 100
 101 This does not achieve avalanche.  There are input bits of (a,b,c)
 102 that fail to affect some output bits of (a,b,c), especially of a.  The
 103 most thoroughly mixed value is c, but it doesn't really even achieve
 104 avalanche in c.
 105
 106 This allows some parallelism.  Read-after-writes are good at doubling
 107 the number of bits affected, so the goal of mixing pulls in the opposite
 108 direction as the goal of parallelism.  I did what I could.  Rotates
 109 seem to cost as much as shifts on every machine I could lay my hands
 110 on, and rotates are much kinder to the top and bottom bits, so I used
 111 rotates.
 112 -------------------------------------------------------------------------------
 113 */
 114 #define mix(a,b,c) \
 115 { \
 116   a -= c;  a ^= rot(c, 4);  c += b; \
 117   b -= a;  b ^= rot(a, 6);  a += c; \
 118   c -= b;  c ^= rot(b, 8);  b += a; \
 119   a -= c;  a ^= rot(c,16);  c += b; \
 120   b -= a;  b ^= rot(a,19);  a += c; \
 121   c -= b;  c ^= rot(b, 4);  b += a; \
 122 }
 123
 124 /*
 125 -------------------------------------------------------------------------------
 126 final -- final mixing of 3 32-bit values (a,b,c) into c
 127
 128 Pairs of (a,b,c) values differing in only a few bits will usually
 129 produce values of c that look totally different.  This was tested for
 130 * pairs that differed by one bit, by two bits, in any combination
 131   of top bits of (a,b,c), or in any combination of bottom bits of
 132   (a,b,c).
 133 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 134   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 135   is commonly produced by subtraction) look like a single 1-bit
 136   difference.
 137 * the base values were pseudorandom, all zero but one bit set, or
 138   all zero plus a counter that starts at zero.
 139
 140 These constants passed:
 141  14 11 25 16 4 14 24
 142  12 14 25 16 4 14 24
 143 and these came close:
 144   4  8 15 26 3 22 24
 145  10  8 15 26 3 22 24
 146  11  8 15 26 3 22 24
 147 -------------------------------------------------------------------------------
 148 */
 149 #define final(a,b,c) \
 150 { \
 151   c ^= b; c -= rot(b,14); \
 152   a ^= c; a -= rot(c,11); \
 153   b ^= a; b -= rot(a,25); \
 154   c ^= b; c -= rot(b,16); \
 155   a ^= c; a -= rot(c,4);  \
 156   b ^= a; b -= rot(a,14); \
 157   c ^= b; c -= rot(b,24); \
 158 }
 159
 160 /*
 161 --------------------------------------------------------------------
 162  This works on all machines.  To be useful, it requires
 163  -- that the key be an array of uint32_t's, and
 164  -- that the length be the number of uint32_t's in the key
 165
 166  The function hashword() is identical to hashlittle() on little-endian
 167  machines, and identical to hashbig() on big-endian machines,
 168  except that the length has to be measured in uint32_ts rather than in
 169  bytes.  hashlittle() is more complicated than hashword() only because
 170  hashlittle() has to dance around fitting the key bytes into registers.
 171 --------------------------------------------------------------------
 172 */
 173 uint32_t hashword(
 174 const uint32_t *k,                   /* the key, an array of uint32_t values */
 175 size_t          length,               /* the length of the key, in uint32_ts */
 176 uint32_t        initval)         /* the previous hash, or an arbitrary value */
 177 {
 178   uint32_t a,b,c;
 179
 180   /* Set up the internal state */
 181   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
 182
 183   /*------------------------------------------------- handle most of the key */
 184   while (length > 3)
 185   {
 186     a += k[0];
 187     b += k[1];
 188     c += k[2];
 189     mix(a,b,c);
 190     length -= 3;
 191     k += 3;
 192   }
 193
 194   /*------------------------------------------- handle the last 3 uint32_t's */
 195   switch(length)                     /* all the case statements fall through */
 196   {
 197   case 3 : c+=k[2];
 198   case 2 : b+=k[1];
 199   case 1 : a+=k[0];
 200     final(a,b,c);
 201   case 0:     /* case 0: nothing left to add */
 202     break;
 203   }
 204   /*------------------------------------------------------ report the result */
 205   return c;
 206 }
 207
 208
 209 /*
 210 --------------------------------------------------------------------
 211 hashword2() -- same as hashword(), but take two seeds and return two
 212 32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
 213 both be initialized with seeds.  If you pass in (*pb)==0, the output
 214 (*pc) will be the same as the return value from hashword().
 215 --------------------------------------------------------------------
 216 */
 217 void hashword2 (
 218 const uint32_t *k,                   /* the key, an array of uint32_t values */
 219 size_t          length,               /* the length of the key, in uint32_ts */
 220 uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
 221 uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
 222 {
 223   uint32_t a,b,c;
 224
 225   /* Set up the internal state */
 226   a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
 227   c += *pb;
 228
 229   /*------------------------------------------------- handle most of the key */
 230   while (length > 3)
 231   {
 232     a += k[0];
 233     b += k[1];
 234     c += k[2];
 235     mix(a,b,c);
 236     length -= 3;
 237     k += 3;
 238   }
 239
 240   /*------------------------------------------- handle the last 3 uint32_t's */
 241   switch(length)                     /* all the case statements fall through */
 242   {
 243   case 3 : c+=k[2];
 244   case 2 : b+=k[1];
 245   case 1 : a+=k[0];
 246     final(a,b,c);
 247   case 0:     /* case 0: nothing left to add */
 248     break;
 249   }
 250   /*------------------------------------------------------ report the result */
 251   *pc=c; *pb=b;
 252 }
 253
 254
 255 /*
 256 -------------------------------------------------------------------------------
 257 hashlittle() -- hash a variable-length key into a 32-bit value
 258   k       : the key (the unaligned variable-length array of bytes)
 259   length  : the length of the key, counting by bytes
 260   initval : can be any 4-byte value
 261 Returns a 32-bit value.  Every bit of the key affects every bit of
 262 the return value.  Two keys differing by one or two bits will have
 263 totally different hash values.
 264
 265 The best hash table sizes are powers of 2.  There is no need to do
 266 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 267 use a bitmask.  For example, if you need only 10 bits, do
 268   h = (h & hashmask(10));
 269 In which case, the hash table should have hashsize(10) elements.
 270
 271 If you are hashing n strings (uint8_t **)k, do it like this:
 272   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
 273
 274 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
 275 code any way you wish, private, educational, or commercial.  It's free.
 276
 277 Use for hash table lookup, or anything where one collision in 2^^32 is
 278 acceptable.  Do NOT use for cryptographic purposes.
 279 -------------------------------------------------------------------------------
 280 */
 281
 282 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
 283 {
 284   uint32_t a,b,c;                                          /* internal state */
 285   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 286
 287   /* Set up the internal state */
 288   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
 289
 290   u.ptr = key;
 291   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
 292     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 293     const uint8_t  *k8;
 294
 295     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 296     while (length > 12)
 297     {
 298       a += k[0];
 299       b += k[1];
 300       c += k[2];
 301       mix(a,b,c);
 302       length -= 12;
 303       k += 3;
 304     }
 305
 306     /*----------------------------- handle the last (probably partial) block */
 307     /*
 308      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 309      * then masks off the part it's not allowed to read.  Because the
 310      * string is aligned, the masked-off tail is in the same word as the
 311      * rest of the string.  Every machine with memory protection I've seen
 312      * does it on word boundaries, so is OK with this.  But VALGRIND will
 313      * still catch it and complain.  The masking trick does make the hash
 314      * noticably faster for short strings (like English words).
 315      */
 316 #ifndef VALGRIND
 317
 318     switch(length)
 319     {
 320     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 321     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
 322     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
 323     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
 324     case 8 : b+=k[1]; a+=k[0]; break;
 325     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
 326     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
 327     case 5 : b+=k[1]&0xff; a+=k[0]; break;
 328     case 4 : a+=k[0]; break;
 329     case 3 : a+=k[0]&0xffffff; break;
 330     case 2 : a+=k[0]&0xffff; break;
 331     case 1 : a+=k[0]&0xff; break;
 332     case 0 : return c;              /* zero length strings require no mixing */
 333     }
 334
 335 #else /* make valgrind happy */
 336
 337     k8 = (const uint8_t *)k;
 338     switch(length)
 339     {
 340     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 341     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
 342     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
 343     case 9 : c+=k8[8];                   /* fall through */
 344     case 8 : b+=k[1]; a+=k[0]; break;
 345     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
 346     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
 347     case 5 : b+=k8[4];                   /* fall through */
 348     case 4 : a+=k[0]; break;
 349     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
 350     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
 351     case 1 : a+=k8[0]; break;
 352     case 0 : return c;
 353     }
 354
 355 #endif /* !valgrind */
 356
 357   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
 358     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
 359     const uint8_t  *k8;
 360
 361     /*--------------- all but last block: aligned reads and different mixing */
 362     while (length > 12)
 363     {
 364       a += k[0] + (((uint32_t)k[1])<<16);
 365       b += k[2] + (((uint32_t)k[3])<<16);
 366       c += k[4] + (((uint32_t)k[5])<<16);
 367       mix(a,b,c);
 368       length -= 12;
 369       k += 6;
 370     }
 371
 372     /*----------------------------- handle the last (probably partial) block */
 373     k8 = (const uint8_t *)k;
 374     switch(length)
 375     {
 376     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
 377              b+=k[2]+(((uint32_t)k[3])<<16);
 378              a+=k[0]+(((uint32_t)k[1])<<16);
 379              break;
 380     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
 381     case 10: c+=k[4];
 382              b+=k[2]+(((uint32_t)k[3])<<16);
 383              a+=k[0]+(((uint32_t)k[1])<<16);
 384              break;
 385     case 9 : c+=k8[8];                      /* fall through */
 386     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
 387              a+=k[0]+(((uint32_t)k[1])<<16);
 388              break;
 389     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
 390     case 6 : b+=k[2];
 391              a+=k[0]+(((uint32_t)k[1])<<16);
 392              break;
 393     case 5 : b+=k8[4];                      /* fall through */
 394     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
 395              break;
 396     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
 397     case 2 : a+=k[0];
 398              break;
 399     case 1 : a+=k8[0];
 400              break;
 401     case 0 : return c;                     /* zero length requires no mixing */
 402     }
 403
 404   } else {                        /* need to read the key one byte at a time */
 405     const uint8_t *k = (const uint8_t *)key;
 406
 407     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 408     while (length > 12)
 409     {
 410       a += k[0];
 411       a += ((uint32_t)k[1])<<8;
 412       a += ((uint32_t)k[2])<<16;
 413       a += ((uint32_t)k[3])<<24;
 414       b += k[4];
 415       b += ((uint32_t)k[5])<<8;
 416       b += ((uint32_t)k[6])<<16;
 417       b += ((uint32_t)k[7])<<24;
 418       c += k[8];
 419       c += ((uint32_t)k[9])<<8;
 420       c += ((uint32_t)k[10])<<16;
 421       c += ((uint32_t)k[11])<<24;
 422       mix(a,b,c);
 423       length -= 12;
 424       k += 12;
 425     }
 426
 427     /*-------------------------------- last block: affect all 32 bits of (c) */
 428     switch(length)                   /* all the case statements fall through */
 429     {
 430     case 12: c+=((uint32_t)k[11])<<24;
 431     case 11: c+=((uint32_t)k[10])<<16;
 432     case 10: c+=((uint32_t)k[9])<<8;
 433     case 9 : c+=k[8];
 434     case 8 : b+=((uint32_t)k[7])<<24;
 435     case 7 : b+=((uint32_t)k[6])<<16;
 436     case 6 : b+=((uint32_t)k[5])<<8;
 437     case 5 : b+=k[4];
 438     case 4 : a+=((uint32_t)k[3])<<24;
 439     case 3 : a+=((uint32_t)k[2])<<16;
 440     case 2 : a+=((uint32_t)k[1])<<8;
 441     case 1 : a+=k[0];
 442              break;
 443     case 0 : return c;
 444     }
 445   }
 446
 447   final(a,b,c);
 448   return c;
 449 }
 450
 451
 452 /*
 453  * hashlittle2: return 2 32-bit hash values
 454  *
 455  * This is identical to hashlittle(), except it returns two 32-bit hash
 456  * values instead of just one.  This is good enough for hash table
 457  * lookup with 2^^64 buckets, or if you want a second hash if you're not
 458  * happy with the first, or if you want a probably-unique 64-bit ID for
 459  * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
 460  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
 461  */
 462 void hashlittle2(
 463   const void *key,       /* the key to hash */
 464   size_t      length,    /* length of the key */
 465   uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
 466   uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
 467 {
 468   uint32_t a,b,c;                                          /* internal state */
 469   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 470
 471   /* Set up the internal state */
 472   a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
 473   c += *pb;
 474
 475   u.ptr = key;
 476   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
 477     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 478     const uint8_t  *k8;
 479
 480     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 481     while (length > 12)
 482     {
 483       a += k[0];
 484       b += k[1];
 485       c += k[2];
 486       mix(a,b,c);
 487       length -= 12;
 488       k += 3;
 489     }
 490
 491     /*----------------------------- handle the last (probably partial) block */
 492     /*
 493      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 494      * then masks off the part it's not allowed to read.  Because the
 495      * string is aligned, the masked-off tail is in the same word as the
 496      * rest of the string.  Every machine with memory protection I've seen
 497      * does it on word boundaries, so is OK with this.  But VALGRIND will
 498      * still catch it and complain.  The masking trick does make the hash
 499      * noticably faster for short strings (like English words).
 500      */
 501 #ifndef VALGRIND
 502
 503     switch(length)
 504     {
 505     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 506     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
 507     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
 508     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
 509     case 8 : b+=k[1]; a+=k[0]; break;
 510     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
 511     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
 512     case 5 : b+=k[1]&0xff; a+=k[0]; break;
 513     case 4 : a+=k[0]; break;
 514     case 3 : a+=k[0]&0xffffff; break;
 515     case 2 : a+=k[0]&0xffff; break;
 516     case 1 : a+=k[0]&0xff; break;
 517     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
 518     }
 519
 520 #else /* make valgrind happy */
 521
 522     k8 = (const uint8_t *)k;
 523     switch(length)
 524     {
 525     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 526     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
 527     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
 528     case 9 : c+=k8[8];                   /* fall through */
 529     case 8 : b+=k[1]; a+=k[0]; break;
 530     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
 531     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
 532     case 5 : b+=k8[4];                   /* fall through */
 533     case 4 : a+=k[0]; break;
 534     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
 535     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
 536     case 1 : a+=k8[0]; break;
 537     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
 538     }
 539
 540 #endif /* !valgrind */
 541
 542   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
 543     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
 544     const uint8_t  *k8;
 545
 546     /*--------------- all but last block: aligned reads and different mixing */
 547     while (length > 12)
 548     {
 549       a += k[0] + (((uint32_t)k[1])<<16);
 550       b += k[2] + (((uint32_t)k[3])<<16);
 551       c += k[4] + (((uint32_t)k[5])<<16);
 552       mix(a,b,c);
 553       length -= 12;
 554       k += 6;
 555     }
 556
 557     /*----------------------------- handle the last (probably partial) block */
 558     k8 = (const uint8_t *)k;
 559     switch(length)
 560     {
 561     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
 562              b+=k[2]+(((uint32_t)k[3])<<16);
 563              a+=k[0]+(((uint32_t)k[1])<<16);
 564              break;
 565     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
 566     case 10: c+=k[4];
 567              b+=k[2]+(((uint32_t)k[3])<<16);
 568              a+=k[0]+(((uint32_t)k[1])<<16);
 569              break;
 570     case 9 : c+=k8[8];                      /* fall through */
 571     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
 572              a+=k[0]+(((uint32_t)k[1])<<16);
 573              break;
 574     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
 575     case 6 : b+=k[2];
 576              a+=k[0]+(((uint32_t)k[1])<<16);
 577              break;
 578     case 5 : b+=k8[4];                      /* fall through */
 579     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
 580              break;
 581     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
 582     case 2 : a+=k[0];
 583              break;
 584     case 1 : a+=k8[0];
 585              break;
 586     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
 587     }
 588
 589   } else {                        /* need to read the key one byte at a time */
 590     const uint8_t *k = (const uint8_t *)key;
 591
 592     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 593     while (length > 12)
 594     {
 595       a += k[0];
 596       a += ((uint32_t)k[1])<<8;
 597       a += ((uint32_t)k[2])<<16;
 598       a += ((uint32_t)k[3])<<24;
 599       b += k[4];
 600       b += ((uint32_t)k[5])<<8;
 601       b += ((uint32_t)k[6])<<16;
 602       b += ((uint32_t)k[7])<<24;
 603       c += k[8];
 604       c += ((uint32_t)k[9])<<8;
 605       c += ((uint32_t)k[10])<<16;
 606       c += ((uint32_t)k[11])<<24;
 607       mix(a,b,c);
 608       length -= 12;
 609       k += 12;
 610     }
 611
 612     /*-------------------------------- last block: affect all 32 bits of (c) */
 613     switch(length)                   /* all the case statements fall through */
 614     {
 615     case 12: c+=((uint32_t)k[11])<<24;
 616     case 11: c+=((uint32_t)k[10])<<16;
 617     case 10: c+=((uint32_t)k[9])<<8;
 618     case 9 : c+=k[8];
 619     case 8 : b+=((uint32_t)k[7])<<24;
 620     case 7 : b+=((uint32_t)k[6])<<16;
 621     case 6 : b+=((uint32_t)k[5])<<8;
 622     case 5 : b+=k[4];
 623     case 4 : a+=((uint32_t)k[3])<<24;
 624     case 3 : a+=((uint32_t)k[2])<<16;
 625     case 2 : a+=((uint32_t)k[1])<<8;
 626     case 1 : a+=k[0];
 627              break;
 628     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
 629     }
 630   }
 631
 632   final(a,b,c);
 633   *pc=c; *pb=b;
 634 }
 635
 636
 637
 638 /*
 639  * hashbig():
 640  * This is the same as hashword() on big-endian machines.  It is different
 641  * from hashlittle() on all machines.  hashbig() takes advantage of
 642  * big-endian byte ordering.
 643  */
 644 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
 645 {
 646   uint32_t a,b,c;
 647   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
 648
 649   /* Set up the internal state */
 650   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
 651
 652   u.ptr = key;
 653   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
 654     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 655     const uint8_t  *k8;
 656
 657     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 658     while (length > 12)
 659     {
 660       a += k[0];
 661       b += k[1];
 662       c += k[2];
 663       mix(a,b,c);
 664       length -= 12;
 665       k += 3;
 666     }
 667
 668     /*----------------------------- handle the last (probably partial) block */
 669     /*
 670      * "k[2]<<8" actually reads beyond the end of the string, but
 671      * then shifts out the part it's not allowed to read.  Because the
 672      * string is aligned, the illegal read is in the same word as the
 673      * rest of the string.  Every machine with memory protection I've seen
 674      * does it on word boundaries, so is OK with this.  But VALGRIND will
 675      * still catch it and complain.  The masking trick does make the hash
 676      * noticably faster for short strings (like English words).
 677      */
 678 #ifndef VALGRIND
 679
 680     switch(length)
 681     {
 682     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 683     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
 684     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
 685     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
 686     case 8 : b+=k[1]; a+=k[0]; break;
 687     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
 688     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
 689     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
 690     case 4 : a+=k[0]; break;
 691     case 3 : a+=k[0]&0xffffff00; break;
 692     case 2 : a+=k[0]&0xffff0000; break;
 693     case 1 : a+=k[0]&0xff000000; break;
 694     case 0 : return c;              /* zero length strings require no mixing */
 695     }
 696
 697 #else  /* make valgrind happy */
 698
 699     k8 = (const uint8_t *)k;
 700     switch(length)                   /* all the case statements fall through */
 701     {
 702     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 703     case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
 704     case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
 705     case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
 706     case 8 : b+=k[1]; a+=k[0]; break;
 707     case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
 708     case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
 709     case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
 710     case 4 : a+=k[0]; break;
 711     case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
 712     case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
 713     case 1 : a+=((uint32_t)k8[0])<<24; break;
 714     case 0 : return c;
 715     }
 716
 717 #endif /* !VALGRIND */
 718
 719   } else {                        /* need to read the key one byte at a time */
 720     const uint8_t *k = (const uint8_t *)key;
 721
 722     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 723     while (length > 12)
 724     {
 725       a += ((uint32_t)k[0])<<24;
 726       a += ((uint32_t)k[1])<<16;
 727       a += ((uint32_t)k[2])<<8;
 728       a += ((uint32_t)k[3]);
 729       b += ((uint32_t)k[4])<<24;
 730       b += ((uint32_t)k[5])<<16;
 731       b += ((uint32_t)k[6])<<8;
 732       b += ((uint32_t)k[7]);
 733       c += ((uint32_t)k[8])<<24;
 734       c += ((uint32_t)k[9])<<16;
 735       c += ((uint32_t)k[10])<<8;
 736       c += ((uint32_t)k[11]);
 737       mix(a,b,c);
 738       length -= 12;
 739       k += 12;
 740     }
 741
 742     /*-------------------------------- last block: affect all 32 bits of (c) */
 743     switch(length)                   /* all the case statements fall through */
 744     {
 745     case 12: c+=k[11];
 746     case 11: c+=((uint32_t)k[10])<<8;
 747     case 10: c+=((uint32_t)k[9])<<16;
 748     case 9 : c+=((uint32_t)k[8])<<24;
 749     case 8 : b+=k[7];
 750     case 7 : b+=((uint32_t)k[6])<<8;
 751     case 6 : b+=((uint32_t)k[5])<<16;
 752     case 5 : b+=((uint32_t)k[4])<<24;
 753     case 4 : a+=k[3];
 754     case 3 : a+=((uint32_t)k[2])<<8;
 755     case 2 : a+=((uint32_t)k[1])<<16;
 756     case 1 : a+=((uint32_t)k[0])<<24;
 757              break;
 758     case 0 : return c;
 759     }
 760   }
 761
 762   final(a,b,c);
 763   return c;
 764 }
 765
 766
 767 #ifdef SELF_TEST
 768
 769 /* used for timings */
 770 void driver1()
 771 {
 772   uint8_t buf[256];
 773   uint32_t i;
 774   uint32_t h=0;
 775   time_t a,z;
 776
 777   time(&a);
 778   for (i=0; i<256; ++i) buf[i] = 'x';
 779   for (i=0; i<1; ++i)
 780   {
 781     h = hashlittle(&buf[0],1,h);
 782   }
 783   time(&z);
 784   if (z-a > 0) printf("time %d %.8x\n", z-a, h);
 785 }
 786
 787 /* check that every input bit changes every output bit half the time */
 788 #define HASHSTATE 1
 789 #define HASHLEN   1
 790 #define MAXPAIR 60
 791 #define MAXLEN  70
 792 void driver2()
 793 {
 794   uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
 795   uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
 796   uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
 797   uint32_t x[HASHSTATE],y[HASHSTATE];
 798   uint32_t hlen;
 799
 800   printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
 801   for (hlen=0; hlen < MAXLEN; ++hlen)
 802   {
 803     z=0;
 804     for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
 805     {
 806       for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
 807       {
 808         for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
 809         {
 810           for (l=0; l<HASHSTATE; ++l)
 811             e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
 812
 813           /*---- check that every output bit is affected by that input bit */
 814           for (k=0; k<MAXPAIR; k+=2)
 815           {
 816             uint32_t finished=1;
 817             /* keys have one bit different */
 818             for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
 819             /* have a and b be two keys differing in only one bit */
 820             a[i] ^= (k<<j);
 821             a[i] ^= (k>>(8-j));
 822              c[0] = hashlittle(a, hlen, m);
 823             b[i] ^= ((k+1)<<j);
 824             b[i] ^= ((k+1)>>(8-j));
 825              d[0] = hashlittle(b, hlen, m);
 826             /* check every bit is 1, 0, set, and not set at least once */
 827             for (l=0; l<HASHSTATE; ++l)
 828             {
 829               e[l] &= (c[l]^d[l]);
 830               f[l] &= ~(c[l]^d[l]);
 831               g[l] &= c[l];
 832               h[l] &= ~c[l];
 833               x[l] &= d[l];
 834               y[l] &= ~d[l];
 835               if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
 836             }
 837             if (finished) break;
 838           }
 839           if (k>z) z=k;
 840           if (k==MAXPAIR)
 841           {
 842              printf("Some bit didn't change: ");
 843              printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
 844                     e[0],f[0],g[0],h[0],x[0],y[0]);
 845              printf("i %d j %d m %d len %d\n", i, j, m, hlen);
 846           }
 847           if (z==MAXPAIR) goto done;
 848         }
 849       }
 850     }
 851    done:
 852     if (z < MAXPAIR)
 853     {
 854       printf("Mix success  %2d bytes  %2d initvals  ",i,m);
 855       printf("required  %d  trials\n", z/2);
 856     }
 857   }
 858   printf("\n");
 859 }
 860
 861 /* Check for reading beyond the end of the buffer and alignment problems */
 862 void driver3()
 863 {
 864   uint8_t buf[MAXLEN+20], *b;
 865   uint32_t len;
 866   uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
 867   uint32_t h;
 868   uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
 869   uint32_t i;
 870   uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
 871   uint32_t j;
 872   uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
 873   uint32_t ref,x,y;
 874   uint8_t *p;
 875
 876   printf("Endianness.  These lines should all be the same (for values filled in):\n");
 877   printf("%.8x                            %.8x                            %.8x\n",
 878          hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
 879          hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
 880          hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
 881   p = q;
 882   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
 883          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
 884          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
 885          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
 886          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
 887          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
 888          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
 889   p = &qq[1];
 890   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
 891          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
 892          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
 893          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
 894          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
 895          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
 896          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
 897   p = &qqq[2];
 898   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
 899          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
 900          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
 901          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
 902          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
 903          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
 904          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
 905   p = &qqqq[3];
 906   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
 907          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
 908          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
 909          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
 910          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
 911          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
 912          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
 913   printf("\n");
 914
 915   /* check that hashlittle2 and hashlittle produce the same results */
 916   i=47; j=0;
 917   hashlittle2(q, sizeof(q), &i, &j);
 918   if (hashlittle(q, sizeof(q), 47) != i)
 919     printf("hashlittle2 and hashlittle mismatch\n");
 920
 921   /* check that hashword2 and hashword produce the same results */
 922   len = 0xdeadbeef;
 923   i=47, j=0;
 924   hashword2(&len, 1, &i, &j);
 925   if (hashword(&len, 1, 47) != i)
 926     printf("hashword2 and hashword mismatch %x %x\n",
 927            i, hashword(&len, 1, 47));
 928
 929   /* check hashlittle doesn't read before or after the ends of the string */
 930   for (h=0, b=buf+1; h<8; ++h, ++b)
 931   {
 932     for (i=0; i<MAXLEN; ++i)
 933     {
 934       len = i;
 935       for (j=0; j<i; ++j) *(b+j)=0;
 936
 937       /* these should all be equal */
 938       ref = hashlittle(b, len, (uint32_t)1);
 939       *(b+i)=(uint8_t)~0;
 940       *(b-1)=(uint8_t)~0;
 941       x = hashlittle(b, len, (uint32_t)1);
 942       y = hashlittle(b, len, (uint32_t)1);
 943       if ((ref != x) || (ref != y))
 944       {
 945         printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
 946                h, i);
 947       }
 948     }
 949   }
 950 }
 951
 952 /* check for problems with nulls */
 953  void driver4()
 954 {
 955   uint8_t buf[1];
 956   uint32_t h,i,state[HASHSTATE];
 957
 958
 959   buf[0] = ~0;
 960   for (i=0; i<HASHSTATE; ++i) state[i] = 1;
 961   printf("These should all be different\n");
 962   for (i=0, h=0; i<8; ++i)
 963   {
 964     h = hashlittle(buf, 0, h);
 965     printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
 966   }
 967 }
 968
 969 void driver5()
 970 {
 971   uint32_t b,c;
 972   b=0, c=0, hashlittle2("", 0, &c, &b);
 973   printf("hash is %.8lx %.8lx\n", c, b);   /* deadbeef deadbeef */
 974   b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
 975   printf("hash is %.8lx %.8lx\n", c, b);   /* bd5b7dde deadbeef */
 976   b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
 977   printf("hash is %.8lx %.8lx\n", c, b);   /* 9c093ccd bd5b7dde */
 978   b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
 979   printf("hash is %.8lx %.8lx\n", c, b);   /* 17770551 ce7226e6 */
 980   b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
 981   printf("hash is %.8lx %.8lx\n", c, b);   /* e3607cae bd371de4 */
 982   b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
 983   printf("hash is %.8lx %.8lx\n", c, b);   /* cd628161 6cbea4b3 */
 984   c = hashlittle("Four score and seven years ago", 30, 0);
 985   printf("hash is %.8lx\n", c);   /* 17770551 */
 986   c = hashlittle("Four score and seven years ago", 30, 1);
 987   printf("hash is %.8lx\n", c);   /* cd628161 */
 988 }
 989
 990
 991 int main()
 992 {
 993   driver1();   /* test that the key is hashed: used for timings */
 994   driver2();   /* test that whole key is hashed thoroughly */
 995   driver3();   /* test that nothing but the key is hashed */
 996   driver4();   /* test hashing multiple buffers (all buffers are null) */
 997   driver5();   /* test the hash against known vectors */
 998   return 1;
 999 }
1000
1001 #endif  /* SELF_TEST */