/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose. It's in the public domain.
 */
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>
static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);
#define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)

#define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
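/*
 * Worked example (illustrative note, not part of the original source):
 * ROTL32 rotates a 32-bit value left by r bits, carrying the high bits
 * back into the low end, e.g. ROTL32(0x80000001, 1) == 0x00000003 and
 * ROTL32(0x00000001, 8) == 0x00000100.
 */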
/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit: MurmurHash3_x64_128
 * 32-bit: JHash
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
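/*
 * Illustrative usage sketch (not part of the original file): callers hash an
 * arbitrary key buffer through the function pointer selected above.  The
 * flow-tuple layout and seed value below are hypothetical and only show the
 * net_flowhash_fn_t calling convention (key pointer, length in bytes,
 * 32-bit seed):
 *
 *      struct ft { u_int32_t saddr, daddr; u_int16_t sport, dport; } ft;
 *      u_int32_t seed = 0x5a5a5a5a;    (hypothetical seed value)
 *      u_int32_t hash = net_flowhash(&ft, sizeof (ft), seed);
 */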
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
    return (p[i]);
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
    return (p[i]);
}
#else /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
    const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
    u_int32_t value;

#if BYTE_ORDER == BIG_ENDIAN
    value =
        (((u_int32_t)bytes[0]) << 24) |
        (((u_int32_t)bytes[1]) << 16) |
        (((u_int32_t)bytes[2]) << 8) |
        ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
    value =
        (((u_int32_t)bytes[3]) << 24) |
        (((u_int32_t)bytes[2]) << 16) |
        (((u_int32_t)bytes[1]) << 8) |
        ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

    return (value);
}
static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
    const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
    u_int64_t value;

#if BYTE_ORDER == BIG_ENDIAN
    value =
        (((u_int64_t)bytes[0]) << 56) |
        (((u_int64_t)bytes[1]) << 48) |
        (((u_int64_t)bytes[2]) << 40) |
        (((u_int64_t)bytes[3]) << 32) |
        (((u_int64_t)bytes[4]) << 24) |
        (((u_int64_t)bytes[5]) << 16) |
        (((u_int64_t)bytes[6]) << 8) |
        ((u_int64_t)bytes[7]);
#else /* LITTLE_ENDIAN */
    value =
        (((u_int64_t)bytes[7]) << 56) |
        (((u_int64_t)bytes[6]) << 48) |
        (((u_int64_t)bytes[5]) << 40) |
        (((u_int64_t)bytes[4]) << 32) |
        (((u_int64_t)bytes[3]) << 24) |
        (((u_int64_t)bytes[2]) << 16) |
        (((u_int64_t)bytes[1]) << 8) |
        ((u_int64_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

    return (value);
}
#endif /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return (h);
}
static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
    k ^= k >> 33;
    k *= 0xff51afd7ed558ccdLLU;
    k ^= k >> 33;
    k *= 0xc4ceb9fe1a85ec53LLU;
    k ^= k >> 33;

    return (k);
}
/*
 * MurmurHash3_x86_32
 */
#define MH3_X86_32_C1   0xcc9e2d51
#define MH3_X86_32_C2   0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
    const u_int8_t *data = (const u_int8_t *)key;
    const u_int32_t nblocks = len / 4;
    const u_int32_t *blocks;
    const u_int8_t *tail;
    u_int32_t h1 = seed, k1;
    int i;

    /* body */
    blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

    for (i = -nblocks; i; i++) {
        k1 = getblock32(blocks, i);

        k1 *= MH3_X86_32_C1;
        k1 = ROTL32(k1, 15);
        k1 *= MH3_X86_32_C2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    /* tail */
    tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
    k1 = 0;

    switch (len & 3) {
    case 3:
        k1 ^= tail[2] << 16;
        /* FALLTHRU */
    case 2:
        k1 ^= tail[1] << 8;
        /* FALLTHRU */
    case 1:
        k1 ^= tail[0];
        k1 *= MH3_X86_32_C1;
        k1 = ROTL32(k1, 15);
        k1 *= MH3_X86_32_C2;
        h1 ^= k1;
    };

    /* finalization */
    h1 ^= len;

    h1 = mh3_fmix32(h1);

    return (h1);
}
/*
 * MurmurHash3_x64_128
 */
#define MH3_X64_128_C1  0x87c37b91114253d5LLU
#define MH3_X64_128_C2  0x4cf5ad432745937fLLU

u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
    const u_int8_t *data = (const u_int8_t *)key;
    const u_int32_t nblocks = len / 16;
    const u_int64_t *blocks;
    const u_int8_t *tail;
    u_int64_t h1 = seed, k1;
    u_int64_t h2 = seed, k2;
    u_int32_t i;

    /* body */
    blocks = (const u_int64_t *)(const void *)data;

    for (i = 0; i < nblocks; i++) {
        k1 = getblock64(blocks, i * 2 + 0);
        k2 = getblock64(blocks, i * 2 + 1);

        k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
        __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
        k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
        k1 *= MH3_X64_128_C2;
        h1 ^= k1;

#if defined(__x86_64__)
        __asm__ ( "rol $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
        h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
        __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
        k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
        k2 *= MH3_X64_128_C1;
        h2 ^= k2;

#if defined(__x86_64__)
        __asm__ ( "rol $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
        h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }
    /* tail */
    tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
    k1 = 0;
    k2 = 0;

    switch (len & 15) {
    case 15:
        k2 ^= ((u_int64_t)tail[14]) << 48;
        /* FALLTHRU */
    case 14:
        k2 ^= ((u_int64_t)tail[13]) << 40;
        /* FALLTHRU */
    case 13:
        k2 ^= ((u_int64_t)tail[12]) << 32;
        /* FALLTHRU */
    case 12:
        k2 ^= ((u_int64_t)tail[11]) << 24;
        /* FALLTHRU */
    case 11:
        k2 ^= ((u_int64_t)tail[10]) << 16;
        /* FALLTHRU */
    case 10:
        k2 ^= ((u_int64_t)tail[9]) << 8;
        /* FALLTHRU */
    case 9:
        k2 ^= ((u_int64_t)tail[8]) << 0;
        k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
        __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
        k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
        k2 *= MH3_X64_128_C1;
        h2 ^= k2;
        /* FALLTHRU */
    case 8:
        k1 ^= ((u_int64_t)tail[7]) << 56;
        /* FALLTHRU */
    case 7:
        k1 ^= ((u_int64_t)tail[6]) << 48;
        /* FALLTHRU */
    case 6:
        k1 ^= ((u_int64_t)tail[5]) << 40;
        /* FALLTHRU */
    case 5:
        k1 ^= ((u_int64_t)tail[4]) << 32;
        /* FALLTHRU */
    case 4:
        k1 ^= ((u_int64_t)tail[3]) << 24;
        /* FALLTHRU */
    case 3:
        k1 ^= ((u_int64_t)tail[2]) << 16;
        /* FALLTHRU */
    case 2:
        k1 ^= ((u_int64_t)tail[1]) << 8;
        /* FALLTHRU */
    case 1:
        k1 ^= ((u_int64_t)tail[0]) << 0;
        k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
        __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
        k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
        k1 *= MH3_X64_128_C2;
        h1 ^= k1;
    };
    /* finalization */
    h1 ^= len;
    h2 ^= len;

    h1 += h2;
    h2 += h1;

    h1 = mh3_fmix64(h1);
    h2 = mh3_fmix64(h2);

    h1 += h2;
    h2 += h1;

    /* throw all but lowest 32-bit */
    return (h1 & 0xffffffff);
}
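/*
 * Illustrative note (not part of the original file): MurmurHash3_x64_128
 * produces a 128-bit result in (h1, h2), but the net_flowhash_fn_t interface
 * returns a u_int32_t, so only the low 32 bits of h1 are kept; h2 still
 * influences those bits through the finalization steps above.
 */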
#define JHASH_INIT      0xdeadbeef

#define JHASH_MIX(a, b, c) {                    \
    a -= c;  a ^= ROTL32(c, 4);   c += b;       \
    b -= a;  b ^= ROTL32(a, 6);   a += c;       \
    c -= b;  c ^= ROTL32(b, 8);   b += a;       \
    a -= c;  a ^= ROTL32(c, 16);  c += b;       \
    b -= a;  b ^= ROTL32(a, 19);  a += c;       \
    c -= b;  c ^= ROTL32(b, 4);   b += a;       \
}

#define JHASH_FINAL(a, b, c) {                  \
    c ^= b;  c -= ROTL32(b, 14);                \
    a ^= c;  a -= ROTL32(c, 11);                \
    b ^= a;  b -= ROTL32(a, 25);                \
    c ^= b;  c -= ROTL32(b, 16);                \
    a ^= c;  a -= ROTL32(c, 4);                 \
    b ^= a;  b -= ROTL32(a, 14);                \
    c ^= b;  c -= ROTL32(b, 24);                \
}
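/*
 * Illustrative note (not part of the original file): JHASH_MIX and
 * JHASH_FINAL are the mix()/final() rounds from Bob Jenkins' lookup3.c,
 * credited in the header above.  Each 12-byte chunk of the key is folded
 * into the (a, b, c) accumulators with JHASH_MIX, and JHASH_FINAL collapses
 * the three accumulators into the value returned in c.
 */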
#if BYTE_ORDER == BIG_ENDIAN
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
    u_int32_t a, b, c;

    /* Set up the internal state */
    a = b = c = JHASH_INIT + len + seed;

    if (ALIGNED32(key)) {
        /* read 32-bit chunks */
        const u_int32_t *k = (const u_int32_t *)key;
        /*
         * all but last block:
         * aligned reads and affect 32 bits of (a,b,c)
         */
        while (len > 12) {
            a += k[0];
            b += k[1];
            c += k[2];
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 3;
        }

        /*
         * handle the last (probably partial) block
         *
         * "k[2] << 8" actually reads beyond the end of the string,
         * but then shifts out the part it's not allowed to read.
         * Because the string is aligned, the illegal read is in
         * the same word as the rest of the string.  The masking
         * trick does make the hash noticeably faster for short
         * strings (like English words).
         */
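        /*
         * Illustrative note (not part of the original file): on a
         * big-endian machine, a 10-byte aligned key still occupies three
         * 32-bit words, so reading k[2] pulls in two bytes past the end of
         * the key; masking with 0xffff0000 keeps only the two valid
         * (most significant) bytes and discards the over-read, which is why
         * the partial-block cases below mask instead of re-reading the tail
         * byte by byte.
         */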
        c += k[2] & 0xffffff00;
        c += k[2] & 0xffff0000;
        c += k[2] & 0xff000000;
        b += k[1] & 0xffffff00;
        b += k[1] & 0xffff0000;
        b += k[1] & 0xff000000;
        a += k[0] & 0xffffff00;
        a += k[0] & 0xffff0000;
        a += k[0] & 0xff000000;

        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
    /* need to read the key one byte at a time */
    const u_int8_t *k = (const u_int8_t *)key;

    /* all but the last block: affect some 32 bits of (a,b,c) */
    while (len > 12) {
        a += ((u_int32_t)k[0]) << 24;
        a += ((u_int32_t)k[1]) << 16;
        a += ((u_int32_t)k[2]) << 8;
        a += ((u_int32_t)k[3]);
        b += ((u_int32_t)k[4]) << 24;
        b += ((u_int32_t)k[5]) << 16;
        b += ((u_int32_t)k[6]) << 8;
        b += ((u_int32_t)k[7]);
        c += ((u_int32_t)k[8]) << 24;
        c += ((u_int32_t)k[9]) << 16;
        c += ((u_int32_t)k[10]) << 8;
        c += ((u_int32_t)k[11]);
        JHASH_MIX(a, b, c);
        len -= 12;
        k += 12;
    }
    /* last block: affect all 32 bits of (c) */
    c += ((u_int32_t)k[10]) << 8;
    c += ((u_int32_t)k[9]) << 16;
    c += ((u_int32_t)k[8]) << 24;
    b += ((u_int32_t)k[6]) << 8;
    b += ((u_int32_t)k[5]) << 16;
    b += ((u_int32_t)k[4]) << 24;
    a += ((u_int32_t)k[2]) << 8;
    a += ((u_int32_t)k[1]) << 16;
    a += ((u_int32_t)k[0]) << 24;

    /* zero length requires no mixing */

    JHASH_FINAL(a, b, c);

    return (c);
}
#else /* LITTLE_ENDIAN */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
    u_int32_t a, b, c;

    /* Set up the internal state */
    a = b = c = JHASH_INIT + len + seed;
#if defined(__i386__) || defined(__x86_64__)
    /*
     * On i386/x86_64, it is faster to read 32-bit chunks if the key
     * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
     * is aligned 16-bit.
     */
    if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
    if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
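        /*
         * Illustrative note (not part of the original file): i386 and
         * x86_64 handle unaligned 32-bit loads in hardware, so a key that
         * is not even 16-bit aligned can still be consumed a word at a
         * time here; only a key that is 16-bit aligned but not 32-bit
         * aligned takes the 16-bit path further below.
         */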
        /* read 32-bit chunks */
        const u_int32_t *k = (const u_int32_t *)key;

        /*
         * all but last block:
         * aligned reads and affect 32 bits of (a,b,c)
         */
        while (len > 12) {
            a += k[0];
            b += k[1];
            c += k[2];
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 3;
        }

        /*
         * handle the last (probably partial) block
         *
         * "k[2] & 0xffffff" actually reads beyond the end of the
         * string, but then masks off the part it's not allowed
         * to read.  Because the string is aligned, the masked-off
         * tail is in the same word as the rest of the string.
         * The masking trick does make the hash noticeably faster
         * for short strings (like English words).
         */
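        /*
         * Illustrative note (not part of the original file): on a
         * little-endian machine the first bytes of the key land in the
         * low-order bytes of each 32-bit word, so for a 10-byte key the
         * mask "k[2] & 0xffff" would keep the two valid low-order bytes
         * of the final word and drop the over-read high bytes; the masks
         * in the cases below follow the same pattern for each possible
         * tail length.
         */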
        c += k[2] & 0xffffff;
        b += k[1] & 0xffffff;
        a += k[0] & 0xffffff;

        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
#if !defined(__i386__) && !defined(__x86_64__)
    else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
        /* read 16-bit chunks */
        const u_int16_t *k = (const u_int16_t *)key;
        const u_int8_t *k8;
        /* all but last block: aligned reads and different mixing */
        while (len > 12) {
            a += k[0] + (((u_int32_t)k[1]) << 16);
            b += k[2] + (((u_int32_t)k[3]) << 16);
            c += k[4] + (((u_int32_t)k[5]) << 16);
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 6;
        }

        /* handle the last (probably partial) block */
        k8 = (const u_int8_t *)k;
        c += k[4] + (((u_int32_t)k[5]) << 16);
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        c += ((u_int32_t)k8[10]) << 16;
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        b += ((u_int32_t)k8[6]) << 16;
        a += k[0] + (((u_int32_t)k[1]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        a += ((u_int32_t)k8[2]) << 16;
        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
#if !defined(__i386__) && !defined(__x86_64__)
    }
    else {
        /* need to read the key one byte at a time */
        const u_int8_t *k = (const u_int8_t *)key;
        /* all but the last block: affect some 32 bits of (a,b,c) */
        while (len > 12) {
            a += k[0];
            a += ((u_int32_t)k[1]) << 8;
            a += ((u_int32_t)k[2]) << 16;
            a += ((u_int32_t)k[3]) << 24;
            b += k[4];
            b += ((u_int32_t)k[5]) << 8;
            b += ((u_int32_t)k[6]) << 16;
            b += ((u_int32_t)k[7]) << 24;
            c += k[8];
            c += ((u_int32_t)k[9]) << 8;
            c += ((u_int32_t)k[10]) << 16;
            c += ((u_int32_t)k[11]) << 24;
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 12;
        }
        /* last block: affect all 32 bits of (c) */
        c += ((u_int32_t)k[11]) << 24;
        c += ((u_int32_t)k[10]) << 16;
        c += ((u_int32_t)k[9]) << 8;
        b += ((u_int32_t)k[7]) << 24;
        b += ((u_int32_t)k[6]) << 16;
        b += ((u_int32_t)k[5]) << 8;
        a += ((u_int32_t)k[3]) << 24;
        a += ((u_int32_t)k[2]) << 16;
        a += ((u_int32_t)k[1]) << 8;
        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */