bsd/net/flowhash.c

   1 /*
   2  * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * http://code.google.com/p/smhasher/
  31  *
  32  * Copyright (c) 2009-2011 Austin Appleby.
  33  *
  34  * MurmurHash3 was written by Austin Appleby, and is placed in the public
  35  * domain. The author hereby disclaims copyright to this source code.
  36  */
  37
  38 /*
  39  * http://burtleburtle.net/bob/hash/
  40  *
  41  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  42  *
  43  * You can use this free for any purpose.  It's in the public domain.
  44  * It has no warranty.
  45  */
  46
  47 #include <stdbool.h>
  48 #include <sys/types.h>
  49 #include <machine/endian.h>
  50 #include <net/flowhash.h>
  51
  52 static inline u_int32_t getblock32(const u_int32_t *, int);
  53 static inline u_int64_t getblock64(const u_int64_t *, int);
  54 static inline u_int32_t mh3_fmix32(u_int32_t);
  55 static inline u_int64_t mh3_fmix64(u_int64_t);
  56
  57 #define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
  58 #define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
  59 #define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)
  60
  61 #define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
  62 #define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
  63
  64 /*
  65  * The following hash algorithms are selected based on performance:
  66  *
  67  * Intel 32-bit:        MurmurHash3_x86_32
  68  * Intel 64-bit:        MurmurHash3_x64_128
  69  * ARM, et al:          JHash
  70  */
  71 #if   defined(__x86_64__)
  72 net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
  73 #else /* !__i386__ && !__x86_64__ */
  74 net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
  75 #endif /* !__i386__ && !__x86_64__ */
  76
  77 #if defined(__i386__) || defined(__x86_64__)
  78 static inline u_int32_t
  79 getblock32(const u_int32_t *p, int i)
  80 {
  81         return (p[i]);
  82 }
  83
  84 static inline u_int64_t
  85 getblock64(const u_int64_t *p, int i)
  86 {
  87         return (p[i]);
  88 }
  89 #else /* !__i386__ && !__x86_64 */
  90 static inline u_int32_t
  91 getblock32(const u_int32_t *p, int i)
  92 {
  93         const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
  94         u_int32_t value;
  95
  96         if (ALIGNED32(p)) {
  97                 value = p[i];
  98         } else {
  99 #if BYTE_ORDER == BIG_ENDIAN
 100                 value =
 101                     (((u_int32_t)bytes[0]) << 24) |
 102                     (((u_int32_t)bytes[1]) << 16) |
 103                     (((u_int32_t)bytes[2]) << 8) |
 104                     ((u_int32_t)bytes[3]);
 105 #else /* LITTLE_ENDIAN */
 106                 value =
 107                     (((u_int32_t)bytes[3]) << 24) |
 108                     (((u_int32_t)bytes[2]) << 16) |
 109                     (((u_int32_t)bytes[1]) << 8) |
 110                     ((u_int32_t)bytes[0]);
 111 #endif /* LITTLE_ENDIAN */
 112         }
 113         return (value);
 114 }
 115
 116 static inline u_int64_t
 117 getblock64(const u_int64_t *p, int i)
 118 {
 119         const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
 120         u_int64_t value;
 121
 122         if (ALIGNED64(p)) {
 123                 value = p[i];
 124         } else {
 125 #if BYTE_ORDER == BIG_ENDIAN
 126                 value =
 127                     (((u_int64_t)bytes[0]) << 56) |
 128                     (((u_int64_t)bytes[1]) << 48) |
 129                     (((u_int64_t)bytes[2]) << 40) |
 130                     (((u_int64_t)bytes[3]) << 32) |
 131                     (((u_int64_t)bytes[4]) << 24) |
 132                     (((u_int64_t)bytes[5]) << 16) |
 133                     (((u_int64_t)bytes[6]) << 8) |
 134                     ((u_int64_t)bytes[7]);
 135 #else /* LITTLE_ENDIAN */
 136                 value =
 137                     (((u_int64_t)bytes[7]) << 56) |
 138                     (((u_int64_t)bytes[6]) << 48) |
 139                     (((u_int64_t)bytes[5]) << 40) |
 140                     (((u_int64_t)bytes[4]) << 32) |
 141                     (((u_int64_t)bytes[3]) << 24) |
 142                     (((u_int64_t)bytes[2]) << 16) |
 143                     (((u_int64_t)bytes[1]) << 8) |
 144                     ((u_int64_t)bytes[0]);
 145 #endif /* LITTLE_ENDIAN */
 146         }
 147         return (value);
 148 }
 149 #endif /* !__i386__ && !__x86_64 */
 150
 151 static inline u_int32_t
 152 mh3_fmix32(u_int32_t h)
 153 {
 154         h ^= h >> 16;
 155         h *= 0x85ebca6b;
 156         h ^= h >> 13;
 157         h *= 0xc2b2ae35;
 158         h ^= h >> 16;
 159
 160         return (h);
 161 }
 162
 163 static inline u_int64_t
 164 mh3_fmix64(u_int64_t k)
 165 {
 166         k ^= k >> 33;
 167         k *= 0xff51afd7ed558ccdLLU;
 168         k ^= k >> 33;
 169         k *= 0xc4ceb9fe1a85ec53LLU;
 170         k ^= k >> 33;
 171
 172         return (k);
 173 }
 174
 175 /*
 176  * MurmurHash3_x86_32
 177  */
 178 #define MH3_X86_32_C1   0xcc9e2d51
 179 #define MH3_X86_32_C2   0x1b873593
 180
 181 u_int32_t
 182 net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
 183 {
 184         const u_int8_t *data = (const u_int8_t *)key;
 185         const u_int32_t nblocks = len / 4;
 186         const u_int32_t *blocks;
 187         const u_int8_t *tail;
 188         u_int32_t h1 = seed, k1;
 189         int i;
 190
 191         /* body */
 192         blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);
 193
 194         for (i = -nblocks; i; i++) {
 195                 k1 = getblock32(blocks, i);
 196
 197                 k1 *= MH3_X86_32_C1;
 198                 k1 = ROTL32(k1, 15);
 199                 k1 *= MH3_X86_32_C2;
 200
 201                 h1 ^= k1;
 202                 h1 = ROTL32(h1, 13);
 203                 h1 = h1 * 5 + 0xe6546b64;
 204         }
 205
 206         /* tail */
 207         tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
 208         k1 = 0;
 209
 210         switch (len & 3) {
 211         case 3:
 212                 k1 ^= tail[2] << 16;
 213                 /* FALLTHRU */
 214         case 2:
 215                 k1 ^= tail[1] << 8;
 216                 /* FALLTHRU */
 217         case 1:
 218                 k1 ^= tail[0];
 219                 k1 *= MH3_X86_32_C1;
 220                 k1 = ROTL32(k1, 15);
 221                 k1 *= MH3_X86_32_C2;
 222                 h1 ^= k1;
 223         };
 224
 225         /* finalization */
 226         h1 ^= len;
 227
 228         h1 = mh3_fmix32(h1);
 229
 230         return (h1);
 231 }
 232
 233 /*
 234  * MurmurHash3_x64_128
 235  */
 236 #define MH3_X64_128_C1  0x87c37b91114253d5LLU
 237 #define MH3_X64_128_C2  0x4cf5ad432745937fLLU
 238
 239 u_int32_t
 240 net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
 241 {
 242         const u_int8_t *data = (const u_int8_t *)key;
 243         const u_int32_t nblocks = len / 16;
 244         const u_int64_t *blocks;
 245         const u_int8_t *tail;
 246         u_int64_t h1 = seed, k1;
 247         u_int64_t h2 = seed, k2;
 248         u_int32_t i;
 249
 250         /* body */
 251         blocks = (const u_int64_t *)(const void *)data;
 252
 253         for (i = 0; i < nblocks; i++) {
 254                 k1 = getblock64(blocks, i * 2 + 0);
 255                 k2 = getblock64(blocks, i * 2 + 1);
 256
 257                 k1 *= MH3_X64_128_C1;
 258                 k1 = ROTL64(k1, 31);
 259                 k1 *= MH3_X64_128_C2;
 260                 h1 ^= k1;
 261
 262                 h1 = ROTL64(h1, 27);
 263                 h1 += h2;
 264                 h1 = h1 * 5 + 0x52dce729;
 265
 266                 k2 *= MH3_X64_128_C2;
 267                 k2 = ROTL64(k2, 33);
 268                 k2 *= MH3_X64_128_C1;
 269                 h2 ^= k2;
 270
 271                 h2 = ROTL64(h2, 31);
 272                 h2 += h1;
 273                 h2 = h2 * 5+ 0x38495ab5;
 274         }
 275
 276         /* tail */
 277         tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
 278         k1 = 0;
 279         k2 = 0;
 280
 281         switch (len & 15) {
 282         case 15:
 283                 k2 ^= ((u_int64_t)tail[14]) << 48;
 284                 /* FALLTHRU */
 285         case 14:
 286                 k2 ^= ((u_int64_t)tail[13]) << 40;
 287                 /* FALLTHRU */
 288         case 13:
 289                 k2 ^= ((u_int64_t)tail[12]) << 32;
 290                 /* FALLTHRU */
 291         case 12:
 292                 k2 ^= ((u_int64_t)tail[11]) << 24;
 293                 /* FALLTHRU */
 294         case 11:
 295                 k2 ^= ((u_int64_t)tail[10]) << 16;
 296                 /* FALLTHRU */
 297         case 10:
 298                 k2 ^= ((u_int64_t)tail[9]) << 8;
 299                 /* FALLTHRU */
 300         case 9:
 301                 k2 ^= ((u_int64_t)tail[8]) << 0;
 302                 k2 *= MH3_X64_128_C2;
 303                 k2 = ROTL64(k2, 33);
 304                 k2 *= MH3_X64_128_C1;
 305                 h2 ^= k2;
 306                 /* FALLTHRU */
 307         case 8:
 308                 k1 ^= ((u_int64_t)tail[7]) << 56;
 309                 /* FALLTHRU */
 310         case 7:
 311                 k1 ^= ((u_int64_t)tail[6]) << 48;
 312                 /* FALLTHRU */
 313         case 6:
 314                 k1 ^= ((u_int64_t)tail[5]) << 40;
 315                 /* FALLTHRU */
 316         case 5:
 317                 k1 ^= ((u_int64_t)tail[4]) << 32;
 318                 /* FALLTHRU */
 319         case 4:
 320                 k1 ^= ((u_int64_t)tail[3]) << 24;
 321                 /* FALLTHRU */
 322         case 3:
 323                 k1 ^= ((u_int64_t)tail[2]) << 16;
 324                 /* FALLTHRU */
 325         case 2:
 326                 k1 ^= ((u_int64_t)tail[1]) << 8;
 327                 /* FALLTHRU */
 328         case 1:
 329                 k1 ^= ((u_int64_t)tail[0]) << 0;
 330                 k1 *= MH3_X64_128_C1;
 331                 k1 = ROTL64(k1, 31);
 332                 k1 *= MH3_X64_128_C2;
 333                 h1 ^= k1;
 334         };
 335
 336         /* finalization */
 337         h1 ^= len;
 338         h2 ^= len;
 339
 340         h1 += h2;
 341         h2 += h1;
 342
 343         h1 = mh3_fmix64(h1);
 344         h2 = mh3_fmix64(h2);
 345
 346         h1 += h2;
 347         h2 += h1;
 348
 349         /* throw all but lowest 32-bit */
 350         return (h1 & 0xffffffff);
 351 }
 352
 353 #define JHASH_INIT      0xdeadbeef
 354
 355 #define JHASH_MIX(a, b, c) {                    \
 356         a -= c;  a ^= ROTL32(c, 4);   c += b;   \
 357         b -= a;  b ^= ROTL32(a, 6);   a += c;   \
 358         c -= b;  c ^= ROTL32(b, 8);   b += a;   \
 359         a -= c;  a ^= ROTL32(c, 16);  c += b;   \
 360         b -= a;  b ^= ROTL32(a, 19);  a += c;   \
 361         c -= b;  c ^= ROTL32(b, 4);   b += a;   \
 362 }
 363
 364 #define JHASH_FINAL(a, b, c) {                  \
 365         c ^= b;  c -= ROTL32(b, 14);            \
 366         a ^= c;  a -= ROTL32(c, 11);            \
 367         b ^= a;  b -= ROTL32(a, 25);            \
 368         c ^= b;  c -= ROTL32(b, 16);            \
 369         a ^= c;  a -= ROTL32(c, 4);             \
 370         b ^= a;  b -= ROTL32(a, 14);            \
 371         c ^= b;  c -= ROTL32(b, 24);            \
 372 }
 373
 374 #if BYTE_ORDER == BIG_ENDIAN
 375 /*
 376  * hashbig()
 377  */
 378 u_int32_t
 379 net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
 380 {
 381         u_int32_t a, b, c;
 382
 383         /* Set up the internal state */
 384         a = b = c = JHASH_INIT + len + seed;
 385
 386         if (ALIGNED32(key)) {
 387                 /* read 32-bit chunks */
 388                 const u_int32_t *k = (const u_int32_t *)key;
 389
 390                 /*
 391                  * all but last block:
 392                  * aligned reads and affect 32 bits of (a,b,c)
 393                  */
 394                 while (len > 12) {
 395                         a += k[0];
 396                         b += k[1];
 397                         c += k[2];
 398                         JHASH_MIX(a, b, c);
 399                         len -= 12;
 400                         k += 3;
 401                 }
 402
 403                 /*
 404                  * handle the last (probably partial) block
 405                  *
 406                  * "k[2] << 8" actually reads beyond the end of the string,
 407                  * but then shifts out the part it's not allowed to read.
 408                  * Because the string is aligned, the illegal read is in
 409                  * the same word as the rest of the string.  The masking
 410                  * trick does make the hash noticably faster for short
 411                  * strings (like English words).
 412                  */
 413                 switch (len) {
 414                 case 12:
 415                         c += k[2];
 416                         b += k[1];
 417                         a += k[0];
 418                         break;
 419
 420                 case 11:
 421                         c += k[2] & 0xffffff00;
 422                         b += k[1];
 423                         a += k[0];
 424                         break;
 425
 426                 case 10:
 427                         c += k[2] & 0xffff0000;
 428                         b += k[1];
 429                         a += k[0];
 430                         break;
 431
 432                 case 9:
 433                         c += k[2] & 0xff000000;
 434                         b += k[1];
 435                         a += k[0];
 436                         break;
 437
 438                 case 8:
 439                         b += k[1];
 440                         a += k[0];
 441                         break;
 442
 443                 case 7:
 444                         b += k[1] & 0xffffff00;
 445                         a += k[0];
 446                         break;
 447
 448                 case 6:
 449                         b += k[1] & 0xffff0000;
 450                         a += k[0];
 451                         break;
 452
 453                 case 5:
 454                         b += k[1] & 0xff000000;
 455                         a += k[0];
 456                         break;
 457
 458                 case 4:
 459                         a += k[0];
 460                         break;
 461
 462                 case 3:
 463                         a += k[0] & 0xffffff00;
 464                         break;
 465
 466                 case 2:
 467                         a += k[0] & 0xffff0000;
 468                         break;
 469
 470                 case 1:
 471                         a += k[0] & 0xff000000;
 472                         break;
 473
 474                 case 0:
 475                         /* zero length requires no mixing */
 476                         return (c);
 477                 }
 478
 479                 JHASH_FINAL(a, b, c);
 480
 481                 return (c);
 482         }
 483
 484         /* need to read the key one byte at a time */
 485         const u_int8_t *k = (const u_int8_t *)key;
 486
 487         /* all but the last block: affect some 32 bits of (a,b,c) */
 488         while (len > 12) {
 489                 a += ((u_int32_t)k[0]) << 24;
 490                 a += ((u_int32_t)k[1]) << 16;
 491                 a += ((u_int32_t)k[2]) << 8;
 492                 a += ((u_int32_t)k[3]);
 493                 b += ((u_int32_t)k[4]) << 24;
 494                 b += ((u_int32_t)k[5]) << 16;
 495                 b += ((u_int32_t)k[6]) << 8;
 496                 b += ((u_int32_t)k[7]);
 497                 c += ((u_int32_t)k[8]) << 24;
 498                 c += ((u_int32_t)k[9]) << 16;
 499                 c += ((u_int32_t)k[10]) << 8;
 500                 c += ((u_int32_t)k[11]);
 501                 JHASH_MIX(a, b, c);
 502                 len -= 12;
 503                 k += 12;
 504         }
 505
 506         /* last block: affect all 32 bits of (c) */
 507         switch (len) {
 508         case 12:
 509                 c += k[11];
 510                 /* FALLTHRU */
 511         case 11:
 512                 c += ((u_int32_t)k[10]) << 8;
 513                 /* FALLTHRU */
 514         case 10:
 515                 c += ((u_int32_t)k[9]) << 16;
 516                 /* FALLTHRU */
 517         case 9:
 518                 c += ((u_int32_t)k[8]) << 24;
 519                 /* FALLTHRU */
 520         case 8:
 521                 b += k[7];
 522                 /* FALLTHRU */
 523         case 7:
 524                 b += ((u_int32_t)k[6]) << 8;
 525                 /* FALLTHRU */
 526         case 6:
 527                 b += ((u_int32_t)k[5]) << 16;
 528                 /* FALLTHRU */
 529         case 5:
 530                 b += ((u_int32_t)k[4]) << 24;
 531                 /* FALLTHRU */
 532         case 4:
 533                 a += k[3];
 534                 /* FALLTHRU */
 535         case 3:
 536                 a += ((u_int32_t)k[2]) << 8;
 537                 /* FALLTHRU */
 538         case 2:
 539                 a += ((u_int32_t)k[1]) << 16;
 540                 /* FALLTHRU */
 541         case 1:
 542                 a += ((u_int32_t)k[0]) << 24;
 543                 break;
 544
 545         case 0:
 546                 /* zero length requires no mixing */
 547                 return (c);
 548         }
 549
 550         JHASH_FINAL(a, b, c);
 551
 552         return (c);
 553 }
 554 #else /* LITTLE_ENDIAN */
 555 /*
 556  * hashlittle()
 557  */
 558 u_int32_t
 559 net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
 560 {
 561         u_int32_t a, b, c;
 562
 563         /* Set up the internal state */
 564         a = b = c = JHASH_INIT + len + seed;
 565
 566 #if defined(__i386__) || defined(__x86_64__)
 567         /*
 568          * On i386/x86_64, it is faster to read 32-bit chunks if the key
 569          * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
 570          * is aligned 16-bit.
 571          */
 572         if (ALIGNED32(key) || !ALIGNED16(key)) {
 573 #else /* !defined(__i386__) && !defined(__x86_64__) */
 574         if (ALIGNED32(key)) {
 575 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 576                 /* read 32-bit chunks */
 577                 const u_int32_t *k = (const u_int32_t *)key;
 578
 579                 /*
 580                  * all but last block:
 581                  * aligned reads and affect 32 bits of (a,b,c)
 582                  */
 583                 while (len > 12) {
 584                         a += k[0];
 585                         b += k[1];
 586                         c += k[2];
 587                         JHASH_MIX(a, b, c);
 588                         len -= 12;
 589                         k += 3;
 590                 }
 591
 592                 /*
 593                  * handle the last (probably partial) block
 594                  *
 595                  * "k[2] & 0xffffff" actually reads beyond the end of the
 596                  * string, but then masks off the part it's not allowed
 597                  * to read.  Because the string is aligned, the masked-off
 598                  * tail is in the same word as the rest of the string.
 599                  * The masking trick does make the hash noticably faster
 600                  * for short strings (like English words).
 601                  */
 602                 switch (len) {
 603                 case 12:
 604                         c += k[2];
 605                         b += k[1];
 606                         a += k[0];
 607                         break;
 608
 609                 case 11:
 610                         c += k[2] & 0xffffff;
 611                         b += k[1];
 612                         a += k[0];
 613                         break;
 614
 615                 case 10:
 616                         c += k[2] & 0xffff;
 617                         b += k[1];
 618                         a += k[0];
 619                         break;
 620
 621                 case 9:
 622                         c += k[2] & 0xff;
 623                         b += k[1];
 624                         a += k[0];
 625                         break;
 626
 627                 case 8:
 628                         b += k[1];
 629                         a += k[0];
 630                         break;
 631
 632                 case 7:
 633                         b += k[1] & 0xffffff;
 634                         a += k[0];
 635                         break;
 636
 637                 case 6:
 638                         b += k[1] & 0xffff;
 639                         a += k[0];
 640                         break;
 641
 642                 case 5:
 643                         b += k[1] & 0xff;
 644                         a += k[0];
 645                         break;
 646
 647                 case 4:
 648                         a += k[0];
 649                         break;
 650
 651                 case 3:
 652                         a += k[0] & 0xffffff;
 653                         break;
 654
 655                 case 2:
 656                         a += k[0] & 0xffff;
 657                         break;
 658
 659                 case 1:
 660                         a += k[0] & 0xff;
 661                         break;
 662
 663                 case 0:
 664                         /* zero length requires no mixing */
 665                         return (c);
 666                 }
 667
 668                 JHASH_FINAL(a, b, c);
 669
 670                 return (c);
 671         }
 672 #if !defined(__i386__) && !defined(__x86_64__)
 673         else if (ALIGNED16(key)) {
 674 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 675                 /* read 16-bit chunks */
 676                 const u_int16_t *k = (const u_int16_t *)key;
 677                 const u_int8_t *k8;
 678
 679                 /* all but last block: aligned reads and different mixing */
 680                 while (len > 12) {
 681                         a += k[0] + (((u_int32_t)k[1]) << 16);
 682                         b += k[2] + (((u_int32_t)k[3]) << 16);
 683                         c += k[4] + (((u_int32_t)k[5]) << 16);
 684                         JHASH_MIX(a, b, c);
 685                         len -= 12;
 686                         k += 6;
 687                 }
 688
 689                 /* handle the last (probably partial) block */
 690                 k8 = (const u_int8_t *)k;
 691                 switch (len) {
 692                 case 12:
 693                         c += k[4] + (((u_int32_t)k[5]) << 16);
 694                         b += k[2] + (((u_int32_t)k[3]) << 16);
 695                         a += k[0] + (((u_int32_t)k[1]) << 16);
 696                         break;
 697
 698                 case 11:
 699                         c += ((u_int32_t)k8[10]) << 16;
 700                         /* FALLTHRU */
 701                 case 10:
 702                         c += k[4];
 703                         b += k[2] + (((u_int32_t)k[3]) << 16);
 704                         a += k[0] + (((u_int32_t)k[1]) << 16);
 705                         break;
 706
 707                 case 9:
 708                         c += k8[8];
 709                         /* FALLTHRU */
 710                 case 8:
 711                         b += k[2] + (((u_int32_t)k[3]) << 16);
 712                         a += k[0] + (((u_int32_t)k[1]) << 16);
 713                         break;
 714
 715                 case 7:
 716                         b += ((u_int32_t)k8[6]) << 16;
 717                         /* FALLTHRU */
 718                 case 6:
 719                         b += k[2];
 720                         a += k[0] + (((u_int32_t)k[1]) << 16);
 721                         break;
 722
 723                 case 5:
 724                         b += k8[4];
 725                         /* FALLTHRU */
 726                 case 4:
 727                         a += k[0] + (((u_int32_t)k[1]) << 16);
 728                         break;
 729
 730                 case 3:
 731                         a += ((u_int32_t)k8[2]) << 16;
 732                         /* FALLTHRU */
 733                 case 2:
 734                         a += k[0];
 735                         break;
 736
 737                 case 1:
 738                         a += k8[0];
 739                         break;
 740
 741                 case 0:
 742                         /* zero length requires no mixing */
 743                         return (c);
 744                 }
 745
 746                 JHASH_FINAL(a, b, c);
 747
 748                 return (c);
 749 #if !defined(__i386__) && !defined(__x86_64__)
 750         }
 751
 752         /* need to read the key one byte at a time */
 753         const u_int8_t *k = (const u_int8_t *)key;
 754
 755         /* all but the last block: affect some 32 bits of (a,b,c) */
 756         while (len > 12) {
 757                 a += k[0];
 758                 a += ((u_int32_t)k[1]) << 8;
 759                 a += ((u_int32_t)k[2]) << 16;
 760                 a += ((u_int32_t)k[3]) << 24;
 761                 b += k[4];
 762                 b += ((u_int32_t)k[5]) << 8;
 763                 b += ((u_int32_t)k[6]) << 16;
 764                 b += ((u_int32_t)k[7]) << 24;
 765                 c += k[8];
 766                 c += ((u_int32_t)k[9]) << 8;
 767                 c += ((u_int32_t)k[10]) << 16;
 768                 c += ((u_int32_t)k[11]) << 24;
 769                 JHASH_MIX(a, b, c);
 770                 len -= 12;
 771                 k += 12;
 772         }
 773
 774         /* last block: affect all 32 bits of (c) */
 775         switch (len) {
 776         case 12:
 777                 c += ((u_int32_t)k[11]) << 24;
 778                 /* FALLTHRU */
 779         case 11:
 780                 c += ((u_int32_t)k[10]) << 16;
 781                 /* FALLTHRU */
 782         case 10:
 783                 c += ((u_int32_t)k[9]) << 8;
 784                 /* FALLTHRU */
 785         case 9:
 786                 c += k[8];
 787                 /* FALLTHRU */
 788         case 8:
 789                 b += ((u_int32_t)k[7]) << 24;
 790                 /* FALLTHRU */
 791         case 7:
 792                 b += ((u_int32_t)k[6]) << 16;
 793                 /* FALLTHRU */
 794         case 6:
 795                 b += ((u_int32_t)k[5]) << 8;
 796                 /* FALLTHRU */
 797         case 5:
 798                 b += k[4];
 799                 /* FALLTHRU */
 800         case 4:
 801                 a += ((u_int32_t)k[3]) << 24;
 802                 /* FALLTHRU */
 803         case 3:
 804                 a += ((u_int32_t)k[2]) << 16;
 805                 /* FALLTHRU */
 806         case 2:
 807                 a += ((u_int32_t)k[1]) << 8;
 808                 /* FALLTHRU */
 809         case 1:
 810                 a += k[0];
 811                 break;
 812
 813         case 0:
 814                 /* zero length requires no mixing */
 815                 return (c);
 816         }
 817
 818         JHASH_FINAL(a, b, c);
 819
 820         return (c);
 821 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 822 }
 823 #endif /* LITTLE_ENDIAN */