bsd/net/flowhash.c

   1 /*
   2  * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * http://code.google.com/p/smhasher/
  31  *
  32  * Copyright (c) 2009-2011 Austin Appleby.
  33  *
  34  * MurmurHash3 was written by Austin Appleby, and is placed in the public
  35  * domain. The author hereby disclaims copyright to this source code.
  36  */
  37
  38 /*
  39  * http://burtleburtle.net/bob/hash/
  40  *
  41  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  42  *
  43  * You can use this free for any purpose.  It's in the public domain.
  44  * It has no warranty.
  45  */
  46
  47 #include <stdbool.h>
  48 #include <sys/types.h>
  49 #include <machine/endian.h>
  50 #include <net/flowhash.h>
  51 #include <os/base.h>
  52
  53 static inline u_int32_t getblock32(const u_int32_t *, int);
  54 static inline u_int64_t getblock64(const u_int64_t *, int);
  55 static inline u_int32_t mh3_fmix32(u_int32_t);
  56 static inline u_int64_t mh3_fmix64(u_int64_t);
  57
  58 #define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
  59 #define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
  60 #define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)
  61
  62 #define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
  63 #define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
  64
  65 /*
  66  * The following hash algorithms are selected based on performance:
  67  *
  68  * 64-bit:      MurmurHash3_x64_128
  69  * 32-bit:      JHash
  70  */
  71 #if   defined(__LP64__)
  72 net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
  73 #else /* !__LP64__ */
  74 net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
  75 #endif /* !__LP64__ */
  76
  77 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
  78 static inline u_int32_t
  79 getblock32(const u_int32_t *p, int i)
  80 {
  81         return p[i];
  82 }
  83
  84 static inline u_int64_t
  85 getblock64(const u_int64_t *p, int i)
  86 {
  87         return p[i];
  88 }
  89 #else /* !__i386__ && !__x86_64__ && !__arm64__*/
  90 static inline u_int32_t
  91 getblock32(const u_int32_t *p, int i)
  92 {
  93         const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
  94         u_int32_t value;
  95
  96         if (ALIGNED32(p)) {
  97                 value = p[i];
  98         } else {
  99 #if BYTE_ORDER == BIG_ENDIAN
 100                 value =
 101                     (((u_int32_t)bytes[0]) << 24) |
 102                     (((u_int32_t)bytes[1]) << 16) |
 103                     (((u_int32_t)bytes[2]) << 8) |
 104                     ((u_int32_t)bytes[3]);
 105 #else /* LITTLE_ENDIAN */
 106                 value =
 107                     (((u_int32_t)bytes[3]) << 24) |
 108                     (((u_int32_t)bytes[2]) << 16) |
 109                     (((u_int32_t)bytes[1]) << 8) |
 110                     ((u_int32_t)bytes[0]);
 111 #endif /* LITTLE_ENDIAN */
 112         }
 113         return value;
 114 }
 115
 116 static inline u_int64_t
 117 getblock64(const u_int64_t *p, int i)
 118 {
 119         const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
 120         u_int64_t value;
 121
 122         if (ALIGNED64(p)) {
 123                 value = p[i];
 124         } else {
 125 #if BYTE_ORDER == BIG_ENDIAN
 126                 value =
 127                     (((u_int64_t)bytes[0]) << 56) |
 128                     (((u_int64_t)bytes[1]) << 48) |
 129                     (((u_int64_t)bytes[2]) << 40) |
 130                     (((u_int64_t)bytes[3]) << 32) |
 131                     (((u_int64_t)bytes[4]) << 24) |
 132                     (((u_int64_t)bytes[5]) << 16) |
 133                     (((u_int64_t)bytes[6]) << 8) |
 134                     ((u_int64_t)bytes[7]);
 135 #else /* LITTLE_ENDIAN */
 136                 value =
 137                     (((u_int64_t)bytes[7]) << 56) |
 138                     (((u_int64_t)bytes[6]) << 48) |
 139                     (((u_int64_t)bytes[5]) << 40) |
 140                     (((u_int64_t)bytes[4]) << 32) |
 141                     (((u_int64_t)bytes[3]) << 24) |
 142                     (((u_int64_t)bytes[2]) << 16) |
 143                     (((u_int64_t)bytes[1]) << 8) |
 144                     ((u_int64_t)bytes[0]);
 145 #endif /* LITTLE_ENDIAN */
 146         }
 147         return value;
 148 }
 149 #endif /* !__i386__ && !__x86_64 && !__arm64__ */
 150
 151 static inline u_int32_t
 152 mh3_fmix32(u_int32_t h)
 153 {
 154         h ^= h >> 16;
 155         h *= 0x85ebca6b;
 156         h ^= h >> 13;
 157         h *= 0xc2b2ae35;
 158         h ^= h >> 16;
 159
 160         return h;
 161 }
 162
 163 static inline u_int64_t
 164 mh3_fmix64(u_int64_t k)
 165 {
 166         k ^= k >> 33;
 167         k *= 0xff51afd7ed558ccdLLU;
 168         k ^= k >> 33;
 169         k *= 0xc4ceb9fe1a85ec53LLU;
 170         k ^= k >> 33;
 171
 172         return k;
 173 }
 174
 175 /*
 176  * MurmurHash3_x86_32
 177  */
 178 #define MH3_X86_32_C1   0xcc9e2d51
 179 #define MH3_X86_32_C2   0x1b873593
 180
 181 u_int32_t
 182 net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
 183 {
 184         const u_int8_t *data = (const u_int8_t *)key;
 185         const u_int32_t nblocks = len / 4;
 186         const u_int32_t *blocks;
 187         const u_int8_t *tail;
 188         u_int32_t h1 = seed, k1;
 189         int i;
 190
 191         /* body */
 192         blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);
 193
 194         for (i = -nblocks; i; i++) {
 195                 k1 = getblock32(blocks, i);
 196
 197                 k1 *= MH3_X86_32_C1;
 198                 k1 = ROTL32(k1, 15);
 199                 k1 *= MH3_X86_32_C2;
 200
 201                 h1 ^= k1;
 202                 h1 = ROTL32(h1, 13);
 203                 h1 = h1 * 5 + 0xe6546b64;
 204         }
 205
 206         /* tail */
 207         tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
 208         k1 = 0;
 209
 210         switch (len & 3) {
 211         case 3:
 212                 k1 ^= tail[2] << 16;
 213                 OS_FALLTHROUGH;
 214         case 2:
 215                 k1 ^= tail[1] << 8;
 216                 OS_FALLTHROUGH;
 217         case 1:
 218                 k1 ^= tail[0];
 219                 k1 *= MH3_X86_32_C1;
 220                 k1 = ROTL32(k1, 15);
 221                 k1 *= MH3_X86_32_C2;
 222                 h1 ^= k1;
 223         }
 224         ;
 225
 226         /* finalization */
 227         h1 ^= len;
 228
 229         h1 = mh3_fmix32(h1);
 230
 231         return h1;
 232 }
 233
 234 /*
 235  * MurmurHash3_x64_128
 236  */
 237 #define MH3_X64_128_C1  0x87c37b91114253d5LLU
 238 #define MH3_X64_128_C2  0x4cf5ad432745937fLLU
 239
 240 u_int32_t
 241 net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
 242 {
 243         const u_int8_t *data = (const u_int8_t *)key;
 244         const u_int32_t nblocks = len / 16;
 245         const u_int64_t *blocks;
 246         const u_int8_t *tail;
 247         u_int64_t h1 = seed, k1;
 248         u_int64_t h2 = seed, k2;
 249         u_int32_t i;
 250
 251         /* body */
 252         blocks = (const u_int64_t *)(const void *)data;
 253
 254         for (i = 0; i < nblocks; i++) {
 255                 k1 = getblock64(blocks, i * 2 + 0);
 256                 k2 = getblock64(blocks, i * 2 + 1);
 257
 258                 k1 *= MH3_X64_128_C1;
 259 #if defined(__x86_64__)
 260                 __asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
 261 #elif defined(__arm64__)
 262                 __asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
 263 #else /* !__x86_64__ && !__arm64__ */
 264                 k1 = ROTL64(k1, 31);
 265 #endif /* !__x86_64__ && !__arm64__ */
 266                 k1 *= MH3_X64_128_C2;
 267                 h1 ^= k1;
 268
 269 #if defined(__x86_64__)
 270                 __asm__ ( "rol   $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
 271 #elif defined(__arm64__)
 272                 __asm__ ( "ror   %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
 273 #else /* !__x86_64__ && !__arm64__ */
 274                 h1 = ROTL64(h1, 27);
 275 #endif /* !__x86_64__ && !__arm64__ */
 276                 h1 += h2;
 277                 h1 = h1 * 5 + 0x52dce729;
 278
 279                 k2 *= MH3_X64_128_C2;
 280 #if defined(__x86_64__)
 281                 __asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
 282 #elif defined(__arm64__)
 283                 __asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
 284 #else /* !__x86_64__ && !__arm64__ */
 285                 k2 = ROTL64(k2, 33);
 286 #endif /* !__x86_64__ && !__arm64__ */
 287                 k2 *= MH3_X64_128_C1;
 288                 h2 ^= k2;
 289
 290 #if defined(__x86_64__)
 291                 __asm__ ( "rol   $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
 292 #elif defined(__arm64__)
 293                 __asm__ ( "ror   %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
 294 #else /* !__x86_64__ && !__arm64__ */
 295                 h2 = ROTL64(h2, 31);
 296 #endif /* !__x86_64__ && !__arm64__ */
 297                 h2 += h1;
 298                 h2 = h2 * 5 + 0x38495ab5;
 299         }
 300
 301         /* tail */
 302         tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
 303         k1 = 0;
 304         k2 = 0;
 305
 306         switch (len & 15) {
 307         case 15:
 308                 k2 ^= ((u_int64_t)tail[14]) << 48;
 309                 OS_FALLTHROUGH;
 310         case 14:
 311                 k2 ^= ((u_int64_t)tail[13]) << 40;
 312                 OS_FALLTHROUGH;
 313         case 13:
 314                 k2 ^= ((u_int64_t)tail[12]) << 32;
 315                 OS_FALLTHROUGH;
 316         case 12:
 317                 k2 ^= ((u_int64_t)tail[11]) << 24;
 318                 OS_FALLTHROUGH;
 319         case 11:
 320                 k2 ^= ((u_int64_t)tail[10]) << 16;
 321                 OS_FALLTHROUGH;
 322         case 10:
 323                 k2 ^= ((u_int64_t)tail[9]) << 8;
 324                 OS_FALLTHROUGH;
 325         case 9:
 326                 k2 ^= ((u_int64_t)tail[8]) << 0;
 327                 k2 *= MH3_X64_128_C2;
 328 #if defined(__x86_64__)
 329                 __asm__ ( "rol   $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
 330 #elif defined(__arm64__)
 331                 __asm__ ( "ror   %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
 332 #else /* !__x86_64__ && !__arm64__ */
 333                 k2 = ROTL64(k2, 33);
 334 #endif /* !__x86_64__ && !__arm64__ */
 335                 k2 *= MH3_X64_128_C1;
 336                 h2 ^= k2;
 337                 OS_FALLTHROUGH;
 338         case 8:
 339                 k1 ^= ((u_int64_t)tail[7]) << 56;
 340                 OS_FALLTHROUGH;
 341         case 7:
 342                 k1 ^= ((u_int64_t)tail[6]) << 48;
 343                 OS_FALLTHROUGH;
 344         case 6:
 345                 k1 ^= ((u_int64_t)tail[5]) << 40;
 346                 OS_FALLTHROUGH;
 347         case 5:
 348                 k1 ^= ((u_int64_t)tail[4]) << 32;
 349                 OS_FALLTHROUGH;
 350         case 4:
 351                 k1 ^= ((u_int64_t)tail[3]) << 24;
 352                 OS_FALLTHROUGH;
 353         case 3:
 354                 k1 ^= ((u_int64_t)tail[2]) << 16;
 355                 OS_FALLTHROUGH;
 356         case 2:
 357                 k1 ^= ((u_int64_t)tail[1]) << 8;
 358                 OS_FALLTHROUGH;
 359         case 1:
 360                 k1 ^= ((u_int64_t)tail[0]) << 0;
 361                 k1 *= MH3_X64_128_C1;
 362 #if defined(__x86_64__)
 363                 __asm__ ( "rol   $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
 364 #elif defined(__arm64__)
 365                 __asm__ ( "ror   %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
 366 #else /* !__x86_64__ && !__arm64__ */
 367                 k1 = ROTL64(k1, 31);
 368 #endif /* !__x86_64__ && !__arm64__ */
 369                 k1 *= MH3_X64_128_C2;
 370                 h1 ^= k1;
 371         }
 372         ;
 373
 374         /* finalization */
 375         h1 ^= len;
 376         h2 ^= len;
 377
 378         h1 += h2;
 379         h2 += h1;
 380
 381         h1 = mh3_fmix64(h1);
 382         h2 = mh3_fmix64(h2);
 383
 384         h1 += h2;
 385         h2 += h1;
 386
 387         /* throw all but lowest 32-bit */
 388         return h1 & 0xffffffff;
 389 }
 390
 391 #define JHASH_INIT      0xdeadbeef
 392
 393 #define JHASH_MIX(a, b, c) {                    \
 394         a -= c;  a ^= ROTL32(c, 4);   c += b;   \
 395         b -= a;  b ^= ROTL32(a, 6);   a += c;   \
 396         c -= b;  c ^= ROTL32(b, 8);   b += a;   \
 397         a -= c;  a ^= ROTL32(c, 16);  c += b;   \
 398         b -= a;  b ^= ROTL32(a, 19);  a += c;   \
 399         c -= b;  c ^= ROTL32(b, 4);   b += a;   \
 400 }
 401
 402 #define JHASH_FINAL(a, b, c) {                  \
 403         c ^= b;  c -= ROTL32(b, 14);            \
 404         a ^= c;  a -= ROTL32(c, 11);            \
 405         b ^= a;  b -= ROTL32(a, 25);            \
 406         c ^= b;  c -= ROTL32(b, 16);            \
 407         a ^= c;  a -= ROTL32(c, 4);             \
 408         b ^= a;  b -= ROTL32(a, 14);            \
 409         c ^= b;  c -= ROTL32(b, 24);            \
 410 }
 411
 412 #if BYTE_ORDER == BIG_ENDIAN
 413 /*
 414  * hashbig()
 415  */
 416 u_int32_t
 417 net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
 418 {
 419         u_int32_t a, b, c;
 420
 421         /* Set up the internal state */
 422         a = b = c = JHASH_INIT + len + seed;
 423
 424         if (ALIGNED32(key)) {
 425                 /* read 32-bit chunks */
 426                 const u_int32_t *k = (const u_int32_t *)key;
 427
 428                 /*
 429                  * all but last block:
 430                  * aligned reads and affect 32 bits of (a,b,c)
 431                  */
 432                 while (len > 12) {
 433                         a += k[0];
 434                         b += k[1];
 435                         c += k[2];
 436                         JHASH_MIX(a, b, c);
 437                         len -= 12;
 438                         k += 3;
 439                 }
 440
 441                 /*
 442                  * handle the last (probably partial) block
 443                  *
 444                  * "k[2] << 8" actually reads beyond the end of the string,
 445                  * but then shifts out the part it's not allowed to read.
 446                  * Because the string is aligned, the illegal read is in
 447                  * the same word as the rest of the string.  The masking
 448                  * trick does make the hash noticably faster for short
 449                  * strings (like English words).
 450                  */
 451                 switch (len) {
 452                 case 12:
 453                         c += k[2];
 454                         b += k[1];
 455                         a += k[0];
 456                         break;
 457
 458                 case 11:
 459                         c += k[2] & 0xffffff00;
 460                         b += k[1];
 461                         a += k[0];
 462                         break;
 463
 464                 case 10:
 465                         c += k[2] & 0xffff0000;
 466                         b += k[1];
 467                         a += k[0];
 468                         break;
 469
 470                 case 9:
 471                         c += k[2] & 0xff000000;
 472                         b += k[1];
 473                         a += k[0];
 474                         break;
 475
 476                 case 8:
 477                         b += k[1];
 478                         a += k[0];
 479                         break;
 480
 481                 case 7:
 482                         b += k[1] & 0xffffff00;
 483                         a += k[0];
 484                         break;
 485
 486                 case 6:
 487                         b += k[1] & 0xffff0000;
 488                         a += k[0];
 489                         break;
 490
 491                 case 5:
 492                         b += k[1] & 0xff000000;
 493                         a += k[0];
 494                         break;
 495
 496                 case 4:
 497                         a += k[0];
 498                         break;
 499
 500                 case 3:
 501                         a += k[0] & 0xffffff00;
 502                         break;
 503
 504                 case 2:
 505                         a += k[0] & 0xffff0000;
 506                         break;
 507
 508                 case 1:
 509                         a += k[0] & 0xff000000;
 510                         break;
 511
 512                 case 0:
 513                         /* zero length requires no mixing */
 514                         return c;
 515                 }
 516
 517                 JHASH_FINAL(a, b, c);
 518
 519                 return c;
 520         }
 521
 522         /* need to read the key one byte at a time */
 523         const u_int8_t *k = (const u_int8_t *)key;
 524
 525         /* all but the last block: affect some 32 bits of (a,b,c) */
 526         while (len > 12) {
 527                 a += ((u_int32_t)k[0]) << 24;
 528                 a += ((u_int32_t)k[1]) << 16;
 529                 a += ((u_int32_t)k[2]) << 8;
 530                 a += ((u_int32_t)k[3]);
 531                 b += ((u_int32_t)k[4]) << 24;
 532                 b += ((u_int32_t)k[5]) << 16;
 533                 b += ((u_int32_t)k[6]) << 8;
 534                 b += ((u_int32_t)k[7]);
 535                 c += ((u_int32_t)k[8]) << 24;
 536                 c += ((u_int32_t)k[9]) << 16;
 537                 c += ((u_int32_t)k[10]) << 8;
 538                 c += ((u_int32_t)k[11]);
 539                 JHASH_MIX(a, b, c);
 540                 len -= 12;
 541                 k += 12;
 542         }
 543
 544         /* last block: affect all 32 bits of (c) */
 545         switch (len) {
 546         case 12:
 547                 c += k[11];
 548                 OS_FALLTHROUGH;
 549         case 11:
 550                 c += ((u_int32_t)k[10]) << 8;
 551                 OS_FALLTHROUGH;
 552         case 10:
 553                 c += ((u_int32_t)k[9]) << 16;
 554                 OS_FALLTHROUGH;
 555         case 9:
 556                 c += ((u_int32_t)k[8]) << 24;
 557                 OS_FALLTHROUGH;
 558         case 8:
 559                 b += k[7];
 560                 OS_FALLTHROUGH;
 561         case 7:
 562                 b += ((u_int32_t)k[6]) << 8;
 563                 OS_FALLTHROUGH;
 564         case 6:
 565                 b += ((u_int32_t)k[5]) << 16;
 566                 OS_FALLTHROUGH;
 567         case 5:
 568                 b += ((u_int32_t)k[4]) << 24;
 569                 OS_FALLTHROUGH;
 570         case 4:
 571                 a += k[3];
 572                 OS_FALLTHROUGH;
 573         case 3:
 574                 a += ((u_int32_t)k[2]) << 8;
 575                 OS_FALLTHROUGH;
 576         case 2:
 577                 a += ((u_int32_t)k[1]) << 16;
 578                 OS_FALLTHROUGH;
 579         case 1:
 580                 a += ((u_int32_t)k[0]) << 24;
 581                 break;
 582
 583         case 0:
 584                 /* zero length requires no mixing */
 585                 return c;
 586         }
 587
 588         JHASH_FINAL(a, b, c);
 589
 590         return c;
 591 }
 592 #else /* LITTLE_ENDIAN */
 593 /*
 594  * hashlittle()
 595  */
 596 u_int32_t
 597 net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
 598 {
 599         u_int32_t a, b, c;
 600
 601         /* Set up the internal state */
 602         a = b = c = JHASH_INIT + len + seed;
 603
 604 #if defined(__i386__) || defined(__x86_64__)
 605         /*
 606          * On i386/x86_64, it is faster to read 32-bit chunks if the key
 607          * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
 608          * is aligned 16-bit.
 609          */
 610         if (ALIGNED32(key) || !ALIGNED16(key)) {
 611 #else /* !defined(__i386__) && !defined(__x86_64__) */
 612         if (ALIGNED32(key)) {
 613 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 614                 /* read 32-bit chunks */
 615                 const u_int32_t *k = (const u_int32_t *)key;
 616
 617                 /*
 618                  * all but last block:
 619                  * aligned reads and affect 32 bits of (a,b,c)
 620                  */
 621                 while (len > 12) {
 622                         a += k[0];
 623                         b += k[1];
 624                         c += k[2];
 625                         JHASH_MIX(a, b, c);
 626                         len -= 12;
 627                         k += 3;
 628                 }
 629
 630                 /*
 631                  * handle the last (probably partial) block
 632                  *
 633                  * "k[2] & 0xffffff" actually reads beyond the end of the
 634                  * string, but then masks off the part it's not allowed
 635                  * to read.  Because the string is aligned, the masked-off
 636                  * tail is in the same word as the rest of the string.
 637                  * The masking trick does make the hash noticably faster
 638                  * for short strings (like English words).
 639                  */
 640                 switch (len) {
 641                 case 12:
 642                         c += k[2];
 643                         b += k[1];
 644                         a += k[0];
 645                         break;
 646
 647                 case 11:
 648                         c += k[2] & 0xffffff;
 649                         b += k[1];
 650                         a += k[0];
 651                         break;
 652
 653                 case 10:
 654                         c += k[2] & 0xffff;
 655                         b += k[1];
 656                         a += k[0];
 657                         break;
 658
 659                 case 9:
 660                         c += k[2] & 0xff;
 661                         b += k[1];
 662                         a += k[0];
 663                         break;
 664
 665                 case 8:
 666                         b += k[1];
 667                         a += k[0];
 668                         break;
 669
 670                 case 7:
 671                         b += k[1] & 0xffffff;
 672                         a += k[0];
 673                         break;
 674
 675                 case 6:
 676                         b += k[1] & 0xffff;
 677                         a += k[0];
 678                         break;
 679
 680                 case 5:
 681                         b += k[1] & 0xff;
 682                         a += k[0];
 683                         break;
 684
 685                 case 4:
 686                         a += k[0];
 687                         break;
 688
 689                 case 3:
 690                         a += k[0] & 0xffffff;
 691                         break;
 692
 693                 case 2:
 694                         a += k[0] & 0xffff;
 695                         break;
 696
 697                 case 1:
 698                         a += k[0] & 0xff;
 699                         break;
 700
 701                 case 0:
 702                         /* zero length requires no mixing */
 703                         return c;
 704                 }
 705
 706                 JHASH_FINAL(a, b, c);
 707
 708                 return c;
 709         }
 710 #if !defined(__i386__) && !defined(__x86_64__)
 711         else if (ALIGNED16(key)) {
 712 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 713         /* read 16-bit chunks */
 714         const u_int16_t *k = (const u_int16_t *)key;
 715         const u_int8_t *k8;
 716
 717         /* all but last block: aligned reads and different mixing */
 718         while (len > 12) {
 719                 a += k[0] + (((u_int32_t)k[1]) << 16);
 720                 b += k[2] + (((u_int32_t)k[3]) << 16);
 721                 c += k[4] + (((u_int32_t)k[5]) << 16);
 722                 JHASH_MIX(a, b, c);
 723                 len -= 12;
 724                 k += 6;
 725         }
 726
 727         /* handle the last (probably partial) block */
 728         k8 = (const u_int8_t *)k;
 729         switch (len) {
 730         case 12:
 731                 c += k[4] + (((u_int32_t)k[5]) << 16);
 732                 b += k[2] + (((u_int32_t)k[3]) << 16);
 733                 a += k[0] + (((u_int32_t)k[1]) << 16);
 734                 break;
 735
 736         case 11:
 737                 c += ((u_int32_t)k8[10]) << 16;
 738                 OS_FALLTHROUGH;
 739         case 10:
 740                 c += k[4];
 741                 b += k[2] + (((u_int32_t)k[3]) << 16);
 742                 a += k[0] + (((u_int32_t)k[1]) << 16);
 743                 break;
 744
 745         case 9:
 746                 c += k8[8];
 747                 OS_FALLTHROUGH;
 748         case 8:
 749                 b += k[2] + (((u_int32_t)k[3]) << 16);
 750                 a += k[0] + (((u_int32_t)k[1]) << 16);
 751                 break;
 752
 753         case 7:
 754                 b += ((u_int32_t)k8[6]) << 16;
 755                 OS_FALLTHROUGH;
 756         case 6:
 757                 b += k[2];
 758                 a += k[0] + (((u_int32_t)k[1]) << 16);
 759                 break;
 760
 761         case 5:
 762                 b += k8[4];
 763                 OS_FALLTHROUGH;
 764         case 4:
 765                 a += k[0] + (((u_int32_t)k[1]) << 16);
 766                 break;
 767
 768         case 3:
 769                 a += ((u_int32_t)k8[2]) << 16;
 770                 OS_FALLTHROUGH;
 771         case 2:
 772                 a += k[0];
 773                 break;
 774
 775         case 1:
 776                 a += k8[0];
 777                 break;
 778
 779         case 0:
 780                 /* zero length requires no mixing */
 781                 return c;
 782         }
 783
 784         JHASH_FINAL(a, b, c);
 785
 786         return c;
 787 #if !defined(__i386__) && !defined(__x86_64__)
 788 }
 789
 790 /* need to read the key one byte at a time */
 791 const u_int8_t *k = (const u_int8_t *)key;
 792
 793 /* all but the last block: affect some 32 bits of (a,b,c) */
 794 while (len > 12) {
 795         a += k[0];
 796         a += ((u_int32_t)k[1]) << 8;
 797         a += ((u_int32_t)k[2]) << 16;
 798         a += ((u_int32_t)k[3]) << 24;
 799         b += k[4];
 800         b += ((u_int32_t)k[5]) << 8;
 801         b += ((u_int32_t)k[6]) << 16;
 802         b += ((u_int32_t)k[7]) << 24;
 803         c += k[8];
 804         c += ((u_int32_t)k[9]) << 8;
 805         c += ((u_int32_t)k[10]) << 16;
 806         c += ((u_int32_t)k[11]) << 24;
 807         JHASH_MIX(a, b, c);
 808         len -= 12;
 809         k += 12;
 810 }
 811
 812 /* last block: affect all 32 bits of (c) */
 813 switch (len) {
 814 case 12:
 815         c += ((u_int32_t)k[11]) << 24;
 816         OS_FALLTHROUGH;
 817 case 11:
 818         c += ((u_int32_t)k[10]) << 16;
 819         OS_FALLTHROUGH;
 820 case 10:
 821         c += ((u_int32_t)k[9]) << 8;
 822         OS_FALLTHROUGH;
 823 case 9:
 824         c += k[8];
 825         OS_FALLTHROUGH;
 826 case 8:
 827         b += ((u_int32_t)k[7]) << 24;
 828         OS_FALLTHROUGH;
 829 case 7:
 830         b += ((u_int32_t)k[6]) << 16;
 831         OS_FALLTHROUGH;
 832 case 6:
 833         b += ((u_int32_t)k[5]) << 8;
 834         OS_FALLTHROUGH;
 835 case 5:
 836         b += k[4];
 837         OS_FALLTHROUGH;
 838 case 4:
 839         a += ((u_int32_t)k[3]) << 24;
 840         OS_FALLTHROUGH;
 841 case 3:
 842         a += ((u_int32_t)k[2]) << 16;
 843         OS_FALLTHROUGH;
 844 case 2:
 845         a += ((u_int32_t)k[1]) << 8;
 846         OS_FALLTHROUGH;
 847 case 1:
 848         a += k[0];
 849         break;
 850
 851 case 0:
 852         /* zero length requires no mixing */
 853         return c;
 854 }
 855
 856 JHASH_FINAL(a, b, c);
 857
 858 return c;
 859 #endif /* !defined(__i386__) && !defined(__x86_64__) */
 860 }
 861 #endif /* LITTLE_ENDIAN */