/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose. It's in the public domain.
 * It has no warranty.
 */

#include <stdbool.h>
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>

static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);

#define ALIGNED16(v)	((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)	((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)	((((uintptr_t)(v)) & 7) == 0)

#define ROTL32(x, r)	(((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r)	(((x) << (r)) | ((x) >> (64 - (r))))
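
/*
 * ROTL32/ROTL64 are plain rotate-left-by-r macros used by the hash rounds
 * below; e.g. for a 32-bit operand, ROTL32(0x80000001, 1) == 0x00000003.
 */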

/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit: MurmurHash3_x64_128
 * 32-bit: JHash
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
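
/*
 * Minimal usage sketch (illustrative only, not part of the original file):
 * callers hash an arbitrary flow key through the function pointer selected
 * above.  The helper name, key layout and seed below are hypothetical; real
 * callers supply their own key structure and a randomly generated seed.
 */
#if 0
static u_int32_t
net_flowhash_example(const void *flow_key, u_int32_t key_len,
    u_int32_t random_seed)
{
	/* any byte string may be used as the key; a 32-bit hash is returned */
	return (*net_flowhash)(flow_key, key_len, random_seed);
}
#endif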

#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	return p[i];
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	return p[i];
}
#else /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int32_t value;

	if (ALIGNED32(p)) {
		value = p[i];
	} else {
#if BYTE_ORDER == BIG_ENDIAN
		value =
		    (((u_int32_t)bytes[0]) << 24) |
		    (((u_int32_t)bytes[1]) << 16) |
		    (((u_int32_t)bytes[2]) << 8) |
		    ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
		value =
		    (((u_int32_t)bytes[3]) << 24) |
		    (((u_int32_t)bytes[2]) << 16) |
		    (((u_int32_t)bytes[1]) << 8) |
		    ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */
	}
	return value;
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int64_t value;

	if (ALIGNED64(p)) {
		value = p[i];
	} else {
#if BYTE_ORDER == BIG_ENDIAN
		value =
		    (((u_int64_t)bytes[0]) << 56) |
		    (((u_int64_t)bytes[1]) << 48) |
		    (((u_int64_t)bytes[2]) << 40) |
		    (((u_int64_t)bytes[3]) << 32) |
		    (((u_int64_t)bytes[4]) << 24) |
		    (((u_int64_t)bytes[5]) << 16) |
		    (((u_int64_t)bytes[6]) << 8) |
		    ((u_int64_t)bytes[7]);
#else /* LITTLE_ENDIAN */
		value =
		    (((u_int64_t)bytes[7]) << 56) |
		    (((u_int64_t)bytes[6]) << 48) |
		    (((u_int64_t)bytes[5]) << 40) |
		    (((u_int64_t)bytes[4]) << 32) |
		    (((u_int64_t)bytes[3]) << 24) |
		    (((u_int64_t)bytes[2]) << 16) |
		    (((u_int64_t)bytes[1]) << 8) |
		    ((u_int64_t)bytes[0]);
#endif /* LITTLE_ENDIAN */
	}
	return value;
}
#endif /* !__i386__ && !__x86_64__ && !__arm64__ */

static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}

static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdLLU;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53LLU;
	k ^= k >> 33;

	return k;
}

/*
 * MurmurHash3_x86_32
 */
#define MH3_X86_32_C1	0xcc9e2d51
#define MH3_X86_32_C2	0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 4;
	const u_int32_t *blocks;
	const u_int8_t *tail;
	u_int32_t h1 = seed, k1;
	int i;

	/* body */
	blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

	for (i = -nblocks; i; i++) {
		k1 = getblock32(blocks, i);

		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;

		h1 ^= k1;
		h1 = ROTL32(h1, 13);
		h1 = h1 * 5 + 0xe6546b64;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
	k1 = 0;

	switch (len & 3) {
	case 3:
		k1 ^= tail[2] << 16;
		/* FALLTHRU */
	case 2:
		k1 ^= tail[1] << 8;
		/* FALLTHRU */
	case 1:
		k1 ^= tail[0];
		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;
		h1 ^= k1;
	}

	/* finalization */
	h1 ^= len;

	h1 = mh3_fmix32(h1);

	return h1;
}

/*
 * MurmurHash3_x64_128
 */
#define MH3_X64_128_C1	0x87c37b91114253d5LLU
#define MH3_X64_128_C2	0x4cf5ad432745937fLLU

u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 16;
	const u_int64_t *blocks;
	const u_int8_t *tail;
	u_int64_t h1 = seed, k1;
	u_int64_t h2 = seed, k2;
	u_int32_t i;

	/* body */
	blocks = (const u_int64_t *)(const void *)data;

	for (i = 0; i < nblocks; i++) {
		k1 = getblock64(blocks, i * 2 + 0);
		k2 = getblock64(blocks, i * 2 + 1);

		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;

#if defined(__x86_64__)
		__asm__ ( "rol $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
		h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
		h1 += h2;
		h1 = h1 * 5 + 0x52dce729;

		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;

#if defined(__x86_64__)
		__asm__ ( "rol $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
		h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
		h2 += h1;
		h2 = h2 * 5 + 0x38495ab5;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
	k1 = 0;
	k2 = 0;

	switch (len & 15) {
	case 15:
		k2 ^= ((u_int64_t)tail[14]) << 48;
		/* FALLTHRU */
	case 14:
		k2 ^= ((u_int64_t)tail[13]) << 40;
		/* FALLTHRU */
	case 13:
		k2 ^= ((u_int64_t)tail[12]) << 32;
		/* FALLTHRU */
	case 12:
		k2 ^= ((u_int64_t)tail[11]) << 24;
		/* FALLTHRU */
	case 11:
		k2 ^= ((u_int64_t)tail[10]) << 16;
		/* FALLTHRU */
	case 10:
		k2 ^= ((u_int64_t)tail[9]) << 8;
		/* FALLTHRU */
	case 9:
		k2 ^= ((u_int64_t)tail[8]) << 0;
		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;
		/* FALLTHRU */
	case 8:
		k1 ^= ((u_int64_t)tail[7]) << 56;
		/* FALLTHRU */
	case 7:
		k1 ^= ((u_int64_t)tail[6]) << 48;
		/* FALLTHRU */
	case 6:
		k1 ^= ((u_int64_t)tail[5]) << 40;
		/* FALLTHRU */
	case 5:
		k1 ^= ((u_int64_t)tail[4]) << 32;
		/* FALLTHRU */
	case 4:
		k1 ^= ((u_int64_t)tail[3]) << 24;
		/* FALLTHRU */
	case 3:
		k1 ^= ((u_int64_t)tail[2]) << 16;
		/* FALLTHRU */
	case 2:
		k1 ^= ((u_int64_t)tail[1]) << 8;
		/* FALLTHRU */
	case 1:
		k1 ^= ((u_int64_t)tail[0]) << 0;
		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;
	}

	/* finalization */
	h1 ^= len;
	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = mh3_fmix64(h1);
	h2 = mh3_fmix64(h2);

	h1 += h2;
	h2 += h1;

	/* throw away all but the lowest 32 bits */
	return h1 & 0xffffffff;
}

#define JHASH_INIT	0xdeadbeef

#define JHASH_MIX(a, b, c) {                    \
	a -= c; a ^= ROTL32(c, 4); c += b;      \
	b -= a; b ^= ROTL32(a, 6); a += c;      \
	c -= b; c ^= ROTL32(b, 8); b += a;      \
	a -= c; a ^= ROTL32(c, 16); c += b;     \
	b -= a; b ^= ROTL32(a, 19); a += c;     \
	c -= b; c ^= ROTL32(b, 4); b += a;      \
}

#define JHASH_FINAL(a, b, c) {                  \
	c ^= b; c -= ROTL32(b, 14);             \
	a ^= c; a -= ROTL32(c, 11);             \
	b ^= a; b -= ROTL32(a, 25);             \
	c ^= b; c -= ROTL32(b, 16);             \
	a ^= c; a -= ROTL32(c, 4);              \
	b ^= a; b -= ROTL32(a, 14);             \
	c ^= b; c -= ROTL32(b, 24);             \
}
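
/*
 * JHASH_INIT and the JHASH_MIX()/JHASH_FINAL() macros mirror the initializer
 * and the mix()/final() steps of Bob Jenkins' lookup3.c (credited above),
 * with the rotation constants unchanged; net_flowhash_jhash() below is
 * lookup3's hashbig()/hashlittle() adapted to the net_flowhash_fn_t
 * signature.
 */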

#if BYTE_ORDER == BIG_ENDIAN
/*
 * hashbig()
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

	if (ALIGNED32(key)) {
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] << 8" actually reads beyond the end of the string,
		 * but then shifts out the part it's not allowed to read.
		 * Because the string is aligned, the illegal read is in
		 * the same word as the rest of the string. The masking
		 * trick does make the hash noticeably faster for short
		 * strings (like English words).
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += k[2] & 0xffffff00;
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k[2] & 0xffff0000;
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k[2] & 0xff000000;
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += k[1] & 0xffffff00;
			a += k[0];
			break;

		case 6:
			b += k[1] & 0xffff0000;
			a += k[0];
			break;

		case 5:
			b += k[1] & 0xff000000;
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += k[0] & 0xffffff00;
			break;

		case 2:
			a += k[0] & 0xffff0000;
			break;

		case 1:
			a += k[0] & 0xff000000;
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}

	/* need to read the key one byte at a time */
	const u_int8_t *k = (const u_int8_t *)key;

	/* all but the last block: affect some 32 bits of (a,b,c) */
	while (len > 12) {
		a += ((u_int32_t)k[0]) << 24;
		a += ((u_int32_t)k[1]) << 16;
		a += ((u_int32_t)k[2]) << 8;
		a += ((u_int32_t)k[3]);
		b += ((u_int32_t)k[4]) << 24;
		b += ((u_int32_t)k[5]) << 16;
		b += ((u_int32_t)k[6]) << 8;
		b += ((u_int32_t)k[7]);
		c += ((u_int32_t)k[8]) << 24;
		c += ((u_int32_t)k[9]) << 16;
		c += ((u_int32_t)k[10]) << 8;
		c += ((u_int32_t)k[11]);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/* last block: affect all 32 bits of (c) */
	switch (len) {
	case 12:
		c += k[11];
		/* FALLTHRU */
	case 11:
		c += ((u_int32_t)k[10]) << 8;
		/* FALLTHRU */
	case 10:
		c += ((u_int32_t)k[9]) << 16;
		/* FALLTHRU */
	case 9:
		c += ((u_int32_t)k[8]) << 24;
		/* FALLTHRU */
	case 8:
		b += k[7];
		/* FALLTHRU */
	case 7:
		b += ((u_int32_t)k[6]) << 8;
		/* FALLTHRU */
	case 6:
		b += ((u_int32_t)k[5]) << 16;
		/* FALLTHRU */
	case 5:
		b += ((u_int32_t)k[4]) << 24;
		/* FALLTHRU */
	case 4:
		a += k[3];
		/* FALLTHRU */
	case 3:
		a += ((u_int32_t)k[2]) << 8;
		/* FALLTHRU */
	case 2:
		a += ((u_int32_t)k[1]) << 16;
		/* FALLTHRU */
	case 1:
		a += ((u_int32_t)k[0]) << 24;
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
}
#else /* LITTLE_ENDIAN */
/*
 * hashlittle()
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * On i386/x86_64, it is faster to read 32-bit chunks if the key
	 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
	 * is aligned 16-bit.
	 */
	if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
	if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] & 0xffffff" actually reads beyond the end of the
		 * string, but then masks off the part it's not allowed
		 * to read. Because the string is aligned, the masked-off
		 * tail is in the same word as the rest of the string.
		 * The masking trick does make the hash noticeably faster
		 * for short strings (like English words).
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;

		case 11:
			c += k[2] & 0xffffff;
			b += k[1];
			a += k[0];
			break;

		case 10:
			c += k[2] & 0xffff;
			b += k[1];
			a += k[0];
			break;

		case 9:
			c += k[2] & 0xff;
			b += k[1];
			a += k[0];
			break;

		case 8:
			b += k[1];
			a += k[0];
			break;

		case 7:
			b += k[1] & 0xffffff;
			a += k[0];
			break;

		case 6:
			b += k[1] & 0xffff;
			a += k[0];
			break;

		case 5:
			b += k[1] & 0xff;
			a += k[0];
			break;

		case 4:
			a += k[0];
			break;

		case 3:
			a += k[0] & 0xffffff;
			break;

		case 2:
			a += k[0] & 0xffff;
			break;

		case 1:
			a += k[0] & 0xff;
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}
#if !defined(__i386__) && !defined(__x86_64__)
	else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 16-bit chunks */
		const u_int16_t *k = (const u_int16_t *)key;
		const u_int8_t *k8;

		/* all but last block: aligned reads and different mixing */
		while (len > 12) {
			a += k[0] + (((u_int32_t)k[1]) << 16);
			b += k[2] + (((u_int32_t)k[3]) << 16);
			c += k[4] + (((u_int32_t)k[5]) << 16);
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 6;
		}

		/* handle the last (probably partial) block */
		k8 = (const u_int8_t *)k;
		switch (len) {
		case 12:
			c += k[4] + (((u_int32_t)k[5]) << 16);
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;

		case 11:
			c += ((u_int32_t)k8[10]) << 16;
			/* FALLTHRU */
		case 10:
			c += k[4];
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;

		case 9:
			c += k8[8];
			/* FALLTHRU */
		case 8:
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;

		case 7:
			b += ((u_int32_t)k8[6]) << 16;
			/* FALLTHRU */
		case 6:
			b += k[2];
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;

		case 5:
			b += k8[4];
			/* FALLTHRU */
		case 4:
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;

		case 3:
			a += ((u_int32_t)k8[2]) << 16;
			/* FALLTHRU */
		case 2:
			a += k[0];
			break;

		case 1:
			a += k8[0];
			break;

		case 0:
			/* zero length requires no mixing */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
#if !defined(__i386__) && !defined(__x86_64__)
	}

	/* need to read the key one byte at a time */
	const u_int8_t *k = (const u_int8_t *)key;

	/* all but the last block: affect some 32 bits of (a,b,c) */
	while (len > 12) {
		a += k[0];
		a += ((u_int32_t)k[1]) << 8;
		a += ((u_int32_t)k[2]) << 16;
		a += ((u_int32_t)k[3]) << 24;
		b += k[4];
		b += ((u_int32_t)k[5]) << 8;
		b += ((u_int32_t)k[6]) << 16;
		b += ((u_int32_t)k[7]) << 24;
		c += k[8];
		c += ((u_int32_t)k[9]) << 8;
		c += ((u_int32_t)k[10]) << 16;
		c += ((u_int32_t)k[11]) << 24;
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/* last block: affect all 32 bits of (c) */
	switch (len) {
	case 12:
		c += ((u_int32_t)k[11]) << 24;
		/* FALLTHRU */
	case 11:
		c += ((u_int32_t)k[10]) << 16;
		/* FALLTHRU */
	case 10:
		c += ((u_int32_t)k[9]) << 8;
		/* FALLTHRU */
	case 9:
		c += k[8];
		/* FALLTHRU */
	case 8:
		b += ((u_int32_t)k[7]) << 24;
		/* FALLTHRU */
	case 7:
		b += ((u_int32_t)k[6]) << 16;
		/* FALLTHRU */
	case 6:
		b += ((u_int32_t)k[5]) << 8;
		/* FALLTHRU */
	case 5:
		b += k[4];
		/* FALLTHRU */
	case 4:
		a += ((u_int32_t)k[3]) << 24;
		/* FALLTHRU */
	case 3:
		a += ((u_int32_t)k[2]) << 16;
		/* FALLTHRU */
	case 2:
		a += ((u_int32_t)k[1]) << 8;
		/* FALLTHRU */
	case 1:
		a += k[0];
		break;

	case 0:
		/* zero length requires no mixing */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */