]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/flowhash.c
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * http://code.google.com/p/smhasher/
32 * Copyright (c) 2009-2011 Austin Appleby.
34 * MurmurHash3 was written by Austin Appleby, and is placed in the public
35 * domain. The author hereby disclaims copyright to this source code.
39 * http://burtleburtle.net/bob/hash/
41 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
43 * You can use this free for any purpose. It's in the public domain.
48 #include <sys/types.h>
49 #include <machine/endian.h>
50 #include <net/flowhash.h>
/*
 * Forward declarations of the file-local helpers: getblock32/getblock64
 * take a block pointer and an int index, mh3_fmix32/mh3_fmix64 take and
 * return a single 32-bit/64-bit value.
 */
52 static inline u_int32_t
getblock32(const u_int32_t
*, int);
53 static inline u_int64_t
getblock64(const u_int64_t
*, int);
54 static inline u_int32_t
mh3_fmix32(u_int32_t
);
55 static inline u_int64_t
mh3_fmix64(u_int64_t
);
/*
 * Pointer-alignment predicates: each casts v to uintptr_t and evaluates
 * to 1 when the low 1, 2 or 3 bits of the address are all clear (i.e. the
 * address is a multiple of 2, 4 or 8 bytes respectively), 0 otherwise.
 */
57 #define ALIGNED16(v) ((((uintptr_t)(v)) & 1) == 0)
58 #define ALIGNED32(v) ((((uintptr_t)(v)) & 3) == 0)
59 #define ALIGNED64(v) ((((uintptr_t)(v)) & 7) == 0)
/*
 * Rotate x left by r bits within a 32-bit (ROTL32) or 64-bit (ROTL64)
 * word.
 *
 * Both shift counts are masked to the word width.  Without the mask a
 * rotate count of 0 would right-shift by the full width of the type,
 * which is undefined behavior in C; with it, r == 0 yields x unchanged.
 * For 0 < r < width the result is identical to the unmasked form.
 *
 * x must be an unsigned value of the matching width.  Both x and r are
 * evaluated more than once, so pass expressions without side effects.
 */
#define ROTL32(x, r) \
	(((x) << ((r) & 31)) | ((x) >> ((32 - ((r) & 31)) & 31)))
#define ROTL64(x, r) \
	(((x) << ((r) & 63)) | ((x) >> ((64 - ((r) & 63)) & 63)))
65 * The following hash algorithms are selected based on performance:
67 * Intel 32-bit: MurmurHash3_x86_32
68 * Intel 64-bit: MurmurHash3_x64_128
/*
 * Default flow hash implementation, chosen at compile time: x86_64
 * builds point net_flowhash at net_flowhash_mh3_x64_128, every other
 * target at net_flowhash_jhash.
 * NOTE(review): the list preceding this block still names
 * MurmurHash3_x86_32 for 32-bit Intel, but no branch here selects it;
 * i386 takes the net_flowhash_jhash default.
 */
71 #if defined(__x86_64__)
72 net_flowhash_fn_t
*net_flowhash
= net_flowhash_mh3_x64_128
;
73 #else /* !__x86_64__ */
74 net_flowhash_fn_t
*net_flowhash
= net_flowhash_jhash
;
75 #endif /* !__x86_64__ */
77 #if defined(__i386__) || defined(__x86_64__)
78 static inline u_int32_t
79 getblock32(const u_int32_t
*p
, int i
)
84 static inline u_int64_t
85 getblock64(const u_int64_t
*p
, int i
)
89 #else /* !__i386__ && !__x86_64__ */
90 static inline u_int32_t
91 getblock32(const u_int32_t
*p
, int i
)
93 const u_int8_t
*bytes
= (u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
99 #if BYTE_ORDER == BIG_ENDIAN
101 (((u_int32_t
)bytes
[0]) << 24) |
102 (((u_int32_t
)bytes
[1]) << 16) |
103 (((u_int32_t
)bytes
[2]) << 8) |
104 ((u_int32_t
)bytes
[3]);
105 #else /* LITTLE_ENDIAN */
107 (((u_int32_t
)bytes
[3]) << 24) |
108 (((u_int32_t
)bytes
[2]) << 16) |
109 (((u_int32_t
)bytes
[1]) << 8) |
110 ((u_int32_t
)bytes
[0]);
111 #endif /* LITTLE_ENDIAN */
116 static inline u_int64_t
117 getblock64(const u_int64_t
*p
, int i
)
119 const u_int8_t
*bytes
= (const u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
125 #if BYTE_ORDER == BIG_ENDIAN
127 (((u_int64_t
)bytes
[0]) << 56) |
128 (((u_int64_t
)bytes
[1]) << 48) |
129 (((u_int64_t
)bytes
[2]) << 40) |
130 (((u_int64_t
)bytes
[3]) << 32) |
131 (((u_int64_t
)bytes
[4]) << 24) |
132 (((u_int64_t
)bytes
[5]) << 16) |
133 (((u_int64_t
)bytes
[6]) << 8) |
134 ((u_int64_t
)bytes
[7]);
135 #else /* LITTLE_ENDIAN */
137 (((u_int64_t
)bytes
[7]) << 56) |
138 (((u_int64_t
)bytes
[6]) << 48) |
139 (((u_int64_t
)bytes
[5]) << 40) |
140 (((u_int64_t
)bytes
[4]) << 32) |
141 (((u_int64_t
)bytes
[3]) << 24) |
142 (((u_int64_t
)bytes
[2]) << 16) |
143 (((u_int64_t
)bytes
[1]) << 8) |
144 ((u_int64_t
)bytes
[0]);
145 #endif /* LITTLE_ENDIAN */
149 #endif /* !__i386__ && !__x86_64__ */
151 static inline u_int32_t
152 mh3_fmix32(u_int32_t h
)
163 static inline u_int64_t
164 mh3_fmix64(u_int64_t k
)
167 k
*= 0xff51afd7ed558ccdLLU
;
169 k
*= 0xc4ceb9fe1a85ec53LLU
;
/*
 * Constants C1/C2 for the MurmurHash3 x86_32 variant (per their names).
 * NOTE(review): presumably the per-block multipliers used by
 * net_flowhash_mh3_x86_32 -- their uses are not visible here; confirm.
 */
178 #define MH3_X86_32_C1 0xcc9e2d51
179 #define MH3_X86_32_C2 0x1b873593
182 net_flowhash_mh3_x86_32(const void *key
, u_int32_t len
, const u_int32_t seed
)
184 const u_int8_t
*data
= (const u_int8_t
*)key
;
185 const u_int32_t nblocks
= len
/ 4;
186 const u_int32_t
*blocks
;
187 const u_int8_t
*tail
;
188 u_int32_t h1
= seed
, k1
;
192 blocks
= (const u_int32_t
*)(const void *)(data
+ nblocks
* 4);
194 for (i
= -nblocks
; i
; i
++) {
195 k1
= getblock32(blocks
, i
);
203 h1
= h1
* 5 + 0xe6546b64;
207 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 4);
234 * MurmurHash3_x64_128
/*
 * 64-bit multipliers used by net_flowhash_mh3_x64_128: k1 is multiplied
 * by C1 and later by C2, k2 by C2 and later by C1, both for full 16-byte
 * blocks and for the tail bytes.
 */
236 #define MH3_X64_128_C1 0x87c37b91114253d5LLU
237 #define MH3_X64_128_C2 0x4cf5ad432745937fLLU
240 net_flowhash_mh3_x64_128(const void *key
, u_int32_t len
, const u_int32_t seed
)
242 const u_int8_t
*data
= (const u_int8_t
*)key
;
243 const u_int32_t nblocks
= len
/ 16;
244 const u_int64_t
*blocks
;
245 const u_int8_t
*tail
;
246 u_int64_t h1
= seed
, k1
;
247 u_int64_t h2
= seed
, k2
;
251 blocks
= (const u_int64_t
*)(const void *)data
;
253 for (i
= 0; i
< nblocks
; i
++) {
254 k1
= getblock64(blocks
, i
* 2 + 0);
255 k2
= getblock64(blocks
, i
* 2 + 1);
257 k1
*= MH3_X64_128_C1
;
259 k1
*= MH3_X64_128_C2
;
264 h1
= h1
* 5 + 0x52dce729;
266 k2
*= MH3_X64_128_C2
;
268 k2
*= MH3_X64_128_C1
;
273 h2
= h2
* 5+ 0x38495ab5;
277 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 16);
283 k2
^= ((u_int64_t
)tail
[14]) << 48;
286 k2
^= ((u_int64_t
)tail
[13]) << 40;
289 k2
^= ((u_int64_t
)tail
[12]) << 32;
292 k2
^= ((u_int64_t
)tail
[11]) << 24;
295 k2
^= ((u_int64_t
)tail
[10]) << 16;
298 k2
^= ((u_int64_t
)tail
[9]) << 8;
301 k2
^= ((u_int64_t
)tail
[8]) << 0;
302 k2
*= MH3_X64_128_C2
;
304 k2
*= MH3_X64_128_C1
;
308 k1
^= ((u_int64_t
)tail
[7]) << 56;
311 k1
^= ((u_int64_t
)tail
[6]) << 48;
314 k1
^= ((u_int64_t
)tail
[5]) << 40;
317 k1
^= ((u_int64_t
)tail
[4]) << 32;
320 k1
^= ((u_int64_t
)tail
[3]) << 24;
323 k1
^= ((u_int64_t
)tail
[2]) << 16;
326 k1
^= ((u_int64_t
)tail
[1]) << 8;
329 k1
^= ((u_int64_t
)tail
[0]) << 0;
330 k1
*= MH3_X64_128_C1
;
332 k1
*= MH3_X64_128_C2
;
349 /* throw away all but the lowest 32 bits */
350 return (h1
& 0xffffffff);
/*
 * Base value of the jhash internal state: net_flowhash_jhash starts
 * a, b and c all at JHASH_INIT + len + seed.
 */
353 #define JHASH_INIT 0xdeadbeef
355 #define JHASH_MIX(a, b, c) { \
356 a -= c; a ^= ROTL32(c, 4); c += b; \
357 b -= a; b ^= ROTL32(a, 6); a += c; \
358 c -= b; c ^= ROTL32(b, 8); b += a; \
359 a -= c; a ^= ROTL32(c, 16); c += b; \
360 b -= a; b ^= ROTL32(a, 19); a += c; \
361 c -= b; c ^= ROTL32(b, 4); b += a; \
364 #define JHASH_FINAL(a, b, c) { \
365 c ^= b; c -= ROTL32(b, 14); \
366 a ^= c; a -= ROTL32(c, 11); \
367 b ^= a; b -= ROTL32(a, 25); \
368 c ^= b; c -= ROTL32(b, 16); \
369 a ^= c; a -= ROTL32(c, 4); \
370 b ^= a; b -= ROTL32(a, 14); \
371 c ^= b; c -= ROTL32(b, 24); \
374 #if BYTE_ORDER == BIG_ENDIAN
379 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
383 /* Set up the internal state */
384 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
386 if (ALIGNED32(key
)) {
387 /* read 32-bit chunks */
388 const u_int32_t
*k
= (const u_int32_t
*)key
;
391 * all but last block:
392 * aligned reads and affect 32 bits of (a,b,c)
404 * handle the last (probably partial) block
406 * "k[2] & 0xffffff00" actually reads beyond the end of the string,
407 * but then masks off the part it's not allowed to read.
408 * Because the string is aligned, the illegal read is in
409 * the same word as the rest of the string. The masking
410 * trick does make the hash noticeably faster for short
411 * strings (like English words).
421 c
+= k
[2] & 0xffffff00;
427 c
+= k
[2] & 0xffff0000;
433 c
+= k
[2] & 0xff000000;
444 b
+= k
[1] & 0xffffff00;
449 b
+= k
[1] & 0xffff0000;
454 b
+= k
[1] & 0xff000000;
463 a
+= k
[0] & 0xffffff00;
467 a
+= k
[0] & 0xffff0000;
471 a
+= k
[0] & 0xff000000;
475 /* zero length requires no mixing */
479 JHASH_FINAL(a
, b
, c
);
484 /* need to read the key one byte at a time */
485 const u_int8_t
*k
= (const u_int8_t
*)key
;
487 /* all but the last block: affect some 32 bits of (a,b,c) */
489 a
+= ((u_int32_t
)k
[0]) << 24;
490 a
+= ((u_int32_t
)k
[1]) << 16;
491 a
+= ((u_int32_t
)k
[2]) << 8;
492 a
+= ((u_int32_t
)k
[3]);
493 b
+= ((u_int32_t
)k
[4]) << 24;
494 b
+= ((u_int32_t
)k
[5]) << 16;
495 b
+= ((u_int32_t
)k
[6]) << 8;
496 b
+= ((u_int32_t
)k
[7]);
497 c
+= ((u_int32_t
)k
[8]) << 24;
498 c
+= ((u_int32_t
)k
[9]) << 16;
499 c
+= ((u_int32_t
)k
[10]) << 8;
500 c
+= ((u_int32_t
)k
[11]);
506 /* last block: affect all 32 bits of (c) */
512 c
+= ((u_int32_t
)k
[10]) << 8;
515 c
+= ((u_int32_t
)k
[9]) << 16;
518 c
+= ((u_int32_t
)k
[8]) << 24;
524 b
+= ((u_int32_t
)k
[6]) << 8;
527 b
+= ((u_int32_t
)k
[5]) << 16;
530 b
+= ((u_int32_t
)k
[4]) << 24;
536 a
+= ((u_int32_t
)k
[2]) << 8;
539 a
+= ((u_int32_t
)k
[1]) << 16;
542 a
+= ((u_int32_t
)k
[0]) << 24;
546 /* zero length requires no mixing */
550 JHASH_FINAL(a
, b
, c
);
554 #else /* LITTLE_ENDIAN */
559 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
563 /* Set up the internal state */
564 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
566 #if defined(__i386__) || defined(__x86_64__)
568 * On i386/x86_64, it is faster to read 32-bit chunks if the key
569 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
572 if (ALIGNED32(key
) || !ALIGNED16(key
)) {
573 #else /* !defined(__i386__) && !defined(__x86_64__) */
574 if (ALIGNED32(key
)) {
575 #endif /* !defined(__i386__) && !defined(__x86_64__) */
576 /* read 32-bit chunks */
577 const u_int32_t
*k
= (const u_int32_t
*)key
;
580 * all but last block:
581 * aligned reads and affect 32 bits of (a,b,c)
593 * handle the last (probably partial) block
595 * "k[2] & 0xffffff" actually reads beyond the end of the
596 * string, but then masks off the part it's not allowed
597 * to read. Because the string is aligned, the masked-off
598 * tail is in the same word as the rest of the string.
599 * The masking trick does make the hash noticeably faster
600 * for short strings (like English words).
610 c
+= k
[2] & 0xffffff;
633 b
+= k
[1] & 0xffffff;
652 a
+= k
[0] & 0xffffff;
664 /* zero length requires no mixing */
668 JHASH_FINAL(a
, b
, c
);
672 #if !defined(__i386__) && !defined(__x86_64__)
673 else if (ALIGNED16(key
)) {
674 #endif /* !defined(__i386__) && !defined(__x86_64__) */
675 /* read 16-bit chunks */
676 const u_int16_t
*k
= (const u_int16_t
*)key
;
679 /* all but last block: aligned reads and different mixing */
681 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
682 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
683 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
689 /* handle the last (probably partial) block */
690 k8
= (const u_int8_t
*)k
;
693 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
694 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
695 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
699 c
+= ((u_int32_t
)k8
[10]) << 16;
703 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
704 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
711 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
712 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
716 b
+= ((u_int32_t
)k8
[6]) << 16;
720 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
727 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
731 a
+= ((u_int32_t
)k8
[2]) << 16;
742 /* zero length requires no mixing */
746 JHASH_FINAL(a
, b
, c
);
749 #if !defined(__i386__) && !defined(__x86_64__)
752 /* need to read the key one byte at a time */
753 const u_int8_t
*k
= (const u_int8_t
*)key
;
755 /* all but the last block: affect some 32 bits of (a,b,c) */
758 a
+= ((u_int32_t
)k
[1]) << 8;
759 a
+= ((u_int32_t
)k
[2]) << 16;
760 a
+= ((u_int32_t
)k
[3]) << 24;
762 b
+= ((u_int32_t
)k
[5]) << 8;
763 b
+= ((u_int32_t
)k
[6]) << 16;
764 b
+= ((u_int32_t
)k
[7]) << 24;
766 c
+= ((u_int32_t
)k
[9]) << 8;
767 c
+= ((u_int32_t
)k
[10]) << 16;
768 c
+= ((u_int32_t
)k
[11]) << 24;
774 /* last block: affect all 32 bits of (c) */
777 c
+= ((u_int32_t
)k
[11]) << 24;
780 c
+= ((u_int32_t
)k
[10]) << 16;
783 c
+= ((u_int32_t
)k
[9]) << 8;
789 b
+= ((u_int32_t
)k
[7]) << 24;
792 b
+= ((u_int32_t
)k
[6]) << 16;
795 b
+= ((u_int32_t
)k
[5]) << 8;
801 a
+= ((u_int32_t
)k
[3]) << 24;
804 a
+= ((u_int32_t
)k
[2]) << 16;
807 a
+= ((u_int32_t
)k
[1]) << 8;
814 /* zero length requires no mixing */
818 JHASH_FINAL(a
, b
, c
);
821 #endif /* !defined(__i386__) && !defined(__x86_64__) */
823 #endif /* LITTLE_ENDIAN */