]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/flowhash.c
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * http://code.google.com/p/smhasher/
32 * Copyright (c) 2009-2011 Austin Appleby.
34 * MurmurHash3 was written by Austin Appleby, and is placed in the public
35 * domain. The author hereby disclaims copyright to this source code.
39 * http://burtleburtle.net/bob/hash/
41 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
43 * You can use this free for any purpose. It's in the public domain.
48 #include <sys/types.h>
49 #include <machine/endian.h>
50 #include <net/flowhash.h>
/*
 * Forward declarations of the file-local helpers.  Parameter names mirror
 * the ones used by the definitions further down in this file:
 * getblock32/getblock64 read the element at offset i from p, and
 * mh3_fmix32/mh3_fmix64 each take a single 32-bit or 64-bit word.
 */
static inline u_int32_t getblock32(const u_int32_t *p, int i);
static inline u_int64_t getblock64(const u_int64_t *p, int i);
static inline u_int32_t mh3_fmix32(u_int32_t h);
static inline u_int64_t mh3_fmix64(u_int64_t k);
/*
 * Alignment predicates: non-zero when the address held in (v) is a
 * multiple of 2, 4 or 8 bytes respectively.
 */
#define ALIGNED16(v)	((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)	((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)	((((uintptr_t)(v)) & 7) == 0)

/*
 * Rotate (x) left by (r) bits.  (x) must be an unsigned 32-bit value for
 * ROTL32 and an unsigned 64-bit value for ROTL64; both arguments are
 * evaluated twice.  The shift counts are masked so that a rotation by 0
 * never shifts by the full width of the type, which is undefined behavior
 * in C.  For 0 < r < width the result is the same as the unmasked form.
 */
#define ROTL32(x, r)	(((x) << ((r) & 31)) | ((x) >> ((32 - (r)) & 31)))
#define ROTL64(x, r)	(((x) << ((r) & 63)) | ((x) >> ((64 - (r)) & 63)))
65 * The following hash algorithms are selected based on performance:
67 * 64-bit: MurmurHash3_x64_128
71 net_flowhash_fn_t
*net_flowhash
= net_flowhash_mh3_x64_128
;
73 net_flowhash_fn_t
*net_flowhash
= net_flowhash_jhash
;
74 #endif /* !__LP64__ */
76 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
77 static inline u_int32_t
78 getblock32(const u_int32_t
*p
, int i
)
83 static inline u_int64_t
84 getblock64(const u_int64_t
*p
, int i
)
88 #else /* !__i386__ && !__x86_64__ && !__arm64__*/
89 static inline u_int32_t
90 getblock32(const u_int32_t
*p
, int i
)
92 const u_int8_t
*bytes
= (u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
98 #if BYTE_ORDER == BIG_ENDIAN
100 (((u_int32_t
)bytes
[0]) << 24) |
101 (((u_int32_t
)bytes
[1]) << 16) |
102 (((u_int32_t
)bytes
[2]) << 8) |
103 ((u_int32_t
)bytes
[3]);
104 #else /* LITTLE_ENDIAN */
106 (((u_int32_t
)bytes
[3]) << 24) |
107 (((u_int32_t
)bytes
[2]) << 16) |
108 (((u_int32_t
)bytes
[1]) << 8) |
109 ((u_int32_t
)bytes
[0]);
110 #endif /* LITTLE_ENDIAN */
115 static inline u_int64_t
116 getblock64(const u_int64_t
*p
, int i
)
118 const u_int8_t
*bytes
= (const u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
124 #if BYTE_ORDER == BIG_ENDIAN
126 (((u_int64_t
)bytes
[0]) << 56) |
127 (((u_int64_t
)bytes
[1]) << 48) |
128 (((u_int64_t
)bytes
[2]) << 40) |
129 (((u_int64_t
)bytes
[3]) << 32) |
130 (((u_int64_t
)bytes
[4]) << 24) |
131 (((u_int64_t
)bytes
[5]) << 16) |
132 (((u_int64_t
)bytes
[6]) << 8) |
133 ((u_int64_t
)bytes
[7]);
134 #else /* LITTLE_ENDIAN */
136 (((u_int64_t
)bytes
[7]) << 56) |
137 (((u_int64_t
)bytes
[6]) << 48) |
138 (((u_int64_t
)bytes
[5]) << 40) |
139 (((u_int64_t
)bytes
[4]) << 32) |
140 (((u_int64_t
)bytes
[3]) << 24) |
141 (((u_int64_t
)bytes
[2]) << 16) |
142 (((u_int64_t
)bytes
[1]) << 8) |
143 ((u_int64_t
)bytes
[0]);
144 #endif /* LITTLE_ENDIAN */
148 #endif /* !__i386__ && !__x86_64__ && !__arm64__ */
150 static inline u_int32_t
151 mh3_fmix32(u_int32_t h
)
162 static inline u_int64_t
163 mh3_fmix64(u_int64_t k
)
166 k
*= 0xff51afd7ed558ccdLLU
;
168 k
*= 0xc4ceb9fe1a85ec53LLU
;
177 #define MH3_X86_32_C1 0xcc9e2d51
178 #define MH3_X86_32_C2 0x1b873593
181 net_flowhash_mh3_x86_32(const void *key
, u_int32_t len
, const u_int32_t seed
)
183 const u_int8_t
*data
= (const u_int8_t
*)key
;
184 const u_int32_t nblocks
= len
/ 4;
185 const u_int32_t
*blocks
;
186 const u_int8_t
*tail
;
187 u_int32_t h1
= seed
, k1
;
191 blocks
= (const u_int32_t
*)(const void *)(data
+ nblocks
* 4);
193 for (i
= -nblocks
; i
; i
++) {
194 k1
= getblock32(blocks
, i
);
202 h1
= h1
* 5 + 0xe6546b64;
206 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 4);
233 * MurmurHash3_x64_128
235 #define MH3_X64_128_C1 0x87c37b91114253d5LLU
236 #define MH3_X64_128_C2 0x4cf5ad432745937fLLU
239 net_flowhash_mh3_x64_128(const void *key
, u_int32_t len
, const u_int32_t seed
)
241 const u_int8_t
*data
= (const u_int8_t
*)key
;
242 const u_int32_t nblocks
= len
/ 16;
243 const u_int64_t
*blocks
;
244 const u_int8_t
*tail
;
245 u_int64_t h1
= seed
, k1
;
246 u_int64_t h2
= seed
, k2
;
250 blocks
= (const u_int64_t
*)(const void *)data
;
252 for (i
= 0; i
< nblocks
; i
++) {
253 k1
= getblock64(blocks
, i
* 2 + 0);
254 k2
= getblock64(blocks
, i
* 2 + 1);
256 k1
*= MH3_X64_128_C1
;
257 #if defined(__x86_64__)
258 __asm__ ( "rol $31, %[k1]\n\t" :[k1
] "+r" (k1
) : :);
259 #elif defined(__arm64__)
260 __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1
] "+r" (k1
) : :);
261 #else /* !__x86_64__ && !__arm64__ */
263 #endif /* !__x86_64__ && !__arm64__ */
264 k1
*= MH3_X64_128_C2
;
267 #if defined(__x86_64__)
268 __asm__ ( "rol $27, %[h1]\n\t" :[h1
] "+r" (h1
) : :);
269 #elif defined(__arm64__)
270 __asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1
] "+r" (h1
) : :);
271 #else /* !__x86_64__ && !__arm64__ */
273 #endif /* !__x86_64__ && !__arm64__ */
275 h1
= h1
* 5 + 0x52dce729;
277 k2
*= MH3_X64_128_C2
;
278 #if defined(__x86_64__)
279 __asm__ ( "rol $33, %[k2]\n\t" :[k2
] "+r" (k2
) : :);
280 #elif defined(__arm64__)
281 __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2
] "+r" (k2
) : :);
282 #else /* !__x86_64__ && !__arm64__ */
284 #endif /* !__x86_64__ && !__arm64__ */
285 k2
*= MH3_X64_128_C1
;
288 #if defined(__x86_64__)
289 __asm__ ( "rol $31, %[h2]\n\t" :[h2
] "+r" (h2
) : :);
290 #elif defined(__arm64__)
291 __asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2
] "+r" (h2
) : :);
292 #else /* !__x86_64__ && !__arm64__ */
294 #endif /* !__x86_64__ && !__arm64__ */
296 h2
= h2
* 5+ 0x38495ab5;
300 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 16);
306 k2
^= ((u_int64_t
)tail
[14]) << 48;
309 k2
^= ((u_int64_t
)tail
[13]) << 40;
312 k2
^= ((u_int64_t
)tail
[12]) << 32;
315 k2
^= ((u_int64_t
)tail
[11]) << 24;
318 k2
^= ((u_int64_t
)tail
[10]) << 16;
321 k2
^= ((u_int64_t
)tail
[9]) << 8;
324 k2
^= ((u_int64_t
)tail
[8]) << 0;
325 k2
*= MH3_X64_128_C2
;
326 #if defined(__x86_64__)
327 __asm__ ( "rol $33, %[k2]\n\t" :[k2
] "+r" (k2
) : :);
328 #elif defined(__arm64__)
329 __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2
] "+r" (k2
) : :);
330 #else /* !__x86_64__ && !__arm64__ */
332 #endif /* !__x86_64__ && !__arm64__ */
333 k2
*= MH3_X64_128_C1
;
337 k1
^= ((u_int64_t
)tail
[7]) << 56;
340 k1
^= ((u_int64_t
)tail
[6]) << 48;
343 k1
^= ((u_int64_t
)tail
[5]) << 40;
346 k1
^= ((u_int64_t
)tail
[4]) << 32;
349 k1
^= ((u_int64_t
)tail
[3]) << 24;
352 k1
^= ((u_int64_t
)tail
[2]) << 16;
355 k1
^= ((u_int64_t
)tail
[1]) << 8;
358 k1
^= ((u_int64_t
)tail
[0]) << 0;
359 k1
*= MH3_X64_128_C1
;
360 #if defined(__x86_64__)
361 __asm__ ( "rol $31, %[k1]\n\t" :[k1
] "+r" (k1
) : :);
362 #elif defined(__arm64__)
363 __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1
] "+r" (k1
) : :);
364 #else /* !__x86_64__ && !__arm64__ */
366 #endif /* !__x86_64__ && !__arm64__ */
367 k1
*= MH3_X64_128_C2
;
384 /* discard all but the lowest 32 bits */
385 return (h1
& 0xffffffff);
388 #define JHASH_INIT 0xdeadbeef
/*
 * Mix the three 32-bit state words a, b and c in place.  All three
 * arguments must be modifiable lvalues; each is evaluated several times,
 * so they must not have side effects.  The expansion is a brace-enclosed
 * compound statement.
 */
#define JHASH_MIX(a, b, c) {				\
	a -= c;  a ^= ROTL32(c, 4);   c += b;		\
	b -= a;  b ^= ROTL32(a, 6);   a += c;		\
	c -= b;  c ^= ROTL32(b, 8);   b += a;		\
	a -= c;  a ^= ROTL32(c, 16);  c += b;		\
	b -= a;  b ^= ROTL32(a, 19);  a += c;		\
	c -= b;  c ^= ROTL32(b, 4);   b += a;		\
}
/*
 * Final scrambling of the three 32-bit state words a, b and c, in place.
 * All three arguments must be modifiable lvalues; each is evaluated
 * several times, so they must not have side effects.  The expansion is a
 * brace-enclosed compound statement.
 */
#define JHASH_FINAL(a, b, c) {				\
	c ^= b;  c -= ROTL32(b, 14);			\
	a ^= c;  a -= ROTL32(c, 11);			\
	b ^= a;  b -= ROTL32(a, 25);			\
	c ^= b;  c -= ROTL32(b, 16);			\
	a ^= c;  a -= ROTL32(c, 4);			\
	b ^= a;  b -= ROTL32(a, 14);			\
	c ^= b;  c -= ROTL32(b, 24);			\
}
409 #if BYTE_ORDER == BIG_ENDIAN
414 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
418 /* Set up the internal state */
419 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
421 if (ALIGNED32(key
)) {
422 /* read 32-bit chunks */
423 const u_int32_t
*k
= (const u_int32_t
*)key
;
426 * all but last block:
427 * aligned reads and affect 32 bits of (a,b,c)
439 * handle the last (probably partial) block
441 * "k[2] << 8" actually reads beyond the end of the string,
442 * but then shifts out the part it's not allowed to read.
443 * Because the string is aligned, the illegal read is in
444 * the same word as the rest of the string. The masking
445 * trick does make the hash noticeably faster for short
446 * strings (like English words).
456 c
+= k
[2] & 0xffffff00;
462 c
+= k
[2] & 0xffff0000;
468 c
+= k
[2] & 0xff000000;
479 b
+= k
[1] & 0xffffff00;
484 b
+= k
[1] & 0xffff0000;
489 b
+= k
[1] & 0xff000000;
498 a
+= k
[0] & 0xffffff00;
502 a
+= k
[0] & 0xffff0000;
506 a
+= k
[0] & 0xff000000;
510 /* zero length requires no mixing */
514 JHASH_FINAL(a
, b
, c
);
519 /* need to read the key one byte at a time */
520 const u_int8_t
*k
= (const u_int8_t
*)key
;
522 /* all but the last block: affect some 32 bits of (a,b,c) */
524 a
+= ((u_int32_t
)k
[0]) << 24;
525 a
+= ((u_int32_t
)k
[1]) << 16;
526 a
+= ((u_int32_t
)k
[2]) << 8;
527 a
+= ((u_int32_t
)k
[3]);
528 b
+= ((u_int32_t
)k
[4]) << 24;
529 b
+= ((u_int32_t
)k
[5]) << 16;
530 b
+= ((u_int32_t
)k
[6]) << 8;
531 b
+= ((u_int32_t
)k
[7]);
532 c
+= ((u_int32_t
)k
[8]) << 24;
533 c
+= ((u_int32_t
)k
[9]) << 16;
534 c
+= ((u_int32_t
)k
[10]) << 8;
535 c
+= ((u_int32_t
)k
[11]);
541 /* last block: affect all 32 bits of (c) */
547 c
+= ((u_int32_t
)k
[10]) << 8;
550 c
+= ((u_int32_t
)k
[9]) << 16;
553 c
+= ((u_int32_t
)k
[8]) << 24;
559 b
+= ((u_int32_t
)k
[6]) << 8;
562 b
+= ((u_int32_t
)k
[5]) << 16;
565 b
+= ((u_int32_t
)k
[4]) << 24;
571 a
+= ((u_int32_t
)k
[2]) << 8;
574 a
+= ((u_int32_t
)k
[1]) << 16;
577 a
+= ((u_int32_t
)k
[0]) << 24;
581 /* zero length requires no mixing */
585 JHASH_FINAL(a
, b
, c
);
589 #else /* LITTLE_ENDIAN */
594 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
598 /* Set up the internal state */
599 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
601 #if defined(__i386__) || defined(__x86_64__)
603 * On i386/x86_64, it is faster to read 32-bit chunks if the key
604 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
607 if (ALIGNED32(key
) || !ALIGNED16(key
)) {
608 #else /* !defined(__i386__) && !defined(__x86_64__) */
609 if (ALIGNED32(key
)) {
610 #endif /* !defined(__i386__) && !defined(__x86_64__) */
611 /* read 32-bit chunks */
612 const u_int32_t
*k
= (const u_int32_t
*)key
;
615 * all but last block:
616 * aligned reads and affect 32 bits of (a,b,c)
628 * handle the last (probably partial) block
630 * "k[2] & 0xffffff" actually reads beyond the end of the
631 * string, but then masks off the part it's not allowed
632 * to read. Because the string is aligned, the masked-off
633 * tail is in the same word as the rest of the string.
634 * The masking trick does make the hash noticeably faster
635 * for short strings (like English words).
645 c
+= k
[2] & 0xffffff;
668 b
+= k
[1] & 0xffffff;
687 a
+= k
[0] & 0xffffff;
699 /* zero length requires no mixing */
703 JHASH_FINAL(a
, b
, c
);
707 #if !defined(__i386__) && !defined(__x86_64__)
708 else if (ALIGNED16(key
)) {
709 #endif /* !defined(__i386__) && !defined(__x86_64__) */
710 /* read 16-bit chunks */
711 const u_int16_t
*k
= (const u_int16_t
*)key
;
714 /* all but last block: aligned reads and different mixing */
716 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
717 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
718 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
724 /* handle the last (probably partial) block */
725 k8
= (const u_int8_t
*)k
;
728 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
729 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
730 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
734 c
+= ((u_int32_t
)k8
[10]) << 16;
738 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
739 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
746 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
747 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
751 b
+= ((u_int32_t
)k8
[6]) << 16;
755 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
762 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
766 a
+= ((u_int32_t
)k8
[2]) << 16;
777 /* zero length requires no mixing */
781 JHASH_FINAL(a
, b
, c
);
784 #if !defined(__i386__) && !defined(__x86_64__)
787 /* need to read the key one byte at a time */
788 const u_int8_t
*k
= (const u_int8_t
*)key
;
790 /* all but the last block: affect some 32 bits of (a,b,c) */
793 a
+= ((u_int32_t
)k
[1]) << 8;
794 a
+= ((u_int32_t
)k
[2]) << 16;
795 a
+= ((u_int32_t
)k
[3]) << 24;
797 b
+= ((u_int32_t
)k
[5]) << 8;
798 b
+= ((u_int32_t
)k
[6]) << 16;
799 b
+= ((u_int32_t
)k
[7]) << 24;
801 c
+= ((u_int32_t
)k
[9]) << 8;
802 c
+= ((u_int32_t
)k
[10]) << 16;
803 c
+= ((u_int32_t
)k
[11]) << 24;
809 /* last block: affect all 32 bits of (c) */
812 c
+= ((u_int32_t
)k
[11]) << 24;
815 c
+= ((u_int32_t
)k
[10]) << 16;
818 c
+= ((u_int32_t
)k
[9]) << 8;
824 b
+= ((u_int32_t
)k
[7]) << 24;
827 b
+= ((u_int32_t
)k
[6]) << 16;
830 b
+= ((u_int32_t
)k
[5]) << 8;
836 a
+= ((u_int32_t
)k
[3]) << 24;
839 a
+= ((u_int32_t
)k
[2]) << 16;
842 a
+= ((u_int32_t
)k
[1]) << 8;
849 /* zero length requires no mixing */
853 JHASH_FINAL(a
, b
, c
);
856 #endif /* !defined(__i386__) && !defined(__x86_64__) */
858 #endif /* LITTLE_ENDIAN */