/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose. It's in the public domain.
 */
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>
static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);
#define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)

#define ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
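/*
 * Worked example (illustrative note, not part of the original source):
 * ROTL32 rotates a 32-bit value left by r bits, carrying the high bits
 * back into the low end, e.g. ROTL32(0x80000001, 1) == 0x00000003 and
 * ROTL32(0x00000001, 8) == 0x00000100.
 */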
/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit: MurmurHash3_x64_128
 * 32-bit: JHash
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
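/*
 * Illustrative usage sketch (not part of the original file): callers hash an
 * arbitrary key buffer through the function pointer selected above.  The
 * flow-tuple layout and seed value below are hypothetical and only show the
 * net_flowhash_fn_t calling convention (key pointer, length in bytes,
 * 32-bit seed):
 *
 *      struct ft { u_int32_t saddr, daddr; u_int16_t sport, dport; } ft;
 *      u_int32_t seed = 0x5a5a5a5a;    (hypothetical seed value)
 *      u_int32_t hash = net_flowhash(&ft, sizeof (ft), seed);
 */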
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
    return (p[i]);
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
    return (p[i]);
}
#else /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
    const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
    u_int32_t value;

#if BYTE_ORDER == BIG_ENDIAN
    value =
        (((u_int32_t)bytes[0]) << 24) |
        (((u_int32_t)bytes[1]) << 16) |
        (((u_int32_t)bytes[2]) << 8) |
        ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
    value =
        (((u_int32_t)bytes[3]) << 24) |
        (((u_int32_t)bytes[2]) << 16) |
        (((u_int32_t)bytes[1]) << 8) |
        ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

    return (value);
}
static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
    const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
    u_int64_t value;

#if BYTE_ORDER == BIG_ENDIAN
    value =
        (((u_int64_t)bytes[0]) << 56) |
        (((u_int64_t)bytes[1]) << 48) |
        (((u_int64_t)bytes[2]) << 40) |
        (((u_int64_t)bytes[3]) << 32) |
        (((u_int64_t)bytes[4]) << 24) |
        (((u_int64_t)bytes[5]) << 16) |
        (((u_int64_t)bytes[6]) << 8) |
        ((u_int64_t)bytes[7]);
#else /* LITTLE_ENDIAN */
    value =
        (((u_int64_t)bytes[7]) << 56) |
        (((u_int64_t)bytes[6]) << 48) |
        (((u_int64_t)bytes[5]) << 40) |
        (((u_int64_t)bytes[4]) << 32) |
        (((u_int64_t)bytes[3]) << 24) |
        (((u_int64_t)bytes[2]) << 16) |
        (((u_int64_t)bytes[1]) << 8) |
        ((u_int64_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

    return (value);
}
#endif /* !__i386__ && !__x86_64__ && !__arm64__ */
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return (h);
}
static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
    k ^= k >> 33;
    k *= 0xff51afd7ed558ccdLLU;
    k ^= k >> 33;
    k *= 0xc4ceb9fe1a85ec53LLU;
    k ^= k >> 33;

    return (k);
}
/*
 * MurmurHash3_x86_32
 */
#define MH3_X86_32_C1   0xcc9e2d51
#define MH3_X86_32_C2   0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
    const u_int8_t *data = (const u_int8_t *)key;
    const u_int32_t nblocks = len / 4;
    const u_int32_t *blocks;
    const u_int8_t *tail;
    u_int32_t h1 = seed, k1;
    int i;

    /* body */
    blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

    for (i = -nblocks; i; i++) {
        k1 = getblock32(blocks, i);

        k1 *= MH3_X86_32_C1;
        k1 = ROTL32(k1, 15);
        k1 *= MH3_X86_32_C2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    /* tail */
    tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
    k1 = 0;

    switch (len & 3) {
    case 3:
        k1 ^= tail[2] << 16;
        /* FALLTHRU */
    case 2:
        k1 ^= tail[1] << 8;
        /* FALLTHRU */
    case 1:
        k1 ^= tail[0];
        k1 *= MH3_X86_32_C1;
        k1 = ROTL32(k1, 15);
        k1 *= MH3_X86_32_C2;
        h1 ^= k1;
    };

    /* finalization */
    h1 ^= len;

    h1 = mh3_fmix32(h1);

    return (h1);
}
/*
 * MurmurHash3_x64_128
 */
#define MH3_X64_128_C1  0x87c37b91114253d5LLU
#define MH3_X64_128_C2  0x4cf5ad432745937fLLU

u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
    const u_int8_t *data = (const u_int8_t *)key;
    const u_int32_t nblocks = len / 16;
    const u_int64_t *blocks;
    const u_int8_t *tail;
    u_int64_t h1 = seed, k1;
    u_int64_t h2 = seed, k2;
    u_int32_t i;

    /* body */
    blocks = (const u_int64_t *)(const void *)data;

    for (i = 0; i < nblocks; i++) {
        k1 = getblock64(blocks, i * 2 + 0);
        k2 = getblock64(blocks, i * 2 + 1);

        k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
        __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
        k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
        k1 *= MH3_X64_128_C2;
        h1 ^= k1;

#if defined(__x86_64__)
        __asm__ ( "rol $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
        h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
        __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
        k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
        k2 *= MH3_X64_128_C1;
        h2 ^= k2;

#if defined(__x86_64__)
        __asm__ ( "rol $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
        h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }
    /* tail */
    tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
    k1 = 0;
    k2 = 0;

    switch (len & 15) {
    case 15:
        k2 ^= ((u_int64_t)tail[14]) << 48;
        /* FALLTHRU */
    case 14:
        k2 ^= ((u_int64_t)tail[13]) << 40;
        /* FALLTHRU */
    case 13:
        k2 ^= ((u_int64_t)tail[12]) << 32;
        /* FALLTHRU */
    case 12:
        k2 ^= ((u_int64_t)tail[11]) << 24;
        /* FALLTHRU */
    case 11:
        k2 ^= ((u_int64_t)tail[10]) << 16;
        /* FALLTHRU */
    case 10:
        k2 ^= ((u_int64_t)tail[9]) << 8;
        /* FALLTHRU */
    case 9:
        k2 ^= ((u_int64_t)tail[8]) << 0;
        k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
        __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
        k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
        k2 *= MH3_X64_128_C1;
        h2 ^= k2;
        /* FALLTHRU */
    case 8:
        k1 ^= ((u_int64_t)tail[7]) << 56;
        /* FALLTHRU */
    case 7:
        k1 ^= ((u_int64_t)tail[6]) << 48;
        /* FALLTHRU */
    case 6:
        k1 ^= ((u_int64_t)tail[5]) << 40;
        /* FALLTHRU */
    case 5:
        k1 ^= ((u_int64_t)tail[4]) << 32;
        /* FALLTHRU */
    case 4:
        k1 ^= ((u_int64_t)tail[3]) << 24;
        /* FALLTHRU */
    case 3:
        k1 ^= ((u_int64_t)tail[2]) << 16;
        /* FALLTHRU */
    case 2:
        k1 ^= ((u_int64_t)tail[1]) << 8;
        /* FALLTHRU */
    case 1:
        k1 ^= ((u_int64_t)tail[0]) << 0;
        k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
        __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
        __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
        k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
        k1 *= MH3_X64_128_C2;
        h1 ^= k1;
    };
    /* finalization */
    h1 ^= len;
    h2 ^= len;

    h1 += h2;
    h2 += h1;

    h1 = mh3_fmix64(h1);
    h2 = mh3_fmix64(h2);

    h1 += h2;
    h2 += h1;

    /* throw all but lowest 32-bit */
    return (h1 & 0xffffffff);
}
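/*
 * Illustrative note (not part of the original file): MurmurHash3_x64_128
 * produces a 128-bit result in (h1, h2), but the net_flowhash_fn_t interface
 * returns a u_int32_t, so only the low 32 bits of h1 are kept; h2 still
 * influences those bits through the finalization steps above.
 */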
#define JHASH_INIT      0xdeadbeef

#define JHASH_MIX(a, b, c) {                    \
    a -= c;  a ^= ROTL32(c, 4);   c += b;       \
    b -= a;  b ^= ROTL32(a, 6);   a += c;       \
    c -= b;  c ^= ROTL32(b, 8);   b += a;       \
    a -= c;  a ^= ROTL32(c, 16);  c += b;       \
    b -= a;  b ^= ROTL32(a, 19);  a += c;       \
    c -= b;  c ^= ROTL32(b, 4);   b += a;       \
}

#define JHASH_FINAL(a, b, c) {                  \
    c ^= b;  c -= ROTL32(b, 14);                \
    a ^= c;  a -= ROTL32(c, 11);                \
    b ^= a;  b -= ROTL32(a, 25);                \
    c ^= b;  c -= ROTL32(b, 16);                \
    a ^= c;  a -= ROTL32(c, 4);                 \
    b ^= a;  b -= ROTL32(a, 14);                \
    c ^= b;  c -= ROTL32(b, 24);                \
}
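/*
 * Illustrative note (not part of the original file): JHASH_MIX and
 * JHASH_FINAL are the mix()/final() rounds from Bob Jenkins' lookup3.c,
 * credited in the header above.  Each 12-byte chunk of the key is folded
 * into the (a, b, c) accumulators with JHASH_MIX, and JHASH_FINAL collapses
 * the three accumulators into the value returned in c.
 */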
#if BYTE_ORDER == BIG_ENDIAN
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
    u_int32_t a, b, c;

    /* Set up the internal state */
    a = b = c = JHASH_INIT + len + seed;

    if (ALIGNED32(key)) {
        /* read 32-bit chunks */
        const u_int32_t *k = (const u_int32_t *)key;
        /*
         * all but last block:
         * aligned reads and affect 32 bits of (a,b,c)
         */
        while (len > 12) {
            a += k[0];
            b += k[1];
            c += k[2];
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 3;
        }

        /*
         * handle the last (probably partial) block
         *
         * "k[2] << 8" actually reads beyond the end of the string,
         * but then shifts out the part it's not allowed to read.
         * Because the string is aligned, the illegal read is in
         * the same word as the rest of the string.  The masking
         * trick does make the hash noticeably faster for short
         * strings (like English words).
         */
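        /*
         * Illustrative note (not part of the original file): on a
         * big-endian machine, a 10-byte aligned key still occupies three
         * 32-bit words, so reading k[2] pulls in two bytes past the end of
         * the key; masking with 0xffff0000 keeps only the two valid
         * (most significant) bytes and discards the over-read, which is why
         * the partial-block cases below mask instead of re-reading the tail
         * byte by byte.
         */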
        c += k[2] & 0xffffff00;
        c += k[2] & 0xffff0000;
        c += k[2] & 0xff000000;
        b += k[1] & 0xffffff00;
        b += k[1] & 0xffff0000;
        b += k[1] & 0xff000000;
        a += k[0] & 0xffffff00;
        a += k[0] & 0xffff0000;
        a += k[0] & 0xff000000;

        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
    /* need to read the key one byte at a time */
    const u_int8_t *k = (const u_int8_t *)key;

    /* all but the last block: affect some 32 bits of (a,b,c) */
    while (len > 12) {
        a += ((u_int32_t)k[0]) << 24;
        a += ((u_int32_t)k[1]) << 16;
        a += ((u_int32_t)k[2]) << 8;
        a += ((u_int32_t)k[3]);
        b += ((u_int32_t)k[4]) << 24;
        b += ((u_int32_t)k[5]) << 16;
        b += ((u_int32_t)k[6]) << 8;
        b += ((u_int32_t)k[7]);
        c += ((u_int32_t)k[8]) << 24;
        c += ((u_int32_t)k[9]) << 16;
        c += ((u_int32_t)k[10]) << 8;
        c += ((u_int32_t)k[11]);
        JHASH_MIX(a, b, c);
        len -= 12;
        k += 12;
    }
    /* last block: affect all 32 bits of (c) */
    c += ((u_int32_t)k[10]) << 8;
    c += ((u_int32_t)k[9]) << 16;
    c += ((u_int32_t)k[8]) << 24;
    b += ((u_int32_t)k[6]) << 8;
    b += ((u_int32_t)k[5]) << 16;
    b += ((u_int32_t)k[4]) << 24;
    a += ((u_int32_t)k[2]) << 8;
    a += ((u_int32_t)k[1]) << 16;
    a += ((u_int32_t)k[0]) << 24;

    /* zero length requires no mixing */

    JHASH_FINAL(a, b, c);

    return (c);
}
#else /* LITTLE_ENDIAN */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
    u_int32_t a, b, c;

    /* Set up the internal state */
    a = b = c = JHASH_INIT + len + seed;
#if defined(__i386__) || defined(__x86_64__)
    /*
     * On i386/x86_64, it is faster to read 32-bit chunks if the key
     * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
     * is aligned 16-bit.
     */
    if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
    if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
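        /*
         * Illustrative note (not part of the original file): i386 and
         * x86_64 handle unaligned 32-bit loads in hardware, so a key that
         * is not even 16-bit aligned can still be consumed a word at a
         * time here; only a key that is 16-bit aligned but not 32-bit
         * aligned takes the 16-bit path further below.
         */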
        /* read 32-bit chunks */
        const u_int32_t *k = (const u_int32_t *)key;

        /*
         * all but last block:
         * aligned reads and affect 32 bits of (a,b,c)
         */
        while (len > 12) {
            a += k[0];
            b += k[1];
            c += k[2];
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 3;
        }

        /*
         * handle the last (probably partial) block
         *
         * "k[2] & 0xffffff" actually reads beyond the end of the
         * string, but then masks off the part it's not allowed
         * to read.  Because the string is aligned, the masked-off
         * tail is in the same word as the rest of the string.
         * The masking trick does make the hash noticeably faster
         * for short strings (like English words).
         */
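        /*
         * Illustrative note (not part of the original file): on a
         * little-endian machine the first bytes of the key land in the
         * low-order bytes of each 32-bit word, so for a 10-byte key the
         * mask "k[2] & 0xffff" would keep the two valid low-order bytes
         * of the final word and drop the over-read high bytes; the masks
         * in the cases below follow the same pattern for each possible
         * tail length.
         */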
        c += k[2] & 0xffffff;
        b += k[1] & 0xffffff;
        a += k[0] & 0xffffff;

        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
#if !defined(__i386__) && !defined(__x86_64__)
    else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
        /* read 16-bit chunks */
        const u_int16_t *k = (const u_int16_t *)key;
        const u_int8_t *k8;
        /* all but last block: aligned reads and different mixing */
        while (len > 12) {
            a += k[0] + (((u_int32_t)k[1]) << 16);
            b += k[2] + (((u_int32_t)k[3]) << 16);
            c += k[4] + (((u_int32_t)k[5]) << 16);
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 6;
        }

        /* handle the last (probably partial) block */
        k8 = (const u_int8_t *)k;
        c += k[4] + (((u_int32_t)k[5]) << 16);
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        c += ((u_int32_t)k8[10]) << 16;
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        b += k[2] + (((u_int32_t)k[3]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        b += ((u_int32_t)k8[6]) << 16;
        a += k[0] + (((u_int32_t)k[1]) << 16);
        a += k[0] + (((u_int32_t)k[1]) << 16);
        a += ((u_int32_t)k8[2]) << 16;
        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
#if !defined(__i386__) && !defined(__x86_64__)
    }
    else {
        /* need to read the key one byte at a time */
        const u_int8_t *k = (const u_int8_t *)key;
        /* all but the last block: affect some 32 bits of (a,b,c) */
        while (len > 12) {
            a += k[0];
            a += ((u_int32_t)k[1]) << 8;
            a += ((u_int32_t)k[2]) << 16;
            a += ((u_int32_t)k[3]) << 24;
            b += k[4];
            b += ((u_int32_t)k[5]) << 8;
            b += ((u_int32_t)k[6]) << 16;
            b += ((u_int32_t)k[7]) << 24;
            c += k[8];
            c += ((u_int32_t)k[9]) << 8;
            c += ((u_int32_t)k[10]) << 16;
            c += ((u_int32_t)k[11]) << 24;
            JHASH_MIX(a, b, c);
            len -= 12;
            k += 12;
        }
        /* last block: affect all 32 bits of (c) */
        c += ((u_int32_t)k[11]) << 24;
        c += ((u_int32_t)k[10]) << 16;
        c += ((u_int32_t)k[9]) << 8;
        b += ((u_int32_t)k[7]) << 24;
        b += ((u_int32_t)k[6]) << 16;
        b += ((u_int32_t)k[5]) << 8;
        a += ((u_int32_t)k[3]) << 24;
        a += ((u_int32_t)k[2]) << 16;
        a += ((u_int32_t)k[1]) << 8;
        /* zero length requires no mixing */

        JHASH_FINAL(a, b, c);

        return (c);
    }
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */