]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/flowhash.c
2 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * http://code.google.com/p/smhasher/
32 * Copyright (c) 2009-2011 Austin Appleby.
34 * MurmurHash3 was written by Austin Appleby, and is placed in the public
35 * domain. The author hereby disclaims copyright to this source code.
39 * http://burtleburtle.net/bob/hash/
41 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
43 * You can use this free for any purpose. It's in the public domain.
48 #include <sys/types.h>
49 #include <machine/endian.h>
50 #include <net/flowhash.h>
/*
 * Forward declarations for the file-local helpers.
 *
 * getblock32/getblock64 take a base pointer `p` and an element index `i`
 * and return the 32-bit / 64-bit word at p + i.  mh3_fmix32/mh3_fmix64
 * take a 32-bit / 64-bit hash word and return a word of the same width.
 */
static inline u_int32_t getblock32(const u_int32_t *p, int i);
static inline u_int64_t getblock64(const u_int64_t *p, int i);
static inline u_int32_t mh3_fmix32(u_int32_t h);
static inline u_int64_t mh3_fmix64(u_int64_t k);
/*
 * Alignment predicates: each evaluates to non-zero when the address (or
 * integer) `v` is a multiple of 2, 4 or 8 bytes respectively, by testing
 * that the corresponding low-order bits of the value are all zero.
 */
#define ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)
/*
 * Rotate `x` left by `r` bit positions within a 32-bit (ROTL32) or
 * 64-bit (ROTL64) word.  `x` must be an unsigned operand of exactly that
 * width; the macros do not cast it.
 *
 * Both shift counts are masked to the word width so that a rotation by 0
 * returns `x` unchanged rather than shifting right by the full operand
 * width, which C leaves undefined.  For counts 1..width-1 the result is
 * the same as the plain shift-and-or form.
 *
 * `x` and `r` are each evaluated twice; do not pass expressions with
 * side effects.
 */
#define ROTL32(x, r)    (((x) << ((r) & 31)) | ((x) >> ((32 - (r)) & 31)))
#define ROTL64(x, r)    (((x) << ((r) & 63)) | ((x) >> ((64 - (r)) & 63)))
66 * The following hash algorithms are selected based on performance:
68 * 64-bit: MurmurHash3_x64_128
72 net_flowhash_fn_t
*net_flowhash
= net_flowhash_mh3_x64_128
;
74 net_flowhash_fn_t
*net_flowhash
= net_flowhash_jhash
;
75 #endif /* !__LP64__ */
77 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
78 static inline u_int32_t
79 getblock32(const u_int32_t
*p
, int i
)
84 static inline u_int64_t
85 getblock64(const u_int64_t
*p
, int i
)
89 #else /* !__i386__ && !__x86_64__ && !__arm64__*/
90 static inline u_int32_t
91 getblock32(const u_int32_t
*p
, int i
)
93 const u_int8_t
*bytes
= (u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
99 #if BYTE_ORDER == BIG_ENDIAN
101 (((u_int32_t
)bytes
[0]) << 24) |
102 (((u_int32_t
)bytes
[1]) << 16) |
103 (((u_int32_t
)bytes
[2]) << 8) |
104 ((u_int32_t
)bytes
[3]);
105 #else /* LITTLE_ENDIAN */
107 (((u_int32_t
)bytes
[3]) << 24) |
108 (((u_int32_t
)bytes
[2]) << 16) |
109 (((u_int32_t
)bytes
[1]) << 8) |
110 ((u_int32_t
)bytes
[0]);
111 #endif /* LITTLE_ENDIAN */
116 static inline u_int64_t
117 getblock64(const u_int64_t
*p
, int i
)
119 const u_int8_t
*bytes
= (const u_int8_t
*)(void *)(uintptr_t)(p
+ i
);
125 #if BYTE_ORDER == BIG_ENDIAN
127 (((u_int64_t
)bytes
[0]) << 56) |
128 (((u_int64_t
)bytes
[1]) << 48) |
129 (((u_int64_t
)bytes
[2]) << 40) |
130 (((u_int64_t
)bytes
[3]) << 32) |
131 (((u_int64_t
)bytes
[4]) << 24) |
132 (((u_int64_t
)bytes
[5]) << 16) |
133 (((u_int64_t
)bytes
[6]) << 8) |
134 ((u_int64_t
)bytes
[7]);
135 #else /* LITTLE_ENDIAN */
137 (((u_int64_t
)bytes
[7]) << 56) |
138 (((u_int64_t
)bytes
[6]) << 48) |
139 (((u_int64_t
)bytes
[5]) << 40) |
140 (((u_int64_t
)bytes
[4]) << 32) |
141 (((u_int64_t
)bytes
[3]) << 24) |
142 (((u_int64_t
)bytes
[2]) << 16) |
143 (((u_int64_t
)bytes
[1]) << 8) |
144 ((u_int64_t
)bytes
[0]);
145 #endif /* LITTLE_ENDIAN */
149 #endif /* !__i386__ && !__x86_64 && !__arm64__ */
151 static inline u_int32_t
152 mh3_fmix32(u_int32_t h
)
163 static inline u_int64_t
164 mh3_fmix64(u_int64_t k
)
167 k
*= 0xff51afd7ed558ccdLLU
;
169 k
*= 0xc4ceb9fe1a85ec53LLU
;
/*
 * 32-bit constants for the x86_32 MurmurHash3 variant.  Their uses are
 * not visible in this portion of the file; presumably they are the
 * per-block multipliers of net_flowhash_mh3_x86_32 -- confirm there.
 * The U suffix keeps both constants unsigned (0x1b873593 would otherwise
 * have type int), so arithmetic on them is always modulo 2^32.
 */
#define MH3_X86_32_C1   0xcc9e2d51U
#define MH3_X86_32_C2   0x1b873593U
182 net_flowhash_mh3_x86_32(const void *key
, u_int32_t len
, const u_int32_t seed
)
184 const u_int8_t
*data
= (const u_int8_t
*)key
;
185 const u_int32_t nblocks
= len
/ 4;
186 const u_int32_t
*blocks
;
187 const u_int8_t
*tail
;
188 u_int32_t h1
= seed
, k1
;
192 blocks
= (const u_int32_t
*)(const void *)(data
+ nblocks
* 4);
194 for (i
= -nblocks
; i
; i
++) {
195 k1
= getblock32(blocks
, i
);
203 h1
= h1
* 5 + 0xe6546b64;
207 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 4);
235 * MurmurHash3_x64_128
/*
 * 64-bit multipliers used by net_flowhash_mh3_x64_128: each 64-bit input
 * word k1 is multiplied by C1, rotated, then multiplied by C2, while k2
 * is multiplied by C2, rotated, then multiplied by C1.
 */
#define MH3_X64_128_C1  0x87c37b91114253d5LLU
#define MH3_X64_128_C2  0x4cf5ad432745937fLLU
241 net_flowhash_mh3_x64_128(const void *key
, u_int32_t len
, const u_int32_t seed
)
243 const u_int8_t
*data
= (const u_int8_t
*)key
;
244 const u_int32_t nblocks
= len
/ 16;
245 const u_int64_t
*blocks
;
246 const u_int8_t
*tail
;
247 u_int64_t h1
= seed
, k1
;
248 u_int64_t h2
= seed
, k2
;
252 blocks
= (const u_int64_t
*)(const void *)data
;
254 for (i
= 0; i
< nblocks
; i
++) {
255 k1
= getblock64(blocks
, i
* 2 + 0);
256 k2
= getblock64(blocks
, i
* 2 + 1);
258 k1
*= MH3_X64_128_C1
;
259 #if defined(__x86_64__)
260 __asm__ ( "rol $31, %[k1]\n\t" :[k1
] "+r" (k1
) : :);
261 #elif defined(__arm64__)
262 __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1
] "+r" (k1
) : :);
263 #else /* !__x86_64__ && !__arm64__ */
265 #endif /* !__x86_64__ && !__arm64__ */
266 k1
*= MH3_X64_128_C2
;
269 #if defined(__x86_64__)
270 __asm__ ( "rol $27, %[h1]\n\t" :[h1
] "+r" (h1
) : :);
271 #elif defined(__arm64__)
272 __asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1
] "+r" (h1
) : :);
273 #else /* !__x86_64__ && !__arm64__ */
275 #endif /* !__x86_64__ && !__arm64__ */
277 h1
= h1
* 5 + 0x52dce729;
279 k2
*= MH3_X64_128_C2
;
280 #if defined(__x86_64__)
281 __asm__ ( "rol $33, %[k2]\n\t" :[k2
] "+r" (k2
) : :);
282 #elif defined(__arm64__)
283 __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2
] "+r" (k2
) : :);
284 #else /* !__x86_64__ && !__arm64__ */
286 #endif /* !__x86_64__ && !__arm64__ */
287 k2
*= MH3_X64_128_C1
;
290 #if defined(__x86_64__)
291 __asm__ ( "rol $31, %[h2]\n\t" :[h2
] "+r" (h2
) : :);
292 #elif defined(__arm64__)
293 __asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2
] "+r" (h2
) : :);
294 #else /* !__x86_64__ && !__arm64__ */
296 #endif /* !__x86_64__ && !__arm64__ */
298 h2
= h2
* 5 + 0x38495ab5;
302 tail
= (const u_int8_t
*)(const void *)(data
+ nblocks
* 16);
308 k2
^= ((u_int64_t
)tail
[14]) << 48;
311 k2
^= ((u_int64_t
)tail
[13]) << 40;
314 k2
^= ((u_int64_t
)tail
[12]) << 32;
317 k2
^= ((u_int64_t
)tail
[11]) << 24;
320 k2
^= ((u_int64_t
)tail
[10]) << 16;
323 k2
^= ((u_int64_t
)tail
[9]) << 8;
326 k2
^= ((u_int64_t
)tail
[8]) << 0;
327 k2
*= MH3_X64_128_C2
;
328 #if defined(__x86_64__)
329 __asm__ ( "rol $33, %[k2]\n\t" :[k2
] "+r" (k2
) : :);
330 #elif defined(__arm64__)
331 __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2
] "+r" (k2
) : :);
332 #else /* !__x86_64__ && !__arm64__ */
334 #endif /* !__x86_64__ && !__arm64__ */
335 k2
*= MH3_X64_128_C1
;
339 k1
^= ((u_int64_t
)tail
[7]) << 56;
342 k1
^= ((u_int64_t
)tail
[6]) << 48;
345 k1
^= ((u_int64_t
)tail
[5]) << 40;
348 k1
^= ((u_int64_t
)tail
[4]) << 32;
351 k1
^= ((u_int64_t
)tail
[3]) << 24;
354 k1
^= ((u_int64_t
)tail
[2]) << 16;
357 k1
^= ((u_int64_t
)tail
[1]) << 8;
360 k1
^= ((u_int64_t
)tail
[0]) << 0;
361 k1
*= MH3_X64_128_C1
;
362 #if defined(__x86_64__)
363 __asm__ ( "rol $31, %[k1]\n\t" :[k1
] "+r" (k1
) : :);
364 #elif defined(__arm64__)
365 __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1
] "+r" (k1
) : :);
366 #else /* !__x86_64__ && !__arm64__ */
368 #endif /* !__x86_64__ && !__arm64__ */
369 k1
*= MH3_X64_128_C2
;
387 /* discard all but the lowest 32 bits */
388 return h1
& 0xffffffff;
/*
 * Base value for the jhash internal state: net_flowhash_jhash starts
 * a, b and c at JHASH_INIT + len + seed.  Explicitly unsigned so that
 * the sum wraps modulo 2^32.
 */
#define JHASH_INIT      0xdeadbeefU
393 #define JHASH_MIX(a, b, c) { \
394 a -= c; a ^= ROTL32(c, 4); c += b; \
395 b -= a; b ^= ROTL32(a, 6); a += c; \
396 c -= b; c ^= ROTL32(b, 8); b += a; \
397 a -= c; a ^= ROTL32(c, 16); c += b; \
398 b -= a; b ^= ROTL32(a, 19); a += c; \
399 c -= b; c ^= ROTL32(b, 4); b += a; \
402 #define JHASH_FINAL(a, b, c) { \
403 c ^= b; c -= ROTL32(b, 14); \
404 a ^= c; a -= ROTL32(c, 11); \
405 b ^= a; b -= ROTL32(a, 25); \
406 c ^= b; c -= ROTL32(b, 16); \
407 a ^= c; a -= ROTL32(c, 4); \
408 b ^= a; b -= ROTL32(a, 14); \
409 c ^= b; c -= ROTL32(b, 24); \
412 #if BYTE_ORDER == BIG_ENDIAN
417 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
421 /* Set up the internal state */
422 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
424 if (ALIGNED32(key
)) {
425 /* read 32-bit chunks */
426 const u_int32_t
*k
= (const u_int32_t
*)key
;
429 * all but last block:
430 * aligned reads and affect 32 bits of (a,b,c)
442 * handle the last (probably partial) block
444 * "k[2] << 8" actually reads beyond the end of the string,
445 * but then shifts out the part it's not allowed to read.
446 * Because the string is aligned, the illegal read is in
447 * the same word as the rest of the string. The masking
448 * trick does make the hash noticeably faster for short
449 * strings (like English words).
459 c
+= k
[2] & 0xffffff00;
465 c
+= k
[2] & 0xffff0000;
471 c
+= k
[2] & 0xff000000;
482 b
+= k
[1] & 0xffffff00;
487 b
+= k
[1] & 0xffff0000;
492 b
+= k
[1] & 0xff000000;
501 a
+= k
[0] & 0xffffff00;
505 a
+= k
[0] & 0xffff0000;
509 a
+= k
[0] & 0xff000000;
513 /* zero length requires no mixing */
517 JHASH_FINAL(a
, b
, c
);
522 /* need to read the key one byte at a time */
523 const u_int8_t
*k
= (const u_int8_t
*)key
;
525 /* all but the last block: affect some 32 bits of (a,b,c) */
527 a
+= ((u_int32_t
)k
[0]) << 24;
528 a
+= ((u_int32_t
)k
[1]) << 16;
529 a
+= ((u_int32_t
)k
[2]) << 8;
530 a
+= ((u_int32_t
)k
[3]);
531 b
+= ((u_int32_t
)k
[4]) << 24;
532 b
+= ((u_int32_t
)k
[5]) << 16;
533 b
+= ((u_int32_t
)k
[6]) << 8;
534 b
+= ((u_int32_t
)k
[7]);
535 c
+= ((u_int32_t
)k
[8]) << 24;
536 c
+= ((u_int32_t
)k
[9]) << 16;
537 c
+= ((u_int32_t
)k
[10]) << 8;
538 c
+= ((u_int32_t
)k
[11]);
544 /* last block: affect all 32 bits of (c) */
550 c
+= ((u_int32_t
)k
[10]) << 8;
553 c
+= ((u_int32_t
)k
[9]) << 16;
556 c
+= ((u_int32_t
)k
[8]) << 24;
562 b
+= ((u_int32_t
)k
[6]) << 8;
565 b
+= ((u_int32_t
)k
[5]) << 16;
568 b
+= ((u_int32_t
)k
[4]) << 24;
574 a
+= ((u_int32_t
)k
[2]) << 8;
577 a
+= ((u_int32_t
)k
[1]) << 16;
580 a
+= ((u_int32_t
)k
[0]) << 24;
584 /* zero length requires no mixing */
588 JHASH_FINAL(a
, b
, c
);
592 #else /* LITTLE_ENDIAN */
597 net_flowhash_jhash(const void *key
, u_int32_t len
, const u_int32_t seed
)
601 /* Set up the internal state */
602 a
= b
= c
= JHASH_INIT
+ len
+ seed
;
604 #if defined(__i386__) || defined(__x86_64__)
606 * On i386/x86_64, it is faster to read 32-bit chunks if the key
607 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
610 if (ALIGNED32(key
) || !ALIGNED16(key
)) {
611 #else /* !defined(__i386__) && !defined(__x86_64__) */
612 if (ALIGNED32(key
)) {
613 #endif /* !defined(__i386__) && !defined(__x86_64__) */
614 /* read 32-bit chunks */
615 const u_int32_t
*k
= (const u_int32_t
*)key
;
618 * all but last block:
619 * aligned reads and affect 32 bits of (a,b,c)
631 * handle the last (probably partial) block
633 * "k[2] & 0xffffff" actually reads beyond the end of the
634 * string, but then masks off the part it's not allowed
635 * to read. Because the string is aligned, the masked-off
636 * tail is in the same word as the rest of the string.
637 * The masking trick does make the hash noticeably faster
638 * for short strings (like English words).
648 c
+= k
[2] & 0xffffff;
671 b
+= k
[1] & 0xffffff;
690 a
+= k
[0] & 0xffffff;
702 /* zero length requires no mixing */
706 JHASH_FINAL(a
, b
, c
);
710 #if !defined(__i386__) && !defined(__x86_64__)
711 else if (ALIGNED16(key
)) {
712 #endif /* !defined(__i386__) && !defined(__x86_64__) */
713 /* read 16-bit chunks */
714 const u_int16_t
*k
= (const u_int16_t
*)key
;
717 /* all but last block: aligned reads and different mixing */
719 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
720 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
721 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
727 /* handle the last (probably partial) block */
728 k8
= (const u_int8_t
*)k
;
731 c
+= k
[4] + (((u_int32_t
)k
[5]) << 16);
732 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
733 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
737 c
+= ((u_int32_t
)k8
[10]) << 16;
741 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
742 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
749 b
+= k
[2] + (((u_int32_t
)k
[3]) << 16);
750 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
754 b
+= ((u_int32_t
)k8
[6]) << 16;
758 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
765 a
+= k
[0] + (((u_int32_t
)k
[1]) << 16);
769 a
+= ((u_int32_t
)k8
[2]) << 16;
780 /* zero length requires no mixing */
784 JHASH_FINAL(a
, b
, c
);
787 #if !defined(__i386__) && !defined(__x86_64__)
790 /* need to read the key one byte at a time */
791 const u_int8_t
*k
= (const u_int8_t
*)key
;
793 /* all but the last block: affect some 32 bits of (a,b,c) */
796 a
+= ((u_int32_t
)k
[1]) << 8;
797 a
+= ((u_int32_t
)k
[2]) << 16;
798 a
+= ((u_int32_t
)k
[3]) << 24;
800 b
+= ((u_int32_t
)k
[5]) << 8;
801 b
+= ((u_int32_t
)k
[6]) << 16;
802 b
+= ((u_int32_t
)k
[7]) << 24;
804 c
+= ((u_int32_t
)k
[9]) << 8;
805 c
+= ((u_int32_t
)k
[10]) << 16;
806 c
+= ((u_int32_t
)k
[11]) << 24;
812 /* last block: affect all 32 bits of (c) */
815 c
+= ((u_int32_t
)k
[11]) << 24;
818 c
+= ((u_int32_t
)k
[10]) << 16;
821 c
+= ((u_int32_t
)k
[9]) << 8;
827 b
+= ((u_int32_t
)k
[7]) << 24;
830 b
+= ((u_int32_t
)k
[6]) << 16;
833 b
+= ((u_int32_t
)k
[5]) << 8;
839 a
+= ((u_int32_t
)k
[3]) << 24;
842 a
+= ((u_int32_t
)k
[2]) << 16;
845 a
+= ((u_int32_t
)k
[1]) << 8;
852 /* zero length requires no mixing */
856 JHASH_FINAL(a
, b
, c
);
859 #endif /* !defined(__i386__) && !defined(__x86_64__) */
861 #endif /* LITTLE_ENDIAN */