/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose. It's in the public domain.
 */
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>
static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);
#define	ALIGNED16(v)	((((uintptr_t)(v)) & 1) == 0)
#define	ALIGNED32(v)	((((uintptr_t)(v)) & 3) == 0)
#define	ALIGNED64(v)	((((uintptr_t)(v)) & 7) == 0)

#define	ROTL32(x, r)	(((x) << (r)) | ((x) >> (32 - (r))))
#define	ROTL64(x, r)	(((x) << (r)) | ((x) >> (64 - (r))))
/*
 * The following hash algorithms are selected based on performance:
 *
 * Intel 32-bit:	MurmurHash3_x86_32
 * Intel 64-bit:	MurmurHash3_x64_128
 * Others:		JHash
 *
 * (An illustrative call sketch follows the selection below.)
 */
#if defined(__i386__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x86_32;
#elif defined(__x86_64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__i386__ && !__x86_64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__i386__ && !__x86_64__ */
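/*
 * Illustrative sketch (not part of the original file): how a caller might
 * invoke whichever routine the selection above picked, through the
 * net_flowhash function pointer.  The flow-tuple layout and seed below are
 * made up for this example; only the (key, len, seed) signature comes from
 * the routines in this file.
 */
#if 0
struct example_flow_tuple {		/* hypothetical key layout */
	u_int32_t	src_addr;
	u_int32_t	dst_addr;
	u_int16_t	src_port;
	u_int16_t	dst_port;
};

static u_int32_t
example_flow_hash(const struct example_flow_tuple *ft, u_int32_t seed)
{
	/* net_flowhash points at the per-architecture routine chosen above */
	return ((*net_flowhash)(ft, (u_int32_t)sizeof (*ft), seed));
}
#endif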
#if defined(__i386__) || defined(__x86_64__)
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	/* unaligned 32-bit loads are tolerated and cheap on i386/x86_64 */
	return (p[i]);
}

static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	/* unaligned 64-bit loads are tolerated and cheap on i386/x86_64 */
	return (p[i]);
}
#else /* !__i386__ && !__x86_64 */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int32_t value;

#if BYTE_ORDER == BIG_ENDIAN
	value =
	    (((u_int32_t)bytes[0]) << 24) |
	    (((u_int32_t)bytes[1]) << 16) |
	    (((u_int32_t)bytes[2]) << 8) |
	    ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
	value =
	    (((u_int32_t)bytes[3]) << 24) |
	    (((u_int32_t)bytes[2]) << 16) |
	    (((u_int32_t)bytes[1]) << 8) |
	    ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

	return (value);
}
static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
	u_int64_t value;

#if BYTE_ORDER == BIG_ENDIAN
	value =
	    (((u_int64_t)bytes[0]) << 56) |
	    (((u_int64_t)bytes[1]) << 48) |
	    (((u_int64_t)bytes[2]) << 40) |
	    (((u_int64_t)bytes[3]) << 32) |
	    (((u_int64_t)bytes[4]) << 24) |
	    (((u_int64_t)bytes[5]) << 16) |
	    (((u_int64_t)bytes[6]) << 8) |
	    ((u_int64_t)bytes[7]);
#else /* LITTLE_ENDIAN */
	value =
	    (((u_int64_t)bytes[7]) << 56) |
	    (((u_int64_t)bytes[6]) << 48) |
	    (((u_int64_t)bytes[5]) << 40) |
	    (((u_int64_t)bytes[4]) << 32) |
	    (((u_int64_t)bytes[3]) << 24) |
	    (((u_int64_t)bytes[2]) << 16) |
	    (((u_int64_t)bytes[1]) << 8) |
	    ((u_int64_t)bytes[0]);
#endif /* LITTLE_ENDIAN */

	return (value);
}
#endif /* !__i386__ && !__x86_64 */
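/*
 * Illustrative sketch (not part of the original file): regardless of which
 * getblock32() variant is compiled in above (direct load on i386/x86_64,
 * byte-wise assembly elsewhere), the bytes { 0x01, 0x02, 0x03, 0x04 } are
 * read as one 32-bit block in the machine's native byte order.
 */
#if 0
static void
example_getblock32(void)
{
	static const u_int8_t raw[4] = { 0x01, 0x02, 0x03, 0x04 };
	u_int32_t v;

	v = getblock32((const u_int32_t *)(const void *)raw, 0);
#if BYTE_ORDER == BIG_ENDIAN
	/* v == 0x01020304 */
#else /* LITTLE_ENDIAN */
	/* v == 0x04030201 */
#endif /* LITTLE_ENDIAN */
	(void) v;
}
#endif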
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
	/* standard MurmurHash3 32-bit finalization mix */
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return (h);
}

static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdLLU;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53LLU;
	k ^= k >> 33;

	return (k);
}
/*
 * MurmurHash3_x86_32
 */
#define	MH3_X86_32_C1	0xcc9e2d51
#define	MH3_X86_32_C2	0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 4;
	const u_int32_t *blocks;
	const u_int8_t *tail;
	u_int32_t h1 = seed, k1;
	int i;

	/* body */
	blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

	for (i = -nblocks; i; i++) {
		k1 = getblock32(blocks, i);

		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;

		h1 ^= k1;
		h1 = ROTL32(h1, 13);
		h1 = h1 * 5 + 0xe6546b64;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
	k1 = 0;

	switch (len & 3) {
	case 3: k1 ^= tail[2] << 16;	/* FALLTHRU */
	case 2: k1 ^= tail[1] << 8;	/* FALLTHRU */
	case 1:
		k1 ^= tail[0];
		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;
		h1 ^= k1;
		break;
	}

	/* finalization */
	h1 ^= len;

	h1 = mh3_fmix32(h1);

	return (h1);
}
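/*
 * Illustrative sketch (not part of the original file): feeding an arbitrary
 * byte buffer to the 32-bit MurmurHash3 routine above.  The buffer contents
 * and seed value are made up for this example.
 */
#if 0
static void
example_mh3_x86_32_usage(void)
{
	static const char buf[] = "example flow key";
	u_int32_t h;

	/* the length need not be a multiple of 4; the tail switch handles it */
	h = net_flowhash_mh3_x86_32(buf, (u_int32_t)(sizeof (buf) - 1),
	    0x5a5a5a5a);
	(void) h;
}
#endif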
/*
 * MurmurHash3_x64_128
 */
#define	MH3_X64_128_C1	0x87c37b91114253d5LLU
#define	MH3_X64_128_C2	0x4cf5ad432745937fLLU

u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 16;
	const u_int64_t *blocks;
	const u_int8_t *tail;
	u_int64_t h1 = seed, k1;
	u_int64_t h2 = seed, k2;
	u_int32_t i;

	/* body */
	blocks = (const u_int64_t *)(const void *)data;

	for (i = 0; i < nblocks; i++) {
		k1 = getblock64(blocks, i * 2 + 0);
		k2 = getblock64(blocks, i * 2 + 1);

		k1 *= MH3_X64_128_C1;
		k1 = ROTL64(k1, 31);
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;

		h1 = ROTL64(h1, 27);
		h1 += h2;
		h1 = h1 * 5 + 0x52dce729;

		k2 *= MH3_X64_128_C2;
		k2 = ROTL64(k2, 33);
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;

		h2 = ROTL64(h2, 31);
		h2 += h1;
		h2 = h2 * 5 + 0x38495ab5;
	}

	/* tail */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
	k1 = 0;
	k2 = 0;

	switch (len & 15) {
	case 15: k2 ^= ((u_int64_t)tail[14]) << 48;	/* FALLTHRU */
	case 14: k2 ^= ((u_int64_t)tail[13]) << 40;	/* FALLTHRU */
	case 13: k2 ^= ((u_int64_t)tail[12]) << 32;	/* FALLTHRU */
	case 12: k2 ^= ((u_int64_t)tail[11]) << 24;	/* FALLTHRU */
	case 11: k2 ^= ((u_int64_t)tail[10]) << 16;	/* FALLTHRU */
	case 10: k2 ^= ((u_int64_t)tail[9]) << 8;	/* FALLTHRU */
	case 9:
		k2 ^= ((u_int64_t)tail[8]) << 0;
		k2 *= MH3_X64_128_C2;
		k2 = ROTL64(k2, 33);
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;
		/* FALLTHRU */
	case 8:  k1 ^= ((u_int64_t)tail[7]) << 56;	/* FALLTHRU */
	case 7:  k1 ^= ((u_int64_t)tail[6]) << 48;	/* FALLTHRU */
	case 6:  k1 ^= ((u_int64_t)tail[5]) << 40;	/* FALLTHRU */
	case 5:  k1 ^= ((u_int64_t)tail[4]) << 32;	/* FALLTHRU */
	case 4:  k1 ^= ((u_int64_t)tail[3]) << 24;	/* FALLTHRU */
	case 3:  k1 ^= ((u_int64_t)tail[2]) << 16;	/* FALLTHRU */
	case 2:  k1 ^= ((u_int64_t)tail[1]) << 8;	/* FALLTHRU */
	case 1:
		k1 ^= ((u_int64_t)tail[0]) << 0;
		k1 *= MH3_X64_128_C1;
		k1 = ROTL64(k1, 31);
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;
		break;
	}

	/* finalization */
	h1 ^= len;
	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = mh3_fmix64(h1);
	h2 = mh3_fmix64(h2);

	h1 += h2;
	h2 += h1;

	/* throw away all but the lowest 32 bits */
	return (h1 & 0xffffffff);
}
#define	JHASH_INIT	0xdeadbeef

#define	JHASH_MIX(a, b, c) {			\
	a -= c;  a ^= ROTL32(c, 4);   c += b;	\
	b -= a;  b ^= ROTL32(a, 6);   a += c;	\
	c -= b;  c ^= ROTL32(b, 8);   b += a;	\
	a -= c;  a ^= ROTL32(c, 16);  c += b;	\
	b -= a;  b ^= ROTL32(a, 19);  a += c;	\
	c -= b;  c ^= ROTL32(b, 4);   b += a;	\
}

#define	JHASH_FINAL(a, b, c) {			\
	c ^= b;  c -= ROTL32(b, 14);		\
	a ^= c;  a -= ROTL32(c, 11);		\
	b ^= a;  b -= ROTL32(a, 25);		\
	c ^= b;  c -= ROTL32(b, 16);		\
	a ^= c;  a -= ROTL32(c, 4);		\
	b ^= a;  b -= ROTL32(a, 14);		\
	c ^= b;  c -= ROTL32(b, 24);		\
}
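/*
 * Illustrative sketch (not part of the original file): hashing exactly one
 * 12-byte (three-word) block with the macros above, the way the aligned
 * paths of net_flowhash_jhash below consume a final 12-byte block.
 */
#if 0
static u_int32_t
example_jhash_3words(u_int32_t w0, u_int32_t w1, u_int32_t w2, u_int32_t seed)
{
	u_int32_t a, b, c;

	a = b = c = JHASH_INIT + 12 + seed;	/* the key length is 12 bytes */
	a += w0;
	b += w1;
	c += w2;
	JHASH_FINAL(a, b, c);

	return (c);
}
#endif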
#if BYTE_ORDER == BIG_ENDIAN
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

	if (ALIGNED32(key)) {
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] << 8" actually reads beyond the end of the string,
		 * but then shifts out the part it's not allowed to read.
		 * Because the string is aligned, the illegal read is in
		 * the same word as the rest of the string.  The masking
		 * trick does make the hash noticeably faster for short
		 * strings (like English words).  For example, when only
		 * 9 bytes remain, "k[2] & 0xff000000" keeps the single
		 * valid (most significant) byte of the last word and
		 * discards the three bytes read past the end.
		 */
		switch (len) {
		case 12: c += k[2]; b += k[1]; a += k[0]; break;
		case 11: c += k[2] & 0xffffff00; b += k[1]; a += k[0]; break;
		case 10: c += k[2] & 0xffff0000; b += k[1]; a += k[0]; break;
		case 9:  c += k[2] & 0xff000000; b += k[1]; a += k[0]; break;
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += k[1] & 0xffffff00; a += k[0]; break;
		case 6:  b += k[1] & 0xffff0000; a += k[0]; break;
		case 5:  b += k[1] & 0xff000000; a += k[0]; break;
		case 4:  a += k[0]; break;
		case 3:  a += k[0] & 0xffffff00; break;
		case 2:  a += k[0] & 0xffff0000; break;
		case 1:  a += k[0] & 0xff000000; break;
		case 0:
			/* zero length requires no mixing */
			return (c);
		}

		JHASH_FINAL(a, b, c);

		return (c);
	}

	/* need to read the key one byte at a time */
	const u_int8_t *k = (const u_int8_t *)key;

	/* all but the last block: affect some 32 bits of (a,b,c) */
	while (len > 12) {
		a += ((u_int32_t)k[0]) << 24;
		a += ((u_int32_t)k[1]) << 16;
		a += ((u_int32_t)k[2]) << 8;
		a += ((u_int32_t)k[3]);
		b += ((u_int32_t)k[4]) << 24;
		b += ((u_int32_t)k[5]) << 16;
		b += ((u_int32_t)k[6]) << 8;
		b += ((u_int32_t)k[7]);
		c += ((u_int32_t)k[8]) << 24;
		c += ((u_int32_t)k[9]) << 16;
		c += ((u_int32_t)k[10]) << 8;
		c += ((u_int32_t)k[11]);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/* last block: affect all 32 bits of (c) */
	switch (len) {
	case 12: c += ((u_int32_t)k[11]);	/* FALLTHRU */
	case 11: c += ((u_int32_t)k[10]) << 8;	/* FALLTHRU */
	case 10: c += ((u_int32_t)k[9]) << 16;	/* FALLTHRU */
	case 9:  c += ((u_int32_t)k[8]) << 24;	/* FALLTHRU */
	case 8:  b += ((u_int32_t)k[7]);	/* FALLTHRU */
	case 7:  b += ((u_int32_t)k[6]) << 8;	/* FALLTHRU */
	case 6:  b += ((u_int32_t)k[5]) << 16;	/* FALLTHRU */
	case 5:  b += ((u_int32_t)k[4]) << 24;	/* FALLTHRU */
	case 4:  a += ((u_int32_t)k[3]);	/* FALLTHRU */
	case 3:  a += ((u_int32_t)k[2]) << 8;	/* FALLTHRU */
	case 2:  a += ((u_int32_t)k[1]) << 16;	/* FALLTHRU */
	case 1:  a += ((u_int32_t)k[0]) << 24;	break;
	case 0:
		/* zero length requires no mixing */
		return (c);
	}

	JHASH_FINAL(a, b, c);

	return (c);
}
#else /* LITTLE_ENDIAN */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * On i386/x86_64, it is faster to read 32-bit chunks if the key
	 * is aligned 32-bit OR not 16-bit, and perform 16-bit reads if it
	 * is aligned 16-bit.
	 */
	if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
	if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 32-bit chunks */
		const u_int32_t *k = (const u_int32_t *)key;

		/*
		 * all but last block:
		 * aligned reads and affect 32 bits of (a,b,c)
		 */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * handle the last (probably partial) block
		 *
		 * "k[2] & 0xffffff" actually reads beyond the end of the
		 * string, but then masks off the part it's not allowed
		 * to read.  Because the string is aligned, the masked-off
		 * tail is in the same word as the rest of the string.
		 * The masking trick does make the hash noticeably faster
		 * for short strings (like English words).  For example,
		 * when 11 bytes remain, "k[2] & 0xffffff" keeps the three
		 * valid low-order bytes of the last word and discards the
		 * byte read past the end.
		 */
		switch (len) {
		case 12: c += k[2]; b += k[1]; a += k[0]; break;
		case 11: c += k[2] & 0xffffff; b += k[1]; a += k[0]; break;
		case 10: c += k[2] & 0xffff; b += k[1]; a += k[0]; break;
		case 9:  c += k[2] & 0xff; b += k[1]; a += k[0]; break;
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += k[1] & 0xffffff; a += k[0]; break;
		case 6:  b += k[1] & 0xffff; a += k[0]; break;
		case 5:  b += k[1] & 0xff; a += k[0]; break;
		case 4:  a += k[0]; break;
		case 3:  a += k[0] & 0xffffff; break;
		case 2:  a += k[0] & 0xffff; break;
		case 1:  a += k[0] & 0xff; break;
		case 0:
			/* zero length requires no mixing */
			return (c);
		}

		JHASH_FINAL(a, b, c);

		return (c);
	}
#if !defined(__i386__) && !defined(__x86_64__)
	else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* read 16-bit chunks */
		const u_int16_t *k = (const u_int16_t *)key;
		const u_int8_t *k8;

		/* all but last block: aligned reads and different mixing */
		while (len > 12) {
			a += k[0] + (((u_int32_t)k[1]) << 16);
			b += k[2] + (((u_int32_t)k[3]) << 16);
			c += k[4] + (((u_int32_t)k[5]) << 16);
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 6;
		}

		/* handle the last (probably partial) block */
		k8 = (const u_int8_t *)k;

		switch (len) {
		case 12:
			c += k[4] + (((u_int32_t)k[5]) << 16);
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;
		case 11:
			c += ((u_int32_t)k8[10]) << 16;
			/* FALLTHRU */
		case 10:
			c += k[4];
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;
		case 9:
			c += k8[8];
			/* FALLTHRU */
		case 8:
			b += k[2] + (((u_int32_t)k[3]) << 16);
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;
		case 7:
			b += ((u_int32_t)k8[6]) << 16;
			/* FALLTHRU */
		case 6:
			b += k[2];
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;
		case 5:
			b += k8[4];
			/* FALLTHRU */
		case 4:
			a += k[0] + (((u_int32_t)k[1]) << 16);
			break;
		case 3:
			a += ((u_int32_t)k8[2]) << 16;
			/* FALLTHRU */
		case 2:
			a += k[0];
			break;
		case 1:
			a += k8[0];
			break;
		case 0:
			/* zero length requires no mixing */
			return (c);
		}

		JHASH_FINAL(a, b, c);

		return (c);
#if !defined(__i386__) && !defined(__x86_64__)
	} else {
		/* need to read the key one byte at a time */
		const u_int8_t *k = (const u_int8_t *)key;

		/* all but the last block: affect some 32 bits of (a,b,c) */
		while (len > 12) {
			a += k[0];
			a += ((u_int32_t)k[1]) << 8;
			a += ((u_int32_t)k[2]) << 16;
			a += ((u_int32_t)k[3]) << 24;
			b += k[4];
			b += ((u_int32_t)k[5]) << 8;
			b += ((u_int32_t)k[6]) << 16;
			b += ((u_int32_t)k[7]) << 24;
			c += k[8];
			c += ((u_int32_t)k[9]) << 8;
			c += ((u_int32_t)k[10]) << 16;
			c += ((u_int32_t)k[11]) << 24;
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 12;
		}

		/* last block: affect all 32 bits of (c) */
		switch (len) {
		case 12: c += ((u_int32_t)k[11]) << 24;	/* FALLTHRU */
		case 11: c += ((u_int32_t)k[10]) << 16;	/* FALLTHRU */
		case 10: c += ((u_int32_t)k[9]) << 8;	/* FALLTHRU */
		case 9:  c += k[8];			/* FALLTHRU */
		case 8:  b += ((u_int32_t)k[7]) << 24;	/* FALLTHRU */
		case 7:  b += ((u_int32_t)k[6]) << 16;	/* FALLTHRU */
		case 6:  b += ((u_int32_t)k[5]) << 8;	/* FALLTHRU */
		case 5:  b += k[4];			/* FALLTHRU */
		case 4:  a += ((u_int32_t)k[3]) << 24;	/* FALLTHRU */
		case 3:  a += ((u_int32_t)k[2]) << 16;	/* FALLTHRU */
		case 2:  a += ((u_int32_t)k[1]) << 8;	/* FALLTHRU */
		case 1:  a += k[0];			break;
		case 0:
			/* zero length requires no mixing */
			return (c);
		}

		JHASH_FINAL(a, b, c);

		return (c);
	}
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */