/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * http://code.google.com/p/smhasher/
 *
 * Copyright (c) 2009-2011 Austin Appleby.
 *
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.
 */

/*
 * http://burtleburtle.net/bob/hash/
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * You can use this free for any purpose. It's in the public domain.
 * It has no warranty.
 */
#include <stdbool.h>
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>

static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);

#define ALIGNED16(v) ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v) ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v) ((((uintptr_t)(v)) & 7) == 0)

#define ROTL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))

/*
 * The following hash algorithms are selected based on performance:
 *
 * 64-bit: MurmurHash3_x64_128
 * 32-bit: JHash
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else /* !__LP64__ */
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif /* !__LP64__ */
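
/*
 * Usage sketch (illustrative only, compiled out): callers hash an
 * arbitrary key through the net_flowhash function pointer bound above
 * to the best-performing implementation for the platform.  The
 * flow-tuple layout below is a hypothetical example, not a structure
 * defined by this file.
 */
#if 0
struct flowhash_example_tuple {
        u_int32_t fet_src_ip;
        u_int32_t fet_dst_ip;
        u_int16_t fet_src_port;
        u_int16_t fet_dst_port;
        u_int8_t fet_proto;
} __attribute__((__packed__));

static u_int32_t
flowhash_example(const struct flowhash_example_tuple *fet, u_int32_t seed)
{
        /* dispatch through the per-platform implementation */
        return (*net_flowhash)(fet, sizeof (*fet), seed);
}
#endif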
75 | ||
76 | #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) | |
77 | static inline u_int32_t | |
78 | getblock32(const u_int32_t *p, int i) | |
79 | { | |
80 | return p[i]; | |
81 | } | |
82 | ||
83 | static inline u_int64_t | |
84 | getblock64(const u_int64_t *p, int i) | |
85 | { | |
86 | return p[i]; | |
87 | } | |
88 | #else /* !__i386__ && !__x86_64__ && !__arm64__*/ | |
89 | static inline u_int32_t | |
90 | getblock32(const u_int32_t *p, int i) | |
91 | { | |
92 | const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i); | |
93 | u_int32_t value; | |
94 | ||
95 | if (ALIGNED32(p)) { | |
96 | value = p[i]; | |
97 | } else { | |
98 | #if BYTE_ORDER == BIG_ENDIAN | |
99 | value = | |
100 | (((u_int32_t)bytes[0]) << 24) | | |
101 | (((u_int32_t)bytes[1]) << 16) | | |
102 | (((u_int32_t)bytes[2]) << 8) | | |
103 | ((u_int32_t)bytes[3]); | |
104 | #else /* LITTLE_ENDIAN */ | |
105 | value = | |
106 | (((u_int32_t)bytes[3]) << 24) | | |
107 | (((u_int32_t)bytes[2]) << 16) | | |
108 | (((u_int32_t)bytes[1]) << 8) | | |
109 | ((u_int32_t)bytes[0]); | |
110 | #endif /* LITTLE_ENDIAN */ | |
111 | } | |
112 | return value; | |
113 | } | |
114 | ||
115 | static inline u_int64_t | |
116 | getblock64(const u_int64_t *p, int i) | |
117 | { | |
118 | const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i); | |
119 | u_int64_t value; | |
120 | ||
121 | if (ALIGNED64(p)) { | |
122 | value = p[i]; | |
123 | } else { | |
124 | #if BYTE_ORDER == BIG_ENDIAN | |
125 | value = | |
126 | (((u_int64_t)bytes[0]) << 56) | | |
127 | (((u_int64_t)bytes[1]) << 48) | | |
128 | (((u_int64_t)bytes[2]) << 40) | | |
129 | (((u_int64_t)bytes[3]) << 32) | | |
130 | (((u_int64_t)bytes[4]) << 24) | | |
131 | (((u_int64_t)bytes[5]) << 16) | | |
132 | (((u_int64_t)bytes[6]) << 8) | | |
133 | ((u_int64_t)bytes[7]); | |
134 | #else /* LITTLE_ENDIAN */ | |
135 | value = | |
136 | (((u_int64_t)bytes[7]) << 56) | | |
137 | (((u_int64_t)bytes[6]) << 48) | | |
138 | (((u_int64_t)bytes[5]) << 40) | | |
139 | (((u_int64_t)bytes[4]) << 32) | | |
140 | (((u_int64_t)bytes[3]) << 24) | | |
141 | (((u_int64_t)bytes[2]) << 16) | | |
142 | (((u_int64_t)bytes[1]) << 8) | | |
143 | ((u_int64_t)bytes[0]); | |
144 | #endif /* LITTLE_ENDIAN */ | |
145 | } | |
146 | return value; | |
147 | } | |
148 | #endif /* !__i386__ && !__x86_64 && !__arm64__ */ | |
149 | ||
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
        h ^= h >> 16;
        h *= 0x85ebca6b;
        h ^= h >> 13;
        h *= 0xc2b2ae35;
        h ^= h >> 16;

        return h;
}

static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
        k ^= k >> 33;
        k *= 0xff51afd7ed558ccdLLU;
        k ^= k >> 33;
        k *= 0xc4ceb9fe1a85ec53LLU;
        k ^= k >> 33;

        return k;
}

/*
 * MurmurHash3_x86_32
 */
#define MH3_X86_32_C1 0xcc9e2d51
#define MH3_X86_32_C2 0x1b873593

u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
        const u_int8_t *data = (const u_int8_t *)key;
        const u_int32_t nblocks = len / 4;
        const u_int32_t *blocks;
        const u_int8_t *tail;
        u_int32_t h1 = seed, k1;
        int i;

        /* body */
        blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);

        for (i = -nblocks; i; i++) {
                k1 = getblock32(blocks, i);

                k1 *= MH3_X86_32_C1;
                k1 = ROTL32(k1, 15);
                k1 *= MH3_X86_32_C2;

                h1 ^= k1;
                h1 = ROTL32(h1, 13);
                h1 = h1 * 5 + 0xe6546b64;
        }

        /* tail */
        tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
        k1 = 0;

        switch (len & 3) {
        case 3:
                k1 ^= tail[2] << 16;
        /* FALLTHRU */
        case 2:
                k1 ^= tail[1] << 8;
        /* FALLTHRU */
        case 1:
                k1 ^= tail[0];
                k1 *= MH3_X86_32_C1;
                k1 = ROTL32(k1, 15);
                k1 *= MH3_X86_32_C2;
                h1 ^= k1;
        }

        /* finalization */
        h1 ^= len;

        h1 = mh3_fmix32(h1);

        return h1;
}
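
/*
 * Sanity sketch (illustrative only, compiled out): with a zero seed and
 * a zero-length key, the block loop and the tail switch are both
 * skipped, so the result is mh3_fmix32(0 ^ 0) == 0.
 */
#if 0
static bool
mh3_x86_32_smoke_test(void)
{
        /* a zero-length key hashed with seed 0 must yield 0 */
        return net_flowhash_mh3_x86_32(NULL, 0, 0) == 0;
}
#endif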
232 | ||
233 | /* | |
234 | * MurmurHash3_x64_128 | |
235 | */ | |
236 | #define MH3_X64_128_C1 0x87c37b91114253d5LLU | |
237 | #define MH3_X64_128_C2 0x4cf5ad432745937fLLU | |
238 | ||
239 | u_int32_t | |
240 | net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed) | |
241 | { | |
242 | const u_int8_t *data = (const u_int8_t *)key; | |
243 | const u_int32_t nblocks = len / 16; | |
244 | const u_int64_t *blocks; | |
245 | const u_int8_t *tail; | |
246 | u_int64_t h1 = seed, k1; | |
247 | u_int64_t h2 = seed, k2; | |
248 | u_int32_t i; | |
249 | ||
250 | /* body */ | |
251 | blocks = (const u_int64_t *)(const void *)data; | |
252 | ||
253 | for (i = 0; i < nblocks; i++) { | |
254 | k1 = getblock64(blocks, i * 2 + 0); | |
255 | k2 = getblock64(blocks, i * 2 + 1); | |
256 | ||
257 | k1 *= MH3_X64_128_C1; | |
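                /*
                 * On x86_64 and arm64, the rotations below are expressed
                 * directly as hardware rotate instructions via inline
                 * assembly (arm64 has only rotate-right, hence the 64-n
                 * counts); all other targets fall back to the portable
                 * ROTL64() macro.
                 */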
#if defined(__x86_64__)
                __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
                k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
                k1 *= MH3_X64_128_C2;
                h1 ^= k1;

#if defined(__x86_64__)
                __asm__ ( "rol $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
                h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
                h1 += h2;
                h1 = h1 * 5 + 0x52dce729;

                k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
                __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
                k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
                k2 *= MH3_X64_128_C1;
                h2 ^= k2;

#if defined(__x86_64__)
                __asm__ ( "rol $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
                h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
                h2 += h1;
                h2 = h2 * 5 + 0x38495ab5;
        }

        /* tail */
        tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
        k1 = 0;
        k2 = 0;

        switch (len & 15) {
        case 15:
                k2 ^= ((u_int64_t)tail[14]) << 48;
        /* FALLTHRU */
        case 14:
                k2 ^= ((u_int64_t)tail[13]) << 40;
        /* FALLTHRU */
        case 13:
                k2 ^= ((u_int64_t)tail[12]) << 32;
        /* FALLTHRU */
        case 12:
                k2 ^= ((u_int64_t)tail[11]) << 24;
        /* FALLTHRU */
        case 11:
                k2 ^= ((u_int64_t)tail[10]) << 16;
        /* FALLTHRU */
        case 10:
                k2 ^= ((u_int64_t)tail[9]) << 8;
        /* FALLTHRU */
        case 9:
                k2 ^= ((u_int64_t)tail[8]) << 0;
                k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
                __asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
                k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
                k2 *= MH3_X64_128_C1;
                h2 ^= k2;
        /* FALLTHRU */
        case 8:
                k1 ^= ((u_int64_t)tail[7]) << 56;
        /* FALLTHRU */
        case 7:
                k1 ^= ((u_int64_t)tail[6]) << 48;
        /* FALLTHRU */
        case 6:
                k1 ^= ((u_int64_t)tail[5]) << 40;
        /* FALLTHRU */
        case 5:
                k1 ^= ((u_int64_t)tail[4]) << 32;
        /* FALLTHRU */
        case 4:
                k1 ^= ((u_int64_t)tail[3]) << 24;
        /* FALLTHRU */
        case 3:
                k1 ^= ((u_int64_t)tail[2]) << 16;
        /* FALLTHRU */
        case 2:
                k1 ^= ((u_int64_t)tail[1]) << 8;
        /* FALLTHRU */
        case 1:
                k1 ^= ((u_int64_t)tail[0]) << 0;
                k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
                __asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
                __asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
                k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
                k1 *= MH3_X64_128_C2;
                h1 ^= k1;
        }

        /* finalization */
        h1 ^= len;
        h2 ^= len;

        h1 += h2;
        h2 += h1;

        h1 = mh3_fmix64(h1);
        h2 = mh3_fmix64(h2);

        h1 += h2;
        h2 += h1;

        /* throw away all but the lowest 32 bits */
        return h1 & 0xffffffff;
}
389 | ||
390 | #define JHASH_INIT 0xdeadbeef | |
391 | ||
#define JHASH_MIX(a, b, c) {                    \
        a -= c; a ^= ROTL32(c, 4); c += b;      \
        b -= a; b ^= ROTL32(a, 6); a += c;      \
        c -= b; c ^= ROTL32(b, 8); b += a;      \
        a -= c; a ^= ROTL32(c, 16); c += b;     \
        b -= a; b ^= ROTL32(a, 19); a += c;     \
        c -= b; c ^= ROTL32(b, 4); b += a;      \
}

#define JHASH_FINAL(a, b, c) {                  \
        c ^= b; c -= ROTL32(b, 14);             \
        a ^= c; a -= ROTL32(c, 11);             \
        b ^= a; b -= ROTL32(a, 25);             \
        c ^= b; c -= ROTL32(b, 16);             \
        a ^= c; a -= ROTL32(c, 4);              \
        b ^= a; b -= ROTL32(a, 14);             \
        c ^= b; c -= ROTL32(b, 24);             \
}
410 | ||
411 | #if BYTE_ORDER == BIG_ENDIAN | |
412 | /* | |
413 | * hashbig() | |
414 | */ | |
415 | u_int32_t | |
416 | net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed) | |
417 | { | |
418 | u_int32_t a, b, c; | |
419 | ||
420 | /* Set up the internal state */ | |
421 | a = b = c = JHASH_INIT + len + seed; | |
422 | ||
423 | if (ALIGNED32(key)) { | |
424 | /* read 32-bit chunks */ | |
425 | const u_int32_t *k = (const u_int32_t *)key; | |
426 | ||
427 | /* | |
428 | * all but last block: | |
429 | * aligned reads and affect 32 bits of (a,b,c) | |
430 | */ | |
431 | while (len > 12) { | |
432 | a += k[0]; | |
433 | b += k[1]; | |
434 | c += k[2]; | |
435 | JHASH_MIX(a, b, c); | |
436 | len -= 12; | |
437 | k += 3; | |
438 | } | |
439 | ||
440 | /* | |
441 | * handle the last (probably partial) block | |
442 | * | |
443 | * "k[2] << 8" actually reads beyond the end of the string, | |
444 | * but then shifts out the part it's not allowed to read. | |
445 | * Because the string is aligned, the illegal read is in | |
446 | * the same word as the rest of the string. The masking | |
447 | * trick does make the hash noticably faster for short | |
448 | * strings (like English words). | |
449 | */ | |
                switch (len) {
                case 12:
                        c += k[2];
                        b += k[1];
                        a += k[0];
                        break;

                case 11:
                        c += k[2] & 0xffffff00;
                        b += k[1];
                        a += k[0];
                        break;

                case 10:
                        c += k[2] & 0xffff0000;
                        b += k[1];
                        a += k[0];
                        break;

                case 9:
                        c += k[2] & 0xff000000;
                        b += k[1];
                        a += k[0];
                        break;

                case 8:
                        b += k[1];
                        a += k[0];
                        break;

                case 7:
                        b += k[1] & 0xffffff00;
                        a += k[0];
                        break;

                case 6:
                        b += k[1] & 0xffff0000;
                        a += k[0];
                        break;

                case 5:
                        b += k[1] & 0xff000000;
                        a += k[0];
                        break;

                case 4:
                        a += k[0];
                        break;

                case 3:
                        a += k[0] & 0xffffff00;
                        break;

                case 2:
                        a += k[0] & 0xffff0000;
                        break;

                case 1:
                        a += k[0] & 0xff000000;
                        break;

                case 0:
                        /* zero length requires no mixing */
                        return c;
                }

                JHASH_FINAL(a, b, c);

                return c;
        }

        /* need to read the key one byte at a time */
        const u_int8_t *k = (const u_int8_t *)key;

        /* all but the last block: affect some 32 bits of (a,b,c) */
        while (len > 12) {
                a += ((u_int32_t)k[0]) << 24;
                a += ((u_int32_t)k[1]) << 16;
                a += ((u_int32_t)k[2]) << 8;
                a += ((u_int32_t)k[3]);
                b += ((u_int32_t)k[4]) << 24;
                b += ((u_int32_t)k[5]) << 16;
                b += ((u_int32_t)k[6]) << 8;
                b += ((u_int32_t)k[7]);
                c += ((u_int32_t)k[8]) << 24;
                c += ((u_int32_t)k[9]) << 16;
                c += ((u_int32_t)k[10]) << 8;
                c += ((u_int32_t)k[11]);
                JHASH_MIX(a, b, c);
                len -= 12;
                k += 12;
        }

        /* last block: affect all 32 bits of (c) */
        switch (len) {
        case 12:
                c += k[11];
        /* FALLTHRU */
        case 11:
                c += ((u_int32_t)k[10]) << 8;
        /* FALLTHRU */
        case 10:
                c += ((u_int32_t)k[9]) << 16;
        /* FALLTHRU */
        case 9:
                c += ((u_int32_t)k[8]) << 24;
        /* FALLTHRU */
        case 8:
                b += k[7];
        /* FALLTHRU */
        case 7:
                b += ((u_int32_t)k[6]) << 8;
        /* FALLTHRU */
        case 6:
                b += ((u_int32_t)k[5]) << 16;
        /* FALLTHRU */
        case 5:
                b += ((u_int32_t)k[4]) << 24;
        /* FALLTHRU */
        case 4:
                a += k[3];
        /* FALLTHRU */
        case 3:
                a += ((u_int32_t)k[2]) << 8;
        /* FALLTHRU */
        case 2:
                a += ((u_int32_t)k[1]) << 16;
        /* FALLTHRU */
        case 1:
                a += ((u_int32_t)k[0]) << 24;
                break;

        case 0:
                /* zero length requires no mixing */
                return c;
        }

        JHASH_FINAL(a, b, c);

        return c;
}
#else /* LITTLE_ENDIAN */
/*
 * hashlittle()
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
        u_int32_t a, b, c;

        /* Set up the internal state */
        a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
        /*
         * On i386/x86_64, reading 32-bit chunks is faster when the key is
         * either 32-bit aligned or not even 16-bit aligned; 16-bit reads
         * are used only when the key is 16-bit (but not 32-bit) aligned.
         */
        if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
        if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
                /* read 32-bit chunks */
                const u_int32_t *k = (const u_int32_t *)key;

                /*
                 * all but the last block:
                 * aligned reads, each affecting 32 bits of (a,b,c)
                 */
                while (len > 12) {
                        a += k[0];
                        b += k[1];
                        c += k[2];
                        JHASH_MIX(a, b, c);
                        len -= 12;
                        k += 3;
                }

                /*
                 * handle the last (probably partial) block
                 *
                 * "k[2] & 0xffffff" actually reads beyond the end of the
                 * string, but then masks off the part it's not allowed
                 * to read.  Because the string is aligned, the masked-off
                 * tail is in the same word as the rest of the string.
                 * The masking trick does make the hash noticeably faster
                 * for short strings (like English words).
                 */
                switch (len) {
                case 12:
                        c += k[2];
                        b += k[1];
                        a += k[0];
                        break;

                case 11:
                        c += k[2] & 0xffffff;
                        b += k[1];
                        a += k[0];
                        break;

                case 10:
                        c += k[2] & 0xffff;
                        b += k[1];
                        a += k[0];
                        break;

                case 9:
                        c += k[2] & 0xff;
                        b += k[1];
                        a += k[0];
                        break;

                case 8:
                        b += k[1];
                        a += k[0];
                        break;

                case 7:
                        b += k[1] & 0xffffff;
                        a += k[0];
                        break;

                case 6:
                        b += k[1] & 0xffff;
                        a += k[0];
                        break;

                case 5:
                        b += k[1] & 0xff;
                        a += k[0];
                        break;

                case 4:
                        a += k[0];
                        break;

                case 3:
                        a += k[0] & 0xffffff;
                        break;

                case 2:
                        a += k[0] & 0xffff;
                        break;

                case 1:
                        a += k[0] & 0xff;
                        break;

                case 0:
                        /* zero length requires no mixing */
                        return c;
                }

                JHASH_FINAL(a, b, c);

                return c;
        }
#if !defined(__i386__) && !defined(__x86_64__)
        else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
                /* read 16-bit chunks */
                const u_int16_t *k = (const u_int16_t *)key;
                const u_int8_t *k8;

                /* all but the last block: aligned reads and different mixing */
                while (len > 12) {
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        b += k[2] + (((u_int32_t)k[3]) << 16);
                        c += k[4] + (((u_int32_t)k[5]) << 16);
                        JHASH_MIX(a, b, c);
                        len -= 12;
                        k += 6;
                }

                /* handle the last (probably partial) block */
                k8 = (const u_int8_t *)k;
                switch (len) {
                case 12:
                        c += k[4] + (((u_int32_t)k[5]) << 16);
                        b += k[2] + (((u_int32_t)k[3]) << 16);
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        break;

                case 11:
                        c += ((u_int32_t)k8[10]) << 16;
                /* FALLTHRU */
                case 10:
                        c += k[4];
                        b += k[2] + (((u_int32_t)k[3]) << 16);
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        break;

                case 9:
                        c += k8[8];
                /* FALLTHRU */
                case 8:
                        b += k[2] + (((u_int32_t)k[3]) << 16);
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        break;

                case 7:
                        b += ((u_int32_t)k8[6]) << 16;
                /* FALLTHRU */
                case 6:
                        b += k[2];
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        break;

                case 5:
                        b += k8[4];
                /* FALLTHRU */
                case 4:
                        a += k[0] + (((u_int32_t)k[1]) << 16);
                        break;

                case 3:
                        a += ((u_int32_t)k8[2]) << 16;
                /* FALLTHRU */
                case 2:
                        a += k[0];
                        break;

                case 1:
                        a += k8[0];
                        break;

                case 0:
                        /* zero length requires no mixing */
                        return c;
                }

                JHASH_FINAL(a, b, c);

                return c;
#if !defined(__i386__) && !defined(__x86_64__)
        }

        /* need to read the key one byte at a time */
        const u_int8_t *k = (const u_int8_t *)key;

        /* all but the last block: affect some 32 bits of (a,b,c) */
        while (len > 12) {
                a += k[0];
                a += ((u_int32_t)k[1]) << 8;
                a += ((u_int32_t)k[2]) << 16;
                a += ((u_int32_t)k[3]) << 24;
                b += k[4];
                b += ((u_int32_t)k[5]) << 8;
                b += ((u_int32_t)k[6]) << 16;
                b += ((u_int32_t)k[7]) << 24;
                c += k[8];
                c += ((u_int32_t)k[9]) << 8;
                c += ((u_int32_t)k[10]) << 16;
                c += ((u_int32_t)k[11]) << 24;
                JHASH_MIX(a, b, c);
                len -= 12;
                k += 12;
        }

        /* last block: affect all 32 bits of (c) */
        switch (len) {
        case 12:
                c += ((u_int32_t)k[11]) << 24;
        /* FALLTHRU */
        case 11:
                c += ((u_int32_t)k[10]) << 16;
        /* FALLTHRU */
        case 10:
                c += ((u_int32_t)k[9]) << 8;
        /* FALLTHRU */
        case 9:
                c += k[8];
        /* FALLTHRU */
        case 8:
                b += ((u_int32_t)k[7]) << 24;
        /* FALLTHRU */
        case 7:
                b += ((u_int32_t)k[6]) << 16;
        /* FALLTHRU */
        case 6:
                b += ((u_int32_t)k[5]) << 8;
        /* FALLTHRU */
        case 5:
                b += k[4];
        /* FALLTHRU */
        case 4:
                a += ((u_int32_t)k[3]) << 24;
        /* FALLTHRU */
        case 3:
                a += ((u_int32_t)k[2]) << 16;
        /* FALLTHRU */
        case 2:
                a += ((u_int32_t)k[1]) << 8;
        /* FALLTHRU */
        case 1:
                a += k[0];
                break;

        case 0:
                /* zero length requires no mixing */
                return c;
        }

        JHASH_FINAL(a, b, c);

        return c;
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif /* LITTLE_ENDIAN */