]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
5ba3f43e | 2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1988, 1992, 1993 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 | |
61 | */ | |
62 | ||
63 | #include <sys/param.h> | |
39236c6e | 64 | #include <machine/endian.h> |
1c79356b | 65 | #include <sys/mbuf.h> |
2d21ac55 | 66 | #include <kern/debug.h> |
39236c6e | 67 | #include <net/dlil.h> |
2d21ac55 | 68 | #include <netinet/in.h> |
39236c6e | 69 | #define _IP_VHL |
2d21ac55 | 70 | #include <netinet/ip.h> |
39236c6e | 71 | #include <netinet/ip_var.h> |
1c79356b A |
72 | |
73 | /* | |
74 | * Checksum routine for Internet Protocol family headers (Portable Version). | |
75 | * | |
76 | * This routine is very heavily used in the network | |
77 | * code and should be modified for each CPU to be as fast as possible. | |
78 | */ | |
39236c6e A |
/*
 * Fold the 64-bit accumulator `sum' into a 1's complement 16-bit quantity.
 *
 * The expansion site must have three locals in scope: `sum' (the
 * accumulator), `q_util' (union q_util) and `l_util' (union l_util).
 * The four 16-bit lanes of `sum' are added into a 32-bit value, whose two
 * 16-bit lanes are added in turn; ADDCARRY() (defined outside this file)
 * is then applied to the result.
 *
 * Wrapped in do { } while (0) so that `REDUCE16;' is a single statement
 * and can be used safely as the body of an unbraced if/else.
 */
#define	REDUCE16 do {							\
	q_util.q = sum;							\
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];				\
	ADDCARRY(sum);							\
} while (0)
1c79356b | 85 | |
/*
 * Overlay of one 32-bit word with its two 16-bit halves; used by
 * REDUCE16 to add the halves of an intermediate sum.
 */
union l_util {
	uint16_t	s[sizeof (uint32_t) / sizeof (uint16_t)];	/* 16-bit lanes */
	uint32_t	l;						/* whole word */
};
1c79356b | 90 | |
/*
 * Overlay of one 64-bit word with its 32-bit and 16-bit lanes; used by
 * REDUCE16 to split a 64-bit accumulator into four 16-bit pieces.
 */
union q_util {
	uint16_t	s[sizeof (uint64_t) / sizeof (uint16_t)];	/* 16-bit lanes */
	uint32_t	l[sizeof (uint64_t) / sizeof (uint32_t)];	/* 32-bit lanes */
	uint64_t	q;						/* whole word */
};
0b4e3aa0 | 96 | |
/* Checksum primitive implemented outside this file. */
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);

/*
 * 16-bit 1's complement sum over a contiguous span.
 *
 * buf	start of the span
 * len	number of bytes to sum
 *
 * Hands the span to os_cpu_in_cksum() with 0 as the third argument and
 * returns its result narrowed to 16 bits.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	uint32_t span_sum;

	span_sum = os_cpu_in_cksum(buf, len, 0);
	return (span_sum);
}
2d21ac55 | 107 | |
39236c6e A |
108 | uint16_t inet_cksum_simple(struct mbuf *, int); |
109 | /* | |
110 | * For the exported _in_cksum symbol in BSDKernel symbol set. | |
111 | */ | |
112 | uint16_t | |
2d21ac55 A |
113 | inet_cksum_simple(struct mbuf *m, int len) |
114 | { | |
115 | return (inet_cksum(m, 0, 0, len)); | |
116 | } | |
1c79356b | 117 | |
39236c6e A |
/*
 * Add two 16-bit words and apply ADDCARRY() to the result, returning
 * the value narrowed to 16 bits.
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t total;

	total = a;
	total += b;

	ADDCARRY(total);
	return (total);
}
126 | ||
39236c6e A |
127 | uint16_t |
128 | in_pseudo(uint32_t a, uint32_t b, uint32_t c) | |
0b4e3aa0 | 129 | { |
39236c6e | 130 | uint64_t sum; |
0b4e3aa0 | 131 | union q_util q_util; |
2d21ac55 | 132 | union l_util l_util; |
0b4e3aa0 | 133 | |
39236c6e | 134 | sum = (uint64_t)a + b + c; |
0b4e3aa0 A |
135 | REDUCE16; |
136 | return (sum); | |
0b4e3aa0 A |
137 | } |
138 | ||
39236c6e A |
139 | uint16_t |
140 | in_pseudo64(uint64_t a, uint64_t b, uint64_t c) | |
1c79356b | 141 | { |
39236c6e A |
142 | uint64_t sum; |
143 | union q_util q_util; | |
2d21ac55 | 144 | union l_util l_util; |
1c79356b | 145 | |
39236c6e A |
146 | sum = a + b + c; |
147 | REDUCE16; | |
148 | return (sum); | |
149 | } | |
1c79356b | 150 | |
39236c6e A |
151 | /* |
152 | * May be used on IP header with options. | |
153 | */ | |
154 | uint16_t | |
155 | in_cksum_hdr_opt(const struct ip *ip) | |
156 | { | |
157 | return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff); | |
158 | } | |
159 | ||
160 | /* | |
161 | * A wrapper around the simple in_cksum_hdr() and the more complicated | |
162 | * inet_cksum(); the former is chosen if the IP header is simple, | |
163 | * contiguous and 32-bit aligned. Also does some stats accounting. | |
164 | */ | |
165 | uint16_t | |
166 | ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out) | |
167 | { | |
168 | struct ip *ip = mtod(m, struct ip *); | |
169 | ||
170 | if (out) { | |
171 | ipstat.ips_snd_swcsum++; | |
172 | ipstat.ips_snd_swcsum_bytes += hlen; | |
173 | } else { | |
174 | ipstat.ips_rcv_swcsum++; | |
175 | ipstat.ips_rcv_swcsum_bytes += hlen; | |
0b4e3aa0 | 176 | } |
0b4e3aa0 | 177 | |
39236c6e A |
178 | if (hlen == sizeof (*ip) && |
179 | m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip)) | |
180 | return (in_cksum_hdr(ip)); | |
0b4e3aa0 | 181 | |
39236c6e A |
182 | return (inet_cksum(m, 0, 0, hlen)); |
183 | } | |
2d21ac55 | 184 | |
5ba3f43e A |
185 | uint16_t |
186 | ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len, | |
187 | int out) | |
188 | { | |
189 | const struct ip *ip = buffer; | |
190 | ||
191 | if (out) { | |
192 | ipstat.ips_snd_swcsum++; | |
193 | ipstat.ips_snd_swcsum_bytes += hlen; | |
194 | } else { | |
195 | ipstat.ips_rcv_swcsum++; | |
196 | ipstat.ips_rcv_swcsum_bytes += hlen; | |
197 | } | |
198 | ||
199 | if (hlen == sizeof (*ip) && | |
200 | len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip)) | |
201 | return (in_cksum_hdr(ip)); | |
202 | ||
203 | return (inet_cksum_buffer(buffer, 0, 0, hlen)); | |
204 | } | |
205 | ||
39236c6e A |
206 | /* |
207 | * m MUST contain at least an IP header, if nxt is specified; | |
208 | * nxt is the upper layer protocol number; | |
209 | * off is an offset where TCP/UDP/ICMP header starts; | |
210 | * len is a total length of a transport segment (e.g. TCP header + TCP payload) | |
211 | */ | |
212 | uint16_t | |
213 | inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len) | |
214 | { | |
215 | uint32_t sum; | |
0b4e3aa0 | 216 | |
39236c6e A |
217 | sum = m_sum16(m, off, len); |
218 | ||
219 | /* include pseudo header checksum? */ | |
220 | if (nxt != 0) { | |
221 | struct ip *ip; | |
222 | unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8))); | |
223 | uint32_t mlen; | |
0b4e3aa0 | 224 | |
0b4e3aa0 | 225 | /* |
39236c6e A |
226 | * Sanity check |
227 | * | |
228 | * Use m_length2() instead of m_length(), as we cannot rely on | |
229 | * the caller setting m_pkthdr.len correctly, if the mbuf is | |
230 | * a M_PKTHDR one. | |
0b4e3aa0 | 231 | */ |
39236c6e A |
232 | if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) { |
233 | panic("%s: mbuf %p too short (%d) for IPv4 header", | |
234 | __func__, m, mlen); | |
235 | /* NOTREACHED */ | |
0b4e3aa0 | 236 | } |
39236c6e | 237 | |
0b4e3aa0 | 238 | /* |
39236c6e A |
239 | * In case the IP header is not contiguous, or not 32-bit |
240 | * aligned, copy it to a local buffer. Note here that we | |
241 | * expect the data pointer to point to the IP header. | |
0b4e3aa0 | 242 | */ |
39236c6e A |
243 | if ((sizeof (*ip) > m->m_len) || |
244 | !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) { | |
245 | m_copydata(m, 0, sizeof (*ip), (caddr_t)buf); | |
246 | ip = (struct ip *)(void *)buf; | |
247 | } else { | |
248 | ip = (struct ip *)(void *)(m->m_data); | |
1c79356b | 249 | } |
39236c6e A |
250 | |
251 | /* add pseudo header checksum */ | |
252 | sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
253 | htonl(len + nxt)); | |
254 | ||
255 | /* fold in carry bits */ | |
256 | ADDCARRY(sum); | |
1c79356b | 257 | } |
39236c6e | 258 | |
1c79356b A |
259 | return (~sum & 0xffff); |
260 | } | |
5ba3f43e A |
261 | |
262 | /* | |
263 | * buffer MUST contain at least an IP header, if nxt is specified; | |
264 | * nxt is the upper layer protocol number; | |
265 | * off is an offset where TCP/UDP/ICMP header starts; | |
266 | * len is a total length of a transport segment (e.g. TCP header + TCP payload) | |
267 | */ | |
268 | uint16_t | |
269 | inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off, | |
270 | uint32_t len) | |
271 | { | |
272 | uint32_t sum; | |
273 | ||
274 | if (off >= len) | |
275 | panic("%s: off (%d) >= len (%d)", __func__, off, len); | |
276 | ||
277 | sum = b_sum16(&((const uint8_t *)buffer)[off], len); | |
278 | ||
279 | /* include pseudo header checksum? */ | |
280 | if (nxt != 0) { | |
281 | const struct ip *ip; | |
282 | unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8))); | |
283 | ||
284 | /* | |
285 | * In case the IP header is not contiguous, or not 32-bit | |
286 | * aligned, copy it to a local buffer. Note here that we | |
287 | * expect the data pointer to point to the IP header. | |
288 | */ | |
289 | if (!IP_HDR_ALIGNED_P(buffer)) { | |
290 | memcpy(buf, buffer, sizeof (*ip)); | |
291 | ip = (const struct ip *)(const void *)buf; | |
292 | } else { | |
293 | ip = (const struct ip *)buffer; | |
294 | } | |
295 | ||
296 | /* add pseudo header checksum */ | |
297 | sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
298 | htonl(len + nxt)); | |
299 | ||
300 | /* fold in carry bits */ | |
301 | ADDCARRY(sum); | |
302 | } | |
303 | ||
304 | return (~sum & 0xffff); | |
305 | } | |
306 | ||
307 | #if DEBUG || DEVELOPMENT | |
5ba3f43e A |
308 | #include <pexpert/pexpert.h> |
309 | ||
310 | #define CKSUM_ERR kprintf | |
311 | ||
312 | /* | |
313 | * The following routines implement the portable, reference implementation | |
314 | * of os_cpu_in_cksum_mbuf(). This is currently used only for validating | |
315 | * the correctness of the platform-specific implementation, at boot time | |
316 | * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing | |
317 | * a 1's complement on it. | |
318 | */ | |
319 | #if !defined(__LP64__) | |
320 | /* 32-bit version */ | |
321 | uint32_t | |
322 | in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) | |
323 | { | |
324 | int mlen; | |
325 | uint32_t sum, partial; | |
326 | unsigned int final_acc; | |
327 | uint8_t *data; | |
328 | boolean_t needs_swap, started_on_odd; | |
329 | ||
330 | VERIFY(len >= 0); | |
331 | VERIFY(off >= 0); | |
332 | ||
333 | needs_swap = FALSE; | |
334 | started_on_odd = FALSE; | |
335 | sum = (initial_sum >> 16) + (initial_sum & 0xffff); | |
336 | ||
337 | for (;;) { | |
338 | if (__improbable(m == NULL)) { | |
339 | CKSUM_ERR("%s: out of data\n", __func__); | |
340 | return ((uint32_t)-1); | |
341 | } | |
342 | mlen = m->m_len; | |
343 | if (mlen > off) { | |
344 | mlen -= off; | |
345 | data = mtod(m, uint8_t *) + off; | |
346 | goto post_initial_offset; | |
347 | } | |
348 | off -= mlen; | |
349 | if (len == 0) | |
350 | break; | |
351 | m = m->m_next; | |
352 | } | |
353 | ||
354 | for (; len > 0; m = m->m_next) { | |
355 | if (__improbable(m == NULL)) { | |
356 | CKSUM_ERR("%s: out of data\n", __func__); | |
357 | return ((uint32_t)-1); | |
358 | } | |
359 | mlen = m->m_len; | |
360 | data = mtod(m, uint8_t *); | |
361 | post_initial_offset: | |
362 | if (mlen == 0) | |
363 | continue; | |
364 | if (mlen > len) | |
365 | mlen = len; | |
366 | len -= mlen; | |
367 | ||
368 | partial = 0; | |
369 | if ((uintptr_t)data & 1) { | |
370 | /* Align on word boundary */ | |
371 | started_on_odd = !started_on_odd; | |
372 | #if BYTE_ORDER == LITTLE_ENDIAN | |
373 | partial = *data << 8; | |
374 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
375 | partial = *data; | |
376 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
377 | ++data; | |
378 | --mlen; | |
379 | } | |
380 | needs_swap = started_on_odd; | |
381 | while (mlen >= 32) { | |
382 | __builtin_prefetch(data + 32); | |
383 | partial += *(uint16_t *)(void *)data; | |
384 | partial += *(uint16_t *)(void *)(data + 2); | |
385 | partial += *(uint16_t *)(void *)(data + 4); | |
386 | partial += *(uint16_t *)(void *)(data + 6); | |
387 | partial += *(uint16_t *)(void *)(data + 8); | |
388 | partial += *(uint16_t *)(void *)(data + 10); | |
389 | partial += *(uint16_t *)(void *)(data + 12); | |
390 | partial += *(uint16_t *)(void *)(data + 14); | |
391 | partial += *(uint16_t *)(void *)(data + 16); | |
392 | partial += *(uint16_t *)(void *)(data + 18); | |
393 | partial += *(uint16_t *)(void *)(data + 20); | |
394 | partial += *(uint16_t *)(void *)(data + 22); | |
395 | partial += *(uint16_t *)(void *)(data + 24); | |
396 | partial += *(uint16_t *)(void *)(data + 26); | |
397 | partial += *(uint16_t *)(void *)(data + 28); | |
398 | partial += *(uint16_t *)(void *)(data + 30); | |
399 | data += 32; | |
400 | mlen -= 32; | |
401 | if (__improbable(partial & 0xc0000000)) { | |
402 | if (needs_swap) | |
403 | partial = (partial << 8) + | |
404 | (partial >> 24); | |
405 | sum += (partial >> 16); | |
406 | sum += (partial & 0xffff); | |
407 | partial = 0; | |
408 | } | |
409 | } | |
410 | if (mlen & 16) { | |
411 | partial += *(uint16_t *)(void *)data; | |
412 | partial += *(uint16_t *)(void *)(data + 2); | |
413 | partial += *(uint16_t *)(void *)(data + 4); | |
414 | partial += *(uint16_t *)(void *)(data + 6); | |
415 | partial += *(uint16_t *)(void *)(data + 8); | |
416 | partial += *(uint16_t *)(void *)(data + 10); | |
417 | partial += *(uint16_t *)(void *)(data + 12); | |
418 | partial += *(uint16_t *)(void *)(data + 14); | |
419 | data += 16; | |
420 | mlen -= 16; | |
421 | } | |
422 | /* | |
423 | * mlen is not updated below as the remaining tests | |
424 | * are using bit masks, which are not affected. | |
425 | */ | |
426 | if (mlen & 8) { | |
427 | partial += *(uint16_t *)(void *)data; | |
428 | partial += *(uint16_t *)(void *)(data + 2); | |
429 | partial += *(uint16_t *)(void *)(data + 4); | |
430 | partial += *(uint16_t *)(void *)(data + 6); | |
431 | data += 8; | |
432 | } | |
433 | if (mlen & 4) { | |
434 | partial += *(uint16_t *)(void *)data; | |
435 | partial += *(uint16_t *)(void *)(data + 2); | |
436 | data += 4; | |
437 | } | |
438 | if (mlen & 2) { | |
439 | partial += *(uint16_t *)(void *)data; | |
440 | data += 2; | |
441 | } | |
442 | if (mlen & 1) { | |
443 | #if BYTE_ORDER == LITTLE_ENDIAN | |
444 | partial += *data; | |
445 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
446 | partial += *data << 8; | |
447 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
448 | started_on_odd = !started_on_odd; | |
449 | } | |
450 | ||
451 | if (needs_swap) | |
452 | partial = (partial << 8) + (partial >> 24); | |
453 | sum += (partial >> 16) + (partial & 0xffff); | |
454 | /* | |
455 | * Reduce sum to allow potential byte swap | |
456 | * in the next iteration without carry. | |
457 | */ | |
458 | sum = (sum >> 16) + (sum & 0xffff); | |
459 | } | |
460 | final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); | |
461 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
462 | return (final_acc & 0xffff); | |
463 | } | |
464 | ||
465 | #else /* __LP64__ */ | |
466 | /* 64-bit version */ | |
467 | uint32_t | |
468 | in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) | |
469 | { | |
470 | int mlen; | |
471 | uint64_t sum, partial; | |
472 | unsigned int final_acc; | |
473 | uint8_t *data; | |
474 | boolean_t needs_swap, started_on_odd; | |
475 | ||
476 | VERIFY(len >= 0); | |
477 | VERIFY(off >= 0); | |
478 | ||
479 | needs_swap = FALSE; | |
480 | started_on_odd = FALSE; | |
481 | sum = initial_sum; | |
482 | ||
483 | for (;;) { | |
484 | if (__improbable(m == NULL)) { | |
485 | CKSUM_ERR("%s: out of data\n", __func__); | |
486 | return ((uint32_t)-1); | |
487 | } | |
488 | mlen = m->m_len; | |
489 | if (mlen > off) { | |
490 | mlen -= off; | |
491 | data = mtod(m, uint8_t *) + off; | |
492 | goto post_initial_offset; | |
493 | } | |
494 | off -= mlen; | |
495 | if (len == 0) | |
496 | break; | |
497 | m = m->m_next; | |
498 | } | |
499 | ||
500 | for (; len > 0; m = m->m_next) { | |
501 | if (__improbable(m == NULL)) { | |
502 | CKSUM_ERR("%s: out of data\n", __func__); | |
503 | return ((uint32_t)-1); | |
504 | } | |
505 | mlen = m->m_len; | |
506 | data = mtod(m, uint8_t *); | |
507 | post_initial_offset: | |
508 | if (mlen == 0) | |
509 | continue; | |
510 | if (mlen > len) | |
511 | mlen = len; | |
512 | len -= mlen; | |
513 | ||
514 | partial = 0; | |
515 | if ((uintptr_t)data & 1) { | |
516 | /* Align on word boundary */ | |
517 | started_on_odd = !started_on_odd; | |
518 | #if BYTE_ORDER == LITTLE_ENDIAN | |
519 | partial = *data << 8; | |
520 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
521 | partial = *data; | |
522 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
523 | ++data; | |
524 | --mlen; | |
525 | } | |
526 | needs_swap = started_on_odd; | |
527 | if ((uintptr_t)data & 2) { | |
528 | if (mlen < 2) | |
529 | goto trailing_bytes; | |
530 | partial += *(uint16_t *)(void *)data; | |
531 | data += 2; | |
532 | mlen -= 2; | |
533 | } | |
534 | while (mlen >= 64) { | |
535 | __builtin_prefetch(data + 32); | |
536 | __builtin_prefetch(data + 64); | |
537 | partial += *(uint32_t *)(void *)data; | |
538 | partial += *(uint32_t *)(void *)(data + 4); | |
539 | partial += *(uint32_t *)(void *)(data + 8); | |
540 | partial += *(uint32_t *)(void *)(data + 12); | |
541 | partial += *(uint32_t *)(void *)(data + 16); | |
542 | partial += *(uint32_t *)(void *)(data + 20); | |
543 | partial += *(uint32_t *)(void *)(data + 24); | |
544 | partial += *(uint32_t *)(void *)(data + 28); | |
545 | partial += *(uint32_t *)(void *)(data + 32); | |
546 | partial += *(uint32_t *)(void *)(data + 36); | |
547 | partial += *(uint32_t *)(void *)(data + 40); | |
548 | partial += *(uint32_t *)(void *)(data + 44); | |
549 | partial += *(uint32_t *)(void *)(data + 48); | |
550 | partial += *(uint32_t *)(void *)(data + 52); | |
551 | partial += *(uint32_t *)(void *)(data + 56); | |
552 | partial += *(uint32_t *)(void *)(data + 60); | |
553 | data += 64; | |
554 | mlen -= 64; | |
555 | if (__improbable(partial & (3ULL << 62))) { | |
556 | if (needs_swap) | |
557 | partial = (partial << 8) + | |
558 | (partial >> 56); | |
559 | sum += (partial >> 32); | |
560 | sum += (partial & 0xffffffff); | |
561 | partial = 0; | |
562 | } | |
563 | } | |
564 | /* | |
565 | * mlen is not updated below as the remaining tests | |
566 | * are using bit masks, which are not affected. | |
567 | */ | |
568 | if (mlen & 32) { | |
569 | partial += *(uint32_t *)(void *)data; | |
570 | partial += *(uint32_t *)(void *)(data + 4); | |
571 | partial += *(uint32_t *)(void *)(data + 8); | |
572 | partial += *(uint32_t *)(void *)(data + 12); | |
573 | partial += *(uint32_t *)(void *)(data + 16); | |
574 | partial += *(uint32_t *)(void *)(data + 20); | |
575 | partial += *(uint32_t *)(void *)(data + 24); | |
576 | partial += *(uint32_t *)(void *)(data + 28); | |
577 | data += 32; | |
578 | } | |
579 | if (mlen & 16) { | |
580 | partial += *(uint32_t *)(void *)data; | |
581 | partial += *(uint32_t *)(void *)(data + 4); | |
582 | partial += *(uint32_t *)(void *)(data + 8); | |
583 | partial += *(uint32_t *)(void *)(data + 12); | |
584 | data += 16; | |
585 | } | |
586 | if (mlen & 8) { | |
587 | partial += *(uint32_t *)(void *)data; | |
588 | partial += *(uint32_t *)(void *)(data + 4); | |
589 | data += 8; | |
590 | } | |
591 | if (mlen & 4) { | |
592 | partial += *(uint32_t *)(void *)data; | |
593 | data += 4; | |
594 | } | |
595 | if (mlen & 2) { | |
596 | partial += *(uint16_t *)(void *)data; | |
597 | data += 2; | |
598 | } | |
599 | trailing_bytes: | |
600 | if (mlen & 1) { | |
601 | #if BYTE_ORDER == LITTLE_ENDIAN | |
602 | partial += *data; | |
603 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
604 | partial += *data << 8; | |
605 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
606 | started_on_odd = !started_on_odd; | |
607 | } | |
608 | ||
609 | if (needs_swap) | |
610 | partial = (partial << 8) + (partial >> 56); | |
611 | sum += (partial >> 32) + (partial & 0xffffffff); | |
612 | /* | |
613 | * Reduce sum to allow potential byte swap | |
614 | * in the next iteration without carry. | |
615 | */ | |
616 | sum = (sum >> 32) + (sum & 0xffffffff); | |
617 | } | |
618 | final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + | |
619 | ((sum >> 16) & 0xffff) + (sum & 0xffff); | |
620 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
621 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
622 | return (final_acc & 0xffff); | |
623 | } | |
624 | #endif /* __LP64__ */ |
625 | #endif /* DEBUG || DEVELOPMENT */ |