]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
5ba3f43e | 2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
0a7de745 | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Copyright (c) 1988, 1992, 1993 | |
30 | * The Regents of the University of California. All rights reserved. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 | |
61 | */ | |
62 | ||
63 | #include <sys/param.h> | |
39236c6e | 64 | #include <machine/endian.h> |
1c79356b | 65 | #include <sys/mbuf.h> |
2d21ac55 | 66 | #include <kern/debug.h> |
39236c6e | 67 | #include <net/dlil.h> |
2d21ac55 | 68 | #include <netinet/in.h> |
0a7de745 | 69 | #define _IP_VHL |
2d21ac55 | 70 | #include <netinet/ip.h> |
39236c6e | 71 | #include <netinet/ip_var.h> |
1c79356b A |
72 | |
73 | /* | |
74 | * Checksum routine for Internet Protocol family headers (Portable Version). | |
75 | * | |
76 | * This routine is very heavily used in the network | |
77 | * code and should be modified for each CPU to be as fast as possible. | |
78 | */ | |
0a7de745 A |
/*
 * REDUCE16 folds the 64-bit accumulator `sum` into a 16-bit quantity.
 *
 * It is not self-contained: the expansion reads and writes three locals
 * that must be in scope where it is used:
 *
 *	uint64_t sum;		value to fold (updated in place)
 *	union q_util q_util;	scratch, splits sum into four 16-bit lanes
 *	union l_util l_util;	scratch, splits the lane total into two halves
 *
 * The four 16-bit lanes of sum are added, the two 16-bit halves of that
 * total are added, and ADDCARRY() is applied to the result.
 *
 * The body is wrapped in do { } while (0) so that "REDUCE16;" behaves as
 * a single statement, including as the unbraced body of an if/else.
 */
#define REDUCE16 do {                                                     \
	q_util.q = sum;                                                   \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];                                  \
	ADDCARRY(sum);                                                    \
} while (0)

/* A 32-bit value viewed either whole or as two 16-bit halves. */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* A 64-bit value viewed whole, as two 32-bit or as four 16-bit lanes. */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};
0b4e3aa0 | 96 | |
5ba3f43e | 97 | extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t); |
0b4e3aa0 | 98 | |
39236c6e A |
99 | /* |
100 | * Perform 16-bit 1's complement sum on a contiguous span. | |
101 | */ | |
uint16_t
b_sum16(const void *buf, int len)
{
	/*
	 * Delegate to os_cpu_in_cksum() with a third argument of 0; its
	 * 32-bit result is narrowed to 16 bits by the return.
	 */
	const uint32_t result = os_cpu_in_cksum(buf, len, 0);

	return result;
}
2d21ac55 | 107 | |
39236c6e A |
108 | uint16_t inet_cksum_simple(struct mbuf *, int); |
109 | /* | |
110 | * For the exported _in_cksum symbol in BSDKernel symbol set. | |
111 | */ | |
112 | uint16_t | |
2d21ac55 A |
113 | inet_cksum_simple(struct mbuf *m, int len) |
114 | { | |
0a7de745 | 115 | return inet_cksum(m, 0, 0, len); |
2d21ac55 | 116 | } |
1c79356b | 117 | |
39236c6e A |
/*
 * Add two 16-bit words and apply ADDCARRY() to the total before
 * returning it as a 16-bit value.
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t sum;

	sum = a + b;
	ADDCARRY(sum);

	return sum;
}
126 | ||
39236c6e A |
127 | uint16_t |
128 | in_pseudo(uint32_t a, uint32_t b, uint32_t c) | |
0b4e3aa0 | 129 | { |
0a7de745 A |
130 | uint64_t sum; |
131 | union q_util q_util; | |
132 | union l_util l_util; | |
0b4e3aa0 | 133 | |
0a7de745 A |
134 | sum = (uint64_t)a + b + c; |
135 | REDUCE16; | |
136 | return sum; | |
0b4e3aa0 A |
137 | } |
138 | ||
39236c6e A |
139 | uint16_t |
140 | in_pseudo64(uint64_t a, uint64_t b, uint64_t c) | |
1c79356b | 141 | { |
39236c6e A |
142 | uint64_t sum; |
143 | union q_util q_util; | |
2d21ac55 | 144 | union l_util l_util; |
1c79356b | 145 | |
39236c6e A |
146 | sum = a + b + c; |
147 | REDUCE16; | |
0a7de745 | 148 | return sum; |
39236c6e | 149 | } |
1c79356b | 150 | |
39236c6e A |
151 | /* |
152 | * May be used on IP header with options. | |
153 | */ | |
154 | uint16_t | |
155 | in_cksum_hdr_opt(const struct ip *ip) | |
156 | { | |
0a7de745 | 157 | return ~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff; |
39236c6e A |
158 | } |
159 | ||
160 | /* | |
161 | * A wrapper around the simple in_cksum_hdr() and the more complicated | |
162 | * inet_cksum(); the former is chosen if the IP header is simple, | |
163 | * contiguous and 32-bit aligned. Also does some stats accounting. | |
164 | */ | |
165 | uint16_t | |
166 | ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out) | |
167 | { | |
168 | struct ip *ip = mtod(m, struct ip *); | |
169 | ||
170 | if (out) { | |
171 | ipstat.ips_snd_swcsum++; | |
172 | ipstat.ips_snd_swcsum_bytes += hlen; | |
173 | } else { | |
174 | ipstat.ips_rcv_swcsum++; | |
175 | ipstat.ips_rcv_swcsum_bytes += hlen; | |
0b4e3aa0 | 176 | } |
0b4e3aa0 | 177 | |
0a7de745 A |
178 | if (hlen == sizeof(*ip) && |
179 | m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) { | |
180 | return in_cksum_hdr(ip); | |
181 | } | |
0b4e3aa0 | 182 | |
0a7de745 | 183 | return inet_cksum(m, 0, 0, hlen); |
39236c6e | 184 | } |
2d21ac55 | 185 | |
5ba3f43e A |
186 | uint16_t |
187 | ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len, | |
188 | int out) | |
189 | { | |
190 | const struct ip *ip = buffer; | |
191 | ||
192 | if (out) { | |
193 | ipstat.ips_snd_swcsum++; | |
194 | ipstat.ips_snd_swcsum_bytes += hlen; | |
195 | } else { | |
196 | ipstat.ips_rcv_swcsum++; | |
197 | ipstat.ips_rcv_swcsum_bytes += hlen; | |
198 | } | |
199 | ||
0a7de745 A |
200 | if (hlen == sizeof(*ip) && |
201 | len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) { | |
202 | return in_cksum_hdr(ip); | |
203 | } | |
5ba3f43e | 204 | |
0a7de745 | 205 | return inet_cksum_buffer(buffer, 0, 0, hlen); |
5ba3f43e A |
206 | } |
207 | ||
39236c6e A |
208 | /* |
209 | * m MUST contain at least an IP header, if nxt is specified; | |
210 | * nxt is the upper layer protocol number; | |
211 | * off is an offset where TCP/UDP/ICMP header starts; | |
212 | * len is a total length of a transport segment (e.g. TCP header + TCP payload) | |
213 | */ | |
214 | uint16_t | |
215 | inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len) | |
216 | { | |
217 | uint32_t sum; | |
0b4e3aa0 | 218 | |
39236c6e A |
219 | sum = m_sum16(m, off, len); |
220 | ||
221 | /* include pseudo header checksum? */ | |
222 | if (nxt != 0) { | |
223 | struct ip *ip; | |
0a7de745 | 224 | unsigned char buf[sizeof((*ip))] __attribute__((aligned(8))); |
39236c6e | 225 | uint32_t mlen; |
0b4e3aa0 | 226 | |
0b4e3aa0 | 227 | /* |
39236c6e A |
228 | * Sanity check |
229 | * | |
230 | * Use m_length2() instead of m_length(), as we cannot rely on | |
231 | * the caller setting m_pkthdr.len correctly, if the mbuf is | |
232 | * a M_PKTHDR one. | |
0b4e3aa0 | 233 | */ |
0a7de745 | 234 | if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) { |
39236c6e A |
235 | panic("%s: mbuf %p too short (%d) for IPv4 header", |
236 | __func__, m, mlen); | |
237 | /* NOTREACHED */ | |
0b4e3aa0 | 238 | } |
39236c6e | 239 | |
0b4e3aa0 | 240 | /* |
39236c6e A |
241 | * In case the IP header is not contiguous, or not 32-bit |
242 | * aligned, copy it to a local buffer. Note here that we | |
243 | * expect the data pointer to point to the IP header. | |
0b4e3aa0 | 244 | */ |
0a7de745 | 245 | if ((sizeof(*ip) > m->m_len) || |
39236c6e | 246 | !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) { |
0a7de745 | 247 | m_copydata(m, 0, sizeof(*ip), (caddr_t)buf); |
39236c6e A |
248 | ip = (struct ip *)(void *)buf; |
249 | } else { | |
250 | ip = (struct ip *)(void *)(m->m_data); | |
1c79356b | 251 | } |
39236c6e A |
252 | |
253 | /* add pseudo header checksum */ | |
254 | sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
255 | htonl(len + nxt)); | |
256 | ||
257 | /* fold in carry bits */ | |
258 | ADDCARRY(sum); | |
1c79356b | 259 | } |
39236c6e | 260 | |
0a7de745 | 261 | return ~sum & 0xffff; |
1c79356b | 262 | } |
5ba3f43e A |
263 | |
264 | /* | |
265 | * buffer MUST contain at least an IP header, if nxt is specified; | |
266 | * nxt is the upper layer protocol number; | |
267 | * off is an offset where TCP/UDP/ICMP header starts; | |
268 | * len is a total length of a transport segment (e.g. TCP header + TCP payload) | |
269 | */ | |
270 | uint16_t | |
271 | inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off, | |
272 | uint32_t len) | |
273 | { | |
274 | uint32_t sum; | |
275 | ||
0a7de745 | 276 | if (off >= len) { |
5ba3f43e | 277 | panic("%s: off (%d) >= len (%d)", __func__, off, len); |
0a7de745 | 278 | } |
5ba3f43e A |
279 | |
280 | sum = b_sum16(&((const uint8_t *)buffer)[off], len); | |
281 | ||
282 | /* include pseudo header checksum? */ | |
283 | if (nxt != 0) { | |
284 | const struct ip *ip; | |
0a7de745 | 285 | unsigned char buf[sizeof((*ip))] __attribute__((aligned(8))); |
5ba3f43e A |
286 | |
287 | /* | |
288 | * In case the IP header is not contiguous, or not 32-bit | |
289 | * aligned, copy it to a local buffer. Note here that we | |
290 | * expect the data pointer to point to the IP header. | |
291 | */ | |
292 | if (!IP_HDR_ALIGNED_P(buffer)) { | |
0a7de745 | 293 | memcpy(buf, buffer, sizeof(*ip)); |
5ba3f43e A |
294 | ip = (const struct ip *)(const void *)buf; |
295 | } else { | |
296 | ip = (const struct ip *)buffer; | |
297 | } | |
298 | ||
299 | /* add pseudo header checksum */ | |
300 | sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
301 | htonl(len + nxt)); | |
302 | ||
303 | /* fold in carry bits */ | |
304 | ADDCARRY(sum); | |
305 | } | |
306 | ||
0a7de745 | 307 | return ~sum & 0xffff; |
5ba3f43e A |
308 | } |
309 | ||
310 | #if DEBUG || DEVELOPMENT | |
5ba3f43e A |
311 | #include <pexpert/pexpert.h> |
312 | ||
0a7de745 | 313 | #define CKSUM_ERR kprintf |
5ba3f43e A |
314 | |
315 | /* | |
316 | * The following routines implement the portable, reference implementation | |
317 | * of os_cpu_in_cksum_mbuf(). This is currently used only for validating | |
318 | * the correctness of the platform-specific implementation, at boot time | |
319 | * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing | |
320 | * a 1's complement on it. | |
321 | */ | |
322 | #if !defined(__LP64__) | |
323 | /* 32-bit version */ | |
324 | uint32_t | |
325 | in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) | |
326 | { | |
327 | int mlen; | |
328 | uint32_t sum, partial; | |
329 | unsigned int final_acc; | |
330 | uint8_t *data; | |
331 | boolean_t needs_swap, started_on_odd; | |
332 | ||
333 | VERIFY(len >= 0); | |
334 | VERIFY(off >= 0); | |
335 | ||
336 | needs_swap = FALSE; | |
337 | started_on_odd = FALSE; | |
338 | sum = (initial_sum >> 16) + (initial_sum & 0xffff); | |
339 | ||
340 | for (;;) { | |
341 | if (__improbable(m == NULL)) { | |
342 | CKSUM_ERR("%s: out of data\n", __func__); | |
0a7de745 | 343 | return (uint32_t)-1; |
5ba3f43e A |
344 | } |
345 | mlen = m->m_len; | |
346 | if (mlen > off) { | |
347 | mlen -= off; | |
348 | data = mtod(m, uint8_t *) + off; | |
349 | goto post_initial_offset; | |
350 | } | |
351 | off -= mlen; | |
0a7de745 | 352 | if (len == 0) { |
5ba3f43e | 353 | break; |
0a7de745 | 354 | } |
5ba3f43e A |
355 | m = m->m_next; |
356 | } | |
357 | ||
358 | for (; len > 0; m = m->m_next) { | |
359 | if (__improbable(m == NULL)) { | |
360 | CKSUM_ERR("%s: out of data\n", __func__); | |
0a7de745 | 361 | return (uint32_t)-1; |
5ba3f43e A |
362 | } |
363 | mlen = m->m_len; | |
364 | data = mtod(m, uint8_t *); | |
365 | post_initial_offset: | |
0a7de745 | 366 | if (mlen == 0) { |
5ba3f43e | 367 | continue; |
0a7de745 A |
368 | } |
369 | if (mlen > len) { | |
5ba3f43e | 370 | mlen = len; |
0a7de745 | 371 | } |
5ba3f43e A |
372 | len -= mlen; |
373 | ||
374 | partial = 0; | |
375 | if ((uintptr_t)data & 1) { | |
376 | /* Align on word boundary */ | |
377 | started_on_odd = !started_on_odd; | |
378 | #if BYTE_ORDER == LITTLE_ENDIAN | |
379 | partial = *data << 8; | |
380 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
381 | partial = *data; | |
382 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
383 | ++data; | |
384 | --mlen; | |
385 | } | |
386 | needs_swap = started_on_odd; | |
387 | while (mlen >= 32) { | |
388 | __builtin_prefetch(data + 32); | |
389 | partial += *(uint16_t *)(void *)data; | |
390 | partial += *(uint16_t *)(void *)(data + 2); | |
391 | partial += *(uint16_t *)(void *)(data + 4); | |
392 | partial += *(uint16_t *)(void *)(data + 6); | |
393 | partial += *(uint16_t *)(void *)(data + 8); | |
394 | partial += *(uint16_t *)(void *)(data + 10); | |
395 | partial += *(uint16_t *)(void *)(data + 12); | |
396 | partial += *(uint16_t *)(void *)(data + 14); | |
397 | partial += *(uint16_t *)(void *)(data + 16); | |
398 | partial += *(uint16_t *)(void *)(data + 18); | |
399 | partial += *(uint16_t *)(void *)(data + 20); | |
400 | partial += *(uint16_t *)(void *)(data + 22); | |
401 | partial += *(uint16_t *)(void *)(data + 24); | |
402 | partial += *(uint16_t *)(void *)(data + 26); | |
403 | partial += *(uint16_t *)(void *)(data + 28); | |
404 | partial += *(uint16_t *)(void *)(data + 30); | |
405 | data += 32; | |
406 | mlen -= 32; | |
407 | if (__improbable(partial & 0xc0000000)) { | |
0a7de745 | 408 | if (needs_swap) { |
5ba3f43e A |
409 | partial = (partial << 8) + |
410 | (partial >> 24); | |
0a7de745 | 411 | } |
5ba3f43e A |
412 | sum += (partial >> 16); |
413 | sum += (partial & 0xffff); | |
414 | partial = 0; | |
415 | } | |
416 | } | |
417 | if (mlen & 16) { | |
418 | partial += *(uint16_t *)(void *)data; | |
419 | partial += *(uint16_t *)(void *)(data + 2); | |
420 | partial += *(uint16_t *)(void *)(data + 4); | |
421 | partial += *(uint16_t *)(void *)(data + 6); | |
422 | partial += *(uint16_t *)(void *)(data + 8); | |
423 | partial += *(uint16_t *)(void *)(data + 10); | |
424 | partial += *(uint16_t *)(void *)(data + 12); | |
425 | partial += *(uint16_t *)(void *)(data + 14); | |
426 | data += 16; | |
427 | mlen -= 16; | |
428 | } | |
429 | /* | |
430 | * mlen is not updated below as the remaining tests | |
431 | * are using bit masks, which are not affected. | |
432 | */ | |
433 | if (mlen & 8) { | |
434 | partial += *(uint16_t *)(void *)data; | |
435 | partial += *(uint16_t *)(void *)(data + 2); | |
436 | partial += *(uint16_t *)(void *)(data + 4); | |
437 | partial += *(uint16_t *)(void *)(data + 6); | |
438 | data += 8; | |
439 | } | |
440 | if (mlen & 4) { | |
441 | partial += *(uint16_t *)(void *)data; | |
442 | partial += *(uint16_t *)(void *)(data + 2); | |
443 | data += 4; | |
444 | } | |
445 | if (mlen & 2) { | |
446 | partial += *(uint16_t *)(void *)data; | |
447 | data += 2; | |
448 | } | |
449 | if (mlen & 1) { | |
450 | #if BYTE_ORDER == LITTLE_ENDIAN | |
451 | partial += *data; | |
452 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
453 | partial += *data << 8; | |
454 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
455 | started_on_odd = !started_on_odd; | |
456 | } | |
457 | ||
0a7de745 | 458 | if (needs_swap) { |
5ba3f43e | 459 | partial = (partial << 8) + (partial >> 24); |
0a7de745 | 460 | } |
5ba3f43e A |
461 | sum += (partial >> 16) + (partial & 0xffff); |
462 | /* | |
463 | * Reduce sum to allow potential byte swap | |
464 | * in the next iteration without carry. | |
465 | */ | |
466 | sum = (sum >> 16) + (sum & 0xffff); | |
467 | } | |
468 | final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); | |
469 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
0a7de745 | 470 | return final_acc & 0xffff; |
5ba3f43e A |
471 | } |
472 | ||
473 | #else /* __LP64__ */ | |
474 | /* 64-bit version */ | |
475 | uint32_t | |
476 | in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) | |
477 | { | |
478 | int mlen; | |
479 | uint64_t sum, partial; | |
480 | unsigned int final_acc; | |
481 | uint8_t *data; | |
482 | boolean_t needs_swap, started_on_odd; | |
483 | ||
484 | VERIFY(len >= 0); | |
485 | VERIFY(off >= 0); | |
486 | ||
487 | needs_swap = FALSE; | |
488 | started_on_odd = FALSE; | |
489 | sum = initial_sum; | |
490 | ||
491 | for (;;) { | |
492 | if (__improbable(m == NULL)) { | |
493 | CKSUM_ERR("%s: out of data\n", __func__); | |
0a7de745 | 494 | return (uint32_t)-1; |
5ba3f43e A |
495 | } |
496 | mlen = m->m_len; | |
497 | if (mlen > off) { | |
498 | mlen -= off; | |
499 | data = mtod(m, uint8_t *) + off; | |
500 | goto post_initial_offset; | |
501 | } | |
502 | off -= mlen; | |
0a7de745 | 503 | if (len == 0) { |
5ba3f43e | 504 | break; |
0a7de745 | 505 | } |
5ba3f43e A |
506 | m = m->m_next; |
507 | } | |
508 | ||
509 | for (; len > 0; m = m->m_next) { | |
510 | if (__improbable(m == NULL)) { | |
511 | CKSUM_ERR("%s: out of data\n", __func__); | |
0a7de745 | 512 | return (uint32_t)-1; |
5ba3f43e A |
513 | } |
514 | mlen = m->m_len; | |
515 | data = mtod(m, uint8_t *); | |
516 | post_initial_offset: | |
0a7de745 | 517 | if (mlen == 0) { |
5ba3f43e | 518 | continue; |
0a7de745 A |
519 | } |
520 | if (mlen > len) { | |
5ba3f43e | 521 | mlen = len; |
0a7de745 | 522 | } |
5ba3f43e A |
523 | len -= mlen; |
524 | ||
525 | partial = 0; | |
526 | if ((uintptr_t)data & 1) { | |
527 | /* Align on word boundary */ | |
528 | started_on_odd = !started_on_odd; | |
529 | #if BYTE_ORDER == LITTLE_ENDIAN | |
530 | partial = *data << 8; | |
531 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
532 | partial = *data; | |
533 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
534 | ++data; | |
535 | --mlen; | |
536 | } | |
537 | needs_swap = started_on_odd; | |
538 | if ((uintptr_t)data & 2) { | |
0a7de745 | 539 | if (mlen < 2) { |
5ba3f43e | 540 | goto trailing_bytes; |
0a7de745 | 541 | } |
5ba3f43e A |
542 | partial += *(uint16_t *)(void *)data; |
543 | data += 2; | |
544 | mlen -= 2; | |
545 | } | |
546 | while (mlen >= 64) { | |
547 | __builtin_prefetch(data + 32); | |
548 | __builtin_prefetch(data + 64); | |
549 | partial += *(uint32_t *)(void *)data; | |
550 | partial += *(uint32_t *)(void *)(data + 4); | |
551 | partial += *(uint32_t *)(void *)(data + 8); | |
552 | partial += *(uint32_t *)(void *)(data + 12); | |
553 | partial += *(uint32_t *)(void *)(data + 16); | |
554 | partial += *(uint32_t *)(void *)(data + 20); | |
555 | partial += *(uint32_t *)(void *)(data + 24); | |
556 | partial += *(uint32_t *)(void *)(data + 28); | |
557 | partial += *(uint32_t *)(void *)(data + 32); | |
558 | partial += *(uint32_t *)(void *)(data + 36); | |
559 | partial += *(uint32_t *)(void *)(data + 40); | |
560 | partial += *(uint32_t *)(void *)(data + 44); | |
561 | partial += *(uint32_t *)(void *)(data + 48); | |
562 | partial += *(uint32_t *)(void *)(data + 52); | |
563 | partial += *(uint32_t *)(void *)(data + 56); | |
564 | partial += *(uint32_t *)(void *)(data + 60); | |
565 | data += 64; | |
566 | mlen -= 64; | |
567 | if (__improbable(partial & (3ULL << 62))) { | |
0a7de745 | 568 | if (needs_swap) { |
5ba3f43e A |
569 | partial = (partial << 8) + |
570 | (partial >> 56); | |
0a7de745 | 571 | } |
5ba3f43e A |
572 | sum += (partial >> 32); |
573 | sum += (partial & 0xffffffff); | |
574 | partial = 0; | |
575 | } | |
576 | } | |
577 | /* | |
578 | * mlen is not updated below as the remaining tests | |
579 | * are using bit masks, which are not affected. | |
580 | */ | |
581 | if (mlen & 32) { | |
582 | partial += *(uint32_t *)(void *)data; | |
583 | partial += *(uint32_t *)(void *)(data + 4); | |
584 | partial += *(uint32_t *)(void *)(data + 8); | |
585 | partial += *(uint32_t *)(void *)(data + 12); | |
586 | partial += *(uint32_t *)(void *)(data + 16); | |
587 | partial += *(uint32_t *)(void *)(data + 20); | |
588 | partial += *(uint32_t *)(void *)(data + 24); | |
589 | partial += *(uint32_t *)(void *)(data + 28); | |
590 | data += 32; | |
591 | } | |
592 | if (mlen & 16) { | |
593 | partial += *(uint32_t *)(void *)data; | |
594 | partial += *(uint32_t *)(void *)(data + 4); | |
595 | partial += *(uint32_t *)(void *)(data + 8); | |
596 | partial += *(uint32_t *)(void *)(data + 12); | |
597 | data += 16; | |
598 | } | |
599 | if (mlen & 8) { | |
600 | partial += *(uint32_t *)(void *)data; | |
601 | partial += *(uint32_t *)(void *)(data + 4); | |
602 | data += 8; | |
603 | } | |
604 | if (mlen & 4) { | |
605 | partial += *(uint32_t *)(void *)data; | |
606 | data += 4; | |
607 | } | |
608 | if (mlen & 2) { | |
609 | partial += *(uint16_t *)(void *)data; | |
610 | data += 2; | |
611 | } | |
612 | trailing_bytes: | |
613 | if (mlen & 1) { | |
614 | #if BYTE_ORDER == LITTLE_ENDIAN | |
615 | partial += *data; | |
616 | #else /* BYTE_ORDER != LITTLE_ENDIAN */ | |
617 | partial += *data << 8; | |
618 | #endif /* BYTE_ORDER != LITTLE_ENDIAN */ | |
619 | started_on_odd = !started_on_odd; | |
620 | } | |
621 | ||
0a7de745 | 622 | if (needs_swap) { |
5ba3f43e | 623 | partial = (partial << 8) + (partial >> 56); |
0a7de745 | 624 | } |
5ba3f43e A |
625 | sum += (partial >> 32) + (partial & 0xffffffff); |
626 | /* | |
627 | * Reduce sum to allow potential byte swap | |
628 | * in the next iteration without carry. | |
629 | */ | |
630 | sum = (sum >> 32) + (sum & 0xffffffff); | |
631 | } | |
632 | final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + | |
633 | ((sum >> 16) & 0xffff) + (sum & 0xffff); | |
634 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
635 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); | |
0a7de745 | 636 | return final_acc & 0xffff; |
5ba3f43e A |
637 | } |
638 | #endif /* __LP64__ */ |
639 | #endif /* DEBUG || DEVELOPMENT */ |