]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/cpu_in_cksum_gen.c
xnu-4570.31.3.tar.gz
[apple/xnu.git] / bsd / netinet / cpu_in_cksum_gen.c
CommitLineData
39236c6e 1/*
5ba3f43e 2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*-
30 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 *
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in
41 * the documentation and/or other materials provided with the
42 * distribution.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
45 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
46 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
47 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
48 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
49 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
50 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
51 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
52 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
53 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
54 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
5ba3f43e 58#ifdef KERNEL
39236c6e 59#include <sys/param.h>
39236c6e
A
60#include <machine/endian.h>
61#include <sys/mcache.h>
62#include <sys/mbuf.h>
63#include <kern/debug.h>
39236c6e 64#include <libkern/libkern.h>
5ba3f43e
A
65#include <mach/boolean.h>
66#include <pexpert/pexpert.h>
67#define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args)
68#else /* !KERNEL */
69#ifndef LIBSYSCALL_INTERFACE
70#error "LIBSYSCALL_INTERFACE not defined"
71#endif /* !LIBSYSCALL_INTERFACE */
72#include <stdlib.h>
73#include <stddef.h>
74#include <stdint.h>
75#include <unistd.h>
76#include <strings.h>
77#include <mach/boolean.h>
78#endif /* !KERNEL */
79
/*
 * Compile-time assertion.  Only provided here when nothing earlier in
 * the translation unit has already defined _CASSERT.
 */
#ifndef _CASSERT
#define	_CASSERT(x)	_Static_assert(x, "compile-time assertion failed")
#endif /* !_CASSERT */

/*
 * Fallback VERIFY(): expands to a no-op, so the checked expression is
 * neither evaluated nor enforced when this definition is the one in use.
 */
#ifndef VERIFY
#define	VERIFY(EX)	((void)0)
#endif /* !VERIFY */

/*
 * Fallback CKSUM_ERR(): discards the format string and its arguments
 * when no error reporter has been defined above.
 */
#ifndef CKSUM_ERR
#define	CKSUM_ERR(fmt, args...)	((void)0)
#endif /* !CKSUM_ERR */
39236c6e 92
5ba3f43e
A
/*
 * Branch-prediction hints.  The argument is normalised to 0 or 1 before
 * being handed to __builtin_expect(), so any scalar (including pointers)
 * may be passed; the macro value is that normalised 0/1.
 */
#define	PREDICT_TRUE(x)		__builtin_expect(((long)(x)) != 0, 1L)
#define	PREDICT_FALSE(x)	__builtin_expect(((long)(x)) != 0, 0L)
39236c6e 95
5ba3f43e
A
/*
 * Minimal stand-in for an mbuf, used only to call os_cpu_in_cksum_mbuf().
 * The field offsets are checked at compile time inside os_cpu_in_cksum().
 */
struct _mbuf {
	struct _mbuf	*_m_next;	/* next buffer in the chain, or NULL */
	void		*_m_pad;	/* unused; keeps _m_data at its offset */
	uint8_t		*_m_data;	/* first byte of this buffer's data */
	int32_t		_m_len;		/* number of data bytes in this buffer */
};

extern uint32_t os_cpu_in_cksum(const void *data, uint32_t len,
    uint32_t initial_sum);
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off,
    uint32_t initial_sum);
106
107uint32_t
108os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
109{
110 /*
111 * If data is 4-bytes aligned, length is multiple of 4-bytes,
112 * and the amount to checksum is small, this would be quicker;
113 * this is suitable for IPv4 header.
114 */
115 if (IS_P2ALIGNED(data, sizeof (uint32_t)) &&
116 len <= 64 && (len & 3) == 0) {
117 uint8_t *p = __DECONST(uint8_t *, data);
118 uint64_t sum = initial_sum;
119
120 if (PREDICT_TRUE(len == 20)) { /* simple IPv4 header */
121 sum += *(uint32_t *)(void *)p;
122 sum += *(uint32_t *)(void *)(p + 4);
123 sum += *(uint32_t *)(void *)(p + 8);
124 sum += *(uint32_t *)(void *)(p + 12);
125 sum += *(uint32_t *)(void *)(p + 16);
126 } else {
127 while (len) {
128 sum += *(uint32_t *)(void *)p;
129 p += 4;
130 len -= 4;
131 }
132 }
133
134 /* fold 64-bit to 16-bit (deferred carries) */
135 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
136 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
137 sum = (sum >> 16) + (sum & 0xffff); /* 16-bit + carry */
138 sum = (sum >> 16) + (sum & 0xffff); /* final carry */
139
140 return (sum & 0xffff);
141 }
142
143 /*
144 * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks
145 * at 3 fields: {next,data,len}, and since it doesn't care about
146 * the authenticity of the mbuf, we use a fake one here. Make
147 * sure the offsets are as expected.
148 */
149#if defined(__LP64__)
150 _CASSERT(offsetof(struct _mbuf, _m_next) == 0);
151 _CASSERT(offsetof(struct _mbuf, _m_data) == 16);
152 _CASSERT(offsetof(struct _mbuf, _m_len) == 24);
153#else /* !__LP64__ */
154 _CASSERT(offsetof(struct _mbuf, _m_next) == 0);
155 _CASSERT(offsetof(struct _mbuf, _m_data) == 8);
156 _CASSERT(offsetof(struct _mbuf, _m_len) == 12);
157#endif /* !__LP64__ */
158#ifdef KERNEL
159 _CASSERT(offsetof(struct _mbuf, _m_next) ==
160 offsetof(struct mbuf, m_next));
161 _CASSERT(offsetof(struct _mbuf, _m_data) ==
162 offsetof(struct mbuf, m_data));
163 _CASSERT(offsetof(struct _mbuf, _m_len) ==
164 offsetof(struct mbuf, m_len));
165#endif /* KERNEL */
166 struct _mbuf m = {
167 ._m_next = NULL,
168 ._m_data = __DECONST(uint8_t *, data),
169 ._m_len = len,
170 };
171
172 return (os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum));
173}
174
175#if defined(__i386__) || defined(__x86_64__)
39236c6e
A
176
177/*
178 * Checksum routine for Internet Protocol family headers (Portable Version).
179 *
180 * This routine is very heavily used in the network
181 * code and should be modified for each CPU to be as fast as possible.
182 *
183 * A discussion of different implementation techniques can be found in
184 * RFC 1071.
185 *
186 * The default implementation for 32-bit architectures is using
187 * a 32-bit accumulator and operating on 16-bit operands.
188 *
189 * The default implementation for 64-bit architectures is using
190 * a 64-bit accumulator and operating on 32-bit operands.
191 *
192 * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
193 * of the inner loop. After each iteration of the inner loop, a partial
194 * reduction is done to avoid carry in long packets.
195 */
196
5ba3f43e 197#if !defined(__LP64__)
39236c6e 198/* 32-bit version */
5ba3f43e
A
199uint32_t
200os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
39236c6e
A
201{
202 int mlen;
203 uint32_t sum, partial;
204 unsigned int final_acc;
205 uint8_t *data;
206 boolean_t needs_swap, started_on_odd;
207
208 VERIFY(len >= 0);
209 VERIFY(off >= 0);
210
211 needs_swap = FALSE;
212 started_on_odd = FALSE;
213 sum = (initial_sum >> 16) + (initial_sum & 0xffff);
214
215 for (;;) {
216 if (PREDICT_FALSE(m == NULL)) {
5ba3f43e
A
217 CKSUM_ERR("%s: out of data\n", __func__);
218 return ((uint32_t)-1);
39236c6e 219 }
5ba3f43e 220 mlen = m->_m_len;
39236c6e
A
221 if (mlen > off) {
222 mlen -= off;
5ba3f43e 223 data = m->_m_data + off;
39236c6e
A
224 goto post_initial_offset;
225 }
226 off -= mlen;
227 if (len == 0)
228 break;
5ba3f43e 229 m = m->_m_next;
39236c6e
A
230 }
231
5ba3f43e 232 for (; len > 0; m = m->_m_next) {
39236c6e 233 if (PREDICT_FALSE(m == NULL)) {
5ba3f43e
A
234 CKSUM_ERR("%s: out of data\n", __func__);
235 return ((uint32_t)-1);
39236c6e 236 }
5ba3f43e
A
237 mlen = m->_m_len;
238 data = m->_m_data;
39236c6e
A
239post_initial_offset:
240 if (mlen == 0)
241 continue;
242 if (mlen > len)
243 mlen = len;
244 len -= mlen;
245
246 partial = 0;
247 if ((uintptr_t)data & 1) {
248 /* Align on word boundary */
249 started_on_odd = !started_on_odd;
250#if BYTE_ORDER == LITTLE_ENDIAN
251 partial = *data << 8;
252#else
253 partial = *data;
254#endif
255 ++data;
256 --mlen;
257 }
258 needs_swap = started_on_odd;
259 while (mlen >= 32) {
260 __builtin_prefetch(data + 32);
261 partial += *(uint16_t *)(void *)data;
262 partial += *(uint16_t *)(void *)(data + 2);
263 partial += *(uint16_t *)(void *)(data + 4);
264 partial += *(uint16_t *)(void *)(data + 6);
265 partial += *(uint16_t *)(void *)(data + 8);
266 partial += *(uint16_t *)(void *)(data + 10);
267 partial += *(uint16_t *)(void *)(data + 12);
268 partial += *(uint16_t *)(void *)(data + 14);
269 partial += *(uint16_t *)(void *)(data + 16);
270 partial += *(uint16_t *)(void *)(data + 18);
271 partial += *(uint16_t *)(void *)(data + 20);
272 partial += *(uint16_t *)(void *)(data + 22);
273 partial += *(uint16_t *)(void *)(data + 24);
274 partial += *(uint16_t *)(void *)(data + 26);
275 partial += *(uint16_t *)(void *)(data + 28);
276 partial += *(uint16_t *)(void *)(data + 30);
277 data += 32;
278 mlen -= 32;
279 if (PREDICT_FALSE(partial & 0xc0000000)) {
280 if (needs_swap)
281 partial = (partial << 8) +
282 (partial >> 24);
283 sum += (partial >> 16);
284 sum += (partial & 0xffff);
285 partial = 0;
286 }
287 }
288 if (mlen & 16) {
289 partial += *(uint16_t *)(void *)data;
290 partial += *(uint16_t *)(void *)(data + 2);
291 partial += *(uint16_t *)(void *)(data + 4);
292 partial += *(uint16_t *)(void *)(data + 6);
293 partial += *(uint16_t *)(void *)(data + 8);
294 partial += *(uint16_t *)(void *)(data + 10);
295 partial += *(uint16_t *)(void *)(data + 12);
296 partial += *(uint16_t *)(void *)(data + 14);
297 data += 16;
298 mlen -= 16;
299 }
300 /*
301 * mlen is not updated below as the remaining tests
302 * are using bit masks, which are not affected.
303 */
304 if (mlen & 8) {
305 partial += *(uint16_t *)(void *)data;
306 partial += *(uint16_t *)(void *)(data + 2);
307 partial += *(uint16_t *)(void *)(data + 4);
308 partial += *(uint16_t *)(void *)(data + 6);
309 data += 8;
310 }
311 if (mlen & 4) {
312 partial += *(uint16_t *)(void *)data;
313 partial += *(uint16_t *)(void *)(data + 2);
314 data += 4;
315 }
316 if (mlen & 2) {
317 partial += *(uint16_t *)(void *)data;
318 data += 2;
319 }
320 if (mlen & 1) {
321#if BYTE_ORDER == LITTLE_ENDIAN
322 partial += *data;
323#else
324 partial += *data << 8;
325#endif
326 started_on_odd = !started_on_odd;
327 }
328
329 if (needs_swap)
330 partial = (partial << 8) + (partial >> 24);
331 sum += (partial >> 16) + (partial & 0xffff);
332 /*
333 * Reduce sum to allow potential byte swap
334 * in the next iteration without carry.
335 */
336 sum = (sum >> 16) + (sum & 0xffff);
337 }
338 final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
339 final_acc = (final_acc >> 16) + (final_acc & 0xffff);
5ba3f43e 340 return (final_acc & 0xffff);
39236c6e
A
341}
342
5ba3f43e 343#else /* __LP64__ */
39236c6e 344/* 64-bit version */
5ba3f43e
A
345uint32_t
346os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
39236c6e
A
347{
348 int mlen;
349 uint64_t sum, partial;
350 unsigned int final_acc;
351 uint8_t *data;
352 boolean_t needs_swap, started_on_odd;
353
354 VERIFY(len >= 0);
355 VERIFY(off >= 0);
356
357 needs_swap = FALSE;
358 started_on_odd = FALSE;
359 sum = initial_sum;
360
361 for (;;) {
362 if (PREDICT_FALSE(m == NULL)) {
5ba3f43e
A
363 CKSUM_ERR("%s: out of data\n", __func__);
364 return ((uint32_t)-1);
39236c6e 365 }
5ba3f43e 366 mlen = m->_m_len;
39236c6e
A
367 if (mlen > off) {
368 mlen -= off;
5ba3f43e 369 data = m->_m_data + off;
39236c6e
A
370 goto post_initial_offset;
371 }
372 off -= mlen;
373 if (len == 0)
374 break;
5ba3f43e 375 m = m->_m_next;
39236c6e
A
376 }
377
5ba3f43e 378 for (; len > 0; m = m->_m_next) {
39236c6e 379 if (PREDICT_FALSE(m == NULL)) {
5ba3f43e
A
380 CKSUM_ERR("%s: out of data\n", __func__);
381 return ((uint32_t)-1);
39236c6e 382 }
5ba3f43e
A
383 mlen = m->_m_len;
384 data = m->_m_data;
39236c6e
A
385post_initial_offset:
386 if (mlen == 0)
387 continue;
388 if (mlen > len)
389 mlen = len;
390 len -= mlen;
391
392 partial = 0;
393 if ((uintptr_t)data & 1) {
394 /* Align on word boundary */
395 started_on_odd = !started_on_odd;
396#if BYTE_ORDER == LITTLE_ENDIAN
397 partial = *data << 8;
398#else
399 partial = *data;
400#endif
401 ++data;
402 --mlen;
403 }
404 needs_swap = started_on_odd;
405 if ((uintptr_t)data & 2) {
406 if (mlen < 2)
407 goto trailing_bytes;
408 partial += *(uint16_t *)(void *)data;
409 data += 2;
410 mlen -= 2;
411 }
412 while (mlen >= 64) {
413 __builtin_prefetch(data + 32);
414 __builtin_prefetch(data + 64);
415 partial += *(uint32_t *)(void *)data;
416 partial += *(uint32_t *)(void *)(data + 4);
417 partial += *(uint32_t *)(void *)(data + 8);
418 partial += *(uint32_t *)(void *)(data + 12);
419 partial += *(uint32_t *)(void *)(data + 16);
420 partial += *(uint32_t *)(void *)(data + 20);
421 partial += *(uint32_t *)(void *)(data + 24);
422 partial += *(uint32_t *)(void *)(data + 28);
423 partial += *(uint32_t *)(void *)(data + 32);
424 partial += *(uint32_t *)(void *)(data + 36);
425 partial += *(uint32_t *)(void *)(data + 40);
426 partial += *(uint32_t *)(void *)(data + 44);
427 partial += *(uint32_t *)(void *)(data + 48);
428 partial += *(uint32_t *)(void *)(data + 52);
429 partial += *(uint32_t *)(void *)(data + 56);
430 partial += *(uint32_t *)(void *)(data + 60);
431 data += 64;
432 mlen -= 64;
433 if (PREDICT_FALSE(partial & (3ULL << 62))) {
434 if (needs_swap)
435 partial = (partial << 8) +
436 (partial >> 56);
437 sum += (partial >> 32);
438 sum += (partial & 0xffffffff);
439 partial = 0;
440 }
441 }
442 /*
443 * mlen is not updated below as the remaining tests
444 * are using bit masks, which are not affected.
445 */
446 if (mlen & 32) {
447 partial += *(uint32_t *)(void *)data;
448 partial += *(uint32_t *)(void *)(data + 4);
449 partial += *(uint32_t *)(void *)(data + 8);
450 partial += *(uint32_t *)(void *)(data + 12);
451 partial += *(uint32_t *)(void *)(data + 16);
452 partial += *(uint32_t *)(void *)(data + 20);
453 partial += *(uint32_t *)(void *)(data + 24);
454 partial += *(uint32_t *)(void *)(data + 28);
455 data += 32;
456 }
457 if (mlen & 16) {
458 partial += *(uint32_t *)(void *)data;
459 partial += *(uint32_t *)(void *)(data + 4);
460 partial += *(uint32_t *)(void *)(data + 8);
461 partial += *(uint32_t *)(void *)(data + 12);
462 data += 16;
463 }
464 if (mlen & 8) {
465 partial += *(uint32_t *)(void *)data;
466 partial += *(uint32_t *)(void *)(data + 4);
467 data += 8;
468 }
469 if (mlen & 4) {
470 partial += *(uint32_t *)(void *)data;
471 data += 4;
472 }
473 if (mlen & 2) {
474 partial += *(uint16_t *)(void *)data;
475 data += 2;
476 }
477trailing_bytes:
478 if (mlen & 1) {
479#if BYTE_ORDER == LITTLE_ENDIAN
480 partial += *data;
481#else
482 partial += *data << 8;
483#endif
484 started_on_odd = !started_on_odd;
485 }
486
487 if (needs_swap)
488 partial = (partial << 8) + (partial >> 56);
489 sum += (partial >> 32) + (partial & 0xffffffff);
490 /*
491 * Reduce sum to allow potential byte swap
492 * in the next iteration without carry.
493 */
494 sum = (sum >> 32) + (sum & 0xffffffff);
495 }
496 final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
497 ((sum >> 16) & 0xffff) + (sum & 0xffff);
498 final_acc = (final_acc >> 16) + (final_acc & 0xffff);
499 final_acc = (final_acc >> 16) + (final_acc & 0xffff);
5ba3f43e 500 return (final_acc & 0xffff);
39236c6e 501}
5ba3f43e
A
502#endif /* __LP64__ */
503
504#endif /* __i386__ || __x86_64__ */