]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/cpu_in_cksum_gen.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / cpu_in_cksum_gen.c
CommitLineData
39236c6e 1/*
5ba3f43e 2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*-
30 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 *
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in
41 * the documentation and/or other materials provided with the
42 * distribution.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
45 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
46 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
47 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
48 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
49 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
50 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
51 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
52 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
53 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
54 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
5ba3f43e 58#ifdef KERNEL
39236c6e 59#include <sys/param.h>
39236c6e
A
60#include <machine/endian.h>
61#include <sys/mcache.h>
62#include <sys/mbuf.h>
63#include <kern/debug.h>
39236c6e 64#include <libkern/libkern.h>
5ba3f43e
A
65#include <mach/boolean.h>
66#include <pexpert/pexpert.h>
0a7de745 67#define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args)
5ba3f43e
A
68#else /* !KERNEL */
69#ifndef LIBSYSCALL_INTERFACE
70#error "LIBSYSCALL_INTERFACE not defined"
71#endif /* !LIBSYSCALL_INTERFACE */
72#include <stdlib.h>
73#include <stddef.h>
74#include <stdint.h>
75#include <unistd.h>
76#include <strings.h>
77#include <mach/boolean.h>
78#endif /* !KERNEL */
79
80/* compile time assert */
81#ifndef _CASSERT
0a7de745 82#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
5ba3f43e
A
83#endif /* !_CASSERT */
84
85#ifndef VERIFY
0a7de745 86#define VERIFY(EX) ((void)0)
5ba3f43e
A
87#endif /* !VERIFY */
88
89#ifndef CKSUM_ERR
0a7de745 90#define CKSUM_ERR(fmt, args...) ((void)0)
5ba3f43e 91#endif /* !CKSUM_ERR */
39236c6e 92
0a7de745
A
93#define PREDICT_TRUE(x) __builtin_expect(!!((long)(x)), 1L)
94#define PREDICT_FALSE(x) __builtin_expect(!!((long)(x)), 0L)
39236c6e 95
5ba3f43e
A
/* fake mbuf struct used only for calling os_cpu_in_cksum_mbuf() */
struct _mbuf {
	struct _mbuf	*_m_next;	/* next buffer in chain; mirrors mbuf m_next (offset 0) */
	void		*_m_pad;	/* padding only, so the offsets below match struct mbuf */
	uint8_t		*_m_data;	/* start of data; mirrors mbuf m_data */
	int32_t		_m_len;		/* amount of data in this buffer; mirrors mbuf m_len */
};
103
104extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
105extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *, int, int, uint32_t);
106
107uint32_t
108os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
109{
110 /*
c3c9b80d
A
111 * If data is 4-bytes aligned (conditional), length is multiple
112 * of 4-bytes (required), and the amount to checksum is small,
113 * this would be quicker; this is suitable for IPv4/TCP header.
5ba3f43e 114 */
c3c9b80d
A
115 if (
116#if !defined(__arm64__) && !defined(__x86_64__)
117 IS_P2ALIGNED(data, sizeof(uint32_t)) &&
118#endif /* !__arm64__ && !__x86_64__ */
119 len <= 64 && (len & 3) == 0) {
5ba3f43e
A
120 uint8_t *p = __DECONST(uint8_t *, data);
121 uint64_t sum = initial_sum;
122
c3c9b80d
A
123 switch (len) {
124 case 20: /* simple IPv4 or TCP header */
5ba3f43e
A
125 sum += *(uint32_t *)(void *)p;
126 sum += *(uint32_t *)(void *)(p + 4);
127 sum += *(uint32_t *)(void *)(p + 8);
128 sum += *(uint32_t *)(void *)(p + 12);
129 sum += *(uint32_t *)(void *)(p + 16);
c3c9b80d
A
130 break;
131
132 case 32: /* TCP header + timestamp option */
133 sum += *(uint32_t *)(void *)p;
134 sum += *(uint32_t *)(void *)(p + 4);
135 sum += *(uint32_t *)(void *)(p + 8);
136 sum += *(uint32_t *)(void *)(p + 12);
137 sum += *(uint32_t *)(void *)(p + 16);
138 sum += *(uint32_t *)(void *)(p + 20);
139 sum += *(uint32_t *)(void *)(p + 24);
140 sum += *(uint32_t *)(void *)(p + 28);
141 break;
142
143 default:
5ba3f43e
A
144 while (len) {
145 sum += *(uint32_t *)(void *)p;
146 p += 4;
147 len -= 4;
148 }
c3c9b80d 149 break;
5ba3f43e
A
150 }
151
152 /* fold 64-bit to 16-bit (deferred carries) */
0a7de745
A
153 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
154 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
155 sum = (sum >> 16) + (sum & 0xffff); /* 16-bit + carry */
156 sum = (sum >> 16) + (sum & 0xffff); /* final carry */
5ba3f43e 157
0a7de745 158 return sum & 0xffff;
5ba3f43e
A
159 }
160
161 /*
162 * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks
163 * at 3 fields: {next,data,len}, and since it doesn't care about
164 * the authenticity of the mbuf, we use a fake one here. Make
165 * sure the offsets are as expected.
166 */
167#if defined(__LP64__)
168 _CASSERT(offsetof(struct _mbuf, _m_next) == 0);
169 _CASSERT(offsetof(struct _mbuf, _m_data) == 16);
170 _CASSERT(offsetof(struct _mbuf, _m_len) == 24);
171#else /* !__LP64__ */
172 _CASSERT(offsetof(struct _mbuf, _m_next) == 0);
173 _CASSERT(offsetof(struct _mbuf, _m_data) == 8);
174 _CASSERT(offsetof(struct _mbuf, _m_len) == 12);
175#endif /* !__LP64__ */
176#ifdef KERNEL
177 _CASSERT(offsetof(struct _mbuf, _m_next) ==
178 offsetof(struct mbuf, m_next));
179 _CASSERT(offsetof(struct _mbuf, _m_data) ==
180 offsetof(struct mbuf, m_data));
181 _CASSERT(offsetof(struct _mbuf, _m_len) ==
182 offsetof(struct mbuf, m_len));
183#endif /* KERNEL */
184 struct _mbuf m = {
185 ._m_next = NULL,
186 ._m_data = __DECONST(uint8_t *, data),
187 ._m_len = len,
188 };
189
0a7de745 190 return os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum);
5ba3f43e
A
191}
192
193#if defined(__i386__) || defined(__x86_64__)
39236c6e
A
194
195/*
196 * Checksum routine for Internet Protocol family headers (Portable Version).
197 *
198 * This routine is very heavily used in the network
199 * code and should be modified for each CPU to be as fast as possible.
200 *
201 * A discussion of different implementation techniques can be found in
202 * RFC 1071.
203 *
204 * The default implementation for 32-bit architectures is using
205 * a 32-bit accumulator and operating on 16-bit operands.
206 *
207 * The default implementation for 64-bit architectures is using
208 * a 64-bit accumulator and operating on 32-bit operands.
209 *
210 * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
211 * of the inner loop. After each iteration of the inner loop, a partial
212 * reduction is done to avoid carry in long packets.
213 */
214
5ba3f43e 215#if !defined(__LP64__)
39236c6e 216/* 32-bit version */
5ba3f43e
A
/*
 * 32-bit version: walk the (fake) mbuf chain, checksumming `len' bytes
 * starting at offset `off', using a 32-bit accumulator over 16-bit
 * operands.  Returns the partial sum folded to 16 bits, or (uint32_t)-1
 * if the chain runs out of data.
 */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;			/* bytes usable in current buffer */
	uint32_t sum, partial;		/* running sum; per-buffer partial sum */
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* pre-fold the seed to 17 bits so additions below cannot overflow */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* consume `off' bytes of the chain to locate the starting buffer */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * A buffer starting at an odd offset within the overall
		 * stream contributes its bytes in swapped lanes; remember
		 * to byte-rotate the partial sum before folding it in.
		 */
		needs_swap = started_on_odd;
		/* unrolled core loop: 32 bytes per iteration */
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/* fold early before `partial' can overflow */
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* trailing odd byte flips parity for the next buffer */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* final fold: add the two 16-bit halves, then propagate the carry */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
365
5ba3f43e 366#else /* __LP64__ */
39236c6e 367/* 64-bit version */
5ba3f43e
A
/*
 * 64-bit version: walk the (fake) mbuf chain, checksumming `len' bytes
 * starting at offset `off', using a 64-bit accumulator over 32-bit
 * operands.  The core loop handles 64-byte fragments with a partial
 * reduction after each to avoid carries on long packets.  Returns the
 * sum folded to 16 bits, or (uint32_t)-1 if the chain runs out of data.
 */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;			/* bytes usable in current buffer */
	uint64_t sum, partial;		/* running sum; per-buffer partial sum */
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	/* consume `off' bytes of the chain to locate the starting buffer */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * A buffer starting at an odd offset within the overall
		 * stream contributes its bytes in swapped lanes; remember
		 * to byte-rotate the partial sum before folding it in.
		 */
		needs_swap = started_on_odd;
		/* reach 4-byte alignment for the 32-bit loads below */
		if ((uintptr_t)data & 2) {
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		/* unrolled core loop: 64 bytes per iteration */
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/* fold early before `partial' can overflow */
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* trailing odd byte flips parity for the next buffer */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* final fold: add the four 16-bit lanes, then propagate carries */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
5ba3f43e
A
531#endif /* __LP64 */
532
533#endif /* __i386__ || __x86_64__ */