/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifdef KERNEL
#include <sys/param.h>
#include <machine/endian.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <libkern/libkern.h>
#include <mach/boolean.h>
#include <pexpert/pexpert.h>
#define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args)
#else /* !KERNEL */
#ifndef LIBSYSCALL_INTERFACE
#error "LIBSYSCALL_INTERFACE not defined"
#endif /* !LIBSYSCALL_INTERFACE */
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>
#include <strings.h>
#include <mach/boolean.h>
#endif /* !KERNEL */
/* compile time assert */
#ifndef _CASSERT
#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
#endif /* !_CASSERT */

#ifndef VERIFY
#define VERIFY(EX) ((void)0)
#endif /* !VERIFY */

#ifndef CKSUM_ERR
#define CKSUM_ERR(fmt, args...) ((void)0)
#endif /* !CKSUM_ERR */

#define PREDICT_TRUE(x)		__builtin_expect(!!((long)(x)), 1L)
#define PREDICT_FALSE(x)	__builtin_expect(!!((long)(x)), 0L)
/* fake mbuf struct used only for calling os_cpu_in_cksum_mbuf() */
struct _mbuf {
	struct _mbuf	*_m_next;
	void		*_m_pad;	/* keeps _m_data at the mbuf m_data offset */
	uint8_t		*_m_data;
	int32_t		_m_len;
};

extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *, int, int, uint32_t);
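
/*
 * Illustrative usage (explanatory sketch, not part of the original file):
 * a caller computing an IPv4 header checksum would zero the checksum
 * field, then store the one's complement of the folded sum returned here,
 * e.g. with a hypothetical "struct ip *ip":
 *
 *	ip->ip_sum = 0;
 *	ip->ip_sum = ~os_cpu_in_cksum(ip, ip->ip_hl << 2, 0) & 0xffff;
 */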
uint32_t
os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
{
	/*
	 * If data is 4-bytes aligned (conditional), length is multiple
	 * of 4-bytes (required), and the amount to checksum is small,
	 * this would be quicker; this is suitable for IPv4/TCP header.
	 */
	if (
#if !defined(__arm64__) && !defined(__x86_64__)
	    IS_P2ALIGNED(data, sizeof(uint32_t)) &&
#endif /* !__arm64__ && !__x86_64__ */
	    len <= 64 && (len & 3) == 0) {
		uint8_t *p = __DECONST(uint8_t *, data);
		uint64_t sum = initial_sum;

		switch (len) {
		case 20:	/* simple IPv4 or TCP header */
			sum += *(uint32_t *)(void *)p;
			sum += *(uint32_t *)(void *)(p + 4);
			sum += *(uint32_t *)(void *)(p + 8);
			sum += *(uint32_t *)(void *)(p + 12);
			sum += *(uint32_t *)(void *)(p + 16);
			break;

		case 32:	/* TCP header + timestamp option */
			sum += *(uint32_t *)(void *)p;
			sum += *(uint32_t *)(void *)(p + 4);
			sum += *(uint32_t *)(void *)(p + 8);
			sum += *(uint32_t *)(void *)(p + 12);
			sum += *(uint32_t *)(void *)(p + 16);
			sum += *(uint32_t *)(void *)(p + 20);
			sum += *(uint32_t *)(void *)(p + 24);
			sum += *(uint32_t *)(void *)(p + 28);
			break;

		default:
			while (len) {
				sum += *(uint32_t *)(void *)p;
				p += 4;
				len -= 4;
			}
			break;
		}
		/* fold 64-bit to 16-bit (deferred carries) */
		sum = (sum >> 32) + (sum & 0xffffffff);	/* 33-bit */
		sum = (sum >> 16) + (sum & 0xffff);	/* 17-bit + carry */
		sum = (sum >> 16) + (sum & 0xffff);	/* 16-bit + carry */
		sum = (sum >> 16) + (sum & 0xffff);	/* final carry */

		return sum & 0xffff;
	}
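
	/*
	 * Worked example of the fold above (explanatory note, not in the
	 * original file): with sum = 0x2ffff0001,
	 *
	 *	0x2    + 0xffff0001 = 0xffff0003	33-bit result
	 *	0xffff + 0x0003     = 0x00010002	17-bit result
	 *	0x1    + 0x0002     = 0x00000003	16-bit result
	 *	0x0    + 0x0003     = 0x00000003	no carry left
	 */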
	/*
	 * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks
	 * at 3 fields: {next,data,len}, and since it doesn't care about
	 * the authenticity of the mbuf, we use a fake one here.  Make
	 * sure the offsets are as expected.
	 */
#if defined(__LP64__)
	_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
	_CASSERT(offsetof(struct _mbuf, _m_data) == 16);
	_CASSERT(offsetof(struct _mbuf, _m_len) == 24);
#else /* !__LP64__ */
	_CASSERT(offsetof(struct _mbuf, _m_next) == 0);
	_CASSERT(offsetof(struct _mbuf, _m_data) == 8);
	_CASSERT(offsetof(struct _mbuf, _m_len) == 12);
#endif /* !__LP64__ */
#ifdef KERNEL
	_CASSERT(offsetof(struct _mbuf, _m_next) ==
	    offsetof(struct mbuf, m_next));
	_CASSERT(offsetof(struct _mbuf, _m_data) ==
	    offsetof(struct mbuf, m_data));
	_CASSERT(offsetof(struct _mbuf, _m_len) ==
	    offsetof(struct mbuf, m_len));
#endif /* KERNEL */
	struct _mbuf m = {
		._m_next = NULL,
		._m_data = __DECONST(uint8_t *, data),
		._m_len = len,
	};

	return os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum);
}
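
/*
 * Illustrative sketch (not part of the original file): because
 * os_cpu_in_cksum_mbuf() only follows the {_m_next, _m_data, _m_len}
 * fields, two non-contiguous buffers (hypothetical buf1/len1, buf2/len2)
 * can be checksummed as one logical span by chaining fake mbufs:
 *
 *	struct _mbuf m2 = { ._m_next = NULL, ._m_data = buf2, ._m_len = len2 };
 *	struct _mbuf m1 = { ._m_next = &m2, ._m_data = buf1, ._m_len = len1 };
 *	uint32_t sum = os_cpu_in_cksum_mbuf(&m1, len1 + len2, 0, 0);
 */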
#if defined(__i386__) || defined(__x86_64__)

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * A discussion of different implementation techniques can be found in
 * RFC 1071.
 *
 * The default implementation for 32-bit architectures is using
 * a 32-bit accumulator and operating on 16-bit operands.
 *
 * The default implementation for 64-bit architectures is using
 * a 64-bit accumulator and operating on 32-bit operands.
 *
 * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core
 * of the inner loop. After each iteration of the inner loop, a partial
 * reduction is done to avoid carry in long packets.
 */
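
/*
 * For reference (explanatory sketch, not part of the original file), the
 * straightforward form of the same computation, in the spirit of RFC 1071,
 * sums 16-bit words into a wider accumulator and folds the carries:
 *
 *	uint32_t sum = 0;
 *	while (len > 1) {
 *		sum += *(const uint16_t *)(const void *)data;
 *		data += 2;
 *		len -= 2;
 *	}
 *	if (len == 1)
 *		sum += *data;		(low byte on little-endian hosts)
 *	while (sum >> 16)
 *		sum = (sum >> 16) + (sum & 0xffff);
 *
 * The versions below compute the same value with unrolling, alignment
 * fix-ups and deferred carry folding.
 */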
#if !defined(__LP64__)
/* 32-bit version */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 24);
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 24);
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
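
/*
 * Explanatory note (not in the original file) on the needs_swap logic used
 * by both versions: RFC 1071 observes that the 16-bit one's-complement sum
 * is independent of byte order.  When a segment starts on an odd offset
 * within the packet, every subsequent load pairs bytes in swapped lanes,
 * so the accumulated partial sum is corrected with a single 8-bit rotation,
 * (partial << 8) + (partial >> 24) here, before it is folded into sum.
 */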
#else /* __LP64__ */
/* 64-bit version */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		if ((uintptr_t)data & 2) {
			if (mlen < 2)
				goto trailing_bytes;
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 56);
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 56);
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
#endif /* __LP64__ */
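
/*
 * Sanity-check sketch (illustrative, not part of the original file): for
 * the 8 bytes { 0x45, 0x00, 0x00, 0x54, 0xa6, 0xf2, 0x00, 0x00 }, the
 * 16-bit one's-complement sum in network byte order is
 *
 *	0x4500 + 0x0054 + 0xa6f2 + 0x0000 = 0xec46
 *
 * A little-endian host computes the byte-swapped value 0x46ec, which
 * stores to the same two bytes in memory.
 */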

#endif /* __i386__ || __x86_64__ */