/*
 * Copyright (c) 2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <mach/boolean.h>
#include <machine/endian.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <netinet/in.h>
#include <libkern/libkern.h>

int cpu_in_cksum(struct mbuf *, int, int, uint32_t);

/*
 * Marks a branch as unlikely so the compiler keeps the hot path
 * straight-line (used for the "out of data" error checks below).
 */
#define PREDICT_FALSE(_exp)	__builtin_expect((_exp), 0)
/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 *
 * A discussion of different implementation techniques can be found in
 * RFC 1071.
 *
 * The default implementation for 32-bit architectures uses
 * a 32-bit accumulator and operates on 16-bit operands.
 *
 * The default implementation for 64-bit architectures uses
 * a 64-bit accumulator and operates on 32-bit operands.
 *
 * Both versions are unrolled to handle 32-byte / 64-byte fragments as the
 * core of the inner loop. After each iteration of the inner loop, a partial
 * reduction is done to avoid carry in long packets.
 */
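
/*
 * Worked example of the final reduction used by both variants below
 * (illustrative sketch only; "fold32_example" is a hypothetical name and
 * the block is compiled out).  A 32-bit accumulator is folded to 16 bits
 * by adding the high half into the low half until no carry remains; the
 * one's complement of the result is the Internet checksum.
 */
#if 0
static uint16_t
fold32_example(uint32_t sum)
{
	unsigned int final_acc;

	/* e.g. sum = 0x2000f000 -> 0x2000 + 0xf000 = 0x11000 */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	/* 0x11000 -> 0x1 + 0x1000 = 0x1001 */
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (uint16_t)(~final_acc & 0xffff);	/* -> 0xeffe */
}
#endif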
#if ULONG_MAX == 0xffffffffUL
/* 32-bit version */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* Skip the initial offset, advancing through the chain as needed */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}
	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * A bit in either of the top two positions means
			 * the accumulator could overflow on a later pass;
			 * fold partial into sum now.
			 */
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 24);
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
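		/*
		 * Fewer than 32 bytes remain in this mbuf; mlen's low
		 * five bits select which of the masked blocks below run.
		 */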
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 24);
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}

#else
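/*
 * 64-bit version: same structure as above, but the accumulator is 64
 * bits wide and the unrolled core sums 32-bit words in 64-byte chunks,
 * folding early only when the top two bits of the accumulator are set.
 */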
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	/* Skip the initial offset, advancing through the chain as needed */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}
	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		/* The 32-bit loads below require 4-byte alignment */
		if ((uintptr_t)data & 2) {
			if (mlen < 2)
				goto trailing_bytes;
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * As above: fold partial into sum before the
			 * 64-bit accumulator can overflow.
			 */
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 56);
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
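		/*
		 * Fewer than 64 bytes remain; mlen's low six bits
		 * select which of the masked blocks below run.
		 */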
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 56);
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}
#endif /* ULONG_MAX != 0xffffffffUL */
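
/*
 * Usage sketch (illustrative only; compiled out).  "ip_header_ok" is a
 * hypothetical helper: summing an IP header whose checksum field is
 * already filled in yields 0 when the header is intact, because the
 * stored checksum is the one's complement of the sum of the other words.
 */
#if 0
static boolean_t
ip_header_ok(struct mbuf *m, int hlen)
{
	/* off = 0: the header starts at the beginning of the chain */
	return (cpu_in_cksum(m, hlen, 0, 0) == 0);
}
#endif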