/*
 * Copyright (c) 2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*-
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <mach/boolean.h>
#include <machine/endian.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <netinet/in.h>
#include <libkern/libkern.h>

int cpu_in_cksum(struct mbuf *, int, int, uint32_t);

#define PREDICT_FALSE(_exp)	__builtin_expect((_exp), 0)

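/*
 * Illustrative usage (a sketch added here, not part of the original
 * source): a caller checksums "len" bytes starting at byte offset "off"
 * within the mbuf chain "m", folding an optional 32-bit partial sum into
 * the result, e.g.
 *
 *	int sum = cpu_in_cksum(m, len, off, 0);
 *
 * On success the return value is the 16-bit one's complement checksum;
 * -1 indicates that the chain ran out of data before off + len bytes
 * were covered.
 */
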
/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network code and should be
 * modified for each CPU to be as fast as possible.
 *
 * A discussion of different implementation techniques can be found in
 * RFC 1071.
 *
 * The default implementation for 32-bit architectures uses a 32-bit
 * accumulator and operates on 16-bit operands.
 *
 * The default implementation for 64-bit architectures uses a 64-bit
 * accumulator and operates on 32-bit operands.
 *
 * Both versions are unrolled to handle 32-byte / 64-byte fragments as the
 * core of the inner loop.  After each iteration of the inner loop, a
 * partial reduction is done to avoid carry in long packets.
 */

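/*
 * Worked example (added for illustration, not part of the original
 * comment): folding a wide accumulator back into 16 bits is what both
 * variants below rely on.  With a 32-bit accumulator holding 0x0001ffff,
 * one fold gives (0x0001ffff >> 16) + (0x0001ffff & 0xffff) = 0x10000,
 * and a second fold gives 0x0001; the checksum is the one's complement
 * of that folded value.  The same reduction, applied whenever a partial
 * sum grows large inside the unrolled loops, prevents overflow on long
 * packets.
 */
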
#if ULONG_MAX == 0xffffffffUL
/* 32-bit version */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 24);
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 24);
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}

#else
/* 64-bit version */
int
cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	for (; len > 0; m = m->m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			printf("%s: out of data\n", __func__);
			return (-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		needs_swap = started_on_odd;
		if ((uintptr_t)data & 2) {
			if (mlen < 2)
				goto trailing_bytes;
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 56);
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap)
			partial = (partial << 8) + (partial >> 56);
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (~final_acc & 0xffff);
}
#endif /* ULONG_MAX != 0xffffffffUL */
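
/*
 * For reference only (an added sketch, not part of the original file): a
 * minimal, non-unrolled version of the same Internet checksum over a flat,
 * 16-bit aligned buffer with an even byte count, in the style of RFC 1071.
 * It is kept under #if 0 and exists purely to show what the optimized
 * routines above compute.
 */
#if 0
static uint16_t
in_cksum_ref(const uint16_t *buf, size_t nwords)
{
	uint32_t sum = 0;

	/* Sum 16-bit words; carries accumulate in the upper half. */
	while (nwords-- > 0)
		sum += *buf++;

	/* Fold the carries back into the low 16 bits (twice is enough). */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);

	return (uint16_t)(~sum & 0xffff);
}
#endif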