2 * Copyright (c) 2009-2018 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $NetBSD: cpu_in_cksum.S,v 1.2 2008/01/27 16:58:05 chris Exp $ */
32 * Copyright 2003 Wasabi Systems, Inc.
33 * All rights reserved.
35 * Written by Steve C. Woodford for Wasabi Systems, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed for the NetBSD Project by
48 * Wasabi Systems, Inc.
49 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
50 * or promote products derived from this software without specific prior
53 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
67 #include "../../../osfmk/arm/arch.h"
68 #include "../../../osfmk/arm/proc_reg.h"
71 #error "Unsupported: __ARM_VFP__ < 3"
72 #endif /* __ARM_VFP__ < 3 */
73 #define CKSUM_ERR _kprintf
75 #ifndef LIBSYSCALL_INTERFACE
76 #error "LIBSYSCALL_INTERFACE not defined"
77 #endif /* !LIBSYSCALL_INTERFACE */
78 #define CKSUM_ERR _fprintf_stderr
83 * The following default the implementation to little-endian architectures.
85 #define LITTLE_ENDIAN 1
86 #define BYTE_ORDER LITTLE_ENDIAN
93 * Ugly, but we have little choice, since relying on genassym and <assym.s>
94 * is not possible unless this code lives in osfmk. Note also that this
95 * routine expects an "mbuf-like" argument, and it does not expect the mbuf
96 * to be authentic; it only cares about 3 fields.
105 * The use of R7 in this code as a data register prevents
106 * the use of debugging or instrumentation tools, which is an acceptable
107 * tradeoff considering the potential gain in performance.
111 * Hand-optimised implementations for ARM/Xscale
/* r0, r1, r2 and r12 are pushed before the call and popped after it, so their values survive _enable_kernel_vfp_context. */
116 push {r0, r1, r2, r12}
117 bl _enable_kernel_vfp_context
118 pop {r0, r1, r2, r12}
124 * uint32_t os_cpu_in_cksum_mbuf(struct mbuf *m, int len, int off,
125 * uint32_t initial_sum);
133 * Function wide register usage
135 * r9 remaining length to parse
136 * ip pointer to next mbuf
138 * This function returns the partial 16-bit checksum accumulated in
139 * a 32-bit variable (without 1's complement); caller is responsible
140 * for folding the 32-bit sum into 16-bit and performing the 1's
141 * complement if applicable
/*
 * Entry point. As read by the code below: r0 = current mbuf, r1 = length,
 * r2 = offset (consumed while skipping mbufs), r3 = initial sum.
 * r4-r11 and lr are pushed on entry; each exit visible here pops them
 * back with the saved lr going straight into pc.
 * NOTE(review): parts of this routine are not visible in this excerpt;
 * confirm details against the complete source before relying on them.
 */
143 .globl _os_cpu_in_cksum_mbuf
146 _os_cpu_in_cksum_mbuf:
147 stmfd sp!, {r4-r11,lr}
149 mov r8, r3 /* Accumulate sum in r8 */
150 mov r9, r1 /* save len in r9 */
151 mov ip, r0 /* set ip to the current mbuf */
/* Fast exit: with a zero length the result is just the initial sum. */
153 cmp r9, #0 /* length is 0? */
154 bne .Lin_cksum_skip_loop /* if not, proceed further */
155 mov r0, r8 /* otherwise, return initial sum */
157 ldmfd sp!, {r4-r11, pc}
/* Skip phase: load the M_LEN, M_DATA and M_NEXT fields of the mbuf at ip; ip is then replaced by the M_NEXT value. */
159 .Lin_cksum_skip_loop:
160 ldr r1, [ip, #(M_LEN)]
161 ldr r0, [ip, #(M_DATA)]
162 ldr ip, [ip, #(M_NEXT)]
163 .Lin_cksum_skip_entry:
164 subs r2, r2, r1 /* offset = offset - mbuf length */
165 blt .Lin_cksum_skip_done /* if offset has gone negative start with this mbuf */
167 bne .Lin_cksum_skip_loop
170 .Lin_cksum_skip_done:
171 add r0, r2, r0 /* data += offset (offset is < 0) */
172 add r0, r0, r1 /* data += length of mbuf */
173 /* data == start of data to cksum */
174 rsb r1, r2, #0x00 /* length = remainder of mbuf to read */
/* Reload M_LEN, M_DATA and M_NEXT from the mbuf that ip now points at. */
179 ldr r1, [ip, #(M_LEN)]
180 ldr r0, [ip, #(M_DATA)]
181 ldr ip, [ip, #(M_NEXT)]
195 * Replace the 'blne _ASM_LABEL(L_cksumdata)' by bringing the called function
196 * inline. This results in slightly faster code, and also permits the whole
197 * function to be included in kernel profiling data.
201 * The main in*_cksum() workhorse...
204 * r0 Pointer to buffer
209 * r2 Accumulated 32-bit sum
216 /* We first have to word-align the buffer. */
218 beq .Lcksumdata_wordaligned
220 cmp r1, r7 /* Enough bytes left to make it? */
221 blt .Lcksumdata_endgame
223 ldrb r4, [r0], #0x01 /* Fetch 1st byte */
224 ldrbge r5, [r0], #0x01 /* Fetch 2nd byte */
226 ldrbgt r6, [r0], #0x01 /* Fetch 3rd byte */
228 /* Combine the three bytes depending on endianness and alignment */
229 #if BYTE_ORDER != LITTLE_ENDIAN
230 orreq r2, r5, r4, lsl #8
231 orreq r2, r2, r6, lsl #24
232 orrne r2, r4, r5, lsl #8
233 orrne r2, r2, r6, lsl #16
235 orreq r2, r4, r5, lsl #8
236 orreq r2, r2, r6, lsl #16
237 orrne r2, r5, r4, lsl #8
238 orrne r2, r2, r6, lsl #24
240 subs r1, r1, r7 /* Update length */
241 beq .Lin_cksum_next /* All done? */
243 /* Buffer is now word aligned */
244 .Lcksumdata_wordaligned:
248 cmp r1, #512 // do this if r1 is at least 512
257 // move r2 to s16 (q4) for neon computation
/* Each vld1.32 below reads two q registers (32 bytes) from r0 and post-increments r0. */
259 vld1.32 {q0-q1}, [r0]!
261 vld1.32 {q2-q3}, [r0]!
263 // pre-decrement size by 64
276 subs r3, r3, #0x40 // decrement size by 64
307 ands r1, r1, #0x3f // residual bytes
312 #endif /* __ARM_VFP__ >= 3 */
315 blt .Lcksumdata_bigloop_end
/* Each ldmia below loads four words (16 bytes) from r0 with writeback; alternate loads use r7 as a plain data register. */
318 ldmia r0!, {r3, r4, r5, r6}
322 ldmia r0!, {r3, r4, r5, r7}
327 ldmia r0!, {r3, r4, r5, r6}
332 ldmia r0!, {r3, r4, r5, r7}
340 bge .Lcksumdata_bigloop
341 .Lcksumdata_bigloop_end:
348 blt .Lcksumdata_less_than_32
349 ldmia r0!, {r3, r4, r5, r6}
353 ldmia r0!, {r3, r4, r5, r7}
363 .Lcksumdata_less_than_32:
364 /* There are less than 32 bytes left */
368 adds r4, r4, r4, lsr #1 /* Side effect: Clear carry flag */
372 * Note: We use ldm here, even on Xscale, since the combined issue/result
373 * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
375 /* At least 24 bytes remaining... */
381 /* At least 16 bytes remaining... */
386 /* At least 8 bytes remaining... */
391 /* Less than 8 bytes remaining... */
394 blt .Lcksumdata_lessthan4
401 /* Deal with < 4 bytes remaining */
402 .Lcksumdata_lessthan4:
406 /* Deal with 1 to 3 remaining bytes, possibly misaligned */
408 ldrb r3, [r0] /* Fetch first byte */
410 ldrbge r4, [r0, #0x01] /* Fetch 2nd and 3rd as necessary */
412 ldrbgt r5, [r0, #0x02]
414 /* Combine the three bytes depending on endianness and alignment */
416 #if BYTE_ORDER != LITTLE_ENDIAN
417 orreq r3, r4, r3, lsl #8
418 orreq r3, r3, r5, lsl #24
419 orrne r3, r3, r4, lsl #8
420 orrne r3, r3, r5, lsl #16
422 orreq r3, r3, r4, lsl #8
423 orreq r3, r3, r5, lsl #16
424 orrne r3, r4, r3, lsl #8
425 orrne r3, r3, r5, lsl #24
441 add r0, r0, r8, lsr #16
442 add r0, r0, r0, lsr #16
445 * If we were to 1's complement it (XOR with 0xffff):
450 ldmfd sp!, {r4-r11, pc}
/* Out-of-data path (per the message text): r0 = address of the string at .Lin_cksum_whoops_str. */
453 adr r0, .Lin_cksum_whoops_str
457 ldmfd sp!, {r4-r11, pc}
/* NUL-terminated (.asciz) diagnostic text referenced by the adr above. */
459 .Lin_cksum_whoops_str:
460 .asciz "os_cpu_in_cksum_mbuf: out of data\n"