/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#if !defined _ARM_ARCH_7 || defined VARIANT_DYLD

#include <mach/machine/asm.h>
#include <architecture/arm/asm_help.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */
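
/*
 * Rough C sketch of that strategy, for orientation only. This comment is not
 * assembled; memset_sketch is an invented name, and the assembly below replaces
 * the byte loops with conditional stores and store-multiple instructions.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void *memset_sketch(void *ptr, int c, size_t len)
 *	{
 *		unsigned char *p = ptr;
 *		uint32_t pat = (uint8_t)c * 0x01010101u;	// c in all four byte lanes
 *
 *		if (len >= 32) {
 *			while ((uintptr_t)p & 31) {		// align dst to 32 bytes
 *				*p++ = (uint8_t)c;
 *				len--;
 *			}
 *			for (; len >= 64; p += 64, len -= 64)	// 64 bytes per pass
 *				for (int i = 0; i < 16; i++)	// two 32 byte halves
 *					((uint32_t *)p)[i] = pat;
 *		}
 *		while (len--)					// 0-63 byte tail, or short input
 *			*p++ = (uint8_t)c;
 *		return ptr;
 *	}
 */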

	.text
	.align 2

	.globl _memset
/* void *memset(void *ptr, int c, size_t len); */
_memset:
	/* move len into r1, unpack c into r2 */
	mov	r3, r2
	and	r1, r1, #0xff
	orr	r1, r1, r1, lsl #8
	orr	r2, r1, r1, lsl #16
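	/* r2 now holds c replicated into all four byte lanes
	   (for example, c = 0x2a gives r2 = 0x2a2a2a2a) */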
	mov	r1, r3
	b	Lbzeroengine

	.globl _bzero
/* void bzero(void *ptr, size_t len); */
_bzero:
	/* zero out r2 so we can be just like memset(0) */
	mov	r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov	r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov	r3, r2

	/* check for zero len */
	cmp	r1, #0
	bxeq	lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp	r1, #32
	blt	L_bytewise

	/* check for 32 byte unaligned ptr */
	tst	r12, #0x1f
	bne	L_unaligned

	/* make sure we have more than 64 bytes to zero */
	cmp	r1, #64
	blt	L_lessthan64aligned
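	/* reaching here: len is at least 64 and the destination is 32 byte aligned */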

L_64ormorealigned:

	/* >= 64 bytes of len, 32 byte aligned */

	/* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */
	stmfd	sp!, { r4-r6, r8, r10-r11 }
	mov	r4, r2
	mov	r5, r2
	mov	r6, r2
	mov	r8, r2
	mov	r10, r2
	mov	r11, r2
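	/* the fill pattern now sits in r2-r6, r8 and r10-r11, so one stmia can write 32 bytes */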

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub	r1, r1, #64
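	/* main loop: each pass stores eight registers twice (2 x 32 = 64 bytes) */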

L_64loop:
	stmia	r12!, { r2-r6, r8, r10-r11 }
	subs	r1, r1, #64
	stmia	r12!, { r2-r6, r8, r10-r11 }
	bge	L_64loop

	/* restore the saved regs */
	ldmfd	sp!, { r4-r6, r8, r10-r11 }

	/* check for completion (had previously subtracted an extra 64 from len) */
	adds	r1, r1, #64
	bxeq	lr

L_lessthan64aligned:
	/* do we have 16 or more bytes left */
	cmp	r1, #16
	stmgeia	r12!, { r2-r3 }
	stmgeia	r12!, { r2-r3 }
	subges	r1, r1, #16
	bgt	L_lessthan64aligned
	bxeq	lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov	r1, r1, lsl #28		/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr	cpsr_f, r1
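	/* after the msr, len bit 3 lands in N, bit 2 in Z, bit 1 in C and bit 0 in V,
	   so the conditional stores below emit 8, 4, 2 and 1 bytes as needed */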
	stmmiia	r12!, { r2-r3 }		/* n is set, store 8 bytes */
	streq	r2, [r12], #4		/* z is set, store 4 bytes */
	strcsh	r2, [r12], #2		/* c is set, store 2 bytes */
	strvsb	r2, [r12], #1		/* v is set, store 1 byte */
	bx	lr

L_bytewise:
	/* bytewise copy, 2 bytes at a time, alignment not guaranteed */
	subs	r1, r1, #2
	strb	r2, [r12], #1
	strplb	r2, [r12], #1
	bhi	L_bytewise
	bx	lr
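	/* in the loop above, strplb stores the second byte only when at least two bytes
	   remained before the subtract, and bhi keeps looping only while bytes are left */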

L_unaligned:
	/* unaligned on 32 byte boundary, store 1-15 bytes until we're 16 byte aligned */
	mov	r3, r12, lsl #28
	rsb	r3, r3, #0x00000000
	msr	cpsr_f, r3
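	/* the negated low address bits give the byte count needed to reach 16 byte
	   alignment; the msr drops that nibble into N (8s), Z (4s), C (2s), V (1s) */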
	strvsb	r2, [r12], #1		/* v is set, unaligned in the 1s column */
	strcsh	r2, [r12], #2		/* c is set, unaligned in the 2s column */
	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */
	strmi	r2, [r12], #4		/* n is set, unaligned in the 8s column */
	strmi	r2, [r12], #4		/* n is set, second half of the 8 byte store */

	/* subtract off the stored bytes */
	subs	r1, r1, r3, lsr #28
	bxeq	lr

	/* we had previously trashed r3, restore it */
	mov	r3, r2

	/* now make sure we're 32 byte aligned */
	tst	r12, #(1 << 4)
	stmneia	r12!, { r2-r3 }
	stmneia	r12!, { r2-r3 }
	subne	r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp	r1, #64
	bge	L_64ormorealigned
	b	L_lessthan64aligned

X_LEAF(___bzero, _bzero)

#endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD