/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/proc_reg.h>

#include <arm/asm.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */
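
/*
 * Illustrative C-level sketch of the strategy described above. This is not
 * part of the build and the function name is hypothetical; the real fast
 * path below uses an eight-register stmia loop rather than single word
 * stores.
 *
 *     void *memset_sketch(void *dst, int c, size_t len)
 *     {
 *         unsigned char *p = dst;
 *         unsigned char  b = (unsigned char)c;
 *         unsigned int   pat = b * 0x01010101u;  // replicate the byte into a word
 *
 *         if (len < 32) {                        // small: plain bytewise stores
 *             while (len--) *p++ = b;
 *             return dst;
 *         }
 *         while ((unsigned long)p & 0x1f) {      // head: reach a 32 byte boundary
 *             *p++ = b; len--;
 *         }
 *         while (len >= 64) {                    // body: 64 bytes per iteration
 *             unsigned int *w = (unsigned int *)p;
 *             for (int i = 0; i < 16; i++) w[i] = pat;
 *             p += 64; len -= 64;
 *         }
 *         while (len--) *p++ = b;                // tail: 0-63 remaining bytes
 *         return dst;
 *     }
 */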
        .syntax unified
        .text
        .align 2

/*
 * void *secure_memset(void *addr, int pattern, size_t length)
 *
 * It is important that this function remains defined in assembly so that
 * the compiler cannot recognize the pattern and optimize the stores away
 * (e.g. dead-store elimination when the buffer is never read again).
 */
ENTRY(secure_memset)
/* void *memset(void *ptr, int c, size_t len); */
ENTRY(memset)
        /* move len into r1, unpack c into r2 */
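        /* e.g. c = 0xAB becomes r2 = 0xABABABAB, so the word and multi-register stores below write the pattern into every byte */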
        mov     r3, r2
        and     r1, r1, #0xff
        orr     r1, r1, r1, lsl #8
        orr     r2, r1, r1, lsl #16
        mov     r1, r3
        b       Lbzeroengine

/* void bzero(void *ptr, size_t len); */
ENTRY2(bzero,__bzero)
        /* zero out r2 so we can be just like memset(0) */
        mov     r2, #0

Lbzeroengine:
        /* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
        mov     r12, r0

        /* copy r2 into r3 for 64-bit stores */
        mov     r3, r2

        /* check for zero len */
        cmp     r1, #0
        bxeq    lr

        /* fall back to a bytewise store for less than 32 bytes */
        cmp     r1, #32
        blt     L_bytewise

        /* check for 32 byte unaligned ptr */
        tst     r12, #0x1f
        bne     L_unaligned

        /* make sure we have at least 64 bytes to zero */
        cmp     r1, #64
        blt     L_lessthan64aligned

/* >= 64 bytes of len, 32 byte aligned */
L_64ormorealigned:

        /* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */
        stmfd   sp!, { r4-r6, r8, r10-r11 }
        mov     r4, r2
        mov     r5, r2
        mov     r6, r2
        mov     r8, r2
        mov     r10, r2
        mov     r11, r2
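        /* r2-r6, r8 and r10-r11 now all hold the fill pattern: eight registers give 32 bytes per stmia */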

        /* pre-subtract 64 from the len to avoid an extra compare in the loop */
        sub     r1, r1, #64

L_64loop:
        stmia   r12!, { r2-r6, r8, r10-r11 }
        subs    r1, r1, #64
        stmia   r12!, { r2-r6, r8, r10-r11 }
        bge     L_64loop

        /* restore the saved regs */
        ldmfd   sp!, { r4-r6, r8, r10-r11 }

        /* check for completion (had previously subtracted an extra 64 from len) */
        adds    r1, r1, #64
        bxeq    lr

L_lessthan64aligned:
        /* do we have 16 or more bytes left */
        cmp     r1, #16
        stmiage r12!, { r2-r3 }
        stmiage r12!, { r2-r3 }
        subsge  r1, r1, #16
        bgt     L_lessthan64aligned
        bxeq    lr

L_lessthan16aligned:
        /* store 0 to 15 bytes */
        mov     r1, r1, lsl #28         /* move the remaining len bits [3:0] to the flags area of cpsr */
        msr     cpsr_f, r1

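        /* msr cpsr_f sets N,Z,C,V from bits [31:28], so len bit 3 lands in N (8 bytes), bit 2 in Z (4), bit 1 in C (2) and bit 0 in V (1) */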
        stmiami r12!, { r2-r3 }         /* n is set, store 8 bytes */
        streq   r2, [r12], #4           /* z is set, store 4 bytes */
        strhcs  r2, [r12], #2           /* c is set, store 2 bytes */
        strbvs  r2, [r12], #1           /* v is set, store 1 byte */
        bx      lr

L_bytewise:
        /* bytewise store, 2 bytes at a time, alignment not guaranteed */
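        /* an odd count is handled by the condition codes: when subs goes negative only the first strb executes and bhi falls through */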
        subs    r1, r1, #2
        strb    r2, [r12], #1
        strbpl  r2, [r12], #1
        bhi     L_bytewise
        bx      lr

L_unaligned:
        /* unaligned on 32 byte boundary, store 0-15 bytes until we're 16 byte aligned */
        mov     r3, r12, lsl #28
        rsb     r3, r3, #0x00000000
        msr     cpsr_f, r3

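        /* flags now hold (16 - (dst & 0xf)) mod 16, the byte count needed to reach 16 byte alignment: N=8, Z=4, C=2, V=1 */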
        strbvs  r2, [r12], #1           /* v is set, unaligned in the 1s column */
        strhcs  r2, [r12], #2           /* c is set, unaligned in the 2s column */
        streq   r2, [r12], #4           /* z is set, unaligned in the 4s column */
        strmi   r2, [r12], #4           /* n is set, unaligned in the 8s column */
        strmi   r2, [r12], #4

        subs    r1, r1, r3, lsr #28
        bxeq    lr

        /* we had previously trashed r3, restore it */
        mov     r3, r2

        /* now make sure we're 32 byte aligned */
        tst     r12, #(1 << 4)
        stmiane r12!, { r2-r3 }
        stmiane r12!, { r2-r3 }
        subsne  r1, r1, #16

        /* we're now aligned, check for >= 64 bytes left */
        cmp     r1, #64
        bge     L_64ormorealigned
        b       L_lessthan64aligned
173