/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#if defined __thumb2__ && defined __ARM_NEON__

// Use our tuned NEON implementation when it is available. Otherwise fall back
// on more generic ARM code.

#include "NEON/bzero.s"

#else // defined __thumb2__ && defined __ARM_NEON__

#include <mach/machine/asm.h>
#include <architecture/arm/asm_help.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */
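/*
 * A rough C sketch of that strategy, as a reading aid only (it is not part of
 * the build, and store32() is just a stand-in for one 8-register stmia that
 * writes 32 bytes):
 *
 *	void *memset(void *ptr, int c, size_t len)
 *	{
 *		unsigned char *p = (unsigned char *)ptr;
 *		while (len > 0 && ((uintptr_t)p & 0x1f)) {	// align p to 32 bytes
 *			*p++ = (unsigned char)c;
 *			len--;
 *		}
 *		while (len >= 64) {				// main loop, 64 bytes per pass
 *			store32(p, c);
 *			store32(p + 32, c);
 *			p += 64;
 *			len -= 64;
 *		}
 *		while (len > 0) {				// 0-63 byte tail
 *			*p++ = (unsigned char)c;
 *			len--;
 *		}
 *		return ptr;
 *	}
 */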
	.text
	.align 2

	.globl _memset
/* void *memset(void *ptr, int c, size_t len); */
_memset:
	/* move len into r1, unpack c into r2 */
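	/* e.g. c = 0x000000ab is replicated into every byte lane, giving r2 = 0xabababab */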
	mov		r3, r2
	and		r1, r1, #0xff
	orr		r1, r1, r1, lsl #8
	orr		r2, r1, r1, lsl #16
	mov		r1, r3
	b		Lbzeroengine

	.globl _bzero
/* void bzero(void *ptr, size_t len); */
_bzero:
	/* zero out r2 so we can be just like memset(0) */
	mov		r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov		r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov		r3, r2

	/* check for zero len */
	cmp		r1, #0
	bxeq	lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp		r1, #32
	blt		L_bytewise

	/* check for 32 byte unaligned ptr */
	tst		r12, #0x1f
	bne		L_unaligned

	/* make sure we have at least 64 bytes to zero */
	cmp		r1, #64
	blt		L_lessthan64aligned

	/* >= 64 bytes of len, 32 byte aligned */
L_64ormorealigned:

	/* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */
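	/* with r2-r6, r8, r10 and r11 all holding the fill pattern, a single stmia writes 32 bytes */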
	stmfd	sp!, { r4-r6, r8, r10-r11 }
	mov		r4, r2
	mov		r5, r2
	mov		r6, r2
	mov		r8, r2
	mov		r10, r2
	mov		r11, r2

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub		r1, r1, #64

L_64loop:
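	/* each pass writes 64 bytes with two 8-register stmia; the subs between them
	   drops len by 64 and leaves the flags for the bge at the bottom */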
	stmia	r12!, { r2-r6, r8, r10-r11 }
	subs	r1, r1, #64
	stmia	r12!, { r2-r6, r8, r10-r11 }
	bge		L_64loop

	/* restore the saved regs */
	ldmfd	sp!, { r4-r6, r8, r10-r11 }

	/* check for completion (had previously subtracted an extra 64 from len) */
	adds	r1, r1, #64
	bxeq	lr

L_lessthan64aligned:
	/* do we have 16 or more bytes left */
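	/* the ge-conditional stores and subtract only run while at least 16 bytes remain;
	   a remainder that is an exact multiple of 16 sets Z and returns via the bxeq below */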
	cmp		r1, #16
	stmgeia	r12!, { r2-r3 }
	stmgeia	r12!, { r2-r3 }
	subges	r1, r1, #16
	bgt		L_lessthan64aligned
	bxeq	lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov		r1, r1, lsl #28		/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr		cpsr_f, r1

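	/* len bit 3 is now in N, bit 2 in Z, bit 1 in C and bit 0 in V, so each conditional
	   store below fires only when its power-of-two chunk is actually needed */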
	stmmiia	r12!, { r2-r3 }		/* n is set, store 8 bytes */
	streq	r2, [r12], #4		/* z is set, store 4 bytes */
	strcsh	r2, [r12], #2		/* c is set, store 2 bytes */
	strvsb	r2, [r12], #1		/* v is set, store 1 byte */
	bx		lr

L_bytewise:
	/* bytewise store, 2 bytes at a time, alignment not guaranteed */
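	/* an odd remaining length leaves the subs negative, so the pl-conditional second
	   strb is skipped and the bhi falls through to the return */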
	subs	r1, r1, #2
	strb	r2, [r12], #1
	strplb	r2, [r12], #1
	bhi		L_bytewise
	bx		lr

L_unaligned:
	/* unaligned on 32 byte boundary, store 1-15 bytes until we're 16 byte aligned */
	mov		r3, r12, lsl #28
	rsb		r3, r3, #0x00000000
	msr		cpsr_f, r3

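	/* bits [31:28] of r3 now hold (16 - (ptr & 0xf)) & 0xf, the byte count needed to reach
	   16 byte alignment; those bits land in N/Z/C/V so the stores below fire as required */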
	strvsb	r2, [r12], #1		/* v is set, unaligned in the 1s column */
	strcsh	r2, [r12], #2		/* c is set, unaligned in the 2s column */
	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */
	strmi	r2, [r12], #4		/* n is set, unaligned in the 8s column */
	strmi	r2, [r12], #4

	subs	r1, r1, r3, lsr #28
	bxeq	lr

	/* we had previously trashed r3, restore it */
	mov		r3, r2

	/* now make sure we're 32 byte aligned */
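	/* if bit 4 of the pointer is still set we are 16 but not 32 byte aligned,
	   so conditionally store one more 16 byte block */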
	tst		r12, #(1 << 4)
	stmneia	r12!, { r2-r3 }
	stmneia	r12!, { r2-r3 }
	subnes	r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp		r1, #64
	bge		L_64ormorealigned
	b		L_lessthan64aligned

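	/* X_LEAF exports ___bzero as an additional entry point that reaches the same code */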
X_LEAF(___bzero, _bzero)

#endif // defined __thumb2__ && defined __ARM_NEON__