/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <arm/arch.h>

.text
.align 2

        .globl _memcpy
        .globl _bcopy
        .globl _memmove

_bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
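        /*
         * bcopy() takes (src, dest, len) while memcpy()/memmove() take
         * (dest, src, len), so swap r0 and r1 through r3 and fall
         * through into the shared copy body below.
         */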
        mov r3, r0
        mov r0, r1
        mov r1, r3

_memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
_memmove: /* void *memmove(void *dest, const void *src, size_t len); */
        /* check for zero len or if the pointers are the same */
        cmp r2, #0
        cmpne r0, r1
        bxeq lr
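        /*
         * cmpne only runs when len != 0, so the single bxeq returns
         * early for both the len == 0 and dest == src cases.
         */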

        /* save r0 (the return value), r4 and r5 (scratch), plus r7 and lr for the frame */
        stmfd sp!, { r0, r4, r5, r7, lr }
        add r7, sp, #12

        /* check for overlap. r3 <- distance between src & dest */
        subhs r3, r0, r1
        sublo r3, r1, r0
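        /*
         * subhs/sublo still consume the flags left by cmpne r0, r1
         * above (stmfd and a flag-free add preserve them), so r3
         * becomes the unsigned distance |dest - src| with no extra
         * compare.
         */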
        cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
        blo Loverlap

Lnormalforwardcopy:
        /* are src and dest dissimilarly word aligned? */
        mov r12, r0, lsl #30
        cmp r12, r1, lsl #30
        bne Lnonwordaligned_forward
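        /*
         * Shifting both pointers left by 30 keeps only their low two
         * bits, so this compares the offset of src and dest within a
         * word; if they differ, no amount of leading byte copies can
         * word-align both, and the shifted-word path must be used.
         */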

        /* if len < 64, do a quick forward copy */
        cmp r2, #64
        blt Lsmallforwardcopy

        /* check for 16 byte src/dest unalignment */
        tst r0, #0xf
        bne Lsimilarlyunaligned

        /* check for 32 byte dest unalignment */
        tst r0, #(1<<4)
        bne Lunaligned_32

Lmorethan64_aligned:
        /* save some more registers to use in the copy */
        stmfd sp!, { r6, r8, r10, r11 }

        /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
        sub r2, r2, #64

L64loop:
        /* copy 64 bytes at a time */
        ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
        pld [r1, #32]
#endif
        stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        subs r2, r2, #64
#ifdef _ARM_ARCH_6
        pld [r1, #32]
#endif
        stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        bge L64loop
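        /*
         * Loop idiom: len was pre-biased by -64 above, so the single
         * subs both counts down and provides the bge exit test.  Each
         * pass moves 64 bytes as two 32-byte ldm/stm bursts, with pld
         * (on ARMv6 builds) prefetching the next source lines.
         */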

        /* restore the scratch registers we just saved */
        ldmfd sp!, { r6, r8, r10, r11 }

        /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
        adds r2, r2, #64
        beq Lexit

Llessthan64_aligned:
        /* copy 16 bytes at a time until we have < 16 bytes */
        cmp r2, #16
        ldmgeia r1!, { r3, r4, r5, r12 }
        stmgeia r0!, { r3, r4, r5, r12 }
        subges r2, r2, #16
        bgt Llessthan64_aligned
        beq Lexit

Llessthan16_aligned:
        mov r2, r2, lsl #28
        msr cpsr_f, r2
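        /*
         * Branchless tail: len << 28 drops bits 3..0 of the residual
         * count into the N, Z, C and V flags, so each conditional
         * transfer below fires for exactly one power-of-two chunk
         * (N: 8 bytes, Z: 4, C: 2, V: 1).  Illustrative C equivalent
         * (not part of the original source):
         *
         *     if (len & 8) copy 8 bytes;
         *     if (len & 4) copy 4 bytes;
         *     if (len & 2) copy 2 bytes;
         *     if (len & 1) copy 1 byte;
         */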

        ldmmiia r1!, { r2, r3 }
        ldreq r4, [r1], #4
        ldrcsh r5, [r1], #2
        ldrvsb r12, [r1], #1

        stmmiia r0!, { r2, r3 }
        streq r4, [r0], #4
        strcsh r5, [r0], #2
        strvsb r12, [r0], #1
        b Lexit

Lsimilarlyunaligned:
        /* src and dest are unaligned in similar ways; align dest to a 16 byte boundary (Lunaligned_32 below completes 32 byte alignment) */
        mov r12, r0, lsl #28
        rsb r12, r12, #0
        msr cpsr_f, r12
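        /*
         * r12 = -(dest << 28): its top nibble is (16 - (dest & 15)),
         * the number of bytes needed to reach the next 16 byte
         * boundary, and msr moves those bits into N/Z/C/V so the
         * conditional loads/stores below copy exactly that many bytes.
         */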

        ldrvsb r3, [r1], #1
        ldrcsh r4, [r1], #2
        ldreq r5, [r1], #4

        strvsb r3, [r0], #1
        strcsh r4, [r0], #2
        streq r5, [r0], #4

        ldmmiia r1!, { r3, r4 }
        stmmiia r0!, { r3, r4 }

        subs r2, r2, r12, lsr #28
        beq Lexit

Lunaligned_32:
        /* bring dest up to 32 byte alignment */
        tst r0, #(1 << 4)
        ldmneia r1!, { r3, r4, r5, r12 }
        stmneia r0!, { r3, r4, r5, r12 }
        subne r2, r2, #16

        /* we should now be aligned, see what copy method we should use */
        cmp r2, #64
        bge Lmorethan64_aligned
        b Llessthan64_aligned

Lbytewise2:
        /* copy 2 bytes at a time */
        subs r2, r2, #2

        ldrb r3, [r1], #1
        ldrplb r4, [r1], #1

        strb r3, [r0], #1
        strplb r4, [r0], #1

        bhi Lbytewise2
        b Lexit
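        /*
         * Two bytes per pass: subs runs first, the second byte is only
         * moved while the count has not gone negative (pl), and bhi
         * loops only while bytes remain, so an odd trailing byte needs
         * no separate tail.
         */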

Lbytewise:
        /* simple bytewise forward copy */
        ldrb r3, [r1], #1
        subs r2, r2, #1
        strb r3, [r0], #1
        bne Lbytewise
        b Lexit

Lsmallforwardcopy:
        /* src and dest are word aligned similarly, less than 64 bytes to copy */
        cmp r2, #4
        blt Lbytewise2

        /* bytewise copy until word aligned */
        tst r1, #3
Lwordalignloop:
        ldrneb r3, [r1], #1
        strneb r3, [r0], #1
        subne r2, r2, #1
        tstne r1, #3
        bne Lwordalignloop

        cmp r2, #16
        bge Llessthan64_aligned
        blt Llessthan16_aligned

Loverlap:
        /* src and dest overlap in some way, len > 0 */
        cmp r0, r1 /* if dest > src */
        bhi Loverlap_srclower

Loverlap_destlower:
        /* dest < src, see if we can still do a fast forward copy or fall back to a slow forward copy */
        cmp r3, #64
        bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */

        cmp r3, #2
        bge Lbytewise2
        b Lbytewise

        /* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
        /* src < dest, with overlap */

        /* dest += len; src += len; */
        add r0, r0, r2
        add r1, r1, r2

        /* we have to copy in reverse no matter what; test if we can use a large block reverse copy */
        cmp r2, #64 /* less than 64 bytes to copy? */
        cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
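        /*
         * Conditional compare chain: cmpgt re-tests the overlap
         * distance only when len > 64, so the blt below falls back to
         * the bytewise reverse copy when the copy is short or when a
         * long copy has under 64 bytes of non-overlap.
         */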
        blt Lbytewise_reverse

        /* test if src and dest are word aligned differently */
        mov r3, r0, lsl #30
        cmp r3, r1, lsl #30
        bne Lbytewise_reverse

        /* test for 16 byte src/dest unalignment */
        tst r0, #0xf
        bne Lunaligned_reverse_similarly

        /* test for dest 32 byte alignment */
        tst r0, #(1<<4)
        bne Lunaligned_32_reverse_similarly

        /* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
        /* save some more registers to use in the copy */
        stmfd sp!, { r6, r8, r10, r11 }

        /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
        sub r2, r2, #64

L64loop_reverse:
        /* copy 64 bytes at a time */
        ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
        pld [r1, #-32]
#endif
        stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        subs r2, r2, #64
#ifdef _ARM_ARCH_6
        pld [r1, #-32]
#endif
        stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
        bge L64loop_reverse

        /* restore the scratch registers we just saved */
        ldmfd sp!, { r6, r8, r10, r11 }

        /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
        adds r2, r2, #64
        beq Lexit

Lbytewise_reverse:
        ldrb r3, [r1, #-1]!
        strb r3, [r0, #-1]!
        subs r2, r2, #1
        bne Lbytewise_reverse
        b Lexit

Lunaligned_reverse_similarly:
        /* src and dest are unaligned in similar ways; align dest down to a 16 byte boundary (the block below completes 32 byte alignment) */
        mov r12, r0, lsl #28
        msr cpsr_f, r12

        ldrvsb r3, [r1, #-1]!
        ldrcsh r4, [r1, #-2]!
        ldreq r5, [r1, #-4]!

        strvsb r3, [r0, #-1]!
        strcsh r4, [r0, #-2]!
        streq r5, [r0, #-4]!

        ldmmidb r1!, { r3, r4 }
        stmmidb r0!, { r3, r4 }

        subs r2, r2, r12, lsr #28
        beq Lexit

Lunaligned_32_reverse_similarly:
        /* bring dest down to 32 byte alignment */
        tst r0, #(1 << 4)
        ldmnedb r1!, { r3, r4, r5, r12 }
        stmnedb r0!, { r3, r4, r5, r12 }
        subne r2, r2, #16

        /* we should now be aligned, see what copy method we should use */
        cmp r2, #64
        bge Lmorethan64_aligned_reverse
        b Lbytewise_reverse

        /* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
        cmp r2, #8
        blt Lbytewise2 /* not worth the effort with less than 8 bytes total */

        /* bytewise copy until src word aligned */
        tst r1, #3
Lwordalignloop2:
        ldrneb r3, [r1], #1
        strneb r3, [r0], #1
        subne r2, r2, #1
        tstne r1, #3
        bne Lwordalignloop2

        /* figure out how the src and dest are unaligned */
        and r3, r0, #3
        cmp r3, #2
        blt Lalign1_forward
        beq Lalign2_forward
        bgt Lalign3_forward

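        /*
         * Lalign1/2/3_forward: src is now word aligned and dest is 1,
         * 2 or 3 bytes past a word boundary.  dest is backed up to
         * that boundary, its leading byte(s) preserved, and each pass
         * merges the carried low part with a shifted source word so
         * every store is word aligned.  Illustrative little-endian C
         * sketch of the 1-byte case (not part of the original source):
         *
         *     uint32_t *d = (uint32_t *)(dest - 1);
         *     uint32_t *s = (uint32_t *)src;
         *     uint32_t carry = *(uint8_t *)d;    // preserved byte
         *     while (words--) {
         *         uint32_t w = *s++;
         *         *d++ = carry | (w << 8);       // aligned store
         *         carry = w >> 24;               // carried to next word
         *     }
         *     *(uint8_t *)d = carry;             // final carried byte
         */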
Lalign1_forward:
        /* the dest pointer is 1 byte off from src */
        mov r12, r2, lsr #2 /* number of words we should copy */
        sub r0, r0, #1

        /* prime the copy */
        ldrb r4, [r0] /* load D[7:0] */

Lalign1_forward_loop:
        ldr r3, [r1], #4 /* load S */
        orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
        str r4, [r0], #4 /* save D */
        mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
        subs r12, r12, #1
        bne Lalign1_forward_loop

        /* finish the copy off */
        strb r4, [r0], #1 /* save D[7:0] */

        ands r2, r2, #3
        beq Lexit
        b Lbytewise2

Lalign2_forward:
        /* the dest pointer is 2 bytes off from src */
        mov r12, r2, lsr #2 /* number of words we should copy */
        sub r0, r0, #2

        /* prime the copy */
        ldrh r4, [r0] /* load D[15:0] */

Lalign2_forward_loop:
        ldr r3, [r1], #4 /* load S */
        orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
        str r4, [r0], #4 /* save D */
        mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
        subs r12, r12, #1
        bne Lalign2_forward_loop

        /* finish the copy off */
        strh r4, [r0], #2 /* save D[15:0] */

        ands r2, r2, #3
        beq Lexit
        b Lbytewise2

Lalign3_forward:
        /* the dest pointer is 3 bytes off from src */
        mov r12, r2, lsr #2 /* number of words we should copy */
        sub r0, r0, #3

        /* prime the copy */
        ldr r4, [r0]
        and r4, r4, #0x00ffffff /* load D[23:0] */

Lalign3_forward_loop:
        ldr r3, [r1], #4 /* load S */
        orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
        str r4, [r0], #4 /* save D */
        mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
        subs r12, r12, #1
        bne Lalign3_forward_loop

        /* finish the copy off */
        strh r4, [r0], #2 /* save D[15:0] */
        mov r4, r4, lsr #16
        strb r4, [r0], #1 /* save D[23:16] */

        ands r2, r2, #3
        beq Lexit
        b Lbytewise2

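        /*
         * The epilogue pops the r0 saved in the prologue, so
         * memcpy/memmove return the original dest pointer, and loads
         * pc directly to return to the caller.
         */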
Lexit:
        ldmfd sp!, {r0, r4, r5, r7, pc}