2 * Copyright (c) 2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <arm/proc_reg.h>
/*
 * Common entry for bcopy/memcpy/memmove. The prototypes are given on the
 * labels: bcopy receives (src, dest, len) while memcpy and memmove receive
 * (dest, src, len).
 * NOTE(review): this listing skips source lines between the ones shown, so
 * the instructions that test len, compare the pointers and compute r3 are
 * not visible here; the comments describe only what can be seen.
 */
40 _bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
46 _memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
47 _memmove: /* void *memmove(void *dest, const void *src, size_t len); */
48 /* check for zero len or if the pointers are the same */
53 /* save r0 (return value), r4 and r5 (scratch), plus r7 and lr; the final ldmfd of this routine pops the same list with pc in place of lr */
54 stmfd sp!, { r0, r4, r5, r7, lr }
57 /* check for overlap. r3 <- distance between src & dest */
60 cmp r3, r2 /* r2 = len: if distance(src, dest) < len, we have overlap */
/*
 * Forward copy, first stage: choose a strategy from the relative alignment
 * of src and dest, then (aligned case) move 64 bytes per pass.
 * In the copy instructions r1 is the read pointer and r0 the write pointer;
 * both are advanced by the '!' writeback.
 * NOTE(review): the tests that set the flags for the bne branches, the loop
 * label, the len update and the loop branch are on lines not shown here.
 */
64 /* are src and dest dissimilarly word aligned? */
67 bne Lnonwordaligned_forward
69 /* if len < 64, do a quick forward copy */
73 /* check for 16 byte src/dest unalignment */
75 bne Lsimilarlyunaligned
77 /* check for 32 byte dest unalignment */
82 /* save some more registers to use in the copy */
/* r6, r8, r10 and r11 are pushed so that eight registers are free for each transfer */
83 stmfd sp!, { r6, r8, r10, r11 }
85 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
89 /* copy 64 bytes at a time */
/* two load/store-multiple pairs of 8 registers (32 bytes) each */
90 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
92 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
93 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
96 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
99 /* restore the scratch registers we just saved */
100 ldmfd sp!, { r6, r8, r10, r11 }
102 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
/*
 * Tail of the aligned forward copy: a conditional 16-byte move followed by
 * a conditional 8-byte move.
 * NOTE(review): the instructions that update len and set the condition
 * flags tested here (GE/GT, then MI) are not shown in this listing.
 */
107 /* copy 16 bytes at a time until we have < 16 bytes */
/* GE: move four words (16 bytes) from [r1] to [r0], advancing both pointers */
109 ldmiage r1!, { r3, r4, r5, r12 }
110 stmiage r0!, { r3, r4, r5, r12 }
/* GT: branch to Llessthan64_aligned for another 16-byte pass */
112 bgt Llessthan64_aligned
/* MI (N flag set): move 8 bytes; r2 is used as a data register here, so its previous contents are overwritten */
119 ldmiami r1!, { r2, r3 }
124 stmiami r0!, { r2, r3 }
/*
 * Forward copy where src and dest share the same misalignment: copy small
 * pieces until dest reaches a 32-byte boundary, then pick the bulk method.
 * NOTE(review): the instructions that load r12 and set the condition flags
 * (and any smaller conditional moves) are not shown in this listing.
 */
131 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
/* MI (N flag set): move 8 bytes */
144 ldmiami r1!, { r3, r4 }
145 stmiami r0!, { r3, r4 }
/* len -= (r12 >> 28), updating the flags; presumably r12[31:28] holds the number of bytes consumed above - TODO confirm */
147 subs r2, r2, r12, lsr #28
151 /* bring up to dest 32 byte alignment */
/* NE: move one 16-byte chunk (r12 is overwritten as a data register) */
153 ldmiane r1!, { r3, r4, r5, r12 }
154 stmiane r0!, { r3, r4, r5, r12 }
157 /* we should now be aligned, see what copy method we should use */
/* GE -> 64-byte block copy, otherwise fall to the sub-64-byte aligned tail */
159 bge Lmorethan64_aligned
160 b Llessthan64_aligned
/*
 * Small/slow forward paths. Only the section comments and the final
 * dispatch are visible in this listing.
 * NOTE(review): the copy instructions and labels for these sections are on
 * lines not shown, so nothing more specific can be stated here.
 */
163 /* copy 2 bytes at a time */
176 /* simple bytewise forward copy */
184 /* src and dest are word aligned similarly, less than 64 bytes to copy */
188 /* bytewise copy until word aligned */
/* GE and LT are complementary conditions, so exactly one of these two branches is taken */
198 bge Llessthan64_aligned
199 blt Llessthan16_aligned
/*
 * Overlap handling: decide the copy direction, then dispatch the reverse
 * copy by size and alignment.
 * NOTE(review): several flag-setting instructions (and the pointer
 * adjustments described by the comments) are on lines not shown here.
 */
202 /* src and dest overlap in some way, len > 0 */
203 cmp r0, r1 /* if dest > src */
/* bhi is an unsigned comparison: taken when r0 (dest) is above r1 (src) */
204 bhi Loverlap_srclower
207 /* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
209 bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */
215 /* the following routines deal with having to copy in the reverse direction */
217 /* src < dest, with overlap */
219 /* src += len; dest += len; */
223 /* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
/*
 * The second compare only executes when len > 64 (GT after the first one).
 * The blt is therefore taken when len < 64, or when len > 64 and r3 < 64;
 * with len exactly 64 the r3 test is skipped and the branch is not taken.
 */
224 cmp r2, #64 /* less than 64 bytes to copy? */
225 cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
226 blt Lbytewise_reverse
228 /* test if src and dest are non-word aligned differently */
231 bne Lbytewise_reverse
233 /* test if src and dest are non word aligned or dest is non 16 byte aligned */
235 bne Lunaligned_reverse_similarly
237 /* test for dest 32 byte alignment */
239 bne Lunaligned_32_reverse_similarly
/*
 * Reverse (high-to-low address) block copy, 64 bytes per pass.
 * ldmdb/stmdb with writeback step r1 (read pointer) and r0 (write pointer)
 * down by 32 bytes before each 8-register transfer.
 * NOTE(review): the loop label, the len update and the loop branch are on
 * lines not shown in this listing.
 */
241 /* 64 byte reverse block copy, src and dest aligned */
242 Lmorethan64_aligned_reverse:
243 /* save some more registers to use in the copy */
244 stmfd sp!, { r6, r8, r10, r11 }
246 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
250 /* copy 64 bytes at a time */
251 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
/* NOTE(review): the body of this conditional and its matching #endif are not shown in this listing */
252 #if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
255 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
256 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
259 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
262 /* restore the scratch registers we just saved */
263 ldmfd sp!, { r6, r8, r10, r11 }
265 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
/* NE: bytes remain after the block copy, finish them bytewise */
273 bne Lbytewise_reverse
/*
 * Reverse copy where src and dest share the same misalignment: peel off
 * 1-, 2- and 8-byte pieces, each selected by one condition flag.
 * All accesses pre-decrement the pointer with writeback.
 * NOTE(review): the instructions that load r12 and place the selection bits
 * in the flags (and any 4-byte step) are not shown in this listing.
 */
276 Lunaligned_reverse_similarly:
277 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
/* VS: load one byte; CS: load one halfword */
281 ldrbvs r3, [r1, #-1]!
282 ldrhcs r4, [r1, #-2]!
/* store the byte / halfword loaded above under the same conditions */
285 strbvs r3, [r0, #-1]!
286 strhcs r4, [r0, #-2]!
/* MI (N flag set): move 8 bytes */
289 ldmdbmi r1!, { r3, r4 }
290 stmdbmi r0!, { r3, r4 }
/* len -= (r12 >> 28), updating the flags; presumably r12[31:28] holds the number of bytes consumed above - TODO confirm */
292 subs r2, r2, r12, lsr #28
/*
 * Reverse copy, final alignment step: optionally move one 16-byte chunk,
 * then hand over to the 64-byte reverse block copy.
 * NOTE(review): the instructions that set the flags for NE and GE, and the
 * path taken when GE is false, are on lines not shown in this listing.
 */
295 Lunaligned_32_reverse_similarly:
296 /* bring up to dest 32 byte alignment */
/* NE: move four words (16 bytes), pre-decrementing both pointers */
298 ldmdbne r1!, { r3, r4, r5, r12 }
299 stmdbne r0!, { r3, r4, r5, r12 }
302 /* we should now be aligned, see what copy method we should use */
304 bge Lmorethan64_aligned_reverse
/*
 * Entry for forward copies where src and dest have different word
 * alignment; short copies are sent to the 2-byte-at-a-time path instead.
 * NOTE(review): the compare that sets the flags for the blt, the bytewise
 * alignment loop and the dispatch on the misalignment amount are on lines
 * not shown here, so the threshold quoted in the comment cannot be checked.
 */
307 /* the following routines deal with non word aligned copies */
308 Lnonwordaligned_forward:
310 blt Lbytewise2 /* not worth the effort with less than 24 bytes total */
312 /* bytewise copy until src word aligned */
321 /* figure out how the src and dest are unaligned */
/*
 * Forward copy with dest 1 byte off from src. Each pass reads one source
 * word S, merges its low 24 bits above the byte carried in r4, stores the
 * resulting word D, and carries the top byte of S into the next pass.
 * NOTE(review): the adjustments to r0/r2 before the loop and the decrement
 * of the word counter that sets the flags for the bne are not shown here.
 */
329 /* the dest pointer is 1 byte off from src */
330 mov r12, r2, lsr #2 /* number of words we should copy */
334 ldrb r4, [r0] /* load D[7:0] */
336 Lalign1_forward_loop:
337 ldr r3, [r1], #4 /* load S */
338 orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
339 str r4, [r0], #4 /* save D */
340 mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
342 bne Lalign1_forward_loop
344 /* finish the copy off */
345 strb r4, [r0], #1 /* save D[7:0] */
/*
 * Forward copy with dest 2 bytes off from src. Each pass reads one source
 * word S, merges its low halfword above the halfword carried in r4, stores
 * the resulting word D, and carries the high halfword of S into the next
 * pass.
 * NOTE(review): the adjustments to r0/r2 before the loop and the decrement
 * of the word counter that sets the flags for the bne are not shown here.
 */
352 /* the dest pointer is 2 bytes off from src */
353 mov r12, r2, lsr #2 /* number of words we should copy */
357 ldrh r4, [r0] /* load D[15:0] */
359 Lalign2_forward_loop:
360 ldr r3, [r1], #4 /* load S */
361 orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
362 str r4, [r0], #4 /* save D */
363 mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
365 bne Lalign2_forward_loop
367 /* finish the copy off */
368 strh r4, [r0], #2 /* save D[15:0] */
/*
 * Forward copy with dest 3 bytes off from src. Each pass reads one source
 * word S, merges its low byte above the 24 bits carried in r4, stores the
 * resulting word D, and carries the upper 24 bits of S into the next pass.
 * NOTE(review): the load of r4, the adjustments to r0/r2 before the loop
 * and the decrement of the word counter that sets the flags for the bne are
 * not shown in this listing.
 */
375 /* the dest pointer is 3 bytes off from src */
376 mov r12, r2, lsr #2 /* number of words we should copy */
381 and r4, r4, #0x00ffffff /* keep D[23:0] */
383 Lalign3_forward_loop:
384 ldr r3, [r1], #4 /* load S */
385 orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
386 str r4, [r0], #4 /* save D */
387 mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
389 bne Lalign3_forward_loop
391 /* finish the copy off */
392 strh r4, [r0], #2 /* save D[15:0] */
/* NOTE(review): a shift of r4 between these two stores is presumably on a line not shown; as listed, strb writes the low byte of r4 */
394 strb r4, [r0], #1 /* save D[23:16] */
/* epilogue: pop the registers pushed at entry; loading the saved lr into pc returns to the caller */
401 ldmfd sp!, { r0, r4, r5, r7, pc }