2 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /*****************************************************************************
25 * ARMv5 and ARMv6 implementation, also used in dyld on later archs *
26 *****************************************************************************/
29 #if !defined _ARM_ARCH_7 || defined VARIANT_DYLD

/*
 * NOTE(review): this extract is missing many interior lines of the original
 * file (flag-setting instructions, several label definitions such as
 * Lsimilarlyunaligned / Llessthan64_aligned / Lbytewise_reverse / Lbytewise2,
 * the bcopy argument swap, and the assembler directives).  Comments below
 * describe only what is visible here; anything that depends on an elided
 * line is marked as an assumption to confirm against the full source.
 *
 * One shared implementation behind three entry points:
 *   bcopy(src, dest, len)    -- presumably swaps r0/r1 in elided code; confirm
 *   memcpy(dest, src, len)
 *   memmove(dest, src, len)  -- overlap is detected and handled, so all
 *                               entry points are effectively overlap-safe
 *
 * Register roles while copying (as visible in this extract):
 *   r0 = dest cursor    r1 = src cursor    r2 = remaining length (bytes)
 *   r3 = src/dest distance on entry, then data scratch
 *   r4, r5, r12 = data scratch; r6, r8, r10, r11 = extra scratch, saved
 *   on the stack before the 64-byte block loops use them
 */
38 _bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
43 _memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
44 _memmove: /* void *memmove(void *dest, const void *src, size_t len); */
45 /* check for zero len or if the pointers are the same */
50 /* save r0 (return value), r4 (scratch), and r5 (scratch) */
51 stmfd sp!, { r0, r4, r5, r7, lr }
54 /* check for overlap. r3 <- distance between src & dest */
57 cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
61 /* are src and dest dissimilarly word aligned? */
64 bne Lnonwordaligned_forward
66 /* if len < 64, do a quick forward copy */
70 /* check for 16 byte src/dest unalignment */
72 bne Lsimilarlyunaligned
74 /* check for 32 byte dest unalignment */
79 /* save some more registers to use in the copy */
80 stmfd sp!, { r6, r8, r10, r11 }
82 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
86 /* copy 64 bytes at a time: two 8-register load/store pairs move 32 bytes each */
87 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
91 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
92 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
97 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
100 /* restore the scratch registers we just saved */
101 ldmfd sp!, { r6, r8, r10, r11 }
103 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
108 /* copy 16 bytes at a time until we have < 16 bytes */
/* NOTE(review): the 'ge'/'gt' conditions below rely on a flag-setting
   subtract elided from this extract -- confirm against full source */
110 ldmgeia r1!, { r3, r4, r5, r12 }
111 stmgeia r0!, { r3, r4, r5, r12 }
113 bgt Llessthan64_aligned
/* tail of < 16 bytes: flags presumably set from the remaining-length bits by
   elided code; 'mi' selects an 8-byte move.  r2 is dead as a counter here and
   is reused as copy data -- confirm against full source */
120 ldmmiia r1!, { r2, r3 }
125 stmmiia r0!, { r2, r3 }
132 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
145 ldmmiia r1!, { r3, r4 }
146 stmmiia r0!, { r3, r4 }
/* r12 >> 28 = number of bytes just consumed aligning dest (nibble presumably
   placed in r12's top bits by elided code); deduct it from the length */
148 subs r2, r2, r12, lsr #28
152 /* bring up to dest 32 byte alignment */
/* 'ne' set by an elided 16-byte alignment test -- confirm */
154 ldmneia r1!, { r3, r4, r5, r12 }
155 stmneia r0!, { r3, r4, r5, r12 }
158 /* we should now be aligned, see what copy method we should use */
160 bge Lmorethan64_aligned
161 b Llessthan64_aligned
164 /* copy 2 bytes at a time */
177 /* simple bytewise forward copy */
185 /* src and dest are word aligned similarly, less than 64 bytes to copy */
189 /* bytewise copy until word aligned */
199 bge Llessthan64_aligned
200 blt Llessthan16_aligned
203 /* src and dest overlap in some way, len > 0 */
204 cmp r0, r1 /* if dest > src */
205 bhi Loverlap_srclower /* dest above src: must copy backwards */
208 /* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
210 bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */
216 /* the following routines deal with having to copy in the reverse direction */
218 /* src < dest, with overlap */
220 /* src += len; dest += len; */
224 /* we have to copy in reverse no matter what; test whether we can use a large block reverse copy */
225 cmp r2, #64 /* less than 64 bytes to copy? */
226 cmpgt r3, #64 /* (only if len > 64) less than 64 bytes of nonoverlap? */
227 blt Lbytewise_reverse
229 /* test if src and dest are nonword aligned differently */
232 bne Lbytewise_reverse
234 /* test if src and dest are non word aligned or dest is non 16 byte aligned */
236 bne Lunaligned_reverse_similarly
238 /* test for dest 32 byte alignment */
240 bne Lunaligned_32_reverse_similarly
242 /* 64 byte reverse block copy, src and dest aligned */
243 Lmorethan64_aligned_reverse:
244 /* save some more registers to use in the copy */
245 stmfd sp!, { r6, r8, r10, r11 }
247 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
251 /* copy 64 bytes at a time, pointers pre-decremented (descending copy) */
252 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
256 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
257 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
262 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
265 /* restore the scratch registers we just saved */
266 ldmfd sp!, { r6, r8, r10, r11 }
268 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
276 bne Lbytewise_reverse
279 Lunaligned_reverse_similarly:
280 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
/* NOTE(review): 'vs'/'cs'/'mi' presumably come from an elided write of the
   dest alignment nibble into the flags; each condition moves 1/2/8 trailing
   bytes with pre-decremented pointers -- confirm against full source */
284 ldrvsb r3, [r1, #-1]!
285 ldrcsh r4, [r1, #-2]!
288 strvsb r3, [r0, #-1]!
289 strcsh r4, [r0, #-2]!
292 ldmmidb r1!, { r3, r4 }
293 stmmidb r0!, { r3, r4 }
/* deduct the bytes just consumed (count presumably in r12's top nibble) */
295 subs r2, r2, r12, lsr #28
298 Lunaligned_32_reverse_similarly:
299 /* bring up to dest 32 byte alignment */
301 ldmnedb r1!, { r3, r4, r5, r12 }
302 stmnedb r0!, { r3, r4, r5, r12 }
305 /* we should now be aligned, see what copy method we should use */
307 bge Lmorethan64_aligned_reverse
310 /* the following routines deal with non word aligned copies */
311 Lnonwordaligned_forward:
313 blt Lbytewise2 /* not worth the effort with less than 24 bytes total */
315 /* bytewise copy until src word aligned */
324 /* figure out how the src and dest are unaligned */
332 /* the dest pointer is 1 byte off from src: read whole words from src and
   reassemble them shifted by 8 bits for word stores to dest */
333 mov r12, r2, lsr #2 /* number of words we should copy */
337 ldrb r4, [r0] /* prime D[7:0] with the byte already at dest */
339 Lalign1_forward_loop:
340 ldr r3, [r1], #4 /* load S */
341 orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
342 str r4, [r0], #4 /* save D */
343 mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
345 bne Lalign1_forward_loop
347 /* finish the copy off */
348 strb r4, [r0], #1 /* save D[7:0] */
355 /* the dest pointer is 2 bytes off from src: reassemble words shifted by 16 bits */
356 mov r12, r2, lsr #2 /* number of words we should copy */
360 ldrh r4, [r0] /* prime D[15:0] with the halfword already at dest */
362 Lalign2_forward_loop:
363 ldr r3, [r1], #4 /* load S */
364 orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
365 str r4, [r0], #4 /* save D */
366 mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
368 bne Lalign2_forward_loop
370 /* finish the copy off */
371 strh r4, [r0], #2 /* save D[15:0] */
378 /* the dest pointer is 3 bytes off from src: reassemble words shifted by 24 bits */
379 mov r12, r2, lsr #2 /* number of words we should copy */
384 and r4, r4, #0x00ffffff /* keep D[23:0] */
386 Lalign3_forward_loop:
387 ldr r3, [r1], #4 /* load S */
388 orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
389 str r4, [r0], #4 /* save D */
390 mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
392 bne Lalign3_forward_loop
394 /* finish the copy off */
395 strh r4, [r0], #2 /* save D[15:0] */
/* NOTE(review): an elided shift presumably moves D[23:16] into the low
   byte of r4 between these two stores -- confirm against full source */
397 strb r4, [r0], #1 /* save D[23:16] */
/* common exit: pop the saved entry registers; r0 gets the value saved at
   entry (the original dest pointer, i.e. the memcpy/memmove return value)
   and the saved lr is popped straight into pc to return */
404 ldmfd sp!, {r0, r4, r5, r7, pc}
406 #endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD