2 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
33 _bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
38 _memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
39 _memmove: /* void *memmove(void *dest, const void *src, size_t len); */
/* Shared copy body for the entry points above. In the code below r0 is the */
/* dest pointer (stores go through r0), r1 is the src pointer (loads go */
/* through r1) and r2 is the remaining length. */
40 /* check for zero len or if the pointers are the same */
45 /* save r0 (return value), r4 and r5 (scratch), plus r7 and lr */
46 stmfd sp!, { r0, r4, r5, r7, lr }
49 /* check for overlap. r3 <- distance between src & dest */
52 cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
56 /* are src and dest dissimilarly word aligned? */
59 bne Lnonwordaligned_forward
61 /* if len < 64, do a quick forward copy */
65 /* check for 16 byte src/dest unalignment */
67 bne Lsimilarlyunaligned
69 /* check for 32 byte dest unalignment */
74 /* save some more registers to use in the copy */
75 stmfd sp!, { r6, r8, r10, r11 }
77 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
81 /* copy 64 bytes at a time */
/* each ldmia/stmia pair moves eight registers (32 bytes) and advances r1/r0 */
/* through writeback; two pairs make up one 64 byte iteration */
82 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
86 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
87 ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
92 stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
95 /* restore the scratch registers we just saved */
96 ldmfd sp!, { r6, r8, r10, r11 }
98 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
103 /* copy 16 bytes at a time until we have < 16 bytes */
/* the transfer is ge-conditional: four registers (16 bytes) move only when ge holds */
105 ldmgeia r1!, { r3, r4, r5, r12 }
106 stmgeia r0!, { r3, r4, r5, r12 }
108 bgt Llessthan64_aligned
/* mi-conditional 8 byte (two register) transfer */
/* NOTE(review): r2 is reused as a data register here, so the length is */
/* presumably no longer needed in r2 at this point - confirm */
115 ldmmiia r1!, { r2, r3 }
120 stmmiia r0!, { r2, r3 }
127 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
/* mi-conditional 8 byte (two register) transfer */
140 ldmmiia r1!, { r3, r4 }
141 stmmiia r0!, { r3, r4 }
/* len -= (r12 >> 28), updating the flags */
/* NOTE(review): the top nibble of r12 presumably holds the number of bytes */
/* consumed while aligning - confirm */
143 subs r2, r2, r12, lsr #28
147 /* bring up to dest 32 byte alignment */
/* ne-conditional 16 byte (four register) transfer */
149 ldmneia r1!, { r3, r4, r5, r12 }
150 stmneia r0!, { r3, r4, r5, r12 }
153 /* we should now be aligned, see what copy method we should use */
/* ge selects the 64 byte block copy, otherwise fall to the smaller aligned copy */
155 bge Lmorethan64_aligned
156 b Llessthan64_aligned
159 /* copy 2 bytes at a time */
172 /* simple bytewise forward copy */
180 /* src and dest are word aligned similarly, less than 64 bytes to copy */
184 /* bytewise copy until word aligned */
/* ge and lt are complementary conditions, so one of these two branches is always taken */
194 bge Llessthan64_aligned
195 blt Llessthan16_aligned
198 /* src and dest overlap in some way, len > 0 */
199 cmp r0, r1 /* if dest > src */
/* bhi is the unsigned "higher" condition, so the pointers are compared as unsigned addresses */
200 bhi Loverlap_srclower
203 /* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
205 bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */
211 /* the following routines deal with having to copy in the reverse direction */
213 /* src < dest, with overlap */
215 /* src += len; dest += len; */
219 /* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
/* the second compare only runs when the first one yields gt, so the blt is */
/* taken when either quantity is below 64 */
/* NOTE(review): gt/lt are signed conditions; a len in r2 with bit 31 set */
/* compares as negative and takes the bytewise path - confirm this is intended */
220 cmp r2, #64 /* less than 64 bytes to copy? */
221 cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
222 blt Lbytewise_reverse
224 /* test if src and dest are nonword aligned differently */
227 bne Lbytewise_reverse
229 /* test if src and dest are non word aligned or dest is non 16 byte aligned */
231 bne Lunaligned_reverse_similarly
233 /* test for dest 32 byte alignment */
235 bne Lunaligned_32_reverse_similarly
237 /* 64 byte reverse block copy, src and dest aligned */
238 Lmorethan64_aligned_reverse:
/* Reverse (descending address) block copy: r1 is the src pointer and r0 the */
/* dest pointer, both stepped downward by the decrement-before transfers below. */
239 /* save some more registers to use in the copy */
240 stmfd sp!, { r6, r8, r10, r11 }
242 /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
246 /* copy 64 bytes at a time */
/* each ldmdb/stmdb pair moves eight registers (32 bytes) and moves r1/r0 down */
/* through writeback; two pairs make up one 64 byte iteration */
/* r3, r4, r5 and r12 serve as data registers alongside the four saved above */
247 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
251 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
252 ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
257 stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
260 /* restore the scratch registers we just saved */
261 ldmfd sp!, { r6, r8, r10, r11 }
263 /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
/* ne: hand over to the bytewise reverse copy */
271 bne Lbytewise_reverse
274 Lunaligned_reverse_similarly:
275 /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
/* Each transfer below is predicated on a single condition flag (vs: V set, */
/* cs: C set, mi: N set) and uses pre-decrement addressing with writeback, so */
/* src (r1) and dest (r0) step downward by 1, 2 and 8 bytes respectively */
/* whenever the corresponding flag is set. */
/* NOTE(review): the flags presumably mirror the low address bits of dest - confirm */
279 ldrvsb r3, [r1, #-1]!
280 ldrcsh r4, [r1, #-2]!
283 strvsb r3, [r0, #-1]!
284 strcsh r4, [r0, #-2]!
287 ldmmidb r1!, { r3, r4 }
288 stmmidb r0!, { r3, r4 }
/* len -= (r12 >> 28), updating the flags */
/* NOTE(review): the top nibble of r12 presumably holds the number of bytes */
/* consumed while aligning - confirm */
290 subs r2, r2, r12, lsr #28
293 Lunaligned_32_reverse_similarly:
294 /* bring up to dest 32 byte alignment */
/* ne-conditional 16 byte (four register) transfer, moving r1/r0 downward */
/* through decrement-before addressing with writeback; r12 is used as a */
/* data register here */
296 ldmnedb r1!, { r3, r4, r5, r12 }
297 stmnedb r0!, { r3, r4, r5, r12 }
300 /* we should now be aligned, see what copy method we should use */
/* ge: continue with the 64 byte reverse block copy */
302 bge Lmorethan64_aligned_reverse
305 /* the following routines deal with non word aligned copies */
306 Lnonwordaligned_forward:
/* Forward copy for src and dest whose word alignment differs. The three */
/* loops below read whole words from src (r1), shift them by the 1, 2 or 3 */
/* byte offset, and carry the leftover high bytes of each source word in r4 */
/* into the low bytes of the next destination word. */
308 blt Lbytewise2 /* not worth the effort with less than 24 bytes total */
310 /* bytewise copy until src word aligned */
319 /* figure out how the src and dest are unaligned */
327 /* the dest pointer is 1 byte off from src */
328 mov r12, r2, lsr #2 /* number of words we should copy */
332 ldrb r4, [r0] /* load D[7:0] */
334 Lalign1_forward_loop:
335 ldr r3, [r1], #4 /* load S */
336 orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
337 str r4, [r0], #4 /* save D */
338 mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
340 bne Lalign1_forward_loop
342 /* finish the copy off */
343 strb r4, [r0], #1 /* save D[7:0] */
350 /* the dest pointer is 2 bytes off from src */
351 mov r12, r2, lsr #2 /* number of words we should copy */
355 ldrh r4, [r0] /* load D[15:0] */
357 Lalign2_forward_loop:
358 ldr r3, [r1], #4 /* load S */
359 orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
360 str r4, [r0], #4 /* save D */
361 mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
363 bne Lalign2_forward_loop
365 /* finish the copy off */
366 strh r4, [r0], #2 /* save D[15:0] */
373 /* the dest pointer is 3 bytes off from src */
374 mov r12, r2, lsr #2 /* number of words we should copy */
379 and r4, r4, #0x00ffffff /* keep only D[23:0] (the low 24 bits) */
381 Lalign3_forward_loop:
382 ldr r3, [r1], #4 /* load S */
383 orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
384 str r4, [r0], #4 /* save D */
385 mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
387 bne Lalign3_forward_loop
389 /* finish the copy off */
390 strh r4, [r0], #2 /* save D[15:0] */
392 strb r4, [r0], #1 /* save D[23:16] */
/* pop the registers pushed at entry; r0 gets the saved return value back and */
/* the saved lr is loaded straight into pc, which returns to the caller */
399 ldmfd sp!, {r0, r4, r5, r7, pc}