X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/1f2f436a38f7ae2d39a943ad2898d8fed4ed2e58..a28bf75d63c6a64e4c3b417c6052e45f42c6cedd:/arm/string/bcopy_Generic.s

diff --git a/arm/string/bcopy_Generic.s b/arm/string/bcopy_Generic.s
index e69de29..4d5fc22 100644
--- a/arm/string/bcopy_Generic.s
+++ b/arm/string/bcopy_Generic.s
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*****************************************************************************
+ * ARMv5 and ARMv6 implementation, also used in dyld on later archs         *
+ *****************************************************************************/
+
+#include <arm/arch.h>
+#if !defined _ARM_ARCH_7 || defined VARIANT_DYLD
+
+.text
+.align 2
+
+	.globl _memcpy
+	.globl _bcopy
+	.globl _memmove
+
+_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
+	mov	r3, r0
+	mov	r0, r1
+	mov	r1, r3
+
+_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
+_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
+	/* check for zero len or if the pointers are the same */
+	cmp	r2, #0
+	cmpne	r0, r1
+	bxeq	lr
+
+	/* save r0 (return value), r4 (scratch), and r5 (scratch) */
+	stmfd	sp!, { r0, r4, r5, r7, lr }
+	add	r7, sp, #12
+
+	/* check for overlap. r3 <- distance between src & dest */
+	subhs	r3, r0, r1
+	sublo	r3, r1, r0
+	cmp	r3, r2			/* if distance(src, dest) < len, we have overlap */
+	blo	Loverlap
+
+Lnormalforwardcopy:
+	/* are src and dest dissimilarly word aligned? */
+	mov	r12, r0, lsl #30
+	cmp	r12, r1, lsl #30
+	bne	Lnonwordaligned_forward
+
+	/* if len < 64, do a quick forward copy */
+	cmp	r2, #64
+	blt	Lsmallforwardcopy
+
+	/* check for 16 byte src/dest unalignment */
+	tst	r0, #0xf
+	bne	Lsimilarlyunaligned
+
+	/* check for 32 byte dest unalignment */
+	tst	r0, #(1<<4)
+	bne	Lunaligned_32
+
+Lmorethan64_aligned:
+	/* save some more registers to use in the copy */
+	stmfd	sp!, { r6, r8, r10, r11 }
+
+	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
+	sub	r2, r2, #64
+
+L64loop:
+	/* copy 64 bytes at a time */
+	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+#ifdef _ARM_ARCH_6
+	pld	[r1, #32]
+#endif
+	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	subs	r2, r2, #64
+#ifdef _ARM_ARCH_6
+	pld	[r1, #32]
+#endif
+	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	bge	L64loop
+
+	/* restore the scratch registers we just saved */
+	ldmfd	sp!, { r6, r8, r10, r11 }
+
+	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
+	adds	r2, r2, #64
+	beq	Lexit
+
+Llessthan64_aligned:
+	/* copy 16 bytes at a time until we have < 16 bytes */
+	cmp	r2, #16
+	ldmgeia	r1!, { r3, r4, r5, r12 }
+	stmgeia	r0!, { r3, r4, r5, r12 }
+	subges	r2, r2, #16
+	bgt	Llessthan64_aligned
+	beq	Lexit
+
+Llessthan16_aligned:
+	mov	r2, r2, lsl #28
+	msr	cpsr_f, r2
+
+	ldmmiia	r1!, { r2, r3 }
+	ldreq	r4, [r1], #4
+	ldrcsh	r5, [r1], #2
+	ldrvsb	r12, [r1], #1
+
+	stmmiia	r0!, { r2, r3 }
+	streq	r4, [r0], #4
+	strcsh	r5, [r0], #2
+	strvsb	r12, [r0], #1
+	b	Lexit
+
+Lsimilarlyunaligned:
+	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
+	mov	r12, r0, lsl #28
+	rsb	r12, r12, #0
+	msr	cpsr_f, r12
+
+	ldrvsb	r3, [r1], #1
+	ldrcsh	r4, [r1], #2
+	ldreq	r5, [r1], #4
+
+	strvsb	r3, [r0], #1
+	strcsh	r4, [r0], #2
+	streq	r5, [r0], #4
+
+	ldmmiia	r1!, { r3, r4 }
+	stmmiia	r0!, { r3, r4 }
+
+	subs	r2, r2, r12, lsr #28
+	beq	Lexit
+
+Lunaligned_32:
+	/* bring up to dest 32 byte alignment */
+	tst	r0, #(1 << 4)
+	ldmneia	r1!, { r3, r4, r5, r12 }
+	stmneia	r0!, { r3, r4, r5, r12 }
+	subne	r2, r2, #16
+
+	/* we should now be aligned, see what copy method we should use */
+	cmp	r2, #64
+	bge	Lmorethan64_aligned
+	b	Llessthan64_aligned
+
+Lbytewise2:
+	/* copy 2 bytes at a time */
+	subs	r2, r2, #2
+
+	ldrb	r3, [r1], #1
+	ldrplb	r4, [r1], #1
+
+	strb	r3, [r0], #1
+	strplb	r4, [r0], #1
+
+	bhi	Lbytewise2
+	b	Lexit
+
+Lbytewise:
+	/* simple bytewise forward copy */
+	ldrb	r3, [r1], #1
+	subs	r2, r2, #1
+	strb	r3, [r0], #1
+	bne	Lbytewise
+	b	Lexit
+
+Lsmallforwardcopy:
+	/* src and dest are word aligned similarly, less than 64 bytes to copy */
+	cmp	r2, #4
+	blt	Lbytewise2
+
+	/* bytewise copy until word aligned */
+	tst	r1, #3
+Lwordalignloop:
+	ldrneb	r3, [r1], #1
+	strneb	r3, [r0], #1
+	subne	r2, r2, #1
+	tstne	r1, #3
+	bne	Lwordalignloop
+
+	cmp	r2, #16
+	bge	Llessthan64_aligned
+	blt	Llessthan16_aligned
+
+Loverlap:
+	/* src and dest overlap in some way, len > 0 */
+	cmp	r0, r1		/* if dest > src */
+	bhi	Loverlap_srclower
+
+Loverlap_destlower:
+	/* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
+	cmp	r3, #64
+	bge	Lnormalforwardcopy	/* overlap is greater than one stride of the copy, use normal copy */
+
+	cmp	r3, #2
+	bge	Lbytewise2
+	b	Lbytewise
+
+	/* the following routines deal with having to copy in the reverse direction */
+Loverlap_srclower:
+	/* src < dest, with overlap */
+
+	/* src += len; dest += len; */
+	add	r0, r0, r2
+	add	r1, r1, r2
+
+	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
+	cmp	r2, #64		/* less than 64 bytes to copy? */
+	cmpgt	r3, #64		/* less than 64 bytes of nonoverlap? */
+	blt	Lbytewise_reverse
+
+	/* test if src and dest are nonword aligned differently */
+	mov	r3, r0, lsl #30
+	cmp	r3, r1, lsl #30
+	bne	Lbytewise_reverse
+
+	/* test if src and dest are non word aligned or dest is non 16 byte aligned */
+	tst	r0, #0xf
+	bne	Lunaligned_reverse_similarly
+
+	/* test for dest 32 byte alignment */
+	tst	r0, #(1<<4)
+	bne	Lunaligned_32_reverse_similarly
+
+	/* 64 byte reverse block copy, src and dest aligned */
+Lmorethan64_aligned_reverse:
+	/* save some more registers to use in the copy */
+	stmfd	sp!, { r6, r8, r10, r11 }
+
+	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
+	sub	r2, r2, #64
+
+L64loop_reverse:
+	/* copy 64 bytes at a time */
+	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+#ifdef _ARM_ARCH_6
+	pld	[r1, #-32]
+#endif
+	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	subs	r2, r2, #64
+#ifdef _ARM_ARCH_6
+	pld	[r1, #-32]
+#endif
+	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
+	bge	L64loop_reverse
+
+	/* restore the scratch registers we just saved */
+	ldmfd	sp!, { r6, r8, r10, r11 }
+
+	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
+	adds	r2, r2, #64
+	beq	Lexit
+
+Lbytewise_reverse:
+	ldrb	r3, [r1, #-1]!
+	strb	r3, [r0, #-1]!
+	subs	r2, r2, #1
+	bne	Lbytewise_reverse
+	b	Lexit
+
+Lunaligned_reverse_similarly:
+	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
+	mov	r12, r0, lsl #28
+	msr	cpsr_f, r12
+
+	ldrvsb	r3, [r1, #-1]!
+	ldrcsh	r4, [r1, #-2]!
+	ldreq	r5, [r1, #-4]!
+
+	strvsb	r3, [r0, #-1]!
+	strcsh	r4, [r0, #-2]!
+	streq	r5, [r0, #-4]!
+
+	ldmmidb	r1!, { r3, r4 }
+	stmmidb	r0!, { r3, r4 }
+
+	subs	r2, r2, r12, lsr #28
+	beq	Lexit
+
+Lunaligned_32_reverse_similarly:
+	/* bring up to dest 32 byte alignment */
+	tst	r0, #(1 << 4)
+	ldmnedb	r1!, { r3, r4, r5, r12 }
+	stmnedb	r0!, { r3, r4, r5, r12 }
+	subne	r2, r2, #16
+
+	/* we should now be aligned, see what copy method we should use */
+	cmp	r2, #64
+	bge	Lmorethan64_aligned_reverse
+	b	Lbytewise_reverse
+
+	/* the following routines deal with non word aligned copies */
+Lnonwordaligned_forward:
+	cmp	r2, #8
+	blt	Lbytewise2	/* not worth the effort with less than 24 bytes total */
+
+	/* bytewise copy until src word aligned */
+	tst	r1, #3
+Lwordalignloop2:
+	ldrneb	r3, [r1], #1
+	strneb	r3, [r0], #1
+	subne	r2, r2, #1
+	tstne	r1, #3
+	bne	Lwordalignloop2
+
+	/* figure out how the src and dest are unaligned */
+	and	r3, r0, #3
+	cmp	r3, #2
+	blt	Lalign1_forward
+	beq	Lalign2_forward
+	bgt	Lalign3_forward
+
+Lalign1_forward:
+	/* the dest pointer is 1 byte off from src */
+	mov	r12, r2, lsr #2		/* number of words we should copy */
+	sub	r0, r0, #1
+
+	/* prime the copy */
+	ldrb	r4, [r0]		/* load D[7:0] */
+
+Lalign1_forward_loop:
+	ldr	r3, [r1], #4		/* load S */
+	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[24:0] */
+	str	r4, [r0], #4		/* save D */
+	mov	r4, r3, lsr #24		/* D[7:0] = S[31:25] */
+	subs	r12, r12, #1
+	bne	Lalign1_forward_loop
+
+	/* finish the copy off */
+	strb	r4, [r0], #1		/* save D[7:0] */
+
+	ands	r2, r2, #3
+	beq	Lexit
+	b	Lbytewise2
+
+Lalign2_forward:
+	/* the dest pointer is 2 bytes off from src */
+	mov	r12, r2, lsr #2		/* number of words we should copy */
+	sub	r0, r0, #2
+
+	/* prime the copy */
+	ldrh	r4, [r0]		/* load D[15:0] */
+
+Lalign2_forward_loop:
+	ldr	r3, [r1], #4		/* load S */
+	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
+	str	r4, [r0], #4		/* save D */
+	mov	r4, r3, lsr #16		/* D[15:0] = S[31:15] */
+	subs	r12, r12, #1
+	bne	Lalign2_forward_loop
+
+	/* finish the copy off */
+	strh	r4, [r0], #2		/* save D[15:0] */
+
+	ands	r2, r2, #3
+	beq	Lexit
+	b	Lbytewise2
+
+Lalign3_forward:
+	/* the dest pointer is 3 bytes off from src */
+	mov	r12, r2, lsr #2		/* number of words we should copy */
+	sub	r0, r0, #3
+
+	/* prime the copy */
+	ldr	r4, [r0]
+	and	r4, r4, #0x00ffffff	/* load D[24:0] */
+
+Lalign3_forward_loop:
+	ldr	r3, [r1], #4		/* load S */
+	orr	r4, r4, r3, lsl #24	/* D[31:25] = S[7:0] */
+	str	r4, [r0], #4		/* save D */
+	mov	r4, r3, lsr #8		/* D[24:0] = S[31:8] */
+	subs	r12, r12, #1
+	bne	Lalign3_forward_loop
+
+	/* finish the copy off */
+	strh	r4, [r0], #2		/* save D[15:0] */
+	mov	r4, r4, lsr #16
+	strb	r4, [r0], #1		/* save D[23:16] */
+
+	ands	r2, r2, #3
+	beq	Lexit
+	b	Lbytewise2
+
+Lexit:
+	ldmfd	sp!, {r0, r4, r5, r7, pc}
+
+#endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD
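For readers following the diff above: the branchy top section implements the usual memmove direction choice, copy forward unless the destination starts inside the source buffer, in which case copy backward from the end. The C fragment below is only an illustrative sketch of that decision (plus bcopy's argument swap); it is not part of the diff, and the real routine layers the alignment checks and 64-byte ldm/stm block paths shown above on top of it. The function name memmove_sketch is invented for this example.

#include <stddef.h>
#include <stdint.h>

/* Illustrative only: the direction decision made at the top of _memmove. */
static void *memmove_sketch(void *dest, const void *src, size_t len)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    uintptr_t di = (uintptr_t)dest, si = (uintptr_t)src;

    if (len == 0 || di == si)            /* "zero len or the pointers are the same" */
        return dest;

    if (di < si || di - si >= len) {     /* no destructive overlap: forward copy */
        while (len--)
            *d++ = *s++;
    } else {                             /* dest overlaps the end of src: copy in reverse */
        d += len;
        s += len;
        while (len--)
            *--d = *--s;
    }
    return dest;
}

/* bcopy is just an argument swap, as the _bcopy entry point shows:
 * bcopy(src, dest, len) behaves like memmove(dest, src, len). */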
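The Lalign1/2/3_forward paths use a carried-register technique to keep copying a word at a time even when src and dest end up with different byte alignments. The sketch below shows the 1-byte-offset case in C for a little-endian machine. It is an illustrative reconstruction, not Apple's code: the function name and parameters are invented, and it assumes src is word aligned, dest sits one byte past a word boundary, and the byte just below dest was already written by the preceding bytewise alignment loop (as in the assembly, which reloads and re-stores that byte).

#include <stddef.h>
#include <stdint.h>

/* Illustrative only: copy 4*nwords bytes from a word-aligned src to a dest
 * that is one byte past a word boundary, mirroring Lalign1_forward. */
static void copy_forward_off_by_one(unsigned char *dest, const unsigned char *src, size_t nwords)
{
    const uint32_t *s = (const uint32_t *)src;   /* assumed word aligned */
    uint32_t *d = (uint32_t *)(dest - 1);        /* back up to the word boundary */
    uint32_t carry = dest[-1];                   /* D[7:0]: byte already written below dest */

    while (nwords--) {
        uint32_t w = *s++;
        *d++ = carry | (w << 8);                 /* D[31:8] = low three bytes of S */
        carry = w >> 24;                         /* high byte of S becomes the next D[7:0] */
    }
    *(unsigned char *)d = (unsigned char)carry;  /* flush the final carried byte */
}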