Libc-763.11.tar.gz

[apple/libc.git] / arm / string / bcopy.s
diff --git a/arm/string/bcopy.s b/arm/string/bcopy.s

deleted file mode 100644 (file)

index 2e67e1c..0000000
--- a/arm/string/bcopy.s
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#if defined __thumb2__ && defined __ARM_NEON__
-    
-// Use our tuned NEON implementation when it is available.  Otherwise fall back
-// on more generic ARM code.
-
-#include "NEON/bcopy.s"
-    
-#else // defined __thumb2__ && defined __ARM_NEON__
-
-/*****************************************************************************
- * ARMv5 and ARMv6 implementation                                            *
- *****************************************************************************/
- 
-#include <arm/arch.h>
-
-.text
-.align 2
-       
-       .globl _memcpy
-       .globl _bcopy
-       .globl _memmove
-
-_bcopy:                /* void bcopy(const void *src, void *dest, size_t len);
-                        * bcopy takes (src, dest); the three moves below swap r0 and r1
-                        * through r3 so the registers match memcpy order, then
-                        * execution falls through into the shared body. */
-       mov             r3, r0
-       mov             r0, r1
-       mov             r1, r3
-
-_memcpy:               /* void *memcpy(void *dest, const void *src, size_t len); */
-_memmove:      /* void *memmove(void *dest, const void *src, size_t len);
-                * _memcpy and _memmove label the same instruction, so memcpy
-                * gets the same overlap handling as memmove.
-                * In: r0 = dest, r1 = src, r2 = len. */
-       /* check for zero len or if the pointers are the same */
-       cmp             r2, #0
-       cmpne   r0, r1                  /* only executed when len != 0 */
-       bxeq    lr                      /* nothing to copy: return at once, r0 still holds dest */
-
-       /* save r0 (return value), r4 and r5 (scratch), r7 and lr.
-        * Lexit pops the same five slots, loading the saved lr into pc.
-        * r7 is then pointed at its own saved slot (sp + 12), presumably as
-        * the frame pointer - confirm against the platform ABI. */
-       stmfd   sp!, { r0, r4, r5, r7, lr }
-       add     r7, sp, #12
-       
-       /* check for overlap. r3 <- distance between src & dest.
-        * The hs/lo conditions still see the flags of "cmp r0, r1" above,
-        * because neither stmfd nor add updates the flags. */
-       subhs   r3, r0, r1
-       sublo   r3, r1, r0
-       cmp             r3, r2                  /* if distance(src, dest) < len, we have overlap */
-       blo             Loverlap
-
-Lnormalforwardcopy:
-       /* are src and dest dissimilarly word aligned?
-        * (lsl #30 discards everything but the low two address bits) */
-       mov             r12, r0, lsl #30
-       cmp             r12, r1, lsl #30
-       bne             Lnonwordaligned_forward
-
-       /* if len < 64, do a quick forward copy.
-        * NOTE(review): blt is a signed comparison, so a len of 0x80000000 or
-        * more is also routed to the small-copy path - confirm such lengths
-        * cannot reach this routine. */
-       cmp             r2, #64
-       blt             Lsmallforwardcopy
-
-       /* is dest off a 16 byte boundary? if so copy the leading bytes first */
-       tst             r0, #0xf
-       bne             Lsimilarlyunaligned
-
-       /* check for 32 byte dest unalignment */
-       tst             r0, #(1<<4)
-       bne             Lunaligned_32
-
-Lmorethan64_aligned:
-       /* save some more registers to use in the copy */
-       stmfd   sp!, { r6, r8, r10, r11 }
-
-       /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
-       sub             r2, r2, #64
-
-L64loop:
-       /* copy 64 bytes at a time, as two bursts of eight registers (32 bytes) each */
-       ldmia   r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-#ifdef _ARM_ARCH_6
-       pld             [r1, #32]
-#endif
-       stmia   r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-       ldmia   r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-       subs    r2, r2, #64             /* sets the flags tested by bge at the bottom of the loop */
-#ifdef _ARM_ARCH_6
-       pld             [r1, #32]
-#endif
-       stmia   r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-       bge             L64loop                 /* another full 64 bytes still remain */
-
-       /* restore the scratch registers we just saved */
-       ldmfd   sp!, { r6, r8, r10, r11 }
-
-       /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion;
-        * if bytes remain, execution falls through to the tail copy that follows */
-       adds    r2, r2, #64
-       beq             Lexit
-
-Llessthan64_aligned:
-       /* copy 16 bytes at a time until we have < 16 bytes.
-        * When len < 16 on entry none of the conditional instructions run and
-        * neither branch is taken, so control drops into Llessthan16_aligned. */
-       cmp             r2, #16
-       ldmgeia r1!, { r3, r4, r5, r12 }
-       stmgeia r0!, { r3, r4, r5, r12 }
-       subges  r2, r2, #16
-       bgt             Llessthan64_aligned
-       beq             Lexit
-       
-Llessthan16_aligned:
-       mov             r2, r2, lsl #28         /* move len bits 3..0 up to bits 31..28 */
-       msr             cpsr_f, r2              /* flags now mirror len: N = 8s bit, Z = 4s bit, C = 2s bit, V = 1s bit */
-
-       ldmmiia r1!, { r2, r3 }                 /* N: 8 bytes (r2 is free to reuse, len now lives in the flags) */
-       ldreq   r4, [r1], #4                    /* Z: 4 bytes */
-       ldrcsh  r5, [r1], #2                    /* C: 2 bytes */
-       ldrvsb  r12, [r1], #1                   /* V: 1 byte */
-
-       stmmiia r0!, { r2, r3 }
-       streq   r4, [r0], #4
-       strcsh  r5, [r0], #2
-       strvsb  r12, [r0], #1
-       b               Lexit
-
-Lsimilarlyunaligned:
-       /* both src and dest are unaligned in similar ways; copy the 1..15
-        * leading bytes that bring dest up to a 16 byte boundary (the 32 byte
-        * step is handled at Lunaligned_32 just below) */
-       mov             r12, r0, lsl #28
-       rsb             r12, r12, #0            /* top nibble = (-dest) & 0xf = bytes up to the boundary */
-       msr             cpsr_f, r12             /* V = 1 byte, C = 2 bytes, Z = 4 bytes, N = 8 bytes */
-
-       ldrvsb  r3, [r1], #1
-       ldrcsh  r4, [r1], #2
-       ldreq   r5, [r1], #4
-
-       strvsb  r3, [r0], #1
-       strcsh  r4, [r0], #2
-       streq   r5, [r0], #4
-
-       ldmmiia r1!, { r3, r4 }
-       stmmiia r0!, { r3, r4 }
-
-       subs    r2, r2, r12, lsr #28    /* len -= number of bytes just copied */
-       beq             Lexit
-
-Lunaligned_32:
-       /* bring up to dest 32 byte alignment: copy one 16 byte chunk if address bit 4 is set */
-       tst             r0, #(1 << 4)
-       ldmneia r1!, { r3, r4, r5, r12 }
-       stmneia r0!, { r3, r4, r5, r12 }
-       subne   r2, r2, #16
-
-       /* we should now be aligned, see what copy method we should use */
-       cmp             r2, #64
-       bge             Lmorethan64_aligned
-       b               Llessthan64_aligned
-       
-Lbytewise2:
-       /* copy 2 bytes at a time; when only one byte is left the subs result
-        * is negative, so the "pl" second load and store are skipped.
-        * Expects len != 0 on entry (the first byte is copied unconditionally). */
-       subs    r2, r2, #2
-
-       ldrb    r3, [r1], #1
-       ldrplb  r4, [r1], #1
-
-       strb    r3, [r0], #1
-       strplb  r4, [r0], #1
-
-       bhi             Lbytewise2              /* loop while the subs neither borrowed nor reached zero */
-       b               Lexit
-
-Lbytewise:
-       /* simple bytewise forward copy; expects len != 0 on entry, since the
-        * count is only tested after the first byte has been copied */
-       ldrb    r3, [r1], #1
-       subs    r2, r2, #1
-       strb    r3, [r0], #1
-       bne             Lbytewise
-       b               Lexit
-
-Lsmallforwardcopy:
-       /* src and dest are word aligned similarly, less than 64 bytes to copy */
-       cmp             r2, #4
-       blt             Lbytewise2
-
-       /* bytewise copy until word aligned.
-        * src and dest share their low two address bits on this path, so
-        * aligning src aligns dest as well.  At most 3 bytes are consumed
-        * here, so at least one of the >= 4 bytes is still left afterwards. */
-       tst             r1, #3
-Lwordalignloop:
-       ldrneb  r3, [r1], #1
-       strneb  r3, [r0], #1
-       subne   r2, r2, #1
-       tstne   r1, #3
-       bne             Lwordalignloop
-
-       cmp             r2, #16                 /* pick the 16 byte chunk loop or the sub-16 tail */
-       bge             Llessthan64_aligned
-       blt             Llessthan16_aligned
-
-Loverlap:
-       /* src and dest overlap in some way, len > 0; r3 = distance between them */
-       cmp             r0, r1                          /* if dest > src */
-       bhi             Loverlap_srclower
-
-Loverlap_destlower:
-       /* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
-       cmp             r3, #64
-       bge             Lnormalforwardcopy      /* distance is at least one 64 byte stride of the copy, use normal copy */
-
-       cmp             r3, #2                  /* distance >= 2: the two-bytes-per-pass loop is used */
-       bge             Lbytewise2
-       b               Lbytewise               /* distance is 1: copy one byte per pass */
-
-       /* the following routines deal with having to copy in the reverse direction */
-Loverlap_srclower:
-       /* src < dest, with overlap; r3 = distance between them */
-
-       /* src += len; dest += len;  (both now point one past the last byte) */
-       add             r0, r0, r2
-       add             r1, r1, r2
-
-       /* we have to copy in reverse no matter what, test if we can use a large block reverse copy.
-        * Note the second compare only runs when len > 64, so a len of exactly
-        * 64 proceeds to the alignment tests without the distance being checked. */
-       cmp             r2, #64                         /* less than 64 bytes to copy? */
-       cmpgt   r3, #64                         /* src and dest less than 64 bytes apart? */
-       blt             Lbytewise_reverse
-
-       /* test if src and dest are non word aligned differently (r3 is reused as scratch) */
-       mov             r3, r0, lsl #30
-       cmp             r3, r1, lsl #30
-       bne             Lbytewise_reverse
-
-       /* test if src and dest are non word aligned or dest is non 16 byte aligned */
-       tst             r0, #0xf
-       bne             Lunaligned_reverse_similarly
-
-       /* test for dest 32 byte alignment */
-       tst             r0, #(1<<4)
-       bne             Lunaligned_32_reverse_similarly
-
-       /* 64 byte reverse block copy, src and dest aligned; r0/r1 point one past the bytes still to copy */
-Lmorethan64_aligned_reverse:
-       /* save some more registers to use in the copy */
-       stmfd   sp!, { r6, r8, r10, r11 }
-
-       /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
-       sub             r2, r2, #64
-
-L64loop_reverse:
-       /* copy 64 bytes at a time, as two descending bursts of eight registers (32 bytes) each */
-       ldmdb   r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
-#ifdef _ARM_ARCH_6
-       pld             [r1, #-32]
-#endif
-       stmdb   r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }      
-       ldmdb   r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }      
-       subs    r2, r2, #64             /* sets the flags tested by bge at the bottom of the loop */
-#ifdef _ARM_ARCH_6
-       pld             [r1, #-32]
-#endif
-       stmdb   r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }      
-       bge             L64loop_reverse
-
-       /* restore the scratch registers we just saved */
-       ldmfd   sp!, { r6, r8, r10, r11 }
-
-       /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion;
-        * any remaining bytes are finished by the bytewise loop directly below */
-       adds    r2, r2, #64
-       beq             Lexit
-
-Lbytewise_reverse:
-       /* one byte per pass, pre-decrementing both pointers; expects len != 0 on entry */
-       ldrb    r3, [r1, #-1]!
-       strb    r3, [r0, #-1]!
-       subs    r2, r2, #1
-       bne             Lbytewise_reverse
-       b               Lexit
-
-Lunaligned_reverse_similarly:
-       /* both src and dest are unaligned in similar ways; copy the 1..15
-        * trailing bytes that bring the dest end pointer down to a 16 byte
-        * boundary (the 32 byte step is handled at the next label) */
-       mov             r12, r0, lsl #28        /* top nibble = dest & 0xf = bytes above the boundary */
-       msr             cpsr_f, r12             /* V = 1 byte, C = 2 bytes, Z = 4 bytes, N = 8 bytes */
-
-       ldrvsb  r3, [r1, #-1]!
-       ldrcsh  r4, [r1, #-2]!
-       ldreq   r5, [r1, #-4]!
-
-       strvsb  r3, [r0, #-1]!
-       strcsh  r4, [r0, #-2]!
-       streq   r5, [r0, #-4]!
-
-       ldmmidb r1!, { r3, r4 }
-       stmmidb r0!, { r3, r4 }
-
-       subs    r2, r2, r12, lsr #28    /* len -= number of bytes just copied */
-       beq             Lexit
-
-Lunaligned_32_reverse_similarly:
-       /* bring dest down to 32 byte alignment: copy one 16 byte chunk if address bit 4 is set */
-       tst             r0, #(1 << 4)
-       ldmnedb r1!, { r3, r4, r5, r12 }
-       stmnedb r0!, { r3, r4, r5, r12 }
-       subne   r2, r2, #16
-
-       /* we should now be aligned, see what copy method we should use */
-       cmp             r2, #64
-       bge             Lmorethan64_aligned_reverse
-       b               Lbytewise_reverse
-
-       /* the following routines deal with non word aligned copies
-        * (src and dest differ in their low two address bits) */
-Lnonwordaligned_forward:
-       cmp             r2, #8
-       blt             Lbytewise2                      /* not worth the effort with less than 8 bytes total */
-
-       /* bytewise copy until src word aligned (at most 3 bytes, so at least 5 remain) */
-       tst             r1, #3
-Lwordalignloop2:
-       ldrneb  r3, [r1], #1
-       strneb  r3, [r0], #1
-       subne   r2, r2, #1
-       tstne   r1, #3
-       bne             Lwordalignloop2
-
-       /* figure out how the src and dest are unaligned: src is now word
-        * aligned, so dest & 3 is the byte offset between them (1, 2 or 3) */
-       and             r3, r0, #3
-       cmp             r3, #2
-       blt             Lalign1_forward
-       beq             Lalign2_forward
-       bgt             Lalign3_forward
-
-Lalign1_forward:
-       /* the dest pointer is 1 byte off from src: src is word aligned and dest
-        * sits 1 byte past a word boundary.  dest is stepped back to that
-        * boundary so whole words can be stored.
-        * NOTE(review): the first str below rewrites the byte in front of the
-        * destination with the value just read from it.  The D[]/S[] notes
-        * assume little-endian byte order - confirm. */
-       mov             r12, r2, lsr #2         /* number of words we should copy */
-       sub             r0, r0, #1
-
-       /* prime the copy */
-       ldrb    r4, [r0]                        /* load D[7:0] */
-
-Lalign1_forward_loop:
-       ldr             r3, [r1], #4            /* load S */
-       orr             r4, r4, r3, lsl #8      /* D[31:8] = S[23:0] */
-       str             r4, [r0], #4            /* save D */
-       mov             r4, r3, lsr #24         /* D[7:0] = S[31:24] */
-       subs    r12, r12, #1
-       bne             Lalign1_forward_loop
-
-       /* finish the copy off */
-       strb    r4, [r0], #1            /* save D[7:0] */
-
-       ands    r2, r2, #3              /* 0..3 bytes left over after the whole words */
-       beq             Lexit
-       b               Lbytewise2
-
-Lalign2_forward:
-       /* the dest pointer is 2 bytes off from src: src is word aligned and
-        * dest sits 2 bytes past a word boundary.  dest is stepped back to that
-        * boundary so whole words can be stored.
-        * NOTE(review): the first str below rewrites the two bytes in front of
-        * the destination with the values just read from them.  The D[]/S[]
-        * notes assume little-endian byte order - confirm. */
-       mov             r12, r2, lsr #2         /* number of words we should copy */
-       sub             r0, r0, #2
-
-       /* prime the copy */
-       ldrh    r4, [r0]                        /* load D[15:0] */
-
-Lalign2_forward_loop:
-       ldr             r3, [r1], #4            /* load S */
-       orr             r4, r4, r3, lsl #16     /* D[31:16] = S[15:0] */
-       str             r4, [r0], #4            /* save D */
-       mov             r4, r3, lsr #16         /* D[15:0] = S[31:16] */
-       subs    r12, r12, #1
-       bne             Lalign2_forward_loop
-
-       /* finish the copy off */
-       strh    r4, [r0], #2            /* save D[15:0] */
-
-       ands    r2, r2, #3              /* 0..3 bytes left over after the whole words */
-       beq             Lexit
-       b               Lbytewise2
-
-Lalign3_forward:
-       /* the dest pointer is 3 bytes off from src: src is word aligned and
-        * dest sits 3 bytes past a word boundary.  dest is stepped back to that
-        * boundary so whole words can be stored.
-        * NOTE(review): the first str below rewrites the three bytes in front
-        * of the destination with the values just read from them.  The D[]/S[]
-        * notes assume little-endian byte order - confirm. */
-       mov             r12, r2, lsr #2         /* number of words we should copy */
-       sub             r0, r0, #3
-
-       /* prime the copy */
-       ldr             r4, [r0]
-       and             r4, r4, #0x00ffffff     /* load D[23:0] */
-
-Lalign3_forward_loop:
-       ldr             r3, [r1], #4            /* load S */
-       orr             r4, r4, r3, lsl #24     /* D[31:24] = S[7:0] */
-       str             r4, [r0], #4            /* save D */
-       mov             r4, r3, lsr #8          /* D[23:0] = S[31:8] */
-       subs    r12, r12, #1
-       bne             Lalign3_forward_loop
-
-       /* finish the copy off: three bytes are still pending in r4 */
-       strh    r4, [r0], #2            /* save D[15:0] */
-       mov             r4, r4, lsr #16
-       strb    r4, [r0], #1            /* save D[23:16] */
-
-       ands    r2, r2, #3              /* 0..3 bytes left over after the whole words */
-       beq             Lexit
-       b               Lbytewise2
-
-Lexit:
-       ldmfd   sp!, {r0, r4, r5, r7, pc}       /* pop what the entry stmfd pushed: r0 = saved dest, saved lr goes into pc to return */
-
-#endif // defined __thumb2__ && defined __ARM_NEON__
-