X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/a28bf75d63c6a64e4c3b417c6052e45f42c6cedd..6465356a983ac139f81d3b7913cdb548477c346c:/arm/string/memset_pattern.s

diff --git a/arm/string/memset_pattern.s b/arm/string/memset_pattern.s
deleted file mode 100755
index b569484..0000000
--- a/arm/string/memset_pattern.s
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#include
-
-// Only built for armv6 and higher.
-#if defined _ARM_ARCH_6
-
-// If we're building for armv7, and not for DYLD, then we have a symbol
-// resolver, so we need to rename these implementations.
-#if defined _ARM_ARCH_7 && !defined VARIANT_DYLD
-#define _memset_pattern4 _memset_pattern4$VARIANT$Generic
-#define _memset_pattern8 _memset_pattern8$VARIANT$Generic
-#define _memset_pattern16 _memset_pattern16$VARIANT$Generic
-#endif
-
-#include
-
-/*
- * This file contains the following functions:
- *
- *	void memset_pattern4(void *b, const void *c4, size_t len)
- *	void memset_pattern8(void *b, const void *c8, size_t len)
- *	void memset_pattern16(void *b, const void *c16, size_t len)
- *
- * memset() itself is implemented in the bzero.s file.
- *
- * This is a reasonably well-optimized version of the memset_pattern*
- * routines, implemented for the ARM9 and ARM11 processors using the ARMv6
- * instruction set. These routines use only the ARM core registers.
- *
- * The algorithm is to align the destination pointer on a 16-byte boundary
- * and then blast data 64 bytes at a time, in two stores of 32 bytes per loop.
- *
- */
-	.text
-	.align 2
-	.syntax unified
-
-/*----------------------------------------------------------------------------*/
-/* void memset_pattern4(void *ptr, const void *pattern4, size_t len); */
-/* */
-/* r0 << destination pointer */
-/* r1 << pointer to 4-byte pattern */
-/* r2 << 'len' (length of destination buffer in bytes) */
-/*----------------------------------------------------------------------------*/
-	.globl _memset_pattern4
-_memset_pattern4:
-	cmp	r2, #0			/* check if len is zero */
-	bxeq	lr			/* return if length is zero */
-
-	/* We need some registers, so save volatiles on stack */
-	/* Avoid r7 (frame pointer) and r9 (thread register) */
-	stmfd	sp!, {r4-r7, lr}
-	add	r7, sp, #12		/* establish frame */
-	stmfd	sp!, {r8, r10-r11}
-
-	/* copy destination base pointer r0 to r12 and leave r0 alone */
-	/* so that we return the original pointer back to the caller */
-	mov	r12, r0
-
-	/* Check if 'len' is long enough to bother aligning the destination */
-	/* pointer */
-	cmp	r2, #32			/* long enough to bother aligning? */
-	movlt	r3, #4			/* move pattern length into r3 */
-	movlt	r10, #4			/* pattern index */
-	movlt	r11, r1			/* move pattern pointer into r11 */
-	blt	L_Short			/* no */
-
-	/* move 'len' into r1, get 4-byte pattern in r2 */
-	mov	r6, r2			/* temporarily move 'len' into r6 */
-	ldr	r2, [r1]		/* load 4-byte pattern into r2 */
-	mov	r1, r6			/* move 'len' from r6 to r1 */
-
-	mov	r3, r2			/* copy 4-byte pattern into r3, r4 and r5 registers */
-	mov	r4, r2
-	mov	r5, r2
-
-L_NotShort:
-
-	/* Check for 16- or 32-byte aligned destination pointer */
-	tst	r12, #0x1F		/* check for 32-byte aligned */
-	beq	L_Aligned
-	tst	r12, #0xF		/* check for 16-byte aligned */
-	beq	L_16ByteAligned
-	b	L_Unaligned		/* yes */
-
-L_Bytewise:
-	ldrb	r4, [r11], #1
-	strb	r4, [r12], #1
-	subs	r10, #1
-	moveq	r10, r3
-	moveq	r11, r1
-	sub	r2, #1
-
-L_Short:
-	cmp	r2, #0			/* more bytes left? */
-	bne	L_Bytewise
-	ldm	sp!, {r8, r10-r11}	/* restore registers from stack */
-	ldm	sp!, {r4-r7, pc}	/* restore & return from subroutine */
-
-/* 'len' is long enough to justify aligning the destination pointer */
-/* */
-/* By the time we reach here, data is stored in registers as follows: */
-/* r1 << 'len' (length of destination buffer in bytes) */
-/* r2-r5 << pattern; either 4x4byte OR 2x8byte OR 1x16byte */
-/* r12 << destination pointer copy (scratch register) */
-/* r0 << destination pointer original */
-/* */
-/* Use r11 as a scratch register to store the #bytes offset to 16-byte align */
-/* */
-/* Unaligned on a 32-byte boundary, store 1-15 bytes until 16-byte aligned. */
-/* As we store these bytes, we rotate the pattern stored in r2-r5 to reflect */
-/* the alignment. */
-
-L_Unaligned:
-	mov	r11, r12, lsl #28
-	rsb	r11, r11, #0
-	msr	cpsr_f, r11		/* Bits[31:28] of cpsr now contain #bytes to align */
-
-L_Store15BytesAndRotatePattern:
-	strbvs	r2, [r12], #1		/* v is set, unaligned in the 1s column */
-	andvs	r6, r2, #0xFF		/* Rotate pattern right in r2-r5 by 1 byte. */
-	andvs	r8, r3, #0xFF		/* Consider registers r2-r5 as a contiguous */
-	andvs	r10, r4, #0xFF		/* 16-byte register, with r2 containing the LSB */
-	andvs	r11, r5, #0xFF		/* and r5 containing the MSB. */
-	lsrvs	r2, r2, #8
-	lsrvs	r3, r3, #8
-	lsrvs	r4, r4, #8
-	lsrvs	r5, r5, #8
-	orrvs	r2, r2, r8, lsl #24
-	orrvs	r3, r3, r10, lsl #24
-	orrvs	r4, r4, r11, lsl #24
-	orrvs	r5, r5, r6, lsl #24
-
-	strhcs	r2, [r12], #2		/* c is set, unaligned in the 2s column */
-	movcs	r6, r2, lsl #16		/* Rotate pattern right in r2-r5 by 2 bytes */
-	movcs	r8, r3, lsl #16
-	movcs	r10, r4, lsl #16
-	movcs	r11, r5, lsl #16
-	lsrcs	r2, r2, #16
-	lsrcs	r3, r3, #16
-	lsrcs	r4, r4, #16
-	lsrcs	r5, r5, #16
-	orrcs	r2, r2, r8
-	orrcs	r3, r3, r10
-	orrcs	r4, r4, r11
-	orrcs	r5, r5, r6
-
-	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */
-	moveq	r6, r2			/* Rotate pattern right in r2-r5 by 4 bytes */
-	moveq	r2, r3
-	moveq	r3, r4
-	moveq	r4, r5
-	moveq	r5, r6
-
-	stmmi	r12!, {r2-r3}		/* n is set, unaligned in the 8s column */
-	movmi	r6, r2			/* Rotate pattern right in r2-r5 by 8 bytes */
-	movmi	r8, r3
-	movmi	r2, r4
-	movmi	r3, r5
-	movmi	r4, r6
-	movmi	r5, r8
-
-	mrs	r11, cpsr		/* copy cpsr into r11 */
-	subs	r1, r1, r11, lsr #28
-	ldmeq	sp!, {r8, r10-r11}	/* restore registers from stack */
-	ldmeq	sp!, {r4-r7, pc}	/* restore & return from subroutine */
-
-/* By the time we reach here, we are 16-byte aligned and r2-r5 contain the */
-/* rotated pattern. Now let's make sure we are 32-byte aligned. */
-L_16ByteAligned:
-	tst	r12, #(1 << 4)
-	stmne	r12!, {r2-r5}
-	subsne	r1, r1, #16
-
-/* By the time we reach here, data is stored in registers as follows: */
-/* r1 << 'len' (remaining length of destination buffer in bytes) */
-/* r2-r5 << rotated pattern; either 4x4byte OR 2x8byte OR 1x16byte */
-/* r12 << aligned destination pointer copy (scratch register) */
-L_Aligned:
-	cmp	r1, #64
-	blt	L_AlignedLessThan64
-
-/* Copy the pattern into four more registers so that we can do 64-byte transfers */
-	mov	r6, r2
-	mov	r8, r3
-	mov	r10, r4
-	mov	r11, r5
-
-/* At this point, we are 16-byte aligned and 'len' is at least 64 bytes. */
-/* Let's transfer 64 bytes at a time until len becomes less than 64 bytes. */
-	sub	r1, r1, #64		/* pre-subtract to avoid extra compare in loop */
-L_Loop64:
-	stm	r12!, {r2-r6, r8, r10-r11}
-	subs	r1, r1, #64
-	stm	r12!, {r2-r6, r8, r10-r11}
-	bge	L_Loop64
-
-	/* return if 'len' is zero */
-	adds	r1, r1, #64		/* readjust length; previously subtracted extra 64 */
-	ldmeq	sp!, {r8, r10-r11}	/* restore registers from stack */
-	ldmeq	sp!, {r4-r7, pc}	/* restore & return from subroutine */
-
-L_AlignedLessThan64:
-	/* do we have 16 or more bytes left? */
-	cmp	r1, #16
-	stmge	r12!, {r2-r5}
-	subsge	r1, r1, #16
-	bgt	L_AlignedLessThan64
-	ldmeq	sp!, {r8, r10-r11}	/* restore registers from stack */
-	ldmeq	sp!, {r4-r7, pc}	/* restore & return from subroutine */
-
-L_AlignedLessThan16:
-	/* store the last up-to-15 bytes */
-	/* move the remaining len bits [3:0] to the flags area of cpsr */
-	mov	r1, r1, lsl #28
-	msr	cpsr_f, r1
-
-	stmmi	r12!, {r2-r3}		/* n is set, store 8 bytes */
-	movmi	r2, r4			/* shift vector down 8 bytes */
-	movmi	r3, r5
-
-	streq	r2, [r12], #4		/* z is set, store 4 bytes */
-	moveq	r2, r3			/* shift vector down 4 bytes */
-
-	strhcs	r2, [r12], #2		/* c is set, store 2 bytes */
-	lsrcs	r2, #16			/* shift register right 2 bytes */
-
-	strbvs	r2, [r12], #1		/* v is set, store 1 byte */
-	ldm	sp!, {r8, r10-r11}	/* restore registers from stack */
-	ldm	sp!, {r4-r7, pc}	/* restore & return from subroutine */
-
-/*----------------------------------------------------------------------------*/
-/* void memset_pattern8(void *ptr, const void *pattern8, size_t len); */
-/* */
-/* r0 << destination pointer */
-/* r1 << pointer to 8-byte pattern */
-/* r2 << 'len' (length of destination buffer in bytes) */
-/*----------------------------------------------------------------------------*/
-	.globl _memset_pattern8
-_memset_pattern8:
-	cmp	r2, #0			/* check if len is zero */
-	bxeq	lr			/* return if length is zero */
-
-	/* We need some registers, so save volatiles on stack */
-	/* Avoid r7 (frame pointer) and r9 (thread register) */
-	stmfd	sp!, {r4-r7, lr}
-	add	r7, sp, #12		/* establish frame */
-	stmfd	sp!, {r8, r10-r11}
-
-	/* copy destination base pointer r0 to r12 and leave r0 alone */
-	/* so that we return the original pointer back to the caller */
-	mov	r12, r0
-
-	/* Check if 'len' is long enough to bother aligning the destination */
-	/* pointer */
-	cmp	r2, #32			/* long enough to bother aligning? */
-	movlt	r3, #8			/* move pattern length into r3 */
-	movlt	r10, #8			/* pattern index */
-	movlt	r11, r1			/* move pattern pointer into r11 */
-	blt	L_Short			/* no */
-
-	/* move 'len' into r1, get 8-byte pattern in r2-r3 */
-	mov	r6, r2			/* temporarily move 'len' into r6 */
-	ldr	r2, [r1], #4		/* load 8-byte pattern into r2-r3 */
-	ldr	r3, [r1], #4
-	mov	r1, r6			/* move 'len' from r6 to r1 */
-
-	mov	r4, r2			/* copy 8-byte pattern into r4-r5 registers */
-	mov	r5, r3
-	b	L_NotShort		/* yes */
-
-
-/*----------------------------------------------------------------------------*/
-/* void memset_pattern16(void *ptr, const void *pattern16, size_t len); */
-/* */
-/* r0 << destination pointer */
-/* r1 << pointer to 16-byte pattern */
-/* r2 << 'len' (length of destination buffer in bytes) */
-/*----------------------------------------------------------------------------*/
-	.globl _memset_pattern16
-_memset_pattern16:
-	cmp	r2, #0			/* check if len is zero */
-	bxeq	lr			/* return if length is zero */
-
-	/* We need some registers, so save volatiles on stack */
-	/* Avoid r7 (frame pointer) and r9 (thread register) */
-	stmfd	sp!, {r4-r7, lr}
-	add	r7, sp, #12		/* establish frame */
-	stmfd	sp!, {r8, r10-r11}
-
-	/* copy destination base pointer r0 to r12 and leave r0 alone */
-	/* so that we return the original pointer back to the caller */
-	mov	r12, r0
-
-	/* Check if 'len' is long enough to bother aligning the destination */
-	/* pointer */
-	cmp	r2, #32			/* long enough to bother aligning? */
-	movlt	r3, #16			/* move pattern length into r3 */
-	movlt	r10, #16		/* pattern index */
-	movlt	r11, r1			/* move pattern pointer into r11 */
-	blt	L_Short			/* no */
-
-	/* move 'len' into r1, get 16-byte pattern in r2-r5 */
-	mov	r6, r2			/* temporarily move 'len' into r6 */
-	ldr	r2, [r1], #4		/* load 16-byte pattern into r2-r5 */
-	ldr	r3, [r1], #4
-	ldr	r4, [r1], #4
-	ldr	r5, [r1], #4
-	mov	r1, r6			/* move 'len' from r6 to r1 */
-
-	b	L_NotShort		/* yes */
-
-
-#endif /* _ARM_ARCH_6 */
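
For reference, the contract all three deleted routines implement (per the
prototypes in the header comment above) is equivalent to this C sketch. It is
a minimal illustration only; memset_pattern4_ref is a hypothetical name, not
part of the source, and the assembly above is the optimized version of the
same contract.

    #include <stddef.h>

    /* Fill the first len bytes of b with the 4-byte pattern at c4; the
     * final copy of the pattern is truncated when len is not a multiple
     * of 4 (the assembly's bytewise and tail paths behave the same way).
     * memset_pattern8/16 do the same with 8- and 16-byte patterns. */
    static void memset_pattern4_ref(void *b, const void *c4, size_t len)
    {
        unsigned char *dst = b;
        const unsigned char *pat = c4;

        for (size_t i = 0; i < len; i++)
            dst[i] = pat[i % 4];
    }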
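The subtle part of the deleted code is the alignment path (L_Unaligned /
L_Store15BytesAndRotatePattern): the negated low four bits of the destination
address are moved into the N/Z/C/V flags via msr cpsr_f, so the conditional
1-, 2-, 4- and 8-byte stores (strbvs/strhcs/streq/stmmi) emit exactly the
bytes needed to reach a 16-byte boundary, and the 16-byte pattern held in
r2-r5 is rotated right by the same byte count so that later aligned stores
stay in phase. A C sketch of the rotation idea, assuming the hypothetical
helper name store_head_and_rotate:

    #include <stddef.h>
    #include <string.h>

    /* Store k (1-15) head bytes to reach a 16-byte boundary, then rotate
     * the pattern right by k bytes so that the aligned 16-byte stores
     * which follow still emit the pattern in the correct phase. */
    static void store_head_and_rotate(unsigned char **dst, size_t k,
                                      unsigned char pat[16])
    {
        unsigned char rotated[16];

        memcpy(*dst, pat, k);               /* the 1-15 leading bytes */
        *dst += k;

        for (size_t i = 0; i < 16; i++)     /* byte-rotate right by k */
            rotated[i] = pat[(i + k) % 16];
        memcpy(pat, rotated, 16);
    }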
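One further detail is the shape of the 64-byte main loop: L_Loop64
pre-subtracts 64 from the count once, so the single subs inside the loop both
counts down and sets the flags that bge tests, saving a separate compare per
iteration; the 64 is added back afterwards to recover the tail length. In C
terms, under the same caveat (fill64 and store64 are hypothetical stand-ins,
not names from the source):

    #include <stddef.h>
    #include <string.h>
    #include <sys/types.h>              /* ssize_t */

    /* Stand-in for the two 32-byte stm stores done per pass of L_Loop64. */
    static void store64(unsigned char **dst, const unsigned char pat[16])
    {
        for (int i = 0; i < 4; i++, *dst += 16)
            memcpy(*dst, pat, 16);
    }

    /* Requires len >= 64; returns the remaining (tail) length. */
    static size_t fill64(unsigned char *dst, const unsigned char pat[16],
                         size_t len)
    {
        ssize_t remaining = (ssize_t)len - 64;  /* pre-subtract */

        do {
            store64(&dst, pat);                 /* 64 bytes per pass */
            remaining -= 64;                    /* counts down and tests */
        } while (remaining >= 0);

        return (size_t)(remaining + 64);        /* readjust for the tail */
    }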