2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
 *  This file implements the following functions for the Swift micro-arch:
 *
 *  void bzero(void * destination,
 *             size_t length);
 *
 *  void __bzero(void * destination,
 *               size_t length);
 *
 *  zeros out a buffer length bytes long, beginning at the address destination.
 *
 *  void *memset(void * destination,
 *               int value,
 *               size_t n);
 *
 *  writes value converted to an unsigned char to n successive bytes, beginning
 *  at destination, and returns destination.
 */
#if defined _ARM_ARCH_7
// NOTE(review): _ARM_ARCH_7 is not defined anywhere in the visible source; it
// is presumably supplied by an included header or by the build. Confirm.

//------------------------------------------------------------------------------
//  void  bzero  (void *destination, size_t length);
//  void  __bzero(void *destination, size_t length);
//  void *memset (void *destination, int value, size_t n);
//
//  In:     bzero / __bzero:  r0 = destination, r1 = length
//          memset:           r0 = destination, r1 = value, r2 = n
//  Out:    r0 = destination (r0 is never written; ip is the working pointer)
//  Clobb:  r1 (bzero entries only), r2, r3, ip, lr, q0, q1, condition flags
//  Stack:  8 bytes (r7, lr), released by the pop on every exit path
//
//  Strategy: every path finishes with one store that is positioned to end
//  exactly at destination + n, even if that store overlaps bytes that were
//  already written. This avoids a separate tail loop for the leftover bytes.
//------------------------------------------------------------------------------

.syntax unified                     // Mnemonics below (strbhs, ands lr, #imm) are UAL.
.code 32                            // ARM state: strbhs is conditional with no IT block.

.globl ___bzero$VARIANT$Swift
.globl _bzero$VARIANT$Swift
.globl _memset$VARIANT$Swift

.text
.p2align 4
___bzero$VARIANT$Swift:
_bzero$VARIANT$Swift:
    mov     r2,     r1              // Move length into the memset length register
    eor     r1,     r1              // and use zero as the fill value.
_memset$VARIANT$Swift:
    push    {r7,lr}                 // Establish a frame.
    mov     r7,     sp
    mov     ip,     r0              // Advance a copy so r0 is returned unmodified.

    vdup.8  q0,     r1              // q0 = low byte of value in all 16 lanes.
    subs    r3,     r2, #64         // r3 = n - 64.
    blo     L_lengthLessThan64      // n < 64: small-buffer path.

    vmov    q1,     q0              // {q0,q1} = 32 bytes of fill per store.
    orr     lr,     r2, r0          // Low four bits of (n | destination) are zero
    ands    lr,     #0xf            // only if n is a multiple of 16 AND the
    bne     L_edgingNeeded          // destination is 16-byte aligned.

//  Aligned path. Invariant: r3 + 64 = bytes still to be written at ip, and
//  both ip and r3 are multiples of 16, so the :128 hint always holds.
0:  subs    r3,     #64
    vst1.8  {q0,q1}, [ip,:128]!     // 64 bytes per iteration (vst1 leaves the
    vst1.8  {q0,q1}, [ip,:128]!     // flags from subs intact).
    bhi     0b                      // Loop while more than 64 bytes remained.

    add     ip,     r3              // r3 is in (-64, 0]: back up so that the
    vst1.8  {q0,q1}, [ip,:128]!     // final 64 bytes end exactly at the end of
    vst1.8  {q0,q1}, [ip,:128]      // the buffer.
    pop     {r7,pc}

L_edgingNeeded:
    vst1.8  {q0},   [ip]            // 16 bytes at the possibly unaligned start
                                    // (safe: n >= 64 on this path).
    and     lr,     ip, #0xf        // lr = misalignment of the destination.
    bic     ip,     #0xf            // ip = next 16-byte boundary above the
    add     r3,     lr              // destination; r3 = bytes left from there
    add     ip,     #16             // minus 64. The aligned stores may overlap
    subs    r3,     #16             // the 16 bytes just written.
    blo     1f                      // Fewer than 64 bytes left: tail only.

0:  subs    r3,     #64
    vst1.8  {q0,q1}, [ip,:128]!     // 64 bytes per iteration to the aligned
    vst1.8  {q0,q1}, [ip,:128]!     // pointer.
    bhi     0b                      // Loop while more than 64 bytes remained.

1:  add     ip,     r3              // r3 is in (-64, 0]: back up to end - 64.
    vst1.8  {q0,q1}, [ip]!          // No alignment hint: this address is
    vst1.8  {q0,q1}, [ip]           // generally not 16-byte aligned.
    pop     {r7,pc}

L_lengthLessThan64:
    subs    r3,     r2, #8          // r3 = n - 8.
    blo     1f                      // n < 8: byte-at-a-time path.

0:  subs    r3,     #8
    vst1.8  {d0},   [ip]!           // 8 bytes per iteration.
    bhi     0b                      // Loop while more than 8 bytes remained.

    add     ip,     r3              // r3 is in (-8, 0]: back up so that the
    vst1.8  {d0},   [ip]            // final 8 bytes end at the end of the buffer.
    pop     {r7,pc}

1:  subs    r2,     #1              // Carry clear (borrow) means no bytes left,
    strbhs  r1,     [ip], #1        // so the store is skipped when n was 0.
    bhi     1b                      // Continue while bytes remain after this one.
    pop     {r7,pc}

#endif // defined _ARM_ARCH_7