]> git.saurik.com Git - apple/libc.git/blob - arm/string/bzero_CortexA9.s
Libc-825.24.tar.gz
[apple/libc.git] / arm / string / bzero_CortexA9.s
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 *
23 * This file implements the following functions for the Cortex-A9 processor:
24 *
25 * void bzero(void * destination,
26 * size_t length);
27 *
28 * void __bzero(void * destination,
29 * size_t length);
30 *
31 * zeros out a buffer length bytes long, beginning at the address destination.
32 *
33 * void *memset(void * destination,
34 * int value,
35 * size_t n);
36 *
37 * writes value converted to an unsigned char to n successive bytes, beginning
38 * at destination.
39 */
40
41 #include <arm/arch.h>
42 #if defined _ARM_ARCH_7
43
44 /*****************************************************************************
45 * Macros *
46 *****************************************************************************/
47
48 #define A9_ENTRY(name) \
49 .align 2;\
50 .globl _ ## name ## $VARIANT$CortexA9;\
51 _ ## name ## $VARIANT$CortexA9:  // A9_ENTRY(name): emit .align 2, then declare global and define the label _<name>$VARIANT$CortexA9
52
53 #define ESTABLISH_FRAME \
54 push {r0,r4,r7,lr};\
55 add r7, sp, #8  // ESTABLISH_FRAME: save r0, r4, r7, lr; then r7 = sp + 8, the address of the saved r7 (saved lr sits just above it)
56
57 #define CLEAR_FRAME_AND_RETURN \
58 pop {r0,r4,r7,pc}  // undo ESTABLISH_FRAME: reload r0, r4, r7 and return by popping the saved lr into pc
59
60 #define ADDITIONAL_CALLEE_SAVE_REGISTERS {r5,r6,r8}  // pushed/popped around the main loop, which overwrites them with the fill pattern
61
62 #define STORE_REGISTERS {r1,r3,r4,r5,r6,r8,r9,ip}  // eight 32-bit registers: one stm of this list writes 32 bytes
63
64 /*****************************************************************************
65 * entry points *
66 *****************************************************************************/
67
68 .text  // place the following code in the text section
69 .syntax unified  // use unified assembler syntax (e.g. the two-operand forms and strhcs/strbmi spellings below)
70 .code 32  // assemble as 32-bit ARM (not Thumb) instructions
71
72 A9_ENTRY(__bzero)  // __bzero and bzero are two labels for the same address; in: r0 = destination, r1 = length
73 A9_ENTRY(bzero)
74 mov r2, r1  // move length into r2, where the memset code that follows keeps its byte count
75 eor r1, r1  // r1 = r1 ^ r1 = 0: the fill value is zero; no branch here, execution falls through into memset
76
77 A9_ENTRY(memset)  // in: r0 = destination, r1 = value, r2 = n; r0 is returned unchanged (bzero falls through to here with r1 = 0)
78 // Early out if fewer than four bytes are to be set. Otherwise, store up to
79 // three bytes to align the destination pointer to a word boundary.
80 ESTABLISH_FRAME  // pushes the original r0 so the epilogue can pop it back as the return value
81 and r1, #0xff  // keep only the low byte of value
82 subs r2, #4  // r2 = n - 4 (count is kept biased by -4); carry clear (lo) when n < 4
83 orr r1, r1, r1, lsl #8  // replicate the byte into the low halfword; no 's' suffix, so the flags from subs survive
84 blo L_lengthLessThanFour  // n < 4: take the byte-at-a-time path
85 orr r1, r1, r1, lsl #16  // replicate the halfword: all four bytes of r1 now hold the fill byte
86 0: tst r0, #0x3  // is r0 word-aligned?
87 beq L_wordAligned  // yes: continue with word stores (r2 = remaining - 4)
88 strb r1, [r0],#1  // no: store one byte and post-increment r0
89 subs r2, #1  // one fewer byte remaining (still biased by -4)
90 bhs 0b  // loop while no borrow; on borrow fewer than four bytes remain, fall through
91 L_lengthLessThanFour:  // reached with r2 = (bytes remaining) - 4
92 adds r2, #4  // remove the bias: r2 = bytes remaining (0..3); Z set if none
93 beq 1f  // nothing left to store
94 0: strb r1, [r0],#1  // store the remaining bytes one at a time
95 subs r2, #1
96 bne 0b  // until the count reaches zero
97 1: CLEAR_FRAME_AND_RETURN  // restore r0 = original destination, r4, r7 and return
98
99 L_wordAligned:  // reached with r0 word-aligned, r1 = fill word, r2 = (bytes remaining) - 4
100 // Destination pointer has word alignment. Early out if fewer than 64 bytes
101 // are to be set. Otherwise, store up to 28 bytes to align the destination
102 // pointer to a cacheline boundary.
103 mov r3, r1  // r3, r4, r9 become extra copies of the fill word for multi-register stores
104 mov r4, r1
105 subs r2, #0x3c  // bias changes from -4 to -64: r2 = remaining - 64; carry clear (lo) if remaining < 64
106 mov r9, r1  // mov without 's' leaves the flags from subs intact for the blo below
107 blo L_lengthLessThanSixtyFour  // fewer than 64 bytes: skip alignment and the main loop
108 0: tst r0, #0x1c  // address bits 2..4 all zero means r0 (already word-aligned) is 32-byte aligned
109 beq L_cachelineAligned  // aligned with at least 64 bytes left: enter the main loop
110 str r1, [r0],#4  // otherwise store one word and post-increment r0
111 subs r2, #4  // four fewer bytes remaining (still biased by -64)
112 bhs 0b  // loop while no borrow; on borrow fewer than 64 bytes remain, fall through
113 L_lengthLessThanSixtyFour:  // reached with r2 = remaining - 64 and remaining in 0..63, so the low six bits of r2 equal remaining
114 tst r2, #0x30  // bits 4..5 of r2: any whole 16-byte chunks left?
115 beq 1f
116 0: stm r0!, {r1,r3,r4,r9}  // store 16 bytes and advance r0
117 sub r2, #0x10  // one fewer 16-byte chunk
118 tst r2, #0x30
119 bne 0b  // repeat until bits 4..5 are clear
120 1: tst r2, #0xf  // low four bits: 0..15 trailing bytes
121 beq 2f  // none: done
122 lsls ip, r2, #29  // carry = bit 3 of r2 (8 bytes), N = bit 2 of r2 (4 bytes); ip is only a scratch destination
123 stmcs r0!, {r1,r3}  // bit 3 set: store 8 bytes (stores do not alter the flags)
124 strmi r1, [r0],#4  // bit 2 set: store 4 bytes
125 lsls ip, r2, #31  // carry = bit 1 of r2 (2 bytes), N = bit 0 of r2 (1 byte)
126 strhcs r1, [r0],#2  // bit 1 set: store a halfword
127 strbmi r1, [r0]  // bit 0 set: store the final byte
128 2: CLEAR_FRAME_AND_RETURN  // restore r0 = original destination, r4, r7 and return
129
130 L_cachelineAligned:  // reached with r0 32-byte aligned and r2 = remaining - 64 >= 0; r1, r3, r4, r9 hold the fill word
131 // Main unrolled loop; stores two complete cachelines per iteration.
132 push ADDITIONAL_CALLEE_SAVE_REGISTERS  // save r5, r6, r8 before overwriting them
133 mov r5, r1  // r5, r6, r8, ip join r1, r3, r4, r9 so that STORE_REGISTERS is eight copies of the fill word
134 mov r6, r1
135 mov r8, r1
136 mov ip, r1
137 .align 4  // NOTE(review): presumably aligns the loop head to 16 bytes (power-of-two .align) -- confirm for the target assembler
138 0: stm r0!, STORE_REGISTERS  // store 32 bytes and advance r0
139 subs r2, #0x40  // account for the 64 bytes of this iteration; borrow means fewer than 64 remain afterwards
140 stm r0!, STORE_REGISTERS  // second 32 bytes; stm does not change the flags set by subs
141 bhs 0b  // repeat while the subtraction did not borrow
142 pop ADDITIONAL_CALLEE_SAVE_REGISTERS  // restore r5, r6, r8
143 b L_lengthLessThanSixtyFour  // r2 = remaining - 64 (now negative): finish the last 0..63 bytes there
144
145 #endif // defined _ARM_ARCH_7