]>
Commit | Line | Data |
---|---|---|
7b00c0c4 A |
1 | /* |
2 | * Copyright (c) 2009 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | * | |
23 | * This file implements the following functions for the Cortex-A9 processor: | |
24 | * | |
25 | * void bzero(void * destination, | |
26 | * size_t length); | |
27 | * | |
28 | * void __bzero(void * destination, | |
29 | * size_t length); | |
30 | * | |
31 | * zeros out a buffer length bytes long, beginning at the address destination. | |
32 | * | |
33 | * void *memset(void * destination, | |
34 | * int value, | |
35 | * size_t n); | |
36 | * | |
37 | * writes value converted to an unsigned char to n successive bytes, beginning | |
38 | * at destination. | |
39 | */ | |
ad3c9f2a A |
40 | |
41 | #include <arm/arch.h> | |
42 | #if defined _ARM_ARCH_7 | |
43 | ||
44 | /***************************************************************************** | |
45 | * Macros * | |
46 | *****************************************************************************/ | |
47 | ||
48 | #define A9_ENTRY(name) /* Emit a global label _<name>$VARIANT$CortexA9 on a 2^2 = 4-byte boundary; code placed after the macro is the body of that entry point. */ \ | |
49 | .align 2;\ | |
50 | .globl _ ## name ## $VARIANT$CortexA9;\ | |
51 | _ ## name ## $VARIANT$CortexA9: | |
52 | ||
53 | #define ESTABLISH_FRAME /* Prologue: save r0, r4, r7 and lr, then point r7 at the saved {r7, lr} pair. */ \ | |
54 | push {r0,r4,r7,lr}; /* r0 (destination) is saved so it can be reloaded on return; r4 is later reused as store data */ \ | |
55 | add r7, sp, #8 /* after the push r0 is at sp, r4 at sp+4, r7 at sp+8, lr at sp+12 */ | |
56 | ||
57 | #define CLEAR_FRAME_AND_RETURN /* Epilogue matching ESTABLISH_FRAME: reload r0, r4 and r7, and return by popping the saved lr into pc. */ \ | |
58 | pop {r0,r4,r7,pc} | |
59 | ||
60 | #define ADDITIONAL_CALLEE_SAVE_REGISTERS {r5,r6,r8} /* pushed/popped around the main loop, which overwrites them with the fill pattern */ | |
61 | ||
62 | #define STORE_REGISTERS {r1,r3,r4,r5,r6,r8,r9,ip} /* eight registers = 32 bytes written by each stm in the main loop */ | |
63 | ||
64 | /***************************************************************************** | |
65 | * entry points * | |
66 | *****************************************************************************/ | |
67 | ||
68 | .text /* assemble what follows into the text (code) section */ | |
69 | .syntax unified /* use the unified ARM/Thumb assembler syntax */ | |
70 | .code 32 /* generate ARM (32-bit) instructions rather than Thumb */ | |
71 | ||
72 | A9_ENTRY(__bzero) /* __bzero and bzero are two labels for the same code */ | |
73 | A9_ENTRY(bzero) /* in: r0 = destination, r1 = length */ | |
74 | mov r2, r1 /* move length into r2, where the memset code expects its byte count */ | |
75 | eor r1, r1 /* r1 = 0: fill value is zero; execution then falls through into the memset entry below */ | |
76 | ||
77 | A9_ENTRY(memset) /* in: r0 = destination, r1 = value, r2 = n; returns the original destination in r0 */ | |
78 | // Early out if fewer than four bytes are to be set. Otherwise, store up to | |
79 | // three bytes to align the destination pointer to a word boundary. | |
80 | ESTABLISH_FRAME /* saves r0, so r0 can be advanced freely as the store pointer */ | |
81 | and r1, #0xff /* keep only the low byte of value */ | |
82 | subs r2, #4 /* r2 = n - 4; carry is clear (lo) when n < 4 */ | |
83 | orr r1, r1, r1, lsl #8 /* replicate the byte into the low halfword; flags from the subs are left intact */ | |
84 | blo L_lengthLessThanFour /* n < 4: go straight to the byte loop */ | |
85 | orr r1, r1, r1, lsl #16 /* replicate the byte into all four bytes of r1 */ | |
86 | 0: tst r0, #0x3 /* is the store pointer 4-byte aligned yet? */ | |
87 | beq L_wordAligned /* yes; r2 still holds (bytes left - 4) */ | |
88 | strb r1, [r0],#1 /* no: store one byte and advance */ | |
89 | subs r2, #1 | |
90 | bhs 0b /* no borrow: at least four bytes still remain, keep aligning */ | |
91 | L_lengthLessThanFour: /* reached with r2 = (bytes left - 4) and fewer than four bytes left */ | |
92 | adds r2, #4 /* remove the bias: r2 = bytes left; Z is set when nothing remains */ | |
93 | beq 1f | |
94 | 0: strb r1, [r0],#1 /* store the remaining bytes one at a time */ | |
95 | subs r2, #1 | |
96 | bne 0b | |
97 | 1: CLEAR_FRAME_AND_RETURN /* reloads the saved destination into r0 and returns */ | |
98 | ||
99 | L_wordAligned: /* on entry: r0 is 4-byte aligned, r1 = fill byte in all four lanes, r2 = bytes left - 4 */ | |
100 | // Destination pointer has word alignment. Early out if fewer than 64 bytes | |
101 | // are to be set. Otherwise, store up to 28 bytes to align the destination | |
102 | // pointer to a cacheline boundary. | |
103 | mov r3, r1 /* r3, r4, r9 become extra copies of the fill word for multi-register stores */ | |
104 | mov r4, r1 /* r4 was saved by ESTABLISH_FRAME */ | |
105 | subs r2, #0x3c /* total bias is now 64: r2 = bytes left - 64; carry clear (lo) when fewer than 64 remain */ | |
106 | mov r9, r1 /* does not alter the flags set by the subs. NOTE(review): r9 is overwritten without being saved; presumably it is a scratch register in the target ABI - confirm */ | |
107 | blo L_lengthLessThanSixtyFour | |
108 | 0: tst r0, #0x1c /* address bits 2-4 all zero means the pointer is 32-byte aligned */ | |
109 | beq L_cachelineAligned /* aligned, and at least 64 bytes remain */ | |
110 | str r1, [r0],#4 /* otherwise store one word and advance */ | |
111 | subs r2, #4 | |
112 | bhs 0b /* no borrow: at least 64 bytes still remain, keep aligning */ | |
113 | L_lengthLessThanSixtyFour: /* r2 = bytes left - 64 with bytes left < 64, so the low six bits of r2 equal bytes left */ | |
114 | tst r2, #0x30 /* any whole 16-byte chunks left? */ | |
115 | beq 1f | |
116 | 0: stm r0!, {r1,r3,r4,r9} /* store 16 bytes */ | |
117 | sub r2, #0x10 | |
118 | tst r2, #0x30 | |
119 | bne 0b | |
120 | 1: tst r2, #0xf /* fewer than 16 bytes left; done if none */ | |
121 | beq 2f | |
122 | lsls ip, r2, #29 /* C = bit 3 of r2 (8 bytes pending), N = bit 2 (4 bytes pending); ip is only a scratch destination */ | |
123 | stmcs r0!, {r1,r3} /* store 8 bytes if bit 3 was set */ | |
124 | strmi r1, [r0],#4 /* store 4 bytes if bit 2 was set */ | |
125 | lsls ip, r2, #31 /* C = bit 1 of r2 (2 bytes pending), N = bit 0 (1 byte pending) */ | |
126 | strhcs r1, [r0],#2 /* store 2 bytes if bit 1 was set */ | |
127 | strbmi r1, [r0] /* store the final byte if bit 0 was set */ | |
128 | 2: CLEAR_FRAME_AND_RETURN /* reloads the saved destination into r0 and returns */ | |
129 | ||
130 | L_cachelineAligned: /* on entry: r0 is 32-byte aligned, r2 = bytes left - 64 (not negative), r1/r3/r4/r9 hold the fill word */ | |
131 | // Main unrolled loop; stores two complete cachelines per iteration. | |
132 | push ADDITIONAL_CALLEE_SAVE_REGISTERS /* save r5, r6, r8 before they are overwritten below */ | |
133 | mov r5, r1 /* fill the remaining registers named in STORE_REGISTERS with the pattern */ | |
134 | mov r6, r1 | |
135 | mov r8, r1 | |
136 | mov ip, r1 | |
137 | .align 4 /* place the loop top on a 2^4 = 16-byte boundary */ | |
138 | 0: stm r0!, STORE_REGISTERS /* first 32 bytes of this iteration */ | |
139 | subs r2, #0x40 /* account for the 64 bytes written by this iteration */ | |
140 | stm r0!, STORE_REGISTERS /* second 32 bytes; stm leaves the flags from the subs untouched */ | |
141 | bhs 0b /* no borrow: at least 64 more bytes remain, so run another iteration */ | |
142 | pop ADDITIONAL_CALLEE_SAVE_REGISTERS /* restore r5, r6, r8 */ | |
143 | b L_lengthLessThanSixtyFour /* r2 = bytes left - 64 again, with fewer than 64 bytes left for the tail code */ | |
144 | ||
145 | #endif // defined _ARM_ARCH_7 |