/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around. It is not particularly well
 * optimized.
 *
 * For 64-bit processors with a 128-byte cache line, running in either
 * 32- or 64-bit mode. This is written for 32-bit execution; the kernel
 * will translate it to 64-bit code when it compiles the 64-bit commpage.
 *
 * Register usage. Note we use r2, so this code will not run in a PEF/CFM
 * environment.
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return the 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 */
#define rs  r4
#define rd  r12
#define rc  r5
#define rv  r2

#define w1  r6
#define w2  r7
#define w3  r8
#define w4  r9
#define w5  r10
#define w6  r11
#define w7  r0
#define w8  r2

#define ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text

#define kLong   64              // too long for inline loopless code

// Main entry points.

        .align  5
bcopy_64:                               // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong                // short or long?
        sub     w1,r4,r3                // must move in reverse if (rd-rs)<rc
        cmplw   cr1,w1,rc               // set cr1 blt iff we must move reverse
        mr      rd,r4                   // start to move registers to canonical spot
        mr      rs,r3
        blt     LShort                  // handle short operands
        dcbt    0,r3                    // touch in destination
        b       LLong                   // join medium/long operand code

// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.

        .align  5
Lmemcpy_g4:                             // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g4:                            // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong                // short or long?
        sub     w1,r3,r4                // must move in reverse if (rd-rs)<rc
        dcbt    0,r4                    // touch in the first line of source
        cmplw   cr1,w1,rc               // set cr1 blt iff we must move reverse
        mr      rd,r3                   // must leave r3 alone, it is return value for memcpy etc
        bge     LLong                   // handle medium or long operands

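// In rough C terms (an illustration only, not part of this file), both
// entries above reduce to the sketch below. The unsigned compare on (rd-rs)
// works because (size_t)(dst - src) < len is true exactly when dst lies in
// [src, src+len), i.e. when a forward copy would clobber unread source
// bytes; short_copy/long_copy are hypothetical stand-ins for LShort/LLong.
//
//      void *memmove_sketch(void *dst, const void *src, size_t len) {
//          int reverse = (size_t)((char *)dst - (const char *)src) < len;
//          if (len < kLong)                    // kLong == 64: loopless path
//              short_copy(dst, src, len, reverse);
//          else
//              long_copy(dst, src, len, reverse);
//          return dst;                         // memcpy/memmove return dst
//      }
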
// Handle short operands.

LShort:
        mtcrf   0x02,rc                 // put length bits 26-27 in cr6 (faster one cr at a time)
        mtcrf   0x01,rc                 // put length bits 28-31 in cr7
        blt     cr1,LShortReverse

// Forward short operands. This is the most frequent case, so it is inline.

LShort64:                               // enter to xfer last (<64) bytes
        bf      26,0f                   // 32-byte chunk to xfer?
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        addi    rs,rs,32
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        addi    rd,rd,32
0:
        bf      27,1f                   // quadword to move?
        ld      w1,0(rs)
        ld      w2,8(rs)
        addi    rs,rs,16
        std     w1,0(rd)
        std     w2,8(rd)
        addi    rd,rd,16
1:
        bf      28,2f                   // doubleword?
        ld      w1,0(rs)
        addi    rs,rs,8
        std     w1,0(rd)
        addi    rd,rd,8
2:
        bf      29,3f                   // word?
        lwz     w1,0(rs)
        addi    rs,rs,4
        stw     w1,0(rd)
        addi    rd,rd,4
3:
        bf      30,4f                   // halfword to move?
        lhz     w1,0(rs)
        addi    rs,rs,2
        sth     w1,0(rd)
        addi    rd,rd,2
4:
        bflr    31                      // done if no odd byte
        lbz     w1,0(rs)
        stb     w1,0(rd)
        blr

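// A C rendering of the ladder above (illustration only): each bit of the
// count, tested largest first, triggers exactly one move, so any count
// below 64 finishes with no loop. Constant-size memcpy stands in for the
// ld/std pairs; note the real code loads a whole chunk before storing it,
// so overlapping chunks are also safe there.
//
//      #include <string.h>
//      static void short_forward(char *d, const char *s, size_t n) {  // n < 64
//          if (n & 32) { memcpy(d, s, 32); d += 32; s += 32; }
//          if (n & 16) { memcpy(d, s, 16); d += 16; s += 16; }
//          if (n &  8) { memcpy(d, s,  8); d +=  8; s +=  8; }
//          if (n &  4) { memcpy(d, s,  4); d +=  4; s +=  4; }
//          if (n &  2) { memcpy(d, s,  2); d +=  2; s +=  2; }
//          if (n &  1) { *d = *s; }
//      }
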

// Handle short reverse operands.
//      cr6 = bits 26-27 of length
//      cr7 = bits 28-31 of length

LShortReverse:
        add     rs,rs,rc                // adjust ptrs for reverse move
        add     rd,rd,rc
LShortReverse64:                        // enter to xfer last (<64) bytes
        bf      26,0f                   // 32-byte chunk to xfer?
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ldu     w4,-32(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        stdu    w4,-32(rd)
0:
        bf      27,1f                   // quadword to move?
        ld      w1,-8(rs)
        ldu     w2,-16(rs)
        std     w1,-8(rd)
        stdu    w2,-16(rd)
1:
        bf      28,2f                   // doubleword?
        ldu     w1,-8(rs)
        stdu    w1,-8(rd)
2:
        bf      29,3f                   // word?
        lwzu    w1,-4(rs)
        stwu    w1,-4(rd)
3:
        bf      30,4f                   // halfword to move?
        lhzu    w1,-2(rs)
        sthu    w1,-2(rd)
4:
        bflr    31                      // done if no odd byte
        lbz     w1,-1(rs)               // no update
        stb     w1,-1(rd)
        blr

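// The reverse ladder is the mirror image (illustration only): the pointers
// are first advanced one past the ends of the operands, and the update
// forms (ldu/stdu etc.) then act like C pre-decrement:
//
//      static void short_reverse(char *d, const char *s, size_t n) {  // n < 64
//          d += n; s += n;                     // cf. "add rs,rs,rc" above
//          if (n & 32) { s -= 32; d -= 32; memcpy(d, s, 32); }
//          if (n & 16) { s -= 16; d -= 16; memcpy(d, s, 16); }
//          if (n &  8) { s -=  8; d -=  8; memcpy(d, s,  8); }
//          if (n &  4) { s -=  4; d -=  4; memcpy(d, s,  4); }
//          if (n &  2) { s -=  2; d -=  2; memcpy(d, s,  2); }
//          if (n &  1) { *--d = *--s; }
//      }
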
// Long operands.
//      cr1 = blt iff we must move reverse

        .align  4
LLong:
        dcbtst  0,rd                    // touch in destination
        neg     w3,rd                   // start to compute #bytes to align destination
        andi.   w6,w3,7                 // w6 <- #bytes to 8-byte align destination
        blt     cr1,LLongReverse        // handle reverse moves
        mtctr   w6                      // set up for loop to align destination
        sub     rc,rc,w6                // adjust count
        beq     LAligned                // destination already 8-byte aligned
1:
        lbz     w1,0(rs)
        addi    rs,rs,1
        stb     w1,0(rd)
        addi    rd,rd,1
        bdnz    1b

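// The alignment count above, in C (illustration only): negating the
// destination address and masking with 7 yields the distance to the next
// 8-byte boundary (0 if already aligned). An address ending in ...5 gives
// 3; an aligned one gives 0.
//
//      #include <stdint.h>
//      static unsigned bytes_to_align8(const void *p) {
//          return (unsigned)(-(uintptr_t)p & 7);   // cf. "neg w3,rd; andi. w6,w3,7"
//      }
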
// Destination is 8-byte aligned.

LAligned:
        srwi.   w2,rc,6                 // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc                 // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc                 // put length bits 28-31 in cr7
        beq     LShort64                // no 64-byte chunks
        mtctr   w2
        b       1f

// Loop moving 64-byte chunks.

        .align  5
1:
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        ld      w5,32(rs)
        ld      w6,40(rs)
        ld      w7,48(rs)
        ld      w8,56(rs)
        addi    rs,rs,64
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        std     w5,32(rd)
        std     w6,40(rd)
        std     w7,48(rd)
        std     w8,56(rd)
        addi    rd,rd,64
        bdnz    1b

        b       LShort64

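// In C terms (illustration only), the aligned forward path is:
//
//      static void long_forward(char *d, const char *s, size_t n) {
//          for (size_t chunks = n >> 6; chunks != 0; chunks--) {
//              memcpy(d, s, 64);               // the eight ld/std pairs above
//              d += 64; s += 64;
//          }
//          short_forward(d, s, n & 63);        // remainder; cf. LShort64
//      }
//
// The CTR register plays the role of `chunks`, and the low six bits of the
// count, already sitting in cr6/cr7, describe the remainder for LShort64.
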
// Handle reverse moves.

LLongReverse:
        add     rd,rd,rc                // point to end of operands
        add     rs,rs,rc
        andi.   r0,rd,7                 // is destination 8-byte aligned?
        sub     rc,rc,r0                // adjust count
        mtctr   r0                      // set up for byte loop
        beq     LRevAligned             // already aligned

1:
        lbzu    w1,-1(rs)
        stbu    w1,-1(rd)
        bdnz    1b

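// Here the alignment count is simpler than in the forward case: the end
// address itself, masked with 7, is the number of bytes to peel off
// ((uintptr_t)(d + n) & 7 in C terms), since we copy downward from the end.
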
// Destination is 8-byte aligned.

LRevAligned:
        srwi.   w2,rc,6                 // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc                 // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc                 // put length bits 28-31 in cr7
        beq     LShortReverse64         // no 64-byte chunks
        mtctr   w2
        b       1f

// Loop over 64-byte chunks (reverse).

        .align  5
1:
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ld      w4,-32(rs)
        ld      w5,-40(rs)
        ld      w6,-48(rs)
        ld      w7,-56(rs)
        ldu     w8,-64(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        std     w4,-32(rd)
        std     w5,-40(rd)
        std     w6,-48(rd)
        std     w7,-56(rd)
        stdu    w8,-64(rd)
        bdnz    1b

        b       LShortReverse64

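// The descriptor below registers this routine with the commpage machinery
// (see machine/commpage.h): install at _COMM_PAGE_BCOPY on processors whose
// capability word has k64Bit set and kHasAltivec clear, for both the 32-
// and 64-bit commpages (kCommPageBoth), translating the 32-bit code for
// the 64-bit page (kPort32to64).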
        COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)