]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* ======================================= | |
29 | * BCOPY, MEMCPY, and MEMMOVE for Mac OS X | |
30 | * ======================================= | |
31 | * | |
32 | * Version of 2/20/2003, tuned for G3. | |
33 | * | |
34 | * Register usage. Note we use R2, so this code will not run in a PEF/CFM | |
35 | * environment. | |
36 | * | |
37 | * r0 = "w7" or temp | |
38 | * r2 = "w8" | |
39 | * r3 = read on entry but never modified, as memcpy and memmove return their 1st parameter as a value | |
40 | * r4 = source ptr ("rs") | |
41 | * r5 = count of bytes to move ("rc") | |
42 | * r6 = "w1" | |
43 | * r7 = "w2" | |
44 | * r8 = "w3" | |
45 | * r9 = "w4" | |
46 | * r10 = "w5" | |
47 | * r11 = "w6" | |
48 | * r12 = destination ptr ("rd") | |
49 | * f0-f3 = used for moving 8-byte aligned data | |
50 | */ | |
51 | #define rs r4 // source ptr. NB: we depend on rs==r4 in "lswx" instructions: they load into r5-r12, so the address register must lie outside that range | |
52 | #define rd r12 | |
53 | #define rc r5 | |
54 | ||
55 | #define w1 r6 | |
56 | #define w2 r7 | |
57 | #define w3 r8 | |
58 | #define w4 r9 | |
59 | #define w5 r10 | |
60 | #define w6 r11 | |
61 | #define w7 r0 | |
62 | #define w8 r2 | |
63 | ||
64 | #include <sys/appleapiopts.h> | |
65 | #include <ppc/asm.h> | |
66 | #include <machine/cpu_capabilities.h> | |
67 | #include <machine/commpage.h> | |
68 | ||
69 | .text | |
70 | ||
71 | ||
72 | #define kLong 33 // operands of kLong or more bytes (i.e. more than 32) are too long for one lswx/stswx pair through r5-r12 | |
73 | ||
74 | ||
75 | // Main entry points. bcopy takes (src=r3, dst=r4, len=r5); memcpy/memmove take (dst=r3, src=r4, len=r5). Operands shorter than kLong are moved with a single lswx/stswx pair; longer ones branch to LLong0/LLong1. | |
76 | ||
77 | .align 5 | |
78 | bcopy_g3: // void bcopy(const void *src, void *dst, size_t len): r3=src, r4=dst, r5=len | |
79 | cmplwi rc,kLong // length > 32 bytes? (cr0 is consumed by the bge below) | |
80 | sub w1,r4,r3 // w1 = dst-src; the long path must move in reverse if (rd-rs)<rc | |
81 | mr rd,r4 // rd = dst; start to move source & dest to canonic spot (LLong0 sets rs from r3) | |
82 | bge LLong0 // skip if long operand (len >= kLong) | |
83 | mtxer rc // set length (0..32) for string ops | |
84 | lswx r5,0,r3 // load all len bytes from src into r5-r12; nothing is stored until the whole operand is in registers | |
85 | stswx r5,0,r4 // store them at dst | |
86 | blr | |
87 | ||
88 | // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page. bcopy_g3 above is exactly 8 instructions (32 bytes) from a .align 5 boundary, so the .align 5 below lands there; changing bcopy_g3's instruction count breaks this. | |
89 | ||
90 | .align 5 | |
91 | Lmemcpy_g3: // void* memcpy(void *dst, const void *src, size_t len): r3=dst, r4=src, r5=len | |
92 | Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len): same entry, overlap is handled for both | |
93 | cmplwi rc,kLong // length > 32 bytes? (cr0 is consumed by the bge below) | |
94 | sub w1,r3,rs // w1 = dst-src; the long path must move in reverse if (rd-rs)<rc | |
95 | mr rd,r3 // rd = dst; must leave r3 alone, it is return value for memcpy etc | |
96 | bge LLong1 // longer than 32 bytes (len >= kLong) | |
97 | mtxer rc // set length (0..32) for string ops | |
98 | lswx r5,0,r4 // load all len bytes from src into r5-r12; nothing is stored until the whole operand is in registers | |
99 | stswx r5,0,r3 // store them at dst; r3 is still the original dst for the return value | |
100 | blr | |
101 | ||
102 | // Long operands (more than 32 bytes). Common dispatch: reverse moves go to LLongReverse, relatively word-aligned forward moves go to LLongFloat, everything else falls into the forward word loop below. | |
103 | // On entry: rd = destination, rc = length, w1 = (dest - source), used to check for overlap and relative alignment | |
104 | ||
105 | LLong0: // enter from bcopy() | |
106 | mr rs,r3 // rs = bcopy's source (r3); rd was already set from r4 at bcopy_g3 | |
107 | LLong1: // enter from memcpy() and memmove(), where rs (r4) already holds the source | |
108 | cmplw cr1,w1,rc // set cr1 blt iff we must move reverse: unsigned (rd-rs) < rc | |
109 | rlwinm r0,w1,0,0x3 // r0 = (rd-rs) & 3: are operands relatively word-aligned? | |
110 | neg w2,rd // w2 = -rd; prepare to align destination (its low bits are the distance to the next boundary) | |
111 | cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned | |
112 | blt cr1,LLongReverse // handle reverse move | |
113 | andi. w4,w2,3 // w4 <- #bytes to word align destination (0..3); also sets cr0 for the "beq 1f" below | |
114 | beq cr5,LLongFloat // relatively aligned so use FPRs | |
115 | sub rc,rc,w4 // adjust count for alignment | |
116 | srwi r0,rc,5 // get #32-byte chunks to xfer; >=1 when w4==0, but can be 0 after alignment (e.g. rc=33, w4=3) | |
117 | rlwinm rc,rc,0,0x1F // mask down to leftover bytes | |
118 | mtctr r0 // set up loop count | |
119 | beq 1f // dest already word aligned (cr0 still from the andi. above) | |
120 | ||
121 | // Word align the destination by moving w4 (1..3) bytes with string ops. | |
122 | ||
123 | mtxer w4 // byte count to xer | |
124 | cmpwi r0,0 // any chunks to xfer? (sets cr0 for the "beq- 2f" below) | |
125 | lswx w1,0,rs // move w4 bytes to align dest | |
126 | add rs,rs,w4 | |
127 | stswx w1,0,rd | |
128 | add rd,rd,w4 | |
129 | beq- 2f // pathologic case, no chunks to xfer | |
130 | ||
131 | // Forward, unaligned loop: 32 bytes per iteration through w1-w8 (r6-r11, r0, r2); all eight words are loaded before any is stored. | |
132 | ||
133 | 1: | |
134 | lwz w1,0(rs) | |
135 | lwz w2,4(rs) | |
136 | lwz w3,8(rs) | |
137 | lwz w4,12(rs) | |
138 | lwz w5,16(rs) | |
139 | lwz w6,20(rs) | |
140 | lwz w7,24(rs) | |
141 | lwz w8,28(rs) | |
142 | addi rs,rs,32 | |
143 | stw w1,0(rd) | |
144 | stw w2,4(rd) | |
145 | stw w3,8(rd) | |
146 | stw w4,12(rd) | |
147 | stw w5,16(rd) | |
148 | stw w6,20(rd) | |
149 | stw w7,24(rd) | |
150 | stw w8,28(rd) | |
151 | addi rd,rd,32 | |
152 | bdnz 1b | |
153 | 2: // rc = remaining bytes (0-31); rs and rd point just past what has been moved | |
154 | mtxer rc // set up count for string ops | |
155 | mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite | |
156 | lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4) | |
157 | stswx r5,0,r0 // store them at the saved dest address held in r0 | |
158 | blr | |
159 | ||
160 | ||
161 | ||
162 | // Forward, aligned loop. We use FPRs. Entered from LLong1 with w2 = -rd when the operands are relatively word-aligned; only the destination is brought to a doubleword boundary, so the source may be merely word-aligned. | |
163 | ||
164 | LLongFloat: | |
165 | andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (0..7); also sets cr0 for the "beq 1f" below | |
166 | sub rc,rc,w4 // adjust count for alignment | |
167 | srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 when w4 != 0) | |
168 | rlwinm rc,rc,0,0x1F // mask down to leftover bytes | |
169 | mtctr r0 // set up loop count | |
170 | beq 1f // dest already doubleword aligned (cr0 still from the andi. above) | |
171 | ||
172 | // Doubleword align the destination by moving w4 (1..7) bytes with string ops. | |
173 | ||
174 | mtxer w4 // byte count to xer | |
175 | cmpwi r0,0 // any chunks to xfer? (sets cr0 for the "beq- 2f" below) | |
176 | lswx w1,0,rs // move w4 bytes to align dest | |
177 | add rs,rs,w4 | |
178 | stswx w1,0,rd | |
179 | add rd,rd,w4 | |
180 | beq- 2f // pathologic case, no chunks to xfer | |
181 | 1: // loop over 32-byte chunks, four doublewords through f0-f3, all loaded before any is stored | |
182 | lfd f0,0(rs) | |
183 | lfd f1,8(rs) | |
184 | lfd f2,16(rs) | |
185 | lfd f3,24(rs) | |
186 | addi rs,rs,32 | |
187 | stfd f0,0(rd) | |
188 | stfd f1,8(rd) | |
189 | stfd f2,16(rd) | |
190 | stfd f3,24(rd) | |
191 | addi rd,rd,32 | |
192 | bdnz 1b | |
193 | 2: // rc = remaining bytes (0-31); rs and rd point just past what has been moved | |
194 | mtxer rc // set up count for string ops | |
195 | mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite | |
196 | lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4) | |
197 | stswx r5,0,r0 // store them at the saved dest address held in r0 | |
198 | blr | |
199 | ||
200 | ||
201 | // Long, reverse moves: taken from LLong1 when unsigned (rd-rs) < rc, so the copy runs from the high addresses down. | |
202 | // On entry: cr5 = beq if relatively word aligned; rs, rd, rc = source, destination, length (more than 32) | |
203 | ||
204 | LLongReverse: | |
205 | add rd,rd,rc // point to end of operands + 1 | |
206 | add rs,rs,rc | |
207 | beq cr5,LReverseFloat // aligned operands so can use FPRs | |
208 | srwi r0,rc,5 // get chunk count (>=1: rc is more than 32 and no alignment bytes are peeled off on this path) | |
209 | rlwinm rc,rc,0,0x1F // mask down to leftover bytes | |
210 | mtctr r0 // set up loop count | |
211 | mtxer rc // set up for trailing bytes now; the loop below does not write XER | |
212 | 1: | |
213 | lwz w1,-4(rs) | |
214 | lwz w2,-8(rs) | |
215 | lwz w3,-12(rs) | |
216 | lwz w4,-16(rs) | |
217 | stw w1,-4(rd) | |
218 | lwz w5,-20(rs) | |
219 | stw w2,-8(rd) | |
220 | lwz w6,-24(rs) | |
221 | stw w3,-12(rd) | |
222 | lwz w7,-28(rs) | |
223 | stw w4,-16(rd) | |
224 | lwzu w8,-32(rs) | |
225 | stw w5,-20(rd) | |
226 | stw w6,-24(rd) | |
227 | stw w7,-28(rd) | |
228 | stwu w8,-32(rd) | |
229 | bdnz 1b | |
230 | ||
231 | sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); rs is r4, so this updates rs in place | |
232 | sub r0,rd,rc // r0 = leftmost leftover dest byte; move dest ptr out of way of the r5-r12 load | |
233 | lswx r5,0,r4 // load xer bytes into r5-r12 | |
234 | stswx r5,0,r0 // store them at the dest address held in r0 | |
235 | blr | |
236 | ||
237 | ||
238 | // Long, reverse aligned moves. We use FPRs. Entered from LLongReverse with rs and rd already pointing to end of operands + 1 and the operands relatively word-aligned. | |
239 | ||
240 | LReverseFloat: | |
241 | andi. w4,rd,7 // w4 <- #bytes to doubleword-align destination end (0..7); also sets cr0 for the "beq 1f" below | |
242 | sub rc,rc,w4 // adjust count for alignment | |
243 | srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 when w4 != 0) | |
244 | rlwinm rc,rc,0,0x1F // mask down to leftover bytes | |
245 | mtctr r0 // set up loop count | |
246 | beq 1f // dest already doubleword aligned (cr0 still from the andi. above) | |
247 | ||
248 | // Doubleword align the destination by moving the last w4 (1..7) bytes with string ops. | |
249 | ||
250 | mtxer w4 // byte count to xer | |
251 | cmpwi r0,0 // any chunks to xfer? (sets cr0 for the "beq- 2f" below) | |
252 | sub rs,rs,w4 // point to 1st bytes to xfer | |
253 | sub rd,rd,w4 | |
254 | lswx w1,0,rs // move w4 bytes to align dest | |
255 | stswx w1,0,rd | |
256 | beq- 2f // pathologic case, no chunks to xfer | |
257 | 1: | |
258 | lfd f0,-8(rs) | |
259 | lfd f1,-16(rs) | |
260 | lfd f2,-24(rs) | |
261 | lfdu f3,-32(rs) | |
262 | stfd f0,-8(rd) | |
263 | stfd f1,-16(rd) | |
264 | stfd f2,-24(rd) | |
265 | stfdu f3,-32(rd) | |
266 | bdnz 1b | |
267 | 2: // rc = remaining bytes (0-31); rs and rd point at the lowest byte moved so far | |
268 | mtxer rc // set up count for string ops | |
269 | sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); rs is r4, so this updates rs in place | |
270 | sub r0,rd,rc // r0 = leftmost leftover dest byte; move dest ptr out of way of the r5-r12 load | |
271 | lswx r5,0,r4 // load xer bytes into r5-r12 | |
272 | stswx r5,0,r0 // store them at the dest address held in r0 | |
273 | blr | |
274 | ||
275 | COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32) | // NOTE(review): presumably installs bcopy_g3 at _COMM_PAGE_BCOPY in the 32-bit comm page for CPUs with neither k64Bit nor kHasAltivec; the macro is not defined in this file, confirm the argument order in machine/commpage.h