]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
de355530 | 2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
43866e37 | 6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. |
1c79356b | 7 | * |
43866e37 A |
8 | * This file contains Original Code and/or Modifications of Original Code |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
43866e37 A |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
1c79356b A |
22 | * |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | ; | |
26 | ; Copy bytes of data around. handles overlapped data. | |
27 | ; | |
28 | ; Change this to use Altivec later on, and maybe floating point. | |
29 | ; | |
1c79356b A |
30 | ; |
31 | #include <ppc/asm.h> | |
32 | #include <ppc/proc_reg.h> | |
33 | ||
34 | ; Use CR5_lt to indicate non-cached | |
35 | #define noncache 20 | |
36 | ; Use CR5_gt to indicate that we need to turn data translation back on | |
37 | #define fixxlate 21 | |
de355530 A |
38 | ; Use CR5_eq to indicate that we need to invalidate bats |
39 | #define killbats 22 | |
1c79356b A |
40 | |
41 | ; | |
42 | ; bcopy_nc(from, to, nbytes) | |
43 | ; | |
44 | ; bcopy_nc operates on non-cached memory so we can not use any kind | |
45 | ; of cache instructions. | |
46 | ; | |
47 | ||
9bccf70c A |
48 | .align 5 |
49 | .globl EXT(bcopy_nc) | |
1c79356b | 50 | 
9bccf70c | 51 | LEXT(bcopy_nc) |
1c79356b A |
52 | 
53 | crset noncache ; Flag the copy as non-cached so the line loop skips DCBZ and touch-ahead | |
54 | b bcpswap ; Join bcopy after its "Set cached" step for the common entry checks | |
55 | ||
9bccf70c A |
56 | ; |
57 | ; void bcopy_physvir(from, to, nbytes) | |
58 | ; Attempt to copy physically addressed memory with translation on if conditions are met. | |
de355530 | 59 | ; Otherwise do a normal bcopy_phys. |
9bccf70c A |
60 | ; |
61 | ; Rules are: neither source nor destination can cross a page. | |
de355530 | 62 | ; No accesses above the 2GB line (I/O or ROM). |
9bccf70c | 63 | ; |
de355530 A |
64 | ; Interrupts must be disabled throughout the copy when this is called |
65 | ||
9bccf70c A |
66 | ; To do this, we build a |
67 | ; 128KB DBAT for both the source and sink. If both are the same, only one is | |
68 | ; loaded. We do not touch the IBATs, so there is no issue if either physical page | |
69 | ; address is the same as the virtual address of the instructions we are executing. | |
70 | ; | |
de355530 | 71 | ; At the end, we invalidate the used DBATs. Interrupt state is left as the caller set it. |
9bccf70c A |
72 | ; |
73 | ; Note, this one will not work in user state | |
74 | ; | |
75 | ||
76 | .align 5 | |
77 | .globl EXT(bcopy_physvir) | |
78 | ||
79 | LEXT(bcopy_physvir) | |
80 | ||
de355530 | 81 | addic. r0,r5,-1 ; Get length - 1 (cr0 is tested by the bltlr below) |
9bccf70c | 82 | add r11,r3,r0 ; Point to last byte of source ("from" arrives in r3) |
de355530 | 83 | cmplw cr1,r3,r4 ; Does source == sink? (cr1 is recomputed before it is next tested) |
9bccf70c A |
84 | add r12,r4,r0 ; Point to last byte of sink ("to" arrives in r4) |
85 | bltlr- ; Bail if length is 0 or way too big | |
86 | xor r7,r11,r3 ; See if the source ran into the next page | |
87 | xor r8,r12,r4 ; See if the sink ran into the next page | |
88 | or r0,r7,r8 ; Combine wrap | |
89 | ||
de355530 | 90 | li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes |
9bccf70c A |
91 | rlwinm. r0,r0,0,0,19 ; Did we overflow a page? (any difference above the low 12 bits) |
92 | li r7,2 ; Set validity flags | |
93 | li r8,2 ; Set validity flags | |
de355530 | 94 | bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy... |
9bccf70c | 95 | 
de355530 | 96 | crset killbats ; Remember to trash BATs on the way out |
9bccf70c A |
97 | rlwimi r11,r9,0,15,31 ; Set source lower DBAT value |
98 | rlwimi r12,r9,0,15,31 ; Set sink lower DBAT value | |
99 | rlwimi r7,r11,0,0,14 ; Set source upper DBAT value | |
100 | rlwimi r8,r12,0,0,14 ; Set sink upper DBAT value | |
101 | cmplw cr1,r11,r12 ; See if source and sink are same block | |
102 | ||
103 | sync | |
104 | ||
105 | mtdbatl 0,r11 ; Set source lower DBAT | |
106 | mtdbatu 0,r7 ; Set source upper DBAT | |
107 | ||
108 | beq- cr1,bcpvsame ; Source and sink are in same block, DBAT 0 covers both | |
109 | ||
110 | mtdbatl 1,r12 ; Set sink lower DBAT | |
111 | mtdbatu 1,r8 ; Set sink upper DBAT | |
112 | ||
113 | bcpvsame: mr r6,r3 ; Set source | |
114 | crclr noncache ; Set cached | |
115 | ||
de355530 A |
116 | b copyit ; Go copy it... (killbats is set, so the exit path clears the DBATs) |
117 | ||
9bccf70c | 118 | |
1c79356b A |
119 | ; |
120 | ; void bcopy_phys(from, to, nbytes) | |
121 | ; Turns off data translation before the copy. Note, this one will | |
de355530 | 122 | ; not work in user state |
1c79356b A |
123 | ; |
124 | ||
9bccf70c A |
125 | .align 5 |
126 | .globl EXT(bcopy_phys) | |
127 | ||
128 | LEXT(bcopy_phys) | |
de355530 | 129 | 
1c79356b | 130 | mfmsr r9 ; Get the MSR |
de355530 | 131 | 
1c79356b | 132 | crclr noncache ; Set cached |
de355530 A |
133 | rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (cr0 is kept for the crnot below) |
134 | ||
135 | cmplw cr1,r4,r3 ; Compare "to" and "from" | |
1c79356b A |
136 | cmplwi cr7,r5,0 ; Check if we have a 0 length |
137 | mr r6,r3 ; Set source | |
de355530 A |
138 | beqlr- cr1 ; Bail if "to" and "from" are the same (MSR not yet changed) |
139 | xor r9,r9,r8 ; Turn off translation if it is on (should be) | |
1c79356b A |
140 | beqlr- cr7 ; Bail if length is 0 (MSR not yet changed) |
141 | ||
de355530 A |
142 | rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off |
143 | crclr killbats ; Make sure we do not trash BATs on the way out | |
144 | rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off | |
1c79356b A |
145 | mtmsr r9 ; Set DR translation off |
146 | isync ; Wait for it | |
147 | ||
de355530 A |
148 | crnot fixxlate,cr0_eq ; Remember to turn on translation if it was (cr0 from the DR test above) |
149 | b copyit ; Go copy it... | |
1c79356b A |
150 | |
151 | ; | |
152 | ; void bcopy(from, to, nbytes) | |
153 | ; | |
154 | ||
9bccf70c A |
155 | .align 5 |
156 | .globl EXT(bcopy) | |
157 | ||
158 | LEXT(bcopy) | |
1c79356b A |
159 | 
160 | crclr noncache ; Set cached | |
161 | ||
de355530 A |
162 | bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from" (bcopy_nc joins here with noncache set) |
163 | mr. r5,r5 ; Check if we have a 0 length | |
1c79356b | 164 | mr r6,r3 ; Set source (copyit expects source in r6, sink in r4) |
de355530 A |
165 | crclr killbats ; Make sure we do not trash BATs on the way out |
166 | beqlr- cr1 ; Bail if "to" and "from" are the same | |
167 | beqlr- ; Bail if length is 0 | |
1c79356b | 168 | crclr fixxlate ; Set translation already ok |
de355530 | 169 | b copyit ; Go copy it... |
1c79356b A |
170 | |
171 | ; | |
172 | ; When we move the memory, forward overlays must be handled. We | |
173 | ; also can not use the cache instructions if we are from bcopy_nc. | |
174 | ; We need to preserve R3 because it needs to be returned for memcpy. | |
175 | ; We can be interrupted and lose control here. | |
176 | ; | |
de355530 A |
177 | ; There is no stack, so in order to use floating point, we would |
178 | ; need to take the FP exception. Any potential gains by using FP | |
1c79356b A |
179 | ; would be more than eaten up by this. |
180 | ; | |
de355530 | 181 | ; Later, we should use Altivec for large moves. |
1c79356b A |
182 | ; |
183 | ||
9bccf70c A |
184 | .align 5 |
185 | .globl EXT(memcpy) | |
de355530 | 186 | 
9bccf70c | 187 | LEXT(memcpy) |
de355530 | 188 | 
1c79356b A |
189 | cmplw cr1,r3,r4 ; "to" and "from" the same? |
190 | mr r6,r4 ; Set the "from" (memcpy gets it in r4, copyit wants it in r6) | |
191 | mr. r5,r5 ; Length zero? | |
192 | crclr noncache ; Set cached | |
193 | mr r4,r3 ; Set the "to" (r3 itself is left untouched as the return value) | |
194 | crclr fixxlate ; Set translation already ok | |
195 | beqlr- cr1 ; "to" and "from" are the same | |
196 | beqlr- ; Length is 0 | |
de355530 | 197 | crclr killbats ; Make sure we do not trash BATs on the way out |
1c79356b | 198 | 
de355530 | 199 | copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move) |
1c79356b A |
200 | lis r8,0x7FFF ; Start up a mask |
201 | srawi r11,r12,31 ; Propagate the sign bit | |
202 | dcbt br0,r6 ; Touch in the first source line | |
203 | cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit) | |
204 | ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF | |
205 | xor r9,r12,r11 ; If sink - source was negative, invert bits | |
206 | srw r8,r8,r7 ; Get move length limitation (mask of the bits below the length's top bit) | |
207 | sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value | |
208 | cmplw r12,r5 ; See if we actually forward overlap (unsigned: sink - source < length) | |
209 | cmplwi cr7,r9,32 ; See if at least a line between source and sink | |
210 | dcbtst br0,r4 ; Touch in the first sink line | |
211 | cmplwi cr1,r5,32 ; Are we moving more than a line? | |
de355530 | 212 | cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (28 = cr7_lt from above) |
1c79356b A |
213 | blt- fwdovrlap ; This is a forward overlapping area, handle it... |
214 | ||
215 | ; | |
216 | ; R4 = sink | |
217 | ; R5 = length | |
218 | ; R6 = source | |
219 | ; | |
220 | ||
221 | ; | |
222 | ; Here we figure out how much we have to move to get the sink onto a | |
223 | ; cache boundary. If we can, and there are still more than 32 bytes | |
224 | ; left to move, we can really speed things up by DCBZing the sink line. | |
225 | ; We can not do this if noncache is set because we will take an | |
226 | ; alignment exception. | |
227 | ||
228 | neg r0,r4 ; Get the number of bytes to move to align to a line boundary | |
229 | rlwinm. r0,r0,0,27,31 ; Keep only the low 5 bits (0-31 bytes) and test for zero | |
230 | and r0,r0,r8 ; limit to the maximum front end move | |
231 | mtcrf 3,r0 ; Make branch mask for partial moves (low 8 bits of count into CR bits 24-31) | |
232 | sub r5,r5,r0 ; Set the length left to move | |
233 | beq alline ; Already on a line... (cr0 from the rlwinm. above) | |
234 | ||
235 | bf 31,alhalf ; No single byte to do... | |
236 | lbz r7,0(r6) ; Get the byte | |
237 | addi r6,r6,1 ; Point to the next | |
238 | stb r7,0(r4) ; Save the single | |
239 | addi r4,r4,1 ; Bump sink | |
240 | ||
241 | ; Sink is halfword aligned here | |
242 | ||
243 | alhalf: bf 30,alword ; No halfword to do... | |
244 | lhz r7,0(r6) ; Get the halfword | |
245 | addi r6,r6,2 ; Point to the next | |
246 | sth r7,0(r4) ; Save the halfword | |
247 | addi r4,r4,2 ; Bump sink | |
248 | ||
249 | ; Sink is word aligned here | |
250 | ||
251 | alword: bf 29,aldouble ; No word to do... | |
252 | lwz r7,0(r6) ; Get the word | |
253 | addi r6,r6,4 ; Point to the next | |
254 | stw r7,0(r4) ; Save the word | |
255 | addi r4,r4,4 ; Bump sink | |
256 | ||
257 | ; Sink is double aligned here | |
258 | ||
259 | aldouble: bf 28,alquad ; No double to do... | |
260 | lwz r7,0(r6) ; Get the first word | |
261 | lwz r8,4(r6) ; Get the second word (r8 limit mask is no longer needed) | |
262 | addi r6,r6,8 ; Point to the next | |
263 | stw r7,0(r4) ; Save the first word | |
264 | stw r8,4(r4) ; Save the second word | |
265 | addi r4,r4,8 ; Bump sink | |
266 | ||
267 | ; Sink is quadword aligned here | |
268 | ||
269 | alquad: bf 27,alline ; No quad to do... | |
270 | lwz r7,0(r6) ; Get the first word | |
271 | lwz r8,4(r6) ; Get the second word | |
272 | lwz r9,8(r6) ; Get the third word | |
273 | stw r7,0(r4) ; Save the first word | |
274 | lwz r11,12(r6) ; Get the fourth word | |
275 | addi r6,r6,16 ; Point to the next | |
276 | stw r8,4(r4) ; Save the second word | |
277 | stw r9,8(r4) ; Save the third word | |
278 | stw r11,12(r4) ; Save the fourth word | |
279 | addi r4,r4,16 ; Bump sink | |
280 | ||
281 | ; Sink is line aligned here | |
282 | ||
283 | alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32) | |
284 | mtcrf 3,r5 ; Make branch mask for backend partial moves (low bits of length into CR bits 24-31) | |
285 | rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move | |
286 | beq- backend ; No full lines to move | |
287 | ||
288 | sub r5,r5,r11 ; Calculate the residual | |
289 | li r10,96 ; Stride for touch ahead | |
290 | ||
291 | nxtline: subic. r0,r0,1 ; Account for the line now (cr0 drives the bgt+ at the loop bottom) | |
292 | ||
293 | bt- noncache,skipz ; Skip if we are not cached... | |
294 | dcbz br0,r4 ; Blow away the whole line because we are replacing it | |
295 | dcbt r6,r10 ; Touch ahead a bit (source + 96) | |
296 | ||
297 | skipz: lwz r7,0(r6) ; Get the first word | |
298 | lwz r8,4(r6) ; Get the second word | |
299 | lwz r9,8(r6) ; Get the third word | |
300 | stw r7,0(r4) ; Save the first word | |
301 | lwz r11,12(r6) ; Get the fourth word | |
302 | stw r8,4(r4) ; Save the second word | |
303 | lwz r7,16(r6) ; Get the fifth word | |
304 | stw r9,8(r4) ; Save the third word | |
305 | lwz r8,20(r6) ; Get the sixth word | |
306 | stw r11,12(r4) ; Save the fourth word | |
307 | lwz r9,24(r6) ; Get the seventh word | |
308 | stw r7,16(r4) ; Save the fifth word | |
309 | lwz r11,28(r6) ; Get the eighth word | |
310 | addi r6,r6,32 ; Point to the next | |
311 | stw r8,20(r4) ; Save the sixth word | |
312 | stw r9,24(r4) ; Save the seventh word | |
313 | stw r11,28(r4) ; Save the eighth word | |
314 | addi r4,r4,32 ; Bump sink | |
315 | bgt+ nxtline ; Do the next line, if any... | |
316 | ||
317 | ||
318 | ; Move backend quadword | |
319 | ||
320 | backend: bf 27,noquad ; No quad to do... (CR bits 27-31 hold the low 5 bits of the remaining length) | |
321 | lwz r7,0(r6) ; Get the first word | |
322 | lwz r8,4(r6) ; Get the second word | |
323 | lwz r9,8(r6) ; Get the third word | |
324 | lwz r11,12(r6) ; Get the fourth word | |
325 | stw r7,0(r4) ; Save the first word | |
326 | addi r6,r6,16 ; Point to the next | |
327 | stw r8,4(r4) ; Save the second word | |
328 | stw r9,8(r4) ; Save the third word | |
329 | stw r11,12(r4) ; Save the fourth word | |
330 | addi r4,r4,16 ; Bump sink | |
331 | ||
332 | ; Move backend double | |
333 | ||
334 | noquad: bf 28,nodouble ; No double to do... | |
335 | lwz r7,0(r6) ; Get the first word | |
336 | lwz r8,4(r6) ; Get the second word | |
337 | addi r6,r6,8 ; Point to the next | |
338 | stw r7,0(r4) ; Save the first word | |
339 | stw r8,4(r4) ; Save the second word | |
340 | addi r4,r4,8 ; Bump sink | |
341 | ||
342 | ; Move backend word | |
343 | ||
344 | nodouble: bf 29,noword ; No word to do... | |
345 | lwz r7,0(r6) ; Get the word | |
346 | addi r6,r6,4 ; Point to the next | |
347 | stw r7,0(r4) ; Save the word | |
348 | addi r4,r4,4 ; Bump sink | |
349 | ||
350 | ; Move backend halfword | |
351 | ||
352 | noword: bf 30,nohalf ; No halfword to do... | |
353 | lhz r7,0(r6) ; Get the halfword | |
354 | addi r6,r6,2 ; Point to the next | |
355 | sth r7,0(r4) ; Save the halfword | |
356 | addi r4,r4,2 ; Bump sink | |
357 | ||
358 | ; Move backend byte | |
359 | ||
360 | nohalf: bf 31,bcpydone ; Leave cuz we are all done... | |
361 | lbz r7,0(r6) ; Get the byte | |
362 | stb r7,0(r4) ; Save the single, then fall into the common exit | |
363 | ||
de355530 A |
364 | bcpydone: bt- killbats,bcclrbat ; Jump if we need to clear bats... (set only by bcopy_physvir) |
365 | bflr fixxlate ; Leave now if we do not need to fix translation... | |
1c79356b A |
366 | mfmsr r9 ; Get the MSR |
367 | ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on | |
de355530 A |
368 | rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off |
369 | rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off | |
1c79356b A |
370 | mtmsr r9 ; Just do it |
371 | isync ; Hang in there | |
de355530 A |
372 | blr ; Leave cuz we are all done... |
373 | ||
374 | bcclrbat: li r0,0 ; Get set to invalidate upper half | |
9bccf70c A |
375 | sync ; Make sure all is well |
376 | mtdbatu 0,r0 ; Clear upper DBAT 0 (first one loaded by bcopy_physvir) | |
377 | mtdbatu 1,r0 ; Clear upper DBAT 1 (cleared even when bcopy_physvir loaded only DBAT 0) | |
378 | sync | |
379 | isync | |
380 | blr | |
381 | ||
382 | ||
1c79356b A |
383 | ; |
384 | ; 0123456789ABCDEF0123456789ABCDEF | |
385 | ; 0123456789ABCDEF0123456789ABCDEF | |
386 | ; F | |
387 | ; DE | |
388 | ; 9ABC | |
389 | ; 12345678 | |
390 | ; 123456789ABCDEF0 | |
391 | ; 0 | |
392 | ||
393 | ; | |
394 | ; Here is where we handle a forward overlapping move. These will be slow | |
395 | ; because we can not kill the cache of the destination until after we have | |
396 | ; loaded/saved the source area. Also, because reading memory backwards is | |
397 | ; slower when the cache line needs to be loaded because the critical | |
398 | ; doubleword is loaded first, i.e., the last, then it goes back to the first, | |
399 | ; and on in order. That means that when we are at the second to last DW we | |
400 | ; have to wait until the whole line is in cache before we can proceed. | |
401 | ; | |
de355530 | 402 | |
1c79356b A |
403 | fwdovrlap: add r4,r5,r4 ; Point past the last sink byte |
404 | add r6,r5,r6 ; Point past the last source byte | |
405 | and r0,r4,r8 ; Apply movement limit | |
406 | li r12,-1 ; Make sure we touch in the actual line | |
407 | mtcrf 3,r0 ; Figure out the best way to move backwards (low bits of sink end into CR bits 24-31) | |
408 | dcbt r12,r6 ; Touch in the last line of source | |
409 | rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary | |
410 | dcbtst r12,r4 ; Touch in the last line of the sink | |
411 | beq- balline ; Already on cache line boundary | |
412 | ||
413 | sub r5,r5,r0 ; Precalculate move length left after alignment | |
414 | ||
415 | bf 31,balhalf ; No single byte to do... | |
416 | lbz r7,-1(r6) ; Get the byte | |
417 | subi r6,r6,1 ; Point to the next | |
418 | stb r7,-1(r4) ; Save the single | |
419 | subi r4,r4,1 ; Bump sink | |
420 | ||
421 | ; Sink is halfword aligned here | |
422 | ||
423 | balhalf: bf 30,balword ; No halfword to do... | |
424 | lhz r7,-2(r6) ; Get the halfword | |
425 | subi r6,r6,2 ; Point to the next | |
426 | sth r7,-2(r4) ; Save the halfword | |
427 | subi r4,r4,2 ; Bump sink | |
428 | ||
429 | ; Sink is word aligned here | |
430 | ||
431 | balword: bf 29,baldouble ; No word to do... | |
432 | lwz r7,-4(r6) ; Get the word | |
433 | subi r6,r6,4 ; Point to the next | |
434 | stw r7,-4(r4) ; Save the word | |
435 | subi r4,r4,4 ; Bump sink | |
436 | ||
437 | ; Sink is double aligned here | |
438 | ||
439 | baldouble: bf 28,balquad ; No double to do... | |
440 | lwz r7,-8(r6) ; Get the first word | |
441 | lwz r8,-4(r6) ; Get the second word | |
442 | subi r6,r6,8 ; Point to the next | |
443 | stw r7,-8(r4) ; Save the first word | |
444 | stw r8,-4(r4) ; Save the second word | |
445 | subi r4,r4,8 ; Bump sink | |
446 | ||
447 | ; Sink is quadword aligned here | |
448 | ||
449 | balquad: bf 27,balline ; No quad to do... | |
450 | lwz r7,-16(r6) ; Get the first word | |
451 | lwz r8,-12(r6) ; Get the second word | |
452 | lwz r9,-8(r6) ; Get the third word | |
453 | lwz r11,-4(r6) ; Get the fourth word | |
454 | stw r7,-16(r4) ; Save the first word | |
455 | subi r6,r6,16 ; Point to the next | |
456 | stw r8,-12(r4) ; Save the second word | |
457 | stw r9,-8(r4) ; Save the third word | |
458 | stw r11,-4(r4) ; Save the fourth word | |
459 | subi r4,r4,16 ; Bump sink | |
460 | ||
461 | ; Sink is line aligned here | |
462 | ||
463 | balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move | |
464 | mtcrf 3,r5 ; Make branch mask for backend partial moves | |
465 | beq- bbackend ; No full lines to move | |
1c79356b A |
466 | |
467 | ||
468 | ; Registers in use: R0, R1, R3, R4, R5, R6 | |
469 | ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them | |
470 | ||
471 | bnxtline: subic. r0,r0,1 ; Account for the line now (cr0 drives both ble- and bgt+ below) | |
472 | ||
473 | lwz r7,-32(r6) ; Get the first word | |
474 | lwz r5,-28(r6) ; Get the second word (r5 is free: its low bits were already copied into the CR) | |
475 | lwz r2,-24(r6) ; Get the third word | |
476 | lwz r12,-20(r6) ; Get the fourth word | |
477 | lwz r11,-16(r6) ; Get the fifth word | |
478 | lwz r10,-12(r6) ; Get the sixth word | |
479 | lwz r9,-8(r6) ; Get the seventh word | |
480 | lwz r8,-4(r6) ; Get the eighth word | |
481 | subi r6,r6,32 ; Point to the next | |
482 | ||
483 | stw r7,-32(r4) ; Save the first word | |
484 | ble- bnotouch ; Last time, skip touch of source... | |
485 | dcbt br0,r6 ; Touch in next source line | |
486 | ||
487 | bnotouch: stw r5,-28(r4) ; Save the second word | |
488 | stw r2,-24(r4) ; Save the third word | |
489 | stw r12,-20(r4) ; Save the fourth word | |
490 | stw r11,-16(r4) ; Save the fifth word | |
491 | stw r10,-12(r4) ; Save the sixth word | |
492 | stw r9,-8(r4) ; Save the seventh word | |
493 | stw r8,-4(r4) ; Save the eighth word | |
494 | subi r4,r4,32 ; Bump sink | |
495 | ||
496 | bgt+ bnxtline ; Do the next line, if any... | |
1c79356b A |
497 | |
498 | ; | |
499 | ; Note: We touched these lines in at the beginning | |
500 | ; | |
501 | ||
502 | ; Move backend quadword | |
503 | ||
504 | bbackend: bf 27,bnoquad ; No quad to do... (CR bits 27-31 hold the low 5 bits of the remaining length) | |
505 | lwz r7,-16(r6) ; Get the first word | |
506 | lwz r8,-12(r6) ; Get the second word | |
507 | lwz r9,-8(r6) ; Get the third word | |
508 | lwz r11,-4(r6) ; Get the fourth word | |
509 | stw r7,-16(r4) ; Save the first word | |
510 | subi r6,r6,16 ; Point to the next | |
511 | stw r8,-12(r4) ; Save the second word | |
512 | stw r9,-8(r4) ; Save the third word | |
513 | stw r11,-4(r4) ; Save the fourth word | |
514 | subi r4,r4,16 ; Bump sink | |
515 | ||
516 | ; Move backend double | |
517 | ||
518 | bnoquad: bf 28,bnodouble ; No double to do... | |
519 | lwz r7,-8(r6) ; Get the first word | |
520 | lwz r8,-4(r6) ; Get the second word | |
521 | subi r6,r6,8 ; Point to the next | |
522 | stw r7,-8(r4) ; Save the first word | |
523 | stw r8,-4(r4) ; Save the second word | |
524 | subi r4,r4,8 ; Bump sink | |
525 | ||
526 | ; Move backend word | |
527 | ||
528 | bnodouble: bf 29,bnoword ; No word to do... | |
529 | lwz r7,-4(r6) ; Get the word | |
530 | subi r6,r6,4 ; Point to the next | |
531 | stw r7,-4(r4) ; Save the word | |
532 | subi r4,r4,4 ; Bump sink | |
533 | ||
534 | ; Move backend halfword | |
535 | ||
536 | bnoword: bf 30,bnohalf ; No halfword to do... | |
537 | lhz r7,-2(r6) ; Get the halfword | |
538 | subi r6,r6,2 ; Point to the next | |
539 | sth r7,-2(r4) ; Save the halfword | |
540 | subi r4,r4,2 ; Bump sink | |
541 | ||
542 | ; Move backend byte | |
543 | ||
544 | bnohalf: bf 31,bcpydone ; No odd byte: still leave via bcpydone so BATs/translation get restored | |
545 | lbz r7,-1(r6) ; Get the byte | |
546 | stb r7,-1(r4) ; Save the single | |
547 | ||
9bccf70c | 548 | b bcpydone ; Go exit cuz we are all done... |