]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
de355530 | 2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
de355530 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
de355530 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
de355530 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | ; | |
23 | ; Copy bytes of data around. handles overlapped data. | |
24 | ; | |
25 | ; Change this to use Altivec later on, and maybe floating point. | |
26 | ; | |
1c79356b A |
27 | ; |
28 | #include <ppc/asm.h> | |
29 | #include <ppc/proc_reg.h> | |
30 | ||
31 | ; Use CR5_lt to indicate non-cached | |
32 | #define noncache 20 | |
33 | ; Use CR5_gt to indicate that we need to turn data translation back on | |
34 | #define fixxlate 21 | |
de355530 A |
35 | ; Use CR5_eq to indicate that we need to invalidate bats |
36 | #define killbats 22 | |
1c79356b A |
37 | |
38 | ; | |
39 | ; bcopy_nc(from, to, nbytes) | |
40 | ; | |
41 | ; bcopy_nc operates on non-cached memory so we can not use any kind | |
42 | ; of cache instructions. | |
43 | ; | |
44 | ||
9bccf70c A |
45 | .align 5 |
46 | .globl EXT(bcopy_nc) | |
1c79356b | 47 | |
9bccf70c | 48 | LEXT(bcopy_nc) |
1c79356b A |
49 | |
50 | crset noncache ; Flag non-cached (CR5_lt) so the line loop skips its dcbz/dcbt | |
51 | b bcpswap | |
52 | ||
9bccf70c A |
53 | ; |
54 | ; void bcopy_physvir(from, to, nbytes) | |
55 | ; Attempt to copy physically addressed memory with translation on if conditions are met. | |
de355530 | 56 | ; Otherwise do a normal bcopy_phys. |
9bccf70c A |
57 | ; |
58 | ; Rules are: neither source nor destination can cross a page. | |
de355530 | 59 | ; No accesses above the 2GB line (I/O or ROM). |
9bccf70c | 60 | ; |
de355530 A |
61 | ; Interrupts must be disabled throughout the copy when this is called |
62 | ||
9bccf70c A |
63 | ; To do this, we build a |
64 | ; 128KB DBAT for both the source and sink. If both are the same, only one is | |
65 | ; loaded. We do not touch the IBATs, so there is no issue if either physical page | |
66 | ; address is the same as the virtual address of the instructions we are executing. | |
67 | ; | |
de355530 | 68 | ; At the end, we invalidate the used DBATs. Interrupts are not reenabled here; that is left to the caller. |
9bccf70c A |
69 | ; |
70 | ; Note, this one will not work in user state | |
71 | ; | |
72 | ||
73 | .align 5 | |
74 | .globl EXT(bcopy_physvir) | |
75 | ||
76 | LEXT(bcopy_physvir) | |
77 | ||
de355530 | 78 | addic. r0,r5,-1 ; Get length - 1; CR0 is tested by the bltlr below |
9bccf70c | 79 | add r11,r3,r0 ; Point to last byte of source (R3 = "from") |
de355530 | 80 | cmplw cr1,r3,r4 ; Does source == sink? (CR1 is recomputed below before it is used) |
9bccf70c A |
81 | add r12,r4,r0 ; Point to last byte of sink (R4 = "to") |
82 | bltlr- ; Bail if length is 0 or way too big (length - 1 went negative) | |
83 | xor r7,r11,r3 ; Bits that differ between first and last source byte | |
84 | xor r8,r12,r4 ; Bits that differ between first and last sink byte | |
85 | or r0,r7,r8 ; Combine wrap | |
86 | ||
de355530 | 87 | li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes for the lower DBAT values |
9bccf70c A |
88 | rlwinm. r0,r0,0,0,19 ; Any difference above the low 12 bits means a page was crossed | |
89 | li r7,2 ; Set validity flags (source upper DBAT seed) | |
90 | li r8,2 ; Set validity flags (sink upper DBAT seed) | |
de355530 | 91 | bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy... |
9bccf70c | 92 | |
de355530 | 93 | crset killbats ; Remember to trash BATs on the way out |
9bccf70c A |
94 | rlwimi r11,r9,0,15,31 ; Set source lower DBAT value (keep address bits 0-14, insert attributes) | |
95 | rlwimi r12,r9,0,15,31 ; Set sink lower DBAT value (keep address bits 0-14, insert attributes) | |
96 | rlwimi r7,r11,0,0,14 ; Set source upper DBAT value (address bits 0-14 plus validity flags) | |
97 | rlwimi r8,r12,0,0,14 ; Set sink upper DBAT value (address bits 0-14 plus validity flags) | |
98 | cmplw cr1,r11,r12 ; See if sink and source are same block | |
99 | ||
100 | sync | |
101 | ||
102 | mtdbatl 0,r11 ; Set source lower DBAT | |
103 | mtdbatu 0,r7 ; Set source upper DBAT | |
104 | ||
105 | beq- cr1,bcpvsame ; Source and sink are in same block, DBAT 0 covers both | |
106 | ||
107 | mtdbatl 1,r12 ; Set sink lower DBAT | |
108 | mtdbatu 1,r8 ; Set sink upper DBAT | |
109 | ||
110 | bcpvsame: mr r6,r3 ; Set source (copyit expects R6 = source, R4 = sink) | |
111 | crclr noncache ; Set cached | |
112 | ||
de355530 A |
113 | b copyit ; Go copy it... |
114 | ||
9bccf70c | 115 | |
1c79356b A |
116 | ; |
117 | ; void bcopy_phys(from, to, nbytes) | |
118 | ; Turns off data translation before the copy. Note, this one will | |
de355530 | 119 | ; not work in user state |
1c79356b A |
120 | ; |
121 | ||
9bccf70c A |
122 | .align 5 |
123 | .globl EXT(bcopy_phys) | |
124 | ||
125 | LEXT(bcopy_phys) | |
de355530 | 126 | |
1c79356b | 127 | mfmsr r9 ; Get the MSR |
de355530 | 128 | |
1c79356b | 129 | crclr noncache ; Set cached |
de355530 A |
130 | rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Isolate the DR bit; CR0_eq is set if data translation is off |
131 | ||
132 | cmplw cr1,r4,r3 ; Compare "to" and "from" | |
1c79356b A |
133 | cmplwi cr7,r5,0 ; Check if we have a 0 length |
134 | mr r6,r3 ; Set source (copyit expects R6 = source, R4 = sink) | |
de355530 A |
135 | beqlr- cr1 ; Bail if "to" and "from" are the same |
136 | xor r9,r9,r8 ; Clear DR in the MSR image if it was on (should be) | |
1c79356b A |
137 | beqlr- cr7 ; Bail if length is 0 |
138 | ||
de355530 A |
139 | rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off |
140 | crclr killbats ; Make sure we do not trash BATs on the way out | |
141 | rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off | |
1c79356b A |
142 | mtmsr r9 ; Set DR translation off |
143 | isync ; Wait for it | |
144 | ||
de355530 A |
145 | crnot fixxlate,cr0_eq ; fixxlate = DR was on (CR0 from the DR test above), so bcpydone turns it back on |
146 | b copyit ; Go copy it... | |
1c79356b A |
147 | |
148 | ; | |
149 | ; void bcopy(from, to, nbytes) | |
150 | ; | |
151 | ||
9bccf70c A |
152 | .align 5 |
153 | .globl EXT(bcopy) | |
154 | ||
155 | LEXT(bcopy) | |
1c79356b A |
156 | |
157 | crclr noncache ; Set cached (bcopy_nc enters at bcpswap below with noncache set instead) | |
158 | ||
de355530 A |
159 | bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from" |
160 | mr. r5,r5 ; Check if we have a 0 length (sets CR0) | |
1c79356b | 161 | mr r6,r3 ; Set source (copyit expects R6 = source, R4 = sink) |
de355530 A |
162 | crclr killbats ; Make sure we do not trash BATs on the way out |
163 | beqlr- cr1 ; Bail if "to" and "from" are the same | |
164 | beqlr- ; Bail if length is 0 | |
1c79356b | 165 | crclr fixxlate ; Set translation already ok |
de355530 | 166 | b copyit ; Go copy it... |
1c79356b A |
167 | |
168 | ; | |
169 | ; When we move the memory, forward overlays must be handled. We | |
170 | ; also can not use the cache instructions if we are from bcopy_nc. | |
171 | ; We need to preserve R3 because it needs to be returned for memcpy. | |
172 | ; We can be interrupted and lose control here. | |
173 | ; | |
de355530 A |
174 | ; There is no stack, so in order to use floating point, we would |
175 | ; need to take the FP exception. Any potential gains by using FP | |
1c79356b A |
176 | ; would be more than eaten up by this. |
177 | ; | |
de355530 | 178 | ; Later, we should use Altivec for large moves. |
1c79356b A |
179 | ; |
180 | ||
9bccf70c A |
181 | .align 5 |
182 | .globl EXT(memcpy) | |
de355530 | 183 | |
9bccf70c | 184 | LEXT(memcpy) |
de355530 | 185 | |
1c79356b A |
186 | cmplw cr1,r3,r4 ; "to" and "from" the same? |
187 | mr r6,r4 ; Set the "from" (R6 is the source register used by copyit) | |
188 | mr. r5,r5 ; Length zero? (sets CR0) | |
189 | crclr noncache ; Set cached | |
190 | mr r4,r3 ; Set the "to" (R4 is the sink; R3 itself is left untouched) | |
191 | crclr fixxlate ; Set translation already ok | |
192 | beqlr- cr1 ; "to" and "from" are the same | |
193 | beqlr- ; Length is 0 | |
de355530 | 194 | crclr killbats ; Make sure we do not trash BATs on the way out; falls through into copyit |
1c79356b | 195 | |
de355530 | 196 | copyit: sub r12,r4,r6 ; Get potential overlap: sink - source (negative if backward move) |
1c79356b A |
197 | lis r8,0x7FFF ; Start up a mask |
198 | srawi r11,r12,31 ; Propagate the sign bit (R11 = 0 or -1) | |
199 | dcbt br0,r6 ; Touch in the first source line | |
200 | cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit) | |
201 | ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF | |
202 | xor r9,r12,r11 ; If sink - source was negative, invert bits | |
203 | srw r8,r8,r7 ; Get move length limitation: all ones below the highest set bit of the length | |
204 | sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value | |
205 | cmplw r12,r5 ; Unsigned (sink - source) < length means we actually forward overlap | |
206 | cmplwi cr7,r9,32 ; See if at least a line between source and sink | |
207 | dcbtst br0,r4 ; Touch in the first sink line | |
208 | cmplwi cr1,r5,32 ; Are we moving more than a line? | |
de355530 | 209 | cror noncache,noncache,28 ; 28 = CR7_lt from the distance check: do not DCBZ output line if not enough space |
1c79356b A |
210 | blt- fwdovrlap ; This is a forward overlapping area (CR0 from the cmplw above), handle it... |
211 | ||
212 | ; | |
213 | ; R4 = sink | |
214 | ; R5 = length | |
215 | ; R6 = source | |
216 | ; | |
217 | ||
218 | ; | |
219 | ; Here we figure out how much we have to move to get the sink onto a | |
220 | ; cache boundary. If we can, and there are still more than 32 bytes | |
221 | ; left to move, we can really speed things up by DCBZing the sink line. | |
222 | ; We can not do this if noncache is set because we will take an | |
223 | ; alignment exception. | |
224 | ||
225 | neg r0,r4 ; Get the number of bytes to move to align to a line boundary | |
226 | rlwinm. r0,r0,0,27,31 ; Keep only the low 5 bits (0-31 bytes) and test for zero | |
227 | and r0,r0,r8 ; Limit to the maximum front end move (R8 mask is smaller than the length) | |
228 | mtcrf 3,r0 ; Make branch mask for partial moves: CR bits 27-31 = 16/8/4/2/1 byte moves | |
229 | sub r5,r5,r0 ; Set the length left to move | |
230 | beq alline ; Already on a line (CR0 from the rlwinm. above)... | |
231 | ||
232 | bf 31,alhalf ; No single byte to do... | |
233 | lbz r7,0(r6) ; Get the byte | |
234 | addi r6,r6,1 ; Point to the next | |
235 | stb r7,0(r4) ; Save the single | |
236 | addi r4,r4,1 ; Bump sink | |
237 | ||
238 | ; Sink is halfword aligned here (unless the front end move was clipped by the length limit) | |
239 | ||
240 | alhalf: bf 30,alword ; No halfword to do... | |
241 | lhz r7,0(r6) ; Get the halfword | |
242 | addi r6,r6,2 ; Point to the next | |
243 | sth r7,0(r4) ; Save the halfword | |
244 | addi r4,r4,2 ; Bump sink | |
245 | ||
246 | ; Sink is word aligned here (unless the front end move was clipped by the length limit) | |
247 | ||
248 | alword: bf 29,aldouble ; No word to do... | |
249 | lwz r7,0(r6) ; Get the word | |
250 | addi r6,r6,4 ; Point to the next | |
251 | stw r7,0(r4) ; Save the word | |
252 | addi r4,r4,4 ; Bump sink | |
253 | ||
254 | ; Sink is double aligned here (unless the front end move was clipped by the length limit) | |
255 | ||
256 | aldouble: bf 28,alquad ; No double to do... | |
257 | lwz r7,0(r6) ; Get the first word | |
258 | lwz r8,4(r6) ; Get the second word (R8 limit mask is no longer needed) | |
259 | addi r6,r6,8 ; Point to the next | |
260 | stw r7,0(r4) ; Save the first word | |
261 | stw r8,4(r4) ; Save the second word | |
262 | addi r4,r4,8 ; Bump sink | |
263 | ||
264 | ; Sink is quadword aligned here (unless the front end move was clipped by the length limit) | |
265 | ||
266 | alquad: bf 27,alline ; No quad to do... | |
267 | lwz r7,0(r6) ; Get the first word | |
268 | lwz r8,4(r6) ; Get the second word | |
269 | lwz r9,8(r6) ; Get the third word | |
270 | stw r7,0(r4) ; Save the first word | |
271 | lwz r11,12(r6) ; Get the fourth word | |
272 | addi r6,r6,16 ; Point to the next | |
273 | stw r8,4(r4) ; Save the second word | |
274 | stw r9,8(r4) ; Save the third word | |
275 | stw r11,12(r4) ; Save the fourth word | |
276 | addi r4,r4,16 ; Bump sink | |
277 | ||
278 | ; Sink is line aligned here | |
279 | ||
280 | alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32); CR0_eq if none | |
281 | mtcrf 3,r5 ; Make branch mask for backend partial moves: CR bits 27-31 = 16/8/4/2/1 bytes | |
282 | rlwinm r11,r5,0,0,26 ; Get number of bytes the line loop will move (length with low 5 bits cleared) | |
283 | beq- backend ; No full lines to move | |
284 | ||
285 | sub r5,r5,r11 ; Calculate the residual | |
286 | li r10,96 ; Stride for touch ahead | |
287 | ||
288 | nxtline: subic. r0,r0,1 ; Account for the line now; CR0 is tested by the bgt at the bottom | |
289 | ||
290 | bt- noncache,skipz ; Skip the cache operations if we are not cached... | |
291 | dcbz br0,r4 ; Blow away the whole line because we are replacing it | |
292 | dcbt r6,r10 ; Touch ahead a bit (source + 96) | |
293 | ||
294 | skipz: lwz r7,0(r6) ; Get the first word | |
295 | lwz r8,4(r6) ; Get the second word | |
296 | lwz r9,8(r6) ; Get the third word | |
297 | stw r7,0(r4) ; Save the first word | |
298 | lwz r11,12(r6) ; Get the fourth word | |
299 | stw r8,4(r4) ; Save the second word | |
300 | lwz r7,16(r6) ; Get the fifth word | |
301 | stw r9,8(r4) ; Save the third word | |
302 | lwz r8,20(r6) ; Get the sixth word | |
303 | stw r11,12(r4) ; Save the fourth word | |
304 | lwz r9,24(r6) ; Get the seventh word | |
305 | stw r7,16(r4) ; Save the fifth word | |
306 | lwz r11,28(r6) ; Get the eighth word | |
307 | addi r6,r6,32 ; Point to the next source line | |
308 | stw r8,20(r4) ; Save the sixth word | |
309 | stw r9,24(r4) ; Save the seventh word | |
310 | stw r11,28(r4) ; Save the eighth word | |
311 | addi r4,r4,32 ; Bump sink | |
312 | bgt+ nxtline ; Do the next line, if any (line count still > 0)... | |
313 | ||
314 | ||
315 | ; Move backend quadword | |
316 | ||
317 | backend: bf 27,noquad ; No quad to do (CR bits 27-31 hold the residual 16/8/4/2/1 bits)... | |
318 | lwz r7,0(r6) ; Get the first word | |
319 | lwz r8,4(r6) ; Get the second word | |
320 | lwz r9,8(r6) ; Get the third word | |
321 | lwz r11,12(r6) ; Get the fourth word | |
322 | stw r7,0(r4) ; Save the first word | |
323 | addi r6,r6,16 ; Point to the next | |
324 | stw r8,4(r4) ; Save the second word | |
325 | stw r9,8(r4) ; Save the third word | |
326 | stw r11,12(r4) ; Save the fourth word | |
327 | addi r4,r4,16 ; Bump sink | |
328 | ||
329 | ; Move backend double | |
330 | ||
331 | noquad: bf 28,nodouble ; No double to do... | |
332 | lwz r7,0(r6) ; Get the first word | |
333 | lwz r8,4(r6) ; Get the second word | |
334 | addi r6,r6,8 ; Point to the next | |
335 | stw r7,0(r4) ; Save the first word | |
336 | stw r8,4(r4) ; Save the second word | |
337 | addi r4,r4,8 ; Bump sink | |
338 | ||
339 | ; Move backend word | |
340 | ||
341 | nodouble: bf 29,noword ; No word to do... | |
342 | lwz r7,0(r6) ; Get the word | |
343 | addi r6,r6,4 ; Point to the next | |
344 | stw r7,0(r4) ; Save the word | |
345 | addi r4,r4,4 ; Bump sink | |
346 | ||
347 | ; Move backend halfword | |
348 | ||
349 | noword: bf 30,nohalf ; No halfword to do... | |
350 | lhz r7,0(r6) ; Get the halfword | |
351 | addi r6,r6,2 ; Point to the next | |
352 | sth r7,0(r4) ; Save the halfword | |
353 | addi r4,r4,2 ; Bump sink | |
354 | ||
355 | ; Move backend byte | |
356 | ||
357 | nohalf: bf 31,bcpydone ; No byte to do, go to the common exit cuz we are all done... | |
358 | lbz r7,0(r6) ; Get the byte | |
359 | stb r7,0(r4) ; Save the single; falls through to the common exit | |
360 | ||
de355530 A |
361 | bcpydone: bt- killbats,bcclrbat ; Common exit: jump if we need to clear bats (set by bcopy_physvir)... |
362 | bflr fixxlate ; Leave now if we do not need to fix translation... | |
1c79356b A |
363 | mfmsr r9 ; Get the MSR |
364 | ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on | |
de355530 A |
365 | rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off |
366 | rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off | |
1c79356b A |
367 | mtmsr r9 ; Write the MSR back with DR on |
368 | isync ; Wait for the MSR change to take effect | |
de355530 A |
369 | blr ; Leave cuz we are all done... |
370 | ||
371 | bcclrbat: li r0,0 ; Get set to invalidate upper half | |
9bccf70c A |
372 | sync ; Make sure all is well |
373 | mtdbatu 0,r0 ; Clear DBAT 0 upper half (bcopy_physvir builds it from R3, the source) | |
374 | mtdbatu 1,r0 ; Clear DBAT 1 upper half (bcopy_physvir builds it from R4, the sink) | |
375 | sync | |
376 | isync | |
377 | blr | |
378 | ||
379 | ||
1c79356b A |
380 | ; |
381 | ; 0123456789ABCDEF0123456789ABCDEF | |
382 | ; 0123456789ABCDEF0123456789ABCDEF | |
383 | ; F | |
384 | ; DE | |
385 | ; 9ABC | |
386 | ; 12345678 | |
387 | ; 123456789ABCDEF0 | |
388 | ; 0 | |
389 | ||
390 | ; | |
391 | ; Here is where we handle a forward overlapping move. These will be slow | |
392 | ; because we can not kill the cache of the destination until after we have | |
393 | ; loaded/saved the source area. Also, reading memory backwards is | |
394 | ; slower when the cache line needs to be loaded because the critical | |
395 | ; doubleword is loaded first, i.e., the last, then it goes back to the first, | |
396 | ; and on in order. That means that when we are at the second to last DW we | |
397 | ; have to wait until the whole line is in cache before we can proceed. | |
398 | ; | |
de355530 | 399 | |
1c79356b A |
400 | fwdovrlap: add r4,r5,r4 ; Point past the last sink byte |
401 | add r6,r5,r6 ; Point past the last source byte | |
402 | and r0,r4,r8 ; Apply movement limit to the sink end address | |
403 | li r12,-1 ; Offset of -1 to make sure we touch in the actual last line | |
404 | mtcrf 3,r0 ; Figure out the best way to move backwards: CR bits 27-31 = 16/8/4/2/1 byte moves | |
405 | dcbt r12,r6 ; Touch in the last line of source | |
406 | rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary | |
407 | dcbtst r12,r4 ; Touch in the last line of the sink | |
408 | beq- balline ; Already on cache line boundary | |
409 | ||
410 | sub r5,r5,r0 ; Precalculate move length left after alignment | |
411 | ||
412 | bf 31,balhalf ; No single byte to do... | |
413 | lbz r7,-1(r6) ; Get the byte | |
414 | subi r6,r6,1 ; Back up the source | |
415 | stb r7,-1(r4) ; Save the single | |
416 | subi r4,r4,1 ; Back up the sink | |
417 | ||
418 | ; Sink is halfword aligned here (unless the move was clipped by the length limit) | |
419 | ||
420 | balhalf: bf 30,balword ; No halfword to do... | |
421 | lhz r7,-2(r6) ; Get the halfword | |
422 | subi r6,r6,2 ; Back up the source | |
423 | sth r7,-2(r4) ; Save the halfword | |
424 | subi r4,r4,2 ; Back up the sink | |
425 | ||
426 | ; Sink is word aligned here (unless the move was clipped by the length limit) | |
427 | ||
428 | balword: bf 29,baldouble ; No word to do... | |
429 | lwz r7,-4(r6) ; Get the word | |
430 | subi r6,r6,4 ; Back up the source | |
431 | stw r7,-4(r4) ; Save the word | |
432 | subi r4,r4,4 ; Back up the sink | |
433 | ||
434 | ; Sink is double aligned here (unless the move was clipped by the length limit) | |
435 | ||
436 | baldouble: bf 28,balquad ; No double to do... | |
437 | lwz r7,-8(r6) ; Get the first word | |
438 | lwz r8,-4(r6) ; Get the second word (R8 limit mask is no longer needed) | |
439 | subi r6,r6,8 ; Back up the source | |
440 | stw r7,-8(r4) ; Save the first word | |
441 | stw r8,-4(r4) ; Save the second word | |
442 | subi r4,r4,8 ; Back up the sink | |
443 | ||
444 | ; Sink is quadword aligned here (unless the move was clipped by the length limit) | |
445 | ||
446 | balquad: bf 27,balline ; No quad to do... | |
447 | lwz r7,-16(r6) ; Get the first word | |
448 | lwz r8,-12(r6) ; Get the second word | |
449 | lwz r9,-8(r6) ; Get the third word | |
450 | lwz r11,-4(r6) ; Get the fourth word | |
451 | stw r7,-16(r4) ; Save the first word | |
452 | subi r6,r6,16 ; Back up the source | |
453 | stw r8,-12(r4) ; Save the second word | |
454 | stw r9,-8(r4) ; Save the third word | |
455 | stw r11,-4(r4) ; Save the fourth word | |
456 | subi r4,r4,16 ; Back up the sink | |
457 | ||
458 | ; Sink is line aligned here | |
459 | ||
460 | balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32); CR0_eq if none | |
461 | mtcrf 3,r5 ; Make branch mask for backend partial moves: CR bits 27-31 = 16/8/4/2/1 bytes | |
462 | beq- bbackend ; No full lines to move | |
1c79356b A |
463 | |
464 | ||
465 | ; Registers live in this loop: R0 (line count), R1, R3, R4, R6. R5's residual bits are already in the CR. | |
466 | ; Scratch registers for the 8 words of a line: R2, R5, R7, R8, R9, R10, R11, R12 | |
467 | ||
468 | bnxtline: subic. r0,r0,1 ; Account for the line now; CR0 is tested by the ble and bgt below | |
469 | ||
470 | lwz r7,-32(r6) ; Get the first word | |
471 | lwz r5,-28(r6) ; Get the second word (R5 is free to reuse, see above) | |
472 | lwz r2,-24(r6) ; Get the third word | |
473 | lwz r12,-20(r6) ; Get the fourth word | |
474 | lwz r11,-16(r6) ; Get the fifth word | |
475 | lwz r10,-12(r6) ; Get the sixth word | |
476 | lwz r9,-8(r6) ; Get the seventh word | |
477 | lwz r8,-4(r6) ; Get the eighth word | |
478 | subi r6,r6,32 ; Back up the source by a line | |
479 | ||
480 | stw r7,-32(r4) ; Save the first word | |
481 | ble- bnotouch ; Last time, skip touch of source... | |
482 | dcbt br0,r6 ; Touch in next source line | |
483 | ||
484 | bnotouch: stw r5,-28(r4) ; Save the second word | |
485 | stw r2,-24(r4) ; Save the third word | |
486 | stw r12,-20(r4) ; Save the fourth word | |
487 | stw r11,-16(r4) ; Save the fifth word | |
488 | stw r10,-12(r4) ; Save the sixth word | |
489 | stw r9,-8(r4) ; Save the seventh word | |
490 | stw r8,-4(r4) ; Save the eighth word | |
491 | subi r4,r4,32 ; Back up the sink by a line | |
492 | ||
493 | bgt+ bnxtline ; Do the next line, if any... | |
1c79356b A |
494 | |
495 | ; | |
496 | ; Note: We touched these lines in at the beginning | |
497 | ; | |
498 | ||
499 | ; Move backend quadword | |
500 | ||
501 | bbackend: bf 27,bnoquad ; No quad to do (CR bits 27-31 hold the residual 16/8/4/2/1 bits)... | |
502 | lwz r7,-16(r6) ; Get the first word | |
503 | lwz r8,-12(r6) ; Get the second word | |
504 | lwz r9,-8(r6) ; Get the third word | |
505 | lwz r11,-4(r6) ; Get the fourth word | |
506 | stw r7,-16(r4) ; Save the first word | |
507 | subi r6,r6,16 ; Back up the source | |
508 | stw r8,-12(r4) ; Save the second word | |
509 | stw r9,-8(r4) ; Save the third word | |
510 | stw r11,-4(r4) ; Save the fourth word | |
511 | subi r4,r4,16 ; Back up the sink | |
512 | ||
513 | ; Move backend double | |
514 | ||
515 | bnoquad: bf 28,bnodouble ; No double to do... | |
516 | lwz r7,-8(r6) ; Get the first word | |
517 | lwz r8,-4(r6) ; Get the second word | |
518 | subi r6,r6,8 ; Back up the source | |
519 | stw r7,-8(r4) ; Save the first word | |
520 | stw r8,-4(r4) ; Save the second word | |
521 | subi r4,r4,8 ; Back up the sink | |
522 | ||
523 | ; Move backend word | |
524 | ||
525 | bnodouble: bf 29,bnoword ; No word to do... | |
526 | lwz r7,-4(r6) ; Get the word | |
527 | subi r6,r6,4 ; Back up the source | |
528 | stw r7,-4(r4) ; Save the word | |
529 | subi r4,r4,4 ; Back up the sink | |
530 | ||
531 | ; Move backend halfword | |
532 | ||
533 | bnoword: bf 30,bnohalf ; No halfword to do... | |
534 | lhz r7,-2(r6) ; Get the halfword | |
535 | subi r6,r6,2 ; Back up the source | |
536 | sth r7,-2(r4) ; Save the halfword | |
537 | subi r4,r4,2 ; Back up the sink | |
538 | ||
539 | ; Move backend byte. Every exit must go through bcpydone so that translation (fixxlate) and the DBATs (killbats) get restored. | |
540 | ||
541 | bnohalf: bf 31,bcpydone ; No byte to do: take the common exit rather than returning directly | |
542 | lbz r7,-1(r6) ; Get the byte | |
543 | stb r7,-1(r4) ; Save the single | |
544 | ||
9bccf70c | 545 | b bcpydone ; Go exit cuz we are all done... |