2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 ; Copy bytes of data around. Handles overlapped data.
25 ; Change this to use Altivec later on, and maybe floating point.
29 #include <ppc/proc_reg.h>
31 ; Use CR5_lt to indicate non-cached
33 ; Use CR5_gt to indicate that we need to turn data translation back on
35 ; Use CR5_eq to indicate that we need to invalidate bats
39 ; bcopy_nc(from, to, nbytes)
41 ; bcopy_nc operates on non-cached memory so we can not use any kind
42 ; of cache instructions.
;
; The only instruction of this entry visible here sets the "noncache"
; condition-register flag. The line-copy loop (nxtline) tests that flag
; and skips its dcbz/dcbt when it is set.
; NOTE(review): the entry label and whatever follows the crset (presumably a
; branch into the common bcopy path at bcpswap) are not visible in this
; excerpt - confirm against the full file.
50 crset noncache ; Set non-cached
54 ; void bcopy_physvir(from, to, nbytes)
55 ; Attempt to copy physically addressed memory with translation on if conditions are met.
56 ; Otherwise do a normal bcopy_phys.
58 ; Rules are: neither source nor destination can cross a page.
59 ; No accesses above the 2GB line (I/O or ROM).
61 ; Interrupts must be disabled throughout the copy when this is called.
63 ; To do this, we build a
64 ; 128KB DBAT for both the source and sink. If both are the same, only one is
65 ; loaded. We do not touch the IBATs, so there is no issue if either physical page
66 ; address is the same as the virtual address of the instructions we are executing.
68 ; At the end, we invalidate the used DBATs and reenable interrupts.
70 ; Note, this one will not work in user state.
;
; NOTE(review): no interrupt re-enable is visible in this excerpt; only the
; DBAT invalidation at bcclrbat is - confirm who restores interrupts.
;
; Register use in this entry:
;   r3  = "from", r4 = "to", r5 = nbytes (as named in the prototype above)
;   r0  = nbytes - 1, then the combined page-crossing test value
;   r11 = last byte of the r3 range, then the lower DBAT image for DBAT 0
;   r12 = last byte of the r4 range, then the lower DBAT image for DBAT 1
;   r7/r8 = upper DBAT images (block address in bits 0-14, low bits = 2)
;   r9  = attribute bits merged into the lower DBAT images
;   r6  = source pointer handed to copyit (copied from r3)
; The original line comments call the r3-derived values "sink" and the
; r4-derived values "source", although r3 is later used as the source.
; Both ranges get a DBAT, so the naming does not affect the mapping.
; Each DBAT maps the block onto itself: the same address bits 0-14 feed the
; upper and lower images.
74 .globl EXT(bcopy_physvir)
78 addic. r0,r5,-1 ; Get length - 1 (sets cr0 for the bail-out test below)
79 add r11,r3,r0 ; Point to last byte of the r3 range (called "sink" below)
80 cmplw cr1,r3,r4 ; Does source == sink? (cr1 is rewritten before any visible use)
81 add r12,r4,r0 ; Point to last byte of the r4 range (called "source" below)
82 bltlr- ; Bail if length is 0 or way too big (length - 1 is negative)
83 xor r7,r11,r3 ; See if we went to next page (first vs. last byte of r3 range)
84 xor r8,r12,r4 ; See if we went to next page (first vs. last byte of r4 range)
85 or r0,r7,r8 ; Combine wrap
87 li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes
88 rlwinm. r0,r0,0,0,19 ; Did we overflow a page? (keep only bits 0-19, the page number)
89 li r7,2 ; Set validity flags
90 li r8,2 ; Set validity flags
91 bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy...
93 crset killbats ; Remember to trash BATs on the way out
94 rlwimi r11,r9,0,15,31 ; Set sink lower DBAT value (address bits 0-14 kept, attributes inserted)
95 rlwimi r12,r9,0,15,31 ; Set source lower DBAT value (address bits 0-14 kept, attributes inserted)
96 rlwimi r7,r11,0,0,14 ; Set sink upper DBAT value (block address into bits 0-14)
97 rlwimi r8,r12,0,0,14 ; Set source upper DBAT value (block address into bits 0-14)
98 cmplw cr1,r11,r12 ; See if sink and source are same block
102 mtdbatl 0,r11 ; Set sink lower DBAT
103 mtdbatu 0,r7 ; Set sink upper DBAT
105 beq- cr1,bcpvsame ; Source and sink are in same block, one DBAT is enough
107 mtdbatl 1,r12 ; Set source lower DBAT
108 mtdbatu 1,r8 ; Set source upper DBAT
110 bcpvsame: mr r6,r3 ; Set source
111 crclr noncache ; Set cached
113 b copyit ; Go copy it...
117 ; void bcopy_phys(from, to, nbytes)
118 ; Turns off data translation before the copy. Note, this one will
119 ; not work in user state.
;
; On entry r3 = "from", r4 = "to", r5 = nbytes.
; Returns immediately when "to" == "from" or nbytes == 0.
; Otherwise clears MSR[DR] (and the FP and vector enables), records in
; "fixxlate" whether DR was on so the exit path can turn it back on, clears
; "killbats" and "noncache", and joins the common copy code at copyit with
; r6 = source.
; cr0, set by the rlwinm. below, must survive until the crnot; the
; intervening compares deliberately use cr1 and cr7.
; NOTE(review): no isync is visible after the mtmsr; the embedded listing
; numbers skip from 142 to 145, so it may simply be missing from this
; excerpt - confirm.
123 .globl EXT(bcopy_phys)
127 mfmsr r9 ; Get the MSR
129 crclr noncache ; Set cached
130 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (r8 = DR bit only, cr0_eq if off)
132 cmplw cr1,r4,r3 ; Compare "to" and "from"
133 cmplwi cr7,r5,0 ; Check if we have a 0 length
134 mr r6,r3 ; Set source
135 beqlr- cr1 ; Bail if "to" and "from" are the same
136 xor r9,r9,r8 ; Turn off translation if it is on (should be)
137 beqlr- cr7 ; Bail if length is 0
139 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
140 crclr killbats ; Make sure we do not trash BATs on the way out
141 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
142 mtmsr r9 ; Set DR translation off
145 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was
146 b copyit ; Go copy it...
149 ; void bcopy(from, to, nbytes)
;
; Ordinary cached copy with translation left as it is.
; On entry r3 = "from", r4 = "to", r5 = nbytes.
; Returns immediately when "to" == "from" or nbytes == 0; otherwise clears
; the "killbats" and "fixxlate" exit flags, sets r6 = source and joins the
; common copy code at copyit.
; bcpswap is a secondary entry that skips the crclr noncache, so a caller
; that has already set "noncache" keeps it.
; NOTE(review): the bcopy entry label itself is not visible in this excerpt.
157 crclr noncache ; Set cached
159 bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
160 mr. r5,r5 ; Check if we have a 0 length (result in cr0)
161 mr r6,r3 ; Set source
162 crclr killbats ; Make sure we do not trash BATs on the way out
163 beqlr- cr1 ; Bail if "to" and "from" are the same
164 beqlr- ; Bail if length is 0
165 crclr fixxlate ; Set translation already ok
166 b copyit ; Go copy it...
169 ; When we move the memory, forward overlays must be handled. We
170 ; also can not use the cache instructions if we are from bcopy_nc.
171 ; We need to preserve R3 because it needs to be returned for memcpy.
172 ; We can be interrupted and lose control here.
174 ; There is no stack, so in order to use floating point, we would
175 ; need to take the FP exception. Any potential gains by using FP
176 ; would be more than eaten up by this.
178 ; Later, we should use Altivec for large moves.
;
; Entry with the argument order (to, from, nbytes): r3 = "to", r4 = "from",
; r5 = nbytes. The arguments are shuffled into the registers copyit expects
; (r6 = source, r4 = sink) and r3 is left untouched. All three CR5 flags
; (noncache, fixxlate, killbats) are cleared, and execution falls through
; into copyit.
; NOTE(review): the entry label is not visible in this excerpt; the comment
; above suggests it is memcpy - confirm.
; NOTE(review): the cr0 result of "mr." is not consumed by any visible
; instruction; the embedded listing numbers skip 193, where a zero-length
; return may have been - confirm.
186 cmplw cr1,r3,r4 ; "to" and "from" the same?
187 mr r6,r4 ; Set the "from"
188 mr. r5,r5 ; Length zero?
189 crclr noncache ; Set cached
190 mr r4,r3 ; Set the "to"
191 crclr fixxlate ; Set translation already ok
192 beqlr- cr1 ; "to" and "from" are the same
194 crclr killbats ; Make sure we do not trash BATs on the way out
; copyit: common copy engine shared by all entry points.
; Inputs:  r6 = source, r4 = sink, r5 = length in bytes,
;          CR flags noncache / killbats / fixxlate already set by the entry.
; Work done here:
;   r12 = sink - source; an unsigned r12 < r5 means the sink starts inside
;         the source range, so the copy must run backwards (fwdovrlap).
;   r8  = 0x7FFFFFFF >> cntlzw(length), i.e. a mask of all bits below the
;         length's highest set bit. It caps the head-alignment move so it is
;         always smaller than the length.
;   r9  = |sink - source|; if that is less than 32, "noncache" is forced on
;         so the line loop will not dcbz a sink line.
; The instruction order interleaves independent work; do not reorder without
; checking which condition-register field each branch consumes.
196 copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
197 lis r8,0x7FFF ; Start up a mask
198 srawi r11,r12,31 ; Propagate the sign bit (r11 = 0 or -1)
199 dcbt br0,r6 ; Touch in the first source line
200 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
201 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
202 xor r9,r12,r11 ; If sink - source was negative, invert bits
203 srw r8,r8,r7 ; Get move length limitation (mask below the length's top bit)
204 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
205 cmplw r12,r5 ; See if we actually forward overlap (unsigned, result in cr0)
206 cmplwi cr7,r9,32 ; See if at least a line between source and sink
207 dcbtst br0,r4 ; Touch in the first sink line
208 cmplwi cr1,r5,32 ; Are we moving more than a line?
209 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (CR bit 28 = cr7 "lt")
210 blt- fwdovrlap ; This is a forward overlapping area, handle it...
219 ; Here we figure out how much we have to move to get the sink onto a
220 ; cache boundary. If we can, and there are still more than 32 bytes
221 ; left to move, we can really speed things up by DCBZing the sink line.
222 ; We can not do this if noncache is set because we will take an
223 ; alignment exception.
;
; The byte count needed to reach a 32-byte boundary is placed in the low CR
; bits by mtcrf, and each bit is then tested in turn:
; bit 31 = 1 byte, 30 = 2, 29 = 4, 28 = 8, 27 = 16.
225 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
226 rlwinm. r0,r0,0,27,31 ; Clean it up and test it (keep low 5 bits, set cr0)
227 and r0,r0,r8 ; limit to the maximum front end move
228 mtcrf 3,r0 ; Make branch mask for partial moves (loads cr6 and cr7)
229 sub r5,r5,r0 ; Set the length left to move
230 beq alline ; Already on a line... (cr0 from the rlwinm. above)
232 bf 31,alhalf ; No single byte to do...
233 lbz r7,0(r6) ; Get the byte
234 addi r6,r6,1 ; Point to the next
235 stb r7,0(r4) ; Save the single
236 addi r4,r4,1 ; Bump sink
238 ; Sink is halfword aligned here
240 alhalf: bf 30,alword ; No halfword to do...
241 lhz r7,0(r6) ; Get the halfword
242 addi r6,r6,2 ; Point to the next
243 sth r7,0(r4) ; Save the halfword
244 addi r4,r4,2 ; Bump sink
246 ; Sink is word aligned here
248 alword: bf 29,aldouble ; No word to do...
249 lwz r7,0(r6) ; Get the word
250 addi r6,r6,4 ; Point to the next
251 stw r7,0(r4) ; Save the word
252 addi r4,r4,4 ; Bump sink
254 ; Sink is double aligned here
256 aldouble: bf 28,alquad ; No double to do...
257 lwz r7,0(r6) ; Get the first word
258 lwz r8,4(r6) ; Get the second word
259 addi r6,r6,8 ; Point to the next
260 stw r7,0(r4) ; Save the first word
261 stw r8,4(r4) ; Save the second word
262 addi r4,r4,8 ; Bump sink
264 ; Sink is quadword aligned here
266 alquad: bf 27,alline ; No quad to do...
267 lwz r7,0(r6) ; Get the first word
268 lwz r8,4(r6) ; Get the second word
269 lwz r9,8(r6) ; Get the third word
270 stw r7,0(r4) ; Save the first word
271 lwz r11,12(r6) ; Get the fourth word
272 addi r6,r6,16 ; Point to the next
273 stw r8,4(r4) ; Save the second word
274 stw r9,8(r4) ; Save the third word
275 stw r11,12(r4) ; Save the fourth word
276 addi r4,r4,16 ; Bump sink
278 ; Sink is line aligned here
;
; alline / nxtline: move whole 32-byte lines in ascending order.
;   r0  = number of full lines (r5 >> 5); also the loop counter
;   r5  = bytes remaining; its low bits are copied into the CR by mtcrf so
;         the tail code (backend) can test them
;   r10 = 96, the look-ahead distance for the source touch
; Unless "noncache" is set, each sink line is established with dcbz before
; being filled and the source is touched 96 bytes ahead.
; The loop branch at the bottom uses cr0 from the subic. at the top; nothing
; in between writes cr0.
280 alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
281 mtcrf 3,r5 ; Make branch mask for backend partial moves
282 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move
283 beq- backend ; No full lines to move
285 sub r5,r5,r11 ; Calculate the residual
286 li r10,96 ; Stride for touch ahead
288 nxtline: subic. r0,r0,1 ; Account for the line now
290 bt- noncache,skipz ; Skip if we are not cached...
291 dcbz br0,r4 ; Blow away the whole line because we are replacing it
292 dcbt r6,r10 ; Touch ahead a bit
294 skipz: lwz r7,0(r6) ; Get the first word
295 lwz r8,4(r6) ; Get the second word
296 lwz r9,8(r6) ; Get the third word
297 stw r7,0(r4) ; Save the first word
298 lwz r11,12(r6) ; Get the fourth word
299 stw r8,4(r4) ; Save the second word
300 lwz r7,16(r6) ; Get the fifth word
301 stw r9,8(r4) ; Save the third word
302 lwz r8,20(r6) ; Get the sixth word
303 stw r11,12(r4) ; Save the fourth word
304 lwz r9,24(r6) ; Get the seventh word
305 stw r7,16(r4) ; Save the fifth word
306 lwz r11,28(r6) ; Get the eighth word
307 addi r6,r6,32 ; Point to the next
308 stw r8,20(r4) ; Save the sixth word
309 stw r9,24(r4) ; Save the seventh word
310 stw r11,28(r4) ; Save the eighth word
311 addi r4,r4,32 ; Bump sink
312 bgt+ nxtline ; Do the next line, if any...
; backend: move the final 0-31 bytes of an ascending copy.
; The low five bits of the remaining length were loaded into the CR by the
; mtcrf at alline: bit 27 = 16 bytes, 28 = 8, 29 = 4, 30 = 2, 31 = 1.
; Each stage tests its bit, moves that many bytes if set, and falls through
; to the next smaller size. Execution ends by reaching bcpydone.
315 ; Move backend quadword
317 backend: bf 27,noquad ; No quad to do...
318 lwz r7,0(r6) ; Get the first word
319 lwz r8,4(r6) ; Get the second word
320 lwz r9,8(r6) ; Get the third word
321 lwz r11,12(r6) ; Get the fourth word
322 stw r7,0(r4) ; Save the first word
323 addi r6,r6,16 ; Point to the next
324 stw r8,4(r4) ; Save the second word
325 stw r9,8(r4) ; Save the third word
326 stw r11,12(r4) ; Save the fourth word
327 addi r4,r4,16 ; Bump sink
329 ; Move backend double
331 noquad: bf 28,nodouble ; No double to do...
332 lwz r7,0(r6) ; Get the first word
333 lwz r8,4(r6) ; Get the second word
334 addi r6,r6,8 ; Point to the next
335 stw r7,0(r4) ; Save the first word
336 stw r8,4(r4) ; Save the second word
337 addi r4,r4,8 ; Bump sink
; Move backend word
341 nodouble: bf 29,noword ; No word to do...
342 lwz r7,0(r6) ; Get the word
343 addi r6,r6,4 ; Point to the next
344 stw r7,0(r4) ; Save the word
345 addi r4,r4,4 ; Bump sink
347 ; Move backend halfword
349 noword: bf 30,nohalf ; No halfword to do...
350 lhz r7,0(r6) ; Get the halfword
351 addi r6,r6,2 ; Point to the next
352 sth r7,0(r4) ; Save the halfword
353 addi r4,r4,2 ; Bump sink
; Move backend byte
357 nohalf: bf 31,bcpydone ; Leave cuz we are all done...
358 lbz r7,0(r6) ; Get the byte
359 stb r7,0(r4) ; Save the single
; bcpydone: common exit for the copy code.
;   killbats set  -> go to bcclrbat and invalidate DBAT 0 and DBAT 1
;   fixxlate clear -> plain return
;   fixxlate set  -> turn MSR[DR] back on (keeping FP and vector off), isync, return
; bcclrbat writes 0 to both upper DBAT registers after a sync.
; NOTE(review): no return instruction is visible after the two mtdbatu
; writes; the embedded listing numbers skip from 374 to 381, so the tail of
; bcclrbat may be missing from this excerpt - confirm that it does not fall
; through into fwdovrlap.
361 bcpydone: bt- killbats,bcclrbat ; Jump if we need to clear bats...
362 bflr fixxlate ; Leave now if we do not need to fix translation...
363 mfmsr r9 ; Get the MSR
364 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
365 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
366 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
367 mtmsr r9 ; Just do it
368 isync ; Hang in there
369 blr ; Leave cuz we are all done...
371 bcclrbat: li r0,0 ; Get set to invalidate upper half
372 sync ; Make sure all is well
373 mtdbatu 0,r0 ; Clear sink upper DBAT
374 mtdbatu 1,r0 ; Clear source upper DBAT
381 ; 0123456789ABCDEF0123456789ABCDEF
382 ; 0123456789ABCDEF0123456789ABCDEF
391 ; Here is where we handle a forward overlapping move. These will be slow
392 ; because we can not kill the cache of the destination until after we have
393 ; loaded/saved the source area. Also, because reading memory backwards is
394 ; slower when the cache line needs to be loaded because the critical
395 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
396 ; and on in order. That means that when we are at the second to last DW we
397 ; have to wait until the whole line is in cache before we can proceed.
;
; fwdovrlap: descending copy. r4 and r6 are advanced to one past the end of
; the sink and source, and every access uses a negative displacement.
; The low bits of the sink end address (capped by the r8 limit mask from
; copyit) give the number of bytes to move to bring the sink down to a
; 32-byte boundary. They are loaded into the CR by mtcrf and tested bit by
; bit: 31 = 1 byte, 30 = 2, 29 = 4, 28 = 8, 27 = 16.
; r12 = -1 makes the two touches address the last byte actually copied.
400 fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
401 add r6,r5,r6 ; Point past the last source byte
402 and r0,r4,r8 ; Apply movement limit
403 li r12,-1 ; Make sure we touch in the actual line
404 mtcrf 3,r0 ; Figure out the best way to move backwards
405 dcbt r12,r6 ; Touch in the last line of source
406 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
407 dcbtst r12,r4 ; Touch in the last line of the sink
408 beq- balline ; Already on cache line boundary
410 sub r5,r5,r0 ; Precalculate move length left after alignment
412 bf 31,balhalf ; No single byte to do...
413 lbz r7,-1(r6) ; Get the byte
414 subi r6,r6,1 ; Point to the next
415 stb r7,-1(r4) ; Save the single
416 subi r4,r4,1 ; Bump sink
418 ; Sink is halfword aligned here
420 balhalf: bf 30,balword ; No halfword to do...
421 lhz r7,-2(r6) ; Get the halfword
422 subi r6,r6,2 ; Point to the next
423 sth r7,-2(r4) ; Save the halfword
424 subi r4,r4,2 ; Bump sink
426 ; Sink is word aligned here
428 balword: bf 29,baldouble ; No word to do...
429 lwz r7,-4(r6) ; Get the word
430 subi r6,r6,4 ; Point to the next
431 stw r7,-4(r4) ; Save the word
432 subi r4,r4,4 ; Bump sink
434 ; Sink is double aligned here
436 baldouble: bf 28,balquad ; No double to do...
437 lwz r7,-8(r6) ; Get the first word
438 lwz r8,-4(r6) ; Get the second word
439 subi r6,r6,8 ; Point to the next
440 stw r7,-8(r4) ; Save the first word
441 stw r8,-4(r4) ; Save the second word
442 subi r4,r4,8 ; Bump sink
444 ; Sink is quadword aligned here
446 balquad: bf 27,balline ; No quad to do...
447 lwz r7,-16(r6) ; Get the first word
448 lwz r8,-12(r6) ; Get the second word
449 lwz r9,-8(r6) ; Get the third word
450 lwz r11,-4(r6) ; Get the fourth word
451 stw r7,-16(r4) ; Save the first word
452 subi r6,r6,16 ; Point to the next
453 stw r8,-12(r4) ; Save the second word
454 stw r9,-8(r4) ; Save the third word
455 stw r11,-4(r4) ; Save the fourth word
456 subi r4,r4,16 ; Bump sink
460 balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
461 mtcrf 3,r5 ; Make branch mask for backend partial moves
462 beq- bbackend ; No full lines to move
465 ; Registers in use: R0, R1, R3, R4, R5, R6
466 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
468 bnxtline: subic. r0,r0,1 ; Account for the line now
470 lwz r7,-32(r6) ; Get the first word
471 lwz r5,-28(r6) ; Get the second word
472 lwz r2,-24(r6) ; Get the third word
473 lwz r12,-20(r6) ; Get the third word
474 lwz r11,-16(r6) ; Get the fifth word
475 lwz r10,-12(r6) ; Get the sixth word
476 lwz r9,-8(r6) ; Get the seventh word
477 lwz r8,-4(r6) ; Get the eighth word
478 subi r6,r6,32 ; Point to the next
480 stw r7,-32(r4) ; Get the first word
481 ble- bnotouch ; Last time, skip touch of source...
482 dcbt br0,r6 ; Touch in next source line
484 bnotouch: stw r5,-28(r4) ; Get the second word
485 stw r2,-24(r4) ; Get the third word
486 stw r12,-20(r4) ; Get the third word
487 stw r11,-16(r4) ; Get the fifth word
488 stw r10,-12(r4) ; Get the sixth word
489 stw r9,-8(r4) ; Get the seventh word
490 stw r8,-4(r4) ; Get the eighth word
491 subi r4,r4,32 ; Bump sink
493 bgt+ bnxtline ; Do the next line, if any...
496 ; Note: We touched these lines in at the beginning
;
; bbackend: move the final 0-31 bytes of a descending copy.
; The low five bits of the remaining length were loaded into the CR by the
; mtcrf at balline: bit 27 = 16 bytes, 28 = 8, 29 = 4, 30 = 2, 31 = 1.
; r6 and r4 point one past the next bytes to move, so every access uses a
; negative displacement and the pointers are decremented afterwards.
499 ; Move backend quadword
501 bbackend: bf 27,bnoquad ; No quad to do...
502 lwz r7,-16(r6) ; Get the first word
503 lwz r8,-12(r6) ; Get the second word
504 lwz r9,-8(r6) ; Get the third word
505 lwz r11,-4(r6) ; Get the fourth word
506 stw r7,-16(r4) ; Save the first word
507 subi r6,r6,16 ; Point to the next
508 stw r8,-12(r4) ; Save the second word
509 stw r9,-8(r4) ; Save the third word
510 stw r11,-4(r4) ; Save the fourth word
511 subi r4,r4,16 ; Bump sink
513 ; Move backend double
515 bnoquad: bf 28,bnodouble ; No double to do...
516 lwz r7,-8(r6) ; Get the first word
517 lwz r8,-4(r6) ; Get the second word
518 subi r6,r6,8 ; Point to the next
519 stw r7,-8(r4) ; Save the first word
520 stw r8,-4(r4) ; Save the second word
521 subi r4,r4,8 ; Bump sink
; Move backend word
525 bnodouble: bf 29,bnoword ; No word to do...
526 lwz r7,-4(r6) ; Get the word
527 subi r6,r6,4 ; Point to the next
528 stw r7,-4(r4) ; Save the word
529 subi r4,r4,4 ; Bump sink
531 ; Move backend halfword
533 bnoword: bf 30,bnohalf ; No halfword to do...
534 lhz r7,-2(r6) ; Get the halfword
535 subi r6,r6,2 ; Point to the next
536 sth r7,-2(r4) ; Save the halfword
537 subi r4,r4,2 ; Bump sink
; bnohalf: last stage of the descending (forward-overlap) copy.
; CR bit 31 holds the 1s bit of the remaining length (loaded by the mtcrf at
; balline). If it is set, one final byte is moved.
; Either way the routine must leave through bcpydone, never by a direct
; return: bcpydone is the only place that invalidates the DBATs (killbats)
; and turns data translation back on (fixxlate). A direct return here would
; leave MSR[DR] off after an overlapping bcopy_phys, or leave the temporary
; DBATs valid after an overlapping bcopy_physvir, whenever there is no odd
; trailing byte. This matches the ascending path, which uses
; "nohalf: bf 31,bcpydone".
; For callers that clear both flags, bcpydone returns immediately, so their
; behaviour is unchanged.
541 bnohalf: bf 31,bcpydone ; No odd byte: leave through the common exit...
542 lbz r7,-1(r6) ; Get the byte
543 stb r7,-1(r4) ; Save the single
545 b bcpydone ; Go exit cuz we are all done...