2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 ; Copy bytes of data around. handles overlapped data.
25 ; Change this to use Altivec later on, and maybe floating point.
27 ; NOTE: This file compiles and executes on both Mac OS 8.x (CodeWarrior)
28 ; and Mac OS X. The "#if 0"s are treated as comments by CW so the
29 ; stuff between them is included by CW and excluded on Mac OS X.
30 ; Same with the "#include"s.
33 #include <ppc/proc_reg.h>
35 ; Use CR5_lt to indicate non-cached
37 ; Use CR5_gt to indicate that we need to turn data translation back on
48 ; bcopy_nc(from, to, nbytes)
50 ; bcopy_nc operates on non-cached memory so we can not use any kind
51 ; of cache instructions.
;
; Entry point: sets the noncache flag (the file header assigns CR5_lt to
; it) so that the full-line loop at nxtline skips its dcbz/dcbt pair.
; NOTE(review): the listing jumps from line 72 to line 76 here, so the
; transfer into the shared bcopy code is not visible - confirm that a
; branch to bcpswap (or equivalent) follows the crset.
59 ENTRY(bcopy_nc, TAG_NO_FRAME_USED)
; NOTE(review): the tc line below looks like the CodeWarrior TOC glue that
; the file header NOTE describes - confirm it sits inside an "#if 0" region.
63 tc xbcopy_nc[TC],xbcopy_nc[DS]
72 crset noncache ; Set non-cached (line loop must not dcbz the sink)
76 ; void bcopy_phys(from, to, nbytes)
77 ; Turns off data translation before the copy. Note, this one will
78 ; not work in user state
;
; In:   r3 = from, r4 = to, r5 = nbytes.
; Flow: returns at once when "from" equals "to" or when nbytes is 0.
;       Otherwise clears MSR[DR], records in fixxlate whether DR was on
;       at entry, and joins the shared code at copyit with r6 = source
;       and r4 = sink.
; Uses: r6, r8, r9, CR fields 0, 1 and 7, and the noncache/fixxlate bits.
; NOTE(review): bcpydone is the only visible place that turns MSR[DR]
; back on - confirm every exit of the copy code goes through it.
84 ENTRY(bcopy_phys, TAG_NO_FRAME_USED)
; NOTE(review): export/tc below look like CodeWarrior-only glue - confirm
; they are inside an "#if 0" region that this listing does not show.
87 export xbcopy_phys[DS]
88 tc bcopy_physc[TC],bcopy_phys[DS]
97 mfmsr r9 ; Get the MSR
98 crclr noncache ; Set cached
99 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (r8 = DR bit alone, cr0_eq if it was off)
101 cmplw cr1,r4,r3 ; Compare "to" and "from"
102 cmplwi cr7,r5,0 ; Check if we have a 0 length
103 mr r6,r3 ; Set source
104 beqlr- cr1 ; Bail if "to" and "from" are the same
105 xor r9,r9,r8 ; Turn off translation if it is on (should be)
106 beqlr- cr7 ; Bail if length is 0
108 mtmsr r9 ; Set DR translation off
; NOTE(review): listing lines 109-110 are not visible. A change to MSR[DR]
; normally needs an isync before the next data access - confirm it is there.
111 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was (cr0 still holds the line 99 test)
112 b copyit ; Go copy it...
115 ; void bcopy(from, to, nbytes)
;
; In:   r3 = from, r4 = to, r5 = nbytes.
; Flow: marks the copy as cached, returns at once when "from" equals "to"
;       or when nbytes is 0, clears fixxlate (nothing to restore at exit)
;       and joins the shared code at copyit with r6 = source, r4 = sink.
; The bcpswap label sits after the crclr of noncache, so code that enters
; at bcpswap keeps whatever noncache value it set itself.
121 ENTRY(bcopy, TAG_NO_FRAME_USED)
; NOTE(review): the tc line below looks like CodeWarrior-only TOC glue -
; confirm it is inside an "#if 0" region that this listing does not show.
125 tc xbcopyc[TC],xbcopy[DS]
134 crclr noncache ; Set cached
136 bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
137 mr. r5,r5 ; Check if we have a 0 length (sets cr0 for the second beqlr)
138 mr r6,r3 ; Set source
139 beqlr- cr1 ; Bail if "to" and "from" are the same
140 beqlr- ; Bail if length is 0
141 crclr fixxlate ; Set translation already ok
142 b copyit ; Go copy it...
145 ; When we move the memory, forward overlays must be handled. We
146 ; also can not use the cache instructions if we are from bcopy_nc.
147 ; We need to preserve R3 because it needs to be returned for memcpy.
148 ; We can be interrupted and lose control here.
150 ; There is no stack, so in order to use floating point, we would
151 ; need to take the FP exception. Any potential gains by using FP
152 ; would be more than eaten up by this.
154 ; Later, we should use Altivec for large moves.
; memcpy entry point.
; In:   r3 = "to", r4 = "from", r5 = length in bytes.
; Flow: rearranges the arguments into the layout the shared copy code
;       uses (r6 = source, r4 = sink), marks the copy as cached, clears
;       fixxlate and returns at once when "to" equals "from".
; r3 is left untouched; the comment block above says it must be preserved
; because it is the memcpy return value.
160 ENTRY(memcpy, TAG_NO_FRAME_USED)
; NOTE(review): the tc line below looks like CodeWarrior-only TOC glue -
; confirm it is inside an "#if 0" region that this listing does not show.
164 tc xmemcpy[TC],xmemcpy[DS]
172 cmplw cr1,r3,r4 ; "to" and "from" the same?
173 mr r6,r4 ; Set the "from"
174 mr. r5,r5 ; Length zero?
175 crclr noncache ; Set cached
176 mr r4,r3 ; Set the "to"
177 crclr fixxlate ; Set translation already ok
178 beqlr- cr1 ; "to" and "from" are the same
; NOTE(review): listing lines 179-180 are not visible, so the consumer of
; the cr0 length test above is not shown - confirm a zero length exit
; exists before execution continues into copyit.
; copyit - shared start of every copy flavor.
; In:   r4 = sink, r6 = source, r5 = length in bytes,
;       noncache and fixxlate already set or cleared by the entry point.
; Out:  cr0 lt   = forward overlap (sink starts inside the source range),
;       r8       = 0x7FFFFFFF shifted right by the leading zero count of
;                  the length; used to cap the alignment prefix,
;       noncache = also set when sink and source are under 32 bytes apart.
; Uses: r7, r9, r11, r12, cr0, cr1, cr7.
181 copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
182 lis r8,0x7FFF ; Start up a mask
183 srawi r11,r12,31 ; Propagate the sign bit
184 dcbt br0,r6 ; Touch in the first source line
185 cntlzw r7,r5 ; Count the leading zeros of the length (locates its highest set bit)
186 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
187 xor r9,r12,r11 ; If sink - source was negative, invert bits
188 srw r8,r8,r7 ; Get move length limitation
189 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
190 cmplw r12,r5 ; See if we actually forward overlap (unsigned sink - source below length)
191 cmplwi cr7,r9,32 ; See if at least a line between source and sink
192 dcbtst br0,r4 ; Touch in the first sink line
; NOTE(review): no later instruction in this listing reads cr1 - confirm
; whether the compare below is still needed.
193 cmplwi cr1,r5,32 ; Are we moving more than a line?
194 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (CR bit 28 = cr7 lt)
195 blt- fwdovrlap ; This is a forward overlapping area, handle it...
204 ; Here we figure out how much we have to move to get the sink onto a
205 ; cache boundary. If we can, and there are still more than 32 bytes
206 ; left to move, we can really speed things up by DCBZing the sink line.
207 ; We can not do this if noncache is set because we will take an
208 ; alignment exception.
;
; In:   r4 = sink, r6 = source, r5 = length, r8 = length limit mask.
; r0 = bytes needed to bring the sink up to a 32-byte boundary, capped by
; r8. mtcrf 3 loads the low 8 bits of r0 into cr6/cr7, so CR bits 31, 30,
; 29, 28 and 27 select the 1, 2, 4, 8 and 16 byte steps below.
; Out:  r4/r6 advanced past the prefix, r5 = bytes still to move.
; Uses: r0, r7, r8, r9, r11 (r8 stops being the limit mask at aldouble).
210 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
211 rlwinm. r0,r0,0,27,31 ; Clean it up and test it (cr0 feeds the beq below)
212 and r0,r0,r8 ; limit to the maximum front end move
213 mtcrf 3,r0 ; Make branch mask for partial moves
214 sub r5,r5,r0 ; Set the length left to move
215 beq alline ; Already on a line...
217 bf 31,alhalf ; No single byte to do...
218 lbz r7,0(r6) ; Get the byte
219 addi r6,r6,1 ; Point to the next
220 stb r7,0(r4) ; Save the single
221 addi r4,r4,1 ; Bump sink
223 ; Sink is halfword aligned here
225 alhalf: bf 30,alword ; No halfword to do...
226 lhz r7,0(r6) ; Get the halfword
227 addi r6,r6,2 ; Point to the next
228 sth r7,0(r4) ; Save the halfword
229 addi r4,r4,2 ; Bump sink
231 ; Sink is word aligned here
233 alword: bf 29,aldouble ; No word to do...
234 lwz r7,0(r6) ; Get the word
235 addi r6,r6,4 ; Point to the next
236 stw r7,0(r4) ; Save the word
237 addi r4,r4,4 ; Bump sink
239 ; Sink is double aligned here
241 aldouble: bf 28,alquad ; No double to do...
242 lwz r7,0(r6) ; Get the first word
243 lwz r8,4(r6) ; Get the second word (r8 is plain scratch from here on)
244 addi r6,r6,8 ; Point to the next
245 stw r7,0(r4) ; Save the first word
246 stw r8,4(r4) ; Save the second word
247 addi r4,r4,8 ; Bump sink
249 ; Sink is quadword aligned here
251 alquad: bf 27,alline ; No quad to do...
252 lwz r7,0(r6) ; Get the first word
253 lwz r8,4(r6) ; Get the second word
254 lwz r9,8(r6) ; Get the third word
255 stw r7,0(r4) ; Save the first word
256 lwz r11,12(r6) ; Get the fourth word
257 addi r6,r6,16 ; Point to the next
258 stw r8,4(r4) ; Save the second word
259 stw r9,8(r4) ; Save the third word
260 stw r11,12(r4) ; Save the fourth word
261 addi r4,r4,16 ; Bump sink
263 ; Sink is line aligned here
; alline - copy whole 32-byte lines from low addresses to high.
; In:   r4 = sink, r6 = source, r5 = bytes left to move.
; r0 = number of full lines (r5 shifted right by 5). cr6/cr7 are reloaded
; from r5 so that CR bits 27..31 drive the tail moves at backend; the later
; subtraction only removes whole lines, so those low bits stay valid.
; Out:  r4/r6 advanced past the lines, r5 = residual below one line.
; Uses: r0, r7, r8, r9, r10, r11, cr0.
265 alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
266 mtcrf 3,r5 ; Make branch mask for backend partial moves
267 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move
268 beq- backend ; No full lines to move
270 sub r5,r5,r11 ; Calculate the residual
271 li r10,96 ; Stride for touch ahead
273 nxtline: subic. r0,r0,1 ; Account for the line now (cr0 is tested by the bgt at the bottom)
275 bt- noncache,skipz ; Skip if we are not cached...
276 dcbz br0,r4 ; Blow away the whole line because we are replacing it
277 dcbt r6,r10 ; Touch ahead a bit (source + 96)
279 skipz: lwz r7,0(r6) ; Get the first word
280 lwz r8,4(r6) ; Get the second word
281 lwz r9,8(r6) ; Get the third word
282 stw r7,0(r4) ; Save the first word
283 lwz r11,12(r6) ; Get the fourth word
284 stw r8,4(r4) ; Save the second word
285 lwz r7,16(r6) ; Get the fifth word
286 stw r9,8(r4) ; Save the third word
287 lwz r8,20(r6) ; Get the sixth word
288 stw r11,12(r4) ; Save the fourth word
289 lwz r9,24(r6) ; Get the seventh word
290 stw r7,16(r4) ; Save the fifth word
291 lwz r11,28(r6) ; Get the eighth word
292 addi r6,r6,32 ; Point to the next
293 stw r8,20(r4) ; Save the sixth word
294 stw r9,24(r4) ; Save the seventh word
295 stw r11,28(r4) ; Save the eighth word
296 addi r4,r4,32 ; Bump sink
297 bgt+ nxtline ; Do the next line, if any...
; backend - move the residual of a forward copy (under 32 bytes).
; In:   r4 = sink, r6 = source, cr6/cr7 loaded from the remaining length
;       by the mtcrf at alline: CR bits 27, 28, 29, 30 and 31 select the
;       16, 8, 4, 2 and 1 byte moves in that order.
; Out:  execution continues at bcpydone.
; Uses: r7, r8, r9, r11.
300 ; Move backend quadword
302 backend: bf 27,noquad ; No quad to do...
303 lwz r7,0(r6) ; Get the first word
304 lwz r8,4(r6) ; Get the second word
305 lwz r9,8(r6) ; Get the third word
306 lwz r11,12(r6) ; Get the fourth word
307 stw r7,0(r4) ; Save the first word
308 addi r6,r6,16 ; Point to the next
309 stw r8,4(r4) ; Save the second word
310 stw r9,8(r4) ; Save the third word
311 stw r11,12(r4) ; Save the fourth word
312 addi r4,r4,16 ; Bump sink
314 ; Move backend double
316 noquad: bf 28,nodouble ; No double to do...
317 lwz r7,0(r6) ; Get the first word
318 lwz r8,4(r6) ; Get the second word
319 addi r6,r6,8 ; Point to the next
320 stw r7,0(r4) ; Save the first word
321 stw r8,4(r4) ; Save the second word
322 addi r4,r4,8 ; Bump sink
; Move backend word
326 nodouble: bf 29,noword ; No word to do...
327 lwz r7,0(r6) ; Get the word
328 addi r6,r6,4 ; Point to the next
329 stw r7,0(r4) ; Save the word
330 addi r4,r4,4 ; Bump sink
332 ; Move backend halfword
334 noword: bf 30,nohalf ; No halfword to do...
335 lhz r7,0(r6) ; Get the halfword
336 addi r6,r6,2 ; Point to the next
337 sth r7,0(r4) ; Save the halfword
338 addi r4,r4,2 ; Bump sink
; Move backend byte
342 nohalf: bf 31,bcpydone ; Leave cuz we are all done...
343 lbz r7,0(r6) ; Get the byte
344 stb r7,0(r4) ; Save the single
; bcpydone - common exit.
; Returns immediately when fixxlate is clear. When it is set (bcopy_phys
; found data translation on at entry and turned it off) the DR bit is
; OR-ed back into the MSR before returning.
; Uses: r9.
346 bcpydone: bflr fixxlate ; Leave now if we do not need to fix translation...
347 mfmsr r9 ; Get the MSR
348 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
349 mtmsr r9 ; Just do it
350 isync ; Hang in there (wait for the MSR change to take effect)
351 blr ; Leave cuz we are all done...
354 ; 0123456789ABCDEF0123456789ABCDEF
355 ; 0123456789ABCDEF0123456789ABCDEF
364 ; Here is where we handle a forward overlapping move. These will be slow
365 ; because we can not kill the cache of the destination until after we have
366 ; loaded/saved the source area. Also, reading memory backwards is
367 ; slower when the cache line needs to be loaded, because the critical
368 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
369 ; and on in order. That means that when we are at the second to last DW we
370 ; have to wait until the whole line is in cache before we can proceed.
;
; The copy runs from the high end of both areas down to the low end.
; In:   r4 = sink, r6 = source, r5 = length, r8 = length limit mask.
; r4/r6 are first moved to one byte past the end of each area. r0 = low
; five bits of the sink end address, capped by r8: the number of bytes to
; move so that the sink end lands on a 32-byte boundary. mtcrf 3 loads
; cr6/cr7 from it, so CR bits 31, 30, 29, 28 and 27 select the 1, 2, 4, 8
; and 16 byte steps below.
; Out:  r4/r6 moved down past the suffix, r5 = bytes still to move.
; Uses: r0, r7, r8, r9, r11, r12, cr0 (r8 is plain scratch from baldouble on).
373 fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
374 add r6,r5,r6 ; Point past the last source byte
375 and r0,r4,r8 ; Apply movement limit
376 li r12,-1 ; Make sure we touch in the actual line (offset to the last byte)
377 mtcrf 3,r0 ; Figure out the best way to move backwards
378 dcbt r12,r6 ; Touch in the last line of source
379 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
380 dcbtst r12,r4 ; Touch in the last line of the sink
381 beq- balline ; Already on cache line boundary
383 sub r5,r5,r0 ; Precalculate move length left after alignment
385 bf 31,balhalf ; No single byte to do...
386 lbz r7,-1(r6) ; Get the byte
387 subi r6,r6,1 ; Point to the next
388 stb r7,-1(r4) ; Save the single
389 subi r4,r4,1 ; Bump sink
391 ; Sink is halfword aligned here
393 balhalf: bf 30,balword ; No halfword to do...
394 lhz r7,-2(r6) ; Get the halfword
395 subi r6,r6,2 ; Point to the next
396 sth r7,-2(r4) ; Save the halfword
397 subi r4,r4,2 ; Bump sink
399 ; Sink is word aligned here
401 balword: bf 29,baldouble ; No word to do...
402 lwz r7,-4(r6) ; Get the word
403 subi r6,r6,4 ; Point to the next
404 stw r7,-4(r4) ; Save the word
405 subi r4,r4,4 ; Bump sink
407 ; Sink is double aligned here
409 baldouble: bf 28,balquad ; No double to do...
410 lwz r7,-8(r6) ; Get the first word
411 lwz r8,-4(r6) ; Get the second word
412 subi r6,r6,8 ; Point to the next
413 stw r7,-8(r4) ; Save the first word
414 stw r8,-4(r4) ; Save the second word
415 subi r4,r4,8 ; Bump sink
417 ; Sink is quadword aligned here
419 balquad: bf 27,balline ; No quad to do...
420 lwz r7,-16(r6) ; Get the first word
421 lwz r8,-12(r6) ; Get the second word
422 lwz r9,-8(r6) ; Get the third word
423 lwz r11,-4(r6) ; Get the fourth word
424 stw r7,-16(r4) ; Save the first word
425 subi r6,r6,16 ; Point to the next
426 stw r8,-12(r4) ; Save the second word
427 stw r9,-8(r4) ; Save the third word
428 stw r11,-4(r4) ; Save the fourth word
429 subi r4,r4,16 ; Bump sink
431 ; Sink is line aligned here
; balline - copy whole 32-byte lines from high addresses to low.
; In:   r4/r6 = one past the next sink/source byte to move, r5 = bytes left.
; r0 = number of full lines. cr6/cr7 are reloaded from r5 to drive the
; tail moves at bbackend. Each pass loads all eight words of a line before
; any of them is stored.
; Uses: r0, r2, r5, r7-r12, cr0. r5 is overwritten with copied data inside
; the loop; the tail code only needs the CR mask taken from it beforehand.
433 balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
434 mtcrf 3,r5 ; Make branch mask for backend partial moves
435 beq- bbackend ; No full lines to move
; NOTE(review): listing lines 436 and 439 are not visible. Going by the
; file header NOTE, the two stack stores below are probably inside an
; "#if 0" region used only by the CodeWarrior build - confirm, because on
; the bcopy_phys path r1 would be used with data translation turned off.
437 stwu r1,-8(r1) ; Dummy stack for MacOS
438 stw r2,4(r1) ; Save RTOC (r2 is used as a data register in the loop)
442 ; Registers in use: R0, R1, R3, R4, R5, R6
443 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
445 bnxtline: subic. r0,r0,1 ; Account for the line now (cr0 feeds the ble and the bgt below)
447 lwz r7,-32(r6) ; Get the first word
448 lwz r5,-28(r6) ; Get the second word
449 lwz r2,-24(r6) ; Get the third word
450 lwz r12,-20(r6) ; Get the fourth word
451 lwz r11,-16(r6) ; Get the fifth word
452 lwz r10,-12(r6) ; Get the sixth word
453 lwz r9,-8(r6) ; Get the seventh word
454 lwz r8,-4(r6) ; Get the eighth word
455 subi r6,r6,32 ; Point to the next
457 stw r7,-32(r4) ; Save the first word
458 ble- bnotouch ; Last time, skip touch of source...
459 dcbt br0,r6 ; Touch in next source line
461 bnotouch: stw r5,-28(r4) ; Save the second word
462 stw r2,-24(r4) ; Save the third word
463 stw r12,-20(r4) ; Save the fourth word
464 stw r11,-16(r4) ; Save the fifth word
465 stw r10,-12(r4) ; Save the sixth word
466 stw r9,-8(r4) ; Save the seventh word
467 stw r8,-4(r4) ; Save the eighth word
468 subi r4,r4,32 ; Bump sink
470 bgt+ bnxtline ; Do the next line, if any...
; NOTE(review): listing lines 471 and 474 are not visible; the restore
; below pairs with the dummy stack stores above - confirm both are guarded
; the same way.
472 lwz r2,4(r1) ; Restore RTOC
473 lwz r1,0(r1) ; Pop dummy stack
; bbackend - move the residual of a backward copy (under 32 bytes).
; In:   r4/r6 = one past the next sink/source byte to move, cr6/cr7 loaded
;       from the remaining length by the mtcrf at balline: CR bits 27, 28,
;       29 and 30 select the 16, 8, 4 and 2 byte moves here; the final
;       byte (bit 31) is handled at bnohalf.
; Uses: r7, r8, r9, r11.
477 ; Note: We touched these lines in at the beginning
480 ; Move backend quadword
482 bbackend: bf 27,bnoquad ; No quad to do...
483 lwz r7,-16(r6) ; Get the first word
484 lwz r8,-12(r6) ; Get the second word
485 lwz r9,-8(r6) ; Get the third word
486 lwz r11,-4(r6) ; Get the fourth word
487 stw r7,-16(r4) ; Save the first word
488 subi r6,r6,16 ; Point to the next
489 stw r8,-12(r4) ; Save the second word
490 stw r9,-8(r4) ; Save the third word
491 stw r11,-4(r4) ; Save the fourth word
492 subi r4,r4,16 ; Bump sink
494 ; Move backend double
496 bnoquad: bf 28,bnodouble ; No double to do...
497 lwz r7,-8(r6) ; Get the first word
498 lwz r8,-4(r6) ; Get the second word
499 subi r6,r6,8 ; Point to the next
500 stw r7,-8(r4) ; Save the first word
501 stw r8,-4(r4) ; Save the second word
502 subi r4,r4,8 ; Bump sink
; Move backend word
506 bnodouble: bf 29,bnoword ; No word to do...
507 lwz r7,-4(r6) ; Get the word
508 subi r6,r6,4 ; Point to the next
509 stw r7,-4(r4) ; Save the word
510 subi r4,r4,4 ; Bump sink
512 ; Move backend halfword
514 bnoword: bf 30,bnohalf ; No halfword to do...
515 lhz r7,-2(r6) ; Get the halfword
516 subi r6,r6,2 ; Point to the next
517 sth r7,-2(r4) ; Save the halfword
518 subi r4,r4,2 ; Bump sink
; bnohalf - move the final byte of a backward copy, then leave.
; In:   r4/r6 = one past the last sink/source byte still to move,
;       CR bit 31 set when one byte remains.
; The exit goes through bcpydone instead of returning directly. A forward
; overlapping bcopy_phys reaches this point with data translation turned
; off and fixxlate set, and bcpydone is the only place that turns MSR[DR]
; back on. For the entry points that cleared fixxlate, bcpydone returns
; at once, so their behavior is unchanged.
; Uses: r7.
522 bnohalf: bf 31,bcpydone ; No byte left, leave through the common exit...
523 lbz r7,-1(r6) ; Get the byte
524 stb r7,-1(r4) ; Save the single
526 b bcpydone ; All done, restore translation if needed and return