/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
;
;           Copy bytes of data around.  Handles overlapped data.
;
;           Change this to use Altivec later on, and maybe floating point.
;
;           Entry points in this file:
;               bcopy_nc(from, to, nbytes)       - copy without cache instructions
;               bcopy_physvir(from, to, nbytes)  - copy through temporary DBATs
;               bcopy_phys(from, to, nbytes)     - copy with data translation off
;               bcopy(from, to, nbytes)          - ordinary copy
;               memcpy(to, from, nbytes)         - ordinary copy, R3 is preserved
;
;           Every entry point sets up the three CR5 flags below, puts the
;           source in R6, the sink in R4 and the length in R5, and then joins
;           the common code at copyit.  Every path out of the common code has
;           to leave through bcpydone so that those flags are honored.
;

; NOTE(review): the header names were missing from the text under review.  The
; two includes below are reconstructed from the macros this file uses (EXT,
; LEXT, br0, MASK, MSR_DR_BIT, MSR_FP_BIT, MSR_VEC_BIT,
; PTE_WIMG_CB_CACHED_COHERENT) - confirm against the build tree.
#include <ppc/asm.h>
#include <ppc/proc_reg.h>

;           Use CR5_lt to indicate non-cached
#define noncache 20
;           Use CR5_gt to indicate that we need to turn data translation back on
#define fixxlate 21
;           Use CR5_eq to indicate that we need to invalidate bats
#define killbats 22

;
; bcopy_nc(from, to, nbytes)
;
; bcopy_nc operates on non-cached memory so we can not use any kind
; of cache instructions.
;

            .align  5
            .globl  EXT(bcopy_nc)

LEXT(bcopy_nc)

            crset   noncache                    ; Set non-cached
            b       bcpswap                     ; Join the normal bcopy entry

;
; void bcopy_physvir(from, to, nbytes)
;
; Attempt to copy physically addressed memory with translation on if
; conditions are met.  Otherwise do a normal bcopy_phys.
;
; Rules are:  neither source nor destination can cross a page.
;             No accesses above the 2GB line (I/O or ROM).
;
; Interrupts must be disabled throughout the copy when this is called.
;
; To do this, we build a 128KB DBAT (block length field left at zero) for
; both the source and the sink.  If both are in the same block, only one is
; loaded.  We do not touch the IBATs, so there is no issue if either physical
; page address is the same as the virtual address of the instructions we are
; executing.
;
; At the end, the upper halves of DBAT 0 and DBAT 1 are both zeroed (see
; bcclrbat).  Nothing in this file changes the interrupt enable state, so
; that stays the business of the caller.
;
; Note, this one will not work in user state
;

            .align  5
            .globl  EXT(bcopy_physvir)

LEXT(bcopy_physvir)

            addic.  r0,r5,-1                    ; Get length - 1
            add     r11,r3,r0                   ; Point to last byte of "from" (the source)
            cmplw   cr1,r3,r4                   ; Does source == sink? (cr1 is set again below before it is used)
            add     r12,r4,r0                   ; Point to last byte of "to" (the sink)
            bltlr-                              ; Bail if length is 0 or way too big
            xor     r7,r11,r3                   ; See if the source went to the next page
            xor     r8,r12,r4                   ; See if the sink went to the next page
            or      r0,r7,r8                    ; Combine wrap
            li      r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2)  ; Set default attributes
            rlwinm. r0,r0,0,0,19                ; Did we overflow a page?
            li      r7,2                        ; Set validity flags
            li      r8,2                        ; Set validity flags
            bne-    EXT(bcopy_phys)             ; Overflowed page, do normal physical copy...

            crset   killbats                    ; Remember to trash BATs on the way out
            rlwimi  r11,r9,0,15,31              ; Set source lower DBAT value
            rlwimi  r12,r9,0,15,31              ; Set sink lower DBAT value
            rlwimi  r7,r11,0,0,14               ; Set source upper DBAT value
            rlwimi  r8,r12,0,0,14               ; Set sink upper DBAT value
            cmplw   cr1,r11,r12                 ; See if sink and source are same block
            sync
            mtdbatl 0,r11                       ; Set source lower DBAT
            mtdbatu 0,r7                        ; Set source upper DBAT
            beq-    cr1,bcpvsame                ; Source and sink are in same block
            mtdbatl 1,r12                       ; Set sink lower DBAT
            mtdbatu 1,r8                        ; Set sink upper DBAT

bcpvsame:   mr      r6,r3                       ; Set source
            crclr   noncache                    ; Set cached

            b       copyit                      ; Go copy it...

;
; void bcopy_phys(from, to, nbytes)
;
; Turns off data translation before the copy.  The floating point and vector
; enables are forced off in the MSR image as well.  If translation was on at
; entry, fixxlate is set so that bcpydone turns it back on.
;
; Note, this one will not work in user state
;

            .align  5
            .globl  EXT(bcopy_phys)

LEXT(bcopy_phys)

            mfmsr   r9                          ; Get the MSR
            crclr   noncache                    ; Set cached
            rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT  ; Is data translation on?
            cmplw   cr1,r4,r3                   ; Compare "to" and "from"
            cmplwi  cr7,r5,0                    ; Check if we have a 0 length
            mr      r6,r3                       ; Set source
            beqlr-  cr1                         ; Bail if "to" and "from" are the same
            xor     r9,r9,r8                    ; Turn off translation if it is on (should be)
            beqlr-  cr7                         ; Bail if length is 0
            rlwinm  r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1  ; Force floating point off
            crclr   killbats                    ; Make sure we do not trash BATs on the way out
            rlwinm  r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1  ; Force vectors off
            mtmsr   r9                          ; Set DR translation off
            isync                               ; Wait for it

            crnot   fixxlate,cr0_eq             ; Remember to turn on translation if it was
                                                ; (cr0 still holds the DR test from above)

            b       copyit                      ; Go copy it...

;
; void bcopy(from, to, nbytes)
;
; bcopy_nc joins at bcpswap with noncache already set.
;

            .align  5
            .globl  EXT(bcopy)

LEXT(bcopy)

            crclr   noncache                    ; Set cached

bcpswap:
            cmplw   cr1,r4,r3                   ; Compare "to" and "from"
            mr.     r5,r5                       ; Check if we have a 0 length
            mr      r6,r3                       ; Set source
            crclr   killbats                    ; Make sure we do not trash BATs on the way out
            beqlr-  cr1                         ; Bail if "to" and "from" are the same
            beqlr-                              ; Bail if length is 0
            crclr   fixxlate                    ; Set translation already ok
            b       copyit                      ; Go copy it...

;
; When we move the memory, forward overlays must be handled.  We
; also can not use the cache instructions if we are from bcopy_nc.
; We need to preserve R3 because it needs to be returned for memcpy.
; We can be interrupted and lose control here.
;
; There is no stack, so in order to use floating point, we would
; need to take the FP exception.  Any potential gains by using FP
; would be more than eaten up by this.
;
; Later, we should use Altivec for large moves.
;
; void *memcpy(to, from, nbytes)
;
; Note the argument order is the reverse of bcopy.  R3 is never written by
; the common code, so it still holds "to" on return.
;

            .align  5
            .globl  EXT(memcpy)

LEXT(memcpy)

            cmplw   cr1,r3,r4                   ; "to" and "from" the same?
            mr      r6,r4                       ; Set the "from"
            mr.     r5,r5                       ; Length zero?
            crclr   noncache                    ; Set cached
            mr      r4,r3                       ; Set the "to"
            crclr   fixxlate                    ; Set translation already ok
            beqlr-  cr1                         ; "to" and "from" are the same
            beqlr-                              ; Length is 0
            crclr   killbats                    ; Make sure we do not trash BATs on the way out

;
; Common copy code.  On entry:
;
;           R4 = sink
;           R5 = length (not zero)
;           R6 = source
;           noncache, fixxlate and killbats are set up by the entry point
;
; R8 becomes 0x7FFFFFFF shifted right by the leading zero count of the
; length, i.e. every bit below the top set bit of the length.  Anding an
; alignment count with it keeps the front end move shorter than the length.
;

copyit:     sub     r12,r4,r6                   ; Get potential overlap (negative if backward move)
            lis     r8,0x7FFF                   ; Start up a mask
            srawi   r11,r12,31                  ; Propagate the sign bit
            dcbt    br0,r6                      ; Touch in the first source line
            cntlzw  r7,r5                       ; Count the zeros above the top set bit of the length
            ori     r8,r8,0xFFFF                ; Make limit 0x7FFFFFFF
            xor     r9,r12,r11                  ; If sink - source was negative, invert bits
            srw     r8,r8,r7                    ; Get move length limitation
            sub     r9,r9,r11                   ; If sink - source was negative, add 1 and get absolute value
            cmplw   r12,r5                      ; See if we actually forward overlap
            cmplwi  cr7,r9,32                   ; See if at least a line between source and sink
            dcbtst  br0,r4                      ; Touch in the first sink line
            cmplwi  cr1,r5,32                   ; Are we moving more than a line?
            cror    noncache,noncache,28        ; Set to not DCBZ output line if not enough space
                                                ; (CR bit 28 is the less-than bit of the cr7 compare)
            blt-    fwdovrlap                   ; This is a forward overlapping area, handle it...

;
;           R4 = sink
;           R5 = length
;           R6 = source
;

;
; Here we figure out how much we have to move to get the sink onto a
; cache boundary.  If we can, and there are still more than 32 bytes
; left to move, we can really speed things up by DCBZing the sink line.
; We can not do this if noncache is set because we will take an
; alignment exception.
;
; mtcrf 3 loads the low 8 bits of the count into cr6 and cr7, so CR bits
; 27 through 31 select the 16, 8, 4, 2 and 1 byte moves below.
;

            neg     r0,r4                       ; Get the number of bytes to move to align to a line boundary
            rlwinm. r0,r0,0,27,31               ; Clean it up and test it
            and     r0,r0,r8                    ; Limit to the maximum front end move
            mtcrf   3,r0                        ; Make branch mask for partial moves
            sub     r5,r5,r0                    ; Set the length left to move
            beq     alline                      ; Already on a line...

            bf      31,alhalf                   ; No single byte to do...
            lbz     r7,0(r6)                    ; Get the byte
            addi    r6,r6,1                     ; Point to the next
            stb     r7,0(r4)                    ; Save the single
            addi    r4,r4,1                     ; Bump sink

;           Sink is halfword aligned here

alhalf:     bf      30,alword                   ; No halfword to do...
            lhz     r7,0(r6)                    ; Get the halfword
            addi    r6,r6,2                     ; Point to the next
            sth     r7,0(r4)                    ; Save the halfword
            addi    r4,r4,2                     ; Bump sink

;           Sink is word aligned here

alword:     bf      29,aldouble                 ; No word to do...
            lwz     r7,0(r6)                    ; Get the word
            addi    r6,r6,4                     ; Point to the next
            stw     r7,0(r4)                    ; Save the word
            addi    r4,r4,4                     ; Bump sink

;           Sink is double aligned here

aldouble:   bf      28,alquad                   ; No double to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            addi    r6,r6,8                     ; Point to the next
            stw     r7,0(r4)                    ; Save the first word
            stw     r8,4(r4)                    ; Save the second word
            addi    r4,r4,8                     ; Bump sink

;           Sink is quadword aligned here

alquad:     bf      27,alline                   ; No quad to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            stw     r7,0(r4)                    ; Save the first word
            lwz     r11,12(r6)                  ; Get the fourth word
            addi    r6,r6,16                    ; Point to the next
            stw     r8,4(r4)                    ; Save the second word
            stw     r9,8(r4)                    ; Save the third word
            stw     r11,12(r4)                  ; Save the fourth word
            addi    r4,r4,16                    ; Bump sink

;           Sink is line aligned here

alline:     rlwinm. r0,r5,27,5,31               ; Get the number of full lines to move
            mtcrf   3,r5                        ; Make branch mask for backend partial moves
            rlwinm  r11,r5,0,0,26               ; Get number of bytes we are going to move
            beq-    backend                     ; No full lines to move

            sub     r5,r5,r11                   ; Calculate the residual
            li      r10,96                      ; Stride for touch ahead

nxtline:    subic.  r0,r0,1                     ; Account for the line now

            bt-     noncache,skipz              ; Skip if we are not cached...
            dcbz    br0,r4                      ; Blow away the whole line because we are replacing it
            dcbt    r6,r10                      ; Touch ahead a bit

skipz:      lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            stw     r7,0(r4)                    ; Save the first word
            lwz     r11,12(r6)                  ; Get the fourth word
            stw     r8,4(r4)                    ; Save the second word
            lwz     r7,16(r6)                   ; Get the fifth word
            stw     r9,8(r4)                    ; Save the third word
            lwz     r8,20(r6)                   ; Get the sixth word
            stw     r11,12(r4)                  ; Save the fourth word
            lwz     r9,24(r6)                   ; Get the seventh word
            stw     r7,16(r4)                   ; Save the fifth word
            lwz     r11,28(r6)                  ; Get the eighth word
            addi    r6,r6,32                    ; Point to the next
            stw     r8,20(r4)                   ; Save the sixth word
            stw     r9,24(r4)                   ; Save the seventh word
            stw     r11,28(r4)                  ; Save the eighth word
            addi    r4,r4,32                    ; Bump sink
            bgt+    nxtline                     ; Do the next line, if any...

;           Move backend quadword

backend:    bf      27,noquad                   ; No quad to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            lwz     r11,12(r6)                  ; Get the fourth word
            stw     r7,0(r4)                    ; Save the first word
            addi    r6,r6,16                    ; Point to the next
            stw     r8,4(r4)                    ; Save the second word
            stw     r9,8(r4)                    ; Save the third word
            stw     r11,12(r4)                  ; Save the fourth word
            addi    r4,r4,16                    ; Bump sink

;           Move backend double

noquad:     bf      28,nodouble                 ; No double to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            addi    r6,r6,8                     ; Point to the next
            stw     r7,0(r4)                    ; Save the first word
            stw     r8,4(r4)                    ; Save the second word
            addi    r4,r4,8                     ; Bump sink

;           Move backend word

nodouble:   bf      29,noword                   ; No word to do...
            lwz     r7,0(r6)                    ; Get the word
            addi    r6,r6,4                     ; Point to the next
            stw     r7,0(r4)                    ; Save the word
            addi    r4,r4,4                     ; Bump sink

;           Move backend halfword

noword:     bf      30,nohalf                   ; No halfword to do...
            lhz     r7,0(r6)                    ; Get the halfword
            addi    r6,r6,2                     ; Point to the next
            sth     r7,0(r4)                    ; Save the halfword
            addi    r4,r4,2                     ; Bump sink

;           Move backend byte

nohalf:     bf      31,bcpydone                 ; Leave cuz we are all done...
            lbz     r7,0(r6)                    ; Get the byte
            stb     r7,0(r4)                    ; Save the single

;
; Common exit.  Undo whatever the entry point asked for:
;           killbats - zero the upper halves of DBAT 0 and DBAT 1
;           fixxlate - turn data translation back on
; Both copy directions must leave through here.
;

bcpydone:   bt-     killbats,bcclrbat           ; Jump if we need to clear bats...
            bflr    fixxlate                    ; Leave now if we do not need to fix translation...
            mfmsr   r9                          ; Get the MSR
            ori     r9,r9,lo16(MASK(MSR_DR))    ; Turn data translation on
            rlwinm  r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1  ; Force floating point off
            rlwinm  r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1  ; Force vectors off
            mtmsr   r9                          ; Just do it
            isync                               ; Hang in there
            blr                                 ; Leave cuz we are all done...

bcclrbat:   li      r0,0                        ; Get set to invalidate upper half
            sync                                ; Make sure all is well
            mtdbatu 0,r0                        ; Clear DBAT 0 upper (always loaded by bcopy_physvir)
            mtdbatu 1,r0                        ; Clear DBAT 1 upper (loaded only if the blocks differed)
            sync
            isync
            blr

;
;           0123456789ABCDEF0123456789ABCDEF
;            0123456789ABCDEF0123456789ABCDEF
;                                           F
;                                         DE
;                                     9ABC
;                             12345678
;             123456789ABCDEF0
;            0
;
; Here is where we handle a forward overlapping move.  These will be slow
; because we can not kill the cache of the destination until after we have
; loaded/saved the source area.  Also, because reading memory backwards is
; slower when the cache line needs to be loaded because the critical
; doubleword is loaded first, i.e., the last, then it goes back to the first,
; and on in order.  That means that when we are at the second to last DW we
; have to wait until the whole line is in cache before we can proceed.
;
; The copy runs from the last byte down to the first.  R4 and R6 point just
; past the bytes still to be moved, so every access uses a negative offset.
;

fwdovrlap:  add     r4,r5,r4                    ; Point past the last sink byte
            add     r6,r5,r6                    ; Point past the last source byte
            and     r0,r4,r8                    ; Apply movement limit
            li      r12,-1                      ; Make sure we touch in the actual line
            mtcrf   3,r0                        ; Figure out the best way to move backwards
            dcbt    r12,r6                      ; Touch in the last line of source
            rlwinm. r0,r0,0,27,31               ; Calculate the length to adjust to cache boundary
            dcbtst  r12,r4                      ; Touch in the last line of the sink
            beq-    balline                     ; Already on cache line boundary

            sub     r5,r5,r0                    ; Precalculate move length left after alignment

            bf      31,balhalf                  ; No single byte to do...
            lbz     r7,-1(r6)                   ; Get the byte
            subi    r6,r6,1                     ; Point to the next
            stb     r7,-1(r4)                   ; Save the single
            subi    r4,r4,1                     ; Bump sink

;           Sink is halfword aligned here

balhalf:    bf      30,balword                  ; No halfword to do...
            lhz     r7,-2(r6)                   ; Get the halfword
            subi    r6,r6,2                     ; Point to the next
            sth     r7,-2(r4)                   ; Save the halfword
            subi    r4,r4,2                     ; Bump sink

;           Sink is word aligned here

balword:    bf      29,baldouble                ; No word to do...
            lwz     r7,-4(r6)                   ; Get the word
            subi    r6,r6,4                     ; Point to the next
            stw     r7,-4(r4)                   ; Save the word
            subi    r4,r4,4                     ; Bump sink

;           Sink is double aligned here

baldouble:  bf      28,balquad                  ; No double to do...
            lwz     r7,-8(r6)                   ; Get the first word
            lwz     r8,-4(r6)                   ; Get the second word
            subi    r6,r6,8                     ; Point to the next
            stw     r7,-8(r4)                   ; Save the first word
            stw     r8,-4(r4)                   ; Save the second word
            subi    r4,r4,8                     ; Bump sink

;           Sink is quadword aligned here

balquad:    bf      27,balline                  ; No quad to do...
            lwz     r7,-16(r6)                  ; Get the first word
            lwz     r8,-12(r6)                  ; Get the second word
            lwz     r9,-8(r6)                   ; Get the third word
            lwz     r11,-4(r6)                  ; Get the fourth word
            stw     r7,-16(r4)                  ; Save the first word
            subi    r6,r6,16                    ; Point to the next
            stw     r8,-12(r4)                  ; Save the second word
            stw     r9,-8(r4)                   ; Save the third word
            stw     r11,-4(r4)                  ; Save the fourth word
            subi    r4,r4,16                    ; Bump sink

;           Sink is line aligned here

balline:    rlwinm. r0,r5,27,5,31               ; Get the number of full lines to move
            mtcrf   3,r5                        ; Make branch mask for backend partial moves
            beq-    bbackend                    ; No full lines to move

;
;           Registers in use:     R0, R1, R3, R4, R5, R6
;           Registers not in use: R2, R7, R8, R9, R10, R11, R12
;
;           That is only seven free and a line needs eight, so R5 is pressed
;           into service too.  The low bits of the length are already in the
;           CR from the mtcrf above and the line count is in R0, so R5 is not
;           needed again.  All eight words are loaded before the first store.
;

bnxtline:   subic.  r0,r0,1                     ; Account for the line now

            lwz     r7,-32(r6)                  ; Get the first word
            lwz     r5,-28(r6)                  ; Get the second word
            lwz     r2,-24(r6)                  ; Get the third word
            lwz     r12,-20(r6)                 ; Get the fourth word
            lwz     r11,-16(r6)                 ; Get the fifth word
            lwz     r10,-12(r6)                 ; Get the sixth word
            lwz     r9,-8(r6)                   ; Get the seventh word
            lwz     r8,-4(r6)                   ; Get the eighth word
            subi    r6,r6,32                    ; Point to the next

            stw     r7,-32(r4)                  ; Save the first word
            ble-    bnotouch                    ; Last time, skip touch of source...
            dcbt    br0,r6                      ; Touch in next source line

bnotouch:   stw     r5,-28(r4)                  ; Save the second word
            stw     r2,-24(r4)                  ; Save the third word
            stw     r12,-20(r4)                 ; Save the fourth word
            stw     r11,-16(r4)                 ; Save the fifth word
            stw     r10,-12(r4)                 ; Save the sixth word
            stw     r9,-8(r4)                   ; Save the seventh word
            stw     r8,-4(r4)                   ; Save the eighth word
            subi    r4,r4,32                    ; Bump sink

            bgt+    bnxtline                    ; Do the next line, if any...

;
;           Note: We touched these lines in at the beginning
;

;           Move backend quadword

bbackend:   bf      27,bnoquad                  ; No quad to do...
            lwz     r7,-16(r6)                  ; Get the first word
            lwz     r8,-12(r6)                  ; Get the second word
            lwz     r9,-8(r6)                   ; Get the third word
            lwz     r11,-4(r6)                  ; Get the fourth word
            stw     r7,-16(r4)                  ; Save the first word
            subi    r6,r6,16                    ; Point to the next
            stw     r8,-12(r4)                  ; Save the second word
            stw     r9,-8(r4)                   ; Save the third word
            stw     r11,-4(r4)                  ; Save the fourth word
            subi    r4,r4,16                    ; Bump sink

;           Move backend double

bnoquad:    bf      28,bnodouble                ; No double to do...
            lwz     r7,-8(r6)                   ; Get the first word
            lwz     r8,-4(r6)                   ; Get the second word
            subi    r6,r6,8                     ; Point to the next
            stw     r7,-8(r4)                   ; Save the first word
            stw     r8,-4(r4)                   ; Save the second word
            subi    r4,r4,8                     ; Bump sink

;           Move backend word

bnodouble:  bf      29,bnoword                  ; No word to do...
            lwz     r7,-4(r6)                   ; Get the word
            subi    r6,r6,4                     ; Point to the next
            stw     r7,-4(r4)                   ; Save the word
            subi    r4,r4,4                     ; Bump sink

;           Move backend halfword

bnoword:    bf      30,bnohalf                  ; No halfword to do...
            lhz     r7,-2(r6)                   ; Get the halfword
            subi    r6,r6,2                     ; Point to the next
            sth     r7,-2(r4)                   ; Save the halfword
            subi    r4,r4,2                     ; Bump sink

;           Move backend byte
;
;           Even with no last byte to move we must go out through bcpydone
;           and not return directly: a bcopy_phys caller still has data
;           translation off and a bcopy_physvir caller still has its DBATs
;           loaded at this point.

bnohalf:    bf      31,bcpydone                 ; No byte to do, go exit cuz we are all done...
            lbz     r7,-1(r6)                   ; Get the byte
            stb     r7,-1(r4)                   ; Save the single

            b       bcpydone                    ; Go exit cuz we are all done...