/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

// NOTE(review): the operands of the six #include directives below are missing
// in this copy of the file; restore the header names before assembling.
#include
#include
#include
#include
#include
#include

#define INSTRUMENT 0

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 *
 * Register usage:
 *      r3  = on entry, the page to zero. The code shifts it left by 12 to form
 *            the physical address, i.e. it is used as a page number.
 *      r4  = offset within the page of the next line to zero (counts down)
 *      r5  = offset of the second line zeroed in each pass
 *      r9  = cache line size, derived from the feature flags in r10
 *      r12 = caller's return address, held across the call to
 *            ml_set_physical_disabled
 *
 * The loop starts at the last line of the page and zeroes two lines per pass,
 * finishing when the second offset (r5) reaches 0. The routine leaves through
 * a tail branch to ml_restore, so ml_restore returns to our caller.
 */

        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size

        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page

        bt++    pf64Bitb,page0S4                // Go do the big guys...

        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

        .align  5

        // Seven nops plus the sldi below fill one 32-byte block, so that
        // page_zero_1 starts on a 32-byte boundary.
        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                        // get page address from page num

page_zero_1:                                    // loop zeroing cache lines
        sub.    r5,r4,r9                        // more to go? (cr0_eq set when r5 reaches 0)
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1                     // tests cr0 from the "sub." above

        b       EXT(ml_restore)                 // restore MSR and do the isync

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t        src;
 *      addr64_t        dst;
 *      int             bytecount
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst. It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal. Length must be a signed word.
 * Not performance critical.
 *
 * Register usage:
 *      r3/r4 = src  (high word / low word) on entry; r3 = full source address after setup
 *      r5/r6 = dst  (high word / low word) on entry; r4 = full dest address after setup
 *      r7    = bytecount on entry; r5 = remaining count after setup
 *      r12   = caller's return address, held across ml_set_physical_disabled
 *
 * Words are copied while at least 4 bytes remain, then the leftover bytes are
 * copied one at a time. A zero or negative bytecount copies nothing.
 * NOTE(review): r4 and r6 are still read after the call to
 * ml_set_physical_disabled, so that routine presumably preserves the argument
 * registers -- confirm against its definition.
 */

        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        rlwinm  r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        mflr    r12                             // get return address
        rlwimi  r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
        mtlr    r12                             // restore return address
        subic.  r5,r7,4                         // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                                    // loop copying words
        subic.  r5,r5,4                         // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                         // restore count
        ble     phys_copy_4                     // no more

                                                // Loop is aligned here

phys_copy_3:                                    // loop copying bytes
        subic.  r5,r5,1                         // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)                 // restore MSR and do the isync

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t src;
 *      ppnum_t dst;
 *
 * This routine will copy the physical page src to physical page dst
 *
 * This routine assumes that the src and dst are page numbers and that the
 * destination is cached. It runs on 32 and 64 bit processors, with and
 * without altivec, and with 32 and 128 byte cache lines.
* We also must assume that no-one will be executing within the destination
* page, and that this will be used for paging. Because this
* is a common routine, we have tuned loops for each processor class.
*
*/

// Register roles common to all paths of pmap_copy_page:
//      r3, r4 = source / destination page numbers on entry, shifted left by 12
//               to form addresses
//      r2     = working MSR: entry MSR with EE off and FP and VEC on
//      r11    = entry MSR with VEC and FP cleared; reloaded at pmap_g4_restore
//      r12    = r2 with DR off (and SF on in the 64-bit paths), used while copying
//      r10    = feature flags from sprg2
// The return address is stored at 8(r1) in the caller's frame before our
// kSFSize-byte frame is pushed; the frame holds the saved FPRs or VRs.

#define kSFSize (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))          ; Get the vector flag
        mflr    r0                              // get return
        ori     r2,r2,lo16(MASK(MSR_FP))        ; Add the FP flag
        stw     r0,8(r1)                        // save
        stwu    r1,-kSFSize(r1)                 // set up a stack frame for VRs or FPRs
        mfmsr   r11                             // save MSR at entry
        mfsprg  r10,2                           // get feature flags
        andc    r11,r11,r2                      // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))        // Get EE on also
        andc    r2,r11,r2                       // Clear out EE as well
        mtcrf   0x02,r10                        // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)              // must enable FP for G3...
        mtcrf   0x80,r10                        // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))       // enable altivec for G4 (ignored if G3)
        mtmsr   r2                              // turn EE off, FP and VEC on
        isync
        bt++    pf64Bitb,pmap_copy_64           // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                        // get page address from page num
        slwi    r4,r4,12                        // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4         // altivec but not 64-bit means G4


        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)                // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32               // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                             // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                              // loop over 32-byte cache lines
        dcbz    0,r4                            // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                            // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r6,PPC_PGBYTES-32               // point to last line in page
pmap_g3_icache_flush:                           // two lines per pass, from the end of the page down
        subic.  r5,r6,32                        // more to go?
        icbi    r4,r6                           // flush another line in icache
        subi    r6,r5,32                        // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)                // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore                 // restore MSR and done


        // G4 -- copy using VRs

pmap_copy_g4:                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)               // place where we save VRs to r9
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        stvx    v0,0,r9                         // save some VRs so we can use to copy
        li      r7,48                           // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64               // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                           // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128                          // second look-ahead offset (r9 no longer the save-area base)
        mtmsr   r12                             // now we've saved VRs on stack, turn off DR
        isync                                   // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                               // align inner loops
pmap_g4_copy_loop:                              // loop over 64-byte chunks
        dcbt    r3,r8                           // touch 3 lines ahead
        nop                                     // avoid a 17-word loop...
        dcbt    r3,r9                           // touch 4 lines ahead
        nop                                     // more padding
        dcba    0,r4                            // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64
        dcba    r6,r4                           // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        dcbf    0,r4                            // push line 1
        dcbf    r6,r4                           // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-32               // point to last line in page
pmap_g4_icache_flush:                           // two lines per pass, from the end of the page down
        subic.  r9,r8,32                        // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,32                        // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

        // Common exit for all four copy paths.

pmap_g4_restore:                                // r11=MSR
        mtmsr   r11                             // reload entry MSR image (EE as at entry, VEC and FP off)
        isync                                   // wait for it to happen
        addi    r1,r1,kSFSize                   // pop off our stack frame
        lwz     r0,8(r1)                        // restore return address
        mtlr    r0
        blr


        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                                   // r10=features, r11=old MSR
        sldi    r3,r3,12                        // get page address from page num
        sldi    r4,r4,12                        // get page address from page num
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        bf      pfAltivecb,pmap_novmx_copy      // altivec suppressed...
        stvx    v0,0,r9                         // save 8 VRs so we can copy wo bubbles
        stvx    v1,r5,r9
        li      r7,48                           // another offset
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                        // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        li      r8,-128                         // offset so we can reach back one line
        mtmsrd  r12                             // now we've saved VRs, turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream
        b       pmap_64_copy_loop

        .align  5                               // align inner loops
pmap_64_copy_loop:                              // loop over 128-byte chunks
        dcbz128 0,r4                            // avoid read of destination line

        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64                        // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                         // offset 64
        lvx     v5,r5,r3                        // offset 80
        lvx     v6,r6,r3                        // offset 96
        lvx     v7,r7,r3                        // offset 112
        addi    r3,r3,64

        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                         // offset 64
        stvx    v5,r5,r4                        // offset 80
        stvx    v6,r6,r4                        // offset 96
        stvx    v7,r7,r4                        // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                           // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page
pmap_64_icache_flush:                           // two lines per pass, from the end of the page down
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)               // get base address of VR save area on stack
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore                 // restore lower half of MSR and return

        //
        // Copy on 64-bit without VMX
        //
        // This path copies through GPRs r0, r12 and r5-r10, so no vector or
        // floating point registers are saved. It overwrites r10 (features)
        // and r12 (the DR-off MSR image) once the MSR has been switched.

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        mtmsrd  r12                             // turn DR off and SF on (no VRs to save on this path)
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

pmap_novmx_copy_loop:                           // loop over 128-byte cache lines
        dcbz128 0,r4                            // avoid read of dest line

        ld      r0,0(r3)                        // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                        // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)                       // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)

        addi    r3,r3,128

        std     r0,64(r4)                       // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                            // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page

pmap_novmx_icache_flush:                        // two lines per pass, from the end of the page down
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync

        b       pmap_g4_restore                 // restore lower half of MSR and return



//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run both on 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
// by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid
// using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.

#define kkFrameSize (FM_SIZE+32)

// Offsets of the save slots within the frame.
#define kkBufSize (FM_SIZE+0)
#define kkCR3 (FM_SIZE+4)
#define kkSource (FM_SIZE+8)
#define kkDest (FM_SIZE+12)
#define kkCountPtr (FM_SIZE+16)
#define kkR31Save (FM_SIZE+20)
#define kkThrErrJmp (FM_SIZE+24)


// nonvolatile CR bits we use as flags in cr3
// kkZero shares bit 15 with kkString: kkZero is only written (at copyinstr7
// and copyinstr9) after the last test of kkString (at copyin3).

#define kk64bit 12
#define kkNull 13
#define kkIn 14
#define kkString 15
#define kkZero 15


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t     src;            // r3
 *      addr64_t        dst;            // r4 and r5
 *      vm_size_t       maxcount;       // r6
 *      vm_size_t*      count;          // r7
 *
 * Set *count to the number of bytes copied.
*/

// Each of the four entry points below saves the caller's cr3 in r2, moves the
// 64-bit user-space address into r10/r11, sets the kkString/kkIn flags in cr3,
// and then joins the common setup at copyJoin (or copyJoin1 for copyinstr).

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r5                          // move low word of 64-bit user address to r11
        stw     r0,0(r7)                        // initialize #bytes moved
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      addr64_t        src;            // r3 and r4
 *      vm_offset_t     dst;            // r5
 *      vm_size_t       maxcount;       // r6
 *      vm_size_t*      count;          // r7
 *
 * Set *count to the number of bytes copied
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        cmplwi  r5,0                            // dst==NULL?
        mr      r10,r3                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crmove  kkNull,cr0_eq                   // remember if (dst==NULL)
        stw     r0,0(r7)                        // initialize #bytes moved
        crset   kkIn                            // flag as copyin (rather than copyout)
        b       copyJoin1                       // skip over the "crclr kkNull"


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t     src;            // r3
 *      addr64_t        dst;            // r4 and r5
 *      size_t          count;          // r6
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r5                          // move low word of 64-bit user address to r11
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      addr64_t        src;            // r3 and r4
 *      vm_offset_t     dst;            // r5
 *      size_t          count;          // r6
 */

        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r3                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crset   kkIn                            // flag as copyin
                                                // falls through into copyJoin


// Common code to handle setup for all the copy variants:
//      r2 = caller's cr3
//      r3 = source if copyout
//      r5 = dest if copyin
//      r6 = buffer length or count
//      r7 = count output ptr (if kkString set)
//     r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
//     r11 = low word of 64-bit user-space address
//     cr3 = kkIn, kkString, kkNull flags
//
// A zero length leaves through copyinout_0 before any stack frame is pushed.
// Otherwise a kkFrameSize frame is pushed, the arguments and the caller's r31
// are saved in it, and r31 is cleared.

copyJoin:
        crclr   kkNull                          // (dst==NULL) convention not used with this call
copyJoin1:                                      // enter from copyinstr, which has set kkNull itself
        mflr    r0                              // get return address
        cmplwi  r6,0                            // buffer length 0?
        lis     r9,0x1000                       // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)               // save return
        cmplw   cr1,r6,r9                       // buffer length > 256MB ?
        mfsprg  r8,2                            // get the features
        beq--   copyinout_0                     // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)             // set up stack frame
        stw     r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
        mtcrf   0x02,r8                         // move pf64Bit to cr6
        stw     r3,kkSource(r1)                 // save args across MapUserMemoryWindow
        stw     r5,kkDest(r1)
        stw     r6,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb                // remember if this is a 64-bit processor
        stw     r7,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)               // we use r31 globally for mapped user ptr
        li      r31,0                           // no mapped ptr yet


// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB. This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction
// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
// the mapping.
// On entry here cr1 holds the comparison of the buffer length (r6) with 256MB (r9).

        ble++   cr1,copyin0                     // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big      // error if not string op
        mr      r6,r9                           // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)                // update saved copy too


// Set up thread_recover in case we hit an illegal address.
// The previous recovery pointer is kept in the frame (kkThrErrJmp) so that the
// exit path at copyinx can put it back.

copyin0:
        mfsprg  r8,1                            // Get the current thread
        lis     r2,hi16(copyinout_error)
        ori     r2,r2,lo16(copyinout_error)     // r2 <- address of our recovery handler
        lwz     r4,THREAD_RECOVER(r8)           // r4 <- previous recovery pointer
        lwz     r3,ACT_VMMAP(r8)                // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r8)           // install copyinout_error as the recovery handler
        stw     r4,kkThrErrJmp(r1)              // remember the previous recovery pointer


// Map user segment into kernel map, turn on 64-bit mode. At this point:
//      r3 = vm map
//      r6 = buffer length
//  r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
//
// When we call MapUserMemoryWindow, we pass:
//      r3 = vm map ptr
//  r4/r5 = 64-bit user space address as an addr64_t

        mr      r4,r10                          // copy user ptr into r4/r5
        mr      r5,r11

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it
#endif

        bl      EXT(MapUserMemoryWindow)        // get r3/r4 <- 64-bit address in kernel map of user operand

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it
#endif

        mr      r31,r4                          // r31 <- mapped ptr into user space (may be 64-bit)
        bf--    kk64bit,copyin1                 // skip if a 32-bit processor

        rldimi  r31,r3,32,0                     // slam high-order bits into mapped ptr
        mfmsr   r4                              // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT             // light bit 0
        mtmsrd  r4                              // turn on 64-bit mode
        isync                                   // wait for mode to change


// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)                // restore length to copy
        bf      kkIn,copyin2                    // skip if copyout
        lwz     r4,kkDest(r1)                   // copyin: dest is kernel ptr
        mr      r3,r31                          // source is mapped ptr
        b       copyin3
copyin2:                                        // handle copyout
        lwz     r3,kkSource(r1)                 // source is kernel buffer (r3 at entry)
        mr      r4,r31                          // dest is mapped ptr into user space


// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//     r31 = mapped ptr returned by MapUserMemoryWindow
//     cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString             // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                      // copyin and copyout: let bcopy do the work
        li      r3,0                            // return success


// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
// and pop off frame.
//      r3 = 0, EFAULT, or ENAMETOOLONG
//
// The exit restores, in order: 32-bit mode (on 64-bit processors), the return
// address, the caller's r31, the stack pointer, the thread's previous recovery
// pointer, and the caller's cr3.

copyinx:
        lwz     r2,kkCR3(r1)                    // get callers cr3
        mfsprg  r6,1                            // Get the current thread
        bf--    kk64bit,copyinx1                // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1          // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                             // turn SF off
        isync                                   // wait for the mode to change
copyinx1:
        lwz     r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
        lwz     r31,kkR31Save(r1)               // restore callers r31
        lwz     r4,kkThrErrJmp(r1)              // load saved thread recover
        addi    r1,r1,kkFrameSize               // pop off our stack frame
        mtlr    r0
        stw     r4,THREAD_RECOVER(r6)           // restore thread recover
        mtcrf   0x10,r2                         // restore cr3
        blr


/* We get here via the exception handler if an illegal
 * user memory reference was made. This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                       // return error
        b       copyinx


// Zero-length request. Taken from copyJoin1 before the stack frame is pushed,
// so only cr3 (still held in r2) has to be put back.

copyinout_0:                                    // degenerate case: 0-length copy
        mtcrf   0x10,r2                         // restore cr3
        li      r3,0                            // return success
        blr


// copyin/copyout request larger than 256MB. Taken after the stack frame has
// been pushed and r31 has been saved and cleared, but before the recovery
// pointer, r2, LR or the MSR have been touched. So we must undo the frame and
// r31; everything else is still the caller's.

copyinout_too_big:                              // degenerate case
        mtcrf   0x10,r2                         // restore cr3
        lwz     r31,kkR31Save(r1)               // restore caller's r31, which copyJoin1 zeroed
        lwz     r1,0(r1)                        // pop off stack frame
        li      r3,ENAMETOOLONG
        blr


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//     r31 = mapped ptr returned by MapUserMemoryWindow
//     cr3 = kkIn, kkString, kkNull, and kk64bit flags
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary. The crossover at which the word loop becomes
// faster is about seven bytes, counting the zero.
//
// We first must word-align the source ptr, in order to avoid taking a spurious
// page fault.
//
// After the setup below:
//      r2  = byte offset of the 1st source byte within its word
//      r6  = buffer length plus r2 (ie, measured from the start of that word)
//      r8  = first source word with the bytes before the 1st source byte forced to 0xFF
//      r10 = 0xFEFEFEFF, r11 = 0x80808080
//      r12 = ptr to 1st source byte
//      ctr = number of whole words in r6

copyString:
        cmplwi  cr1,r5,15                       // is buffer very short?
        mr      r12,r3                          // remember ptr to 1st source byte
        mtctr   r5                              // assuming short, set up loop count for bytes
        blt--   cr1,copyinstr8                  // too short for word loop
        rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
        rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
        li      r7,-1
        sub     r3,r3,r2                        // word-align source address
        add     r6,r5,r2                        // get length starting at byte 0 in word
        srw     r7,r7,r9                        // get mask for bytes in first word
        srwi    r0,r6,2                         // get #words in buffer
        lwz     r5,0(r3)                        // get aligned word with first source byte
        lis     r10,hi16(0xFEFEFEFF)            // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        mtctr   r0                              // set up word loop count
        addi    r3,r3,4                         // advance past the source word
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        orc     r8,r5,r7                        // map bytes preceding first source byte into 0xFF
        bt--    kkNull,copyinstr5enter          // enter loop that just counts

// Special case 1st word, which has been 0xFF filled on left. Note that we use
// "and.", even though we execute both in 32 and 64-bit mode. This is OK.

        slw     r5,r5,r9                        // left justify payload bytes
        add     r9,r10,r8                       // r9 =  data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        subfic  r0,r2,4                         // get r0 <- #payload bytes in 1st word
        and.    r7,r9,r7                        // if r7==0, then all bytes in r8 are nonzero
        stw     r5,0(r4)                        // copy payload bytes to dest buffer
        add     r4,r4,r0                        // then point to next byte in dest buffer
        bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found

        b       copyinstr7                      // 0 found (buffer can't be full)


// Word loop(s). They do a word-parallel search for 0s, using the following
// inobvious but very efficient test:
//      y =  data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
// of this loop, one that just counts and another that copies.
//      r3 = ptr to next word of source (word aligned)
//      r4 = ptr to next byte in buffer
//      r6 = original buffer length (adjusted to be word origin)
//     r10 = 0xFEFEFEFF
//     r11 = 0x80808080
//     r12 = ptr to 1st source byte (used to determine string length)
//     ctr = number of words left to examine
// Both loops stop when ctr runs out or when cr0_eq goes off (a 0 byte was
// detected), and then continue at copyinstr7: the counting loop through an
// explicit branch, the copying loop by falling through.

        .align  5                               // align inner loops for speed
copyinstr5:                                     // version that counts but does not copy
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
copyinstr5enter:                                // entry with the 1st word already in r8
        add     r9,r10,r8                       // r9 =  data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        bdnzt   cr0_eq,copyinstr5               // if r7==0, then all bytes in r8 are nonzero

        b       copyinstr7

        .align  5                               // align inner loops for speed
copyinstr6:                                     // version that counts and copies
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
        addi    r4,r4,4                         // increment dest ptr while we wait for data
        add     r9,r10,r8                       // r9 =  data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        stw     r8,-4(r4)                       // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6               // if r7==0, then all bytes are nonzero


// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
//      r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
//      r6 = original buffer length (adjusted to be word origin)
//      r7 = computed vector of 0x00 and 0x80 bytes
//      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
//     r12 = ptr to 1st source byte (used to determine string length)
//     cr0 = beq set iff 0 not found

copyinstr7:
        rlwinm  r2,r8,7,0,31                    // move 0x01 bits to 0x80 position
        rlwinm  r6,r6,0,0x3                     // mask down to partial byte count in last word
        andc    r7,r7,r2                        // turn off false hits from 0x0100 worst case
        crnot   kkZero,cr0_eq                   // 0 found iff cr0_eq is off
        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
        cmpwi   r6,0                            // any bytes left over in last word?
        cntlzw  r7,r7                           // now we can find the 0 byte (ie, the 0x80)
        subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
        srwi    r7,r7,3                         // convert 8,16,24,32 to 1,2,3,4
        add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
        bt++    kkZero,copyinstr10              // 0 found, so done

        beq     copyinstr10                     // r6==0, so buffer truly full
        mtctr   r6                              // 0 not found, loop over r6 bytes
        b       copyinstr8                      // enter byte loop for last 1-3 leftover bytes


// Byte loop. This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.
//      r3 = ptr to next byte of source
//      r4 = ptr to next dest byte
//     r12 = ptr to first byte of source
//     ctr = count of bytes to check
// The loop ends when the 0 byte has been read or ctr reaches zero; kkZero then
// records whether the last byte examined was the 0.

        .align  5                               // align inner loops for speed
copyinstr8:                                     // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                         // increment dest addr whether we store or not
        cmpwi   r0,0                            // the 0?
        bt--    kkNull,copyinstr9               // don't store if copyinstr with NULL ptr
        stb     r0,-1(r4)
copyinstr9:
        bdnzf   cr0_eq,copyinstr8               // loop if byte not 0 and more room in buffer

        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled


// Buffer filled or 0 found. Unwind and return.
//      r3 = ptr to 1st source byte not transferred
//     r12 = ptr to 1st source byte
//     r31 = mapped ptr returned by MapUserMemoryWindow
//     cr3 = kkZero set iff 0 found
//
// Stores (r3 - r12) through the saved count pointer, then leaves through
// copyinx with r3 = 0 if the terminating 0 was found, or ENAMETOOLONG if the
// buffer filled first.

copyinstr10:
        lwz     r9,kkCountPtr(r1)               // get ptr to place to store count of bytes moved
        sub     r2,r3,r12                       // compute #bytes copied (including the 0)
        li      r3,0                            // assume success return status
        stw     r2,0(r9)                        // store #bytes moved
        bt++    kkZero,copyinx                  // we did find the 0 so return 0
        li      r3,ENAMETOOLONG                 // buffer filled
        b       copyinx                         // join main exit routine

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copypv(source, sink, size, which)
 *      addr64_t src;           // r3 and r4
 *      addr64_t dst;           // r5 and r6
 *      size_t   size;          // r7
 *      int      which;         // r8
 *
 * Operand size bytes are copied from operand src into operand dst. The source and
 * destination operand addresses are given as addr64_t, and may designate starting
 * locations in physical or virtual memory in any combination except where both are
 * virtual. Virtual memory locations may be in either the kernel or the current thread's
 * address space. Operand size may be up to 256MB.
 *
 * Operation is controlled by operand which, which offers these options:
 *      cppvPsrc     : source operand is (1) physical or (0) virtual
 *      cppvPsnk     : destination operand is (1) physical or (0) virtual
 *      cppvKmap     : virtual operand is in (1) kernel or (0) current thread
 *      cppvFsnk     : (1) flush destination before and after transfer
 *      cppvFsrc     : (1) flush source before and after transfer
 *      cppvNoModSnk : (1) don't set destination operand's changed bit(s)
 *      cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
 *
 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
 * This section describes the operation of the new 64-bit path.
 *
 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
 * window in the kernel address space into all of physical RAM plus the I/O hole.
Since * the window's mappings specify the proper access policies for the underlying memory, * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk * and cppvFsrc are ignored. Physical operand adresses are relocated into the physical * memory window, and are accessed with data relocation on. Virtual addresses are either * within the kernel, or are mapped into the kernel address space through the user memory * window. Because accesses to a virtual operand are performed with data relocation on, * the new path does not have to translate the address, disable/enable interrupts, lock * the mapping, or update referenced and changed bits. * * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is * a substantial performance penalty for copypv operating in real mode. Utilizing the * new 64-bit path, transfer performance increases >100% on the G5. * * The attentive reader may notice that mtmsrd ops are not followed by isync ops as * might be expected. The 970 follows PowerPC architecture version 2.01, which defines * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer * required. * * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need * to call 32-bit functions, which would lead to the high-order 32 bits of our values * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles * in our own stack frame across calls to 32-bit functions. * */ // Map operand which bits into non-volatile CR2 and CR3 bits. 
#define whichAlign  ((3+1)*4)
#define whichMask   0x007F0000
#define pvPsnk      (cppvPsnkb - whichAlign)
#define pvPsrc      (cppvPsrcb - whichAlign)
#define pvFsnk      (cppvFsnkb - whichAlign)
#define pvFsrc      (cppvFsrcb - whichAlign)
#define pvNoModSnk  (cppvNoModSnkb - whichAlign)
#define pvNoRefSrc  (cppvNoRefSrcb - whichAlign)
#define pvKmap      (cppvKmapb - whichAlign)
#define pvNoCache   cr2_lt

// copypv: entry point and 64-bit path.
//
// Inputs, as consumed below:
//      r3/r4   source addr64_t (high word / low word)
//      r5/r6   destination addr64_t (high word / low word)
//      r7      byte count
//      r8      'which' option bits
// Result in r3:
//      0       success (a zero byte count is also reported as success)
//      EINVAL  both operands virtual, or byte count >= 2**28
//      EFAULT  the recovery handler copypv_error was entered during the copy
//
// Processors without the pf64Bit feature branch straight to hw_copypv_32().
//
// The 'which' bits are rotated left by whichAlign and masked with whichMask, then loaded
// into CR fields cr2 and cr3; the pvXxx names above are the resulting CR bit numbers.
// pvNoCache is an internal flag in the same fields: when set, bcopy_nc() is used in place
// of bcopy().

            .align  5
            .globl  EXT(copypv)

LEXT(copypv)
            mfsprg  r10,2                       // get feature flags
            mtcrf   0x02,r10                    // we need to test pf64Bit
            bt++    pf64Bitb,copypv_64          // skip if 64-bit processor (only they take hint)

            b       EXT(hw_copypv_32)           // carry on with 32-bit copypv

// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
copypv_64:
            mfsprg  r9,1                        // get current thread
            stwu    r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1) // allocate stack frame and link it
            mflr    r0                          // get return address
            mfcr    r10                         // get cr2 and cr3
            lwz     r12,THREAD_RECOVER(r9)      // get error callback
            stw     r26,FM_ARG0+0x00(r1)        // save non-volatile r26
            stw     r27,FM_ARG0+0x04(r1)        // save non-volatile r27
            stw     r28,FM_ARG0+0x08(r1)        // save non-volatile r28
            stw     r29,FM_ARG0+0x0C(r1)        // save non-volatile r29
            stw     r30,FM_ARG0+0x10(r1)        // save non-volatile r30
            stw     r31,FM_ARG0+0x14(r1)        // save non-volatile r31
            stw     r12,FM_ARG0+0x20(r1)        // save error callback
            stw     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) // save return address
            stw     r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) // save non-volatile cr2 and cr3

// Non-volatile register usage in this routine is:
//  r26: saved msr image (later reused as the page count in the ref/chg loops)
//  r27: current pmap_t / virtual source address (later reused as the page number in the ref/chg loops)
//  r28: destination virtual address
//  r29: source address
//  r30: destination address
//  r31: byte count to copy
//  cr2/3: parameter 'which' bits

            rlwinm  r8,r8,whichAlign,whichMask  // align and mask which bits
            mr      r31,r7                      // copy size to somewhere non-volatile
            mtcrf   0x20,r8                     // insert which bits into cr2 and cr3
            mtcrf   0x10,r8                     // insert which bits into cr2 and cr3
            rlwinm  r29,r3,0,1,0                // form source address high-order bits
            rlwinm  r30,r5,0,1,0                // form destination address high-order bits
            rlwimi  r29,r4,0,0,31               // form source address low-order bits
            rlwimi  r30,r6,0,0,31               // form destination address low-order bits
            crand   cr7_lt,pvPsnk,pvPsrc        // are both operand addresses physical?
            cntlzw  r0,r31                      // count leading zeroes in byte count
            cror    cr7_eq,pvPsnk,pvPsrc        // cr7_eq <- source or destination is physical
            bf--    cr7_eq,copypv_einval        // both operands may not be virtual
            cmplwi  r0,4                        // byte count greater than or equal 256M (2**28)?
            blt--   copypv_einval               // byte count too big, give EINVAL
            cmplwi  r31,0                       // byte count zero?
            beq--   copypv_zero                 // early out
            bt      cr7_lt,copypv_phys          // both operand addresses are physical
            mr      r28,r30                     // assume destination is virtual
            bf      pvPsnk,copypv_dv            // is destination virtual?
            mr      r28,r29                     // no, so source must be virtual
copypv_dv:
            lis     r27,ha16(EXT(kernel_pmap))  // get kernel's pmap_t *, high-order
            lwz     r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
            bt      pvKmap,copypv_kern          // virtual address in kernel map?
            lwz     r3,ACT_VMMAP(r9)            // get user's vm_map *
            rldicl  r4,r28,32,32                // r4, r5 <- addr64_t virtual address
            rldicl  r5,r28,0,32
            std     r29,FM_ARG0+0x30(r1)        // preserve 64-bit r29 across 32-bit call
            std     r30,FM_ARG0+0x38(r1)        // preserve 64-bit r30 across 32-bit call
            bl      EXT(MapUserMemoryWindow)    // map slice of user space into kernel space
            ld      r29,FM_ARG0+0x30(r1)        // restore 64-bit r29
            ld      r30,FM_ARG0+0x38(r1)        // restore 64-bit r30
            rlwinm  r28,r3,0,1,0                // convert relocated addr64_t virtual address
            rlwimi  r28,r4,0,0,31               //  into a single 64-bit scalar
copypv_kern:

// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
// update the referenced and changed bits manually after the copy. So, force the appropriate
// flag bit on for the virtual operand.
            crorc   pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
            crorc   pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit

// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
            lis     r0,hi16(MASK(MSR_VEC))      // get vector mask
            ori     r0,r0,lo16(MASK(MSR_FP))    // insert fp mask
            mfmsr   r26                         // save current msr
            andc    r26,r26,r0                  // turn off VEC and FP in saved copy
            ori     r0,r0,lo16(MASK(MSR_EE))    // add EE to our mask
            andc    r0,r26,r0                   // disable EE in our new msr image
            mtmsrd  r0                          // introduce new msr image

// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
// try to find a mapping corresponding to this address in order to determine whether the address
// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
// examine the mapping's caching-inhibited bit.
            mr      r3,r27                      // r3 <- pmap_t pmap
            rldicl  r4,r28,32,32                // r4, r5 <- addr64_t va
            rldicl  r5,r28,0,32
            la      r6,FM_ARG0+0x18(r1)         // r6 <- addr64_t *nextva
            li      r7,1                        // r7 <- int full, search nested mappings
            std     r26,FM_ARG0+0x28(r1)        // preserve 64-bit r26 across 32-bit calls
            std     r28,FM_ARG0+0x30(r1)        // preserve 64-bit r28 across 32-bit calls
            std     r29,FM_ARG0+0x38(r1)        // preserve 64-bit r29 across 32-bit calls
            std     r30,FM_ARG0+0x40(r1)        // preserve 64-bit r30 across 32-bit calls
            bl      EXT(mapping_find)           // find mapping for virtual operand
            mr.     r3,r3                       // did we find it?
            beq     copypv_nomapping            // nope, so we'll assume it's cacheable
            lwz     r4,mpVAddr+4(r3)            // get low half of virtual addr for hw flags
            rlwinm. r4,r4,0,mpIb-32,mpIb-32     // caching-inhibited bit set?
            crnot   pvNoCache,cr0_eq            // if it is, use bcopy_nc
            bl      EXT(mapping_drop_busy)      // drop busy on the mapping (r3 still holds it)
copypv_nomapping:
            ld      r26,FM_ARG0+0x28(r1)        // restore 64-bit r26
            ld      r28,FM_ARG0+0x30(r1)        // restore 64-bit r28
            ld      r29,FM_ARG0+0x38(r1)        // restore 64-bit r29
            ld      r30,FM_ARG0+0x40(r1)        // restore 64-bit r30
            mtmsrd  r26                         // restore msr to its previous state

// Set both the source and destination virtual addresses to the virtual operand's address --
// we'll overlay one of them with the physical operand's address.
            mr      r27,r28                     // make virtual operand BOTH source AND destination

// Now we're ready to relocate the physical operand address(es) into the physical memory window.
// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
// The test used is "exactly 32 leading zeros in the 64-bit address", i.e. the address lies in
// the range 2**31 to 2**32-1.
copypv_phys:
            ld      r6,lgPMWvaddr(0)            // get physical memory window virtual address
            bf      pvPsnk,copypv_dstvirt       // is destination address virtual?
            cntlzd  r4,r30                      // count leading zeros in destination address
            cmplwi  r4,32                       // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
            cror    pvNoCache,cr0_eq,pvNoCache  // use bcopy_nc for I/O hole locations
            add     r28,r30,r6                  // relocate physical destination into physical window
copypv_dstvirt:
            bf      pvPsrc,copypv_srcvirt       // is source address virtual?
            cntlzd  r4,r29                      // count leading zeros in source address
            cmplwi  r4,32                       // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
            cror    pvNoCache,cr0_eq,pvNoCache  // use bcopy_nc for I/O hole locations
            add     r27,r29,r6                  // relocate physical source into physical window
copypv_srcvirt:

// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
// control block.
            mfsprg  r8,1                        // get current threads stuff
            lis     r3,hi16(copypv_error)       // get our error callback's address, high
            ori     r3,r3,lo16(copypv_error)    // get our error callback's address, low
            stw     r3,THREAD_RECOVER(r8)       // set our error callback

// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
// 64-bit mode.
            li      r0,1                        // get a handy one bit
            mfmsr   r3                          // get current msr
            rldimi  r3,r0,63,MSR_SF_BIT         // set SF bit on in our msr copy
            mtmsrd  r3                          // enter 64-bit mode

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
            bf      pvFsrc,copypv_nfs           // do we flush the source?
            rldicl  r3,r27,32,32                // r3, r4 <- addr64_t source virtual address
            rldicl  r4,r27,0,32
            mr      r5,r31                      // r5 <- count (in bytes)
            li      r6,0                        // r6 <- boolean phys (false, not physical)
            bl      EXT(flush_dcache)           // flush the source operand
copypv_nfs:
            bf      pvFsnk,copypv_nfdx          // do we flush the destination?
            rldicl  r3,r28,32,32                // r3, r4 <- addr64_t destination virtual address
            rldicl  r4,r28,0,32
            mr      r5,r31                      // r5 <- count (in bytes)
            li      r6,0                        // r6 <- boolean phys (false, not physical)
            bl      EXT(flush_dcache)           // flush the destination operand
copypv_nfdx:
#endif

// Call bcopy or bcopy_nc to perform the copy.
            mr      r3,r27                      // r3 <- source virtual address
            mr      r4,r28                      // r4 <- destination virtual address
            mr      r5,r31                      // r5 <- bytes to copy
            bt      pvNoCache,copypv_nc         // take non-caching route
            bl      EXT(bcopy)                  // call bcopy to do the copying
            b       copypv_copydone
copypv_nc:
            bl      EXT(bcopy_nc)               // call bcopy_nc to do the copying
copypv_copydone:

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
            bf      pvFsrc,copypv_nfsx          // do we flush the source?
            rldicl  r3,r27,32,32                // r3, r4 <- addr64_t source virtual address
            rldicl  r4,r27,0,32
            mr      r5,r31                      // r5 <- count (in bytes)
            li      r6,0                        // r6 <- boolean phys (false, not physical)
            bl      EXT(flush_dcache)           // flush the source operand
copypv_nfsx:
            bf      pvFsnk,copypv_nfd           // do we flush the destination?
            rldicl  r3,r28,32,32                // r3, r4 <- addr64_t destination virtual address
            rldicl  r4,r28,0,32
            mr      r5,r31                      // r5 <- count (in bytes)
            li      r6,0                        // r6 <- boolean phys (false, not physical)
            bl      EXT(flush_dcache)           // flush the destination operand
copypv_nfd:
#endif

// Leave 64-bit mode.
            mfmsr   r3                          // get current msr
            rldicl  r3,r3,0,MSR_SF_BIT+1        // clear SF bit in our copy
            mtmsrd  r3                          // leave 64-bit mode

// If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
//
// The number of 4K pages touched by an operand is
//      ((offset of first byte within its page + byte count - 1) >> 12) + 1
// The byte count is nonzero and below 2**28 here (both were checked on entry), so the sum fits
// in 32 bits and only the low-order word of r31 matters. Each loop walks consecutive page
// numbers starting at the operand's first page and stops early at the first page that
// mapping_phys_lookup() does not find.
            bt      pvNoModSnk,copypv_nomod     // skip destination update if not requested
            std     r29,FM_ARG0+0x30(r1)        // preserve 64-bit r29 across 32-bit calls
            rlwinm  r3,r30,0,20,31              // r3 <- offset of first destination byte within its page
            add     r3,r3,r31                   // r3 <- offset just past last destination byte
            subi    r3,r3,1                     // r3 <- offset of last destination byte
            rlwinm  r3,r3,20,12,31              // r3 <- (32-bit) r3 >> 12, index of last page touched
            addi    r26,r3,1                    // r26 <- 4K-page count
            srdi    r27,r30,12                  // r27 <- destination page number
copypv_modloop:
            mr      r3,r27                      // r3 <- destination page number
            la      r4,FM_ARG0+0x18(r1)         // r4 <- unsigned int *pindex
            bl      EXT(mapping_phys_lookup)    // see if page is really there
            mr.     r3,r3                       // is it?
            beq--   copypv_modend               // nope, break out of modify loop
            mr      r3,r27                      // r3 <- destination page number
            bl      EXT(mapping_set_mod)        // set page changed status
            addi    r27,r27,1                   // advance to next destination page
            subi    r26,r26,1                   // decrement page count
            cmpwi   r26,0                       // done yet?
            bgt     copypv_modloop              // nope, iterate
copypv_modend:
            ld      r29,FM_ARG0+0x30(r1)        // restore 64-bit r29
copypv_nomod:
            bt      pvNoRefSrc,copypv_done      // skip source update if not requested
copypv_debugref:
            rlwinm  r3,r29,0,20,31              // r3 <- offset of first source byte within its page
            add     r3,r3,r31                   // r3 <- offset just past last source byte
            subi    r3,r3,1                     // r3 <- offset of last source byte
            rlwinm  r3,r3,20,12,31              // r3 <- (32-bit) r3 >> 12, index of last page touched
            addi    r26,r3,1                    // r26 <- 4K-page count
            srdi    r27,r29,12                  // r27 <- source page number
copypv_refloop:
            mr      r3,r27                      // r3 <- source page number
            la      r4,FM_ARG0+0x18(r1)         // r4 <- unsigned int *pindex
            bl      EXT(mapping_phys_lookup)    // see if page is really there
            mr.     r3,r3                       // is it?
            beq--   copypv_done                 // nope, break out of reference loop
            mr      r3,r27                      // r3 <- source page number
            bl      EXT(mapping_set_ref)        // set page referenced status
            addi    r27,r27,1                   // advance to next source page
            subi    r26,r26,1                   // decrement page count
            cmpwi   r26,0                       // done yet?
            bgt     copypv_refloop              // nope, iterate

// Return, indicating success.
copypv_done:
copypv_zero:
            li      r3,0                        // our efforts were crowned with success

// Pop frame, restore caller's non-volatiles, put back the recovery routine pointer that was
// in effect on entry. r3 (the return code) is left untouched.
copypv_return:
            mfsprg  r9,1                        // get current threads stuff
            lwz     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) // get return address
            lwz     r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) // get non-volatile cr2 and cr3
            lwz     r26,FM_ARG0+0x00(r1)        // restore non-volatile r26
            lwz     r27,FM_ARG0+0x04(r1)        // restore non-volatile r27
            mtlr    r0                          // restore return address
            lwz     r28,FM_ARG0+0x08(r1)        // restore non-volatile r28
            mtcrf   0x20,r4                     // restore non-volatile cr2
            mtcrf   0x10,r4                     // restore non-volatile cr3
            lwz     r11,FM_ARG0+0x20(r1)        // get error callback saved on entry
            lwz     r29,FM_ARG0+0x0C(r1)        // restore non-volatile r29
            lwz     r30,FM_ARG0+0x10(r1)        // restore non-volatile r30
            lwz     r31,FM_ARG0+0x14(r1)        // restore non-volatile r31
            stw     r11,THREAD_RECOVER(r9)      // put back the error callback saved on entry
            lwz     r1,0(r1)                    // release stack frame

            blr                                 // y'all come back now

// Invalid argument handler.
copypv_einval:
            li      r3,EINVAL                   // invalid argument
            b       copypv_return               // return

// Error encountered during bcopy or bcopy_nc. This address is installed in THREAD_RECOVER
// before the copy, while the processor is in 64-bit mode, so the first job is to drop SF.
copypv_error:
            mfmsr   r3                          // get current msr
            rldicl  r3,r3,0,MSR_SF_BIT+1        // clear SF bit in our copy
            mtmsrd  r3                          // leave 64-bit mode
            li      r3,EFAULT                   // it was all his fault
            b       copypv_return               // return