/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

#include <debug.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <assym.s>
#include <sys/errno.h>

#define INSTRUMENT 0

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 */

        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                         // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                         // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte   // r9 <- cache line size

        subfic  r4,r9,PPC_PGBYTES           // r4 <- starting offset in page

        bt++    pf64Bitb,page0S4            // Go do the big guys...

        slwi    r3,r3,12                    // get page address from page num
        b       page_zero_1                 // Jump to line aligned loop...

        .align  5

        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                    // get page address from page num

page_zero_1:                                // loop zeroing cache lines
        sub.    r5,r4,r9                    // more to go?
        dcbz128 r3,r4                       // zero either 32 or 128 bytes
        sub     r4,r5,r9                    // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1

        b       EXT(ml_restore)             // restore MSR and do the isync

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t    src;
 *      addr64_t    dst;
 *      int         bytecount;
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 */

        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        rlwinm  r3,r3,0,1,0                 ; Duplicate high half of long long paddr into top of reg
        mflr    r12                         // get return address
        rlwimi  r3,r4,0,0,31                ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                 ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                ; Combine bottom of long long to full 64-bits
        mtlr    r12                         // restore return address
        subic.  r5,r7,4                     // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                                // loop copying words
        subic.  r5,r5,4                     // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                     // restore count
        ble     phys_copy_4                 // no more

        // Loop is aligned here

phys_copy_3:                                // loop copying bytes
        subic.  r5,r5,1                     // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)             // restore MSR and do the isync
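/*
 * For reference, the copy strategy above amounts to the following C sketch
 * (illustrative only -- the real routine runs with translation disabled, so
 * the pointers are physical addresses):
 *
 *      void phys_copy_sketch(char *src, char *dst, int count) {
 *          while (count >= 4) {                // word loop
 *              *(int *)dst = *(int *)src;
 *              src += 4;  dst += 4;  count -= 4;
 *          }
 *          while (count-- > 0)                 // byte loop for the tail
 *              *dst++ = *src++;
 *      }
 */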
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t     src;
 *      ppnum_t     dst;
 *
 * This routine will copy the physical page src to physical page dst
 *
 * This routine assumes that the src and dst are page numbers and that the
 * destination is cached.  It runs on 32 and 64 bit processors, with and
 * without altivec, and with 32 and 128 byte cache lines.
 * We also must assume that no-one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class.
 *
 */
#define kSFSize     (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))      ; Get the vector flag
        mflr    r0                          // get return
        ori     r2,r2,lo16(MASK(MSR_FP))    ; Add the FP flag
        stw     r0,8(r1)                    // save
        stwu    r1,-kSFSize(r1)             // set up a stack frame for VRs or FPRs
        mfmsr   r11                         // save MSR at entry
        mfsprg  r10,2                       // get feature flags
        andc    r11,r11,r2                  // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))    // Get EE on also
        andc    r2,r11,r2                   // Clear out EE as well
        mtcrf   0x02,r10                    // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)          // must enable FP for G3...
        mtcrf   0x80,r10                    // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))   // enable altivec for G4 (ignored if G3)
        mtmsr   r2                          // turn EE off, FP and VEC on
        isync

        bt++    pf64Bitb,pmap_copy_64       // skip if 64-bit processor (only they take hint)

        slwi    r3,r3,12                    // get page address from page num
        slwi    r4,r4,12                    // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1  // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4     // altivec but not 64-bit means G4

        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)            // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32           // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                         // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                          // loop over 32-byte cache lines
        dcbz    0,r4                        // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                        // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                                // wait for stores to take
        subi    r4,r4,PPC_PGBYTES           // restore ptr to destination page
        li      r6,PPC_PGBYTES-32           // point to last line in page
pmap_g3_icache_flush:
        subic.  r5,r6,32                    // more to go?
        icbi    r4,r6                       // flush another line in icache
        subi    r6,r5,32                    // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                          // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)            // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore             // restore MSR and done
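/*
 * In effect, each of the tuned copy loops in this routine performs the
 * following per-line operation (a C-style sketch; dcbz and dcbst/dcbf
 * denote the cache ops issued on the destination line):
 *
 *      for (line = 0; line < PPC_PGBYTES/linesize; line++) {
 *          dcbz(dst);                  // establish dest line without reading RAM
 *          copy_line(dst, src);        // move one cache line through FPRs/VRs/GPRs
 *          dcbst(dst);                 // push the finished line out to RAM
 *          src += linesize;  dst += linesize;
 *      }
 */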
        // G4 -- copy using VRs

pmap_copy_g4:                               // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)           // r9 <- place on stack where we save VRs
        li      r5,16                       // load x-form offsets into r5-r9
        li      r6,32                       // another offset
        stvx    v0,0,r9                     // save some VRs so we can use to copy
        li      r7,48                       // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64           // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                       // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128
        mtmsr   r12                         // now we've saved VRs on stack, turn off DR
        isync                               // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                           // align inner loops
pmap_g4_copy_loop:                          // loop over 64-byte chunks
        dcbt    r3,r8                       // touch 3 lines ahead
        nop                                 // avoid a 17-word loop...
        dcbt    r3,r9                       // touch 4 lines ahead
        nop                                 // more padding
        dcba    0,r4                        // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                     // offset 0
        lvx     v1,r5,r3                    // offset 16
        lvx     v2,r6,r3                    // offset 32
        lvx     v3,r7,r3                    // offset 48
        addi    r3,r3,64
        dcba    r6,r4                       // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                     // offset 0
        stvx    v1,r5,r4                    // offset 16
        stvx    v2,r6,r4                    // offset 32
        stvx    v3,r7,r4                    // offset 48
        dcbf    0,r4                        // push line 1
        dcbf    r6,r4                       // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                                // wait for stores to take
        subi    r4,r4,PPC_PGBYTES           // restore ptr to destination page
        li      r8,PPC_PGBYTES-32           // point to last line in page
pmap_g4_icache_flush:
        subic.  r9,r8,32                    // more to go?
        icbi    r4,r8                       // flush from icache
        subi    r8,r9,32                    // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                          // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)           // get base of VR save area
        lvx     v0,0,r9                     // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

pmap_g4_restore:                            // r11=MSR
        mtmsr   r11                         // turn EE on, VEC and FP off
        isync                               // wait for it to happen
        addi    r1,r1,kSFSize               // pop off our stack frame
        lwz     r0,8(r1)                    // restore return address
        mtlr    r0
        blr

        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                               // r10=features, r11=old MSR
        sldi    r3,r3,12                    // get page address from page num
        sldi    r4,r4,12                    // get page address from page num
        la      r9,FM_SIZE+16(r1)           // get base of VR save area
        li      r5,16                       // load x-form offsets into r5-r9
        li      r6,32                       // another offset
        bf      pfAltivecb,pmap_novmx_copy  // altivec suppressed...
        stvx    v0,0,r9                     // save 8 VRs so we can copy w/o bubbles
        stvx    v1,r5,r9
        li      r7,48                       // another offset
        li      r0,PPC_PGBYTES/128          // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                    // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)             // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0                   // turn off DR bit
        li      r0,1                        // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT        // set SF bit (bit 0)
        li      r8,-128                     // offset so we can reach back one line
        mtmsrd  r12                         // now we've saved VRs, turn DR off and SF on
        isync                               // wait for it to happen
        dcbt128 0,r3,1                      // start a forward stream
        b       pmap_64_copy_loop

        .align  5                           // align inner loops
pmap_64_copy_loop:                          // loop over 128-byte chunks
        dcbz128 0,r4                        // avoid read of destination line
        lvx     v0,0,r3                     // offset 0
        lvx     v1,r5,r3                    // offset 16
        lvx     v2,r6,r3                    // offset 32
        lvx     v3,r7,r3                    // offset 48
        addi    r3,r3,64                    // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                     // offset 64
        lvx     v5,r5,r3                    // offset 80
        lvx     v6,r6,r3                    // offset 96
        lvx     v7,r7,r3                    // offset 112
        addi    r3,r3,64
        stvx    v0,0,r4                     // offset 0
        stvx    v1,r5,r4                    // offset 16
        stvx    v2,r6,r4                    // offset 32
        stvx    v3,r7,r4                    // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                     // offset 64
        stvx    v5,r5,r4                    // offset 80
        stvx    v6,r6,r4                    // offset 96
        stvx    v7,r7,r4                    // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                       // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                // wait for stores to take
        subi    r4,r4,PPC_PGBYTES           // restore ptr to destination page
        li      r8,PPC_PGBYTES-128          // point to last line in page
pmap_64_icache_flush:
        subic.  r9,r8,128                   // more to go?
        icbi    r4,r8                       // flush from icache
        subi    r8,r9,128                   // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                          // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)           // get base address of VR save area on stack
        lvx     v0,0,r9                     // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore             // restore lower half of MSR and return
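/*
 * Every variant of the copy finishes with the same instruction-cache
 * invalidation pass, since the destination page may later hold executable
 * code.  Roughly, in C-style pseudocode (icbi and sync stand for the
 * instructions actually issued above):
 *
 *      for (off = PPC_PGBYTES - linesize; off >= 0; off -= linesize)
 *          icbi(dst + off);            // invalidate one icache line
 *      sync();                         // ensure invalidates complete
 */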
//
// Copy on 64-bit without VMX
//

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128          // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)             // get DR bit
        andc    r12,r2,r0                   // turn off DR bit
        li      r0,1                        // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT        // set SF bit (bit 0)
        mtmsrd  r12                         // turn DR off and SF on
        isync                               // wait for it to happen
        dcbt128 0,r3,1                      // start a forward stream

pmap_novmx_copy_loop:                       // loop over 128-byte cache lines
        dcbz128 0,r4                        // avoid read of dest line

        ld      r0,0(r3)                    // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                    // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)                   // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)
        addi    r3,r3,128

        std     r0,64(r4)                   // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                        // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                                // wait for stores to take
        subi    r4,r4,PPC_PGBYTES           // restore ptr to destination page
        li      r8,PPC_PGBYTES-128          // point to last line in page

pmap_novmx_icache_flush:
        subic.  r9,r8,128                   // more to go?
        icbi    r4,r8                       // flush from icache
        subi    r8,r9,128                   // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                          // turn DR back on, SF off
        isync

        b       pmap_g4_restore             // restore lower half of MSR and return

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run both on 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserAddressSpace will be 64 bits however on 64-bit machines.  Beware to avoid
// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.

#define kkFrameSize (FM_SIZE+32)

#define kkBufSize   (FM_SIZE+0)
#define kkCR        (FM_SIZE+4)
#define kkSource    (FM_SIZE+8)
#define kkDest      (FM_SIZE+12)
#define kkCountPtr  (FM_SIZE+16)
#define kkR31Save   (FM_SIZE+20)

// nonvolatile CR bits we use as flags in cr3

#define kk64bit     12
#define kkNull      13
#define kkIn        14
#define kkString    15
#define kkZero      15
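/*
 * Viewed as a C struct overlaid on the stack frame (a sketch for clarity
 * only; field names mirror the offsets #defined above, and FM_SIZE is the
 * fixed linkage area defined in the headers):
 *
 *      struct kkFrame {
 *          unsigned char   linkage[FM_SIZE];
 *          unsigned int    kkBufSize;      // FM_SIZE+0   buffer length
 *          unsigned int    kkCR;           // FM_SIZE+4   caller's CR
 *          unsigned int    kkSource;       // FM_SIZE+8   source arg
 *          unsigned int    kkDest;         // FM_SIZE+12  dest arg
 *          unsigned int    kkCountPtr;     // FM_SIZE+16  ptr to byte count
 *          unsigned int    kkR31Save;      // FM_SIZE+20  caller's r31
 *          unsigned char   pad[8];         // rounds frame to FM_SIZE+32
 *      };
 */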
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      vm_size_t   maxcount;
 *      vm_size_t   *count;
 *
 * Set *count to the number of bytes copied.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2                          // we use nonvolatile cr3
        li      r0,0
        crset   kkString                    // flag as a string op
        mr      r10,r4                      // for copyout, dest ptr (r4) is in user space
        stw     r0,0(r6)                    // initialize #bytes moved
        crclr   kkIn                        // flag as copyout
        b       copyJoin

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      vm_size_t   maxcount;
 *      vm_size_t   *count;
 *
 * Set *count to the number of bytes copied.
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2                          // we use nonvolatile cr3
        cmplwi  r4,0                        // dst==NULL?
        li      r0,0
        crset   kkString                    // flag as a string op
        mr      r10,r3                      // for copyin, source ptr (r3) is in user space
        crmove  kkNull,cr0_eq               // remember if (dst==NULL)
        stw     r0,0(r6)                    // initialize #bytes moved
        crset   kkIn                        // flag as copyin (rather than copyout)
        b       copyJoin1                   // skip over the "crclr kkNull"

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      size_t      count;
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                    ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc2                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc3                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc4                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)   ; INSTRUMENT - Save it
#endif
        mfcr    r2                          // save caller's CR
        crclr   kkString                    // not a string version
        mr      r10,r4                      // dest (r4) is user-space ptr
        crclr   kkIn                        // flag as copyout
        b       copyJoin

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      size_t      count;
 */

        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2                          // save caller's CR
        crclr   kkString                    // not a string version
        mr      r10,r3                      // source (r3) is user-space ptr in copyin
        crset   kkIn                        // flag as copyin

// Common code to handle setup for all the copy variants:
//      r2 = caller's CR, since we use cr3
//   r3-r6 = parameters
//     r10 = user-space ptr (r3 if copyin, r4 if copyout)
//     cr3 = kkIn, kkString, kkNull flags

copyJoin:
        crclr   kkNull                      // (dst==NULL) convention not used with this call
copyJoin1:                                  // enter from copyinstr with kkNull set
        mflr    r0                          // get return address
        cmplwi  r5,0                        // buffer length 0?
        lis     r9,0x1000                   // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)           // save return
        cmplw   cr1,r5,r9                   // buffer length > 256MB ?
        mfsprg  r8,2                        // get the features
        beq--   copyinout_0                 // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)         // set up stack frame
        stw     r2,kkCR(r1)                 // save caller's CR since we use cr3
        mtcrf   0x02,r8                     // move pf64Bit to cr6
        stw     r3,kkSource(r1)             // save args across MapUserAddressSpace
        stw     r4,kkDest(r1)
        stw     r5,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb            // remember if this is a 64-bit processor
        stw     r6,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)           // we use r31 globally for mapped user ptr
        li      r31,0                       // no mapped ptr yet
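/*
 * The common path that follows is, in outline (a C-style sketch of the
 * assembly below, not a real routine):
 *
 *      if (len == 0)
 *          return 0;                       // degenerate case
 *      if (len > 0x10000000) {             // more than 256MB?
 *          if (!string_op) return ENAMETOOLONG;
 *          len = 0x10000000;               // silently clamp string ops
 *      }
 *      thread->recover = copyinout_error;  // DSI on user memory unwinds here
 *      mapped = MapUserAddressSpace(map, user_ptr, len);
 *      if (mapped == 0) return EFAULT;
 *      ... bcopy() for copyin/copyout, or the string loops for *str ...
 */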
// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserAddressSpace; we don't want to consume more than two segments for
// the mapping.

        ble++   cr1,copyin0                 // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big  // error if not string op
        mr      r5,r9                       // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)            // update saved copy too

// Set up thread_recover in case we hit an illegal address.

copyin0:
        mfsprg  r8,1                        /* Get the current act */
        lis     r2,hi16(copyinout_error)
        lwz     r7,ACT_THREAD(r8)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r3,ACT_VMMAP(r8)            // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r7)

// Map user segment into kernel map, turn on 64-bit mode.
//      r3 = vm map
//      r5 = buffer length
//     r10 = user space ptr (r3 if copyin, r4 if copyout)

        mr      r6,r5                       // Set length to map
        li      r4,0                        // Note: we only do this 32-bit for now
        mr      r5,r10                      // arg2 <- user space ptr
#if INSTRUMENT
        mfspr   r12,pmc1                    ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc2                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc3                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc4                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)   ; INSTRUMENT - Save it
#endif
        bl      EXT(MapUserAddressSpace)    // set r3 <- address in kernel map of user operand
#if INSTRUMENT
        mfspr   r12,pmc1                    ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc2                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc3                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)   ; INSTRUMENT - Save it
        mfspr   r12,pmc4                    ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)   ; INSTRUMENT - Save it
#endif
        or.     r0,r3,r4                    // Did we fail the mapping?
        mr      r31,r4                      // r31 <- mapped ptr into user space (may be 64-bit)
        beq--   copyinout_error             // was 0, so there was an error making the mapping
        bf--    kk64bit,copyin1             // skip if a 32-bit processor

        rldimi  r31,r3,32,0                 // slam high-order bits into mapped ptr
        mfmsr   r4                          // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT         // light bit 0
        mtmsrd  r4                          // turn on 64-bit mode
        isync                               // wait for mode to change

// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)            // restore length to copy
        bf      kkIn,copyin2                // skip if copyout
        lwz     r4,kkDest(r1)               // copyin: source is mapped, dest is r4 at entry
        mr      r3,r31                      // source is mapped ptr
        b       copyin3
copyin2:                                    // handle copyout
        lwz     r3,kkSource(r1)             // source is kernel buffer (r3 at entry)
        mr      r4,r31                      // dest is mapped ptr into user space

// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//     r31 = mapped ptr returned by MapUserAddressSpace
//     cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString         // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                  // copyin and copyout: let bcopy do the work
        li      r3,0                        // return success

// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Clear recovery ptr,
// and pop off frame.  Note that we have kept the mapped ptr into user space in r31,
// as a reg64_t type (ie, a 64-bit ptr on 64-bit machines.)
// We must unpack r31 into an addr64_t in (r3,r4) before passing it to
// ReleaseUserAddressSpace.
//      r3 = 0, EFAULT, or ENAMETOOLONG

copyinx:
        lwz     r2,kkCR(r1)                 // get caller's cr3
        mfsprg  r6,1                        // Get the current act
        lwz     r10,ACT_THREAD(r6)

        bf--    kk64bit,copyinx1            // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1      // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                         // turn SF off and EE back on
        isync                               // wait for the mode to change
copyinx1:
        lwz     r31,kkR31Save(r1)           // restore caller's r31
        addi    r1,r1,kkFrameSize           // pop off our stack frame
        lwz     r0,FM_LR_SAVE(r1)
        li      r4,0
        stw     r4,THREAD_RECOVER(r10)      // Clear recovery
        mtlr    r0
        mtcrf   0x10,r2                     // restore cr3
        blr

/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                   // return error
        b       copyinx

copyinout_0:                                // degenerate case: 0-length copy
        mtcrf   0x10,r2                     // restore cr3
        li      r3,0                        // return success
        blr

copyinout_too_big:                          // degenerate case
        mtcrf   0x10,r2                     // restore cr3
        lwz     r1,0(r1)                    // pop off stack frame
        li      r3,ENAMETOOLONG
        blr

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//     r31 = mapped ptr returned by MapUserAddressSpace
//     cr3 = kkIn, kkString, kkNull, and kk64bit flags
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary.

copyString:
        li      r12,0                       // Set header bytes count to zero
        cmplwi  cr1,r5,20                   // is buffer very short?
        mtctr   r5                          // assuming short, set up loop count for bytes
        blt     cr1,copyinstr8              // too short for word loop
        andi.   r12,r3,0x3                  // is source ptr word aligned?
        bne     copyinstr11                 // bytes loop
copyinstr1:
        srwi    r6,r5,2                     // get #words in buffer
        mtctr   r6                          // set up word loop count
        lis     r10,hi16(0xFEFEFEFF)        // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        bf      kkNull,copyinstr6           // enter loop that copies
        b       copyinstr5                  // use loop that just counts

// Word loop(s).  They do a word-parallel search for 0s, using the following
// inobvious but very efficient test:
//      y =  data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero.  We need two copies of
// this loop, since if we test kkNull in the loop then it becomes 9 words long.
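/*
 * The same zero-byte test in C, for reference (a sketch; 0xFEFEFEFF is the
 * two's complement of 0x01010101, so the add is really a subtract):
 *
 *      int has_zero_byte(unsigned int data) {
 *          unsigned int y = data + 0xFEFEFEFF;     // data - 0x01010101
 *          unsigned int z = ~data & 0x80808080;
 *          return (y & z) != 0;                    // true iff some byte == 0
 *      }
 *
 * Locating which byte was 0 needs one more step (see copyinstr7 below):
 * 0x01 bytes preceding the first 0 also raise a flag, so those false hits
 * are first cleared with  flags &= ~rotl(data, 7)  before counting leading
 * zeroes to find the true 0 byte.
 */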
        .align  5                           // align inner loops for speed
copyinstr5:                                 // version that counts but does not copy
        lwz     r8,0(r3)                    // get next word of source
        addi    r3,r3,4                     // increment source ptr
        add     r9,r10,r8                   // r9 =  data + 0xFEFEFEFF
        andc    r7,r11,r8                   // r7 = ~data & 0x80808080
        and.    r7,r9,r7                    // r7 = r9 & r7
        bdnzt   cr0_eq,copyinstr5           // if r7==0, then all bytes are nonzero
        b       copyinstr7

        .align  5                           // align inner loops for speed
copyinstr6:                                 // version that counts and copies
        lwz     r8,0(r3)                    // get next word of source
        addi    r3,r3,4                     // increment source ptr
        addi    r4,r4,4                     // increment dest ptr while we wait for data
        add     r9,r10,r8                   // r9 =  data + 0xFEFEFEFF
        andc    r7,r11,r8                   // r7 = ~data & 0x80808080
        and.    r7,r9,r7                    // r7 = r9 & r7
        stw     r8,-4(r4)                   // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6           // if r7==0, then all bytes are nonzero

// Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.

copyinstr7:
        crnot   kkZero,cr0_eq               // 0 found iff cr0_eq is off
        mfctr   r6                          // get #words remaining in buffer
        rlwinm  r2,r8,7,0,31                // move 0x01 bits to 0x80 position
        slwi    r6,r6,2                     // convert to #bytes remaining
        andc    r7,r7,r2                    // turn off false hits from 0x0100 worst case
        rlwimi  r6,r5,0,30,31               // add in odd bytes leftover in buffer
        srwi    r7,r7,8                     // we want to count the 0 as a byte xferred
        addi    r6,r6,4                     // don't count last word xferred (yet)
        cntlzw  r7,r7                       // now we can find the 0 byte (ie, the 0x80)
        srwi    r7,r7,3                     // convert 8,16,24,32 to 1,2,3,4
        sub.    r6,r6,r7                    // account for nonzero bytes in last word
        bt++    kkZero,copyinstr10          // 0 found, so done
        beq     copyinstr10                 // r6==0, so buffer truly full
        mtctr   r6                          // 0 not found, loop over r6 bytes
        b       copyinstr8                  // enter byte loop for last 1-3 leftover bytes

// Byte loop.  This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.

        .align  5                           // align inner loops for speed
copyinstr8:                                 // loop over bytes of source
        lbz     r0,0(r3)                    // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                     // increment dest addr whether we store or not
        cmpwi   r0,0                        // the 0?
        bt--    kkNull,copyinstr9           // don't store (was copyinstr with NULL ptr)
        stb     r0,-1(r4)
copyinstr9:
        bdnzf   cr0_eq,copyinstr8           // loop if byte not 0 and more room in buffer

        mfctr   r6                          // get #bytes left in buffer
        crmove  kkZero,cr0_eq               // remember if 0 found or buffer filled

// Buffer filled or 0 found.  Unwind and return.
//      r5 = kkBufSize, ie buffer length
//      r6 = untransferred bytes remaining in buffer
//     r31 = mapped ptr returned by MapUserAddressSpace
//     cr3 = kkZero set iff 0 found

copyinstr10:
        lwz     r9,kkCountPtr(r1)           // get ptr to place to store count of bytes moved
        sub     r2,r5,r6                    // get #bytes we moved, counting the 0 iff any
        add     r2,r2,r12                   // add the header bytes count
        li      r3,0                        // assume 0 return status
        stw     r2,0(r9)                    // store #bytes moved
        bt++    kkZero,copyinx              // we did find the 0 so return 0

        li      r3,ENAMETOOLONG             // buffer filled
        b       copyinx                     // join main exit routine

// Byte loop.  This is used on the header bytes for unaligned source.

        .align  5                           // align inner loops for speed
copyinstr11:
        li      r10,4                       // load word size
        sub     r12,r10,r12                 // set the header bytes count
        mtctr   r12                         // set up bytes loop count
copyinstr12:                                // loop over bytes of source
        lbz     r0,0(r3)                    // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                     // increment dest addr whether we store or not
        cmpwi   r0,0                        // the 0?
        bt--    kkNull,copyinstr13          // don't store (was copyinstr with NULL ptr)
        stb     r0,-1(r4)
copyinstr13:
        bdnzf   cr0_eq,copyinstr12          // loop if byte not 0 and more room in buffer
        sub     r5,r5,r12                   // subtract the bytes copied
        bne     cr0_eq,copyinstr1           // branch to word loop

        mr      r5,r12                      // Get the header bytes count
        li      r12,0                       // Clear the header bytes count
        mfctr   r6                          // get #bytes left in buffer
        crmove  kkZero,cr0_eq               // remember if 0 found or buffer filled
        b       copyinstr10