/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#include <assym.s>
#include <sys/errno.h>
-#define INSTRUMENT 0
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
* void pmap_zero_page(vm_offset_t pa)
*
- * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
- * and handles 32 and 128-byte cache lines.
+ * Zero a page of physical memory.
*/
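
/*
 * In C terms the data-cache loop below behaves roughly like the sketch
 * that follows (illustrative only: PPC_PGBYTES and CACHE_LINE_SIZE come
 * from the kernel headers, and the real code uses dcbz so each cache line
 * is zeroed in the cache without first being read from memory):
 *
 *	#include <string.h>
 *
 *	static void zero_phys_page(char *pa)
 *	{
 *		for (unsigned off = 0; off < PPC_PGBYTES; off += CACHE_LINE_SIZE)
 *			memset(pa + off, 0, CACHE_LINE_SIZE);	// one dcbz per line
 *		// an icbi sweep then invalidates the page in the i-cache
 *	}
 */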
+#if DEBUG
+ /* C debug stub in pmap.c calls this */
+ENTRY(pmap_zero_page_assembler, TAG_NO_FRAME_USED)
+#else
+ENTRY(pmap_zero_page, TAG_NO_FRAME_USED)
+#endif /* DEBUG */
+
+ mfmsr r6 /* Get the MSR */
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1 /* Turn off DR */
+ rlwinm r7,r7,0,MSR_EE_BIT+1,MSR_EE_BIT-1 ; Disable interruptions
+ li r4,PPC_PGBYTES-CACHE_LINE_SIZE /* Point to the end of the page */
+ mtmsr r7 /* Set MSR to DR off */
+ isync /* Ensure data translations are off */
+
+
+.L_phys_zero_loop:
+ subic. r5,r4,CACHE_LINE_SIZE /* Point to the next one */
+ dcbz r4, r3 /* Clear the whole thing to 0s */
+ subi r4,r5,CACHE_LINE_SIZE /* Point to the next one */
+ dcbz r5, r3 /* Clear the next to zeros */
+ bgt+ .L_phys_zero_loop /* Keep going until we do the page... */
+
+ sync /* Make sure they're all done */
+ li r4,PPC_PGBYTES-CACHE_LINE_SIZE /* Point to the end of the page */
+
+.L_inst_inval_loop:
+ subic. r5,r4,CACHE_LINE_SIZE /* Point to the next one */
+	icbi	r4, r3		/* Invalidate this line in the i-cache */
+ subi r4,r5,CACHE_LINE_SIZE /* Point to the next one */
+	icbi	r5, r3		/* Invalidate the next line */
+ bgt+ .L_inst_inval_loop /* Keep going until we do the page... */
+
+ sync /* Make sure they're all done */
+
+ mtmsr r6 /* Restore original translations */
+ isync /* Ensure data translations are on */
- .align 5
- .globl EXT(pmap_zero_page)
-
-LEXT(pmap_zero_page)
-
- mflr r12 // save return address
- bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
- mtlr r12 // restore return address
- andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
-
- subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page
-
- bt++ pf64Bitb,page0S4 // Go do the big guys...
-
- slwi r3,r3,12 // get page address from page num
- b page_zero_1 // Jump to line aligned loop...
-
- .align 5
-
- nop
- nop
- nop
- nop
- nop
- nop
- nop
-
-page0S4:
- sldi r3,r3,12 // get page address from page num
-
-page_zero_1: // loop zeroing cache lines
- sub. r5,r4,r9 // more to go?
- dcbz128 r3,r4 // zero either 32 or 128 bytes
- sub r4,r5,r9 // generate next offset
- dcbz128 r3,r5
- bne-- page_zero_1
-
- b EXT(ml_restore) // restore MSR and do the isync
-
+ blr
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
* phys_copy(src, dst, bytecount)
- * addr64_t src;
- * addr64_t dst;
+ * vm_offset_t src;
+ * vm_offset_t dst;
* int bytecount
*
* This routine will copy bytecount bytes from physical address src to physical
- * address dst. It runs in 64-bit mode if necessary, but does not handle
- * overlap or make any attempt to be optimal. Length must be a signed word.
- * Not performance critical.
+ * address dst.
*/
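
/*
 * Ignoring the MSR/translation handling, the copy itself is a plain
 * word-then-byte loop, roughly equivalent to this sketch (illustrative
 * only, and it assumes word-aligned addresses for the C version; the
 * assembly below biases the pointers so it can address each element at a
 * fixed displacement and advance the pointers afterwards):
 *
 *	static void phys_copy_sketch(const char *src, char *dst, int count)
 *	{
 *		while (count > 3) {				// whole words first
 *			*(int *)dst = *(const int *)src;
 *			src += 4; dst += 4; count -= 4;
 *		}
 *		while (count > 0) {				// then any leftover bytes
 *			*dst++ = *src++;
 *			count--;
 *		}
 *	}
 */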
+ENTRY(phys_copy, TAG_NO_FRAME_USED)
+
+ /* Switch off data translations */
+ mfmsr r6
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+ rlwinm r7, r7, 0, MSR_EE_BIT+1, MSR_EE_BIT-1
+ mtmsr r7
+ isync /* Ensure data translations are off */
+
+ subi r3, r3, 4
+ subi r4, r4, 4
+
+ cmpwi r5, 3
+ ble- .L_phys_copy_bytes
+.L_phys_copy_loop:
+ lwz r0, 4(r3)
+ addi r3, r3, 4
+ subi r5, r5, 4
+ stw r0, 4(r4)
+ addi r4, r4, 4
+ cmpwi r5, 3
+ bgt+ .L_phys_copy_loop
+
+ /* If no leftover bytes, we're done now */
+ cmpwi r5, 0
+ beq+ .L_phys_copy_done
+
+.L_phys_copy_bytes:
+ addi r3, r3, 3
+ addi r4, r4, 3
+.L_phys_copy_byte_loop:
+ lbz r0, 1(r3)
+ addi r3, r3, 1
+ subi r5, r5, 1
+ stb r0, 1(r4)
+ addi r4, r4, 1
+ cmpwi r5, 0
+ bne+ .L_phys_copy_byte_loop
+
+.L_phys_copy_done:
+ mtmsr r6 /* Restore original translations */
+	isync			/* Ensure translations are restored */
+
+ blr
- .align 5
- .globl EXT(phys_copy)
-
-LEXT(phys_copy)
-
- rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
- mflr r12 // get return address
- rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
- rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
- bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
- rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
- mtlr r12 // restore return address
- subic. r5,r7,4 // a word to copy?
- b phys_copy_2
-
- .align 5
-
-phys_copy_1: // loop copying words
- subic. r5,r5,4 // more to go?
- lwz r0,0(r3)
- addi r3,r3,4
- stw r0,0(r4)
- addi r4,r4,4
-phys_copy_2:
- bge phys_copy_1
- addic. r5,r5,4 // restore count
- ble phys_copy_4 // no more
-
- // Loop is aligned here
-
-phys_copy_3: // loop copying bytes
- subic. r5,r5,1 // more to go?
- lbz r0,0(r3)
- addi r3,r3,1
- stb r0,0(r4)
- addi r4,r4,1
- bgt phys_copy_3
-phys_copy_4:
- b EXT(ml_restore) // restore MSR and do the isync
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
* pmap_copy_page(src, dst)
- * ppnum_t src;
- * ppnum_t dst;
+ * vm_offset_t src;
+ * vm_offset_t dst;
*
* This routine will copy the physical page src to physical page dst
*
- * This routine assumes that the src and dst are page numbers and that the
- * destination is cached. It runs on 32 and 64 bit processors, with and
- * without altivec, and with 32 and 128 byte cache lines.
- * We also must assume that no-one will be executing within the destination
- * page, and that this will be used for paging. Because this
- * is a common routine, we have tuned loops for each processor class.
+ * This routine assumes that the src and dst are page aligned and that the
+ * destination is cached.
+ *
+ * We must also assume that no one will be executing within the destination
+ * page, and that this will be used for paging.
*
*/
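
/*
 * Stripped of the cache and MSR manipulation, the copy amounts to the
 * sketch below (illustrative only; the real loop moves one 32-byte cache
 * line per iteration through f0-f3 with lfd/stfd, prefetches the next
 * source line with dcbt, and pushes each destination line out with dcbst):
 *
 *	#include <stdint.h>
 *
 *	static void copy_phys_page(const uint64_t *src, uint64_t *dst)
 *	{
 *		for (unsigned i = 0; i < PPC_PGBYTES / 8; i += 4) {
 *			dst[i+0] = src[i+0];		// 8 bytes at a time, four per
 *			dst[i+1] = src[i+1];		// cache line, matching the
 *			dst[i+2] = src[i+2];		// lfd/stfd pairs in the loop
 *			dst[i+3] = src[i+3];		// below
 *		}
 *		// an icbi sweep then removes stale instructions for the
 *		// destination page from the i-cache
 *	}
 */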
-#define kSFSize (FM_SIZE+160)
+#if DEBUG
+	/* If DEBUG, we have a little piece of C around this
+	 * in pmap.c that gives some tracing ability
+ */
+ENTRY(pmap_copy_page_assembler, TAG_NO_FRAME_USED)
+#else
ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
-
- lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
- mflr r0 // get return
- ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
- stw r0,8(r1) // save
- stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
- mfmsr r11 // save MSR at entry
- mfsprg r10,2 // get feature flags
- andc r11,r11,r2 // Clear out vec and fp
- ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
- andc r2,r11,r2 // Clear out EE as well
- mtcrf 0x02,r10 // we need to test pf64Bit
- ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
- mtcrf 0x80,r10 // we need to test pfAltivec too
- oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
- mtmsr r2 // turn EE off, FP and VEC on
- isync
- bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
- slwi r3,r3,12 // get page address from page num
- slwi r4,r4,12 // get page address from page num
- rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
- bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
-
-
- // G3 -- copy using FPRs
-
- stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
- stfd f1,FM_SIZE+8(r1)
- li r5,PPC_PGBYTES/32 // count of cache lines in a page
- stfd f2,FM_SIZE+16(r1)
- mtctr r5
- stfd f3,FM_SIZE+24(r1)
- mtmsr r12 // turn off DR after saving FPRs on stack
- isync
-
-pmap_g3_copy_loop: // loop over 32-byte cache lines
- dcbz 0,r4 // avoid read of dest line
- lfd f0,0(r3)
- lfd f1,8(r3)
- lfd f2,16(r3)
- lfd f3,24(r3)
- addi r3,r3,32
- stfd f0,0(r4)
- stfd f1,8(r4)
- stfd f2,16(r4)
- stfd f3,24(r4)
- dcbst 0,r4 // flush dest line to RAM
- addi r4,r4,32
- bdnz pmap_g3_copy_loop
-
- sync // wait for stores to take
-        subi	r4,r4,PPC_PGBYTES	// restore ptr to destination page
- li r6,PPC_PGBYTES-32 // point to last line in page
-pmap_g3_icache_flush:
- subic. r5,r6,32 // more to go?
- icbi r4,r6 // flush another line in icache
- subi r6,r5,32 // get offset to next line
- icbi r4,r5
- bne pmap_g3_icache_flush
-
- sync
- mtmsr r2 // turn DR back on
- isync
- lfd f0,FM_SIZE+0(r1) // restore the FPRs
- lfd f1,FM_SIZE+8(r1)
- lfd f2,FM_SIZE+16(r1)
- lfd f3,FM_SIZE+24(r1)
-
- b pmap_g4_restore // restore MSR and done
-
-
- // G4 -- copy using VRs
-
-pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
- la r9,FM_SIZE+16(r1) // place where we save VRs to r9
- li r5,16 // load x-form offsets into r5-r9
- li r6,32 // another offset
- stvx v0,0,r9 // save some VRs so we can use to copy
- li r7,48 // another offset
- stvx v1,r5,r9
- li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
- stvx v2,r6,r9
- mtctr r0
- li r8,96 // get look-ahead for touch
- stvx v3,r7,r9
- li r9,128
- mtmsr r12 // now we've saved VRs on stack, turn off DR
- isync // wait for it to happen
- b pmap_g4_copy_loop
-
- .align 5 // align inner loops
-pmap_g4_copy_loop: // loop over 64-byte chunks
- dcbt r3,r8 // touch 3 lines ahead
- nop // avoid a 17-word loop...
- dcbt r3,r9 // touch 4 lines ahead
- nop // more padding
- dcba 0,r4 // avoid pre-fetch of 1st dest line
- lvx v0,0,r3 // offset 0
- lvx v1,r5,r3 // offset 16
- lvx v2,r6,r3 // offset 32
- lvx v3,r7,r3 // offset 48
- addi r3,r3,64
- dcba r6,r4 // avoid pre-fetch of 2nd line
- stvx v0,0,r4 // offset 0
- stvx v1,r5,r4 // offset 16
- stvx v2,r6,r4 // offset 32
- stvx v3,r7,r4 // offset 48
- dcbf 0,r4 // push line 1
- dcbf r6,r4 // and line 2
- addi r4,r4,64
- bdnz pmap_g4_copy_loop
-
- sync // wait for stores to take
-        subi	r4,r4,PPC_PGBYTES	// restore ptr to destination page
- li r8,PPC_PGBYTES-32 // point to last line in page
-pmap_g4_icache_flush:
- subic. r9,r8,32 // more to go?
- icbi r4,r8 // flush from icache
- subi r8,r9,32 // get offset to next line
- icbi r4,r9
- bne pmap_g4_icache_flush
-
- sync
- mtmsr r2 // turn DR back on
- isync
- la r9,FM_SIZE+16(r1) // get base of VR save area
- lvx v0,0,r9 // restore the VRs
- lvx v1,r5,r9
- lvx v2,r6,r9
- lvx v3,r7,r9
-
-pmap_g4_restore: // r11=MSR
- mtmsr r11 // turn EE on, VEC and FR off
- isync // wait for it to happen
- addi r1,r1,kSFSize // pop off our stack frame
- lwz r0,8(r1) // restore return address
- mtlr r0
- blr
-
-
- // 64-bit/128-byte processor: copy using VRs
-
-pmap_copy_64: // r10=features, r11=old MSR
- sldi r3,r3,12 // get page address from page num
- sldi r4,r4,12 // get page address from page num
- la r9,FM_SIZE+16(r1) // get base of VR save area
- li r5,16 // load x-form offsets into r5-r9
- li r6,32 // another offset
- bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
- stvx v0,0,r9 // save 8 VRs so we can copy wo bubbles
- stvx v1,r5,r9
- li r7,48 // another offset
- li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
- stvx v2,r6,r9
- stvx v3,r7,r9
- addi r9,r9,64 // advance base ptr so we can store another 4
- mtctr r0
- li r0,MASK(MSR_DR) // get DR bit
- stvx v4,0,r9
- stvx v5,r5,r9
- andc r12,r2,r0 // turn off DR bit
- li r0,1 // get a 1 to slam into SF
- stvx v6,r6,r9
- stvx v7,r7,r9
- rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
- li r8,-128 // offset so we can reach back one line
- mtmsrd r12 // now we've saved VRs, turn DR off and SF on
- isync // wait for it to happen
- dcbt128 0,r3,1 // start a forward stream
- b pmap_64_copy_loop
-
- .align 5 // align inner loops
-pmap_64_copy_loop: // loop over 128-byte chunks
- dcbz128 0,r4 // avoid read of destination line
- lvx v0,0,r3 // offset 0
- lvx v1,r5,r3 // offset 16
- lvx v2,r6,r3 // offset 32
- lvx v3,r7,r3 // offset 48
- addi r3,r3,64 // don't have enough GPRs so add 64 2x
- lvx v4,0,r3 // offset 64
- lvx v5,r5,r3 // offset 80
- lvx v6,r6,r3 // offset 96
- lvx v7,r7,r3 // offset 112
- addi r3,r3,64
- stvx v0,0,r4 // offset 0
- stvx v1,r5,r4 // offset 16
- stvx v2,r6,r4 // offset 32
- stvx v3,r7,r4 // offset 48
- addi r4,r4,64
- stvx v4,0,r4 // offset 64
- stvx v5,r5,r4 // offset 80
- stvx v6,r6,r4 // offset 96
- stvx v7,r7,r4 // offset 112
- addi r4,r4,64
- dcbf r8,r4 // flush the line we just wrote
- bdnz pmap_64_copy_loop
-
- sync // wait for stores to take
-        subi	r4,r4,PPC_PGBYTES	// restore ptr to destination page
- li r8,PPC_PGBYTES-128 // point to last line in page
-pmap_64_icache_flush:
- subic. r9,r8,128 // more to go?
- icbi r4,r8 // flush from icache
- subi r8,r9,128 // get offset to next line
- icbi r4,r9
- bne pmap_64_icache_flush
-
- sync
- mtmsrd r2 // turn DR back on, SF off
- isync
- la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
- lvx v0,0,r9 // restore the VRs
- lvx v1,r5,r9
- lvx v2,r6,r9
- lvx v3,r7,r9
- addi r9,r9,64
- lvx v4,0,r9
- lvx v5,r5,r9
- lvx v6,r6,r9
- lvx v7,r7,r9
-
- b pmap_g4_restore // restore lower half of MSR and return
-
- //
- // Copy on 64-bit without VMX
- //
-
-pmap_novmx_copy:
- li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
- mtctr r0
- li r0,MASK(MSR_DR) // get DR bit
- andc r12,r2,r0 // turn off DR bit
- li r0,1 // get a 1 to slam into SF
- rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
- mtmsrd r12 // now we've saved VRs, turn DR off and SF on
- isync // wait for it to happen
- dcbt128 0,r3,1 // start a forward stream
-
-pmap_novmx_copy_loop: // loop over 128-byte cache lines
- dcbz128 0,r4 // avoid read of dest line
-
- ld r0,0(r3) // Load half a line
- ld r12,8(r3)
- ld r5,16(r3)
- ld r6,24(r3)
- ld r7,32(r3)
- ld r8,40(r3)
- ld r9,48(r3)
- ld r10,56(r3)
-
- std r0,0(r4) // Store half a line
- std r12,8(r4)
- std r5,16(r4)
- std r6,24(r4)
- std r7,32(r4)
- std r8,40(r4)
- std r9,48(r4)
- std r10,56(r4)
-
- ld r0,64(r3) // Load half a line
- ld r12,72(r3)
- ld r5,80(r3)
- ld r6,88(r3)
- ld r7,96(r3)
- ld r8,104(r3)
- ld r9,112(r3)
- ld r10,120(r3)
-
- addi r3,r3,128
-
- std r0,64(r4) // Store half a line
- std r12,72(r4)
- std r5,80(r4)
- std r6,88(r4)
- std r7,96(r4)
- std r8,104(r4)
- std r9,112(r4)
- std r10,120(r4)
-
- dcbf 0,r4 // flush the line we just wrote
- addi r4,r4,128
- bdnz pmap_novmx_copy_loop
-
- sync // wait for stores to take
-        subi	r4,r4,PPC_PGBYTES	// restore ptr to destination page
- li r8,PPC_PGBYTES-128 // point to last line in page
-
-pmap_novmx_icache_flush:
- subic. r9,r8,128 // more to go?
- icbi r4,r8 // flush from icache
- subi r8,r9,128 // get offset to next line
- icbi r4,r9
- bne pmap_novmx_icache_flush
-
- sync
- mtmsrd r2 // turn DR back on, SF off
- isync
-
- b pmap_g4_restore // restore lower half of MSR and return
-
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+#endif /* DEBUG */
+
+#if 0
+ mfpvr r9 ; Get the PVR
+ rlwinm r9,r9,16,16,31 ; Isolate the PPC processor
+ cmplwi r9,PROCESSOR_VERSION_Max ; Do we have Altivec?
+ beq+ wegotaltivec ; Yeah...
+#endif
-// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
-// These routines all run both on 32 and 64-bit machines, though because they are called
-// by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
-// by MapUserAddressSpace will, however, be 64 bits on 64-bit machines. Avoid using
-// compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
-// is no need to store or load it, which are mode-dependent operations since it could be
-// 32 or 64 bits.
-
-#define kkFrameSize (FM_SIZE+32)
-
-#define kkBufSize (FM_SIZE+0)
-#define kkCR (FM_SIZE+4)
-#define kkSource (FM_SIZE+8)
-#define kkDest (FM_SIZE+12)
-#define kkCountPtr (FM_SIZE+16)
-#define kkR31Save (FM_SIZE+20)
-
-
-// nonvolatile CR bits we use as flags in cr3
-
-#define kk64bit 12
-#define kkNull 13
-#define kkIn 14
-#define kkString 15
-#define kkZero 15
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+ mfmsr r9 ; Get the MSR
+ rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ stwu r1,-(FM_SIZE+32)(r1) ; Make a frame for us
+ rlwinm r7,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1 ; Disable interruptions
+ ori r7,r7,lo16(MASK(MSR_FP)) ; Turn on the FPU
+ mtmsr r7 ; Disable rupts and enable FPU
+ isync
+
+ stfd f0,FM_SIZE+0(r1) ; Save an FP register
+ rlwinm r7,r7,0,MSR_DR_BIT+1,MSR_DR_BIT-1 ; Clear the DDAT bit
+ stfd f1,FM_SIZE+8(r1) ; Save an FP register
+ addi r6,r3,PPC_PGBYTES ; Point to the start of the next page
+ stfd f2,FM_SIZE+16(r1) ; Save an FP register
+ mr r8,r4 ; Save the destination
+ stfd f3,FM_SIZE+24(r1) ; Save an FP register
+
+ mtmsr r7 ; Set the new MSR
+ isync ; Ensure data translations are off
+
+ dcbt br0, r3 /* Start in first input line */
+ li r5, CACHE_LINE_SIZE /* Get the line size */
+
+.L_pmap_copy_page_loop:
+ dcbz 0, r4 /* Allocate a line for the output */
+ lfd f0, 0(r3) /* Get first 8 */
+ lfd f1, 8(r3) /* Get second 8 */
+ lfd f2, 16(r3) /* Get third 8 */
+ stfd f0, 0(r4) /* Put first 8 */
+ dcbt r5, r3 /* Start next line coming in */
+ lfd f3, 24(r3) /* Get fourth 8 */
+ stfd f1, 8(r4) /* Put second 8 */
+ addi r3,r3,CACHE_LINE_SIZE /* Point to the next line in */
+ stfd f2, 16(r4) /* Put third 8 */
+ cmplw cr0,r3,r6 /* See if we're finished yet */
+ stfd f3, 24(r4) /* Put fourth 8 */
+ dcbst br0,r4 /* Force it out */
+ addi r4,r4,CACHE_LINE_SIZE /* Point to the next line out */
+ blt+ .L_pmap_copy_page_loop /* Copy the whole page */
+
+ sync /* Make sure they're all done */
+ li r4,PPC_PGBYTES-CACHE_LINE_SIZE /* Point to the end of the page */
+
+invalinst:
+ subic. r5,r4,CACHE_LINE_SIZE /* Point to the next one */
+ icbi r4, r8 /* Trash the i-cache */
+ subi r4,r5,CACHE_LINE_SIZE /* Point to the next one */
+ icbi r5, r8 /* Trash the i-cache */
+ bgt+ invalinst /* Keep going until we do the page... */
+
+ rlwimi r7,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Set DDAT if on
+ sync ; Make sure all invalidates done
+
+ mtmsr r7 ; Set DDAT correctly
+ isync
+
+ lfd f0,FM_SIZE+0(r1) ; Restore an FP register
+ lfd f1,FM_SIZE+8(r1) ; Restore an FP register
+ lfd f2,FM_SIZE+16(r1) ; Restore an FP register
+ lfd f3,FM_SIZE+24(r1) ; Restore an FP register
+
+ lwz r1,0(r1) ; Pop up the stack
+
+ mtmsr r9 ; Turn off FPU now and maybe rupts back on
+ isync
+ blr
+
+#if 0
+;
+; This is not very optimal. We just do it here for a test of
+; Altivec in the kernel.
+;
+wegotaltivec:
+ mfmsr r9 ; Get the MSR
+ lis r8,hi16(0xC0000000) ; Make sure we keep the first 2 vector registers
+ rlwinm r7,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1 ; Disable interruptions
+ lis r6,lo16(2*256+128) ; Specify 128 blocks of 2 vectors each
+ rlwinm r7,r7,0,MSR_DR_BIT+1,MSR_DR_BIT-1 ; Clear the DDAT bit
+ ori r6,r6,32 ; Set a 32-byte stride
+ mtsprg 256,r8 ; Set VRSave
+ mtmsr r7 ; Disable rupts and turn xlate off
+ isync
+
+ addi r11,r3,4096 ; Point to the next page
+ li r10,16 ; Get vector size
+
+avmovepg: lvxl v0,br0,r3 ; Get first half of line
+ dcba br0,r4 ; Allocate output
+ lvxl v1,r10,r3 ; Get second half of line
+ stvxl v0,br0,r4 ; Save first half of line
+ addi r3,r3,32 ; Point to the next line
+ icbi br0,r4 ; Make the icache go away also
+ stvxl v1,r10,r4 ; Save second half of line
+ cmplw r3,r11 ; Have we reached the next page?
+ dcbst br0,r4 ; Make sure the line is on its way out
+ addi r4,r4,32 ; Point to the next line
+ blt+ avmovepg ; Move the next line...
+
+ li r8,0 ; Clear this
+ sync ; Make sure all the memory stuff is done
+ mtsprg 256,r8 ; Show we are not using VRs any more
+ mtmsr r9 ; Translation and interruptions back on
+ isync
+ blr
+#endif
+
+
+
+
/*
* int
- * copyoutstr(src, dst, maxcount, count)
+ * copyin(src, dst, count)
* vm_offset_t src;
* vm_offset_t dst;
- * vm_size_t maxcount;
- * vm_size_t* count;
+ * int count;
*
- * Set *count to the number of bytes copied.
*/
-ENTRY(copyoutstr, TAG_NO_FRAME_USED)
- mfcr r2 // we use nonvolatile cr3
- li r0,0
- crset kkString // flag as a string op
- mr r10,r4 // for copyout, dest ptr (r4) is in user space
- stw r0,0(r6) // initialize #bytes moved
- crclr kkIn // flag as copyout
- b copyJoin
+ENTRY2(copyin, copyinmsg, TAG_NO_FRAME_USED)
+
+/* Preamble allowing us to call a sub-function */
+ mflr r0
+ stw r0,FM_LR_SAVE(r1)
+ stwu r1,-(FM_SIZE+16)(r1)
+
+ cmpli cr0,r5,0
+ ble- cr0,.L_copyinout_trivial
+
+/* we know we have a valid copyin to do now */
+/* Set up thread_recover in case we hit an illegal address */
+
+ mfsprg r8,1 /* Get the current act */
+ lwz r10,ACT_THREAD(r8)
+ lis r11,hi16(.L_copyinout_error)
+ lwz r8,ACT_VMMAP(r8)
+ ori r11,r11,lo16(.L_copyinout_error)
+ add r9,r3,r5 /* Get the end of the source */
+ lwz r8,VMMAP_PMAP(r8) ; Get the pmap
+ rlwinm r12,r3,6,26,29 ; Get index to the segment slot
+ subi r9,r9,1 /* Make sure we don't go too far */
+ add r8,r8,r12 ; Start indexing to the segment value
+ stw r11,THREAD_RECOVER(r10)
+ xor r9,r9,r3 /* Smoosh 'em together */
+ lwz r8,PMAP_SEGS(r8) ; Get the source SR value
+ rlwinm. r9,r9,0,1,3 /* Top nybble equal? */
+ mtsr SR_COPYIN,r8 ; Set the SR
+ isync
+#if 0
+ lis r0,HIGH_ADDR(EXT(dbgRegsCall)) /* (TEST/DEBUG) */
+ ori r0,r0,LOW_ADDR(EXT(dbgRegsCall)) /* (TEST/DEBUG) */
+ sc /* (TEST/DEBUG) */
+#endif
+
+/* For optimization, we check whether the copyin crosses a segment
+ * boundary. If it doesn't, we can use a simple copy. If it
+ * does, we split it into two separate copies in some C code.
+ */
+
+ bne- .L_call_copyin_multiple /* Nope, we went past the segment boundary... */
+
+ rlwinm r3,r3,0,4,31
+ oris r3,r3,(SR_COPYIN_NUM << (28-16)) /* Set the copyin segment as the source */
+
+ bl EXT(bcopy)
+
+/* Now that copyin is done, we don't need a recovery point */
+ addi r1,r1,FM_SIZE+16
+ mfsprg r6,1 /* Get the current act */
+ lwz r10,ACT_THREAD(r6)
+ li r3,0
+ lwz r0,FM_LR_SAVE(r1)
+ stw r3,THREAD_RECOVER(r10) /* Clear recovery */
+ mtlr r0
+ blr
+
+/* we get here via the exception handler if an illegal
+ * user memory reference was made.
+ */
+.L_copyinout_error:
+
+/* Now that copyin is done, we don't need a recovery point */
+
+ mfsprg r6,1 /* Get the current act */
+ addi r1,r1,FM_SIZE+16
+ lwz r10,ACT_THREAD(r6)
+ li r4,0
+ lwz r0,FM_LR_SAVE(r1)
+ stw r4,THREAD_RECOVER(r10) /* Clear recovery */
+ mtlr r0
+ li r3,EFAULT ; Indicate error (EFAULT)
+ blr
+
+.L_copyinout_trivial:
+ /* The copyin/out was for either 0 bytes or a negative
+	 * number of bytes; return an appropriate value (0 == SUCCESS).
+ * cr0 still contains result of comparison of len with 0.
+ */
+ li r3, 0
+ beq+ cr0, .L_copyinout_negative
+ li r3, 1
+.L_copyinout_negative:
+
+ /* unwind the stack */
+ addi r1, r1, FM_SIZE+16
+ lwz r0, FM_LR_SAVE(r1)
+ mtlr r0
+
+ blr
+
+.L_call_copyin_multiple:
+
+ /* unwind the stack */
+ addi r1, r1, FM_SIZE+16
+ lwz r0, FM_LR_SAVE(r1)
+ mtlr r0
+
+ b EXT(copyin_multiple) /* not a call - a jump! */
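
/*
 * The "Top nybble equal?" test above decides whether the transfer fits
 * within a single 256MB segment: the addresses of the first and last
 * bytes are XORed and bits 1-3 of the result (mask 0x70000000, matching
 * the rlwinm mask) are tested; any difference there means the copy spans
 * two segments.  A C sketch of the test (illustrative only --
 * crosses_copyin_segment is not a real kernel routine):
 *
 *	#include <stdint.h>
 *
 *	static int crosses_copyin_segment(uint32_t addr, uint32_t count)
 *	{
 *		uint32_t last = addr + count - 1;		// last byte touched
 *		return ((addr ^ last) & 0x70000000u) != 0;
 *	}
 *
 * When a crossing is detected, copyin (and copyout below) jump to
 * copyin_multiple / copyout_multiple, which split the transfer at the
 * segment boundary in C.
 */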
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
* int
- * copyinstr(src, dst, maxcount, count)
+ * copyout(src, dst, count)
+ * vm_offset_t src;
+ * vm_offset_t dst;
+ * int count;
+ *
+ */
+
+ENTRY2(copyout, copyoutmsg, TAG_NO_FRAME_USED)
+
+/* Preamble allowing us to call a sub-function */
+
+ mflr r0
+ stw r0,FM_LR_SAVE(r1)
+ stwu r1,-(FM_SIZE+16)(r1)
+
+#if 0
+ stw r3,FM_SIZE+0(r1) /* (TEST/DEBUG) */
+ stw r4,FM_SIZE+4(r1) /* (TEST/DEBUG) */
+ stw r5,FM_SIZE+8(r1) /* (TEST/DEBUG) */
+ mr r6,r0 /* (TEST/DEBUG) */
+
+ bl EXT(tracecopyout) /* (TEST/DEBUG) */
+
+ lwz r3,FM_SIZE+0(r1) /* (TEST/DEBUG) */
+ lwz r4,FM_SIZE+4(r1) /* (TEST/DEBUG) */
+ lwz r5,FM_SIZE+8(r1) /* (TEST/DEBUG) */
+#endif
+
+ cmpli cr0,r5,0
+ ble- cr0,.L_copyinout_trivial
+/* we know we have a valid copyout to do now */
+/* Set up thread_recover in case we hit an illegal address */
+
+
+ mfsprg r8,1 /* Get the current act */
+ lwz r10,ACT_THREAD(r8)
+ lis r11,HIGH_ADDR(.L_copyinout_error)
+ lwz r8,ACT_VMMAP(r8)
+ rlwinm r12,r4,6,26,29 ; Get index to the segment slot
+ ori r11,r11,LOW_ADDR(.L_copyinout_error)
+ add r9,r4,r5 /* Get the end of the destination */
+ lwz r8,VMMAP_PMAP(r8)
+ subi r9,r9,1 /* Make sure we don't go too far */
+ add r8,r8,r12 ; Start indexing to the segment value
+ stw r11,THREAD_RECOVER(r10)
+ xor r9,r9,r4 /* Smoosh 'em together */
+ lwz r8,PMAP_SEGS(r8) ; Get the source SR value
+ rlwinm. r9,r9,0,1,3 /* Top nybble equal? */
+ mtsr SR_COPYIN,r8
+ isync
+
+
+/* For optimization, we check whether the copyout crosses a segment
+ * boundary. If it doesn't, we can use a simple copy. If it
+ * does, we split it into two separate copies in some C code.
+ */
+
+ bne- .L_call_copyout_multiple /* Nope, we went past the segment boundary... */
+
+ rlwinm r4,r4,0,4,31
+ oris r4,r4,(SR_COPYIN_NUM << (28-16)) /* Set the copyin segment as the source */
+
+ bl EXT(bcopy)
+
+/* Now that copyout is done, we don't need a recovery point */
+ mfsprg r6,1 /* Get the current act */
+ addi r1,r1,FM_SIZE+16
+ lwz r10,ACT_THREAD(r6)
+ li r3,0
+ lwz r0,FM_LR_SAVE(r1)
+ stw r3,THREAD_RECOVER(r10) /* Clear recovery */
+ mtlr r0
+ blr
+
+.L_call_copyout_multiple:
+ /* unwind the stack */
+ addi r1, r1, FM_SIZE+16
+ lwz r0, FM_LR_SAVE(r1)
+ mtlr r0
+
+ b EXT(copyout_multiple) /* not a call - a jump! */
+
+/*
+ * int
+ * copyinstr(src, dst, maxcount, count)
* vm_offset_t src;
* vm_offset_t dst;
* vm_size_t maxcount;
* vm_size_t* count;
*
* Set *count to the number of bytes copied
+ *
* If dst == NULL, don't copy, just count bytes.
* Only currently called from klcopyinstr.
*/
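
/*
 * Ignoring the user-segment remapping and the fault-recovery setup, the
 * routine behaves like this sketch (illustrative only; a fault while
 * reading the user string makes the real code return EFAULT via the
 * recovery point instead):
 *
 *	#include <errno.h>
 *	#include <stddef.h>
 *
 *	static int copyinstr_sketch(const char *src, char *dst,
 *				    size_t maxcount, size_t *count)
 *	{
 *		size_t n = 0;
 *		while (maxcount--) {
 *			char c = *src++;
 *			if (dst != NULL)		// NULL dst: just count bytes
 *				*dst++ = c;
 *			n++;				// the terminating 0 is counted too
 *			if (c == '\0') {
 *				*count = n;
 *				return 0;
 *			}
 *		}
 *		*count = n;
 *		return ENAMETOOLONG;			// buffer exhausted before the 0
 *	}
 */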
ENTRY(copyinstr, TAG_NO_FRAME_USED)
- mfcr r2 // we use nonvolatile cr3
- cmplwi r4,0 // dst==NULL?
- li r0,0
- crset kkString // flag as a string op
- mr r10,r3 // for copyin, source ptr (r3) is in user space
- crmove kkNull,cr0_eq // remember if (dst==NULL)
- stw r0,0(r6) // initialize #bytes moved
- crset kkIn // flag as copyin (rather than copyout)
- b copyJoin1 // skip over the "crclr kkNull"
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyout(src, dst, count)
- * vm_offset_t src;
- * vm_offset_t dst;
- * size_t count;
- */
- .align 5
- .globl EXT(copyout)
- .globl EXT(copyoutmsg)
-
-LEXT(copyout)
-LEXT(copyoutmsg)
-
-#if INSTRUMENT
- mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
- stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
- mfspr r12,pmc2 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
- mfspr r12,pmc3 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
- mfspr r12,pmc4 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
-#endif
- mfcr r2 // save caller's CR
- crclr kkString // not a string version
- mr r10,r4 // dest (r4) is user-space ptr
- crclr kkIn // flag as copyout
- b copyJoin
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyin(src, dst, count)
- * vm_offset_t src;
- * vm_offset_t dst;
- * size_t count;
+/* Preamble allowing us to call a sub-function */
+ mflr r0
+ stw r0,FM_LR_SAVE(r1)
+ stwu r1,-(FM_SIZE+16)(r1)
+
+#if 0
+ stw r3,FM_SIZE+0(r1) /* (TEST/DEBUG) */
+ stw r4,FM_SIZE+4(r1) /* (TEST/DEBUG) */
+ stw r5,FM_SIZE+8(r1) /* (TEST/DEBUG) */
+ stw r6,FM_SIZE+12(r1) /* (TEST/DEBUG) */
+ mr r7,r0 /* (TEST/DEBUG) */
+
+ bl EXT(tracecopystr) /* (TEST/DEBUG) */
+
+ lwz r3,FM_SIZE+0(r1) /* (TEST/DEBUG) */
+ lwz r4,FM_SIZE+4(r1) /* (TEST/DEBUG) */
+ lwz r5,FM_SIZE+8(r1) /* (TEST/DEBUG) */
+	lwz	r6,FM_SIZE+12(r1)	/* (TEST/DEBUG) */
+#endif
+
+ cmpli cr0,r5,0
+ ble- cr0,.L_copyinout_trivial
+
+/* we know we have a valid copyin to do now */
+/* Set up thread_recover in case we hit an illegal address */
+
+ li r0,0
+ mfsprg r8,1 /* Get the current act */
+ lwz r10,ACT_THREAD(r8)
+ stw r0,0(r6) /* Clear result length */
+ lis r11,HIGH_ADDR(.L_copyinout_error)
+ lwz r8,ACT_VMMAP(r8) ; Get the map for this activation
+ rlwinm r12,r3,6,26,29 ; Get index to the segment slot
+ lwz r8,VMMAP_PMAP(r8)
+ ori r11,r11,LOW_ADDR(.L_copyinout_error)
+ add r8,r8,r12 ; Start indexing to the segment value
+ stw r11,THREAD_RECOVER(r10)
+ rlwinm r3,r3,0,4,31
+ lwz r7,PMAP_SEGS(r8) ; Get the source SR value
+ oris r3,r3,(SR_COPYIN_NUM << (28-16)) /* Set the copyin segment as the source */
+
+/* Copy byte by byte for now - TODO NMGS speed this up with
+ * some clever (but fairly standard) logic for word copies.
+ * We don't use a copyinstr_multiple since copyinstr is called
+ * with INT_MAX in the linux server. Eugh.
*/
+ li r9,0 /* Clear byte counter */
- .align 5
- .globl EXT(copyin)
- .globl EXT(copyinmsg)
-
-LEXT(copyin)
-LEXT(copyinmsg)
-
- mfcr r2 // save caller's CR
- crclr kkString // not a string version
- mr r10,r3 // source (r3) is user-space ptr in copyin
- crset kkIn // flag as copyin
-
-
-// Common code to handle setup for all the copy variants:
-// r2 = caller's CR, since we use cr3
-// r3-r6 = parameters
-// r10 = user-space ptr (r3 if copyin, r4 if copyout)
-// cr3 = kkIn, kkString, kkNull flags
-
-copyJoin:
- crclr kkNull // (dst==NULL) convention not used with this call
-copyJoin1: // enter from copyinstr with kkNull set
- mflr r0 // get return address
- cmplwi r5,0 // buffer length 0?
- lis r9,0x1000 // r9 <- 0x10000000 (256MB)
- stw r0,FM_LR_SAVE(r1) // save return
- cmplw cr1,r5,r9 // buffer length > 256MB ?
- mfsprg r8,2 // get the features
- beq-- copyinout_0 // 0 length is degenerate case
- stwu r1,-kkFrameSize(r1) // set up stack frame
- stw r2,kkCR(r1) // save caller's CR since we use cr3
- mtcrf 0x02,r8 // move pf64Bit to cr6
- stw r3,kkSource(r1) // save args across MapUserAddressSpace
- stw r4,kkDest(r1)
- stw r5,kkBufSize(r1)
- crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
- stw r6,kkCountPtr(r1)
- stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
- li r31,0 // no mapped ptr yet
-
-
-// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
-// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
-// the buffer length to 256MB. This isn't an issue if the string is less than 256MB
-// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction
-// is due to MapUserAddressSpace; we don't want to consume more than two segments for
-// the mapping.
-
- ble++ cr1,copyin0 // skip if buffer length <= 256MB
- bf kkString,copyinout_too_big // error if not string op
- mr r5,r9 // silently clamp buffer length to 256MB
- stw r9,kkBufSize(r1) // update saved copy too
-
-
-// Set up thread_recover in case we hit an illegal address.
-
-copyin0:
- mfsprg r8,1 /* Get the current act */
- lis r2,hi16(copyinout_error)
- lwz r7,ACT_THREAD(r8)
- ori r2,r2,lo16(copyinout_error)
- lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
- stw r2,THREAD_RECOVER(r7)
-
-
-// Map user segment into kernel map, turn on 64-bit mode.
-// r3 = vm map
-// r5 = buffer length
-// r10 = user space ptr (r3 if copyin, r4 if copyout)
-
- mr r6,r5 // Set length to map
- li r4,0 // Note: we only do this 32-bit for now
- mr r5,r10 // arg2 <- user space ptr
-#if INSTRUMENT
- mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
- stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
- mfspr r12,pmc2 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
- mfspr r12,pmc3 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
- mfspr r12,pmc4 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
-#endif
- bl EXT(MapUserAddressSpace) // set r3 <- address in kernel map of user operand
-#if INSTRUMENT
- mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
- stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
- mfspr r12,pmc2 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
- mfspr r12,pmc3 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
- mfspr r12,pmc4 ; INSTRUMENT - Get stamp
- stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
-#endif
- or. r0,r3,r4 // Did we fail the mapping?
- mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
- beq-- copyinout_error // was 0, so there was an error making the mapping
- bf-- kk64bit,copyin1 // skip if a 32-bit processor
-
- rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
- mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
- li r0,1
- rldimi r4,r0,63,MSR_SF_BIT // light bit 0
- mtmsrd r4 // turn on 64-bit mode
- isync // wait for mode to change
-
-
-// Load r3-r5, substituting mapped ptr as appropriate.
-
-copyin1:
- lwz r5,kkBufSize(r1) // restore length to copy
- bf kkIn,copyin2 // skip if copyout
- lwz r4,kkDest(r1) // copyin: source is mapped, dest is r4 at entry
- mr r3,r31 // source is mapped ptr
- b copyin3
-copyin2: // handle copyout
- lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
- mr r4,r31 // dest is mapped ptr into user space
-
-
-// Finally, all set up to copy:
-// r3 = source ptr (mapped if copyin)
-// r4 = dest ptr (mapped if copyout)
-// r5 = length
-// r31 = mapped ptr returned by MapUserAddressSpace
-// cr3 = kkIn, kkString, kk64bit, and kkNull flags
-
-copyin3:
- bt kkString,copyString // handle copyinstr and copyoutstr
- bl EXT(bcopy) // copyin and copyout: let bcopy do the work
- li r3,0 // return success
-
-
-// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
-// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
-// and pop off frame. Note that we have kept
-// the mapped ptr into user space in r31, as a reg64_t type (ie, a 64-bit ptr on
-// 64-bit machines.) We must unpack r31 into an addr64_t in (r3,r4) before passing
-// it to ReleaseUserAddressSpace.
-// r3 = 0, EFAULT, or ENAMETOOLONG
-
-copyinx:
- lwz r2,kkCR(r1) // get callers cr3
- mfsprg r6,1 // Get the current act
- lwz r10,ACT_THREAD(r6)
+/* If the destination is NULL, don't do writes,
+ * just count bytes. We set CR7 outside the loop to save time
+ */
+ cmpwi cr7,r4,0 /* Is the destination null? */
- bf-- kk64bit,copyinx1 // skip if 32-bit processor
- mfmsr r12
- rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
- mtmsrd r12 // turn SF off and EE back on
- isync // wait for the mode to change
-copyinx1:
- lwz r31,kkR31Save(r1) // restore callers r31
- addi r1,r1,kkFrameSize // pop off our stack frame
- lwz r0,FM_LR_SAVE(r1)
- li r4,0
- stw r4,THREAD_RECOVER(r10) // Clear recovery
- mtlr r0
- mtcrf 0x10,r2 // restore cr3
- blr
+nxtseg: mtsr SR_COPYIN,r7 /* Set the source SR */
+ isync
+.L_copyinstr_loop:
+ lbz r0,0(r3) /* Get the source */
+ addic. r5,r5,-1 /* Have we gone far enough? */
+ addi r3,r3,1 /* Bump source pointer */
+
+ cmpwi cr1,r0,0 /* Did we hit a null? */
-/* We get here via the exception handler if an illegal
- * user memory reference was made. This error handler is used by
- * copyin, copyout, copyinstr, and copyoutstr. Registers are as
- * they were at point of fault, so for example cr3 flags are valid.
- */
+ beq cr7,.L_copyinstr_no_store /* If we are just counting, skip the store... */
+
+ stb r0,0(r4) /* Move to sink */
+ addi r4,r4,1 /* Advance sink pointer */
+
+.L_copyinstr_no_store:
-copyinout_error:
- li r3,EFAULT // return error
- b copyinx
-
-copyinout_0: // degenerate case: 0-length copy
- mtcrf 0x10,r2 // restore cr3
- li r3,0 // return success
- blr
-
-copyinout_too_big: // degenerate case
- mtcrf 0x10,r2 // restore cr3
- lwz r1,0(r1) // pop off stack frame
- li r3,ENAMETOOLONG
- blr
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
-// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
-// if necessary, and:
-// r3 = source ptr, mapped if copyinstr
-// r4 = dest ptr, mapped if copyoutstr
-// r5 = buffer length
-// r31 = mapped ptr returned by MapUserAddressSpace
-// cr3 = kkIn, kkString, kkNull, and kk64bit flags
-// We do word copies unless the buffer is very short, then use a byte copy loop
-// for the leftovers if necessary.
-
-copyString:
- li r12,0 // Set header bytes count to zero
- cmplwi cr1,r5,20 // is buffer very short?
- mtctr r5 // assuming short, set up loop count for bytes
- blt cr1,copyinstr8 // too short for word loop
- andi. r12,r3,0x3 // is source ptr word aligned?
- bne copyinstr11 // bytes loop
-copyinstr1:
- srwi r6,r5,2 // get #words in buffer
- mtctr r6 // set up word loop count
- lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
- lis r11,hi16(0x80808080)
- ori r10,r10,lo16(0xFEFEFEFF)
- ori r11,r11,lo16(0x80808080)
- bf kkNull,copyinstr6 // enter loop that copies
- b copyinstr5 // use loop that just counts
-
-
-// Word loop(s). They do a word-parallel search for 0s, using the following
-// inobvious but very efficient test:
-// y = data + 0xFEFEFEFF
-// z = ~data & 0x80808080
-// If (y & z)==0, then all bytes in dataword are nonzero. We need two copies of
-// this loop, since if we test kkNull in the loop then it becomes 9 words long.
-
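// In C, the test and the subsequent location of the terminating byte look
// roughly like this (a sketch: it assumes the 32-bit big-endian byte order
// of PPC, so the "first" byte of the string is the most significant byte,
// and first_zero_byte assumes the word is already known to contain a 0):
//
//	#include <stdint.h>
//
//	// nonzero iff the word contains at least one 0x00 byte
//	static uint32_t has_zero_byte(uint32_t w)
//	{
//		uint32_t y = w + 0xFEFEFEFFu;		// same as w - 0x01010101
//		uint32_t z = ~w & 0x80808080u;
//		return y & z;
//	}
//
//	// byte index (0 = most significant) of the first zero byte in w
//	static unsigned first_zero_byte(uint32_t w)
//	{
//		uint32_t hit = has_zero_byte(w);
//		hit &= ~(w << 7);			// drop false hits from 0x01 bytes
//		return __builtin_clz(hit) / 8;		// cntlzw, as in the code below
//	}
//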
- .align 5 // align inner loops for speed
-copyinstr5: // version that counts but does not copy
- lwz r8,0(r3) // get next word of source
- addi r3,r3,4 // increment source ptr
- add r9,r10,r8 // r9 = data + 0xFEFEFEFF
- andc r7,r11,r8 // r7 = ~data & 0x80808080
- and. r7,r9,r7 // r7 = r9 & r7
- bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes are nonzero
-
- b copyinstr7
-
- .align 5 // align inner loops for speed
-copyinstr6: // version that counts and copies
- lwz r8,0(r3) // get next word of source
- addi r3,r3,4 // increment source ptr
- addi r4,r4,4 // increment dest ptr while we wait for data
- add r9,r10,r8 // r9 = data + 0xFEFEFEFF
- andc r7,r11,r8 // r7 = ~data & 0x80808080
- and. r7,r9,r7 // r7 = r9 & r7
- stw r8,-4(r4) // pack all 4 bytes into buffer
- bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
-
-
-// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
-// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
-// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
-
-copyinstr7:
- crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
- mfctr r6 // get #words remaining in buffer
- rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
- slwi r6,r6,2 // convert to #bytes remaining
- andc r7,r7,r2 // turn off false hits from 0x0100 worst case
- rlwimi r6,r5,0,30,31 // add in odd bytes leftover in buffer
- srwi r7,r7,8 // we want to count the 0 as a byte xferred
- addi r6,r6,4 // don't count last word xferred (yet)
- cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
- srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
- sub. r6,r6,r7 // account for nonzero bytes in last word
- bt++ kkZero,copyinstr10 // 0 found, so done
-
- beq copyinstr10 // r6==0, so buffer truly full
- mtctr r6 // 0 not found, loop over r6 bytes
- b copyinstr8 // enter byte loop for last 1-3 leftover bytes
-
-
-// Byte loop. This is used for very small buffers and for the odd bytes left over
-// after searching and copying words at a time.
-
- .align 5 // align inner loops for speed
-copyinstr8: // loop over bytes of source
- lbz r0,0(r3) // get next byte of source
- addi r3,r3,1
- addi r4,r4,1 // increment dest addr whether we store or not
- cmpwi r0,0 // the 0?
- bt-- kkNull,copyinstr9 // don't store (was copyinstr with NULL ptr)
- stb r0,-1(r4)
-copyinstr9:
- bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
-
- mfctr r6 // get #bytes left in buffer
- crmove kkZero,cr0_eq // remember if 0 found or buffer filled
-
-
-// Buffer filled or 0 found. Unwind and return.
-// r5 = kkBufSize, ie buffer length
-// r6 = untransferred bytes remaining in buffer
-// r31 = mapped ptr returned by MapUserAddressSpace
-// cr3 = kkZero set iff 0 found
-
-copyinstr10:
- lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
- sub r2,r5,r6 // get #bytes we moved, counting the 0 iff any
- add r2,r2,r12 // add the header bytes count
- li r3,0 // assume 0 return status
- stw r2,0(r9) // store #bytes moved
- bt++ kkZero,copyinx // we did find the 0 so return 0
- li r3,ENAMETOOLONG // buffer filled
- b copyinx // join main exit routine
-
-// Byte loop. This is used on the header bytes for unaligned source
-
- .align 5 // align inner loops for speed
-copyinstr11:
- li r10,4 // load word size
- sub r12,r10,r12 // set the header bytes count
- mtctr r12 // set up bytes loop count
-copyinstr12: // loop over bytes of source
- lbz r0,0(r3) // get next byte of source
- addi r3,r3,1
- addi r4,r4,1 // increment dest addr whether we store or not
- cmpwi r0,0 // the 0?
- bt-- kkNull,copyinstr13 // don't store (was copyinstr with NULL ptr)
- stb r0,-1(r4)
-copyinstr13:
- bdnzf cr0_eq,copyinstr12 // loop if byte not 0 and more room in buffer
-        sub     r5,r5,r12               // subtract the bytes copied
- bne cr0_eq,copyinstr1 // branch to word loop
-
- mr r5,r12 // Get the header bytes count
- li r12,0 // Clear the header bytes count
- mfctr r6 // get #bytes left in buffer
- crmove kkZero,cr0_eq // remember if 0 found or buffer filled
- b copyinstr10
+ addi r9,r9,1 /* Count the character */
+ beq- cr1,.L_copyinstr_done /* We're done if we did a null... */
+ beq- cr0,L_copyinstr_toobig /* Also if we maxed the count... */
+
+/* Check to see if the copyin pointer has moved out of the
+ * copyin segment; if it has, we must remap.
+ */
+ rlwinm. r0,r3,0,4,31 /* Did we wrap around to 0? */
+ bne+ cr0,.L_copyinstr_loop /* Nope... */
+
+ lwz r7,PMAP_SEGS+4(r8) ; Get the next source SR value
+ addi r8,r8,4 ; Point to the next segment
+ oris r3,r0,(SR_COPYIN_NUM << (28-16)) /* Reset the segment number */
+ b nxtseg /* Keep going... */
+
+L_copyinstr_toobig:
+ li r3,ENAMETOOLONG
+ b L_copyinstr_return
+.L_copyinstr_done:
+ li r3,0 /* Normal return */
+L_copyinstr_return:
+ li r4,0 /* to clear thread_recover */
+ stw r9,0(r6) /* Set how many bytes we did */
+ stw r4,THREAD_RECOVER(r10) /* Clear recovery exit */
+
+ addi r1, r1, FM_SIZE+16
+ lwz r0, FM_LR_SAVE(r1)
+ mtlr r0
+ blr