+// Map the operand 'which' bits into non-volatile CR2 and CR3 bits.
+#define whichAlign ((3+1)*4)
+#define whichMask 0x007F0000
+#define pvPsnk (cppvPsnkb - whichAlign)
+#define pvPsrc (cppvPsrcb - whichAlign)
+#define pvFsnk (cppvFsnkb - whichAlign)
+#define pvFsrc (cppvFsrcb - whichAlign)
+#define pvNoModSnk (cppvNoModSnkb - whichAlign)
+#define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
+#define pvKmap (cppvKmapb - whichAlign)
+#define pvNoCache cr2_lt
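+// (With the usual cppv* header values -- e.g. cppvPsnkb == 31 -- the 16-bit rotate in the code
+// below moves each flag into CR bits 9-15, so pvPsnk lands in cr3 and pvKmap in cr2; cr2_lt
+// (CR bit 8) is deliberately excluded from whichMask so it can carry the pvNoCache flag.)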
+
+ .align 5
+ .globl EXT(copypv)
+
+LEXT(copypv)
+ mfsprg r10,2 // get feature flags
+ mtcrf 0x02,r10 // we need to test pf64Bit
+ bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint)
+
+ b EXT(hw_copypv_32) // carry on with 32-bit copypv
+
+// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
+copypv_64:
+ mfsprg r9,1 // get current thread
+ stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
+ // allocate stack frame and link it
+ mflr r0 // get return address
+ mfcr r10 // get cr2 and cr3
+ lwz r12,THREAD_RECOVER(r9) // get error callback
+ stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
+ stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
+ stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
+ stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
+ stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
+ stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
+ stw r12,FM_ARG0+0x20(r1) // save error callback
+ stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
+ // save return address
+ stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
+ // save non-volatile cr2 and cr3
+
+// Non-volatile register usage in this routine is:
+// r26: saved msr image
+// r27: current pmap_t / virtual source address
+// r28: destination virtual address
+// r29: source address
+// r30: destination address
+// r31: byte count to copy
+// cr2/3: parameter 'which' bits
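+//
+// Incoming (volatile) parameters, per the 32-bit ABI in effect on entry:
+// r3/r4: addr64_t source (high/low words)
+// r5/r6: addr64_t sink (high/low words)
+// r7: byte count
+// r8: operand 'which' flags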
+
+ rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
+ mr r31,r7 // copy size to somewhere non-volatile
+ mtcrf 0x20,r8 // insert which bits into cr2...
+ mtcrf 0x10,r8 // ...and into cr3 (one CR field per mtcrf)
+ rlwinm r29,r3,0,1,0 // form source address high-order bits
+ rlwinm r30,r5,0,1,0 // form destination address high-order bits
+ rlwimi r29,r4,0,0,31 // form source address low-order bits
+ rlwimi r30,r6,0,0,31 // form destination address low-order bits
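+// (The rlwinm/rlwimi pairs above splice two 32-bit halves into one 64-bit register: rlwinm with
+// the wrapped 1,0 mask replicates the high-order word into both halves of the target, and rlwimi
+// then drops the low-order word into bits 32-63; e.g. r3=0x00000001, r4=0x00002000 yields
+// r29=0x0000000100002000.)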
+ crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
+ cntlzw r0,r31 // count leading zeroes in byte count
+ cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
+ bf-- cr7_eq,copypv_einval // both operands may not be virtual
+ cmplwi r0,4 // byte count greater than or equal to 256M (2**28)?
+ blt-- copypv_einval // byte count too big, give EINVAL
+ cmplwi r31,0 // byte count zero?
+ beq-- copypv_zero // early out
+ bt cr7_lt,copypv_phys // both operand addresses are physical
+ mr r28,r30 // assume destination is virtual
+ bf pvPsnk,copypv_dv // is destination virtual?
+ mr r28,r29 // no, so source must be virtual
+copypv_dv:
+ lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
+ lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
+ bt pvKmap,copypv_kern // virtual address in kernel map?
+ lwz r3,ACT_VMMAP(r9) // get user's vm_map *
+ rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address
+ rldicl r5,r28,0,32
+ std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
+ std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
+ bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
+ ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
+ ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
+ rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address
+ rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
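+// (The std/ld pairs around the call are needed because a callee built for the 32-bit ABI only
+// preserves the low words of the non-volatile GPRs; the high halves of r29 and r30 would
+// otherwise be lost.)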
+copypv_kern:
+
+// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
+// update the referenced and changed bits manually after the copy. So, force the appropriate
+// flag bit on for the virtual operand.
+ crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
+ crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
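+// (crorc sets its target bit to BA | ~BB, so each "no manual update" flag is forced on exactly
+// when the corresponding operand is not physical.)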
+
+// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
+ lis r0,hi16(MASK(MSR_VEC)) // get vector mask
+ ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
+ mfmsr r26 // save current msr
+ andc r26,r26,r0 // turn off VEC and FP in saved copy
+ ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
+ andc r0,r26,r0 // disable EE in our new msr image
+ mtmsrd r0 // introduce new msr image
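+// (VEC and FP are cleared in the saved image as well, so they stay disabled when r26 is moved
+// back into the msr below.)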
+
+// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
+// try to find a mapping corresponding to this address in order to determine whether the address
+// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
+// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
+// examine the mapping's caching-inhibited bit.
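+// (mapping_find returns the mapping with its busy count held, which is why mapping_drop_busy()
+// is called below once we've looked at the caching-inhibited bit.)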
+ mr r3,r27 // r3 <- pmap_t pmap
+ rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
+ rldicl r5,r28,0,32
+ la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
+ li r7,1 // r7 <- int full, search nested mappings
+ std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
+ std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
+ std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
+ std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
+ bl EXT(mapping_find) // find mapping for virtual operand
+ mr. r3,r3 // did we find it?
+ beq copypv_nomapping // nope, so we'll assume it's cacheable
+ lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
+ rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
+ crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
+ bl EXT(mapping_drop_busy) // drop busy on the mapping
+copypv_nomapping:
+ ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
+ ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
+ ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
+ ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
+ mtmsrd r26 // restore msr to its previous state
+
+// Set both the source and destination virtual addresses to the virtual operand's address --
+// we'll overlay one of them with the physical operand's address.
+ mr r27,r28 // make virtual operand BOTH source AND destination
+
+// Now we're ready to relocate the physical operand address(es) into the physical memory window.
+// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
+// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
+// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
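+// (cntlzd of any address in the 2**31..2**32-1 range is exactly 32 -- e.g. cntlzd(0x80000000)
+// and cntlzd(0xFFFFFFFF) are both 32, while cntlzd(0x100000000) is 31 -- so the compares below
+// pick out exactly that range.)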
+copypv_phys:
+ ld r6,lgPMWvaddr(0) // get physical memory window virtual address
+ bf pvPsnk,copypv_dstvirt // is destination address virtual?
+ cntlzd r4,r30 // count leading zeros in destination address
+ cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
+ cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
+ add r28,r30,r6 // relocate physical destination into physical window
+copypv_dstvirt:
+ bf pvPsrc,copypv_srcvirt // is source address virtual?
+ cntlzd r4,r29 // count leading zeros in source address
+ cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
+ cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
+ add r27,r29,r6 // relocate physical source into physical window
+copypv_srcvirt:
+
+// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
+// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
+// control block.
+ mfsprg r8,1 // get current thread's stuff
+ lis r3,hi16(copypv_error) // get our error callback's address, high
+ ori r3,r3,lo16(copypv_error) // get our error callback's address, low
+ stw r3,THREAD_RECOVER(r8) // set our error callback
+
+// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
+// 64-bit mode.
+ li r0,1 // get a handy one bit
+ mfmsr r3 // get current msr
+ rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
+ mtmsrd r3 // enter 64-bit mode
+
+// If requested, flush data cache
+// Note that we don't actually flush; the code is kept "just in case".
+#if 0
+ bf pvFsrc,copypv_nfs // do we flush the source?
+ rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
+ rldicl r4,r27,0,32
+ mr r5,r31 // r5 <- count (in bytes)
+ li r6,0 // r6 <- boolean phys (false, not physical)
+ bl EXT(flush_dcache) // flush the source operand
+copypv_nfs:
+ bf pvFsnk,copypv_nfdx // do we flush the destination?
+ rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
+ rldicl r4,r28,0,32
+ mr r5,r31 // r5 <- count (in bytes)
+ li r6,0 // r6 <- boolean phys (false, not physical)
+ bl EXT(flush_dcache) // flush the destination operand
+copypv_nfdx:
+#endif
+
+// Call bcopy or bcopy_nc to perform the copy.
+ mr r3,r27 // r3 <- source virtual address
+ mr r4,r28 // r4 <- destination virtual address
+ mr r5,r31 // r5 <- bytes to copy
+ bt pvNoCache,copypv_nc // take non-caching route
+ bl EXT(bcopy) // call bcopy to do the copying
+ b copypv_copydone
+copypv_nc:
+ bl EXT(bcopy_nc) // call bcopy_nc to do the copying
+copypv_copydone:
+
+// If requested, flush data cache
+// Note that we don't actually flush; the code is kept "just in case".
+#if 0
+ bf pvFsrc,copypv_nfsx // do we flush the source?
+ rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
+ rldicl r4,r27,0,32
+ mr r5,r31 // r5 <- count (in bytes)
+ li r6,0 // r6 <- boolean phys (false, not physical)
+ bl EXT(flush_dcache) // flush the source operand
+copypv_nfsx:
+ bf pvFsnk,copypv_nfd // do we flush the destination?
+ rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
+ rldicl r4,r28,0,32
+ mr r5,r31 // r5 <- count (in bytes)
+ li r6,0 // r6 <- boolean phys (false, not physical)
+ bl EXT(flush_dcache) // flush the destination operand
+copypv_nfd:
+#endif
+
+// Leave 64-bit mode.
+ mfmsr r3 // get current msr
+ rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
+ mtmsrd r3 // leave 64-bit mode
+
+// If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
+// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
+// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
+// Note that this code is page-size sensitive, so it should probably be a part of our low-level
+// code in hw_vm.s.
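+// (For example, a 0x1800-byte copy whose destination starts at page offset 0xF00 touches three
+// 4K pages: the first page takes a 0x100-byte fragment, leaving 0x1700 bytes, and the count
+// computed below works out to 1 + 1 + (0x1700 >> 12) = 3.)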
+ bt pvNoModSnk,copypv_nomod // skip destination update if not requested
+ std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls
+ li r26,1 // r26 <- 4K-page count
+ mr r27,r31 // r27 <- byte count
+ rlwinm r3,r30,0,20,31 // does destination cross a page boundary?
+ subfic r3,r3,4096 // r3 <- bytes from the destination to the end of its page
+ cmplw r3,r27 // does the byte count fit in that fragment?
+ bge copypv_modnox // skip if not crossing case
+ subf r27,r3,r27 // r27 <- byte count less initial fragment
+ addi r26,r26,1 // increment page count
+copypv_modnox:
+ srdi r3,r27,12 // pages to update (not including crosser)
+ add r26,r26,r3 // add in crosser
+ srdi r27,r30,12 // r27 <- destination page number
+copypv_modloop:
+ mr r3,r27 // r3 <- destination page number
+ la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
+ bl EXT(mapping_phys_lookup) // see if page is really there
+ mr. r3,r3 // is it?
+ beq-- copypv_modend // nope, break out of modify loop
+ mr r3,r27 // r3 <- destination page number
+ bl EXT(mapping_set_mod) // set page changed status
+ subi r26,r26,1 // decrement page count
+ addi r27,r27,1 // increment destination page number
+ cmpwi r26,0 // done yet?
+ bgt copypv_modloop // nope, iterate
+copypv_modend:
+ ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
+copypv_nomod:
+ bt pvNoRefSrc,copypv_done // skip source update if not requested
+copypv_debugref:
+ li r26,1 // r26 <- 4K-page count
+ mr r27,r31 // r27 <- byte count
+ rlwinm r3,r29,0,20,31 // does source cross a page boundary?
+ subfic r3,r3,4096 // r3 <- bytes from the source to the end of its page
+ cmplw r3,r27 // does the byte count fit in that fragment?
+ bge copypv_refnox // skip if not crossing case
+ subf r27,r3,r27 // r27 <- byte count less initial fragment
+ addi r26,r26,1 // increment page count
+copypv_refnox:
+ srdi r3,r27,12 // pages to update (not including crosser)
+ add r26,r26,r3 // add in crosser
+ srdi r27,r29,12 // r27 <- source page number
+copypv_refloop:
+ mr r3,r27 // r3 <- source page number
+ la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
+ bl EXT(mapping_phys_lookup) // see if page is really there
+ mr. r3,r3 // is it?
+ beq-- copypv_done // nope, break out of reference loop
+ mr r3,r27 // r3 <- source page number
+ bl EXT(mapping_set_ref) // set page referenced status
+ subi r26,r26,1 // decrement page count
+ addi r27,r27,1 // increment source page number
+ cmpwi r26,0 // done yet?
+ bgt copypv_refloop // nope, iterate
+
+// Return, indicating success.
+copypv_done:
+copypv_zero:
+ li r3,0 // our efforts were crowned with success
+
+// Pop frame, restore caller's non-volatiles, and restore the recovery routine pointer.
+copypv_return:
+ mfsprg r9,1 // get current thread's stuff
+ lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
+ // get return address
+ lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
+ // get non-volatile cr2 and cr3
+ lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
+ lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
+ mtlr r0 // restore return address
+ lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
+ mtcrf 0x20,r4 // restore non-volatile cr2
+ mtcrf 0x10,r4 // restore non-volatile cr3
+ lwz r11,FM_ARG0+0x20(r1) // get the saved error callback
+ lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
+ lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
+ lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
+ stw r11,THREAD_RECOVER(r9) // restore our error callback
+ lwz r1,0(r1) // release stack frame
+
+ blr // y'all come back now
+
+// Invalid argument handler.
+copypv_einval:
+ li r3,EINVAL // invalid argument
+ b copypv_return // return
+
+// Error encountered during bcopy or bcopy_nc.
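+// We land here via the THREAD_RECOVER callback installed before the copy. The fault happened
+// while running in 64-bit mode, so drop back out of it before taking the common exit path.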
+copypv_error:
+ mfmsr r3 // get current msr
+ rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
+ mtmsrd r3 // leave 64-bit mode
+ li r3,EFAULT // it was all his fault
+ b copypv_return // return