+ rldimi r28,r6,32,0 ; Shift the fixed upper part of the physical over and cram in top
+
+hpSF1: mr r29,r4 ; Save top half of vaddr
+ mr r30,r5 ; Save the bottom half
+
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne-- hpBadLock ; Nope...
+
+ mr r3,r28 ; get the pmap address
+ mr r4,r29 ; Get bits 0:31 to look for
+ mr r5,r30 ; Get bits 32:63
+
+ bl EXT(mapSearch) ; Go see if we can find it (note: R7 comes back with mpFlags)
+
+ rlwinm. r0,r7,0,mpType ; Is this a normal mapping?
+ crmove cr1_eq,cr0_eq ; cr1_eq <- this is a normal mapping
+ andi. r0,r7,mpPerm|mpRIP ; Is it permanent or being removed?
+ cror cr1_eq,cr0_eq,cr1_eq ; cr1_eq <- normal mapping and not permanent and not being removed
+ mr. r31,r3 ; Save the mapping if we found it
+ mr r29,r4 ; Save next va high half
+ mr r30,r5 ; Save next va low half
+
+ beq-- hpNotFound ; Not found...
+
+ bf-- cr1_eq,hpNotAllowed ; Something special is happening...
+
+ bt++ pf64Bitb,hpDo64 ; Split for 64 bit
+
+ bl mapInvPte32 ; Invalidate and lock PTEG, also merge into physent
+
+ rlwimi r5,r24,0,mpPPb-32,mpPPe-32 ; Stick in the new pp (note that we ignore no-execute for 32-bit)
+ mr. r3,r3 ; Was there a previously valid PTE?
+
+ stb r5,mpVAddr+7(r31) ; Set the new pp field (do not muck with the rest)
+
+ beq-- hpNoOld32 ; Nope...
+
+ stw r5,4(r3) ; Store second half of PTE
+ eieio ; Make sure we do not reorder
+ stw r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+hpNoOld32: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r3,mapRtOK ; Set normal return
+ b hpR32 ; Join common...
+
+ .align 5
+
+
+hpDo64: bl mapInvPte64 ; Invalidate and lock PTEG, also merge into physent
+
+ rldimi r5,r24,0,mpNb ; Stick in the new no-execute and pp bits
+ mr. r3,r3 ; Was there a previously valid PTE?
+
+ stb r5,mpVAddr+7(r31) ; Set the new pp field (do not muck with the rest)
+
+ beq-- hpNoOld64 ; Nope...
+
+ std r5,8(r3) ; Store second half of PTE
+ eieio ; Make sure we do not reorder
+ std r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+hpNoOld64: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r3,mapRtOK ; Set normal return
+ b hpR64 ; Join common...
+
+ .align 5
+
+hpReturn: bt++ pf64Bitb,hpR64 ; Yes...
+
+hpR32: mtmsr r27 ; Restore enables/translation/etc.
+ isync
+ b hpReturnC ; Join common...
+
+hpR64: mtmsrd r27 ; Restore enables/translation/etc.
+ isync
+
+hpReturnC: stw r29,0(r25) ; Save the top of the next va
+ stw r30,4(r25) ; Save the bottom of the next va
+ lwz r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Get the saved return
+ lwz r24,FM_ARG0+0x00(r1) ; Restore a register
+ lwz r25,FM_ARG0+0x04(r1) ; Restore a register
+ lwz r26,FM_ARG0+0x08(r1) ; Restore a register
+ mtlr r0 ; Restore the return
+ lwz r27,FM_ARG0+0x0C(r1) ; Restore a register
+ lwz r28,FM_ARG0+0x10(r1) ; Restore a register
+ lwz r29,FM_ARG0+0x14(r1) ; Restore a register
+ lwz r30,FM_ARG0+0x18(r1) ; Restore a register
+ lwz r31,FM_ARG0+0x1C(r1) ; Restore a register
+ lwz r1,0(r1) ; Pop the stack
+ blr ; Leave...
+
+ .align 5
+
+hpBadLock: li r3,mapRtBadLk ; Set lock time out error code
+ b hpReturn ; Leave....
+
+hpNotFound: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r3,mapRtNotFnd ; Set that we did not find the requested page
+ b hpReturn ; Leave....
+
+hpNotAllowed:
+ rlwinm. r0,r7,0,mpRIPb,mpRIPb ; Is it actually being removed?
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bne-- hpNotFound ; Yeah...
+ bl sxlkUnlock ; Unlock the search list
+
+ li r3,mapRtBlock ; Assume it was a block
+ rlwinm r0,r7,0,mpType ; Isolate mapping type
+ cmplwi r0,mpBlock ; Is this a block mapping?
+ beq++ hpReturn ; Yes, leave...
+
+ li r3,mapRtPerm ; Set that we hit a permanent page
+ b hpReturn ; Leave....
+
+hpPanic: lis r0,hi16(Choke) ; System abend
+ ori r0,r0,lo16(Choke) ; System abend
+ li r3,failMapping ; Show that we failed some kind of mapping thing
+ sc
+
+
+;
+; int hw_test_rc(pmap, va, reset) - tests RC on a specific va
+;
+; Returns following code ORed with RC from mapping
+; mapRtOK - if all is ok
+; mapRtBadLk - if mapping lock fails
+; mapRtNotFnd - if mapping is not found
+;
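+;
+; A rough caller-side sketch (hedged: the mapRt* and mpR/mpC constants are the
+; ones defined for this file elsewhere, and the masking below is only an
+; illustration of "code ORed with RC", not a documented interface):
+;
+;	unsigned int ret  = hw_test_rc(pmap, va, reset);
+;	unsigned int rc   = ret & (mpR | mpC);		/* referenced/changed bits */
+;	unsigned int code = ret & ~(mpR | mpC);		/* mapRtOK, mapRtBadLk, mapRtNotFnd */
+;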
+ .align 5
+ .globl EXT(hw_test_rc)
+
+LEXT(hw_test_rc)
+ stwu r1,-(FM_ALIGN((31-24+1)*4)+FM_SIZE)(r1) ; Make some space on the stack
+ mflr r0 ; Save the link register
+ stw r24,FM_ARG0+0x00(r1) ; Save a register
+ stw r25,FM_ARG0+0x04(r1) ; Save a register
+ stw r26,FM_ARG0+0x08(r1) ; Save a register
+ stw r27,FM_ARG0+0x0C(r1) ; Save a register
+ stw r28,FM_ARG0+0x10(r1) ; Save a register
+ mr r24,r6 ; Save the reset request
+ stw r29,FM_ARG0+0x14(r1) ; Save a register
+ stw r30,FM_ARG0+0x18(r1) ; Save a register
+ stw r31,FM_ARG0+0x1C(r1) ; Save a register
+ stw r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
+
+#if DEBUG
+ lwz r11,pmapFlags(r3) ; Get pmaps flags
+ rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active?
+ bne htrPanic ; Call not valid for guest shadow assist pmap
+#endif
+
+ lwz r6,pmapvr(r3) ; Get the first part of the VR translation for pmap
+ lwz r7,pmapvr+4(r3) ; Get the second part
+
+
+ bl EXT(mapSetUp) ; Turn off interrupts, translation, and possibly enter 64-bit
+
+ mr r27,r11 ; Remember the old MSR
+ mr r26,r12 ; Remember the feature bits
+
+ xor r28,r3,r7 ; Change the common 32- and 64-bit half
+
+ bf-- pf64Bitb,htrSF1 ; skip if 32-bit...
+
+ rldimi r28,r6,32,0 ; Shift the fixed upper part of the physical over and cram in top
+
+htrSF1: mr r29,r4 ; Save top half of vaddr
+ mr r30,r5 ; Save the bottom half
+
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ li r25,0 ; Clear RC
+ bne-- htrBadLock ; Nope...
+
+ mr r3,r28 ; get the pmap address
+ mr r4,r29 ; Get bits 0:31 to look for
+ mr r5,r30 ; Get bits 32:63
+
+ bl EXT(mapSearch) ; Go see if we can find it (R7 comes back with mpFlags)
+
+ rlwinm. r0,r7,0,mpType ; Is this a normal mapping?
+ crmove cr1_eq,cr0_eq ; cr1_eq <- this is a normal mapping
+ andi. r0,r7,mpPerm|mpRIP ; Is it permanent or being removed?
+ crand cr1_eq,cr0_eq,cr1_eq ; cr1_eq <- normal mapping and not permanent and not being removed
+ mr. r31,r3 ; Save the mapping if we found it
+ crandc cr1_eq,cr1_eq,cr0_eq ; cr1_eq <- found & normal & not permanent & not being removed
+
+ bf-- cr1_eq,htrNotFound ; Not found, something special, or being removed...
+
+ bt++ pf64Bitb,htrDo64 ; Split for 64 bit
+
+ bl mapInvPte32 ; Invalidate and lock PTEG, also merge into physent
+
+ cmplwi cr1,r24,0 ; Do we want to clear RC?
+ lwz r12,mpVAddr+4(r31) ; Get the bottom of the mapping vaddr field
+ mr. r3,r3 ; Was there a previously valid PTE?
+ li r0,lo16(mpR|mpC) ; Get bits to clear
+
+ and r25,r5,r0 ; Save the RC bits
+ beq++ cr1,htrNoClr32 ; Nope...
+
+ andc r12,r12,r0 ; Clear mapping copy of RC
+ andc r5,r5,r0 ; Clear PTE copy of RC
+ sth r12,mpVAddr+6(r31) ; Set the new RC
+
+htrNoClr32: beq-- htrNoOld32 ; No previously valid PTE...
+
+ sth r5,6(r3) ; Store updated RC
+ eieio ; Make sure we do not reorder
+ stw r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+htrNoOld32: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+ li r3,mapRtOK ; Set normal return
+ b htrR32 ; Join common...
+
+ .align 5
+
+
+htrDo64: bl mapInvPte64 ; Invalidate and lock PTEG, also merge into physent
+
+ cmplwi cr1,r24,0 ; Do we want to clear RC?
+ lwz r12,mpVAddr+4(r31) ; Get the bottom of the mapping vaddr field
+ mr. r3,r3 ; Was there a previously valid PTE?
+ li r0,lo16(mpR|mpC) ; Get bits to clear
+
+ and r25,r5,r0 ; Save the RC bits
+ beq++ cr1,htrNoClr64 ; Nope...
+
+ andc r12,r12,r0 ; Clear mapping copy of RC
+ andc r5,r5,r0 ; Clear PTE copy of RC
+ sth r12,mpVAddr+6(r31) ; Set the new RC
+
+htrNoClr64: beq-- htrNoOld64 ; Nope, no previous PTE...
+
+ sth r5,14(r3) ; Store updated RC
+ eieio ; Make sure we do not reorder
+ std r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+htrNoOld64: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+ li r3,mapRtOK ; Set normal return
+ b htrR64 ; Join common...
+
+ .align 5
+
+htrReturn: bt++ pf64Bitb,htrR64 ; Yes...
+
+htrR32: mtmsr r27 ; Restore enables/translation/etc.
+ isync
+ b htrReturnC ; Join common...
+
+htrR64: mtmsrd r27 ; Restore enables/translation/etc.
+ isync
+
+htrReturnC: lwz r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Get the saved return
+ or r3,r3,r25 ; Send the RC bits back
+ lwz r24,FM_ARG0+0x00(r1) ; Restore a register
+ lwz r25,FM_ARG0+0x04(r1) ; Restore a register
+ lwz r26,FM_ARG0+0x08(r1) ; Restore a register
+ mtlr r0 ; Restore the return
+ lwz r27,FM_ARG0+0x0C(r1) ; Restore a register
+ lwz r28,FM_ARG0+0x10(r1) ; Restore a register
+ lwz r29,FM_ARG0+0x14(r1) ; Restore a register
+ lwz r30,FM_ARG0+0x18(r1) ; Restore a register
+ lwz r31,FM_ARG0+0x1C(r1) ; Restore a register
+ lwz r1,0(r1) ; Pop the stack
+ blr ; Leave...
+
+ .align 5
+
+htrBadLock: li r3,mapRtBadLk ; Set lock time out error code
+ b htrReturn ; Leave....
+
+htrNotFound:
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r3,mapRtNotFnd ; Set that we did not find the requested page
+ b htrReturn ; Leave....
+
+htrPanic: lis r0,hi16(Choke) ; System abend
+ ori r0,r0,lo16(Choke) ; System abend
+ li r3,failMapping ; Show that we failed some kind of mapping thing
+ sc
+
+
+;
+;
+; mapFindLockPN - find and lock physent for a given page number
+;
+;
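+;
+; A hedged C-style sketch of the scan below (field names mirror the mr*
+; offsets used in the code; the struct layout and lock helper are illustrative):
+;
+;	for (rgn = pmap_mem_regions; rgn->mrPhysTab != 0; rgn++) {
+;		if (pn >= rgn->mrStart && pn <= rgn->mrEnd)
+;			return mapPhysLock(&rgn->mrPhysTab[pn - rgn->mrStart]);
+;	}
+;	return 0;				/* no physent for this page */
+;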
+ .align 5
+mapFindLockPN:
+ lis r9,hi16(EXT(pmap_mem_regions)) ; Point to the start of the region table
+ mr r2,r3 ; Save our target
+ ori r9,r9,lo16(EXT(pmap_mem_regions)) ; Point to the start of the region table
+
+mapFLPNitr: lwz r3,mrPhysTab(r9) ; Get the actual table address
+ lwz r5,mrStart(r9) ; Get start of table entry
+ lwz r0,mrEnd(r9) ; Get end of table entry
+ addi r9,r9,mrSize ; Point to the next slot
+ cmplwi cr7,r3,0 ; Are we at the end of the table?
+ cmplw r2,r5 ; See if we are in this table
+ cmplw cr1,r2,r0 ; Check end also
+ sub r4,r2,r5 ; Calculate index to physical entry
+ beq-- cr7,mapFLPNmiss ; Leave if we did not find an entry...
+ cror cr0_lt,cr0_lt,cr1_gt ; Set CR0_LT if it is NOT this entry
+ slwi r4,r4,3 ; Get offset to physical entry
+
+ blt-- mapFLPNitr ; Did not find it...
+
+ add r3,r3,r4 ; Point right to the slot
+ b mapPhysLock ; Join common lock code
+
+mapFLPNmiss:
+ li r3,0 ; Show that we did not find it
+ blr ; Leave...
+
+
+;
+; mapPhysFindLock - find physent list and lock it
+; R31 points to mapping
+;
+ .align 5
+
+mapPhysFindLock:
+ lbz r4,mpFlags+1(r31) ; Get the index into the physent bank table
+ lis r3,ha16(EXT(pmap_mem_regions)) ; Get high order of physent table (note use of ha16 to get value appropriate for an addi of low part)
+ rlwinm r4,r4,2,24,29 ; Mask index bits and convert to byte offset
+ addi r4,r4,lo16(EXT(pmap_mem_regions)) ; Get low part of address of entry
+ add r3,r3,r4 ; Point to table entry
+ lwz r5,mpPAddr(r31) ; Get physical page number
+ lwz r7,mrStart(r3) ; Get the start of range
+ lwz r3,mrPhysTab(r3) ; Get the start of the entries for this bank
+ sub r6,r5,r7 ; Get index to physent
+ rlwinm r6,r6,3,0,28 ; Get offset to physent
+ add r3,r3,r6 ; Point right to the physent
+ b mapPhysLock ; Join in the lock...
+
+;
+; mapPhysLock - lock a physent list
+; R3 contains list header
+;
+ .align 5
+
+mapPhysLockS:
+ li r2,lgKillResv ; Get a spot to kill reservation
+ stwcx. r2,0,r2 ; Kill it...
+
+mapPhysLockT:
+ lwz r2,ppLink(r3) ; Get physent chain header
+ rlwinm. r2,r2,0,0,0 ; Is lock clear?
+ bne-- mapPhysLockT ; Nope, still locked...
+
+mapPhysLock:
+ lwarx r2,0,r3 ; Get the lock
+ rlwinm. r0,r2,0,0,0 ; Is it locked?
+ oris r0,r2,0x8000 ; Set the lock bit
+ bne-- mapPhysLockS ; It is locked, spin on it...
+ stwcx. r0,0,r3 ; Try to stuff it back...
+ bne-- mapPhysLock ; Collision, try again...
+ isync ; Clear any speculations
+ blr ; Leave...
+
+
+;
+; mapPhysUnlock - unlock a physent list
+; R3 contains list header
+;
+ .align 5
+
+mapPhysUnlock:
+ lwz r0,ppLink(r3) ; Get physent chain header
+ rlwinm r0,r0,0,1,31 ; Clear the lock bit
+ eieio ; Make sure unlock comes last
+ stw r0,ppLink(r3) ; Unlock the list
+ blr
+
+;
+; mapPhysMerge - merge the RC bits into the master copy
+; R3 points to the physent
+; R4 contains the RC bits
+;
+; Note: we just return if RC is 0
+;
+ .align 5
+
+mapPhysMerge:
+ rlwinm. r4,r4,PTE1_REFERENCED_BIT+(64-ppRb),ppRb-32,ppCb-32 ; Isolate RC bits
+ la r5,ppLink+4(r3) ; Point to the RC field
+ beqlr-- ; Leave if RC is 0...
+
+mapPhysMergeT:
+ lwarx r6,0,r5 ; Get the RC part
+ or r6,r6,r4 ; Merge in the RC
+ stwcx. r6,0,r5 ; Try to stuff it back...
+ bne-- mapPhysMergeT ; Collision, try again...
+ blr ; Leave...
+
+;
+; Sets the physent link pointer and preserves all flags
+; The list is locked
+; R3 points to physent
+; R4 has link to set
+;
+
+ .align 5
+
+mapPhyCSet32:
+ la r5,ppLink+4(r3) ; Point to the link word
+
+mapPhyCSetR:
+ lwarx r2,0,r5 ; Get the link and flags
+ rlwimi r4,r2,0,ppFlags ; Insert the flags
+ stwcx. r4,0,r5 ; Stick them back
+ bne-- mapPhyCSetR ; Someone else did something, try again...
+ blr ; Return...
+
+ .align 5
+
+mapPhyCSet64:
+ li r0,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+
+mapPhyCSet64x:
+ ldarx r2,0,r3 ; Get the link and flags
+ and r5,r2,r0 ; Isolate the flags
+ or r6,r4,r5 ; Add them to the link
+ stdcx. r6,0,r3 ; Stick them back
+ bne-- mapPhyCSet64x ; Someone else did something, try again...
+ blr ; Return...
+
+;
+; mapBumpBusy - increment the busy count on a mapping
+; R3 points to mapping
+;
+
+ .align 5
+
+mapBumpBusy:
+ lwarx r4,0,r3 ; Get mpBusy
+ addis r4,r4,0x0100 ; Bump the busy count
+ stwcx. r4,0,r3 ; Save it back
+ bne-- mapBumpBusy ; This did not work, try again...
+ blr ; Leave...
+
+;
+; mapDropBusy - decrement the busy count on a mapping
+; R3 points to mapping
+;
+
+ .globl EXT(mapping_drop_busy)
+ .align 5
+
+LEXT(mapping_drop_busy)
+mapDropBusy:
+ lwarx r4,0,r3 ; Get mpBusy
+ addis r4,r4,0xFF00 ; Drop the busy count
+ stwcx. r4,0,r3 ; Save it back
+ bne-- mapDropBusy ; This did not work, try again...
+ blr ; Leave...
+
+;
+; mapDrainBusy - drain the busy count on a mapping
+; R3 points to mapping
+; Note: we already have a busy for ourselves. Only one
+; busy per processor is allowed, so we just spin here
+; waiting for the count to drop to 1.
+; Also, the mapping can not be on any lists when we do this
+; so all we are doing is waiting until it can be released.
+;
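+;
+; Roughly (a hedged sketch; the busy count is the high byte of mpFlags, per the
+; rlwinm extraction below):
+;
+;	while (((mp->mpFlags >> 24) & 0xFF) != 1)
+;		;				/* spin until only our own busy remains */
+;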
+
+ .align 5
+
+mapDrainBusy:
+ lwz r4,mpFlags(r3) ; Get mpBusy
+ rlwinm r4,r4,8,24,31 ; Clean it up
+ cmplwi r4,1 ; Is it just our busy?
+ beqlr++ ; Yeah, it is clear...
+ b mapDrainBusy ; Try again...
+
+
+
+;
+; handleDSeg - handle a data segment fault
+; handleISeg - handle an instruction segment fault
+;
+; All that we do here is to map these to DSI or ISI and ensure
+; that the hash bit is not set. This forces the fault code
+; to also handle the missing segment.
+;
+; At entry R2 contains per_proc, R13 contains savarea pointer,
+; and R11 is the exception code.
+;
+
+ .align 5
+ .globl EXT(handleDSeg)
+
+LEXT(handleDSeg)
+
+ li r11,T_DATA_ACCESS ; Change fault to DSI
+ stw r11,saveexception(r13) ; Change the exception code from seg fault to PTE miss
+ b EXT(handlePF) ; Join common...
+
+ .align 5
+ .globl EXT(handleISeg)
+
+LEXT(handleISeg)
+
+ li r11,T_INSTRUCTION_ACCESS ; Change fault to ISI
+ stw r11,saveexception(r13) ; Change the exception code from seg fault to PTE miss
+ b EXT(handlePF) ; Join common...
+
+
+/*
+ * handlePF - handle a page fault interruption
+ *
+ * At entry R2 contains per_proc, R13 contains savarea pointer,
+ * and R11 is the exception code.
+ *
+ * This first part does a quick check to see if we can handle the fault.
+ * We cannot handle any kind of protection exceptions here, so we pass
+ * them up to the next level.
+ *
+ * NOTE: In order for a page-fault redrive to work, the translation miss
+ * bit must be set in the DSISR (or SRR1 for IFETCH). That must occur
+ * before we come here.
+ */
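+
+; A hedged sketch of the quick filter performed below (the dsi* names are the
+; DSISR flag definitions this file already uses; the C form is illustration
+; only, not an actual C path in the kernel):
+;
+;	if (dsisr & (dsiNoEx | dsiProt | dsiInvMode | dsiAC))
+;		goto pass_fault_up;		/* protection-type faults go to the next level */
+;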
+
+ .align 5
+ .globl EXT(handlePF)
+
+LEXT(handlePF)
+
+ mfsprg r12,2 ; Get feature flags
+ cmplwi r11,T_INSTRUCTION_ACCESS ; See if this is for the instruction
+ lwz r8,savesrr1+4(r13) ; Get the MSR to determine mode
+ mtcrf 0x02,r12 ; move pf64Bit to cr6
+ lis r0,hi16(dsiNoEx|dsiProt|dsiInvMode|dsiAC) ; Get the types that we cannot handle here
+ lwz r18,SAVflags(r13) ; Get the flags
+
+ beq-- gotIfetch ; We have an IFETCH here...
+
+ lwz r27,savedsisr(r13) ; Get the DSISR
+ lwz r29,savedar(r13) ; Get the first half of the DAR
+ lwz r30,savedar+4(r13) ; And second half
+
+ b ckIfProt ; Go check if this is a protection fault...
+
+gotIfetch: andis. r27,r8,hi16(dsiValid) ; Clean this up to construct a DSISR value
+ lwz r29,savesrr0(r13) ; Get the first half of the instruction address
+ lwz r30,savesrr0+4(r13) ; And second half
+ stw r27,savedsisr(r13) ; Save the "constructed" DSISR
+
+ckIfProt: and. r4,r27,r0 ; Is this a non-handlable exception?
+ li r20,64 ; Set a limit of 64 nests for sanity check
+ bne-- hpfExit ; Yes... (probably not though)
+
+;
+; Note: if the RI is on, we are accessing user space from the kernel, therefore we
+; should be loading the user pmap here.
+;
+
+ andi. r0,r8,lo16(MASK(MSR_PR)|MASK(MSR_RI)) ; Are we addressing user or kernel space?
+ lis r8,hi16(EXT(kernel_pmap_phys)) ; Assume kernel
+ mr r19,r2 ; Remember the per_proc
+ ori r8,r8,lo16(EXT(kernel_pmap_phys)) ; Assume kernel (bottom of address)
+ mr r23,r30 ; Save the low part of faulting address
+ beq-- hpfInKern ; Skip if we are in the kernel
+ la r8,ppUserPmap(r19) ; Point to the current user pmap
+
+hpfInKern: mr r22,r29 ; Save the high part of faulting address
+
+ bt-- pf64Bitb,hpf64a ; If 64-bit, skip the next bit...
+
+;
+; On 32-bit machines we emulate a segment exception by loading unused SRs with a
+; predefined value that corresponds to no address space. When we see that value
+; we turn off the PTE miss bit in the DSISR to drive the code later on that will
+; cause the proper SR to be loaded.
+;
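+;
+; In rough C terms (hedged; invalSpace and the dsiMiss bit are values this file
+; already uses, and the helper name is illustrative):
+;
+;	if (!redrive && segment_register_for(vaddr) == invalSpace)
+;		dsisr &= ~dsiMiss;		/* force the SR-load path instead of a PTE miss */
+;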
+
+ lwz r28,4(r8) ; Pick up the pmap
+ rlwinm. r18,r18,0,SAVredriveb,SAVredriveb ; Was this a redrive?
+ mr r25,r28 ; Save the original pmap (in case we nest)
+ lwz r0,pmapFlags(r28) ; Get pmap's flags
+ bne hpfGVtest ; Segs are not ours if so...
+ mfsrin r4,r30 ; Get the SR that was used for translation
+ cmplwi r4,invalSpace ; Is this a simulated segment fault?
+ bne++ hpfGVtest ; No...
+
+ rlwinm r27,r27,0,dsiMissb+1,dsiMissb-1 ; Clear the PTE miss bit in DSISR
+ b hpfGVtest ; Join on up...
+
+ .align 5
+
+ nop ; Push hpfNest to a 32-byte boundary
+ nop ; Push hpfNest to a 32-byte boundary
+ nop ; Push hpfNest to a 32-byte boundary
+
+hpf64a: ld r28,0(r8) ; Get the pmap pointer (64-bit)
+ mr r25,r28 ; Save the original pmap (in case we nest)
+ lwz r0,pmapFlags(r28) ; Get pmap's flags
+
+hpfGVtest: rlwinm. r0,r0,0,pmapVMgsaa ; Using guest shadow mapping assist?
+ bne hpfGVxlate ; Yup, do accelerated shadow stuff
+
+;
+; This is where we loop descending nested pmaps
+;
+
+hpfNest: la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ addi r20,r20,-1 ; Count nest try
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne-- hpfBadLock ; Nope...
+
+ mr r3,r28 ; Get the pmap pointer
+ mr r4,r22 ; Get top of faulting vaddr
+ mr r5,r23 ; Get bottom of faulting vaddr
+ bl EXT(mapSearch) ; Go see if we can find it (R7 gets mpFlags)
+
+ rlwinm r0,r7,0,mpRIPb,mpRIPb ; Are we removing this one?
+ mr. r31,r3 ; Save the mapping if we found it
+ cmplwi cr1,r0,0 ; Check for removal
+ crorc cr0_eq,cr0_eq,cr1_eq ; Merge not found and removing
+
+ bt-- cr0_eq,hpfNotFound ; Not found or removing...
+
+ rlwinm r0,r7,0,mpType ; Isolate mapping type
+ cmplwi r0,mpNest ; Are we again nested?
+ cmplwi cr1,r0,mpLinkage ; Are we a linkage type?
+ cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- nested or linkage type?
+ mr r26,r7 ; Get the flags for this mapping (passed back from search call)
+
+ lhz r21,mpSpace(r31) ; Get the space
+
+ bne++ hpfFoundIt ; No, we found our guy...
+
+
+#if pmapTransSize != 12
+#error pmapTrans entry size is not 12 bytes!!!!!!!!!!!! It is pmapTransSize
+#endif
+ cmplwi r0,mpLinkage ; Linkage mapping?
+ cmplwi cr1,r20,0 ; Too many nestings?
+ beq-- hpfSpclNest ; Do we need to do special handling?
+
+hpfCSrch: lhz r21,mpSpace(r31) ; Get the space
+ lwz r8,mpNestReloc(r31) ; Get the vaddr relocation
+ lwz r9,mpNestReloc+4(r31) ; Get the vaddr relocation bottom half
+ la r3,pmapSXlk(r28) ; Point to the old pmap search lock
+ lis r0,0x8000 ; Get 0xFFFFFFFF80000000
+ lis r10,hi16(EXT(pmapTrans)) ; Get the translate table
+ add r0,r0,r0 ; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit
+ blt-- cr1,hpfNestTooMuch ; Too many nestings, must be a loop...
+ or r23,r23,r0 ; Make sure a carry will propagate all the way in 64-bit
+ slwi r11,r21,3 ; Multiply space by 8
+ ori r10,r10,lo16(EXT(pmapTrans)) ; Get the translate table low part
+ addc r23,r23,r9 ; Relocate bottom half of vaddr
+ lwz r10,0(r10) ; Get the actual translation map
+ slwi r12,r21,2 ; Multiply space by 4
+ add r10,r10,r11 ; Add in the higher part of the index
+ rlwinm r23,r23,0,0,31 ; Clean up the relocated address (does nothing in 32-bit)
+ adde r22,r22,r8 ; Relocate the top half of the vaddr
+ add r12,r12,r10 ; Now we are pointing at the space to pmap translation entry
+ bl sxlkUnlock ; Unlock the search list
+
+ bt++ pf64Bitb,hpfGetPmap64 ; Separate handling for 64-bit machines
+ lwz r28,pmapPAddr+4(r12) ; Get the physical address of the new pmap
+ cmplwi r28,0 ; Is the pmap paddr valid?
+ bne+ hpfNest ; Nest into new pmap...
+ b hpfBadPmap ; Handle bad pmap
+
+hpfGetPmap64:
+ ld r28,pmapPAddr(r12) ; Get the physical address of the new pmap
+ cmpldi r28,0 ; Is the pmap paddr valid?
+ bne++ hpfNest ; Nest into new pmap...
+ b hpfBadPmap ; Handle bad pmap
+
+
+;
+; Error condition. We only allow 64 nestings. This keeps us from having to
+; check for recursive nests when we install them.
+;
+
+ .align 5
+
+hpfNestTooMuch:
+ lwz r20,savedsisr(r13) ; Get the DSISR
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list (R3 good from above)
+ ori r20,r20,1 ; Indicate that there was a nesting problem
+ stw r20,savedsisr(r13) ; Stash it
+ lwz r11,saveexception(r13) ; Restore the exception code
+ b EXT(PFSExit) ; Leave...
+
+;
+; Error condition - lock failed - this is fatal
+;
+
+ .align 5
+
+hpfBadLock:
+ lis r0,hi16(Choke) ; System abend
+ ori r0,r0,lo16(Choke) ; System abend
+ li r3,failMapping ; Show mapping failure
+ sc
+
+;
+; Error condition - space id selected an invalid pmap - fatal
+;
+
+ .align 5
+
+hpfBadPmap:
+ lis r0,hi16(Choke) ; System abend
+ ori r0,r0,lo16(Choke) ; System abend
+ li r3,failPmap ; Show invalid pmap
+ sc
+
+;
+; Did not find any kind of mapping
+;
+
+ .align 5
+
+hpfNotFound:
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock it
+ lwz r11,saveexception(r13) ; Restore the exception code
+
+hpfExit: ; We need this because we can not do a relative branch
+ b EXT(PFSExit) ; Leave...
+
+
+;
+; Here is where we handle special mappings. So far, the only use is to load a
+; processor specific segment register for copy in/out handling.
+;
+; The only (so far implemented) special map is used for copyin/copyout.
+; We keep a mapping of a "linkage" mapping in the per_proc.
+; The linkage mapping is basically a nested pmap that is switched in
+; as part of context switch. It relocates the appropriate user address
+; space slice into the right place in the kernel.
+;
+
+ .align 5
+
+hpfSpclNest:
+ la r31,ppUMWmp(r19) ; Just point to the mapping
+ oris r27,r27,hi16(dsiLinkage) ; Show that we had a linkage mapping here
+ b hpfCSrch ; Go continue search...
+
+
+;
+; We have now found a mapping for the address we faulted on.
+;
+
+;
+; Here we go about calculating what the VSID should be. We concatenate
+; the space ID (14 bits wide) 3 times. We then slide the vaddr over
+; so that bits 0:35 are in 14:49 (leaves a hole for one copy of the space ID).
+; Then we XOR the expanded space ID with the shifted vaddr. This gives us
+; the VSID.
+;
+; This is used both for segment handling and PTE handling
+;
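+;
+; A hedged numeric sketch of the above (bit positions are illustrative and use
+; ordinary little-endian shift notation; space is the 14-bit space ID and esid
+; is EA bits 0:35):
+;
+;	uint64_t expanded = space | ((uint64_t)space << 14) | ((uint64_t)space << 28);
+;	uint64_t vsid     = ((uint64_t)esid << 14) ^ expanded;
+;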
+
+
+#if maxAdrSpb != 14
+#error maxAdrSpb (address space id size) is not 14 bits!!!!!!!!!!!!
+#endif
+
+; Important non-volatile registers at this point ('home' means the final pmap/mapping found
+; when a multi-level mapping has been successfully searched):
+; r21: home space id number
+; r22: relocated high-order 32 bits of vaddr
+; r23: relocated low-order 32 bits of vaddr
+; r25: pmap physical address
+; r27: dsisr
+; r28: home pmap physical address
+; r29: high-order 32 bits of faulting vaddr
+; r30: low-order 32 bits of faulting vaddr
+; r31: mapping's physical address
+
+ .align 5
+
+hpfFoundIt: lwz r12,pmapFlags(r28) ; Get the pmap flags so we can find the keys for this segment
+hpfGVfound: rlwinm. r0,r27,0,dsiMissb,dsiMissb ; Did we actually miss the segment?
+ rlwinm r15,r23,18,14,17 ; Shift 32:35 (0:3) of vaddr just above space ID
+ rlwinm r20,r21,28,22,31 ; Shift upper 10 bits of space into high order
+ rlwinm r14,r22,18,14,31 ; Shift 0:17 of vaddr over
+ rlwinm r0,r27,0,dsiLinkageb,dsiLinkageb ; Isolate linkage mapping flag
+ rlwimi r21,r21,14,4,17 ; Make a second copy of space above first
+ cmplwi cr5,r0,0 ; Did we just do a special nesting?
+ rlwimi r15,r22,18,0,13 ; Shift 18:31 of vaddr just above shifted 32:35
+ crorc cr0_eq,cr0_eq,cr5_eq ; Force ourselves through the seg load code if special nest
+ rlwimi r21,r21,28,0,3 ; Get low order of 3rd copy of space at top of register
+ xor r14,r14,r20 ; Calculate the top half of VSID
+ xor r15,r15,r21 ; Calculate the bottom half of the VSID
+ rlwinm r14,r14,12,15,19 ; Slide the top of the VSID over to correct position (trim for 65 bit addressing)
+ rlwinm r12,r12,9,20,22 ; Isolate and position key for cache entry
+ rlwimi r14,r15,12,20,31 ; Slide top of bottom of VSID over into the top
+ rlwinm r15,r15,12,0,19 ; Slide the last nybble into the low order segment position
+ or r12,r12,r15 ; Add key into the bottom of VSID
+;
+; Note: ESID is in R22:R23 pair; VSID is in R14:R15; cache form VSID is R14:R12
+
+ bne++ hpfPteMiss ; Nope, normal PTE miss...
+
+;
+; Here is the only place that we make an entry in the pmap segment cache.
+;
+; Note that we do not make an entry in the segment cache for special
+; nested mappings. This makes the copy in/out segment get refreshed
+; when switching threads.
+;
+; The first thing that we do is to look up the ESID we are going to load
+; into a segment in the pmap cache. If it is already there, this is
+; a segment that appeared since the last time we switched address spaces.
+; If all is correct, then it was another processor that made the cache
+; entry. If not, well, it is an error that we should die on, but I have
+; not figured out a good way to trap it yet.
+;
+; If we get a hit, we just bail, otherwise, lock the pmap cache, select
+; an entry based on the generation number, update the cache entry, and
+; also update the pmap sub-tag as well. The sub-tag is a table of 4 bit
+; entries that correspond to the last 4 bits (32:35 for 64-bit and
+; 0:3 for 32-bit) of the ESID.
+;
+; Then we unlock and bail.
+;
+; First lock it. Then select a free slot or steal one based on the generation
+; number. Then store it, update the allocation flags, and unlock.
+;
+; The cache entry contains an image of the ESID/VSID pair we would load for
+; 64-bit architecture. For 32-bit, it is a simple transform to an SR image.
+;
+; Remember, this cache entry goes in the ORIGINAL pmap (saved in R25), not
+; the current one, which may have changed because we nested.
+;
+; Also remember that we do not store the valid bit in the ESID. If we
+; do, this will break some other stuff.
+;
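+;
+; Hedged pseudo-C of the bookkeeping below (field names mirror the
+; pmapSegCache/pmapSCSubTag offsets used in the code; the slot-selection and
+; locking helpers are simplified stand-ins):
+;
+;	slot = free_slot_or_steal_by_generation(pmapCCtl);
+;	pmapSegCache[slot].sgcESID = esid;		/* valid bit is NOT stored */
+;	pmapSegCache[slot].sgcVSID = vsid_and_keys;
+;	set_subtag(pmapSCSubTag, slot, esid & 0xF);	/* 4-bit tag = last nybble of ESID */
+;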
+
+ bne-- cr5,hpfNoCacheEnt2 ; Skip the cache entry if this is a "special nest" fault....
+
+ mr r3,r25 ; Point to the pmap
+ mr r4,r29 ; ESID high half
+ mr r5,r30 ; ESID low half
+ bl pmapCacheLookup ; Go see if this is in the cache already
+
+ mr. r3,r3 ; Did we find it?
+ mr r4,r11 ; Copy this to a different register
+
+ bne-- hpfNoCacheEnt ; Yes, we found it, no need to make another entry...
+
+ lwz r10,pmapSCSubTag(r25) ; Get the first part of the sub-tag lookup table
+ lwz r11,pmapSCSubTag+4(r25) ; Get the second part of the sub-tag lookup table
+
+ cntlzw r7,r4 ; Find a free slot
+
+ subi r6,r7,pmapSegCacheUse ; We end up with a negative if we find one
+ rlwinm r30,r30,0,0,3 ; Clean up the ESID
+ srawi r6,r6,31 ; Get 0xFFFFFFFF if we have one, 0 if not
+ addi r5,r4,1 ; Bump the generation number
+ and r7,r7,r6 ; Clear bit number if none empty
+ andc r8,r4,r6 ; Clear generation count if we found an empty
+ rlwimi r4,r5,0,17,31 ; Insert the new generation number into the control word
+ or r7,r7,r8 ; Select a slot number
+ li r8,0 ; Clear
+ andi. r7,r7,pmapSegCacheUse-1 ; Wrap into the number we are using
+ oris r8,r8,0x8000 ; Get the high bit on
+ la r9,pmapSegCache(r25) ; Point to the segment cache
+ slwi r6,r7,4 ; Get index into the segment cache
+ slwi r2,r7,2 ; Get index into the segment cache sub-tag index
+ srw r8,r8,r7 ; Get the mask
+ cmplwi r2,32 ; See if we are in the first or second half of sub-tag
+ li r0,0 ; Clear
+ rlwinm r2,r2,0,27,31 ; Wrap shift so we do not shift cache entries 8-F out
+ oris r0,r0,0xF000 ; Get the sub-tag mask
+ add r9,r9,r6 ; Point to the cache slot
+ srw r0,r0,r2 ; Slide sub-tag mask to right slot (shift work for either half)
+ srw r5,r30,r2 ; Slide sub-tag to right slot (shift work for either half)
+
+ stw r29,sgcESID(r9) ; Save the top of the ESID
+ andc r10,r10,r0 ; Clear sub-tag slot in case we are in top
+ andc r11,r11,r0 ; Clear sub-tag slot in case we are in bottom
+ stw r30,sgcESID+4(r9) ; Save the bottom of the ESID
+ or r10,r10,r5 ; Stick in subtag in case top half
+ or r11,r11,r5 ; Stick in subtag in case bottom half
+ stw r14,sgcVSID(r9) ; Save the top of the VSID
+ andc r4,r4,r8 ; Clear the invalid bit for the slot we just allocated
+ stw r12,sgcVSID+4(r9) ; Save the bottom of the VSID and the key
+ bge hpfSCSTbottom ; Go save the bottom part of sub-tag
+
+ stw r10,pmapSCSubTag(r25) ; Save the top of the sub-tag
+ b hpfNoCacheEnt ; Go finish up...
+
+hpfSCSTbottom:
+ stw r11,pmapSCSubTag+4(r25) ; Save the bottom of the sub-tag
+
+
+hpfNoCacheEnt:
+ eieio ; Make sure cache is updated before lock
+ stw r4,pmapCCtl(r25) ; Unlock, allocate, and bump generation number
+
+
+hpfNoCacheEnt2:
+ lwz r4,ppMapFlags(r19) ; Get the protection key modifier
+ bt++ pf64Bitb,hpfLoadSeg64 ; If 64-bit, go load the segment...
+
+;
+; Make and enter 32-bit segment register
+;
+
+ lwz r16,validSegs(r19) ; Get the valid SR flags
+ xor r12,r12,r4 ; Alter the storage key before loading segment register
+ rlwinm r2,r30,4,28,31 ; Isolate the segment we are setting
+ rlwinm r6,r12,19,1,3 ; Insert the keys and N bit
+ lis r0,0x8000 ; Set bit 0
+ rlwimi r6,r12,20,12,31 ; Insert 4:23 the VSID
+ srw r0,r0,r2 ; Get bit corresponding to SR
+ rlwimi r6,r14,20,8,11 ; Get the last nybble of the SR contents
+ or r16,r16,r0 ; Show that SR is valid
+
+ mtsrin r6,r30 ; Set the actual SR
+
+ stw r16,validSegs(r19) ; Set the valid SR flags
+
+ b hpfPteMiss ; SR loaded, go do a PTE...
+
+;
+; Make and enter 64-bit segment look-aside buffer entry.
+; Note that the cache entry is the right format except for valid bit.
+; We also need to convert from long long to 64-bit register values.
+;
+
+
+ .align 5
+
+hpfLoadSeg64:
+ ld r16,validSegs(r19) ; Get the valid SLB entry flags
+ sldi r8,r29,32 ; Move high order address over
+ sldi r10,r14,32 ; Move high part of VSID over
+
+ not r3,r16 ; Make valids be 0s
+ li r0,1 ; Prepare to set bit 0
+
+ cntlzd r17,r3 ; Find a free SLB
+ xor r12,r12,r4 ; Alter the storage key before loading segment table entry
+ or r9,r8,r30 ; Form full 64-bit address
+ cmplwi r17,63 ; Did we find a free SLB entry?
+ sldi r0,r0,63 ; Get bit 0 set
+ or r10,r10,r12 ; Move in low part and keys
+ addi r17,r17,1 ; Skip SLB 0 always
+ blt++ hpfFreeSeg ; Yes, go load it...
+
+;
+; No free SLB entries, select one that is in use and invalidate it
+;
+ lwz r4,ppSegSteal(r19) ; Get the next slot to steal
+ addi r17,r4,pmapSegCacheUse+1 ; Select stealee from non-cached slots only
+ addi r4,r4,1 ; Set next slot to steal
+ slbmfee r7,r17 ; Get the entry that is in the selected spot
+ subi r2,r4,63-pmapSegCacheUse ; Force steal to wrap
+ rldicr r7,r7,0,35 ; Clear the valid bit and the rest
+ srawi r2,r2,31 ; Get -1 if steal index still in range
+ slbie r7 ; Invalidate the in-use SLB entry
+ and r4,r4,r2 ; Reset steal index when it should wrap
+ isync ;
+
+ stw r4,ppSegSteal(r19) ; Set the next slot to steal
+;
+; We are now ready to stick the SLB entry in the SLB and mark it in use
+;
+
+hpfFreeSeg:
+ subi r4,r17,1 ; Adjust shift to account for skipping slb 0
+ mr r7,r9 ; Get a copy of the ESID with bits 36:63 clear
+ srd r0,r0,r4 ; Set bit mask for allocation
+ oris r9,r9,0x0800 ; Turn on the valid bit
+ or r16,r16,r0 ; Turn on the allocation flag
+ rldimi r9,r17,0,58 ; Copy in the SLB entry selector
+
+ beq++ cr5,hpfNoBlow ; Skip blowing away the SLBE if this is not a special nest...
+ slbie r7 ; Blow away a potential duplicate
+
+hpfNoBlow: slbmte r10,r9 ; Make that SLB entry
+
+ std r16,validSegs(r19) ; Mark as valid
+ b hpfPteMiss ; STE loaded, go do a PTE...
+
+;
+; The segment has been set up and loaded if need be. Now we are ready to build the
+; PTE and get it into the hash table.
+;
+; Note that there is actually a race here. If we start fault processing on
+; a different pmap, i.e., we have descended into a nested pmap, it is possible
+; that the nest could have been removed from the original pmap. We would
+; succeed with this translation anyway. I do not think we need to worry
+; about this (famous last words) because nobody should be unnesting anything
+; if there are still people actively using them. It should be up to the
+; higher level VM system to put the kibosh on this.
+;
+; There is also another race here: if we fault on the same mapping on more than
+; one processor at the same time, we could end up with multiple PTEs for the same
+; mapping. This is not a good thing.... We really only need one of the
+; fault handlers to finish, so what we do is to set a "fault in progress" flag in
+; the mapping. If we see that set, we just abandon the handler and hope that by
+; the time we restore context and restart the interrupted code, the fault has
+; been resolved by the other guy. If not, we will take another fault.
+;
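+;
+; Hedged sketch of the mpFIP hand-off just described (mpFIP and mpHValid are
+; the flag bits this code uses; the atomic helpers are illustrative only):
+;
+;	do {
+;		flags = load_reserved(&mp->mpFlags);
+;		if ((flags & mpFIP) || (mp->mpPte & mpHValid))
+;			return abandon();	/* someone else has it, or it is already done */
+;	} while (!store_conditional(&mp->mpFlags, flags | mpFIP));
+;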
+
+;
+; NOTE: IMPORTANT - CR7 contains a flag indicating if we have a block mapping or not.
+; It is required to stay there until after we call mapSelSlot!!!!
+;
+
+ .align 5
+
+hpfPteMiss: lwarx r0,0,r31 ; Load the mapping flag field
+ lwz r12,mpPte(r31) ; Get the quick pointer to PTE
+ li r3,mpHValid ; Get the PTE valid bit
+ andi. r2,r0,lo16(mpFIP) ; Are we handling a fault on the other side?
+ ori r2,r0,lo16(mpFIP) ; Set the fault in progress flag
+ crnot cr1_eq,cr0_eq ; Remember if FIP was on
+ and. r12,r12,r3 ; Isolate the valid bit
+ crorc cr0_eq,cr1_eq,cr0_eq ; Bail if FIP is on. Then, if already have PTE, bail...
+ beq-- hpfAbandon ; Yes, other processor is or already has handled this...
+ rlwinm r0,r2,0,mpType ; Isolate mapping type
+ cmplwi r0,mpBlock ; Is this a block mapping?
+ crnot cr7_eq,cr0_eq ; Remember if we have a block mapping
+ stwcx. r2,0,r31 ; Store the flags
+ bne-- hpfPteMiss ; Collision, try again...
+
+ bt++ pf64Bitb,hpfBldPTE64 ; Skip down to the 64 bit stuff...
+
+;
+; At this point we are about to do the 32-bit PTE generation.
+;
+; The following is the R14:R15 pair that contains the "shifted" VSID:
+;
+; 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+;
+; The 24 bits of the 32-bit architecture VSID is in the following:
+;
+; 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; |////////|////////|////////|////VVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+;
+
+
+hpfBldPTE32:
+ lwz r25,mpVAddr+4(r31) ; Grab the base virtual address for the mapping (32-bit portion)
+ lwz r24,mpPAddr(r31) ; Grab the base physical page number for the mapping
+
+ mfsdr1 r27 ; Get the hash table base address
+
+ rlwinm r0,r23,0,4,19 ; Isolate just the page index
+ rlwinm r18,r23,10,26,31 ; Extract the API
+ xor r19,r15,r0 ; Calculate hash << 12
+ mr r2,r25 ; Save the flag part of the mapping
+ rlwimi r18,r14,27,1,4 ; Move bits 28:31 of the "shifted" VSID into the PTE image
+ rlwinm r16,r27,16,7,15 ; Extract the hash table size
+ rlwinm r25,r25,0,0,19 ; Clear out the flags
+ slwi r24,r24,12 ; Change ppnum to physical address (note: 36-bit addressing not supported)
+ sub r25,r23,r25 ; Get offset in mapping to page (0 unless block map)
+ ori r16,r16,lo16(0xFFC0) ; Slap in the bottom of the mask
+ rlwinm r27,r27,0,0,15 ; Extract the hash table base
+ rlwinm r19,r19,26,6,25 ; Shift hash over to make offset into hash table
+ add r24,r24,r25 ; Adjust to true physical address
+ rlwimi r18,r15,27,5,24 ; Move bits 32:51 of the "shifted" VSID into the PTE image
+ rlwimi r24,r2,0,20,31 ; Slap in the WIMG and prot
+ and r19,r19,r16 ; Wrap hash table offset into the hash table
+ ori r24,r24,lo16(mpR) ; Turn on the reference bit right now
+ rlwinm r20,r19,28,10,29 ; Shift hash over to make offset into PCA
+ add r19,r19,r27 ; Point to the PTEG
+ subfic r20,r20,-4 ; Get negative offset to PCA
+ oris r18,r18,lo16(0x8000) ; Make sure the valid bit is on
+ add r20,r20,r27 ; Point to the PCA slot
+
+;
+; We now have a valid PTE pair in R18/R24. R18 is PTE upper and R24 is PTE lower.
+; R19 contains the offset of the PTEG in the hash table. R20 has offset into the PCA.
+;
+; We need to check PTE pointer (mpPte) again after we lock the PTEG. It is possible
+; that some other processor beat us and stuck in a PTE or that
+; all we had was a simple segment exception and the PTE was there the whole time.
+; If we find a pointer, we are done.
+;
+
+ mr r7,r20 ; Copy the PCA pointer
+ bl mapLockPteg ; Lock the PTEG
+
+ lwz r12,mpPte(r31) ; Get the offset to the PTE
+ mr r17,r6 ; Remember the PCA image
+ mr r16,r6 ; Prime the post-select PCA image
+ andi. r0,r12,mpHValid ; Is there a PTE here already?
+ li r21,8 ; Get the number of slots
+
+ bne- cr7,hpfNoPte32 ; Skip this for a block mapping...
+
+ bne- hpfBailOut ; Someone already did this for us...
+
+;
+; The mapSelSlot function selects a PTEG slot to use. As input, it uses R6 as a
+; pointer to the PCA. When it returns, R3 contains 0 if an unoccupied slot was
+; selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE.
+; R4 returns the slot index.
+;
+; REMEMBER: CR7 indicates that we are building a block mapping.
+;
+
+hpfNoPte32: subic. r21,r21,1 ; See if we have tried all slots
+ mr r6,r17 ; Get back the original PCA
+ rlwimi r6,r16,0,8,15 ; Insert the updated steal slot
+ blt- hpfBailOut ; Holy Cow, all slots are locked...
+
+ bl mapSelSlot ; Go select a slot (note that the PCA image is already set up)
+
+ cmplwi cr5,r3,1 ; Did we steal a slot?
+ rlwimi r19,r4,3,26,28 ; Insert PTE index into PTEG address yielding PTE address
+ mr r16,r6 ; Remember the PCA image after selection
+ blt+ cr5,hpfInser32 ; Nope, no steal...
+
+ lwz r6,0(r19) ; Get the old PTE
+ lwz r7,4(r19) ; Get the real part of the stealee
+ rlwinm r6,r6,0,1,31 ; Clear the valid bit
+ bgt cr5,hpfNipBM ; Do not try to lock a non-existent physent for a block mapping...
+ srwi r3,r7,12 ; Change phys address to a ppnum
+ bl mapFindPhyTry ; Go find and try to lock physent (note: if R3 is 0, there is no physent for this page)
+ cmplwi cr1,r3,0 ; Check if this is in RAM
+ bne- hpfNoPte32 ; Could not get it, try for another...
+
+ crmove cr5_gt,cr1_eq ; If we did not find a physent, pretend that this is a block map
+
+hpfNipBM: stw r6,0(r19) ; Set the invalid PTE
+
+ sync ; Make sure the invalid is stored
+ li r9,tlbieLock ; Get the TLBIE lock
+ rlwinm r10,r6,21,0,3 ; Shift last 4 bits of space to segment part
+
+hpfTLBIE32: lwarx r0,0,r9 ; Get the TLBIE lock
+ mfsprg r4,0 ; Get the per_proc
+ rlwinm r8,r6,25,18,31 ; Extract the space ID
+ rlwinm r11,r6,25,18,31 ; Extract the space ID
+ lwz r7,hwSteals(r4) ; Get the steal count
+ srwi r2,r6,7 ; Align segment number with hash
+ rlwimi r11,r11,14,4,17 ; Get copy above ourselves
+ mr. r0,r0 ; Is it locked?
+ srwi r0,r19,6 ; Align PTEG offset for back hash
+ xor r2,r2,r11 ; Get the segment number (plus a whole bunch of extra bits)
+ xor r11,r11,r0 ; Hash backwards to partial vaddr
+ rlwinm r12,r2,14,0,3 ; Shift segment up
+ mfsprg r2,2 ; Get feature flags
+ li r0,1 ; Get our lock word
+ rlwimi r12,r6,22,4,9 ; Move up the API
+ bne- hpfTLBIE32 ; It is locked, go wait...
+ rlwimi r12,r11,12,10,19 ; Move in the rest of the vaddr
+
+ stwcx. r0,0,r9 ; Try to get it
+ bne- hpfTLBIE32 ; We was beat...
+ addi r7,r7,1 ; Bump the steal count
+
+ rlwinm. r0,r2,0,pfSMPcapb,pfSMPcapb ; Can this be an MP box?
+ li r0,0 ; Lock clear value
+
+ tlbie r12 ; Invalidate it everywhere
+
+
+ beq- hpfNoTS32 ; Can not have MP on this machine...
+
+ eieio ; Make sure that the tlbie happens first
+ tlbsync ; Wait for everyone to catch up
+ sync ; Make sure of it all
+
+hpfNoTS32: stw r0,tlbieLock(0) ; Clear the tlbie lock
+
+ stw r7,hwSteals(r4) ; Save the steal count
+ bgt cr5,hpfInser32 ; We just stole a block mapping...
+
+ lwz r4,4(r19) ; Get the RC of the just invalidated PTE
+
+ la r11,ppLink+4(r3) ; Point to the master RC copy
+ lwz r7,ppLink+4(r3) ; Grab the pointer to the first mapping
+ rlwinm r2,r4,27,ppRb-32,ppCb-32 ; Position the new RC
+
+hpfMrgRC32: lwarx r0,0,r11 ; Get the master RC
+ or r0,r0,r2 ; Merge in the new RC
+ stwcx. r0,0,r11 ; Try to stick it back
+ bne- hpfMrgRC32 ; Try again if we collided...
+
+
+hpfFPnch: rlwinm. r7,r7,0,~ppFlags ; Clean and test mapping address
+ beq- hpfLostPhys ; We could not find our mapping. Kick the bucket...
+
+ lhz r10,mpSpace(r7) ; Get the space
+ lwz r9,mpVAddr+4(r7) ; And the vaddr
+ cmplw cr1,r10,r8 ; Is this one of ours?
+ xor r9,r12,r9 ; Compare virtual address
+ cmplwi r9,0x1000 ; See if we really match
+ crand cr0_eq,cr1_eq,cr0_lt ; See if both space and vaddr match
+ beq+ hpfFPnch2 ; Yes, found ours...
+
+ lwz r7,mpAlias+4(r7) ; Chain on to the next
+ b hpfFPnch ; Check it out...
+
+hpfFPnch2: sub r0,r19,r27 ; Get offset to the PTEG
+ stw r0,mpPte(r7) ; Invalidate the quick pointer (keep quick pointer pointing to PTEG)
+ bl mapPhysUnlock ; Unlock the physent now
+
+hpfInser32: oris r18,r18,lo16(0x8000) ; Make sure the valid bit is on
+
+ stw r24,4(r19) ; Stuff in the real part of the PTE
+ eieio ; Make sure this gets there first
+
+ stw r18,0(r19) ; Stuff the virtual part of the PTE and make it valid
+ mr r17,r16 ; Get the PCA image to save
+ b hpfFinish ; Go join the common exit code...
+
+
+;
+; At this point we are about to do the 64-bit PTE generation.
+;
+; The following is the R14:R15 pair that contains the "shifted" VSID:
+;
+; 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+;
+;
+
+ .align 5
+
+hpfBldPTE64:
+ ld r10,mpVAddr(r31) ; Grab the base virtual address for the mapping
+ lwz r24,mpPAddr(r31) ; Grab the base physical page number for the mapping
+
+ mfsdr1 r27 ; Get the hash table base address
+
+ sldi r11,r22,32 ; Slide top of adjusted EA over
+ sldi r14,r14,32 ; Slide top of VSID over
+ rlwinm r5,r27,0,27,31 ; Isolate the size
+ eqv r16,r16,r16 ; Get all foxes here
+ rlwimi r15,r23,16,20,24 ; Stick in EA[36:40] to make AVPN
+ mr r2,r10 ; Save the flag part of the mapping
+ or r11,r11,r23 ; Stick in bottom of adjusted EA for full 64-bit value
+ rldicr r27,r27,0,45 ; Clean up the hash table base
+ or r15,r15,r14 ; Stick in bottom of AVPN for full 64-bit value
+ rlwinm r0,r11,0,4,19 ; Clear out everything but the page
+ subfic r5,r5,46 ; Get number of leading zeros
+ xor r19,r0,r15 ; Calculate hash
+ ori r15,r15,1 ; Turn on valid bit in AVPN to make top of PTE
+ srd r16,r16,r5 ; Shift over to get length of table
+ srdi r19,r19,5 ; Convert page offset to hash table offset
+ rldicr r16,r16,0,56 ; Clean up lower bits in hash table size
+ rldicr r10,r10,0,51 ; Clear out flags
+ sldi r24,r24,12 ; Change ppnum to physical address
+ sub r11,r11,r10 ; Get the offset from the base mapping
+ and r19,r19,r16 ; Wrap into hash table
+ add r24,r24,r11 ; Get actual physical address of this page
+ srdi r20,r19,5 ; Convert PTEG offset to PCA offset
+ rldimi r24,r2,0,52 ; Insert the keys, WIMG, RC, etc.
+ subfic r20,r20,-4 ; Get negative offset to PCA
+ ori r24,r24,lo16(mpR) ; Force on the reference bit
+ add r20,r20,r27 ; Point to the PCA slot
+ add r19,r19,r27 ; Point to the PTEG
+
+;
+; We now have a valid PTE pair in R15/R24. R15 is PTE upper and R24 is PTE lower.
+; R19 contains the offset of the PTEG in the hash table. R20 has offset into the PCA.
+;
+; We need to check PTE pointer (mpPte) again after we lock the PTEG. It is possible
+; that some other processor beat us and stuck in a PTE or that
+; all we had was a simple segment exception and the PTE was there the whole time.
+; If we find a pointer, we are done.
+;
+
+ mr r7,r20 ; Copy the PCA pointer
+ bl mapLockPteg ; Lock the PTEG
+
+ lwz r12,mpPte(r31) ; Get the offset to the PTE
+ mr r17,r6 ; Remember the PCA image
+ mr r18,r6 ; Prime post-selection PCA image
+ andi. r0,r12,mpHValid ; See if we have a PTE now
+ li r21,8 ; Get the number of slots
+
+ bne-- cr7,hpfNoPte64 ; Skip this for a block mapping...
+
+ bne-- hpfBailOut ; Someone already did this for us...
+
+;
+; The mapSelSlot function selects a PTEG slot to use. As input, it uses R3 as a
+; pointer to the PCA. When it returns, R3 contains 0 if an unoccupied slot was
+; selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE.
+; R4 returns the slot index.
+;
+; REMEMBER: CR7 indicates that we are building a block mapping.
+;
+
+hpfNoPte64: subic. r21,r21,1 ; See if we have tried all slots
+ mr r6,r17 ; Restore original state of PCA
+ rlwimi r6,r18,0,8,15 ; Insert the updated steal slot
+ blt- hpfBailOut ; Holy Cow, all slots are locked...
+
+ bl mapSelSlot ; Go select a slot
+
+ cmplwi cr5,r3,1 ; Did we steal a slot?
+ mr r18,r6 ; Remember the PCA image after selection
+ insrdi r19,r4,3,57 ; Insert slot index into PTEG address bits 57:59, forming the PTE address
+ lwz r10,hwSteals(r2) ; Get the steal count
+ blt++ cr5,hpfInser64 ; Nope, no steal...
+
+ ld r6,0(r19) ; Get the old PTE
+ ld r7,8(r19) ; Get the real part of the stealee
+ rldicr r6,r6,0,62 ; Clear the valid bit
+ bgt cr5,hpfNipBMx ; Do not try to lock a non-existent physent for a block mapping...
+ srdi r3,r7,12 ; Change physical address to a page number
+ bl mapFindPhyTry ; Go find and try to lock physent (note: if R3 is 0, there is no physent for this page)
+ cmplwi cr1,r3,0 ; Check if this is in RAM
+ bne-- hpfNoPte64 ; Could not get it, try for another...
+
+ crmove cr5_gt,cr1_eq ; If we did not find a physent, pretend that this is a block map
+
+hpfNipBMx: std r6,0(r19) ; Set the invalid PTE
+ li r9,tlbieLock ; Get the TLBIE lock
+
+ srdi r11,r6,5 ; Shift VSID over for back hash
+ mfsprg r4,0 ; Get the per_proc
+ xor r11,r11,r19 ; Hash backwards to get low bits of VPN
+ sync ; Make sure the invalid is stored
+
+ sldi r12,r6,16 ; Move AVPN to EA position
+ sldi r11,r11,5 ; Move this to the page position
+
+hpfTLBIE64: lwarx r0,0,r9 ; Get the TLBIE lock
+ mr. r0,r0 ; Is it locked?
+ li r0,1 ; Get our lock word
+ bne-- hpfTLBIE65 ; It is locked, go wait...
+
+ stwcx. r0,0,r9 ; Try to get it
+ rldimi r12,r11,0,41 ; Stick the low part of the page number into the AVPN
+ rldicl r8,r6,52,50 ; Isolate the address space ID
+ bne-- hpfTLBIE64 ; We was beat...
+ addi r10,r10,1 ; Bump the steal count
+
+ rldicl r11,r12,0,16 ; Clear because the book says so
+ li r0,0 ; Lock clear value
+
+ tlbie r11 ; Invalidate it everywhere
+
+ mr r7,r8 ; Get a copy of the space ID
+ eieio ; Make sure that the tlbie happens first
+ rldimi r7,r7,14,36 ; Copy address space to make hash value
+ tlbsync ; Wait for everyone to catch up
+ rldimi r7,r7,28,22 ; Add in a 3rd copy of the hash up top
+ srdi r2,r6,26 ; Shift original segment down to bottom
+
+ ptesync ; Make sure of it all
+ xor r7,r7,r2 ; Compute original segment
+ stw r0,tlbieLock(0) ; Clear the tlbie lock
+
+ stw r10,hwSteals(r4) ; Save the steal count
+ bgt cr5,hpfInser64 ; We just stole a block mapping...
+
+ rldimi r12,r7,28,0 ; Insert decoded segment
+ rldicl r4,r12,0,13 ; Trim to max supported address
+
+ ld r12,8(r19) ; Get the RC of the just invalidated PTE
+
+ la r11,ppLink+4(r3) ; Point to the master RC copy
+ ld r7,ppLink(r3) ; Grab the pointer to the first mapping
+ rlwinm r2,r12,27,ppRb-32,ppCb-32 ; Position the new RC
+
+hpfMrgRC64: lwarx r0,0,r11 ; Get the master RC
+ li r12,ppLFAmask ; Get mask to clean up alias pointer
+ or r0,r0,r2 ; Merge in the new RC
+ rotrdi r12,r12,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+ stwcx. r0,0,r11 ; Try to stick it back
+ bne-- hpfMrgRC64 ; Try again if we collided...
+
+hpfFPnchx: andc. r7,r7,r12 ; Clean and test mapping address
+ beq-- hpfLostPhys ; We could not find our mapping. Kick the bucket...
+
+ lhz r10,mpSpace(r7) ; Get the space
+ ld r9,mpVAddr(r7) ; And the vaddr
+ cmplw cr1,r10,r8 ; Is this one of ours?
+ xor r9,r4,r9 ; Compare virtual address
+ cmpldi r9,0x1000 ; See if we really match
+ crand cr0_eq,cr1_eq,cr0_lt ; See if both space and vaddr match
+ beq++ hpfFPnch2x ; Yes, found ours...
+
+ ld r7,mpAlias(r7) ; Chain on to the next
+ b hpfFPnchx ; Check it out...
+
+ .align 5
+
+hpfTLBIE65: li r7,lgKillResv ; Point to the reservation kill area
+ stwcx. r7,0,r7 ; Kill reservation
+
+hpfTLBIE63: lwz r0,0(r9) ; Get the TLBIE lock
+ mr. r0,r0 ; Is it locked?
+ beq++ hpfTLBIE64 ; It is free now, go try to grab it...
+ b hpfTLBIE63 ; Still locked, keep polling...
+
+
+
+hpfFPnch2x: sub r0,r19,r27 ; Get offset to PTEG
+ stw r0,mpPte(r7) ; Invalidate the quick pointer (keep pointing at PTEG though)
+ bl mapPhysUnlock ; Unlock the physent now
+
+
+hpfInser64: std r24,8(r19) ; Stuff in the real part of the PTE
+ eieio ; Make sure this gets there first
+ std r15,0(r19) ; Stuff the virtual part of the PTE and make it valid
+ mr r17,r18 ; Get the PCA image to set
+ b hpfFinish ; Go join the common exit code...
+
+hpfLostPhys:
+ lis r0,hi16(Choke) ; System abend - we must find the stolen mapping or we are dead
+ ori r0,r0,lo16(Choke) ; System abend
+ sc
+
+;
+; This is the common code we execute when we are finished setting up the PTE.
+;
+
+ .align 5
+
+hpfFinish: sub r4,r19,r27 ; Get offset of PTE
+ ori r4,r4,lo16(mpHValid) ; Add valid bit to PTE offset
+ bne cr7,hpfBailOut ; Do not set the PTE pointer for a block map
+ stw r4,mpPte(r31) ; Remember our PTE
+
+hpfBailOut: eieio ; Make sure all updates come first
+ stw r17,0(r20) ; Unlock and set the final PCA
+
+;
+; This is where we go if we have started processing the fault, but find that someone
+; else has taken care of it.
+;
+
+hpfIgnore: lwz r2,mpFlags(r31) ; Get the mapping flags
+ rlwinm r2,r2,0,mpFIPb+1,mpFIPb-1 ; Clear the "fault in progress" flag
+ sth r2,mpFlags+2(r31) ; Store the updated flags back
+
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r11,T_IN_VAIN ; Say that it was handled
+ b EXT(PFSExit) ; Leave...
+
+;
+; This is where we go when we find that someone else
+; is in the process of handling the fault.
+;
+
+hpfAbandon: li r3,lgKillResv ; Kill off any reservation
+ stwcx. r3,0,r3 ; Do it
+
+ la r3,pmapSXlk(r28) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+
+ li r11,T_IN_VAIN ; Say that it was handled
+ b EXT(PFSExit) ; Leave...
+
+;
+; Guest shadow assist -- page fault handler
+;
+; Here we handle a fault in a guest pmap that has the guest shadow mapping
+; assist active. We locate the VMM pmap extension block, which contains an
+; index over the discontiguous multi-page shadow hash table. The index
+; corresponding to our vaddr is selected, and the selected group within
+; that page is searched for a valid and active entry that contains
+; our vaddr and space id. The search is pipelined, so that we may fetch
+; the next slot while examining the current slot for a hit. The final
+; search iteration is unrolled so that we don't fetch beyond the end of
+; our group, which could have dire consequences depending upon where the
+; physical hash page is located.
+;
+; The VMM pmap extension block occupies a page. Beginning at offset 0, we
+; have the pmap_vmm_ext proper. Aligned at the first 128-byte boundary
+; after the pmap_vmm_ext is the hash table physical address index, a
+; linear list of 64-bit physical addresses of the pages that comprise
+; the hash table.
+;
+; In the event that we successfully locate a guest mapping, we re-join
+; the page fault path at hpfGVfound with the mapping's address in r31;
+; otherwise, we re-join at hpfNotFound. In either case, we re-join holding
+; a share of the pmap search lock for the host pmap with the host pmap's
+; address in r28, the guest pmap's space id in r21, and the guest pmap's
+; flags in r12.
+;
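+;
+; Hedged sketch of the hash and index computation used below (the GV_* shift
+; and mask constants are the ones referenced in the code; the helper names are
+; illustrative):
+;
+;	uint32_t hash     = (uint32_t)(gva >> 12) ^ space_id;
+;	uint64_t hpage_pa = hash_page_index[hpage_select(hash)];	/* physical page of table */
+;	uint64_t group_pa = hpage_pa + group_offset(hash);		/* GV_SLOTS mappings per group */
+;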
+
+ .align 5
+hpfGVxlate:
+ bt pf64Bitb,hpfGV64 ; Take 64-bit path for 64-bit machine
+
+ lwz r11,pmapVmmExtPhys+4(r28) ; r11 <- VMM pmap extension block paddr
+ lwz r12,pmapFlags(r28) ; r12 <- guest pmap's flags
+ lwz r21,pmapSpace(r28) ; r21 <- guest space ID number
+ lwz r28,vmxHostPmapPhys+4(r11) ; r28 <- host pmap's paddr
+ la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index
+ rlwinm r10,r30,0,0xFFFFF000 ; r10 <- page-aligned guest vaddr
+ lwz r6,vxsGpf(r11) ; Get guest fault count
+
+ srwi r3,r10,12 ; Form shadow hash:
+ xor r3,r3,r21 ; spaceID ^ (vaddr >> 12)
+ rlwinm r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
+ ; Form index offset from hash page number
+ add r31,r31,r4 ; r31 <- hash page index entry
+ lwz r31,4(r31) ; r31 <- hash page paddr
+ rlwimi r31,r3,GV_HGRP_SHIFT,GV_HGRP_MASK
+ ; r31 <- hash group paddr
+
+ la r3,pmapSXlk(r28) ; Point to the host pmap's search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne- hpfBadLock ; Nope...
+
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address
+ addi r6,r6,1 ; Increment guest fault count
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ stw r6,vxsGpf(r11) ; Update guest fault count
+ b hpfGVlp32
+
+ .align 5
+hpfGVlp32:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
+ andi. r6,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags
+ xor r7,r7,r21 ; Compare space ID
+ or r0,r6,r7 ; r0 <- !(!free && !dormant && space match)
+ xor r8,r8,r10 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz hpfGVlp32 ; Iterate
+
+ clrrwi r5,r5,12 ; Remove flags from virtual address
+ andi. r3,r3,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r4,r4,r21 ; Compare space ID
+ or r0,r3,r4 ; r0 <- !(!free && !dormant && space match)
+ xor r5,r5,r10 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ b hpfGVmiss
+
+ .align 5
+hpfGV64:
+ ld r11,pmapVmmExtPhys(r28) ; r11 <- VMM pmap extension block paddr
+ lwz r12,pmapFlags(r28) ; r12 <- guest pmap's flags
+ lwz r21,pmapSpace(r28) ; r21 <- guest space ID number
+ ld r28,vmxHostPmapPhys(r11) ; r28 <- host pmap's paddr
+ la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index
+ rlwinm r10,r30,0,0xFFFFF000 ; Clear low-order 12 bits of guest vaddr
+ rldimi r10,r29,32,0 ; Insert top half to form 64-bit guest vaddr
+ lwz r6,vxsGpf(r11) ; Get guest fault count
+
+ srwi r3,r10,12 ; Form shadow hash:
+ xor r3,r3,r21 ; spaceID ^ (vaddr >> 12)
+ rlwinm r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
+ ; Form index offset from hash page number
+ add r31,r31,r4 ; r31 <- hash page index entry
+ ld r31,0(r31) ; r31 <- hash page paddr
+ insrdi r31,r3,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
+ ; r31 <- hash group paddr
+
+ la r3,pmapSXlk(r28) ; Point to the host pmap's search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne-- hpfBadLock ; Nope...
+
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address
+ addi r6,r6,1 ; Increment guest fault count
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ stw r6,vxsGpf(r11) ; Update guest fault count
+ b hpfGVlp64
+
+ .align 5
+hpfGVlp64:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr
+ andi. r6,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r7,r7,r21 ; Compare space ID
+ or r0,r6,r7 ; r0 <- !(!free && !dormant && space match)
+ xor r8,r8,r10 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz hpfGVlp64 ; Iterate
+
+ clrrdi r5,r5,12 ; Remove flags from virtual address
+ andi. r3,r3,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r4,r4,r21 ; Compare space ID
+ or r0,r3,r4 ; r0 <- !(!free && !dormant && space match)
+ xor r5,r5,r10 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+hpfGVmiss:
+ lwz r6,vxsGpfMiss(r11) ; Get guest fault miss count
+ addi r6,r6,1 ; Increment miss count
+ stw r6,vxsGpfMiss(r11) ; Update guest fault miss count
+ b hpfNotFound
+
+/*
+ * hw_set_user_space(pmap)
+ * hw_set_user_space_dis(pmap)
+ *
+ * Indicate whether memory space needs to be switched.
+ * We really need to turn off interrupts here, because we need to be non-preemptable.
+ *
+ * hw_set_user_space_dis is used when interruptions are already disabled. Mind the
+ * register usage here. The VMM switch code in vmachmon.s that calls this
+ * knows what registers are in use; check it if these change.
+ */
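+
+/* Roughly, in C (a hedged sketch; pmap_to_phys() and has_altivec are
+ * illustrative stand-ins, not real kernel symbols):
+ *
+ *     old = per_proc->ppUserPmapVirt;
+ *     per_proc->ppUserPmap     = pmap_to_phys(pmap);  // pmap ^ pmapvr gives the paddr
+ *     per_proc->ppUserPmapVirt = pmap;
+ *     if (old != pmap && has_altivec) {
+ *         dssall();                                   // kill data streams on a space change
+ *         sync();
+ *     }
+ */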
+
+
+
+ .align 5
+ .globl EXT(hw_set_user_space)
+
+LEXT(hw_set_user_space)
+
+ lis r8,hi16(MASK(MSR_VEC)) ; Get the vector enable
+ mfmsr r10 ; Get the current MSR
+ ori r8,r8,lo16(MASK(MSR_FP)) ; Add in FP
+ ori r9,r8,lo16(MASK(MSR_EE)) ; Add in the EE
+ andc r10,r10,r8 ; Turn off VEC, FP for good
+ andc r9,r10,r9 ; Turn off EE also
+ mtmsr r9 ; Disable them
+ isync ; Make sure FP and vec are off
+ mfsprg r6,1 ; Get the current activation
+ lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block
+ lwz r2,ppUserPmapVirt(r6) ; Get our virtual pmap address
+ mfsprg r4,2 ; Get the feature flags
+ lwz r7,pmapvr(r3) ; Get the v to r translation
+ lwz r8,pmapvr+4(r3) ; Get the v to r translation
+ mtcrf 0x80,r4 ; Get the Altivec flag
+ xor r4,r3,r8 ; Get bottom of the real address of bmap anchor
+ cmplw cr1,r3,r2 ; Same address space as before?
+ stw r7,ppUserPmap(r6) ; Show our real pmap address
+ crorc cr1_eq,cr1_eq,pfAltivecb ; See if same address space or not altivec machine
+ stw r4,ppUserPmap+4(r6) ; Show our real pmap address
+ stw r3,ppUserPmapVirt(r6) ; Show our virtual pmap address
+ mtmsr r10 ; Restore interruptions
+ beqlr-- cr1 ; Leave if the same address space or not Altivec
+
+ dssall ; Need to kill all data streams if adrsp changed
+ sync
+ blr ; Return...
+
+ .align 5
+ .globl EXT(hw_set_user_space_dis)
+
+LEXT(hw_set_user_space_dis)
+
+ lwz r7,pmapvr(r3) ; Get the v to r translation
+ mfsprg r4,2 ; Get the feature flags
+ lwz r8,pmapvr+4(r3) ; Get the v to r translation
+ mfsprg r6,1 ; Get the current activation
+ lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block
+ lwz r2,ppUserPmapVirt(r6) ; Get our virtual pmap address
+ mtcrf 0x80,r4 ; Get the Altivec flag
+ xor r4,r3,r8 ; Get bottom of the real address of bmap anchor
+ cmplw cr1,r3,r2 ; Same address space as before?
+ stw r7,ppUserPmap(r6) ; Show our real pmap address
+ crorc cr1_eq,cr1_eq,pfAltivecb ; See if same address space or not altivec machine
+ stw r4,ppUserPmap+4(r6) ; Show our real pmap address
+ stw r3,ppUserPmapVirt(r6) ; Show our virtual pmap address
+ beqlr-- cr1 ; Leave if the same
+
+ dssall ; Need to kill all data streams if adrsp changed
+ sync
+ blr ; Return...
+
+/* int mapalc1(struct mappingblok *mb) - Finds, allocates, and zeros a free 1-bit mapping entry
+ *
+ * Lock must already be held on mapping block list
+ * returns 0 if all slots filled.
+ * returns n if a slot is found and it is not the last
+ * returns -n if a slot is found and it is the last
+ * when n and -n are returned, the corresponding bit is cleared
+ * the mapping is zeroed out before return
+ *
+ */
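+
+/* A rough C equivalent of the allocation logic (a sketch only: it treats
+ * mbfree as a single 64-bit "1 = free" bitmap, which the code below handles
+ * as two 32-bit words, and it omits the dcbz zeroing of the slot):
+ *
+ *     int mapalc1_sketch(unsigned long long *mbfree) {
+ *         if (*mbfree == 0) return 0;                    // all slots filled
+ *         int n = __builtin_clzll(*mbfree);              // first free slot
+ *         *mbfree &= ~(0x8000000000000000ULL >> n);      // claim it
+ *         return (*mbfree == 0) ? -n : n;                // -n marks the last one
+ *     }
+ *
+ * Presumably the first slot of a block holds the block header, so n is
+ * nonzero in practice and the -n/0 returns stay distinguishable.
+ */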
+
+ .align 5
+ .globl EXT(mapalc1)
+
+LEXT(mapalc1)
+ lwz r4,mbfree(r3) ; Get the 1st mask
+ lis r0,0x8000 ; Get the mask to clear the first free bit
+ lwz r5,mbfree+4(r3) ; Get the 2nd mask
+ mr r12,r3 ; Save the block ptr
+ cntlzw r3,r4 ; Get first 1-bit in 1st word
+ srw. r9,r0,r3 ; Get bit corresponding to first free one
+ cntlzw r10,r5 ; Get first free field in second word
+ andc r4,r4,r9 ; Turn 1-bit off in 1st word
+ bne mapalc1f ; Found one in 1st word
+
+ srw. r9,r0,r10 ; Get bit corresponding to first free one in 2nd word
+ li r3,0 ; assume failure return
+ andc r5,r5,r9 ; Turn it off
+ beqlr-- ; There are no 1 bits left...
+ addi r3,r10,32 ; set the correct number
+
+mapalc1f:
+ or. r0,r4,r5 ; any more bits set?
+ stw r4,mbfree(r12) ; update bitmasks
+ stw r5,mbfree+4(r12)
+
+ slwi r6,r3,6 ; get (n * mpBasicSize), ie offset of mapping in block
+ addi r7,r6,32
+ dcbz r6,r12 ; clear the 64-byte mapping
+ dcbz r7,r12
+
+ bnelr++ ; return if another bit remains set
+
+ neg r3,r3 ; indicate we just returned the last bit
+ blr
+
+
+/* int mapalc2(struct mappingblok *mb) - Finds, allocates, and zeros a free 2-bit mapping entry
+ *
+ * Lock must already be held on mapping block list
+ * returns 0 if all slots filled.
+ * returns n if a slot is found and it is not the last
+ * returns -n if a slot is found and it is the last
+ * when n and -n are returned, the corresponding bits are cleared
+ * We find runs of 2 consecutive 1 bits by cntlzw(n & (n<<1)).
+ * the mapping is zeroed out before return
+ */
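+
+/* Sketch of the 2-bit run finder (illustrative C): a run of two adjacent
+ * free bits starts at every position where (n & (n << 1)) is set, so
+ * counting leading zeros of that value gives the index of the first run:
+ *
+ *     int first_pair(unsigned int n) {                   // n: "1 = free" bitmap word
+ *         unsigned int runs = n & (n << 1);              // bit i set => bits i, i+1 free
+ *         return runs ? __builtin_clz(runs) : -1;        // index of first pair, or none
+ *     }
+ *
+ * The pair that straddles the two words (bit 31 of the first word plus
+ * bit 0 of the second) is handled separately at mapalc2c below.
+ */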
+
+ .align 5
+ .globl EXT(mapalc2)
+LEXT(mapalc2)
+ lwz r4,mbfree(r3) ; Get the first mask
+ lis r0,0x8000 ; Get the mask to clear the first free bit
+ lwz r5,mbfree+4(r3) ; Get the second mask
+ mr r12,r3 ; Save the block ptr
+ slwi r6,r4,1 ; shift first word over
+ and r6,r4,r6 ; lite start of double bit runs in 1st word
+ slwi r7,r5,1 ; shift 2nd word over
+ cntlzw r3,r6 ; Get first free 2-bit run in 1st word
+ and r7,r5,r7 ; lite start of double bit runs in 2nd word
+ srw. r9,r0,r3 ; Get bit corresponding to first run in 1st word
+ cntlzw r10,r7 ; Get first free field in second word
+ srwi r11,r9,1 ; shift over for 2nd bit in 1st word
+ andc r4,r4,r9 ; Turn off 1st bit in 1st word
+ andc r4,r4,r11 ; turn off 2nd bit in 1st word
+ bne mapalc2a ; Found two consecutive free bits in 1st word
+
+ srw. r9,r0,r10 ; Get bit corresponding to first free one in second word
+ li r3,0 ; assume failure
+ srwi r11,r9,1 ; get mask for 2nd bit
+ andc r5,r5,r9 ; Turn off 1st bit in 2nd word
+ andc r5,r5,r11 ; turn off 2nd bit in 2nd word
+ beq-- mapalc2c ; There are no runs of 2 bits in 2nd word either
+ addi r3,r10,32 ; set the correct number
+
+mapalc2a:
+ or. r0,r4,r5 ; any more bits set?
+ stw r4,mbfree(r12) ; update bitmasks
+ stw r5,mbfree+4(r12)
+ slwi r6,r3,6 ; get (n * mpBasicSize), ie offset of mapping in block
+ addi r7,r6,32
+ addi r8,r6,64
+ addi r9,r6,96
+ dcbz r6,r12 ; zero out the 128-byte mapping
+ dcbz r7,r12 ; we use the slow 32-byte dcbz even on 64-bit machines
+ dcbz r8,r12 ; because the mapping may not be 128-byte aligned
+ dcbz r9,r12
+
+ bnelr++ ; return if another bit remains set
+
+ neg r3,r3 ; indicate we just returned the last bit
+ blr
+
+mapalc2c:
+ rlwinm r7,r5,1,31,31 ; move bit 0 of 2nd word to bit 31
+ and. r0,r4,r7 ; is the 2-bit field that spans the 2 words free?
+ beqlr ; no, we failed
+ rlwinm r4,r4,0,0,30 ; yes, turn off bit 31 of 1st word
+ rlwinm r5,r5,0,1,31 ; turn off bit 0 of 2nd word
+ li r3,31 ; get index of this field
+ b mapalc2a
+
+
+;
+; This routine initializes the hash table and PCA.
+; It is done here because we may need to be 64-bit to do it.
+;
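+; Roughly, in C (a hedged sketch; one 4-byte PCA entry per PTEG, growing
+; downward from the hash table base, each initialized to "all 8 slots free,
+; steal pointer at the end", i.e. the 0xFF01 pattern loaded below):
+;
+;     memset(hash_table_base, 0, hash_table_size);   /* clear all PTEGs       */
+;     unsigned int *pca = (unsigned int *)hash_table_base;
+;     for (unsigned int i = 1; i <= npca; i++)       /* npca = one per PTEG   */
+;         pca[-(int)i] = 0xFF010000;                 /* free mask | steal idx */
+;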
+
+ .align 5
+ .globl EXT(hw_hash_init)
+
+LEXT(hw_hash_init)
+
+ mfsprg r10,2 ; Get feature flags
+ lis r12,hi16(EXT(hash_table_size)) ; Get hash table size address
+ mtcrf 0x02,r10 ; move pf64Bit to cr6
+ lis r11,hi16(EXT(hash_table_base)) ; Get hash table base address
+ lis r4,0xFF01 ; Set all slots free and start steal at end
+ ori r12,r12,lo16(EXT(hash_table_size)) ; Get hash table size address
+ ori r11,r11,lo16(EXT(hash_table_base)) ; Get hash table base address
+
+ lwz r12,0(r12) ; Get hash table size
+ li r3,0 ; Get start
+ bt++ pf64Bitb,hhiSF ; skip if 64-bit (only they take the hint)
+
+ lwz r11,4(r11) ; Get hash table base
+
+hhiNext32: cmplw r3,r12 ; Have we reached the end?
+ bge- hhiCPCA32 ; Yes...
+ dcbz r3,r11 ; Clear the line
+ addi r3,r3,32 ; Next one...
+ b hhiNext32 ; Go on...
+
+hhiCPCA32: rlwinm r12,r12,28,4,29 ; Get number of slots * 4
+ li r3,-4 ; Displacement to first PCA entry
+ neg r12,r12 ; Get negative end of PCA
+
+hhiNPCA32: stwx r4,r3,r11 ; Initialize the PCA entry
+ subi r3,r3,4 ; Next slot
+ cmpw r3,r12 ; Have we finished?
+ bge+ hhiNPCA32 ; Not yet...
+ blr ; Leave...
+
+hhiSF: mfmsr r9 ; Save the MSR
+ li r8,1 ; Get a 1
+ mr r0,r9 ; Get a copy of the MSR
+ ld r11,0(r11) ; Get hash table base
+ rldimi r0,r8,63,MSR_SF_BIT ; Set SF bit (bit 0)
+ mtmsrd r0 ; Turn on SF
+ isync
+
+
+hhiNext64: cmpld r3,r12 ; Have we reached the end?
+ bge-- hhiCPCA64 ; Yes...
+ dcbz128 r3,r11 ; Clear the line
+ addi r3,r3,128 ; Next one...
+ b hhiNext64 ; Go on...
+
+hhiCPCA64: rlwinm r12,r12,27,5,29 ; Get number of slots * 4
+ li r3,-4 ; Displacement to first PCA entry
+ neg r12,r12 ; Get negative end of PCA
+
+hhiNPCA64: stwx r4,r3,r11 ; Initialize the PCA entry
+ subi r3,r3,4 ; Next slot
+ cmpd r3,r12 ; Have we finished?
+ bge++ hhiNPCA64 ; Not yet...
+
+ mtmsrd r9 ; Turn off SF if it was off
+ isync
+ blr ; Leave...
+
+
+;
+; This routine sets up the hardware to start translation.
+; Note that we do NOT start translation.
+;
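+; For reference, the SDR1 images built below work out to the following
+; (hedged; layouts per the 32-bit and 64-bit architecture definitions,
+; with hash_table_size a power of two):
+;
+;     32-bit:  SDR1 = hash_table_base | HTABMASK,
+;              where HTABMASK = (hash_table_size - 1) >> 16
+;     64-bit:  SDR1 = hash_table_base | HTABSIZE,
+;              where HTABSIZE = log2(hash_table_size) - 18  (= 13 - cntlzw(size))
+;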
+
+ .align 5
+ .globl EXT(hw_setup_trans)
+
+LEXT(hw_setup_trans)
+
+ mfsprg r11,0 ; Get the per_proc block
+ mfsprg r12,2 ; Get feature flags
+ li r0,0 ; Get a 0
+ li r2,1 ; And a 1
+ mtcrf 0x02,r12 ; Move pf64Bit to cr6
+ stw r0,validSegs(r11) ; Make sure we think all SR/STEs are invalid
+ stw r0,validSegs+4(r11) ; Make sure we think all SR/STEs are invalid, part deux
+ sth r2,ppInvSeg(r11) ; Force a reload of the SRs
+ sth r0,ppCurSeg(r11) ; Set that we are starting out in kernel
+
+ bt++ pf64Bitb,hstSF ; skip if 64-bit (only they take the hint)
+
+ li r9,0 ; Clear out a register
+ sync
+ isync
+ mtdbatu 0,r9 ; Invalidate maps
+ mtdbatl 0,r9 ; Invalidate maps
+ mtdbatu 1,r9 ; Invalidate maps
+ mtdbatl 1,r9 ; Invalidate maps
+ mtdbatu 2,r9 ; Invalidate maps
+ mtdbatl 2,r9 ; Invalidate maps
+ mtdbatu 3,r9 ; Invalidate maps
+ mtdbatl 3,r9 ; Invalidate maps
+
+ mtibatu 0,r9 ; Invalidate maps
+ mtibatl 0,r9 ; Invalidate maps
+ mtibatu 1,r9 ; Invalidate maps
+ mtibatl 1,r9 ; Invalidate maps
+ mtibatu 2,r9 ; Invalidate maps
+ mtibatl 2,r9 ; Invalidate maps
+ mtibatu 3,r9 ; Invalidate maps
+ mtibatl 3,r9 ; Invalidate maps
+
+ lis r11,hi16(EXT(hash_table_base)) ; Get hash table base address
+ lis r12,hi16(EXT(hash_table_size)) ; Get hash table size address
+ ori r11,r11,lo16(EXT(hash_table_base)) ; Get hash table base address
+ ori r12,r12,lo16(EXT(hash_table_size)) ; Get hash table size address
+ lwz r11,4(r11) ; Get hash table base
+ lwz r12,0(r12) ; Get hash table size
+ subi r12,r12,1 ; Back off by 1
+ rlwimi r11,r12,16,23,31 ; Stick the size into the sdr1 image
+
+ mtsdr1 r11 ; Ok, we now have the hash table set up
+ sync
+
+ li r12,invalSpace ; Get the invalid segment value
+ li r10,0 ; Start low
+
+hstsetsr: mtsrin r12,r10 ; Set the SR
+ addis r10,r10,0x1000 ; Bump the segment
+ mr. r10,r10 ; Are we finished?
+ bne+ hstsetsr ; Nope...
+ sync
+ blr ; Return...
+
+;
+; 64-bit version
+;
+
+hstSF: lis r11,hi16(EXT(hash_table_base)) ; Get hash table base address
+ lis r12,hi16(EXT(hash_table_size)) ; Get hash table size address
+ ori r11,r11,lo16(EXT(hash_table_base)) ; Get hash table base address
+ ori r12,r12,lo16(EXT(hash_table_size)) ; Get hash table size address
+ ld r11,0(r11) ; Get hash table base
+ lwz r12,0(r12) ; Get hash table size
+ cntlzw r10,r12 ; Get the number of bits
+ subfic r10,r10,13 ; Get the extra bits we need
+ or r11,r11,r10 ; Add the size field to SDR1
+
+ mtsdr1 r11 ; Ok, we now have the hash table set up
+ sync
+
+ li r0,0 ; Set an SLB slot index of 0
+ slbia ; Trash all SLB entries (except for entry 0 that is)
+ slbmfee r7,r0 ; Get the entry that is in SLB index 0
+ rldicr r7,r7,0,35 ; Clear the valid bit and the rest
+ slbie r7 ; Invalidate it
+
+ blr ; Return...
+
+
+;
+; This routine turns on translation for the first time on a processor
+;
+
+ .align 5
+ .globl EXT(hw_start_trans)
+
+LEXT(hw_start_trans)
+
+
+ mfmsr r10 ; Get the msr
+ ori r10,r10,lo16(MASK(MSR_IR) | MASK(MSR_DR)) ; Turn on translation
+
+ mtmsr r10 ; Everything falls apart here
+ isync
+
+ blr ; Back to it.
+
+
+
+;
+; This routine validates a segment register.
+; hw_map_seg(pmap_t pmap, addr64_t seg, addr64_t va)
+;
+; r3 = virtual pmap
+; r4 = segment[0:31]
+; r5 = segment[32:63]
+; r6 = va[0:31]
+; r7 = va[32:63]
+;
+; Note that we transform the addr64_t (long long) parameters into single 64-bit values.
+; Note that there is no reason to apply the key modifier here because this is only
+; used for kernel accesses.
+;
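+; In outline (a hedged sketch of the 64-bit case; the interleaved code below
+; also covers 32-bit):
+;
+;     esid = va >> 28;                               /* segment number         */
+;     vsid = (esid << 12) ^ replicate(space_id);     /* space ID copied across */
+;                                                    /* the word, built in r2  */
+;
+; The key and no-execute bits from pmapFlags are then merged in and the result
+; is loaded into a segment register (32-bit) or a free or stolen SLB entry
+; (64-bit).
+;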
+
+ .align 5
+ .globl EXT(hw_map_seg)
+
+LEXT(hw_map_seg)
+
+ lwz r0,pmapSpace(r3) ; Get the space, we will need it soon
+ lwz r9,pmapFlags(r3) ; Get the flags for the keys now
+ mfsprg r10,2 ; Get feature flags
+
+;
+; Note: the following code would probably be easier to follow if I split it,
+; but I just wanted to see if I could write this to work on both 32- and 64-bit
+; machines combined.
+;
+
+;
+; Here we enter with va[0:31] in r6[0:31] (or r6[32:63] on 64-bit machines)
+; and va[32:63] in r7[0:31] (or r7[32:63] on 64-bit machines)
+
+ rlwinm r4,r4,0,1,0 ; Copy seg[0:31] into r4[0:31] - no-op for 32-bit
+ rlwinm r7,r7,18,14,17 ; Slide va[32:35] east to just west of space ID
+ mtcrf 0x02,r10 ; Move pf64Bit and pfNoMSRirb to cr5 and 6
+ srwi r8,r6,14 ; Slide va[0:17] east to just west of the rest
+ rlwimi r7,r6,18,0,13 ; Slide va[18:31] east to just west of slid va[32:35]
+ rlwimi r0,r0,14,4,17 ; Dup address space ID above itself
+ rlwinm r8,r8,0,1,0 ; Dup low part into high (does nothing on 32-bit machines)
+ rlwinm r2,r0,28,0,31 ; Rotate low nybble to top of low half
+ rlwimi r2,r2,0,1,0 ; Replicate bottom 32 into top 32
+ rlwimi r8,r7,0,0,31 ; Join va[0:17] with va[18:35] (just like mr on 32-bit machines)
+
+ rlwimi r2,r0,0,4,31 ; We should now have 4 copies of the space
+ ; concatenated together. There is garbage
+ ; at the top for 64-bit but we will clean
+ ; that out later.
+ rlwimi r4,r5,0,0,31 ; Copy seg[32:63] into r4[32:63] - just like mr for 32-bit
+
+
+;
+; Here we exit with va[0:35] shifted into r8[14:51], zeros elsewhere, or
+; va[18:35] shifted into r8[0:17], zeros elsewhere on 32-bit machines
+;
+
+;
+; What we have now is:
+;
+; 0 0 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3 - for 64-bit machines
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; r2 = |xxxx0000|AAAAAAAA|AAAAAABB|BBBBBBBB|BBBBCCCC|CCCCCCCC|CCDDDDDD|DDDDDDDD| - hash value
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; 0 0 1 2 3 - for 32-bit machines
+; 0 8 6 4 1
+;
+; 0 0 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3 - for 64-bit machines
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; r8 = |00000000|000000SS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SS000000|00000000| - shifted and cleaned EA
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; 0 0 1 2 3 - for 32-bit machines
+; 0 8 6 4 1
+;
+; 0 0 1 2 3 4 4 5 6
+; 0 8 6 4 2 0 8 6 3 - for 64-bit machines
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; r4 = |SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSS0000|00000000|00000000|00000000| - Segment
+; +--------+--------+--------+--------+--------+--------+--------+--------+
+; 0 0 1 2 3 - for 32-bit machines
+; 0 8 6 4 1
+
+
+ xor r8,r8,r2 ; Calculate VSID
+
+ bf-- pf64Bitb,hms32bit ; Skip out if 32-bit...
+ mfsprg r12,0 ; Get the per_proc
+ li r0,1 ; Prepare to set bit 0 (also to clear EE)
+ mfmsr r6 ; Get current MSR
+ li r2,MASK(MSR_IR)|MASK(MSR_DR) ; Get the translation bits
+ mtmsrd r0,1 ; Set only the EE bit to 0
+ rlwinm r6,r6,0,MSR_EE_BIT,MSR_EE_BIT ; See if EE bit is on
+ mfmsr r11 ; Get the MSR right now, after disabling EE
+ andc r2,r11,r2 ; Turn off translation now
+ rldimi r2,r0,63,0 ; Get the 64-bit mode bit turned on
+ or r11,r11,r6 ; Turn on the EE bit if it was on
+ mtmsrd r2 ; Make sure translation and EE are off and 64-bit is on
+ isync ; Hang out a bit
+
+ ld r6,validSegs(r12) ; Get the valid SLB entry flags
+ sldi r9,r9,9 ; Position the key and noex bit
+
+ rldimi r5,r8,12,0 ; Form the VSID/key
+
+ not r3,r6 ; Make valids be 0s
+
+ cntlzd r7,r3 ; Find a free SLB
+ cmplwi r7,63 ; Did we find a free SLB entry?
+
+ slbie r4 ; Since this ESID may still be in an SLBE, kill it
+
+ oris r4,r4,0x0800 ; Turn on the valid bit in ESID
+ addi r7,r7,1 ; Make sure we skip slb 0
+ blt++ hmsFreeSeg ; Yes, go load it...
+
+;
+; No free SLB entries, select one that is in use and invalidate it
+;
+ lwz r2,ppSegSteal(r12) ; Get the next slot to steal
+ addi r7,r2,pmapSegCacheUse+1 ; Select stealee from non-cached slots only
+ addi r2,r2,1 ; Set next slot to steal
+ slbmfee r3,r7 ; Get the entry that is in the selected spot
+ subi r8,r2,64-(pmapSegCacheUse+1) ; Force steal to wrap
+ rldicr r3,r3,0,35 ; Clear the valid bit and the rest
+ srawi r8,r8,31 ; Get -1 if steal index still in range
+ slbie r3 ; Invalidate the in-use SLB entry
+ and r2,r2,r8 ; Reset steal index when it should wrap
+ isync ;
+
+ stw r2,ppSegSteal(r12) ; Set the next slot to steal
+;
+; We are now ready to stick the SLB entry in the SLB and mark it in use
+;
+
+hmsFreeSeg: subi r2,r7,1 ; Adjust for skipped slb 0
+ rldimi r4,r7,0,58 ; Copy in the SLB entry selector
+ srd r0,r0,r2 ; Set bit mask for allocation
+ rldicl r5,r5,0,15 ; Clean out the unsupported bits
+ or r6,r6,r0 ; Turn on the allocation flag
+
+ slbmte r5,r4 ; Make that SLB entry
+
+ std r6,validSegs(r12) ; Mark as valid
+ mtmsrd r11 ; Restore the MSR
+ isync
+ blr ; Back to it...
+
+ .align 5
+
+hms32bit:
+ mfsprg r12,1 ; Get the current activation
+ lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block
+ rlwinm r8,r8,0,8,31 ; Clean up the VSID
+ rlwinm r2,r4,4,28,31 ; Isolate the segment we are setting
+ lis r0,0x8000 ; Set bit 0
+ rlwimi r8,r9,28,1,3 ; Insert the keys and N bit
+ srw r0,r0,r2 ; Get bit corresponding to SR
+ addi r7,r12,validSegs ; Point to the valid segment flags directly
+
+ mtsrin r8,r4 ; Set the actual SR
+ isync ; Need to make sure this is done
+
+hmsrupt: lwarx r6,0,r7 ; Get and reserve the valid segment flags
+ or r6,r6,r0 ; Show that SR is valid
+ stwcx. r6,0,r7 ; Set the valid SR flags
+ bne-- hmsrupt ; Had an interrupt, need to get flags again...
+
+ blr ; Back to it...
+
+
+;
+; This routine invalidates a segment register.
+;
+
+ .align 5
+ .globl EXT(hw_blow_seg)
+
+LEXT(hw_blow_seg)
+
+ mfsprg r10,2 ; Get feature flags
+ mtcrf 0x02,r10 ; move pf64Bit and pfNoMSRirb to cr5 and 6
+
+ rlwinm r9,r4,0,0,3 ; Save low segment address and make sure it is clean
+
+ bf-- pf64Bitb,hbs32bit ; Skip out if 32-bit...
+
+ li r0,1 ; Prepare to set bit 0 (also to clear EE)
+ mfmsr r6 ; Get current MSR
+ li r2,MASK(MSR_IR)|MASK(MSR_DR) ; Get the translation bits
+ mtmsrd r0,1 ; Set only the EE bit to 0
+ rlwinm r6,r6,0,MSR_EE_BIT,MSR_EE_BIT ; See if EE bit is on
+ mfmsr r11 ; Get the MSR right now, after disabling EE
+ andc r2,r11,r2 ; Turn off translation now
+ rldimi r2,r0,63,0 ; Get the 64-bit mode bit turned on
+ or r11,r11,r6 ; Turn on the EE bit if it was on
+ mtmsrd r2 ; Make sure translation and EE are off and 64-bit is on
+ isync ; Hang out a bit
+
+ rldimi r9,r3,32,0 ; Insert the top part of the ESID
+
+ slbie r9 ; Invalidate the associated SLB entry
+
+ mtmsrd r11 ; Restore the MSR
+ isync
+ blr ; Back to it.
+
+ .align 5
+
+hbs32bit:
+ mfsprg r12,1 ; Get the current activation
+ lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block
+ addi r7,r12,validSegs ; Point to the valid segment flags directly
+ lwarx r4,0,r7 ; Get and reserve the valid segment flags
+ rlwinm r6,r9,4,28,31 ; Convert segment to number
+ lis r2,0x8000 ; Set up a mask
+ srw r2,r2,r6 ; Make a mask
+ and. r0,r4,r2 ; See if this is even valid
+ li r5,invalSpace ; Set the invalid address space VSID
+ beqlr ; Leave if already invalid...
+
+ mtsrin r5,r9 ; Slam the segment register
+ isync ; Need to make sure this is done
+
+hbsrupt: andc r4,r4,r2 ; Clear the valid bit for this segment
+ stwcx. r4,0,r7 ; Set the valid SR flags
+ beqlr++ ; Stored ok, no interrupt, time to leave...
+
+ lwarx r4,0,r7 ; Get and reserve the valid segment flags again
+ b hbsrupt ; Try again...
+
+;
+; This routine invalidates the entire pmap segment cache
+;
+; Translation is on, interrupts may or may not be enabled.
+;
+
+ .align 5
+ .globl EXT(invalidateSegs)
+
+LEXT(invalidateSegs)
+
+ la r10,pmapCCtl(r3) ; Point to the segment cache control
+ eqv r2,r2,r2 ; Get all foxes
+
+isInv: lwarx r4,0,r10 ; Get the segment cache control value
+ rlwimi r4,r2,0,0,15 ; Slam in all invalid bits
+ rlwinm. r0,r4,0,pmapCCtlLckb,pmapCCtlLckb ; Is it already locked?
+ bne-- isInv0 ; Yes, try again...
+
+ stwcx. r4,0,r10 ; Try to invalidate it
+ bne-- isInv ; Someone else just stuffed it...
+ blr ; Leave...
+
+
+isInv0: li r4,lgKillResv ; Get reservation kill zone
+ stwcx. r4,0,r4 ; Kill reservation
+
+isInv1: lwz r4,pmapCCtl(r3) ; Get the segment cache control
+ rlwinm. r0,r4,0,pmapCCtlLckb,pmapCCtlLckb ; Is it already locked?
+ bne-- isInv ; Nope...
+ b isInv1 ; Still locked do it again...
+
+;
+; This routine switches segment registers between kernel and user.
+; We have some assumptions and rules:
+; We are in the exception vectors
+; pf64Bitb is set up
+; R3 contains the MSR we are going to
+; We can not use R4, R13, R20, R21, R29
+; R13 is the savearea
+; R29 has the per_proc
+;
+; We return R3 as 0 if we did not switch between kernel and user
+; We also maintain and apply the user state key modifier used by VMM support.
+; If we go to the kernel it is set to 0, otherwise it follows the bit
+; in spcFlags.
+;
+
+ .align 5
+ .globl EXT(switchSegs)
+
+LEXT(switchSegs)
+
+ lwz r22,ppInvSeg(r29) ; Get the ppInvSeg (force invalidate) and ppCurSeg (user or kernel segments indicator)
+ lwz r9,spcFlags(r29) ; Pick up the special user state flags
+ rlwinm r2,r3,MSR_PR_BIT+1,31,31 ; Isolate the problem mode bit
+ rlwinm r3,r3,MSR_RI_BIT+1,31,31 ; Isolate the recoverable interrupt bit
+ lis r8,hi16(EXT(kernel_pmap_phys)) ; Assume kernel
+ or r2,r2,r3 ; This will be 1 if we will be using user segments
+ li r3,0 ; Get a selection mask
+ cmplw r2,r22 ; This will be EQ if same state and not ppInvSeg
+ ori r8,r8,lo16(EXT(kernel_pmap_phys)) ; Assume kernel (bottom of address)
+ sub r3,r3,r2 ; Form select mask - 0 if kernel, -1 if user
+ la r19,ppUserPmap(r29) ; Point to the current user pmap
+
+; The following line is an exercise of a generally unreadable but recompile-friendly programming practice
+ rlwinm r30,r9,userProtKeybit+1+(63-sgcVSKeyUsr),sgcVSKeyUsr-32,sgcVSKeyUsr-32 ; Isolate the user state protection key
+
+ andc r8,r8,r3 ; Zero kernel pmap ptr if user, untouched otherwise
+ and r19,r19,r3 ; Zero user pmap ptr if kernel, untouched otherwise
+ and r30,r30,r3 ; Clear key modifier if kernel, leave otherwise
+ or r8,r8,r19 ; Get the pointer to the pmap we are using
+
+ beqlr ; We are staying in the same mode, do not touch segs...
+
+ lwz r28,0(r8) ; Get top half of pmap address
+ lwz r10,4(r8) ; Get bottom half
+
+ stw r2,ppInvSeg(r29) ; Clear request for invalidate and save ppCurSeg
+ rlwinm r28,r28,0,1,0 ; Copy top to top
+ stw r30,ppMapFlags(r29) ; Set the key modifier
+ rlwimi r28,r10,0,0,31 ; Insert bottom
+
+ la r10,pmapCCtl(r28) ; Point to the segment cache control
+ la r9,pmapSegCache(r28) ; Point to the segment cache
+
+ssgLock: lwarx r15,0,r10 ; Get and reserve the segment cache control
+ rlwinm. r0,r15,0,pmapCCtlLckb,pmapCCtlLckb ; Someone have the lock?
+ ori r16,r15,lo16(pmapCCtlLck) ; Set lock bit
+ bne-- ssgLock0 ; Yup, this is in use...
+
+ stwcx. r16,0,r10 ; Try to set the lock
+ bne-- ssgLock ; Did we get contention?
+
+ not r11,r15 ; Invert the invalids to valids
+ li r17,0 ; Set a mask for the SRs we are loading
+ isync ; Make sure we are all caught up
+
+ bf-- pf64Bitb,ssg32Enter ; If 32-bit, jump into it...
+
+ li r0,0 ; Clear
+ slbia ; Trash all SLB entries (except for entry 0 that is)
+ li r17,1 ; Get SLB index to load (skip slb 0)
+ oris r0,r0,0x8000 ; Get set for a mask
+ b ssg64Enter ; Start on a cache line...
+
+ .align 5
+
+ssgLock0: li r15,lgKillResv ; Killing field
+ stwcx. r15,0,r15 ; Kill reservation
+
+ssgLock1: lwz r15,pmapCCtl(r28) ; Get the segment cache controls
+ rlwinm. r15,r15,0,pmapCCtlLckb,pmapCCtlLckb ; Someone have the lock?
+ beq++ ssgLock ; Lock is free now, go try to grab it...
+ b ssgLock1 ; Still locked, keep waiting...
+;
+; This is the 32-bit address space switch code.
+; We take the segment cache lock and walk through the cache.
+; For each valid entry, we load the corresponding segment register and
+; remember which ones we did with a mask. Then we figure out which segments
+; were valid before but were not just reloaded, and load those with the
+; defined invalid VSID.
+; Afterwards, we unlock the segment cache.
+;
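+; In pseudo-C (a hedged sketch; build_sr() and segno() are illustrative
+; helpers, not real routines):
+;
+;     loaded = 0;
+;     for (each valid cache entry e) {               /* ssg32Enter */
+;         mtsrin(build_sr(e.vsid ^ key_modifier), e.esid);
+;         loaded |= 0x80000000 >> segno(e.esid);
+;     }
+;     stale = validSegs & ~loaded;                   /* ssg32Inval */
+;     for (each segment s set in stale)
+;         mtsrin(invalSpace, s << 28);
+;     validSegs = loaded;
+;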
+
+ .align 5
+
+ssg32Enter: cntlzw r12,r11 ; Find the next slot in use
+ cmplwi r12,pmapSegCacheUse ; See if we are done
+ slwi r14,r12,4 ; Index to the cache slot
+ lis r0,0x8000 ; Get set for a mask
+ add r14,r14,r9 ; Point to the entry
+
+ bge- ssg32Done ; All done...
+
+ lwz r5,sgcESID+4(r14) ; Get the ESID part
+ srw r2,r0,r12 ; Form a mask for the one we are loading
+ lwz r7,sgcVSID+4(r14) ; And get the VSID bottom
+
+ andc r11,r11,r2 ; Clear the bit
+ lwz r6,sgcVSID(r14) ; And get the VSID top
+
+ rlwinm r2,r5,4,28,31 ; Change the segment number to a number
+
+ xor r7,r7,r30 ; Modify the key before we actually set it
+ srw r0,r0,r2 ; Get a mask for the SR we are loading
+ rlwinm r8,r7,19,1,3 ; Insert the keys and N bit
+ or r17,r17,r0 ; Remember the segment
+ rlwimi r8,r7,20,12,31 ; Insert bits 4:23 of the VSID
+ rlwimi r8,r6,20,8,11 ; Get the last nybble of the SR contents
+
+ mtsrin r8,r5 ; Load the segment
+ b ssg32Enter ; Go enter the next...
+
+ .align 5
+
+ssg32Done: lwz r16,validSegs(r29) ; Get the valid SRs flags
+ stw r15,pmapCCtl(r28) ; Unlock the segment cache controls
+
+ lis r0,0x8000 ; Get set for a mask
+ li r2,invalSpace ; Set the invalid address space VSID
+
+ nop ; Align loop
+ nop ; Align loop
+ andc r16,r16,r17 ; Get list of SRs that were valid before but not now
+ nop ; Align loop
+
+ssg32Inval: cntlzw r18,r16 ; Get the first one to invalidate
+ cmplwi r18,16 ; Have we finished?
+ srw r22,r0,r18 ; Get the mask bit
+ rlwinm r23,r18,28,0,3 ; Get the segment register we need
+ andc r16,r16,r22 ; Get rid of the guy we just did
+ bge ssg32Really ; Yes, we are really done now...
+
+ mtsrin r2,r23 ; Invalidate the SR
+ b ssg32Inval ; Do the next...
+
+ .align 5
+
+ssg32Really:
+ stw r17,validSegs(r29) ; Set the valid SR flags
+ li r3,1 ; Set kernel/user transition
+ blr
+
+;
+; This is the 64-bit address space switch code.
+; First we blow away all of the SLB entries, then we walk through the
+; segment cache, loading the SLB. Afterwards, we release the cache lock.
+;
+; Note that because we have to treat SLBE 0 specially, we do not ever use it...
+; It's a performance thing...
+;
+
+ .align 5
+
+ssg64Enter: cntlzw r12,r11 ; Find the next slot in use
+ cmplwi r12,pmapSegCacheUse ; See if we are done
+ slwi r14,r12,4 ; Index to the cache slot
+ srw r16,r0,r12 ; Form a mask for the one we are loading
+ add r14,r14,r9 ; Point to the entry
+ andc r11,r11,r16 ; Clear the bit
+ bge-- ssg64Done ; All done...
+
+ ld r5,sgcESID(r14) ; Get the ESID part
+ ld r6,sgcVSID(r14) ; And get the VSID part
+ oris r5,r5,0x0800 ; Turn on the valid bit
+ or r5,r5,r17 ; Insert the SLB slot
+ xor r6,r6,r30 ; Modify the key before we actually set it
+ addi r17,r17,1 ; Bump to the next slot
+ slbmte r6,r5 ; Make that SLB entry
+ b ssg64Enter ; Go enter the next...
+
+ .align 5
+
+ssg64Done: stw r15,pmapCCtl(r28) ; Unlock the segment cache controls
+
+ eqv r16,r16,r16 ; Load up with all foxes
+ subfic r17,r17,64 ; Get the number of 1 bits we need
+
+ sld r16,r16,r17 ; Get a mask for the used SLB entries
+ li r3,1 ; Set kernel/user transition
+ std r16,validSegs(r29) ; Set the valid SR flags
+ blr
+
+;
+; mapSetUp - this function sets initial state for all mapping functions.
+; We turn off all translations (physical), disable interruptions, and
+; enter 64-bit mode if applicable.
+;
+; We also return the original MSR in r11, the feature flags in R12,
+; and CR6 set up so we can do easy branches for 64-bit
+; hw_clear_maps assumes r10, r9 will not be trashed.
+;
+
+ .align 5
+ .globl EXT(mapSetUp)
+
+LEXT(mapSetUp)
+
+ lis r0,hi16(MASK(MSR_VEC)) ; Get the vector mask
+ mfsprg r12,2 ; Get feature flags
+ ori r0,r0,lo16(MASK(MSR_FP)) ; Get the FP as well
+ mtcrf 0x04,r12 ; move pf64Bit and pfNoMSRirb to cr5 and 6
+ mfmsr r11 ; Save the MSR
+ mtcrf 0x02,r12 ; move pf64Bit and pfNoMSRirb to cr5 and 6
+ andc r11,r11,r0 ; Clear VEC and FP for good
+ ori r0,r0,lo16(MASK(MSR_EE)|MASK(MSR_DR)|MASK(MSR_IR)) ; Get rid of EE, IR, and DR
+ li r2,1 ; Prepare for 64 bit
+ andc r0,r11,r0 ; Clear the rest
+ bt pfNoMSRirb,msuNoMSR ; No MSR...
+ bt++ pf64Bitb,msuSF ; skip if 64-bit (only they take the hint)
+
+ mtmsr r0 ; Translation and all off
+ isync ; Toss prefetch
+ blr ; Return...
+
+ .align 5
+
+msuSF: rldimi r0,r2,63,MSR_SF_BIT ; set SF bit (bit 0)
+ mtmsrd r0 ; set 64-bit mode, turn off EE, DR, and IR
+ isync ; synchronize
+ blr ; Return...
+
+ .align 5
+
+msuNoMSR: mr r2,r3 ; Save R3 across call
+ mr r3,r0 ; Get the new MSR value