X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d12e16782ebf8bb779633dff9e14486293bf6d07..5eebf7385fedb1517b66b53c28e5aa6bb0a2be50:/osfmk/ppc/machine_routines_asm.s

diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s
index 471389e42..e0d376d44 100644
--- a/osfmk/ppc/machine_routines_asm.s
+++ b/osfmk/ppc/machine_routines_asm.s
@@ -27,6 +27,106 @@
 #include <mach/ppc/vm_param.h>
 #include <ppc/exception.h>
 	
+    
+/*
+ * ml_set_physical()		 	-- turn off DR and (if 64-bit) turn SF on
+ *								   it is assumed that pf64Bit is already in cr6
+ * ml_set_physical_get_ffs() 	-- turn DR off, SF on, and get feature flags 
+ * ml_set_physical_disabled()	-- turn DR and EE off, SF on, get feature flags
+ * ml_set_translation_off()		-- turn DR, IR, and EE off, SF on, get feature flags
+ *
+ * Callable only from assembler, these return:
+ *	 r2 -- new MSR
+ *	r11 -- old MSR
+ *	r10 -- feature flags (pf64Bit etc, ie SPRG 2)
+ *	cr6 -- feature flags 24-27, ie pf64Bit, pf128Byte, and pf32Byte
+ *
+ * Uses r0 and r2.  ml_set_translation_off also uses r3 and cr5.
+ */
+
+        .align	4
+        .globl	EXT(ml_set_translation_off)
+LEXT(ml_set_translation_off)
+        mfsprg	r10,2						// get feature flags
+       	li		r0,0						; Clear this
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+        ori		r0,r0,lo16(MASK(MSR_EE)+MASK(MSR_FP)+MASK(MSR_IR)+MASK(MSR_DR)) // turn off all 4
+        mfmsr	r11							// get MSR
+		oris	r0,r0,hi16(MASK(MSR_VEC))	// Turn off vector too
+        mtcrf	0x04,r10					// move pfNoMSRir etc to cr5
+        andc	r2,r11,r0					// turn off EE, IR, and DR
+        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
+        bf		pfNoMSRirb,ml_set_physical_32	// skip if we can load MSR directly
+        li		r0,loadMSR					// Get the MSR setter SC
+        mr		r3,r2						// copy new MSR to r2
+        sc									// Set it
+        blr
+        
+		.align	4
+		.globl	EXT(ml_set_physical_disabled)
+
+LEXT(ml_set_physical_disabled)
+		li		r0,0						; Clear
+        mfsprg	r10,2						// get feature flags
+        ori		r0,r0,lo16(MASK(MSR_EE))	// turn EE and fp off
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+        b		ml_set_physical_join
+
+		.align	5
+		.globl	EXT(ml_set_physical_get_ffs)
+
+LEXT(ml_set_physical_get_ffs)
+        mfsprg	r10,2						// get feature flags
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+
+		.globl	EXT(ml_set_physical)
+LEXT(ml_set_physical)
+
+        li		r0,0						// do not turn off interrupts
+
+ml_set_physical_join:
+		oris	r0,r0,hi16(MASK(MSR_VEC))	// Always gonna turn of vectors
+        mfmsr	r11							// get MSR
+        ori		r0,r0,lo16(MASK(MSR_DR)+MASK(MSR_FP))	// always turn off DR and FP bit
+        andc	r2,r11,r0					// turn off DR and maybe EE
+        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
+ml_set_physical_32:
+        mtmsr	r2							// turn off translation
+        isync
+        blr
+        
+ml_set_physical_64:
+        li		r0,1						// get a 1 to slam into SF
+        rldimi	r2,r0,63,MSR_SF_BIT			// set SF bit (bit 0)
+        mtmsrd	r2							// set 64-bit mode, turn off data relocation
+        isync								// synchronize
+        blr
+    
+
+/*
+ * ml_restore(old_MSR)
+ *
+ * Callable only from assembler, restores the MSR in r11 saved by ml_set_physical.
+ * We assume cr6 and r11 are as set by ml_set_physical, ie:
+ *	cr6 - pf64Bit flag (feature flags 24-27)
+ *	r11 - old MSR
+ */
+ 
+		.align	5
+		.globl	EXT(ml_restore)
+
+LEXT(ml_restore)
+        bt++	pf64Bitb,ml_restore_64		// handle 64-bit cpus (only they take the hint)
+        mtmsr	r11							// restore a 32-bit MSR
+        isync
+        blr
+        
+ml_restore_64:
+        mtmsrd	r11							// restore a 64-bit MSR
+        isync
+        blr
+
+    
 /* PCI config cycle probing
  *
  *	boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
@@ -44,16 +144,23 @@
 LEXT(ml_probe_read)
 
 			mfsprg	r9,2							; Get feature flags
+			
+			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
+			rlwinm	r3,r3,0,0,31					; Clean up for 64-bit machines
+			bne++	mpr64bit						; Go do this the 64-bit way...
+
+mpr32bit:	lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
 			mfmsr	r0								; Save the current MSR
-			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
+
 			neg		r10,r3							; Number of bytes to end of page
-			rlwinm	r2,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
+			andc	r0,r0,r8						; Clear VEC and FP
 			rlwinm.	r10,r10,0,20,31					; Clear excess junk and test for page bndry
+			ori		r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, IR, and DR
 			mr		r12,r3							; Save the load address
+			andc	r2,r0,r8						; Clear VEC, FP, and EE
 			mtcrf	0x04,r9							; Set the features			
 			cmplwi	cr1,r10,4						; At least 4 bytes left in page?
-			rlwinm	r2,r2,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Clear translation			
 			beq-	mprdoit							; We are right on the boundary...
 			li		r3,0
 			bltlr-	cr1								; No, just return failure...
@@ -129,9 +236,6 @@ mprNoMuM:
 			mtmsr	r2								; Turn translation back off
 			isync
 			
-			mtspr	hid0, r6							; Restore HID0
-			isync
-			
 			lis		r10,hi16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
 			ori		r10,r10,lo16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
 			
@@ -161,121 +265,421 @@ mprNoMuM:
 			.globl	EXT(ml_probe_read_mck)
 LEXT(ml_probe_read_mck)
 
-/* Read physical address
+    
+/* PCI config cycle probing - 64-bit
+ *
+ *	boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
+ *
+ *	Read the memory location at physical address paddr.
+ *  This is a part of a device probe, so there is a good chance we will
+ *  have a machine check here. So we have to be able to handle that.
+ *  We assume that machine checks are enabled both in MSR and HIDs
+ */
+
+;			Force a line boundry here
+			.align	6
+			.globl	EXT(ml_probe_read_64)
+
+LEXT(ml_probe_read_64)
+
+			mfsprg	r9,2							; Get feature flags
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			
+			mr		r4,r5							; Move result to common register
+			beq--	mpr32bit						; Go do this the 32-bit way...
+
+mpr64bit:	andi.	r0,r3,3							; Check if we are on a word boundary
+			li		r0,0							; Clear the EE bit (and everything else for that matter)
+			bne--	mprFail							; Boundary not good...
+			mfmsr	r11								; Get the MSR
+			mtmsrd	r0,1							; Set the EE bit only (do not care about RI)
+			rlwinm	r11,r11,0,MSR_EE_BIT,MSR_EE_BIT	; Isolate just the EE bit
+			mfmsr	r10								; Refresh our view of the MSR (VMX/FP may have changed)
+			or		r12,r10,r11						; Turn on EE if on before we turned it off
+			ori		r0,r0,lo16(MASK(MSR_IR)|MASK(MSR_DR))	; Get the IR and DR bits
+			li		r2,1							; Get a 1
+			sldi	r2,r2,63						; Get the 64-bit bit
+			andc	r10,r10,r0						; Clear IR and DR
+			or		r10,r10,r2						; Set 64-bit
+			
+			li		r0,1							; Get a 1
+			mtmsrd	r10								; Translation and EE off, 64-bit on
+			isync			
+			
+			sldi	r0,r0,32+8						; Get the right bit to inhibit caching
+
+			mfspr	r8,hid4							; Get HID4
+			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
+			sync									; Sync up
+			mtspr	hid4,r2							; Make real accesses cache-inhibited
+			isync									; Toss prefetches
+			
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+			
+			sync
+			isync
+
+			eieio									; Make sure of all previous accesses
+			
+			lwz		r11,0(r3)						; Get it and maybe machine check here
+			
+			eieio									; Make sure of ordering again
+			sync									; Get caught up yet again
+			isync									; Do not go further till we are here
+
+			sync									; Sync up
+			mtspr	hid4,r8							; Make real accesses not cache-inhibited
+			isync									; Toss prefetches
+
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+
+			mtmsrd	r12								; Restore entry MSR
+			isync
+			
+			stw		r11,0(r4)						; Pass back the result
+			li		r3,1							; Indicate success
+			blr										; Leave...
+
+mprFail:	li		r3,0							; Set failure
+			blr										; Leave...
+
+;			Force a line boundry here. This means we will be able to check addresses better
+			.align	6
+			.globl	EXT(ml_probe_read_mck_64)
+LEXT(ml_probe_read_mck_64)
+
+
+/* Read physical address byte
  *
  *	unsigned int ml_phys_read_byte(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_byte_64(addr64_t paddr)
  *
  *	Read the byte at physical address paddr. Memory should not be cache inhibited.
  */
 
 ;			Force a line boundry here
+
 			.align	5
+			.globl	EXT(ml_phys_read_byte_64)
+
+LEXT(ml_phys_read_byte_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_byte_join			
+
 			.globl	EXT(ml_phys_read_byte)
 
 LEXT(ml_phys_read_byte)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_byte_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lbz		r3,0(r3)						; Get the byte
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r10								; Save the current MSR
-			rlwinm	r10,r10,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r10,r10,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r4,r10,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r4,r4,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r4								; Translation and all off
-			isync									; Toss prefetch
+/* Read physical address half word
+ *
+ *	unsigned int ml_phys_read_half(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_half_64(addr64_t paddr)
+ *
+ *	Read the half word at physical address paddr. Memory should not be cache inhibited.
+ */
 
-			lbz		r3,0(r3)						; Get the byte
-			sync
+;			Force a line boundry here
 
-			mtmsr	r10								; Restore translation and rupts
-			isync
-			blr
+			.align	5
+			.globl	EXT(ml_phys_read_half_64)
+
+LEXT(ml_phys_read_half_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_half_join		
+
+			.globl	EXT(ml_phys_read_half)
 
-/* Read physical address
+LEXT(ml_phys_read_half)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_half_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lhz		r3,0(r3)						; Get the half word
+			b		rdwrpost						; Clean up and leave...
+
+
+/* Read physical address word
  *
  *	unsigned int ml_phys_read(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_64(addr64_t paddr)
+ *	unsigned int ml_phys_read_word(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_word_64(addr64_t paddr)
  *
  *	Read the word at physical address paddr. Memory should not be cache inhibited.
  */
 
 ;			Force a line boundry here
+
 			.align	5
+			.globl	EXT(ml_phys_read_64)
+			.globl	EXT(ml_phys_read_word_64)
+
+LEXT(ml_phys_read_64)
+LEXT(ml_phys_read_word_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_word_join		
+
 			.globl	EXT(ml_phys_read)
+			.globl	EXT(ml_phys_read_word)
 
 LEXT(ml_phys_read)
+LEXT(ml_phys_read_word)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_word_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lwz		r3,0(r3)						; Get the word
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r4,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r4,r4,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r4								; Translation and all off
-			isync									; Toss prefetch
+/* Read physical address double word
+ *
+ *	unsigned long long ml_phys_read_double(vm_offset_t paddr)
+ *	unsigned long long ml_phys_read_double_64(addr64_t paddr)
+ *
+ *	Read the double word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+;			Force a line boundry here
+
+			.align	5
+			.globl	EXT(ml_phys_read_double_64)
+
+LEXT(ml_phys_read_double_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+            b		ml_phys_read_double_join		
+
+			.globl	EXT(ml_phys_read_double)
+
+LEXT(ml_phys_read_double)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_double_join:							; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
 			
-			lwz		r3,0(r3)						; Get the word
-			sync
+			lwz		r4,4(r3)						; Get the low word
+			lwz		r3,0(r3)						; Get the high word
+			b		rdwrpost						; Clean up and leave...
 
-			mtmsr	r0								; Restore translation and rupts
-			isync
-			blr
 
 /* Write physical address byte
  *
  *	void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
  *
  *	Write the byte at physical address paddr. Memory should not be cache inhibited.
  */
 
-;			Force a line boundry here
 			.align	5
+			.globl	EXT(ml_phys_write_byte_64)
+
+LEXT(ml_phys_write_byte_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_byte_join
+
 			.globl	EXT(ml_phys_write_byte)
 
 LEXT(ml_phys_write_byte)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_byte_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stb		r4,0(r3)						; Set the byte
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r5,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r5								; Translation and all off
-			isync									; Toss prefetch
+/* Write physical address half word
+ *
+ *	void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
+ *
+ *	Write the half word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+			.align	5
+			.globl	EXT(ml_phys_write_half_64)
+
+LEXT(ml_phys_write_half_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_half_join
+
+			.globl	EXT(ml_phys_write_half)
+
+LEXT(ml_phys_write_half)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_half_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
 			
-			stb		r4,0(r3)						; Set the byte
-			sync
+			sth		r4,0(r3)						; Set the half word
+			b		rdwrpost						; Clean up and leave...
 
-			mtmsr	r0								; Restore translation and rupts
-			isync
-			blr
 
-/* Write physical address
+/* Write physical address word
  *
  *	void ml_phys_write(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_64(addr64_t paddr, unsigned int data)
+ *	void ml_phys_write_word(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_word_64(addr64_t paddr, unsigned int data)
  *
  *	Write the word at physical address paddr. Memory should not be cache inhibited.
  */
 
-;			Force a line boundry here
 			.align	5
+			.globl	EXT(ml_phys_write_64)
+			.globl	EXT(ml_phys_write_word_64)
+
+LEXT(ml_phys_write_64)
+LEXT(ml_phys_write_word_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_word_join
+
 			.globl	EXT(ml_phys_write)
+			.globl	EXT(ml_phys_write_word)
 
 LEXT(ml_phys_write)
+LEXT(ml_phys_write_word)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_word_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stw		r4,0(r3)						; Set the word
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r5,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r5								; Translation and all off
-			isync									; Toss prefetch
+/* Write physical address double word
+ *
+ *	void ml_phys_write_double(vm_offset_t paddr, unsigned long long data)
+ *	void ml_phys_write_double_64(addr64_t paddr, unsigned long long data)
+ *
+ *	Write the double word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+			.align	5
+			.globl	EXT(ml_phys_write_double_64)
+
+LEXT(ml_phys_write_double_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the high data
+			mr		r5,r6							; Copy over the low data
+            b		ml_phys_write_double_join
+
+			.globl	EXT(ml_phys_write_double)
+
+LEXT(ml_phys_write_double)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_double_join:							; r3 = address to write (reg64_t), r4,r5 = data (long long)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stw		r4,0(r3)						; Set the high word
+			stw		r5,4(r3)						; Set the low word
+			b		rdwrpost						; Clean up and leave...
+
+
+			.align	5
+
+rdwrpre:	mfsprg	r12,2							; Get feature flags 
+			lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
+			mfmsr	r10								; Save the MSR 
+			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
+			mtcrf	0x02,r12						; move pf64Bit
+			andc	r10,r10,r8						; Clear VEC and FP
+			ori		r9,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, DR, and IR
+			li		r2,1							; Prepare for 64 bit
+			andc	r9,r10,r9						; Clear VEC, FP, DR, and EE
+			bf--	pf64Bitb,rdwrpre32				; Join 32-bit code...
+			
+			srdi	r7,r3,31						; Get a 1 if address is in I/O memory
+			rldimi	r9,r2,63,MSR_SF_BIT				; set SF bit (bit 0)
+			cmpldi	cr7,r7,1						; Is source in I/O memory?
+			mtmsrd	r9								; set 64-bit mode, turn off EE, DR, and IR
+			isync									; synchronize
+
+			sldi	r0,r2,32+8						; Get the right bit to turn off caching
+			
+			bnelr++	cr7								; We are not in the I/O area, all ready...
+			
+			mfspr	r8,hid4							; Get HID4
+			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
+			sync									; Sync up
+			mtspr	hid4,r2							; Make real accesses cache-inhibited
+			isync									; Toss prefetches
+			
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
 			
-			stw		r4,0(r3)						; Set the word
 			sync
+			isync
+			blr										; Finally,  all ready...
+	
+			.align	5
+			
+rdwrpre32:	rlwimi	r9,r10,0,MSR_IR_BIT,MSR_IR_BIT	; Leave the IR bit unchanged
+			mtmsr	r9								; Drop EE, DR, and leave IR unchanged
+			isync
+			blr										; All set up, leave...
+			
+			.align	5
+			
+rdwrpost:	mtlr	r11								; Restore the return
+			bt++	pf64Bitb,rdwrpost64				; Join 64-bit code...
+			
+			mtmsr	r10								; Restore entry MSR (sans FP and VEC)
+			isync
+			blr										; Leave...
+			
+rdwrpost64:	bne++	cr7,rdwrpcok					; Skip enabling real mode caching if we did not change it...
+
+			sync									; Sync up
+			mtspr	hid4,r8							; Make real accesses not cache-inhibited
+			isync									; Toss prefetches
 
-			mtmsr	r0								; Restore translation and rupts
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+
+rdwrpcok:	mtmsrd	r10								; Restore entry MSR (sans FP and VEC)
 			isync
-			blr
+			blr										; Leave...
 
 
 /* set interrupts enabled or disabled
@@ -291,38 +695,41 @@ LEXT(ml_phys_write)
  
 LEXT(ml_set_interrupts_enabled)
 
-			mfsprg	r7,0
-			lwz		r4,PP_INTS_ENABLED(r7)
-			mr.		r4,r4
-			beq-	EXT(fake_set_interrupts_enabled)
+			andi.   r4,r3,1							; Are we turning interruptions on?
+			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
 			mfmsr	r5								; Get the current MSR
-			rlwinm	r5,r5,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			mr		r4,r3							; Save the old value
-			rlwinm	r5,r5,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+			ori		r0,r0,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Get float enable and EE enable
 			rlwinm	r3,r5,17,31,31					; Set return value
-			rlwimi	r5,r4,15,16,16					; Insert new EE bit
-			andi.   r8,r5,lo16(MASK(MSR_EE))			; Interruptions
-			bne     CheckPreemption
-NoPreemption:
-			mtmsr   r5                              ; Slam enablement
+			andc	r5,r5,r0						; Force VEC and FP off
+			bne	    CheckPreemption					; Interrupts going on, check ASTs...
+
+			mtmsr   r5                              ; Slam diable (always going disabled here)
+			isync									; Need this because FP/Vec might go off
 			blr
 
+			.align	5
+
 CheckPreemption:
-			lwz		r8,PP_NEED_AST(r7)
-			li		r6,AST_URGENT
-			lwz		r8,0(r8)
-			lwz		r7,PP_PREEMPT_CNT(r7)
-			lis		r0,HIGH_ADDR(DoPreemptCall)
-			and.	r8,r8,r6
-			ori		r0,r0,LOW_ADDR(DoPreemptCall)   
-			beq+	NoPreemption
-			cmpi	cr0, r7, 0
+			mfsprg	r7,0
+			ori		r5,r5,lo16(MASK(MSR_EE))		; Turn on the enable
+			lwz		r8,PP_NEED_AST(r7)				; Get pointer to AST flags
+			mfsprg	r9,1							; Get current activation
+			li		r6,AST_URGENT					; Get the type we will preempt for 
+			lwz		r7,ACT_PREEMPT_CNT(r9)			; Get preemption count
+			lwz		r8,0(r8)						; Get AST flags
+			lis		r0,hi16(DoPreemptCall)			; High part of Preempt FW call
+			cmpwi	cr1,r7,0						; Are preemptions masked off?
+			and.	r8,r8,r6						; Are we urgent?
+			crorc	cr1_eq,cr0_eq,cr1_eq			; Remember if preemptions are masked or not urgent
+			ori		r0,r0,lo16(DoPreemptCall)   	; Bottome of FW call
+
 			mtmsr	r5								; Restore the MSR now, before we can preempt
-			bnelr+									; Return if no premption
+			isync									; Need this because FP/Vec might go off
+
+			beqlr++	cr1								; Return if no premption...
 			sc										; Preempt
 			blr
 
-
 /*  Emulate a decremeter exception
  *
  *	void machine_clock_assist(void)
@@ -338,8 +745,8 @@ LEXT(machine_clock_assist)
 			mfsprg	r7,0
 			lwz		r4,PP_INTS_ENABLED(r7)
 			mr.		r4,r4
-			beq-	EXT(CreateFakeDEC)
-			blr
+			bnelr+	cr0
+			b	EXT(CreateFakeDEC)
 
 /*  Set machine into idle power-saving mode. 
  *
@@ -351,23 +758,24 @@ LEXT(machine_clock_assist)
  *
  */
 
-
 ;			Force a line boundry here
 			.align	5
 			.globl	EXT(machine_idle_ppc)
 
 LEXT(machine_idle_ppc)
 
-			mfmsr	r3								; Get the current MSR
-			rlwinm	r3,r3,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r3,r3,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r5,r3,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
+			lis		r0,hi16(MASK(MSR_VEC))			; Get the vector flag
+			mfmsr	r3								; Save the MSR 
+			ori		r0,r0,lo16(MASK(MSR_FP))		; Add the FP flag
+			andc	r3,r3,r0						; Clear VEC and FP
+			ori		r0,r0,lo16(MASK(MSR_EE))		; Drop EE also
+			andc	r5,r3,r0						; Clear VEC, FP, DR, and EE
+
 			mtmsr	r5								; Hold up interruptions for now
 			isync									; May have messed with fp/vec
 			mfsprg	r12,0							; Get the per_proc_info
-			mfspr	r6,hid0							; Get the current power-saving mode
 			mfsprg	r11,2							; Get CPU specific features
-			rlwinm	r6,r6,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)	
+			mfspr	r6,hid0							; Get the current power-saving mode
 			mtcrf	0xC7,r11						; Get the facility flags
 
 			lis		r4,hi16(napm)					; Assume we can nap
@@ -385,24 +793,36 @@ yesnap:		mftbu	r9								; Get the upper timebase
 			mftbu	r8								; Get the upper one again
 			cmplw	r9,r8							; Did the top tick?
 			bne-	yesnap							; Yeah, need to get it again...
+
 			stw		r8,napStamp(r12)				; Set high order time stamp
 			stw		r7,napStamp+4(r12)				; Set low order nap stamp
 
-			rlwinm.	r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb			; Turn off L2 Prefetch before nap?
+			rlwinm.	r0,r11,0,pfAltivecb,pfAltivecb	; Do we have altivec?
+			beq-	minovec							; No...
+			dssall									; Stop the streams before we nap/doze
+			sync
+			lwz		r8,napStamp(r12)				; Reload high order time stamp
+clearpipe:
+			cmplw	r8,r8
+			bne-	clearpipe			
+			isync
+minovec:
+
+			rlwinm.	r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before nap?
 			beq	miL2PFok
 
 			mfspr	r7,msscr0						; Get currect MSSCR0 value
-			rlwinm	r7,r7,0,0,l2pfes-1					; Dissable L2 Prefetch
+			rlwinm	r7,r7,0,0,l2pfes-1				; Disable L2 Prefetch
 			mtspr	msscr0,r7						; Updates MSSCR0 value
 			sync
 			isync
 
 miL2PFok:
-			rlwinm.	r7,r11,0,pfSlowNapb,pfSlowNapb				; Should nap at slow speed?
+			rlwinm.	r7,r11,0,pfSlowNapb,pfSlowNapb	; Should nap at slow speed?
 			beq	minoslownap
 
 			mfspr	r7,hid1							; Get current HID1 value
-			oris	r7,r7,hi16(hid1psm)					; Select PLL1
+			oris	r7,r7,hi16(hid1psm)				; Select PLL1
 			mtspr	hid1,r7							; Update HID1 value
 
 minoslownap:
@@ -414,20 +834,64 @@ minoslownap:
 ;			is taken and set everything up to return directly to machine_idle_ret.
 ;			So, make sure everything we need there is already set up...
 ;
+
+			lis		r10,hi16(dozem|napm|sleepm)		; Mask of power management bits
+		
+			bf--	pf64Bitb,mipNSF1				; skip if 32-bit...
+			
+			sldi	r4,r4,32						; Position the flags
+			sldi	r10,r10,32						; Position the masks
+
+		
+mipNSF1:	andc	r6,r6,r10						; Clean up the old power bits		
+
 			ori		r7,r5,lo16(MASK(MSR_EE))		; Flip on EE
 			or		r6,r6,r4						; Set nap or doze
 			oris	r5,r7,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
+			
+			sync
 			mtspr	hid0,r6							; Set up the HID for nap/doze
+			mfspr	r6,hid0							; Yes, this is silly, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
 			isync									; Make sure it is set
+		
 			mtmsr	r7								; Enable for interrupts
-			rlwinm.	r11,r11,0,pfAltivecb,pfAltivecb	; Do we have altivec?
-			beq-	minovec							; No...
-			dssall									; Stop the streams before we nap/doze
 
-minovec:	sync									; Make sure queues are clear			
-			mtmsr	r5								; Nap or doze
+			bf--	pf64Bitb,mipowloop				; skip if 32-bit...
+			
+			li		r3,0x10							; Fancy nap threashold is 0x10 ticks
+			mftb	r8								; Get the low half of the time base
+			mfdec	r4								; Get the decrementer ticks
+			cmplw	r4,r3							; Less than threashold?
+			blt		mipowloop
+			
+			mtdec	r3								; Load decrimenter with threshold
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
+			
+			rlwinm	r6,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear out the EE bit
+			sync									; Make sure queues are clear
+			mtmsr	r6								; Set MSR with EE off but POW on
 			isync									; Make sure this takes before we proceed
-			b		minovec							; loop if POW does not take
+			
+			mftb	r9								; Get the low half of the time base
+			sub		r9,r9,r8						; Get the number of ticks spent waiting
+			sub		r4,r4,r9						; Adjust the decrementer value
+			
+			mtdec	r4								; Load decrimenter with the rest of the timeout
+			isync									; and make sure,
+			mfdec	r4								; really sure, it gets there
+			
+mipowloop:
+			sync									; Make sure queues are clear
+			mtmsr	r5								; Nap or doze, MSR with POW and EE set
+			isync									; Make sure this takes before we proceed
+			b		mipowloop						; loop if POW does not take
+
 ;
 ;			Note that the interrupt handler will turn off the nap/doze bits in the hid.
 ;			Also remember that the interrupt handler will force return to here whenever
@@ -475,30 +939,45 @@ deadsleep:	addi	r3,r3,1							; Make analyzer happy
 			b		deadsleep						; Die the death of 1000 joys...
 #endif	
 			
+			mfsprg	r12,0							; Get the per_proc_info
 			mfspr	r4,hid0							; Get the current power-saving mode
 			eqv		r10,r10,r10						; Get all foxes
 			mfsprg	r11,2							; Get CPU specific features
 
-			rlwinm.	r5,r11,0,pfNoL2PFNapb,pfNoL2PFNapb			; Turn off L2 Prefetch before sleep?
+			rlwinm.	r5,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before sleep?
 			beq	mpsL2PFok
 
 			mfspr	r5,msscr0						; Get currect MSSCR0 value
-			rlwinm	r5,r5,0,0,l2pfes-1					; Dissable L2 Prefetch
+			rlwinm	r5,r5,0,0,l2pfes-1				; Disable L2 Prefetch
 			mtspr	msscr0,r5						; Updates MSSCR0 value
 			sync
 			isync
 
 mpsL2PFok:
+			rlwinm.	r5,r11,0,pf64Bitb,pf64Bitb		; PM bits are shifted on 64bit systems.
+			bne		mpsPF64bit
+
+			rlwinm	r4,r4,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)
+			oris	r4,r4,hi16(sleepm)				; Set sleep
+			b		mpsClearDEC
+
+mpsPF64bit:
+			lis		r5, hi16(dozem|napm|sleepm)		; Clear all possible power-saving modes (not DPM though)
+			sldi	r5, r5, 32
+			andc	r4, r4, r5
+			lis		r5, hi16(napm)					; Set sleep
+			sldi	r5, r5, 32
+			or		r4, r4, r5
+
+mpsClearDEC:
 			mfmsr	r5								; Get the current MSR
 			rlwinm	r10,r10,0,1,31					; Make 0x7FFFFFFF
-			rlwinm	r4,r4,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)	
 			mtdec	r10								; Load decrimenter with 0x7FFFFFFF
 			isync									; and make sure,
 			mfdec	r9								; really sure, it gets there
 			
 			mtcrf	0x07,r11						; Get the cache flags, etc
 
-			oris	r4,r4,hi16(sleepm)				; Set sleep
 			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
 ;
 ;			Note that we need translation off before we set the HID to sleep.  Otherwise
@@ -519,6 +998,12 @@ mpsNoMSRx:
 			ori		r3,r5,lo16(MASK(MSR_EE))		; Flip on EE
 			sync
 			mtspr	hid0,r4							; Set up the HID to sleep
+			mfspr	r4,hid0							; Yes, this is silly, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
 
 			mtmsr	r3								; Enable for interrupts to drain decrimenter
 				
@@ -540,10 +1025,30 @@ mpsNoMSRx:
 			mfmsr	r5								; Get the current MSR
 			oris	r5,r5,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
 													; Leave EE off because power goes off shortly
-
-slSleepNow:	sync									; Sync it all up
+			mfsprg	r12,0							; Get the per_proc_info
+			li		r10,PP_CPU_FLAGS
+			lhz		r11,PP_CPU_FLAGS(r12)			; Get the flags
+			ori		r11,r11,SleepState				; Marked SleepState
+			sth		r11,PP_CPU_FLAGS(r12)			; Set the flags
+			dcbf	r10,r12
+			
+			mfsprg	r11,2							; Get CPU specific features
+			rlwinm.	r0,r11,0,pf64Bitb,pf64Bitb		; Test for 64 bit processor
+			eqv		r4,r4,r4						; Get all foxes
+			rlwinm	r4,r4,0,1,31					; Make 0x7FFFFFFF
+			beq		slSleepNow						; skip if 32-bit...
+			li		r3, 0x4000						; Cause decrimenter to roll over soon
+			mtdec	r3								; Load decrimenter with 0x00004000
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
+			
+slSleepNow:
+			sync									; Sync it all up
 			mtmsr	r5								; Do sleep with interruptions enabled
 			isync									; Take a pill
+			mtdec	r4								; Load decrimenter with 0x7FFFFFFF
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
 			b		slSleepNow						; Go back to sleep if we wake up...
 			
 
@@ -573,14 +1078,25 @@ LEXT(cacheInit)
 			
 			mfsprg	r11,2							; Get CPU specific features
 			mfmsr	r7								; Get the current MSR
-			rlwinm	r4,r9,0,dpm+1,doze-1			; Clear all possible power-saving modes (also disable DPM)	
 			rlwinm	r7,r7,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
 			rlwinm	r7,r7,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
 			rlwimi	r11,r11,pfLClckb+1,31,31		; Move pfLClck to another position (to keep from using non-volatile CRs)
 			rlwinm	r5,r7,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
 			rlwinm	r5,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
 			mtcrf	0x87,r11						; Get the feature flags
+			lis		r10,hi16(dozem|napm|sleepm|dpmm)	; Mask of power management bits
+			bf--	pf64Bitb,cIniNSF1				; Skip if 32-bit...
+			
+			sldi	r10,r10,32						; Position the masks
+
+cIniNSF1:	andc	r4,r9,r10						; Clean up the old power bits		
 			mtspr	hid0,r4							; Set up the HID
+			mfspr	r4,hid0							; Yes, this is silly, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
 
 			bt		pfNoMSRirb,ciNoMSR				; No MSR...
 
@@ -599,11 +1115,12 @@ ciNoMSRx:
 			dssall									; Stop streams
 			sync
 
-cinoDSS:	lis		r5,hi16(EXT(tlb_system_lock))	; Get the TLBIE lock
+cinoDSS:	li		r5,tlbieLock					; Get the TLBIE lock
 			li		r0,128							; Get number of TLB entries
-			ori		r5,r5,lo16(EXT(tlb_system_lock))	; Grab up the bottom part
 			
 			li		r6,0							; Start at 0
+			bf--	pf64Bitb,citlbhang				; Skip if 32-bit...
+			li		r0,1024							; Get the number of TLB entries
 
 citlbhang:	lwarx	r2,0,r5							; Get the TLBIE lock
 			mr.		r2,r2							; Is it locked?
@@ -627,14 +1144,19 @@ cipurgeTLB:	tlbie	r6								; Purge this entry
 			sync
 			isync
 			
-cinoSMP:	stw		r2,0(r5)						; Unlock TLBIE lock
+			bf--	pf64Bitb,cinoSMP				; Skip if 32-bit...
+			ptesync									; Wait for quiet again
+			sync
+			
+cinoSMP:	stw		r2,tlbieLock(0)					; Unlock TLBIE lock
+
+			bt++	pf64Bitb,cin64					; Skip if 64-bit...
 
-			cror	cr0_eq,pfL1ib,pfL1db			; Check for either I- or D-cache
-			bf-		cr0_eq,cinoL1					; No level 1 to flush...
 			rlwinm.	r0,r9,0,ice,dce					; Were either of the level 1s on?
 			beq-	cinoL1							; No, no need to flush...
 			
-			bf		pfL1fab,ciswdl1					; If no hw flush assist, go do by software...
+            rlwinm.	r0,r11,0,pfL1fab,pfL1fab		; do we have L1 flush assist?
+			beq		ciswdl1							; If no hw flush assist, go do by software...
 			
 			mfspr	r8,msscr0						; Get the memory system control register
 			oris	r8,r8,hi16(dl1hwfm)				; Turn on the hardware flush request
@@ -758,7 +1280,9 @@ cinoL1:
 ;
 ;			Flush and disable the level 2
 ;
-			bf		pfL2b,cinol2					; No level 2 cache to flush
+            mfsprg	r10,2							; need to check 2 features we did not put in CR
+            rlwinm.	r0,r10,0,pfL2b,pfL2b			; do we have L2?
+			beq		cinol2							; No level 2 cache to flush
 
 			mfspr	r8,l2cr							; Get the L2CR
 			lwz		r3,pfl2cr(r12)					; Get the L2CR value
@@ -773,7 +1297,8 @@ cinoL1:
 			bne-	ciinvdl2						; Yes, just invalidate and get PLL synced...		
 			
 ciflushl2:
-			bf		pfL2fab,ciswfl2					; Flush not in hardware...
+            rlwinm.	r0,r10,0,pfL2fab,pfL2fab		; hardware-assisted L2 flush?
+			beq		ciswfl2							; Flush not in hardware...
 			
 			mr		r10,r8							; Take a copy now
 			
@@ -824,7 +1349,7 @@ ciswfldl2a:	lwz		r0,0(r10)						; Load something to flush something
 			addi	r10,r10,32						; Next line
 			bdnz	ciswfldl2a						; Do the lot...
 			
-ciinvdl2:	rlwinm	r8,r3,0,l2e+1,31				; Use the saved L2CR and clear the enable bit
+ciinvdl2:	rlwinm	r8,r3,0,l2e+1,31				; Clear the enable bit
 			b		cinla							; Branch to next line...
 
 			.align  5
@@ -851,7 +1376,9 @@ ciinvl2:	sync
 			sync
 			isync
 ciinvdl2a:	mfspr	r2,l2cr							; Get the L2CR
-			bf		pfL2ib,ciinvdl2b				; Flush not in hardware...
+            mfsprg	r0,2							; need to check a feature in "non-volatile" set
+            rlwinm.	r0,r0,0,pfL2ib,pfL2ib			; flush in HW?
+			beq		ciinvdl2b						; Flush not in hardware...
 			rlwinm.	r2,r2,0,l2i,l2i					; Is the invalidate still going?
 			bne+	ciinvdl2a						; Assume so, this will take a looong time...
 			sync
@@ -903,7 +1430,7 @@ cihwfl3:	mfspr	r10,l3cr						; Get back the L3CR
 			rlwinm.	r10,r10,0,l3hwf,l3hwf			; Is the flush over?
 			bne+	cihwfl3							; Nope, keep going...
 
-ciinvdl3:	rlwinm	r8,r3,0,l3e+1,31				; Use saved L3CR value and clear the enable bit
+ciinvdl3:	rlwinm	r8,r3,0,l3e+1,31				; Clear the enable bit
 			sync									; Make sure of life, liberty, and justice
 			mtspr	l3cr,r8							; Disable L3
 			sync
@@ -953,11 +1480,13 @@ ciinvdl3c:	addi	r2,r2,-1						; ?
 			mtspr	l3cr,r3							; Enable it as desired
 			sync
 cinol3:
-			bf		pfL2b,cinol2a					; No level 2 cache to enable
+            mfsprg	r0,2							; need to check a feature in "non-volatile" set
+            rlwinm.	r0,r0,0,pfL2b,pfL2b				; is there an L2 cache?
+			beq		cinol2a							; No level 2 cache to enable
 
 			lwz		r3,pfl2cr(r12)					; Get the L2CR value
 			cmplwi		r3, 0						; Should the L2 be all the way off?
-			beq		cinol2a						: Yes, done with L2
+			beq		cinol2a							: Yes, done with L2
 			mtspr	l2cr,r3							; Enable it as desired
 			sync
 
@@ -986,6 +1515,157 @@ cinoexit:	mtspr	hid0,r9							; Turn off the invalidate (needed for some older m
 			blr										; Return...
 
 
+;
+;			Handle 64-bit architecture
+;			This processor can not run without caches, so we just push everything out
+;			and flush.  It will be relativily clean afterwards
+;
+			
+			.align	5
+			
+cin64:		
+			mfspr	r10,hid1						; Save hid1
+			mfspr	r4,hid4							; Save hid4
+			mr		r12,r10							; Really save hid1
+			mr		r11,r4							; Get a working copy of hid4
+
+			li		r0,0							; Get a 0
+			eqv		r2,r2,r2						; Get all foxes
+			
+			rldimi	r10,r0,55,7						; Clear I$ prefetch bits (7:8)
+			
+			isync
+			mtspr	hid1,r10						; Stick it
+			mtspr	hid1,r10						; Stick it again
+			isync
+
+			rldimi	r11,r2,38,25					; Disable D$ prefetch (25:25)
+			
+			sync
+			mtspr	hid4,r11						; Stick it
+			isync
+
+			li		r3,8							; Set bit 28+32
+			sldi	r3,r3,32						; Make it bit 28
+			or		r3,r3,r11						; Turn on the flash invalidate L1D$
+			
+			oris	r5,r11,0x0600					; Set disable L1D$ bits		
+			sync
+			mtspr	hid4,r3							; Invalidate
+			isync
+	
+			mtspr	hid4,r5							; Un-invalidate and disable L1D$
+			isync
+			
+			lis		r8,GUSModeReg					; Get the GUS mode ring address
+			mfsprg	r0,2							; Get the feature flags
+			ori		r8,r8,0x8000					; Set to read data
+			rlwinm.	r0,r0,pfSCOMFixUpb+1,31,31		; Set shift if we need a fix me up
+
+			sync
+
+			mtspr	scomc,r8						; Request the GUS mode
+			mfspr	r11,scomd						; Get the GUS mode
+			mfspr	r8,scomc						; Get back the status (we just ignore it)
+			sync
+			isync							
+
+			sld		r11,r11,r0						; Fix up if needed
+
+			ori		r6,r11,lo16(GUSMdmapen)			; Set the bit that means direct L2 cache address
+			lis		r8,GUSModeReg					; Get GUS mode register address
+				
+			sync
+
+			mtspr	scomd,r6						; Set that we want direct L2 mode
+			mtspr	scomc,r8						; Tell GUS we want direct L2 mode
+			mfspr	r3,scomc						; Get back the status
+			sync
+			isync							
+
+			li		r3,0							; Clear start point
+		
+cflushlp:	lis		r6,0x0040						; Pick 4MB line as our target
+			or		r6,r6,r3						; Put in the line offset
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+
+			addi	r3,r3,128						; Next line
+			andis.	r5,r3,8							; Have we done enough?
+			beq++	cflushlp						; Not yet...
+			
+			sync
+
+			lis		r6,0x0040						; Pick 4MB line as our target
+
+cflushx:	dcbf	0,r6							; Flush line and invalidate
+			addi	r6,r6,128						; Next line
+			andis.	r5,r6,0x0080					; Have we done enough?
+			beq++	cflushx							; Keep on flushing...
+
+			mr		r3,r10							; Copy current hid1
+			rldimi	r3,r2,54,9						; Set force icbi match mode
+			
+			li		r6,0							; Set start if ICBI range
+			isync
+			mtspr	hid1,r3							; Stick it
+			mtspr	hid1,r3							; Stick it again
+			isync
+
+cflicbi:	icbi	0,r6							; Kill I$
+			addi	r6,r6,128						; Next line
+			andis.	r5,r6,1							; Have we done them all?
+			beq++	cflicbi							; Not yet...
+
+			lis		r8,GUSModeReg					; Get GUS mode register address
+				
+			sync
+
+			mtspr	scomd,r11						; Set that we do not want direct mode
+			mtspr	scomc,r8						; Tell GUS we do not want direct mode
+			mfspr	r3,scomc						; Get back the status
+			sync
+			isync							
+
+			isync
+			mtspr	hid0,r9							; Restore entry hid0
+			mfspr	r9,hid0							; Yes, this is silly, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			isync
+
+			isync
+			mtspr	hid1,r12						; Restore entry hid1
+			mtspr	hid1,r12						; Stick it again
+			isync
+		
+			sync
+			mtspr	hid4,r4							; Restore entry hid4
+			isync
+
+			sync
+			mtmsr	r7								; Restore MSR to entry
+			isync
+			blr										; Return...
+			
+			
+
 /*  Disables all caches
  *
  *	void cacheDisable(void)
@@ -1009,13 +1689,16 @@ LEXT(cacheDisable)
 			
 cdNoAlt:	sync
 			
+			btlr	pf64Bitb						; No way to disable a 64-bit machine...
+			
 			mfspr	r5,hid0							; Get the hid
 			rlwinm	r5,r5,0,dce+1,ice-1				; Clear the I- and D- cache enables
 			mtspr	hid0,r5							; Turn off dem caches
 			sync
 
-			bf		pfL2b,cdNoL2					; Skip if no L2...
-			
+            rlwinm.	r0,r11,0,pfL2b,pfL2b			; is there an L2?
+			beq		cdNoL2							; Skip if no L2...
+
 			mfspr	r5,l2cr							; Get the L2
 			rlwinm	r5,r5,0,l2e+1,31				; Turn off enable bit
 
@@ -1034,7 +1717,7 @@ cinlbb:		sync									; Finish memory stuff
 			b		cinlcc							; Jump back up and turn off cache...
 
 cdNoL2:
-			
+
 			bf		pfL3b,cdNoL3					; Skip down if no L3...
 			
 			mfspr	r5,l3cr							; Get the L3
@@ -1050,10 +1733,7 @@ cdNoL3:
 /*  Initialize processor thermal monitoring  
  *	void ml_thrm_init(void)
  *
- *	Build initial TAU registers and start them all going.
- *	We ca not do this at initial start up because we need to have the processor frequency first.
- *	And just why is this in assembler when it does not have to be?? Cause I am just too 
- *	lazy to open up a "C" file, thats why.
+ *	Obsolete, deprecated and will be removed.
  */
 
 ;			Force a line boundry here
@@ -1061,53 +1741,12 @@ cdNoL3:
 			.globl	EXT(ml_thrm_init)
 
 LEXT(ml_thrm_init)
-
-			mfsprg	r12,0							; Get the per_proc blok
-			lis		r11,hi16(EXT(gPEClockFrequencyInfo))	; Get top of processor information
-			mfsprg	r10,2							; Get CPU specific features
-			ori		r11,r11,lo16(EXT(gPEClockFrequencyInfo))	; Get bottom of processor information
-			mtcrf	0x40,r10						; Get the installed features
-
-			li		r3,lo16(thrmtidm|thrmvm)		; Set for lower-than thermal event at 0 degrees
-			bflr	pfThermalb						; No thermal monitoring on this cpu
-			mtspr	thrm1,r3						; Do it
-
-			lwz		r3,thrmthrottleTemp(r12)		; Get our throttle temprature
-			rlwinm	r3,r3,31-thrmthre,thrmthrs,thrmthre	; Position it
-			ori		r3,r3,lo16(thrmvm)				; Set for higher-than event 
-			mtspr	thrm2,r3						; Set it
-
-			lis		r4,hi16(1000000)				; Top of million
-;
-;			Note: some CPU manuals say this is processor clocks, some say bus rate.  The latter
-;			makes more sense because otherwise we can not get over about 400MHz.
-#if 0
-			lwz		r3,PECFIcpurate(r11)				; Get the processor speed
-#else
-			lwz		r3,PECFIbusrate(r11)				; Get the bus speed
-#endif
-			ori		r4,r4,lo16(1000000)				; Bottom of million
-			lis		r7,hi16(thrmsitvm>>1)			; Get top of highest possible value
-			divwu	r3,r3,r4						; Get number of cycles per microseconds
-			ori		r7,r7,lo16(thrmsitvm>>1)		; Get the bottom of the highest possible value
-			addi	r3,r3,1							; Insure we have enough
-			mulli	r3,r3,20						; Get 20 microseconds worth of cycles
-			cmplw	r3,r7							; Check against max
-			ble+	smallenuf						; It is ok...
-			mr		r3,r7							; Saturate
-			
-smallenuf:	rlwinm	r3,r3,31-thrmsitve,thrmsitvs,thrmsitve	; Position			
-			ori		r3,r3,lo16(thrmem)				; Enable with at least 20micro sec sample
-			stw		r3,thrm3val(r12)				; Save this in case we need it later
-			mtspr	thrm3,r3						; Do it
 			blr
 
-
 /*  Set thermal monitor bounds 
  *	void ml_thrm_set(unsigned int low, unsigned int high)
  *
- *	Set TAU to interrupt below low and above high.  A value of
- *	zero disables interruptions in that direction.
+ *	Obsolete, deprecated and will be removed.
  */
 
 ;			Force a line boundry here
@@ -1115,42 +1754,12 @@ smallenuf:	rlwinm	r3,r3,31-thrmsitve,thrmsitvs,thrmsitve	; Position
 			.globl	EXT(ml_thrm_set)
 
 LEXT(ml_thrm_set)
-
-			mfmsr	r0								; Get the MSR
-			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r6,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear EE bit
-			mtmsr	r6
-			isync
-
-			mfsprg	r12,0							; Get the per_proc blok
-
-			rlwinm.	r6,r3,31-thrmthre,thrmthrs,thrmthre	; Position it and see if enabled
-			mfsprg	r9,2							; Get CPU specific features
-			stw		r3,thrmlowTemp(r12)				; Set the low temprature
-			mtcrf	0x40,r9							; See if we can thermal this machine
-			rlwinm	r9,r9,(((31-thrmtie)+(pfThermIntb+1))&31),thrmtie,thrmtie	; Set interrupt enable if this machine can handle it
-			bf		pfThermalb,tsetcant				; No can do...
-			beq		tsetlowo						; We are setting the low off...
-			ori		r6,r6,lo16(thrmtidm|thrmvm)		; Set the lower-than and valid bit
-			or		r6,r6,r9						; Set interruption request if supported
-
-tsetlowo:	mtspr	thrm1,r6						; Cram the register
-			
-			rlwinm.	r6,r4,31-thrmthre,thrmthrs,thrmthre	; Position it and see if enabled
-			stw		r4,thrmhighTemp(r12)			; Set the high temprature
-			beq		tsethigho						; We are setting the high off...
-			ori		r6,r6,lo16(thrmvm)				; Set valid bit
-			or		r6,r6,r9						; Set interruption request if supported
-
-tsethigho:	mtspr	thrm2,r6						; Cram the register
-
-tsetcant:	mtmsr	r0								; Reenable interruptions
-			blr										; Leave...
+			blr
 
 /*  Read processor temprature  
  *	unsigned int ml_read_temp(void)
  *
+ *	Obsolete, deprecated and will be removed.
  */
 
 ;			Force a line boundry here
@@ -1158,57 +1767,8 @@ tsetcant:	mtmsr	r0								; Reenable interruptions
 			.globl	EXT(ml_read_temp)
 
 LEXT(ml_read_temp)
-
-			mfmsr	r9								; Save the MSR
-			li		r5,15							; Starting point for ranging (start at 15 so we do not overflow)
-			rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
-			mfsprg	r7,2							; Get CPU specific features
-			mtmsr	r8								; Do not allow interruptions
-			mtcrf	0x40,r7							; See if we can thermal this machine
-			bf		pfThermalb,thrmcant				; No can do...
-
-			mfspr	r11,thrm1						; Save thrm1
-
-thrmrange:	rlwinm	r4,r5,31-thrmthre,thrmthrs,thrmthre	; Position it
-			ori		r4,r4,lo16(thrmtidm|thrmvm)		; Flip on the valid bit and make comparision for less than
-
-			mtspr	thrm1,r4						; Set the test value
-			
-thrmreada:	mfspr	r3,thrm1						; Get the thermal register back
-			rlwinm.	r0,r3,0,thrmtiv,thrmtiv			; Has it settled yet?
-			beq+	thrmreada						; Nope...
-
-			rlwinm.	r0,r3,0,thrmtin,thrmtin			; Are we still under the threshold?
-			bne		thrmsearch						; No, we went over...
-
-			addi	r5,r5,16						; Start by trying every 16 degrees
-			cmplwi	r5,127							; Have we hit the max?
-			blt-	thrmrange						; Got some more to do...
-
-thrmsearch:	rlwinm	r4,r5,31-thrmthre,thrmthrs,thrmthre	; Position it
-			ori		r4,r4,lo16(thrmtidm|thrmvm)		; Flip on the valid bit and make comparision for less than
-			
-			mtspr	thrm1,r4						; Set the test value
-			
-thrmread:	mfspr	r3,thrm1						; Get the thermal register back
-			rlwinm.	r0,r3,0,thrmtiv,thrmtiv			; Has it settled yet?
-			beq+	thrmread						; Nope...
-			
-			rlwinm.	r0,r3,0,thrmtin,thrmtin			; Are we still under the threshold?
-			beq		thrmdone						; No, we hit it...
-			addic.	r5,r5,-1						; Go down a degree
-			bge+	thrmsearch						; Try again (until we are below freezing)...
-			
-thrmdone:	addi	r3,r5,1							; Return the temprature (bump it up to make it correct)
-			mtspr	thrm1,r11						; Restore the thermal register
-			mtmsr	r9								; Re-enable interruptions
-			blr										; Leave...
-			
-thrmcant:	eqv		r3,r3,r3						; Return bogus temprature because we can not read it
-			mtmsr	r9								; Re-enable interruptions
-			blr										; Leave...
+			li		r3,-1
+			blr
 
 /*  Throttle processor speed up or down
  *	unsigned int ml_throttle(unsigned int step)
@@ -1216,6 +1776,7 @@ thrmcant:	eqv		r3,r3,r3						; Return bogus temprature because we can not read i
  *	Returns old speed and sets new.  Both step and return are values from 0 to
  *	255 that define number of throttle steps, 0 being off and "ictcfim" is max * 2.
  *
+ *	Obsolete, deprecated and will be removed.
  */
 
 ;			Force a line boundry here
@@ -1223,26 +1784,8 @@ thrmcant:	eqv		r3,r3,r3						; Return bogus temprature because we can not read i
 			.globl	EXT(ml_throttle)
 
 LEXT(ml_throttle)
-
-			mfmsr	r9								; Save the MSR
-			rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
-			cmplwi	r3,lo16(ictcfim>>1)				; See if we are going too far					
-			mtmsr	r8								; Do not allow interruptions	
-			isync		
-			ble+	throtok							; Throttle value is ok...
-			li		r3,lo16(ictcfim>>1)				; Set max
-
-throtok:	rlwinm.	r4,r3,1,ictcfib,ictcfie			; Set the throttle
-			beq		throtoff						; Skip if we are turning it off...
-			ori		r4,r4,lo16(thrmvm)				; Turn on the valid bit
-			
-throtoff:	mfspr	r3,ictc							; Get the old throttle
-			mtspr	ictc,r4							; Set the new
-			rlwinm	r3,r3,31,1,31					; Shift throttle value over
-			mtmsr	r9								; Restore interruptions
-			blr										; Return...
+			li		r3,0
+			blr
 
 /*
 **      ml_get_timebase()
@@ -1259,36 +1802,84 @@ throtoff:	mfspr	r3,ictc							; Get the old throttle
 LEXT(ml_get_timebase)
 
 loop:
-        mftbu   r4
-        mftb    r5
-        mftbu   r6
-        cmpw    r6, r4
-        bne-    loop
+			mftbu   r4
+			mftb    r5
+			mftbu   r6
+			cmpw    r6, r4
+			bne-    loop
+			
+			stw     r4, 0(r3)
+			stw     r5, 4(r3)
+			
+			blr
 
-        stw     r4, 0(r3)
-        stw     r5, 4(r3)
+/*
+ *		unsigned int cpu_number(void)
+ *
+ *			Returns the current cpu number. 
+ */
+
+			.align	5
+			.globl	EXT(cpu_number)
+
+LEXT(cpu_number)
+			mfsprg	r4,0							; Get per-proc block
+			lhz		r3,PP_CPU_NUMBER(r4)			; Get CPU number 
+			blr										; Return...
 
-        blr
 
 /*
- *		The routine that implements cpu_number.
+ *		void set_machine_current_act(thread_act_t)
+ *
+ *			Set the current activation
  */
+			.align	5
+			.globl	EXT(set_machine_current_act)
 
-		.align  5
-		.globl  EXT(cpu_number)
- 
-LEXT(cpu_number)
- 
-		mfmsr	r9						/* Save the old MSR */
-		rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-		rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-		rlwinm	r8,r9,0,17,15			/* Clear interruptions */
-		mtmsr	r8						/* Interrupts off */
-		isync
-		mfsprg	r7,0					/* Get per-proc block */
-		lhz		r3,PP_CPU_NUMBER(r7)	/* Get CPU number */
-		mtmsr	r9						/* Restore interruptions to entry */
-		blr								/* Return... */
+LEXT(set_machine_current_act)
+
+			mtsprg	1,r3							; Set spr1 with the active thread
+			blr										; Return...
+
+/*
+ *		thread_t current_act(void)
+ *		thread_t current_thread(void)
+ *
+ *
+ *			Return the current thread for outside components.
+ */
+			.align	5
+			.globl	EXT(current_act)
+			.globl	EXT(current_thread)
+
+LEXT(current_act)
+LEXT(current_thread)
+
+			mfsprg	r3,1
+			blr
+
+			.align	5
+			.globl	EXT(clock_get_uptime)
+LEXT(clock_get_uptime)
+1:			mftbu	r9
+			mftb	r0
+			mftbu	r11
+			cmpw	r11,r9
+			bne-	1b
+			stw		r0,4(r3)
+			stw		r9,0(r3)
+			blr
+
+		
+			.align	5
+			.globl	EXT(mach_absolute_time)
+LEXT(mach_absolute_time)
+1:			mftbu	r3
+			mftb	r4
+			mftbu	r0
+			cmpw	r0,r3
+			bne-	1b  
+			blr
 
 /*
 **      ml_sense_nmi()
@@ -1311,32 +1902,140 @@ LEXT(ml_sense_nmi)
 			.globl	EXT(ml_set_processor_speed)
 
 LEXT(ml_set_processor_speed)
-			mfsprg	r5, 0								; Get the per_proc_info
+			mflr	r0										; Save the link register
+			stwu    r1, -(FM_ALIGN(4*4)+FM_SIZE)(r1)		; Make some space on the stack
+			stw		r28, FM_ARG0+0x00(r1)					; Save a register
+			stw		r29, FM_ARG0+0x04(r1)					; Save a register
+			stw		r30, FM_ARG0+0x08(r1)					; Save a register
+			stw		r31, FM_ARG0+0x0C(r1)					; Save a register
+			stw		r0, (FM_ALIGN(4*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
+
+			mfsprg	r31, 0									; Get the per_proc_info
+
+			lwz		r30, pfPowerModes(r31)					; Get the supported power modes
+
+			rlwinm.	r0, r30, 0, pmDualPLLb, pmDualPLLb		; Is DualPLL supported?
+			bne		spsDPLL
+
+			rlwinm.	r0, r30, 0, pmDFSb, pmDFSb				; Is DFS supported?
+			bne		spsDFS
+
+			rlwinm.	r0, r30, 0, pmPowerTuneb, pmPowerTuneb	; Is PowerTune supported?
+			bne		spsPowerTune
+
+			b		spsDone									; No supported power modes
 
-			cmpli	cr0, r3, 0							; Turn off BTIC before low speed
-			beq	sps1
-			mfspr	r4, hid0							; Get the current hid0 value
-			rlwinm	r4, r4, 0, btic+1, btic-1					; Clear the BTIC bit
+spsDPLL:
+			cmpli	cr0, r3, 0								; Turn off BTIC before low speed
+			beq		spsDPLL1
+			mfspr	r4, hid0								; Get the current hid0 value
+			rlwinm	r4, r4, 0, btic+1, btic-1				; Clear the BTIC bit
 			sync
-			mtspr	hid0, r4							; Set the new hid0 value
+			mtspr	hid0, r4								; Set the new hid0 value
 			isync
 			sync
 
-sps1:
-			mfspr	r4, hid1							; Get the current PLL settings
-			rlwimi  r4, r3, 31-hid1ps, hid1ps, hid1ps				; Copy the PLL Select bit
-			stw	r4, pfHID1(r5)							; Save the new hid1 value
-			mtspr	hid1, r4							; Select desired PLL
+spsDPLL1:
+			mfspr	r4, hid1								; Get the current PLL settings
+			rlwimi  r4, r3, 31-hid1ps, hid1ps, hid1ps		; Copy the PLL Select bit
+			stw		r4, pfHID1(r31)							; Save the new hid1 value
+			mtspr	hid1, r4								; Select desired PLL
 
-			cmpli	cr0, r3, 0							; Restore BTIC after high speed
-			bne	sps2
-			lwz	r4, pfHID0(r5)							; Load the hid0 value
+			cmpli	cr0, r3, 0								; Restore BTIC after high speed
+			bne		spsDone
+			lwz		r4, pfHID0(r31)							; Load the hid0 value
 			sync
-			mtspr	hid0, r4							; Set the hid0 value
+			mtspr	hid0, r4								; Set the hid0 value
 			isync
 			sync
+			b		spsDone
 
-sps2:
+spsDFS:
+			cmplwi	r3, 0									; full speed?
+			mfspr	r3, hid1								; Get the current HID1
+			rlwinm	r3, r3, 0, hid1dfs1+1, hid1dfs0-1		; assume full speed, clear dfs bits
+			beq		spsDFS1
+			oris	r3, r3, hi16(hid1dfs1m)					; slow, set half speed dfs1 bit
+
+spsDFS1:
+			stw		r3, pfHID1(r31)							; Save the new hid1 value
+			sync
+			mtspr	hid1, r3								; Set the new HID1
+			sync
+			isync
+			b		spsDone
+
+spsPowerTune:
+			rlwinm	r28, r3, 31-dnap, dnap, dnap			; Shift the 1 bit to the dnap+32 bit
+			rlwinm	r3, r3, 2, 29, 29						; Shift the 1 to a 4 and mask
+			addi	r3, r3, pfPowerTune0					; Add in the pfPowerTune0 offset
+			lwzx	r29, r31, r3							; Load the PowerTune number 0 or 1
+
+			sldi	r28, r28, 32							; Shift to the top half
+			ld		r3, pfHID0(r31)							; Load the saved hid0 value
+			and		r28, r28, r3							; Save the dnap bit
+			lis		r4, hi16(dnapm)							; Make a mask for the dnap bit
+			sldi	r4, r4, 32								; Shift to the top half
+			andc	r3, r3, r4								; Clear the dnap bit
+			or		r28, r28, r3							; Insert the dnap bit as needed for later
+
+			sync
+			mtspr	hid0, r3								; Turn off dnap in hid0
+			mfspr	r3, hid0								; Yes, this is silly, keep it here
+			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
+			isync											; Make sure it is set
+
+			lis		r3, hi16(PowerTuneControlReg)			; Write zero to the PCR
+			ori		r3, r3, lo16(PowerTuneControlReg)
+			li		r4, 0
+			li		r5, 0
+			bl		_ml_scom_write
+
+			lis		r3, hi16(PowerTuneControlReg)			; Write the PowerTune value to the PCR
+			ori		r3, r3, lo16(PowerTuneControlReg)
+			li		r4, 0
+			mr		r5, r29
+			bl		_ml_scom_write
+
+			rlwinm	r29, r29, 13-6, 6, 7					; Move to PSR speed location and isolate the requested speed
+spsPowerTuneLoop:
+			lis		r3, hi16(PowerTuneStatusReg)			; Read the status from the PSR
+			ori		r3, r3, lo16(PowerTuneStatusReg)
+			li		r4, 0
+			bl		_ml_scom_read
+			srdi	r5, r5, 32
+			rlwinm  r0, r5, 0, 6, 7							; Isolate the current speed
+			rlwimi	r0, r5, 0, 2, 2							; Copy in the change in progress bit
+			cmpw	r0, r29									; Compare the requested and current speeds
+			beq		spsPowerTuneDone
+			rlwinm.	r0, r5, 0, 3, 3
+			beq		spsPowerTuneLoop
+
+spsPowerTuneDone:
+			sync
+			mtspr	hid0, r28								; Turn on dnap in hid0 if needed
+			mfspr	r28, hid0								; Yes, this is silly, keep it here
+			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
+			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
+			isync											; Make sure it is set
+
+			b		spsDone
+
+spsDone:
+			lwz		r0, (FM_ALIGN(4*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Get the return
+			lwz		r28, FM_ARG0+0x00(r1)					; Restore a register
+			lwz		r29, FM_ARG0+0x04(r1)					; Restore a register
+			lwz		r30, FM_ARG0+0x08(r1)					; Restore a register
+			lwz		r31, FM_ARG0+0x0C(r1)					; Restore a register
+			lwz		r1, FM_BACKPTR(r1)						; Pop the stack
+			mtlr	r0
 			blr
 
 /*
@@ -1348,8 +2047,73 @@ sps2:
 			.globl	EXT(ml_set_processor_voltage)
 
 LEXT(ml_set_processor_voltage)
-			mfspr	r4, hid2							; Get HID2 value
-			rlwimi	r4, r3, 31-hid2vmin, hid2vmin, hid2vmin				; Insert the voltage mode bit
-			mtspr	hid2, r4							; Set the voltage mode
-			sync									; Make sure it is done
+			mfsprg	r5, 0									; Get the per_proc_info
+
+			lwz		r6, pfPowerModes(r5)					; Get the supported power modes
+
+			rlwinm.	r0, r6, 0, pmDPLLVminb, pmDPLLVminb		; Is DPLL Vmin supported
+			beq		spvDone
+
+			mfspr	r4, hid2								; Get HID2 value
+			rlwimi	r4, r3, 31-hid2vmin, hid2vmin, hid2vmin	; Insert the voltage mode bit
+			mtspr	hid2, r4								; Set the voltage mode
+			sync											; Make sure it is done
+
+spvDone:
 			blr
+
+
+;
+;			unsigned int ml_scom_write(unsigned int reg, unsigned long long data)
+;			64-bit machines only
+;			returns status
+;
+
+			.align	5
+			.globl	EXT(ml_scom_write)
+
+LEXT(ml_scom_write)
+
+			rldicr	r3,r3,8,47							; Align register it correctly
+			rldimi	r5,r4,32,0							; Merge the high part of data
+			sync										; Clean up everything
+			
+			mtspr	scomd,r5							; Stick in the data
+			mtspr	scomc,r3							; Set write to register
+			sync
+			isync					
+
+			mfspr	r3,scomc							; Read back status
+			blr											; leave....							
+
+;
+;			unsigned int ml_read_scom(unsigned int reg, unsigned long long *data)
+;			64-bit machines only
+;			returns status
+;			ASM Callers: data (r4) can be zero and the 64 bit data will be returned in r5
+;
+
+			.align	5
+			.globl	EXT(ml_scom_read)
+
+LEXT(ml_scom_read)
+
+			mfsprg	r0,2								; Get the feature flags
+			rldicr	r3,r3,8,47							; Align register it correctly
+			rlwinm	r0,r0,pfSCOMFixUpb+1,31,31			; Set shift if we need a fix me up
+			
+			ori		r3,r3,0x8000						; Set to read data
+			sync
+
+			mtspr	scomc,r3							; Request the register
+			mfspr	r5,scomd							; Get the register contents
+			mfspr	r3,scomc							; Get back the status
+			sync
+			isync							
+
+			sld		r5,r5,r0							; Fix up if needed
+
+			cmplwi	r4, 0								; If data pointer is null, just return
+			beqlr										; the received data in r5
+			std		r5,0(r4)							; Pass back the received data			
+			blr											; Leave...