X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d52fe63fc81f7e44faaae711812a211a78434976..4a2492630c73add3c3aa8a805ba4ff343d4a58ea:/osfmk/ppc/machine_routines_asm.s?ds=sidebyside

diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s
index a03a716d9..e98135ff1 100644
--- a/osfmk/ppc/machine_routines_asm.s
+++ b/osfmk/ppc/machine_routines_asm.s
@@ -3,19 +3,22 @@
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -27,6 +30,106 @@
 #include <mach/ppc/vm_param.h>
 #include <ppc/exception.h>
 	
+    
+/*
+ * ml_set_physical()		 	-- turn off DR and (if 64-bit) turn SF on
+ *								   it is assumed that pf64Bit is already in cr6
+ * ml_set_physical_get_ffs() 	-- turn DR off, SF on, and get feature flags 
+ * ml_set_physical_disabled()	-- turn DR and EE off, SF on, get feature flags
+ * ml_set_translation_off()		-- turn DR, IR, and EE off, SF on, get feature flags
+ *
+ * Callable only from assembler, these return:
+ *	 r2 -- new MSR
+ *	r11 -- old MSR
+ *	r10 -- feature flags (pf64Bit etc, ie SPRG 2)
+ *	cr6 -- feature flags 24-27, ie pf64Bit, pf128Byte, and pf32Byte
+ *
+ * Uses r0 and r2.  ml_set_translation_off also uses r3 and cr5.
+ */
+
+        .align	4
+        .globl	EXT(ml_set_translation_off)
+LEXT(ml_set_translation_off)
+        mfsprg	r10,2						// get feature flags
+       	li		r0,0						; Clear this
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+        ori		r0,r0,lo16(MASK(MSR_EE)+MASK(MSR_FP)+MASK(MSR_IR)+MASK(MSR_DR)) // turn off all 4
+        mfmsr	r11							// get MSR
+		oris	r0,r0,hi16(MASK(MSR_VEC))	// Turn off vector too
+        mtcrf	0x04,r10					// move pfNoMSRir etc to cr5
+        andc	r2,r11,r0					// turn off EE, IR, and DR
+        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
+        bf		pfNoMSRirb,ml_set_physical_32	// skip if we can load MSR directly
+        li		r0,loadMSR					// Get the MSR setter SC
+        mr		r3,r2						// copy new MSR to r2
+        sc									// Set it
+        blr
+        
+		.align	4
+		.globl	EXT(ml_set_physical_disabled)
+
+LEXT(ml_set_physical_disabled)
+		li		r0,0						; Clear
+        mfsprg	r10,2						// get feature flags
+        ori		r0,r0,lo16(MASK(MSR_EE))	// turn EE and fp off
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+        b		ml_set_physical_join
+
+		.align	5
+		.globl	EXT(ml_set_physical_get_ffs)
+
+LEXT(ml_set_physical_get_ffs)
+        mfsprg	r10,2						// get feature flags
+        mtcrf	0x02,r10					// move pf64Bit etc to cr6
+
+		.globl	EXT(ml_set_physical)
+LEXT(ml_set_physical)
+
+        li		r0,0						// do not turn off interrupts
+
+ml_set_physical_join:
+		oris	r0,r0,hi16(MASK(MSR_VEC))	// Always gonna turn of vectors
+        mfmsr	r11							// get MSR
+        ori		r0,r0,lo16(MASK(MSR_DR)+MASK(MSR_FP))	// always turn off DR and FP bit
+        andc	r2,r11,r0					// turn off DR and maybe EE
+        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
+ml_set_physical_32:
+        mtmsr	r2							// turn off translation
+        isync
+        blr
+        
+ml_set_physical_64:
+        li		r0,1						// get a 1 to slam into SF
+        rldimi	r2,r0,63,MSR_SF_BIT			// set SF bit (bit 0)
+        mtmsrd	r2							// set 64-bit mode, turn off data relocation
+        isync								// synchronize
+        blr
+    
+
+/*
+ * ml_restore(old_MSR)
+ *
+ * Callable only from assembler, restores the MSR in r11 saved by ml_set_physical.
+ * We assume cr6 and r11 are as set by ml_set_physical, ie:
+ *	cr6 - pf64Bit flag (feature flags 24-27)
+ *	r11 - old MSR
+ */
+ 
+		.align	5
+		.globl	EXT(ml_restore)
+
+LEXT(ml_restore)
+        bt++	pf64Bitb,ml_restore_64		// handle 64-bit cpus (only they take the hint)
+        mtmsr	r11							// restore a 32-bit MSR
+        isync
+        blr
+        
+ml_restore_64:
+        mtmsrd	r11							// restore a 64-bit MSR
+        isync
+        blr
+
+    
 /* PCI config cycle probing
  *
  *	boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
@@ -44,14 +147,23 @@
 LEXT(ml_probe_read)
 
 			mfsprg	r9,2							; Get feature flags
+			
+			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
+			rlwinm	r3,r3,0,0,31					; Clean up for 64-bit machines
+			bne++	mpr64bit						; Go do this the 64-bit way...
+
+mpr32bit:	lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
 			mfmsr	r0								; Save the current MSR
+			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
+
 			neg		r10,r3							; Number of bytes to end of page
-			rlwinm	r2,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
+			andc	r0,r0,r8						; Clear VEC and FP
 			rlwinm.	r10,r10,0,20,31					; Clear excess junk and test for page bndry
+			ori		r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, IR, and DR
 			mr		r12,r3							; Save the load address
+			andc	r2,r0,r8						; Clear VEC, FP, and EE
 			mtcrf	0x04,r9							; Set the features			
 			cmplwi	cr1,r10,4						; At least 4 bytes left in page?
-			rlwinm	r2,r2,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Clear translation			
 			beq-	mprdoit							; We are right on the boundary...
 			li		r3,0
 			bltlr-	cr1								; No, just return failure...
@@ -75,6 +187,15 @@ mprNoMSRx:
 
 			mfspr		r6, hid0					; Get a copy of hid0
 			
+			rlwinm.		r5, r9, 0, pfNoMuMMCKb, pfNoMuMMCKb		; Check for NoMuMMCK
+			bne		mprNoMuM
+			
+			rlwinm		r5, r6, 0, ice+1, ice-1				; Turn off L1 I-Cache
+			mtspr		hid0, r5
+			isync								; Wait for I-Cache off
+			rlwinm		r5, r6, 0, mum+1, mum-1				; Turn off MuM w/ I-Cache on
+			mtspr		hid0, r5
+mprNoMuM:
 
 ;
 ;			We need to insure that there is no more than 1 BAT register that
@@ -118,9 +239,6 @@ mprNoMSRx:
 			mtmsr	r2								; Turn translation back off
 			isync
 			
-			mtspr	hid0, r6							; Restore HID0
-			isync
-			
 			lis		r10,hi16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
 			ori		r10,r10,lo16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
 			
@@ -150,113 +268,421 @@ mprNoMSRx:
 			.globl	EXT(ml_probe_read_mck)
 LEXT(ml_probe_read_mck)
 
-/* Read physical address
+    
+/* PCI config cycle probing - 64-bit
+ *
+ *	boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
+ *
+ *	Read the memory location at physical address paddr.
+ *  This is a part of a device probe, so there is a good chance we will
+ *  have a machine check here. So we have to be able to handle that.
+ *  We assume that machine checks are enabled both in MSR and HIDs
+ */
+
+;			Force a line boundry here
+			.align	6
+			.globl	EXT(ml_probe_read_64)
+
+LEXT(ml_probe_read_64)
+
+			mfsprg	r9,2							; Get feature flags
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			
+			mr		r4,r5							; Move result to common register
+			beq--	mpr32bit						; Go do this the 32-bit way...
+
+mpr64bit:	andi.	r0,r3,3							; Check if we are on a word boundary
+			li		r0,0							; Clear the EE bit (and everything else for that matter)
+			bne--	mprFail							; Boundary not good...
+			mfmsr	r11								; Get the MSR
+			mtmsrd	r0,1							; Set the EE bit only (do not care about RI)
+			rlwinm	r11,r11,0,MSR_EE_BIT,MSR_EE_BIT	; Isolate just the EE bit
+			mfmsr	r10								; Refresh our view of the MSR (VMX/FP may have changed)
+			or		r12,r10,r11						; Turn on EE if on before we turned it off
+			ori		r0,r0,lo16(MASK(MSR_IR)|MASK(MSR_DR))	; Get the IR and DR bits
+			li		r2,1							; Get a 1
+			sldi	r2,r2,63						; Get the 64-bit bit
+			andc	r10,r10,r0						; Clear IR and DR
+			or		r10,r10,r2						; Set 64-bit
+			
+			li		r0,1							; Get a 1
+			mtmsrd	r10								; Translation and EE off, 64-bit on
+			isync			
+			
+			sldi	r0,r0,32+8						; Get the right bit to inhibit caching
+
+			mfspr	r8,hid4							; Get HID4
+			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
+			sync									; Sync up
+			mtspr	hid4,r2							; Make real accesses cache-inhibited
+			isync									; Toss prefetches
+			
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+			
+			sync
+			isync
+
+			eieio									; Make sure of all previous accesses
+			
+			lwz		r11,0(r3)						; Get it and maybe machine check here
+			
+			eieio									; Make sure of ordering again
+			sync									; Get caught up yet again
+			isync									; Do not go further till we are here
+
+			sync									; Sync up
+			mtspr	hid4,r8							; Make real accesses not cache-inhibited
+			isync									; Toss prefetches
+
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+
+			mtmsrd	r12								; Restore entry MSR
+			isync
+			
+			stw		r11,0(r4)						; Pass back the result
+			li		r3,1							; Indicate success
+			blr										; Leave...
+
+mprFail:	li		r3,0							; Set failure
+			blr										; Leave...
+
+;			Force a line boundry here. This means we will be able to check addresses better
+			.align	6
+			.globl	EXT(ml_probe_read_mck_64)
+LEXT(ml_probe_read_mck_64)
+
+
+/* Read physical address byte
  *
  *	unsigned int ml_phys_read_byte(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_byte_64(addr64_t paddr)
  *
  *	Read the byte at physical address paddr. Memory should not be cache inhibited.
  */
 
 ;			Force a line boundry here
+
 			.align	5
+			.globl	EXT(ml_phys_read_byte_64)
+
+LEXT(ml_phys_read_byte_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_byte_join			
+
 			.globl	EXT(ml_phys_read_byte)
 
 LEXT(ml_phys_read_byte)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_byte_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lbz		r3,0(r3)						; Get the byte
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r10								; Save the current MSR
-			rlwinm	r4,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r4,r4,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r4								; Translation and all off
-			isync									; Toss prefetch
+/* Read physical address half word
+ *
+ *	unsigned int ml_phys_read_half(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_half_64(addr64_t paddr)
+ *
+ *	Read the half word at physical address paddr. Memory should not be cache inhibited.
+ */
 
-			lbz		r3,0(r3)						; Get the byte
-			sync
+;			Force a line boundry here
+
+			.align	5
+			.globl	EXT(ml_phys_read_half_64)
+
+LEXT(ml_phys_read_half_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_half_join		
+
+			.globl	EXT(ml_phys_read_half)
+
+LEXT(ml_phys_read_half)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_half_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lhz		r3,0(r3)						; Get the half word
+			b		rdwrpost						; Clean up and leave...
 
-			mtmsr	r10								; Restore translation and rupts
-			isync
-			blr
 
-/* Read physical address
+/* Read physical address word
  *
  *	unsigned int ml_phys_read(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_64(addr64_t paddr)
+ *	unsigned int ml_phys_read_word(vm_offset_t paddr)
+ *	unsigned int ml_phys_read_word_64(addr64_t paddr)
  *
  *	Read the word at physical address paddr. Memory should not be cache inhibited.
  */
 
 ;			Force a line boundry here
+
 			.align	5
+			.globl	EXT(ml_phys_read_64)
+			.globl	EXT(ml_phys_read_word_64)
+
+LEXT(ml_phys_read_64)
+LEXT(ml_phys_read_word_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
+            b		ml_phys_read_word_join		
+
 			.globl	EXT(ml_phys_read)
+			.globl	EXT(ml_phys_read_word)
 
 LEXT(ml_phys_read)
+LEXT(ml_phys_read_word)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_word_join:								; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			lwz		r3,0(r3)						; Get the word
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r4,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r4,r4,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r4								; Translation and all off
-			isync									; Toss prefetch
+/* Read physical address double word
+ *
+ *	unsigned long long ml_phys_read_double(vm_offset_t paddr)
+ *	unsigned long long ml_phys_read_double_64(addr64_t paddr)
+ *
+ *	Read the double word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+;			Force a line boundry here
+
+			.align	5
+			.globl	EXT(ml_phys_read_double_64)
+
+LEXT(ml_phys_read_double_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+            b		ml_phys_read_double_join		
+
+			.globl	EXT(ml_phys_read_double)
+
+LEXT(ml_phys_read_double)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_read_double_join:							; r3 = address to read (reg64_t)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
 			
-			lwz		r3,0(r3)						; Get the word
-			sync
+			lwz		r4,4(r3)						; Get the low word
+			lwz		r3,0(r3)						; Get the high word
+			b		rdwrpost						; Clean up and leave...
 
-			mtmsr	r0								; Restore translation and rupts
-			isync
-			blr
 
 /* Write physical address byte
  *
  *	void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
  *
  *	Write the byte at physical address paddr. Memory should not be cache inhibited.
  */
 
-;			Force a line boundry here
 			.align	5
+			.globl	EXT(ml_phys_write_byte_64)
+
+LEXT(ml_phys_write_byte_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_byte_join
+
 			.globl	EXT(ml_phys_write_byte)
 
 LEXT(ml_phys_write_byte)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_byte_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stb		r4,0(r3)						; Set the byte
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r5,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r5								; Translation and all off
-			isync									; Toss prefetch
+/* Write physical address half word
+ *
+ *	void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
+ *
+ *	Write the half word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+			.align	5
+			.globl	EXT(ml_phys_write_half_64)
+
+LEXT(ml_phys_write_half_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_half_join
+
+			.globl	EXT(ml_phys_write_half)
+
+LEXT(ml_phys_write_half)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_half_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
 			
-			stb		r4,0(r3)						; Set the byte
-			sync
+			sth		r4,0(r3)						; Set the half word
+			b		rdwrpost						; Clean up and leave...
 
-			mtmsr	r0								; Restore translation and rupts
-			isync
-			blr
 
-/* Write physical address
+/* Write physical address word
  *
  *	void ml_phys_write(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_64(addr64_t paddr, unsigned int data)
+ *	void ml_phys_write_word(vm_offset_t paddr, unsigned int data)
+ *	void ml_phys_write_word_64(addr64_t paddr, unsigned int data)
  *
  *	Write the word at physical address paddr. Memory should not be cache inhibited.
  */
 
-;			Force a line boundry here
 			.align	5
+			.globl	EXT(ml_phys_write_64)
+			.globl	EXT(ml_phys_write_word_64)
+
+LEXT(ml_phys_write_64)
+LEXT(ml_phys_write_word_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the data
+            b		ml_phys_write_word_join
+
 			.globl	EXT(ml_phys_write)
+			.globl	EXT(ml_phys_write_word)
 
 LEXT(ml_phys_write)
+LEXT(ml_phys_write_word)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_word_join:							; r3 = address to write (reg64_t), r4 = data
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stw		r4,0(r3)						; Set the word
+			b		rdwrpost						; Clean up and leave...
 
-			mfmsr	r0								; Save the current MSR
-			rlwinm	r5,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear interruptions
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear translation	
 
-			mtmsr	r5								; Translation and all off
-			isync									; Toss prefetch
+/* Write physical address double word
+ *
+ *	void ml_phys_write_double(vm_offset_t paddr, unsigned long long data)
+ *	void ml_phys_write_double_64(addr64_t paddr, unsigned long long data)
+ *
+ *	Write the double word at physical address paddr. Memory should not be cache inhibited.
+ */
+
+			.align	5
+			.globl	EXT(ml_phys_write_double_64)
+
+LEXT(ml_phys_write_double_64)
+
+			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
+			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
+			mr		r4,r5							; Copy over the high data
+			mr		r5,r6							; Copy over the low data
+            b		ml_phys_write_double_join
+
+			.globl	EXT(ml_phys_write_double)
+
+LEXT(ml_phys_write_double)
+            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
+ml_phys_write_double_join:							; r3 = address to write (reg64_t), r4,r5 = data (long long)
+			mflr	r11								; Save the return
+			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
+			
+			stw		r4,0(r3)						; Set the high word
+			stw		r5,4(r3)						; Set the low word
+			b		rdwrpost						; Clean up and leave...
+
+
+			.align	5
+
+rdwrpre:	mfsprg	r12,2							; Get feature flags 
+			lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
+			mfmsr	r10								; Save the MSR 
+			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
+			mtcrf	0x02,r12						; move pf64Bit
+			andc	r10,r10,r8						; Clear VEC and FP
+			ori		r9,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, DR, and IR
+			li		r2,1							; Prepare for 64 bit
+			andc	r9,r10,r9						; Clear VEC, FP, DR, and EE
+			bf--	pf64Bitb,rdwrpre32				; Join 32-bit code...
+			
+			srdi	r7,r3,31						; Get a 1 if address is in I/O memory
+			rldimi	r9,r2,63,MSR_SF_BIT				; set SF bit (bit 0)
+			cmpldi	cr7,r7,1						; Is source in I/O memory?
+			mtmsrd	r9								; set 64-bit mode, turn off EE, DR, and IR
+			isync									; synchronize
+
+			sldi	r0,r2,32+8						; Get the right bit to turn off caching
+			
+			bnelr++	cr7								; We are not in the I/O area, all ready...
+			
+			mfspr	r8,hid4							; Get HID4
+			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
+			sync									; Sync up
+			mtspr	hid4,r2							; Make real accesses cache-inhibited
+			isync									; Toss prefetches
+			
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
 			
-			stw		r4,0(r3)						; Set the word
 			sync
+			isync
+			blr										; Finally,  all ready...
+	
+			.align	5
+			
+rdwrpre32:	rlwimi	r9,r10,0,MSR_IR_BIT,MSR_IR_BIT	; Leave the IR bit unchanged
+			mtmsr	r9								; Drop EE, DR, and leave IR unchanged
+			isync
+			blr										; All set up, leave...
+			
+			.align	5
+			
+rdwrpost:	mtlr	r11								; Restore the return
+			bt++	pf64Bitb,rdwrpost64				; Join 64-bit code...
+			
+			mtmsr	r10								; Restore entry MSR (sans FP and VEC)
+			isync
+			blr										; Leave...
+			
+rdwrpost64:	bne++	cr7,rdwrpcok					; Skip enabling real mode caching if we did not change it...
+
+			sync									; Sync up
+			mtspr	hid4,r8							; Make real accesses not cache-inhibited
+			isync									; Toss prefetches
 
-			mtmsr	r0								; Restore translation and rupts
+			lis		r7,0xE000						; Get the unlikeliest ESID possible
+			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
+			slbie	r7								; Make sure the ERAT is cleared 
+
+rdwrpcok:	mtmsrd	r10								; Restore entry MSR (sans FP and VEC)
 			isync
-			blr
+			blr										; Leave...
 
 
 /* set interrupts enabled or disabled
@@ -272,37 +698,41 @@ LEXT(ml_phys_write)
  
 LEXT(ml_set_interrupts_enabled)
 
-			mfsprg	r7,0
-			lwz		r4,PP_INTS_ENABLED(r7)
-			mr.		r4,r4
-			beq-	EXT(fake_set_interrupts_enabled)
+			andi.   r4,r3,1							; Are we turning interruptions on?
+			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
 			mfmsr	r5								; Get the current MSR
-			mr		r4,r3							; Save the old value
+			ori		r0,r0,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Get float enable and EE enable
 			rlwinm	r3,r5,17,31,31					; Set return value
-			rlwimi	r5,r4,15,16,16					; Insert new EE bit
-			andi.   r8,r5,lo16(MASK(MSR_EE))			; Interruptions
-			bne     CheckPreemption
-NoPreemption:
-			mtmsr   r5                              ; Slam enablement
+			andc	r5,r5,r0						; Force VEC and FP off
+			bne	    CheckPreemption					; Interrupts going on, check ASTs...
+
+			mtmsr   r5                              ; Slam diable (always going disabled here)
+			isync									; Need this because FP/Vec might go off
 			blr
 
+			.align	5
+
 CheckPreemption:
-			lwz		r8,PP_NEED_AST(r7)
-			lwz		r7,PP_CPU_DATA(r7)
-			li		r6,AST_URGENT
-			lwz		r8,0(r8)
-			lwz		r7,CPU_PREEMPTION_LEVEL(r7)
-			lis		r0,HIGH_ADDR(DoPreemptCall)
-			and.	r8,r8,r6
-			ori		r0,r0,LOW_ADDR(DoPreemptCall)   
-			beq+		NoPreemption
-			cmpi	cr0, r7, 0
-			bne+		NoPreemption
-			sc
-			mtmsr	r5
+			mfsprg	r7,0
+			ori		r5,r5,lo16(MASK(MSR_EE))		; Turn on the enable
+			lwz		r8,PP_NEED_AST(r7)				; Get pointer to AST flags
+			mfsprg	r9,1							; Get current activation
+			li		r6,AST_URGENT					; Get the type we will preempt for 
+			lwz		r7,ACT_PREEMPT_CNT(r9)			; Get preemption count
+			lwz		r8,0(r8)						; Get AST flags
+			lis		r0,hi16(DoPreemptCall)			; High part of Preempt FW call
+			cmpwi	cr1,r7,0						; Are preemptions masked off?
+			and.	r8,r8,r6						; Are we urgent?
+			crorc	cr1_eq,cr0_eq,cr1_eq			; Remember if preemptions are masked or not urgent
+			ori		r0,r0,lo16(DoPreemptCall)   	; Bottome of FW call
+
+			mtmsr	r5								; Restore the MSR now, before we can preempt
+			isync									; Need this because FP/Vec might go off
+
+			beqlr++	cr1								; Return if no premption...
+			sc										; Preempt
 			blr
 
-
 /*  Emulate a decremeter exception
  *
  *	void machine_clock_assist(void)
@@ -318,8 +748,8 @@ LEXT(machine_clock_assist)
 			mfsprg	r7,0
 			lwz		r4,PP_INTS_ENABLED(r7)
 			mr.		r4,r4
-			beq-	EXT(CreateFakeDEC)
-			blr
+			bnelr+	cr0
+			b	EXT(CreateFakeDEC)
 
 /*  Set machine into idle power-saving mode. 
  *
@@ -331,20 +761,24 @@ LEXT(machine_clock_assist)
  *
  */
 
-
 ;			Force a line boundry here
 			.align	5
 			.globl	EXT(machine_idle_ppc)
 
 LEXT(machine_idle_ppc)
 
-			mfmsr	r3								; Get the current MSR
-			rlwinm	r5,r3,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
+			lis		r0,hi16(MASK(MSR_VEC))			; Get the vector flag
+			mfmsr	r3								; Save the MSR 
+			ori		r0,r0,lo16(MASK(MSR_FP))		; Add the FP flag
+			andc	r3,r3,r0						; Clear VEC and FP
+			ori		r0,r0,lo16(MASK(MSR_EE))		; Drop EE also
+			andc	r5,r3,r0						; Clear VEC, FP, DR, and EE
+
 			mtmsr	r5								; Hold up interruptions for now
+			isync									; May have messed with fp/vec
 			mfsprg	r12,0							; Get the per_proc_info
-			mfspr	r6,hid0							; Get the current power-saving mode
 			mfsprg	r11,2							; Get CPU specific features
-			rlwinm	r6,r6,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)	
+			mfspr	r6,hid0							; Get the current power-saving mode
 			mtcrf	0xC7,r11						; Get the facility flags
 
 			lis		r4,hi16(napm)					; Assume we can nap
@@ -365,6 +799,24 @@ yesnap:		mftbu	r9								; Get the upper timebase
 			stw		r8,napStamp(r12)				; Set high order time stamp
 			stw		r7,napStamp+4(r12)				; Set low order nap stamp
 
+			rlwinm.	r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before nap?
+			beq	miL2PFok
+
+			mfspr	r7,msscr0						; Get currect MSSCR0 value
+			rlwinm	r7,r7,0,0,l2pfes-1				; Disable L2 Prefetch
+			mtspr	msscr0,r7						; Updates MSSCR0 value
+			sync
+			isync
+
+miL2PFok:
+			rlwinm.	r7,r11,0,pfSlowNapb,pfSlowNapb	; Should nap at slow speed?
+			beq	minoslownap
+
+			mfspr	r7,hid1							; Get current HID1 value
+			oris	r7,r7,hi16(hid1psm)				; Select PLL1
+			mtspr	hid1,r7							; Update HID1 value
+
+minoslownap:
 
 ;
 ;			We have to open up interruptions here because book 4 says that we should
@@ -373,20 +825,68 @@ yesnap:		mftbu	r9								; Get the upper timebase
 ;			is taken and set everything up to return directly to machine_idle_ret.
 ;			So, make sure everything we need there is already set up...
 ;
+
+			lis		r10,hi16(dozem|napm|sleepm)		; Mask of power management bits
+		
+			bf--	pf64Bitb,mipNSF1				; skip if 32-bit...
+			
+			sldi	r4,r4,32						; Position the flags
+			sldi	r10,r10,32						; Position the masks
+
+		
+mipNSF1:	andc	r6,r6,r10						; Clean up the old power bits		
+
 			ori		r7,r5,lo16(MASK(MSR_EE))		; Flip on EE
 			or		r6,r6,r4						; Set nap or doze
 			oris	r5,r7,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
+			
+			sync
 			mtspr	hid0,r6							; Set up the HID for nap/doze
+			mfspr	r6,hid0							; Yes, this is silly, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
 			isync									; Make sure it is set
+		
 			mtmsr	r7								; Enable for interrupts
 			rlwinm.	r11,r11,0,pfAltivecb,pfAltivecb	; Do we have altivec?
 			beq-	minovec							; No...
 			dssall									; Stop the streams before we nap/doze
 
-minovec:	sync									; Make sure queues are clear			
-			mtmsr	r5								; Nap or doze
+minovec:
+			bf--	pf64Bitb,mipowloop				; skip if 32-bit...
+			
+			li		r3,0x10							; Fancy nap threashold is 0x10 ticks
+			mftb	r8								; Get the low half of the time base
+			mfdec	r4								; Get the decrementer ticks
+			cmplw	r4,r3							; Less than threashold?
+			blt		mipowloop
+			
+			mtdec	r3								; Load decrimenter with threshold
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
+			
+			rlwinm	r6,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear out the EE bit
+			sync									; Make sure queues are clear
+			mtmsr	r6								; Set MSR with EE off but POW on
 			isync									; Make sure this takes before we proceed
-			b		minovec							; loop if POW does not take
+			
+			mftb	r9								; Get the low half of the time base
+			sub		r9,r9,r8						; Get the number of ticks spent waiting
+			sub		r4,r4,r9						; Adjust the decrementer value
+			
+			mtdec	r4								; Load decrimenter with the rest of the timeout
+			isync									; and make sure,
+			mfdec	r4								; really sure, it gets there
+			
+mipowloop:
+			sync									; Make sure queues are clear
+			mtmsr	r5								; Nap or doze, MSR with POW and EE set
+			isync									; Make sure this takes before we proceed
+			b		mipowloop						; loop if POW does not take
+
 ;
 ;			Note that the interrupt handler will turn off the nap/doze bits in the hid.
 ;			Also remember that the interrupt handler will force return to here whenever
@@ -428,12 +928,6 @@ LEXT(ml_ppc_sleep)
 			mtmsr	r5								; No talking
 			isync
 			
-;			No interrupts allowed after we get the savearea
-
-			mfsprg	r6,0							; Get the per_proc
-			mfsprg	r7,1							; Get the pending savearea
-			stw		r7,savedSave(r6)				; Save the savearea for when we wake up
-
 deadsleep:	addi	r3,r3,1							; Make analyzer happy
 			addi	r3,r3,1
 			addi	r3,r3,1
@@ -444,16 +938,41 @@ deadsleep:	addi	r3,r3,1							; Make analyzer happy
 			mfspr	r4,hid0							; Get the current power-saving mode
 			eqv		r10,r10,r10						; Get all foxes
 			mfsprg	r11,2							; Get CPU specific features
+
+			rlwinm.	r5,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before sleep?
+			beq	mpsL2PFok
+
+			mfspr	r5,msscr0						; Get currect MSSCR0 value
+			rlwinm	r5,r5,0,0,l2pfes-1				; Disable L2 Prefetch
+			mtspr	msscr0,r5						; Updates MSSCR0 value
+			sync
+			isync
+
+mpsL2PFok:
+			rlwinm.	r5,r11,0,pf64Bitb,pf64Bitb		; PM bits are shifted on 64bit systems.
+			bne		mpsPF64bit
+
+			rlwinm	r4,r4,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)
+			oris	r4,r4,hi16(sleepm)				; Set sleep
+			b		mpsClearDEC
+
+mpsPF64bit:
+			lis		r5, hi16(dozem|napm|sleepm)		; Clear all possible power-saving modes (not DPM though)
+			sldi	r5, r5, 32
+			andc	r4, r4, r5
+			lis		r5, hi16(napm)					; Set sleep
+			sldi	r5, r5, 32
+			or		r4, r4, r5
+
+mpsClearDEC:
 			mfmsr	r5								; Get the current MSR
 			rlwinm	r10,r10,0,1,31					; Make 0x7FFFFFFF
-			rlwinm	r4,r4,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)	
 			mtdec	r10								; Load decrimenter with 0x7FFFFFFF
 			isync									; and make sure,
 			mfdec	r9								; really sure, it gets there
 			
 			mtcrf	0x07,r11						; Get the cache flags, etc
 
-			oris	r4,r4,hi16(sleepm)				; Set sleep
 			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
 ;
 ;			Note that we need translation off before we set the HID to sleep.  Otherwise
@@ -474,6 +993,12 @@ mpsNoMSRx:
 			ori		r3,r5,lo16(MASK(MSR_EE))		; Flip on EE
 			sync
 			mtspr	hid0,r4							; Set up the HID to sleep
+			mfspr	r4,hid0							; Yes, this is silly, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
 
 			mtmsr	r3								; Enable for interrupts to drain decrimenter
 				
@@ -484,9 +1009,6 @@ mpsNoMSRx:
 			mtmsr	r5								; Interruptions back off
 			isync									; Toss prefetch
 
-			mfsprg	r7,1							; Get the pending savearea
-			stw		r7,savedSave(r12)				; Save the savearea for when we wake up
-			
 ;
 ;			We are here with translation off, interrupts off, all possible
 ;			interruptions drained off, and a decrimenter that will not pop.
@@ -498,10 +1020,30 @@ mpsNoMSRx:
 			mfmsr	r5								; Get the current MSR
 			oris	r5,r5,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
 													; Leave EE off because power goes off shortly
-
-slSleepNow:	sync									; Sync it all up
+			mfsprg	r12,0							; Get the per_proc_info
+			li		r10,PP_CPU_FLAGS
+			lhz		r11,PP_CPU_FLAGS(r12)			; Get the flags
+			ori		r11,r11,SleepState				; Marked SleepState
+			sth		r11,PP_CPU_FLAGS(r12)			; Set the flags
+			dcbf	r10,r12
+			
+			mfsprg	r11,2							; Get CPU specific features
+			rlwinm.	r0,r11,0,pf64Bitb,pf64Bitb		; Test for 64 bit processor
+			eqv		r4,r4,r4						; Get all foxes
+			rlwinm	r4,r4,0,1,31					; Make 0x7FFFFFFF
+			beq		slSleepNow						; skip if 32-bit...
+			li		r3, 0x4000						; Cause decrimenter to roll over soon
+			mtdec	r3								; Load decrimenter with 0x00004000
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
+			
+slSleepNow:
+			sync									; Sync it all up
 			mtmsr	r5								; Do sleep with interruptions enabled
 			isync									; Take a pill
+			mtdec	r4								; Load decrimenter with 0x7FFFFFFF
+			isync									; and make sure,
+			mfdec	r3								; really sure, it gets there
 			b		slSleepNow						; Go back to sleep if we wake up...
 			
 
@@ -531,12 +1073,25 @@ LEXT(cacheInit)
 			
 			mfsprg	r11,2							; Get CPU specific features
 			mfmsr	r7								; Get the current MSR
-			rlwinm	r4,r9,0,dpm+1,doze-1			; Clear all possible power-saving modes (also disable DPM)	
+			rlwinm	r7,r7,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+			rlwinm	r7,r7,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
 			rlwimi	r11,r11,pfLClckb+1,31,31		; Move pfLClck to another position (to keep from using non-volatile CRs)
 			rlwinm	r5,r7,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
 			rlwinm	r5,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
 			mtcrf	0x87,r11						; Get the feature flags
+			lis		r10,hi16(dozem|napm|sleepm|dpmm)	; Mask of power management bits
+			bf--	pf64Bitb,cIniNSF1				; Skip if 32-bit...
+			
+			sldi	r10,r10,32						; Position the masks
+
+cIniNSF1:	andc	r4,r9,r10						; Clean up the old power bits		
 			mtspr	hid0,r4							; Set up the HID
+			mfspr	r4,hid0							; Yes, this is silly, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
 
 			bt		pfNoMSRirb,ciNoMSR				; No MSR...
 
@@ -555,11 +1110,12 @@ ciNoMSRx:
 			dssall									; Stop streams
 			sync
 
-cinoDSS:	lis		r5,hi16(EXT(tlb_system_lock))	; Get the TLBIE lock
+cinoDSS:	li		r5,tlbieLock					; Get the TLBIE lock
 			li		r0,128							; Get number of TLB entries
-			ori		r5,r5,lo16(EXT(tlb_system_lock))	; Grab up the bottom part
 			
 			li		r6,0							; Start at 0
+			bf--	pf64Bitb,citlbhang				; Skip if 32-bit...
+			li		r0,1024							; Get the number of TLB entries
 
 citlbhang:	lwarx	r2,0,r5							; Get the TLBIE lock
 			mr.		r2,r2							; Is it locked?
@@ -583,14 +1139,19 @@ cipurgeTLB:	tlbie	r6								; Purge this entry
 			sync
 			isync
 			
-cinoSMP:	stw		r2,0(r5)						; Unlock TLBIE lock
+			bf--	pf64Bitb,cinoSMP				; Skip if 32-bit...
+			ptesync									; Wait for quiet again
+			sync
+			
+cinoSMP:	stw		r2,tlbieLock(0)					; Unlock TLBIE lock
+
+			bt++	pf64Bitb,cin64					; Skip if 64-bit...
 
-			cror	cr0_eq,pfL1ib,pfL1db			; Check for either I- or D-cache
-			bf-		cr0_eq,cinoL1					; No level 1 to flush...
 			rlwinm.	r0,r9,0,ice,dce					; Were either of the level 1s on?
 			beq-	cinoL1							; No, no need to flush...
 			
-			bf		pfL1fab,ciswdl1					; If no hw flush assist, go do by software...
+            rlwinm.	r0,r11,0,pfL1fab,pfL1fab		; do we have L1 flush assist?
+			beq		ciswdl1							; If no hw flush assist, go do by software...
 			
 			mfspr	r8,msscr0						; Get the memory system control register
 			oris	r8,r8,hi16(dl1hwfm)				; Turn on the hardware flush request
@@ -714,7 +1275,9 @@ cinoL1:
 ;
 ;			Flush and disable the level 2
 ;
-			bf		pfL2b,cinol2					; No level 2 cache to flush
+            mfsprg	r10,2							; need to check 2 features we did not put in CR
+            rlwinm.	r0,r10,0,pfL2b,pfL2b			; do we have L2?
+			beq		cinol2							; No level 2 cache to flush
 
 			mfspr	r8,l2cr							; Get the L2CR
 			lwz		r3,pfl2cr(r12)					; Get the L2CR value
@@ -729,7 +1292,8 @@ cinoL1:
 			bne-	ciinvdl2						; Yes, just invalidate and get PLL synced...		
 			
 ciflushl2:
-			bf		pfL2fab,ciswfl2					; Flush not in hardware...
+            rlwinm.	r0,r10,0,pfL2fab,pfL2fab		; hardware-assisted L2 flush?
+			beq		ciswfl2							; Flush not in hardware...
 			
 			mr		r10,r8							; Take a copy now
 			
@@ -780,7 +1344,7 @@ ciswfldl2a:	lwz		r0,0(r10)						; Load something to flush something
 			addi	r10,r10,32						; Next line
 			bdnz	ciswfldl2a						; Do the lot...
 			
-ciinvdl2:	rlwinm	r8,r3,0,l2e+1,31				; Use the saved L2CR and clear the enable bit
+ciinvdl2:	rlwinm	r8,r3,0,l2e+1,31				; Clear the enable bit
 			b		cinla							; Branch to next line...
 
 			.align  5
@@ -807,7 +1371,9 @@ ciinvl2:	sync
 			sync
 			isync
 ciinvdl2a:	mfspr	r2,l2cr							; Get the L2CR
-			bf		pfL2ib,ciinvdl2b				; Flush not in hardware...
+            mfsprg	r0,2							; need to check a feature in "non-volatile" set
+            rlwinm.	r0,r0,0,pfL2ib,pfL2ib			; flush in HW?
+			beq		ciinvdl2b						; Flush not in hardware...
 			rlwinm.	r2,r2,0,l2i,l2i					; Is the invalidate still going?
 			bne+	ciinvdl2a						; Assume so, this will take a looong time...
 			sync
@@ -859,7 +1425,7 @@ cihwfl3:	mfspr	r10,l3cr						; Get back the L3CR
 			rlwinm.	r10,r10,0,l3hwf,l3hwf			; Is the flush over?
 			bne+	cihwfl3							; Nope, keep going...
 
-ciinvdl3:	rlwinm	r8,r3,0,l3e+1,31				; Use saved L3CR value and clear the enable bit
+ciinvdl3:	rlwinm	r8,r3,0,l3e+1,31				; Clear the enable bit
 			sync									; Make sure of life, liberty, and justice
 			mtspr	l3cr,r8							; Disable L3
 			sync
@@ -909,11 +1475,13 @@ ciinvdl3c:	addi	r2,r2,-1						; ?
 			mtspr	l3cr,r3							; Enable it as desired
 			sync
 cinol3:
-			bf		pfL2b,cinol2a					; No level 2 cache to enable
+            mfsprg	r0,2							; need to check a feature in "non-volatile" set
+            rlwinm.	r0,r0,0,pfL2b,pfL2b				; is there an L2 cache?
+			beq		cinol2a							; No level 2 cache to enable
 
 			lwz		r3,pfl2cr(r12)					; Get the L2CR value
 			cmplwi		r3, 0						; Should the L2 be all the way off?
-			beq		cinol2a						: Yes, done with L2
+			beq		cinol2a							: Yes, done with L2
 			mtspr	l2cr,r3							; Enable it as desired
 			sync
 
@@ -942,6 +1510,157 @@ cinoexit:	mtspr	hid0,r9							; Turn off the invalidate (needed for some older m
 			blr										; Return...
 
 
+;
+;			Handle 64-bit architecture
+;			This processor can not run without caches, so we just push everything out
+;			and flush.  It will be relativily clean afterwards
+;
+			
+			.align	5
+			
+cin64:		
+			mfspr	r10,hid1						; Save hid1
+			mfspr	r4,hid4							; Save hid4
+			mr		r12,r10							; Really save hid1
+			mr		r11,r4							; Get a working copy of hid4
+
+			li		r0,0							; Get a 0
+			eqv		r2,r2,r2						; Get all foxes
+			
+			rldimi	r10,r0,55,7						; Clear I$ prefetch bits (7:8)
+			
+			isync
+			mtspr	hid1,r10						; Stick it
+			mtspr	hid1,r10						; Stick it again
+			isync
+
+			rldimi	r11,r2,38,25					; Disable D$ prefetch (25:25)
+			
+			sync
+			mtspr	hid4,r11						; Stick it
+			isync
+
+			li		r3,8							; Set bit 28+32
+			sldi	r3,r3,32						; Make it bit 28
+			or		r3,r3,r11						; Turn on the flash invalidate L1D$
+			
+			oris	r5,r11,0x0600					; Set disable L1D$ bits		
+			sync
+			mtspr	hid4,r3							; Invalidate
+			isync
+	
+			mtspr	hid4,r5							; Un-invalidate and disable L1D$
+			isync
+			
+			lis		r8,GUSModeReg					; Get the GUS mode ring address
+			mfsprg	r0,2							; Get the feature flags
+			ori		r8,r8,0x8000					; Set to read data
+			rlwinm.	r0,r0,pfSCOMFixUpb+1,31,31		; Set shift if we need a fix me up
+
+			sync
+
+			mtspr	scomc,r8						; Request the GUS mode
+			mfspr	r11,scomd						; Get the GUS mode
+			mfspr	r8,scomc						; Get back the status (we just ignore it)
+			sync
+			isync							
+
+			sld		r11,r11,r0						; Fix up if needed
+
+			ori		r6,r11,lo16(GUSMdmapen)			; Set the bit that means direct L2 cache address
+			lis		r8,GUSModeReg					; Get GUS mode register address
+				
+			sync
+
+			mtspr	scomd,r6						; Set that we want direct L2 mode
+			mtspr	scomc,r8						; Tell GUS we want direct L2 mode
+			mfspr	r3,scomc						; Get back the status
+			sync
+			isync							
+
+			li		r3,0							; Clear start point
+		
+cflushlp:	lis		r6,0x0040						; Pick 4MB line as our target
+			or		r6,r6,r3						; Put in the line offset
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+			addis	r6,r6,8							; Roll bit 42:44
+			lwz		r5,0(r6)						; Load a line
+
+			addi	r3,r3,128						; Next line
+			andis.	r5,r3,8							; Have we done enough?
+			beq++	cflushlp						; Not yet...
+			
+			sync
+
+			lis		r6,0x0040						; Pick 4MB line as our target
+
+cflushx:	dcbf	0,r6							; Flush line and invalidate
+			addi	r6,r6,128						; Next line
+			andis.	r5,r6,0x0080					; Have we done enough?
+			beq++	cflushx							; Keep on flushing...
+
+			mr		r3,r10							; Copy current hid1
+			rldimi	r3,r2,54,9						; Set force icbi match mode
+			
+			li		r6,0							; Set start if ICBI range
+			isync
+			mtspr	hid1,r3							; Stick it
+			mtspr	hid1,r3							; Stick it again
+			isync
+
+cflicbi:	icbi	0,r6							; Kill I$
+			addi	r6,r6,128						; Next line
+			andis.	r5,r6,1							; Have we done them all?
+			beq++	cflicbi							; Not yet...
+
+			lis		r8,GUSModeReg					; Get GUS mode register address
+				
+			sync
+
+			mtspr	scomd,r11						; Set that we do not want direct mode
+			mtspr	scomc,r8						; Tell GUS we do not want direct mode
+			mfspr	r3,scomc						; Get back the status
+			sync
+			isync							
+
+			isync
+			mtspr	hid0,r9							; Restore entry hid0
+			mfspr	r9,hid0							; Yes, this is silly, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
+			isync
+
+			isync
+			mtspr	hid1,r12						; Restore entry hid1
+			mtspr	hid1,r12						; Stick it again
+			isync
+		
+			sync
+			mtspr	hid4,r4							; Restore entry hid4
+			isync
+
+			sync
+			mtmsr	r7								; Restore MSR to entry
+			isync
+			blr										; Return...
+			
+			
+
 /*  Disables all caches
  *
  *	void cacheDisable(void)
@@ -965,13 +1684,16 @@ LEXT(cacheDisable)
 			
 cdNoAlt:	sync
 			
+			btlr	pf64Bitb						; No way to disable a 64-bit machine...
+			
 			mfspr	r5,hid0							; Get the hid
 			rlwinm	r5,r5,0,dce+1,ice-1				; Clear the I- and D- cache enables
 			mtspr	hid0,r5							; Turn off dem caches
 			sync
 
-			bf		pfL2b,cdNoL2					; Skip if no L2...
-			
+            rlwinm.	r0,r11,0,pfL2b,pfL2b			; is there an L2?
+			beq		cdNoL2							; Skip if no L2...
+
 			mfspr	r5,l2cr							; Get the L2
 			rlwinm	r5,r5,0,l2e+1,31				; Turn off enable bit
 
@@ -990,7 +1712,7 @@ cinlbb:		sync									; Finish memory stuff
 			b		cinlcc							; Jump back up and turn off cache...
 
 cdNoL2:
-			
+
 			bf		pfL3b,cdNoL3					; Skip down if no L3...
 			
 			mfspr	r5,l3cr							; Get the L3
@@ -1073,8 +1795,11 @@ smallenuf:	rlwinm	r3,r3,31-thrmsitve,thrmsitvs,thrmsitve	; Position
 LEXT(ml_thrm_set)
 
 			mfmsr	r0								; Get the MSR
+			rlwinm	r0,r0,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+			rlwinm	r0,r0,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
 			rlwinm	r6,r0,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear EE bit
 			mtmsr	r6
+			isync
 
 			mfsprg	r12,0							; Get the per_proc blok
 
@@ -1113,8 +1838,10 @@ tsetcant:	mtmsr	r0								; Reenable interruptions
 LEXT(ml_read_temp)
 
 			mfmsr	r9								; Save the MSR
-			rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
 			li		r5,15							; Starting point for ranging (start at 15 so we do not overflow)
+			rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+			rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+			rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
 			mfsprg	r7,2							; Get CPU specific features
 			mtmsr	r8								; Do not allow interruptions
 			mtcrf	0x40,r7							; See if we can thermal this machine
@@ -1176,9 +1903,12 @@ thrmcant:	eqv		r3,r3,r3						; Return bogus temprature because we can not read i
 LEXT(ml_throttle)
 
 			mfmsr	r9								; Save the MSR
+			rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+			rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
 			rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
 			cmplwi	r3,lo16(ictcfim>>1)				; See if we are going too far					
-			mtmsr	r8								; Do not allow interruptions			
+			mtmsr	r8								; Do not allow interruptions	
+			isync		
 			ble+	throtok							; Throttle value is ok...
 			li		r3,lo16(ictcfim>>1)				; Set max
 
@@ -1207,16 +1937,84 @@ throtoff:	mfspr	r3,ictc							; Get the old throttle
 LEXT(ml_get_timebase)
 
 loop:
-        mftbu   r4
-        mftb    r5
-        mftbu   r6
-        cmpw    r6, r4
-        bne-    loop
+			mftbu   r4
+			mftb    r5
+			mftbu   r6
+			cmpw    r6, r4
+			bne-    loop
+			
+			stw     r4, 0(r3)
+			stw     r5, 4(r3)
+			
+			blr
+
+/*
+ *		unsigned int cpu_number(void)
+ *
+ *			Returns the current cpu number. 
+ */
 
-        stw     r4, 0(r3)
-        stw     r5, 4(r3)
+			.align	5
+			.globl	EXT(cpu_number)
 
-        blr
+LEXT(cpu_number)
+			mfsprg	r4,0							; Get per-proc block
+			lhz		r3,PP_CPU_NUMBER(r4)			; Get CPU number 
+			blr										; Return...
+
+
+/*
+ *		void set_machine_current_act(thread_act_t)
+ *
+ *			Set the current activation
+ */
+			.align	5
+			.globl	EXT(set_machine_current_act)
+
+LEXT(set_machine_current_act)
+
+			mtsprg	1,r3							; Set spr1 with the active thread
+			blr										; Return...
+
+/*
+ *		thread_t current_act(void)
+ *		thread_t current_thread(void)
+ *
+ *
+ *			Return the current thread for outside components.
+ */
+			.align	5
+			.globl	EXT(current_act)
+			.globl	EXT(current_thread)
+
+LEXT(current_act)
+LEXT(current_thread)
+
+			mfsprg	r3,1
+			blr
+
+			.align	5
+			.globl	EXT(clock_get_uptime)
+LEXT(clock_get_uptime)
+1:			mftbu	r9
+			mftb	r0
+			mftbu	r11
+			cmpw	r11,r9
+			bne-	1b
+			stw		r0,4(r3)
+			stw		r9,0(r3)
+			blr
+
+		
+			.align	5
+			.globl	EXT(mach_absolute_time)
+LEXT(mach_absolute_time)
+1:			mftbu	r3
+			mftb	r4
+			mftbu	r0
+			cmpw	r0,r3
+			bne-	1b  
+			blr
 
 /*
 **      ml_sense_nmi()
@@ -1239,4 +2037,69 @@ LEXT(ml_sense_nmi)
 			.globl	EXT(ml_set_processor_speed)
 
 LEXT(ml_set_processor_speed)
+			mfsprg	r5, 0									; Get the per_proc_info
+
+			lwz		r6, pfPowerModes(r5)					; Get the supported power modes
+
+			rlwinm.	r0, r6, 0, pmDualPLLb, pmDualPLLb		; Is DualPLL supported?
+			bne		spsDPLL
+
+			rlwinm.	r0, r6, 0, pmPowerTuneb, pmPowerTuneb	; Is DualPLL supported?
+			bne		spsPowerTune
+
+			b		spsDone									; No supported power modes
+
+spsDPLL:
+			cmpli	cr0, r3, 0								; Turn off BTIC before low speed
+			beq		spsDPLL1
+			mfspr	r4, hid0								; Get the current hid0 value
+			rlwinm	r4, r4, 0, btic+1, btic-1				; Clear the BTIC bit
+			sync
+			mtspr	hid0, r4								; Set the new hid0 value
+			isync
+			sync
+
+spsDPLL1:
+			mfspr	r4, hid1								; Get the current PLL settings
+			rlwimi  r4, r3, 31-hid1ps, hid1ps, hid1ps		; Copy the PLL Select bit
+			stw		r4, pfHID1(r5)							; Save the new hid1 value
+			mtspr	hid1, r4								; Select desired PLL
+
+			cmpli	cr0, r3, 0								; Restore BTIC after high speed
+			bne		spsDone
+			lwz		r4, pfHID0(r5)							; Load the hid0 value
+			sync
+			mtspr	hid0, r4								; Set the hid0 value
+			isync
+			sync
+			b		spsDone
+
+spsPowerTune:
+			b		spsDone
+
+spsDone:
+			blr
+
+/*
+**      ml_set_processor_voltage()
+**
+*/
+;			Force a line boundry here
+			.align	5
+			.globl	EXT(ml_set_processor_voltage)
+
+LEXT(ml_set_processor_voltage)
+			mfsprg	r5, 0									; Get the per_proc_info
+
+			lwz		r6, pfPowerModes(r5)					; Get the supported power modes
+
+			rlwinm.	r0, r6, 0, pmDPLLVminb, pmDPLLVminb		; Is DPLL Vmin supported
+			beq		spvDone
+
+			mfspr	r4, hid2								; Get HID2 value
+			rlwimi	r4, r3, 31-hid2vmin, hid2vmin, hid2vmin	; Insert the voltage mode bit
+			mtspr	hid2, r4								; Set the voltage mode
+			sync											; Make sure it is done
+
+spvDone:
 			blr