X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/765c9de3b4af7c2078d16a03812ae2c7c2b24938..c910b4d9d2451126ae3917b931cd4390c11e1d52:/osfmk/ppc/cache.s diff --git a/osfmk/ppc/cache.s b/osfmk/ppc/cache.s index 5494446aa..94aa0aeeb 100644 --- a/osfmk/ppc/cache.s +++ b/osfmk/ppc/cache.s @@ -1,102 +1,55 @@ /* * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ -#include - #include #include -#include +#include #include -#include -#include -/* - * extern void sync_cache(vm_offset_t pa, unsigned count); - * - * sync_cache takes a physical address and count to sync, thus - * must not be called for multiple virtual pages. - * - * it writes out the data cache and invalidates the instruction - * cache for the address range in question +/* These routines run in 32 or 64-bit addressing, and handle + * 32 and 128 byte caches. They do not use compare instructions + * on addresses, since compares are 32/64-bit-mode-specific. */ -ENTRY(sync_cache, TAG_NO_FRAME_USED) - - /* Switch off data translations */ - mfmsr r6 - rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1 - mtmsr r7 - isync - - /* Check to see if the address is aligned. */ - add r8, r3,r4 - andi. 
r8,r8,(CACHE_LINE_SIZE-1) - beq- .L_sync_check - addi r4,r4,CACHE_LINE_SIZE - li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */ - andc r4,r4,r7 - andc r3,r3,r7 - -.L_sync_check: - cmpwi r4, CACHE_LINE_SIZE - ble .L_sync_one_line - - /* Make ctr hold count of how many times we should loop */ - addi r8, r4, (CACHE_LINE_SIZE-1) - srwi r8, r8, CACHE_LINE_POW2 - mtctr r8 - - /* loop to flush the data cache */ -.L_sync_data_loop: - subic r4, r4, CACHE_LINE_SIZE - dcbf r3, r4 - bdnz .L_sync_data_loop - - sync - mtctr r8 - - /* loop to invalidate the instruction cache */ -.L_sync_inval_loop: - icbi r3, r4 - addic r4, r4, CACHE_LINE_SIZE - bdnz .L_sync_inval_loop - -.L_sync_cache_done: - sync /* Finish physical writes */ - mtmsr r6 /* Restore original translations */ - isync /* Ensure data translations are on */ - blr - -.L_sync_one_line: - dcbf 0,r3 - sync - icbi 0,r3 - b .L_sync_cache_done +#define kDcbf 0x1 +#define kDcbfb 31 +#define kDcbi 0x2 +#define kDcbib 30 +#define kIcbi 0x4 +#define kIcbib 29 + /* * extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys); + * extern void flush_dcache64(addr64_t addr, unsigned count, boolean phys); * * flush_dcache takes a virtual or physical address and count to flush * and (can be called for multiple virtual pages). @@ -107,166 +60,330 @@ ENTRY(sync_cache, TAG_NO_FRAME_USED) * if 'phys' is non-zero then physical addresses will be used */ -ENTRY(flush_dcache, TAG_NO_FRAME_USED) - - /* optionally switch off data translations */ - - cmpwi r5, 0 - mfmsr r6 - beq+ 0f - rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1 - mtmsr r7 - isync -0: - - /* Check to see if the address is aligned. */ - add r8, r3,r4 - andi. r8,r8,(CACHE_LINE_SIZE-1) - beq- .L_flush_dcache_check - addi r4,r4,CACHE_LINE_SIZE - li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */ - andc r4,r4,r7 - andc r3,r3,r7 - -.L_flush_dcache_check: - cmpwi r4, CACHE_LINE_SIZE - ble .L_flush_dcache_one_line - - /* Make ctr hold count of how many times we should loop */ - addi r8, r4, (CACHE_LINE_SIZE-1) - srwi r8, r8, CACHE_LINE_POW2 - mtctr r8 - -.L_flush_dcache_flush_loop: - subic r4, r4, CACHE_LINE_SIZE - dcbf r3, r4 - bdnz .L_flush_dcache_flush_loop - -.L_flush_dcache_done: - /* Sync restore msr if it was modified */ - cmpwi r5, 0 - sync /* make sure invalidates have completed */ - beq+ 0f - mtmsr r6 /* Restore original translations */ - isync /* Ensure data translations are on */ -0: - blr - -.L_flush_dcache_one_line: - xor r4,r4,r4 - dcbf 0,r3 - b .L_flush_dcache_done + + + .text + .align 5 + .globl _flush_dcache +_flush_dcache: + li r0,kDcbf // use DCBF instruction + rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine + b cache_op_join // join common code + + .align 5 + .globl _flush_dcache64 +_flush_dcache64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + li r0,kDcbf // use DCBF instruction + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + mr r5,r6 ; Move physical flag + b cache_op_join // join common code /* * extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys); + * extern void invalidate_dcache64(addr64_t va, unsigned count, boolean phys); * * invalidate_dcache takes a virtual or physical address and count to * invalidate and (can be called for multiple virtual pages). 
* * it invalidates the data cache for the address range in question */ - -ENTRY(invalidate_dcache, TAG_NO_FRAME_USED) - - /* optionally switch off data translations */ - - cmpwi r5, 0 - mfmsr r6 - beq+ 0f - rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1 - mtmsr r7 - isync -0: - - /* Check to see if the address is aligned. */ - add r8, r3,r4 - andi. r8,r8,(CACHE_LINE_SIZE-1) - beq- .L_invalidate_dcache_check - addi r4,r4,CACHE_LINE_SIZE - li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */ - andc r4,r4,r7 - andc r3,r3,r7 - -.L_invalidate_dcache_check: - cmpwi r4, CACHE_LINE_SIZE - ble .L_invalidate_dcache_one_line - - /* Make ctr hold count of how many times we should loop */ - addi r8, r4, (CACHE_LINE_SIZE-1) - srwi r8, r8, CACHE_LINE_POW2 - mtctr r8 - -.L_invalidate_dcache_invalidate_loop: - subic r4, r4, CACHE_LINE_SIZE - dcbi r3, r4 - bdnz .L_invalidate_dcache_invalidate_loop - -.L_invalidate_dcache_done: - /* Sync restore msr if it was modified */ - cmpwi r5, 0 - sync /* make sure invalidates have completed */ - beq+ 0f - mtmsr r6 /* Restore original translations */ - isync /* Ensure data translations are on */ -0: - blr - -.L_invalidate_dcache_one_line: - xor r4,r4,r4 - dcbi 0,r3 - b .L_invalidate_dcache_done + + .globl _invalidate_dcache +_invalidate_dcache: + li r0,kDcbi // use DCBI instruction + rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine + b cache_op_join // join common code + + + .align 5 + .globl _invalidate_dcache64 +_invalidate_dcache64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + li r0,kDcbi // use DCBI instruction + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + mr r5,r6 ; Move physical flag + b cache_op_join // join common code /* * extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys); + * extern void invalidate_icache64(addr64_t addr, unsigned cnt, boolean phys); * * invalidate_icache takes a virtual or physical address and * count to invalidate, (can be called for multiple virtual pages). * * it invalidates the instruction cache for the address range in question. */ + + .globl _invalidate_icache +_invalidate_icache: + li r0,kIcbi // use ICBI instruction + rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine + b cache_op_join // join common code + + + .align 5 + .globl _invalidate_icache64 +_invalidate_icache64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + li r0,kIcbi // use ICBI instruction + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + mr r5,r6 ; Move physical flag + b cache_op_join // join common code + +/* + * extern void sync_ppage(ppnum_t pa); + * + * sync_ppage takes a physical page number + * + * it writes out the data cache and invalidates the instruction + * cache for the address range in question + */ + + .globl _sync_ppage + .align 5 +_sync_ppage: // Should be the most commonly called routine, by far + mfsprg r2,2 + li r0,kDcbf+kIcbi // we need to dcbf and then icbi + mtcrf 0x02,r2 ; Move pf64Bit to cr6 + li r5,1 // set flag for physical addresses + li r4,4096 ; Set page size + bt++ pf64Bitb,spp64 ; Skip if 64-bit (only they take the hint) + rlwinm r3,r3,12,0,19 ; Convert to physical address - 32-bit + b cache_op_join ; Join up.... + +spp64: sldi r3,r3,12 ; Convert to physical address - 64-bit + b cache_op_join ; Join up.... 
+ + + +/* + * extern void sync_cache_virtual(vm_offset_t addr, unsigned count); + * + * Like "sync_cache", except it takes a virtual address and byte count. + * It flushes the data cache, invalidates the I cache, and sync's. + */ + + .globl _sync_cache_virtual + .align 5 +_sync_cache_virtual: + li r0,kDcbf+kIcbi // we need to dcbf and then icbi + li r5,0 // set flag for virtual addresses + b cache_op_join // join common code + + +/* + * extern void sync_cache(vm_offset_t pa, unsigned count); + * extern void sync_cache64(addr64_t pa, unsigned count); + * + * sync_cache takes a physical address and count to sync, thus + * must not be called for multiple virtual pages. + * + * it writes out the data cache and invalidates the instruction + * cache for the address range in question + */ + + .globl _sync_cache + .align 5 +_sync_cache: + li r0,kDcbf+kIcbi // we need to dcbf and then icbi + li r5,1 // set flag for physical addresses + rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine + b cache_op_join // join common code + + .globl _sync_cache64 + .align 5 +_sync_cache64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + li r0,kDcbf+kIcbi // we need to dcbf and then icbi + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Copy over the length + li r5,1 // set flag for physical addresses + + + // Common code to handle the cache operations. + +cache_op_join: // here with r3=addr, r4=count, r5=phys flag, r0=bits + mfsprg r10,2 // r10 <- processor feature flags + cmpwi cr5,r5,0 // using physical addresses? + mtcrf 0x01,r0 // move kDcbf, kDcbi, and kIcbi bits to CR7 + andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size + mtcrf 0x02,r10 // move pf64Bit bit to CR6 + subi r8,r9,1 // r8 <- (linesize-1) + beq-- cr5,cache_op_2 // skip if using virtual addresses + + bf-- pf64Bitb,cache_op_not64 // This is not a 64-bit machine + + srdi r12,r3,31 // Slide bit 32 to bit 63 + cmpldi r12,1 // Are we in the I/O mapped area? + beqlr-- // No cache ops allowed here... + +cache_op_not64: + mflr r12 // save return address + bl EXT(ml_set_physical) // turn on physical addressing + mtlr r12 // restore return address + + // get r3=first cache line, r4=first line not in set, r6=byte count + +cache_op_2: + add r7,r3,r4 // point to 1st byte not to operate on + andc r3,r3,r8 // r3 <- 1st line to operate on + add r4,r7,r8 // round up + andc r4,r4,r8 // r4 <- 1st line not to operate on + sub. r6,r4,r3 // r6 <- byte count to operate on + beq-- cache_op_exit // nothing to do + bf-- kDcbfb,cache_op_6 // no need to dcbf + + + // DCBF loop + +cache_op_5: + sub. r6,r6,r9 // more to go? + dcbf r6,r3 // flush next line to RAM + bne cache_op_5 // loop if more to go + sync // make sure the data reaches RAM + sub r6,r4,r3 // reset count + + + // ICBI loop + +cache_op_6: + bf-- kIcbib,cache_op_8 // no need to icbi +cache_op_7: + sub. r6,r6,r9 // more to go? + icbi r6,r3 // invalidate next line + bne cache_op_7 + sub r6,r4,r3 // reset count + isync + sync + + + // DCBI loop + +cache_op_8: + bf++ kDcbib,cache_op_exit // no need to dcbi +cache_op_9: + sub. r6,r6,r9 // more to go? 
+ dcbi r6,r3 // invalidate next line + bne cache_op_9 + sync + + + // restore MSR iff necessary and done + +cache_op_exit: + beqlr-- cr5 // if using virtual addresses, no need to restore MSR + b EXT(ml_restore) // restore MSR and return + + +//////////////////////////////////////////////////// + + .align 5 + .globl _dcache_incoherent_io_store64 +_dcache_incoherent_io_store64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + + // here with r3=addr, r4=count + mfsprg r10,2 // r10 <- processor feature flags + andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size + mtcrf 0x02,r10 // move pf64Bit bit to CR6 + subi r8,r9,1 // r8 <- (linesize-1) + + bf-- pf64Bitb,cache_ios_not64 // This is not a 64-bit machine + + srdi r12,r3,31 // Slide bit 32 to bit 63 + cmpldi r12,1 // Are we in the I/O mapped area? + beqlr-- // No cache ops allowed here... + +cache_ios_not64: + mflr r12 // save return address + bl EXT(ml_set_physical) // turn on physical addressing + mtlr r12 // restore return address + + // get r3=first cache line, r4=first line not in set, r6=byte count + add r7,r3,r4 // point to 1st byte not to operate on + andc r3,r3,r8 // r3 <- 1st line to operate on + add r4,r7,r8 // round up + andc r4,r4,r8 // r4 <- 1st line not to operate on + sub. r6,r4,r3 // r6 <- byte count to operate on + beq-- cache_ios_exit // nothing to do + + sub. r6,r6,r9 // >1 line? + beq cache_ios_last_line // use dcbst on all lines but last + + // DCBST loop +cache_ios_5: + sub. r6,r6,r9 // more to go? + dcbst r6,r3 // store next line + bne cache_ios_5 // loop if more to go + +cache_ios_last_line: + sync // flush last line + isync + dcbf r6,r3 + sync + isync + add r6,r6,r3 + lwz r0,0(r6) // make sure the data reaches RAM (not just the memory controller) + isync + + // restore MSR +cache_ios_exit: + b EXT(ml_restore) // restore MSR and return + + +//////////////////////////////////////////////////// + + .align 5 + .globl _dcache_incoherent_io_flush64 +_dcache_incoherent_io_flush64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + + // here with r3=addr, r4=count + mfsprg r10,2 // r10 <- processor feature flags + andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size + mtcrf 0x02,r10 // move pf64Bit bit to CR6 + subi r8,r9,1 // r8 <- (linesize-1) + + bf-- pf64Bitb,cache_iof_not64 // This is not a 64-bit machine + + srdi r12,r3,31 // Slide bit 32 to bit 63 + cmpldi r12,1 // Are we in the I/O mapped area? + beqlr-- // No cache ops allowed here... + +cache_iof_not64: + mflr r12 // save return address + bl EXT(ml_set_physical) // turn on physical addressing + mtlr r12 // restore return address + + // get r3=first cache line, r4=first line not in set, r6=byte count + add r7,r3,r4 // point to 1st byte not to operate on + andc r3,r3,r8 // r3 <- 1st line to operate on + add r4,r7,r8 // round up + andc r4,r4,r8 // r4 <- 1st line not to operate on + sub. r6,r4,r3 // r6 <- byte count to operate on + beq-- cache_iof_exit // nothing to do + + // DCBF loop +cache_iof_5: + sub. r6,r6,r9 // more to go? 
+ dcbf r6,r3 // store next line + bne cache_iof_5 // loop if more to go + +cache_iof_last_line: + sync // flush last line + isync + + // restore MSR +cache_iof_exit: + b EXT(ml_restore) // restore MSR and return + -ENTRY(invalidate_icache, TAG_NO_FRAME_USED) - - /* optionally switch off data translations */ - cmpwi r5, 0 - mfmsr r6 - beq+ 0f - rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1 - mtmsr r7 - isync -0: - - /* Check to see if the address is aligned. */ - add r8, r3,r4 - andi. r8,r8,(CACHE_LINE_SIZE-1) - beq- .L_invalidate_icache_check - addi r4,r4,CACHE_LINE_SIZE - li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */ - andc r4,r4,r7 - andc r3,r3,r7 - -.L_invalidate_icache_check: - cmpwi r4, CACHE_LINE_SIZE - ble .L_invalidate_icache_one_line - - /* Make ctr hold count of how many times we should loop */ - addi r8, r4, (CACHE_LINE_SIZE-1) - srwi r8, r8, CACHE_LINE_POW2 - mtctr r8 - -.L_invalidate_icache_invalidate_loop: - subic r4, r4, CACHE_LINE_SIZE - icbi r3, r4 - bdnz .L_invalidate_icache_invalidate_loop - -.L_invalidate_icache_done: - sync /* make sure invalidates have completed */ - mtmsr r6 /* Restore original translations */ - isync /* Ensure data translations are on */ - blr - -.L_invalidate_icache_one_line: - xor r4,r4,r4 - icbi 0,r3 - b .L_invalidate_icache_done
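
The common path at cache_op_join rounds the starting address down to a cache-line boundary, rounds (addr + count) up to the next boundary, and then walks the range one line at a time with dcbf, icbi, and/or dcbi as selected by the kDcbf/kIcbi/kDcbi flag bits. The C sketch below is illustrative only: the function name cache_op_range and the hard-coded 32-byte example line size are assumptions for the example, while the kernel code reads the real line size (32 or 128 bytes) from the per-processor feature flags and issues the cache instructions directly.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch of the alignment math done at cache_op_2.
 * The name and the example line size are assumptions; the kernel
 * routine gets the line size (32 or 128 bytes) from the processor
 * feature flags and then issues dcbf/dcbi/icbi on each line.
 */
static void cache_op_range(uint64_t addr, uint64_t count, uint64_t line_size,
                           uint64_t *first_line, uint64_t *end_line)
{
    uint64_t mask = line_size - 1;              /* r8 <- (linesize-1)          */

    *first_line = addr & ~mask;                 /* andc r3,r3,r8               */
    *end_line = (addr + count + mask) & ~mask;  /* add r4,r7,r8 / andc r4,r4,r8 */
}

int main(void)
{
    uint64_t line = 32;                 /* example: a 32-byte-line processor   */
    uint64_t first, end;

    /* e.g. flush 100 bytes starting 8 bytes into a line */
    cache_op_range(0x1008, 100, line, &first, &end);

    printf("operate on lines 0x%llx..0x%llx (%llu lines)\n",
           (unsigned long long)first, (unsigned long long)end,
           (unsigned long long)((end - first) / line));
    return 0;
}

Working on whole lines this way is also what lets the routines honor the header comment about avoiding address compares: the assembly loops count a byte remainder down to zero in a register (sub. r6,r6,r9) instead of comparing pointers, so the same code runs unchanged in 32-bit and 64-bit addressing modes.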