*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
 * @OSF_COPYRIGHT@
*/
+#include <cpus.h>
+
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
+#include <cpus.h>
#include <assym.s>
+#include <mach_debug.h>
+#include <mach/ppc/vm_param.h>
-/* These routines run in 32- or 64-bit addressing mode, and handle
- * 32- and 128-byte cache lines. They do not use compare instructions
- * on addresses, since compares are 32/64-bit-mode-specific.
+/*
+ * extern void sync_cache(vm_offset_t pa, unsigned count);
+ *
+ * sync_cache takes a physical address and a byte count to sync; because
+ * contiguous virtual pages need not be physically contiguous, it must not
+ * be called for a range spanning multiple virtual pages.
+ *
+ * It writes the data cache out to memory and invalidates the instruction
+ * cache for the address range in question.
*/
-#define kDcbf 0x1
-#define kDcbfb 31
-#define kDcbi 0x2
-#define kDcbib 30
-#define kIcbi 0x4
-#define kIcbib 29
+ENTRY(sync_cache, TAG_NO_FRAME_USED)
+
+ /* Switch off data translations */
+ mfmsr r6
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+ mtmsr r7
+ isync
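+
+ /*
+ * Data translation is now off, so the dcbf and icbi operations below use
+ * untranslated (physical) effective addresses - i.e. the physical address
+ * the caller passed in r3.
+ */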
+
+ /* Check to see if the address is aligned. */
+ add r8, r3,r4
+ andi. r8,r8,(CACHE_LINE_SIZE-1)
+ beq- .L_sync_check
+ addi r4,r4,CACHE_LINE_SIZE
+ li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */
+ andc r4,r4,r7
+ andc r3,r3,r7
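+
+ /*
+ * The fix-up above is roughly this C (illustrative only; it assumes
+ * CACHE_LINE_SIZE is a power of two):
+ *
+ *	if ((pa + count) & (CACHE_LINE_SIZE - 1)) {
+ *		count = (count + CACHE_LINE_SIZE) & ~(CACHE_LINE_SIZE - 1);
+ *		pa    = pa & ~(CACHE_LINE_SIZE - 1);
+ *	}
+ */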
+
+.L_sync_check:
+ cmpwi r4, CACHE_LINE_SIZE
+ ble .L_sync_one_line
+
+ /* Make ctr hold count of how many times we should loop */
+ addi r8, r4, (CACHE_LINE_SIZE-1)
+ srwi r8, r8, CACHE_LINE_POW2
+ mtctr r8
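+ /* i.e. ctr = (count + CACHE_LINE_SIZE - 1) >> CACHE_LINE_POW2 = number of cache lines */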
+ /* loop to flush the data cache */
+.L_sync_data_loop:
+ subic r4, r4, CACHE_LINE_SIZE
+ dcbf r3, r4
+ bdnz .L_sync_data_loop
+
+ sync
+ mtctr r8
+
+ /* loop to invalidate the instruction cache */
+.L_sync_inval_loop:
+ icbi r3, r4
+ addic r4, r4, CACHE_LINE_SIZE
+ bdnz .L_sync_inval_loop
+
+.L_sync_cache_done:
+ sync /* Finish physical writes */
+ mtmsr r6 /* Restore original translations */
+ isync /* Ensure data translations are on */
+ blr
+
+.L_sync_one_line:
+ dcbf 0,r3
+ sync
+ icbi 0,r3
+ b .L_sync_cache_done
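+
+/*
+ * For reference, a rough C-level sketch of the flush-and-invalidate
+ * sequence above (illustrative only; dcbf(), icbi() and sync() stand for
+ * the corresponding instructions, and the range is assumed to already be
+ * cache-line aligned):
+ *
+ *	for (off = 0; off < count; off += CACHE_LINE_SIZE)
+ *		dcbf(pa + off);			// push dirty data out to memory
+ *	sync();					// wait for the flushes to complete
+ *	for (off = 0; off < count; off += CACHE_LINE_SIZE)
+ *		icbi(pa + off);			// discard stale instruction lines
+ *	sync();					// complete before returning
+ */
+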
/*
* extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
- * extern void flush_dcache64(addr64_t addr, unsigned count, boolean phys);
*
* flush_dcache takes a virtual or physical address and a byte count to
* flush, and can be called for a range spanning multiple virtual pages.
* If 'phys' is non-zero, the address is treated as physical.
*/
+ENTRY(flush_dcache, TAG_NO_FRAME_USED)
+
+ /* optionally switch off data translations */
+
+ cmpwi r5, 0
+ mfmsr r6
+ beq+ 0f
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+ mtmsr r7
+ isync
+0:
+
+ /* Check to see if the address is aligned. */
+ add r8, r3,r4
+ andi. r8,r8,(CACHE_LINE_SIZE-1)
+ beq- .L_flush_dcache_check
+ addi r4,r4,CACHE_LINE_SIZE
+ li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */
+ andc r4,r4,r7
+ andc r3,r3,r7
-
- .text
- .align 5
- .globl _flush_dcache
-_flush_dcache:
- li r0,kDcbf // use DCBF instruction
- rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine
- b cache_op_join // join common code
+.L_flush_dcache_check:
+ cmpwi r4, CACHE_LINE_SIZE
+ ble .L_flush_dcache_one_line
+
+ /* Make ctr hold count of how many times we should loop */
+ addi r8, r4, (CACHE_LINE_SIZE-1)
+ srwi r8, r8, CACHE_LINE_POW2
+ mtctr r8
- .align 5
- .globl _flush_dcache64
-_flush_dcache64:
- rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
- li r0,kDcbf // use DCBF instruction
- rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
- mr r4,r5 ; Move count
- mr r5,r6 ; Move physical flag
- b cache_op_join // join common code
+.L_flush_dcache_flush_loop:
+ subic r4, r4, CACHE_LINE_SIZE
+ dcbf r3, r4
+ bdnz .L_flush_dcache_flush_loop
+
+.L_flush_dcache_done:
+ /* Sync, then restore the MSR if it was modified */
+ cmpwi r5, 0
+ sync /* make sure invalidates have completed */
+ beq+ 0f
+ mtmsr r6 /* Restore original translations */
+ isync /* Ensure data translations are on */
+0:
+ blr
+
+.L_flush_dcache_one_line:
+ xor r4,r4,r4
+ dcbf 0,r3
+ b .L_flush_dcache_done
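+
+/*
+ * Illustrative caller (a sketch, not part of this file; the buffer, its
+ * length, and the DMA scenario are assumptions): push a buffer's cached
+ * contents out to memory before a device reads it.
+ *
+ *	extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
+ *
+ *	flush_dcache((vm_offset_t) buf, len, FALSE);	// virtual address, so phys == FALSE
+ */
+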
/*
* extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);
- * extern void invalidate_dcache64(addr64_t va, unsigned count, boolean phys);
*
* invalidate_dcache takes a virtual or physical address and a byte count
* to invalidate, and can be called for a range spanning multiple virtual
* pages.
*
* It invalidates the data cache for the address range in question.
*/
-
- .globl _invalidate_dcache
-_invalidate_dcache:
- li r0,kDcbi // use DCBI instruction
- rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine
- b cache_op_join // join common code
-
-
- .align 5
- .globl _invalidate_dcache64
-_invalidate_dcache64:
- rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
- li r0,kDcbi // use DCBI instruction
- rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
- mr r4,r5 ; Move count
- mr r5,r6 ; Move physical flag
- b cache_op_join // join common code
+
+ENTRY(invalidate_dcache, TAG_NO_FRAME_USED)
+
+ /* optionally switch off data translations */
+
+ cmpwi r5, 0
+ mfmsr r6
+ beq+ 0f
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+ mtmsr r7
+ isync
+0:
+
+ /* Check to see if the address is aligned. */
+ add r8, r3,r4
+ andi. r8,r8,(CACHE_LINE_SIZE-1)
+ beq- .L_invalidate_dcache_check
+ addi r4,r4,CACHE_LINE_SIZE
+ li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */
+ andc r4,r4,r7
+ andc r3,r3,r7
+
+.L_invalidate_dcache_check:
+ cmpwi r4, CACHE_LINE_SIZE
+ ble .L_invalidate_dcache_one_line
+
+ /* Make ctr hold count of how many times we should loop */
+ addi r8, r4, (CACHE_LINE_SIZE-1)
+ srwi r8, r8, CACHE_LINE_POW2
+ mtctr r8
+
+.L_invalidate_dcache_invalidate_loop:
+ subic r4, r4, CACHE_LINE_SIZE
+ dcbi r3, r4
+ bdnz .L_invalidate_dcache_invalidate_loop
+
+.L_invalidate_dcache_done:
+ /* Sync, then restore the MSR if it was modified */
+ cmpwi r5, 0
+ sync /* make sure invalidates have completed */
+ beq+ 0f
+ mtmsr r6 /* Restore original translations */
+ isync /* Ensure data translations are on */
+0:
+ blr
+
+.L_invalidate_dcache_one_line:
+ xor r4,r4,r4
+ dcbi 0,r3
+ b .L_invalidate_dcache_done
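+
+/*
+ * Note that dcbi discards cache lines without writing modified data back,
+ * so any dirty data in the range is simply lost.  An illustrative (assumed)
+ * use is invalidating a receive buffer that a device is about to fill by DMA:
+ *
+ *	invalidate_dcache((vm_offset_t) rx_buf, rx_len, FALSE);
+ */
+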
/*
* extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys);
- * extern void invalidate_icache64(addr64_t addr, unsigned cnt, boolean phys);
*
* invalidate_icache takes a virtual or physical address and a byte count
* to invalidate, and can be called for a range spanning multiple virtual
* pages.
*
* It invalidates the instruction cache for the address range in question.
- */
-
- .globl _invalidate_icache
-_invalidate_icache:
- li r0,kIcbi // use ICBI instruction
- rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine
- b cache_op_join // join common code
-
-
- .align 5
- .globl _invalidate_icache64
-_invalidate_icache64:
- rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
- li r0,kIcbi // use ICBI instruction
- rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
- mr r4,r5 ; Move count
- mr r5,r6 ; Move physical flag
- b cache_op_join // join common code
-
-/*
- * extern void sync_ppage(ppnum_t pa);
- *
- * sync_ppage takes a physical page number
- *
- * it writes out the data cache and invalidates the instruction
- * cache for the address range in question
*/
- .globl _sync_ppage
- .align 5
-_sync_ppage: // Should be the most commonly called routine, by far
- mfsprg r2,2
- li r0,kDcbf+kIcbi // we need to dcbf and then icbi
- mtcrf 0x02,r2 ; Move pf64Bit to cr6
- li r5,1 // set flag for physical addresses
- li r4,4096 ; Set page size
- bt++ pf64Bitb,spp64 ; Skip if 64-bit (only they take the hint)
- rlwinm r3,r3,12,0,19 ; Convert to physical address - 32-bit
- b cache_op_join ; Join up....
-
-spp64: sldi r3,r3,12 ; Convert to physical address - 64-bit
- b cache_op_join ; Join up....
-
+ENTRY(invalidate_icache, TAG_NO_FRAME_USED)
+ /* optionally switch off data translations */
+ cmpwi r5, 0
+ mfmsr r6
+ beq+ 0f
+ rlwinm r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
+ rlwinm r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
+ rlwinm r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+ mtmsr r7
+ isync
+0:
-/*
- * extern void sync_cache_virtual(vm_offset_t addr, unsigned count);
- *
- * Like "sync_cache", except it takes a virtual address and byte count.
- * It flushes the data cache, invalidates the I cache, and sync's.
- */
-
- .globl _sync_cache_virtual
- .align 5
-_sync_cache_virtual:
- li r0,kDcbf+kIcbi // we need to dcbf and then icbi
- li r5,0 // set flag for virtual addresses
- b cache_op_join // join common code
-
-
-/*
- * extern void sync_cache(vm_offset_t pa, unsigned count);
- * extern void sync_cache64(addr64_t pa, unsigned count);
- *
- * sync_cache takes a physical address and count to sync, thus
- * must not be called for multiple virtual pages.
- *
- * it writes out the data cache and invalidates the instruction
- * cache for the address range in question
- */
+ /* Check to see if the address is aligned. */
+ add r8, r3,r4
+ andi. r8,r8,(CACHE_LINE_SIZE-1)
+ beq- .L_invalidate_icache_check
+ addi r4,r4,CACHE_LINE_SIZE
+ li r7,(CACHE_LINE_SIZE-1) /* Align buffer & count - avoid overflow problems */
+ andc r4,r4,r7
+ andc r3,r3,r7
+
+.L_invalidate_icache_check:
+ cmpwi r4, CACHE_LINE_SIZE
+ ble .L_invalidate_icache_one_line
+
+ /* Make ctr hold count of how many times we should loop */
+ addi r8, r4, (CACHE_LINE_SIZE-1)
+ srwi r8, r8, CACHE_LINE_POW2
+ mtctr r8
+
+.L_invalidate_icache_invalidate_loop:
+ subic r4, r4, CACHE_LINE_SIZE
+ icbi r3, r4
+ bdnz .L_invalidate_icache_invalidate_loop
- .globl _sync_cache
- .align 5
-_sync_cache:
- li r0,kDcbf+kIcbi // we need to dcbf and then icbi
- li r5,1 // set flag for physical addresses
- rlwinm r3,r3,0,0,31 // truncate address in case this is a 64-bit machine
- b cache_op_join // join common code
-
- .globl _sync_cache64
- .align 5
-_sync_cache64:
- rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
- li r0,kDcbf+kIcbi // we need to dcbf and then icbi
- rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
- mr r4,r5 ; Copy over the length
- li r5,1 // set flag for physical addresses
-
-
- // Common code to handle the cache operations.
-
-cache_op_join: // here with r3=addr, r4=count, r5=phys flag, r0=bits
- mfsprg r10,2 // r10 <- processor feature flags
- cmpwi cr5,r5,0 // using physical addresses?
- mtcrf 0x01,r0 // move kDcbf, kDcbi, and kIcbi bits to CR7
- andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
- mtcrf 0x02,r10 // move pf64Bit bit to CR6
- subi r8,r9,1 // r8 <- (linesize-1)
- beq-- cr5,cache_op_2 // skip if using virtual addresses
-
- bf-- pf64Bitb,cache_op_not64 // This is not a 64-bit machine
-
- srdi r12,r3,31 // Slide bit 32 to bit 63
- cmpldi r12,1 // Are we in the I/O mapped area?
- beqlr-- // No cache ops allowed here...
-
-cache_op_not64:
- mflr r12 // save return address
- bl EXT(ml_set_physical) // turn on physical addressing
- mtlr r12 // restore return address
-
- // get r3=first cache line, r4=first line not in set, r6=byte count
-
-cache_op_2:
- add r7,r3,r4 // point to 1st byte not to operate on
- andc r3,r3,r8 // r3 <- 1st line to operate on
- add r4,r7,r8 // round up
- andc r4,r4,r8 // r4 <- 1st line not to operate on
- sub. r6,r4,r3 // r6 <- byte count to operate on
- beq-- cache_op_exit // nothing to do
- bf-- kDcbfb,cache_op_6 // no need to dcbf
-
-
- // DCBF loop
-
-cache_op_5:
- sub. r6,r6,r9 // more to go?
- dcbf r6,r3 // flush next line to RAM
- bne cache_op_5 // loop if more to go
- sync // make sure the data reaches RAM
- sub r6,r4,r3 // reset count
-
-
- // ICBI loop
-
-cache_op_6:
- bf-- kIcbib,cache_op_8 // no need to icbi
-cache_op_7:
- sub. r6,r6,r9 // more to go?
- icbi r6,r3 // invalidate next line
- bne cache_op_7
- sub r6,r4,r3 // reset count
- isync
- sync
-
-
- // DCBI loop
-
-cache_op_8:
- bf++ kDcbib,cache_op_exit // no need to dcbi
-cache_op_9:
- sub. r6,r6,r9 // more to go?
- dcbi r6,r3 // invalidate next line
- bne cache_op_9
- sync
-
-
- // restore MSR iff necessary and done
-
-cache_op_exit:
- beqlr-- cr5 // if using virtual addresses, no need to restore MSR
- b EXT(ml_restore) // restore MSR and return
+.L_invalidate_icache_done:
+ sync /* make sure invalidates have completed */
+ mtmsr r6 /* Restore original translations */
+ isync /* Ensure data translations are on */
+ blr
+.L_invalidate_icache_one_line:
+ xor r4,r4,r4
+ icbi 0,r3
+ b .L_invalidate_icache_done
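+
+/*
+ * Illustrative pairing (a sketch; the code-patching scenario is an
+ * assumption, not taken from this file): after new instructions have been
+ * stored into memory, the data cache must be flushed and the instruction
+ * cache invalidated before the new code is executed:
+ *
+ *	flush_dcache((vm_offset_t) code, size, FALSE);		// push stores out to memory
+ *	invalidate_icache((vm_offset_t) code, size, FALSE);	// drop stale i-cache lines
+ */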