]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/ppc/cache.s
xnu-1228.3.13.tar.gz
[apple/xnu.git] / osfmk / ppc / cache.s
index 0f3157714600076d7c1c29e93a2f7a6ca38ab0e4..94aa0aeeb1648ce4de600960c3de594464a2dcdd 100644 (file)
 /*
  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
  *
 /*
  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
 
  */
 /*
  * @OSF_COPYRIGHT@
  */
 
-#include <cpus.h>
-
 #include <ppc/asm.h>
 #include <ppc/proc_reg.h>
 #include <ppc/asm.h>
 #include <ppc/proc_reg.h>
-#include <cpus.h>
+#include <ppc/exception.h>
 #include <assym.s>
 #include <assym.s>
-#include <mach_debug.h>
-#include <mach/ppc/vm_param.h>
 
 
-/*
- * extern void sync_cache(vm_offset_t pa, unsigned count);
- *
- * sync_cache takes a physical address and count to sync, thus
- * must not be called for multiple virtual pages.
- *
- * it writes out the data cache and invalidates the instruction
- * cache for the address range in question
+/* These routines run in 32- or 64-bit addressing mode, and handle
+ * 32- and 128-byte cache lines.  They do not use compare instructions
+ * on addresses, since compares are 32/64-bit-mode-specific.
  */
 
  */
 
-ENTRY(sync_cache, TAG_NO_FRAME_USED)
-
-       /* Switch off data translations */
-       mfmsr   r6
-       rlwinm  r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1       ; Force floating point off
-       rlwinm  r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1     ; Force vectors off
-       rlwinm  r7,     r6,     0,      MSR_DR_BIT+1,   MSR_DR_BIT-1
-       mtmsr   r7
-       isync
-
-       /* Check to see if the address is aligned. */
-       add     r8, r3,r4
-       andi.   r8,r8,(CACHE_LINE_SIZE-1)
-       beq-    .L_sync_check
-       addi    r4,r4,CACHE_LINE_SIZE
-       li      r7,(CACHE_LINE_SIZE-1)  /* Align buffer & count - avoid overflow problems */
-       andc    r4,r4,r7
-       andc    r3,r3,r7
-
-.L_sync_check:
-       cmpwi   r4,     CACHE_LINE_SIZE
-       ble     .L_sync_one_line
-       
-       /* Make ctr hold count of how many times we should loop */
-       addi    r8,     r4,     (CACHE_LINE_SIZE-1)
-       srwi    r8,     r8,     CACHE_LINE_POW2
-       mtctr   r8
-
-       /* loop to flush the data cache */
-.L_sync_data_loop:
-       subic   r4,     r4,     CACHE_LINE_SIZE
-       dcbf    r3,     r4
-       bdnz    .L_sync_data_loop
-       
-       sync
-       mtctr   r8
-
-       /* loop to invalidate the instruction cache */
-.L_sync_inval_loop:
-       icbi    r3,     r4
-       addic   r4,     r4,     CACHE_LINE_SIZE
-       bdnz    .L_sync_inval_loop
-
-.L_sync_cache_done:
-       sync                    /* Finish physical writes */
-       mtmsr   r6              /* Restore original translations */
-       isync                   /* Ensure data translations are on */
-       blr
-
-.L_sync_one_line:
-       dcbf    0,r3
-       sync
-       icbi    0,r3
-       b       .L_sync_cache_done
+#define        kDcbf                   0x1             /* operation mask bit: flush data cache lines (dcbf) */
+#define        kDcbfb                  31              /* CR bit number holding kDcbf after "mtcrf 0x01" copies the mask into CR7 */
+#define        kDcbi                   0x2             /* operation mask bit: invalidate data cache lines (dcbi) */
+#define        kDcbib                  30              /* CR bit number holding kDcbi */
+#define        kIcbi                   0x4             /* operation mask bit: invalidate instruction cache lines (icbi) */
+#define        kIcbib                  29              /* CR bit number holding kIcbi */
+
 
 /*
  * extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
 
 /*
  * extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
+ * extern void flush_dcache64(addr64_t addr, unsigned count, boolean phys);
  *
  * flush_dcache takes a virtual or physical address and count to flush
  * and (can be called for multiple virtual pages).
  *
  * flush_dcache takes a virtual or physical address and count to flush
  * and (can be called for multiple virtual pages).
@@ -109,172 +60,330 @@ ENTRY(sync_cache, TAG_NO_FRAME_USED)
  * if 'phys' is non-zero then physical addresses will be used
  */
 
  * if 'phys' is non-zero then physical addresses will be used
  */
 
-ENTRY(flush_dcache, TAG_NO_FRAME_USED)
-
-       /* optionally switch off data translations */
-
-       cmpwi   r5,     0
-       mfmsr   r6
-       beq+    0f
-       rlwinm  r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1       ; Force floating point off
-       rlwinm  r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1     ; Force vectors off
-       rlwinm  r7,     r6,     0,      MSR_DR_BIT+1,   MSR_DR_BIT-1
-       mtmsr   r7
-       isync
-0:     
-
-       /* Check to see if the address is aligned. */
-       add     r8, r3,r4
-       andi.   r8,r8,(CACHE_LINE_SIZE-1)
-       beq-    .L_flush_dcache_check
-       addi    r4,r4,CACHE_LINE_SIZE
-       li      r7,(CACHE_LINE_SIZE-1)  /* Align buffer & count - avoid overflow problems */
-       andc    r4,r4,r7
-       andc    r3,r3,r7
-
-.L_flush_dcache_check:
-       cmpwi   r4,     CACHE_LINE_SIZE
-       ble     .L_flush_dcache_one_line
-       
-       /* Make ctr hold count of how many times we should loop */
-       addi    r8,     r4,     (CACHE_LINE_SIZE-1)
-       srwi    r8,     r8,     CACHE_LINE_POW2
-       mtctr   r8
-
-.L_flush_dcache_flush_loop:
-       subic   r4,     r4,     CACHE_LINE_SIZE
-       dcbf    r3,     r4
-       bdnz    .L_flush_dcache_flush_loop
-
-.L_flush_dcache_done:
-       /* Sync restore msr if it was modified */
-       cmpwi   r5,     0
-       sync                    /* make sure invalidates have completed */
-       beq+    0f
-       mtmsr   r6              /* Restore original translations */
-       isync                   /* Ensure data translations are on */
-0:
-       blr
-
-.L_flush_dcache_one_line:
-       xor     r4,r4,r4
-       dcbf    0,r3
-       b       .L_flush_dcache_done
+// _flush_dcache: entry with r3=addr, r4=count, r5=phys flag; selects the dcbf operation only.
+        .text
+        .align 5
+        .globl _flush_dcache
+_flush_dcache:
+        li             r0,kDcbf                                        // r0 <- operation mask: dcbf only
+        rlwinm r3,r3,0,0,31                            // clear the upper 32 bits of the address in case this is a 64-bit machine
+        b              cache_op_join                           // common code does the work; r4 and r5 pass through untouched
+// _flush_dcache64: like _flush_dcache, but the address arrives as a 64-bit value split across r3 (high) and r4 (low), with the count in r5 and the phys flag in r6.
+        .align 5
+        .globl _flush_dcache64
+_flush_dcache64:
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+               li              r0,kDcbf                                        // r0 <- operation mask: dcbf only
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Move count
+               mr              r5,r6                                           ; Move physical flag
+        b              cache_op_join                           // registers now match what the common code expects (r3=addr, r4=count, r5=phys)
 
 
 /*
  * extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);
 
 
 /*
  * extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);
+ * extern void invalidate_dcache64(addr64_t va, unsigned count, boolean phys);
  *
  * invalidate_dcache takes a virtual or physical address and count to
  * invalidate and (can be called for multiple virtual pages).
  *
  * it invalidates the data cache for the address range in question
  */
  *
  * invalidate_dcache takes a virtual or physical address and count to
  * invalidate and (can be called for multiple virtual pages).
  *
  * it invalidates the data cache for the address range in question
  */
-
-ENTRY(invalidate_dcache, TAG_NO_FRAME_USED)
-
-       /* optionally switch off data translations */
-
-       cmpwi   r5,     0
-       mfmsr   r6
-       beq+    0f
-       rlwinm  r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1       ; Force floating point off
-       rlwinm  r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1     ; Force vectors off
-       rlwinm  r7,     r6,     0,      MSR_DR_BIT+1,   MSR_DR_BIT-1
-       mtmsr   r7
-       isync
-0:     
-
-       /* Check to see if the address is aligned. */
-       add     r8, r3,r4
-       andi.   r8,r8,(CACHE_LINE_SIZE-1)
-       beq-    .L_invalidate_dcache_check
-       addi    r4,r4,CACHE_LINE_SIZE
-       li      r7,(CACHE_LINE_SIZE-1)  /* Align buffer & count - avoid overflow problems */
-       andc    r4,r4,r7
-       andc    r3,r3,r7
-
-.L_invalidate_dcache_check:
-       cmpwi   r4,     CACHE_LINE_SIZE
-       ble     .L_invalidate_dcache_one_line
-       
-       /* Make ctr hold count of how many times we should loop */
-       addi    r8,     r4,     (CACHE_LINE_SIZE-1)
-       srwi    r8,     r8,     CACHE_LINE_POW2
-       mtctr   r8
-
-.L_invalidate_dcache_invalidate_loop:
-       subic   r4,     r4,     CACHE_LINE_SIZE
-       dcbi    r3,     r4
-       bdnz    .L_invalidate_dcache_invalidate_loop
-
-.L_invalidate_dcache_done:
-       /* Sync restore msr if it was modified */
-       cmpwi   r5,     0
-       sync                    /* make sure invalidates have completed */
-       beq+    0f
-       mtmsr   r6              /* Restore original translations */
-       isync                   /* Ensure data translations are on */
-0:
-       blr
-
-.L_invalidate_dcache_one_line:
-       xor     r4,r4,r4
-       dcbi    0,r3
-       b       .L_invalidate_dcache_done
+        .globl _invalidate_dcache
+_invalidate_dcache:                                                    // entry with r3=addr, r4=count, r5=phys flag
+        li             r0,kDcbi                                        // r0 <- operation mask: dcbi only
+        rlwinm r3,r3,0,0,31                            // clear the upper 32 bits of the address in case this is a 64-bit machine
+        b              cache_op_join                           // common code does the work; r4 and r5 pass through untouched
+// _invalidate_dcache64: like _invalidate_dcache, but the address arrives as a 64-bit value split across r3 (high) and r4 (low), with the count in r5 and the phys flag in r6.
+
+        .align 5
+        .globl _invalidate_dcache64
+_invalidate_dcache64:
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+        li             r0,kDcbi                                        // r0 <- operation mask: dcbi only
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Move count
+               mr              r5,r6                                           ; Move physical flag
+        b              cache_op_join                           // registers now match what the common code expects (r3=addr, r4=count, r5=phys)
 
 /*
  * extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys);
 
 /*
  * extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys);
+ * extern void invalidate_icache64(addr64_t addr, unsigned cnt, boolean phys);
  *
  * invalidate_icache takes a virtual or physical address and
  * count to invalidate, (can be called for multiple virtual pages).
  *
  * it invalidates the instruction cache for the address range in question.
  */
  *
  * invalidate_icache takes a virtual or physical address and
  * count to invalidate, (can be called for multiple virtual pages).
  *
  * it invalidates the instruction cache for the address range in question.
  */
+        .globl _invalidate_icache
+_invalidate_icache:                                                    // entry with r3=addr, r4=count, r5=phys flag
+        li             r0,kIcbi                                        // r0 <- operation mask: icbi only
+        rlwinm r3,r3,0,0,31                            // clear the upper 32 bits of the address in case this is a 64-bit machine
+        b              cache_op_join                           // common code does the work; r4 and r5 pass through untouched
+        // _invalidate_icache64: like _invalidate_icache, but the address arrives as a 64-bit value split across r3 (high) and r4 (low), with the count in r5 and the phys flag in r6.
+
+        .align 5
+        .globl _invalidate_icache64
+_invalidate_icache64:
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+        li             r0,kIcbi                                        // r0 <- operation mask: icbi only
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Move count
+               mr              r5,r6                                           ; Move physical flag
+        b              cache_op_join                           // registers now match what the common code expects (r3=addr, r4=count, r5=phys)
+                        
+/*
+ * extern void sync_ppage(ppnum_t pa);
+ *
+ * sync_ppage takes a physical page number.
+ *
+ * It writes out the data cache and invalidates the instruction
+ * cache for the 4096-byte physical page that the number identifies.
+ */
+
+        .globl _sync_ppage
+        .align 5
+_sync_ppage:                                                           // entry with r3 = physical page number; should be the most commonly called routine, by far
+               mfsprg  r2,2                                            // r2 <- processor feature flags
+        li             r0,kDcbf+kIcbi                          // r0 <- operation mask: dcbf, then icbi
+               mtcrf   0x02,r2                                         ; Move pf64Bit to cr6
+        li             r5,1                                            // phys flag = 1: the common code will use physical addressing
+               li              r4,4096                                         ; Set page size
+               bt++    pf64Bitb,spp64                          ; Skip if 64-bit (only they take the hint)
+        rlwinm r3,r3,12,0,19                           ; Convert to physical address - 32-bit
+        b              cache_op_join                           ; Join up....
+        // 64-bit machines: the shifted page number may need more than 32 bits, so use a doubleword shift
+spp64: sldi    r3,r3,12                                        ; Convert to physical address - 64-bit        
+        b              cache_op_join                           ; Join up....
+                        
+
+
+/*
+ * extern void sync_cache_virtual(vm_offset_t addr, unsigned count);
+ *
+ * Like "sync_cache", except it takes a virtual address and byte count.
+ * It flushes the data cache, invalidates the instruction cache, and syncs.
+ */
+        .globl _sync_cache_virtual
+        .align 5
+_sync_cache_virtual:                                                   // entry with r3 = virtual address, r4 = byte count
+        li             r0,kDcbf+kIcbi                          // r0 <- operation mask: dcbf, then icbi
+        li             r5,0                                            // phys flag = 0: the common code leaves addressing mode alone
+        b              cache_op_join                           // join common code. NOTE(review): r3 is not truncated to 32 bits here as it is in _sync_cache -- confirm intended
+        
+                        
+/*
+ * extern void sync_cache(vm_offset_t pa, unsigned count);
+ * extern void sync_cache64(addr64_t pa, unsigned count);
+ *
+ * sync_cache takes a physical address and count to sync, thus
+ * must not be called for multiple virtual pages.
+ *
+ * it writes out the data cache and invalidates the instruction
+ * cache for the address range in question
+ */
+
+        .globl _sync_cache
+        .align 5
+_sync_cache:                                                           // entry with r3 = physical address, r4 = count
+        li             r0,kDcbf+kIcbi                          // r0 <- operation mask: dcbf, then icbi
+        li             r5,1                                            // phys flag = 1: the common code will use physical addressing
+        rlwinm r3,r3,0,0,31                            // clear the upper 32 bits of the address in case this is a 64-bit machine
+        b              cache_op_join                           // join common code
+
+        .globl _sync_cache64
+        .align 5
+_sync_cache64:                                                         // entry with r3:r4 = 64-bit physical address (high:low), r5 = count
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+        li             r0,kDcbf+kIcbi                          // r0 <- operation mask: dcbf, then icbi
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Copy over the length
+        li             r5,1                                            // phys flag = 1; execution then falls straight through into cache_op_join
+
+        
+        // Common code to handle the cache operations.
+        // Every entry point above branches here (or, for _sync_cache64, falls through).
+        // The requested operations run in a fixed order: dcbf pass, icbi pass, dcbi pass.
+cache_op_join:                                                         // here with r3=addr, r4=count, r5=phys flag, r0=bits (kDcbf/kDcbi/kIcbi mask)
+        mfsprg r10,2                                           // r10 <- processor feature flags
+        cmpwi  cr5,r5,0                                        // cr5 <- (phys flag == 0); consulted again at cache_op_exit
+        mtcrf  0x01,r0                                         // move kDcbf, kDcbi, and kIcbi bits to CR7
+        andi.  r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size (the masked feature bits are used directly as the byte count below)
+        mtcrf  0x02,r10                                        // move pf64Bit bit to CR6
+        subi   r8,r9,1                                         // r8 <- (linesize-1), the mask of offset-within-line bits
+        beq--  cr5,cache_op_2                          // virtual addresses: skip the I/O check and the switch to physical addressing
+        
+        bf--   pf64Bitb,cache_op_not64         // This is not a 64-bit machine
+       
+        srdi   r12,r3,31                                       // r12 <- addr >> 31 (slides bit 32 to bit 63)
+        cmpldi r12,1                                           // r12 == 1 means 0x80000000 <= addr <= 0xFFFFFFFF: the I/O mapped area?
+        beqlr--                                                                // yes: return at once, no cache ops allowed here
+        
+cache_op_not64:
+        mflr   r12                                                     // save return address; the bl below overwrites LR
+        bl             EXT(ml_set_physical)            // turn on physical addressing
+        mtlr   r12                                                     // restore return address
+
+        // get r3=first cache line, r4=first line not in set, r6=byte count
+        
+cache_op_2:        
+        add            r7,r3,r4                                        // r7 <- addr+count: 1st byte not to operate on
+        andc   r3,r3,r8                                        // r3 <- 1st line to operate on (addr rounded down to a line boundary)
+        add            r4,r7,r8                                        // round up
+        andc   r4,r4,r8                                        // r4 <- 1st line not to operate on
+        sub.   r6,r4,r3                                        // r6 <- byte count to operate on (a whole number of lines)
+        beq--  cache_op_exit                           // nothing to do
+        bf--   kDcbfb,cache_op_6                       // dcbf not requested: skip that pass
+        
+        
+        // DCBF loop: r6 counts down one line at a time, so lines are visited from highest offset to offset 0
+        
+cache_op_5:
+        sub.   r6,r6,r9                                        // step to next lower line; CR0 says whether this is offset 0
+        dcbf   r6,r3                                           // flush line at r3+r6 to RAM
+        bne            cache_op_5                                      // loop until the line at offset 0 has been done
+        sync                                                           // make sure the data reaches RAM
+        sub            r6,r4,r3                                        // reset count for the next pass
+
+
+        // ICBI loop: same descending walk as the DCBF loop
+        
+cache_op_6:
+        bf--   kIcbib,cache_op_8                       // icbi not requested: skip that pass
+cache_op_7:
+        sub.   r6,r6,r9                                        // step to next lower line
+        icbi   r6,r3                                           // invalidate instruction cache line at r3+r6
+        bne            cache_op_7                                      // loop until the line at offset 0 has been done
+        sub            r6,r4,r3                                        // reset count for the next pass
+        isync
+        sync
+        
+        
+        // DCBI loop: same descending walk as the DCBF loop
+        
+cache_op_8:
+        bf++   kDcbib,cache_op_exit            // dcbi not requested: done
+cache_op_9:
+        sub.   r6,r6,r9                                        // step to next lower line
+        dcbi   r6,r3                                           // invalidate data cache line at r3+r6
+        bne            cache_op_9                                      // loop until the line at offset 0 has been done
+        sync
+        
+        
+        // restore MSR iff necessary and done
+        
+cache_op_exit:
+        beqlr--        cr5                                                     // if using virtual addresses, no need to restore MSR
+        b              EXT(ml_restore)                         // restore MSR and return (tail call: LR still holds our caller's address)
+
+
+////////////////////////////////////////////////////
+
+// dcache_incoherent_io_store64
+//
+// Entry: r3:r4 = 64-bit physical address (high:low halves), r5 = byte count.
+// Writes back every data cache line that overlaps [addr, addr+count):
+// dcbst on every line except the one at the lowest address, then a dcbf of
+// that lowest line followed by a load from it.
+// On a 64-bit machine, returns immediately without touching anything when
+// addr>>31 == 1 (what the code below treats as the I/O mapped area).
+//
+// Fix: the loop used to decrement the offset a second time before its first
+// dcbst, so the highest line of a multi-line range was never stored and the
+// line at offset 0 was handled twice.  The dcbst now precedes the decrement.
+        .align 5
+        .globl _dcache_incoherent_io_store64
+_dcache_incoherent_io_store64:
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Move count
+
+                                                                                       // here with r3=addr, r4=count
+        mfsprg r10,2                                           // r10 <- processor feature flags
+        andi.  r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size
+        mtcrf  0x02,r10                                        // move pf64Bit bit to CR6
+        subi   r8,r9,1                                         // r8 <- (linesize-1)
+        
+        bf--   pf64Bitb,cache_ios_not64        // This is not a 64-bit machine
+       
+        srdi   r12,r3,31                                       // r12 <- addr >> 31 (slides bit 32 to bit 63)
+        cmpldi r12,1                                           // Are we in the I/O mapped area?
+        beqlr--                                                                // yes: return at once, no cache ops allowed here
+        
+cache_ios_not64:
+        mflr   r12                                                     // save return address; the bl below overwrites LR
+        bl             EXT(ml_set_physical)            // turn on physical addressing
+        mtlr   r12                                                     // restore return address
+
+        // get r3=first cache line, r4=first line not in set, r6=byte count
+        add            r7,r3,r4                                        // point to 1st byte not to operate on
+        andc   r3,r3,r8                                        // r3 <- 1st line to operate on
+        add            r4,r7,r8                                        // round up
+        andc   r4,r4,r8                                        // r4 <- 1st line not to operate on
+        sub.   r6,r4,r3                                        // r6 <- byte count to operate on
+        beq--  cache_ios_exit                          // nothing to do
+        
+        sub.   r6,r6,r9                                        // r6 <- offset of the highest line; zero if there is only one line
+        beq            cache_ios_last_line                     // single line: go straight to the dcbf
+        
+        // DCBST loop: store lines from the highest offset down to offset linesize.
+        // The dcbst must come before the decrement so that the highest line is included;
+        // the loop ends with r6 == 0, leaving the line at offset 0 for the dcbf below.
+cache_ios_5:
+        dcbst  r6,r3                                           // store line at r3+r6
+        sub.   r6,r6,r9                                        // step to next lower line; zero means only offset 0 remains
+        bne            cache_ios_5                                     // loop if more to go
+
+cache_ios_last_line:                                           // here with r6 == 0
+        sync                                                           // let the preceding dcbst's complete
+        isync
+        dcbf   r6,r3                                           // flush the final (offset 0) line
+        sync
+        isync
+        add            r6,r6,r3                                        // r6 <- address of the line just flushed
+        lwz            r0,0(r6)                                        // make sure the data reaches RAM (not just the memory controller)
+        isync
+
+        // restore MSR
+cache_ios_exit:
+        b              EXT(ml_restore)                         // restore MSR and return
+
+
+////////////////////////////////////////////////////
+
+        .align 5
+        .globl _dcache_incoherent_io_flush64
+_dcache_incoherent_io_flush64:                         // entry with r3:r4 = 64-bit physical address (high:low), r5 = count
+               rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
+               rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
+               mr              r4,r5                                           ; Move count
+
+                                                                                       // here with r3=addr, r4=count
+        mfsprg r10,2                                           // r10 <- processor feature flags
+        andi.  r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size
+        mtcrf  0x02,r10                                        // move pf64Bit bit to CR6
+        subi   r8,r9,1                                         // r8 <- (linesize-1)
+        
+        bf--   pf64Bitb,cache_iof_not64        // This is not a 64-bit machine
+       
+        srdi   r12,r3,31                                       // r12 <- addr >> 31 (slides bit 32 to bit 63)
+        cmpldi r12,1                                           // Are we in the I/O mapped area?
+        beqlr--                                                                // yes: return at once, no cache ops allowed here
+        
+cache_iof_not64:
+        mflr   r12                                                     // save return address; the bl below overwrites LR
+        bl             EXT(ml_set_physical)            // turn on physical addressing
+        mtlr   r12                                                     // restore return address
+
+        // get r3=first cache line, r4=first line not in set, r6=byte count
+        add            r7,r3,r4                                        // point to 1st byte not to operate on
+        andc   r3,r3,r8                                        // r3 <- 1st line to operate on
+        add            r4,r7,r8                                        // round up
+        andc   r4,r4,r8                                        // r4 <- 1st line not to operate on
+        sub.   r6,r4,r3                                        // r6 <- byte count to operate on
+        beq--  cache_iof_exit                          // nothing to do
+        
+        // DCBF loop: lines are visited from the highest offset down to offset 0
+cache_iof_5:
+        sub.   r6,r6,r9                                        // step to next lower line; CR0 says whether this is offset 0
+        dcbf   r6,r3                                           // flush line at r3+r6
+        bne            cache_iof_5                                     // loop if more to go
+
+cache_iof_last_line:                                           // not referenced by any branch in this routine
+        sync                                                           // wait for the dcbf's above to complete
+        isync
+
+        // restore MSR
+cache_iof_exit:
+        b              EXT(ml_restore)                         // restore MSR and return
+
 
 
-ENTRY(invalidate_icache, TAG_NO_FRAME_USED)
-
-       /* optionally switch off data translations */
-       cmpwi   r5,     0
-       mfmsr   r6
-       beq+    0f
-       rlwinm  r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1       ; Force floating point off
-       rlwinm  r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1     ; Force vectors off
-       rlwinm  r7,     r6,     0,      MSR_DR_BIT+1,   MSR_DR_BIT-1
-       mtmsr   r7
-       isync
-0:     
-
-       /* Check to see if the address is aligned. */
-       add     r8, r3,r4
-       andi.   r8,r8,(CACHE_LINE_SIZE-1)
-       beq-    .L_invalidate_icache_check
-       addi    r4,r4,CACHE_LINE_SIZE
-       li      r7,(CACHE_LINE_SIZE-1)  /* Align buffer & count - avoid overflow problems */
-       andc    r4,r4,r7
-       andc    r3,r3,r7
-
-.L_invalidate_icache_check:
-       cmpwi   r4,     CACHE_LINE_SIZE
-       ble     .L_invalidate_icache_one_line
-       
-       /* Make ctr hold count of how many times we should loop */
-       addi    r8,     r4,     (CACHE_LINE_SIZE-1)
-       srwi    r8,     r8,     CACHE_LINE_POW2
-       mtctr   r8
-
-.L_invalidate_icache_invalidate_loop:
-       subic   r4,     r4,     CACHE_LINE_SIZE
-       icbi    r3,     r4
-       bdnz    .L_invalidate_icache_invalidate_loop
-
-.L_invalidate_icache_done:
-       sync                    /* make sure invalidates have completed */
-       mtmsr   r6              /* Restore original translations */
-       isync                   /* Ensure data translations are on */
-       blr
-
-.L_invalidate_icache_one_line:
-       xor     r4,r4,r4
-       icbi    0,r3
-       b       .L_invalidate_icache_done