+
+ // 32-bit-address entry point for the ICBI-only operation.
+ // Sets up r0 and r3 for cache_op_join (r3=addr, r4=count, r5=phys flag, r0=bits);
+ // r4 and r5 are passed through exactly as the caller supplied them.
+ .globl _invalidate_icache
+_invalidate_icache:
+ rlwinm r3,r3,0,0,31 // keep only the low 32 bits of the address (matters on a 64-bit machine)
+ li r0,kIcbi // operation mask: ICBI only
+ b cache_op_join // continue in the shared cache-op code
+
+
+ .align 5
+ .globl _invalidate_icache64
+ // 64-bit-address entry point for the ICBI-only operation.
+ // On entry: r3 = high word of the address, r4 = low word, r5 = count, r6 = physical flag.
+ // Repacks these into the layout cache_op_join expects (r3=addr, r4=count, r5=phys flag, r0=bits).
+_invalidate_icache64:
+ rlwinm r3,r3,0,1,0 ; Replicate the high word of the long long paddr into the top half of r3
+ rlwimi r3,r4,0,0,31 ; Insert the low word to form the full 64-bit address
+ mr r4,r5 ; r4 <- count
+ mr r5,r6 ; r5 <- physical flag
+ li r0,kIcbi // operation mask: ICBI only
+ b cache_op_join // continue in the shared cache-op code
+
+/*
+ * extern void sync_ppage(ppnum_t pa);
+ *
+ * Takes a physical page number, converts it to the physical address
+ * of that page, and then writes out the data cache and invalidates
+ * the instruction cache for the 4096 bytes starting there.
+ */
+
+ .globl _sync_ppage
+ .align 5
+_sync_ppage: // Expected to be by far the most frequently called entry point
+ li r0,kDcbf+kIcbi // operation mask: dcbf first, then icbi
+ li r4,4096 ; Byte count is one page
+ li r5,1 // addresses are physical
+ mfsprg r2,2 ; Fetch the processor feature flags
+ mtcrf 0x02,r2 ; Put pf64Bit into cr6
+ bt++ pf64Bitb,spp64 ; 64-bit processors branch (only they honor the hint)
+ rlwinm r3,r3,12,0,19 ; 32-bit: page number -> physical address
+ b cache_op_join ; Go do the work
+
+spp64: sldi r3,r3,12 ; 64-bit: page number -> physical address
+ b cache_op_join ; Go do the work
+
+
+
+/*
+ * extern void sync_cache_virtual(vm_offset_t addr, unsigned count);
+ *
+ * Same operation as "sync_cache", but the range is given as a virtual
+ * address plus a byte count. The data cache is flushed, the instruction
+ * cache is invalidated, and the operations are followed by a sync.
+ */
+
+ .globl _sync_cache_virtual
+ .align 5
+_sync_cache_virtual:
+ li r5,0 // addresses are virtual, not physical
+ li r0,kDcbf+kIcbi // operation mask: dcbf first, then icbi
+ b cache_op_join // continue in the shared cache-op code
+
+
+/*
+ * extern void sync_cache(vm_offset_t pa, unsigned count);
+ * extern void sync_cache64(addr64_t pa, unsigned count);
+ *
+ * Both take a physical address and a byte count to sync, so a single
+ * call must not be used for a range spanning multiple virtual pages.
+ *
+ * The data cache is written out and the instruction cache is
+ * invalidated for the given address range.
+ */
+
+ .globl _sync_cache
+ .align 5
+_sync_cache:
+ rlwinm r3,r3,0,0,31 // keep only the low 32 bits of the address (matters on a 64-bit machine)
+ li r5,1 // addresses are physical
+ li r0,kDcbf+kIcbi // operation mask: dcbf first, then icbi
+ b cache_op_join // continue in the shared cache-op code
+
+ .globl _sync_cache64
+ .align 5
+ // 64-bit-address variant of sync_cache.
+ // On entry: r3 = high word of the physical address, r4 = low word, r5 = byte count.
+ // There is deliberately no branch at the end: execution falls through into
+ // cache_op_join, which must immediately follow this routine.
+_sync_cache64:
+ rlwinm r3,r3,0,1,0 ; Replicate the high word of the long long paddr into the top half of r3
+ rlwimi r3,r4,0,0,31 ; Insert the low word to form the full 64-bit address
+ mr r4,r5 ; r4 <- byte count
+ li r5,1 // addresses are physical
+ li r0,kDcbf+kIcbi // operation mask: dcbf first, then icbi
+
+
+ // Common code to handle the cache operations.
+ //
+ // Shared tail for the invalidate_icache*, sync_ppage and sync_cache* entry points.
+ // Entry: r3 = start address, r4 = byte count, r5 = nonzero when r3 is a physical address,
+ //        r0 = operation bits (kDcbf, kDcbi, kIcbi) selecting which loops run.
+ // The range is widened to whole cache lines, then the selected loops run in the
+ // order dcbf, icbi, dcbi.
+ // For physical addresses on a 64-bit processor, an address in 0x80000000-0xFFFFFFFF
+ // returns immediately without performing any cache operation.
+ // Registers written here: r6-r10, r12, LR (saved/restored around the bl), cr0, cr5, cr6, cr7.
+ // NOTE(review): ml_set_physical and ml_restore are defined elsewhere; the registers they
+ // use and the state ml_restore relies on are not visible in this section - confirm there.
+
+cache_op_join: // here with r3=addr, r4=count, r5=phys flag, r0=bits
+ mfsprg r10,2 // r10 <- processor feature flags
+ cmpwi cr5,r5,0 // using physical addresses?
+ mtcrf 0x01,r0 // move kDcbf, kDcbi, and kIcbi bits to CR7
+ andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
+ mtcrf 0x02,r10 // move pf64Bit bit to CR6
+ subi r8,r9,1 // r8 <- (linesize-1)
+ beq-- cr5,cache_op_2 // skip if using virtual addresses
+
+ bf-- pf64Bitb,cache_op_not64 // This is not a 64-bit machine
+
+ // 64-bit machine, physical address: r3 >> 31 equals 1 exactly when r3 is in 0x80000000-0xFFFFFFFF
+ srdi r12,r3,31 // Slide bit 32 to bit 63
+ cmpldi r12,1 // Are we in the I/O mapped area?
+ beqlr-- // No cache ops allowed here...
+
+cache_op_not64:
+ // LR is parked in r12 because the bl below overwrites it
+ mflr r12 // save return address
+ bl EXT(ml_set_physical) // turn on physical addressing
+ mtlr r12 // restore return address
+
+ // get r3=first cache line, r4=first line not in set, r6=byte count
+
+cache_op_2:
+ add r7,r3,r4 // point to 1st byte not to operate on
+ andc r3,r3,r8 // r3 <- 1st line to operate on
+ add r4,r7,r8 // round up
+ andc r4,r4,r8 // r4 <- 1st line not to operate on
+ sub. r6,r4,r3 // r6 <- byte count to operate on
+ beq-- cache_op_exit // nothing to do
+ bf-- kDcbfb,cache_op_6 // no need to dcbf
+
+
+ // DCBF loop
+ // r6 counts down from the total byte count by one line per pass; each pass
+ // operates on the line at r3+r6, so lines are visited from highest to lowest.
+
+cache_op_5:
+ sub. r6,r6,r9 // more to go?
+ dcbf r6,r3 // flush next line to RAM
+ bne cache_op_5 // loop if more to go
+ sync // make sure the data reaches RAM
+ sub r6,r4,r3 // reset count
+
+
+ // ICBI loop
+ // Same countdown scheme as the DCBF loop.
+
+cache_op_6:
+ bf-- kIcbib,cache_op_8 // no need to icbi
+cache_op_7:
+ sub. r6,r6,r9 // more to go?
+ icbi r6,r3 // invalidate next line
+ bne cache_op_7
+ sub r6,r4,r3 // reset count
+ isync
+ sync
+
+
+ // DCBI loop
+ // Same countdown scheme as the DCBF loop.
+
+cache_op_8:
+ bf++ kDcbib,cache_op_exit // no need to dcbi
+cache_op_9:
+ sub. r6,r6,r9 // more to go?
+ dcbi r6,r3 // invalidate next line
+ bne cache_op_9
+ sync
+
+
+ // restore MSR iff necessary and done
+ // cr5 still holds the physical/virtual comparison made at entry.
+
+cache_op_exit:
+ beqlr-- cr5 // if using virtual addresses, no need to restore MSR
+ b EXT(ml_restore) // restore MSR and return
+
+
+////////////////////////////////////////////////////
+
+ .align 5
+ .globl _dcache_incoherent_io_store64
+ // dcache_incoherent_io_store64: write the data cache lines covering a physical
+ // range back to memory.
+ //
+ // Entry: r3 = high word of the 64-bit physical address, r4 = low word, r5 = byte count.
+ // On a 64-bit processor, an address in 0x80000000-0xFFFFFFFF returns immediately
+ // with no cache operation performed.
+ // Otherwise: switch to physical addressing, dcbst every line except the lowest one,
+ // then dcbf the lowest line and load from it, and leave through ml_restore.
+ // Registers written here: r0, r3, r4, r6-r10, r12, LR (saved/restored around the bl), cr0, cr6.
+ // NOTE(review): ml_set_physical and ml_restore are defined elsewhere; the registers they
+ // use and the state ml_restore relies on are not visible in this section - confirm there.
+_dcache_incoherent_io_store64:
+ rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
+ rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
+ mr r4,r5 ; Move count
+
+ // here with r3=addr, r4=count
+ mfsprg r10,2 // r10 <- processor feature flags
+ andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
+ mtcrf 0x02,r10 // move pf64Bit bit to CR6
+ subi r8,r9,1 // r8 <- (linesize-1)
+
+ bf-- pf64Bitb,cache_ios_not64 // This is not a 64-bit machine
+
+ // r3 >> 31 equals 1 exactly when r3 is in 0x80000000-0xFFFFFFFF
+ srdi r12,r3,31 // Slide bit 32 to bit 63
+ cmpldi r12,1 // Are we in the I/O mapped area?
+ beqlr-- // No cache ops allowed here...
+
+cache_ios_not64:
+ // LR is parked in r12 because the bl below overwrites it
+ mflr r12 // save return address
+ bl EXT(ml_set_physical) // turn on physical addressing
+ mtlr r12 // restore return address
+
+ // get r3=first cache line, r4=first line not in set, r6=byte count
+ add r7,r3,r4 // point to 1st byte not to operate on
+ andc r3,r3,r8 // r3 <- 1st line to operate on
+ add r4,r7,r8 // round up
+ andc r4,r4,r8 // r4 <- 1st line not to operate on
+ sub. r6,r4,r3 // r6 <- byte count to operate on
+ beq-- cache_ios_exit // nothing to do
+
+ sub. r6,r6,r9 // r6 <- offset of the highest line; zero means a single line
+ beq cache_ios_last_line // single line: the dcbf below handles it alone
+
+ // DCBST loop
+ // Walk from the highest line down to, but not including, offset 0.
+ // The dcbst is issued BEFORE the decrement: decrementing first would skip
+ // the highest line of the range and leave it unwritten.
+cache_ios_5:
+ dcbst r6,r3 // store the line at offset r6
+ sub. r6,r6,r9 // step down one line
+ bne cache_ios_5 // loop until only the line at offset 0 remains
+
+cache_ios_last_line: // r6 is 0 here, so the remaining line is the one at r3
+ sync // wait for the dcbst stores before flushing the final line
+ isync
+ dcbf r6,r3 // flush the one line not covered by the dcbst loop
+ sync
+ isync
+ add r6,r6,r3 // r6 <- address of that line
+ lwz r0,0(r6) // make sure the data reaches RAM (not just the memory controller)
+ isync
+
+ // restore MSR
+cache_ios_exit:
+ b EXT(ml_restore) // restore MSR and return
+
+
+////////////////////////////////////////////////////
+
+ .align 5
+ .globl _dcache_incoherent_io_flush64
+ // dcache_incoherent_io_flush64: dcbf every data cache line covering a physical range.
+ //
+ // Entry: r3 = high word of the 64-bit physical address, r4 = low word, r5 = byte count.
+ // On a 64-bit processor, an address in 0x80000000-0xFFFFFFFF returns immediately
+ // with no cache operation performed.
+ // Otherwise: switch to physical addressing, dcbf each line of the range (widened to
+ // whole cache lines), sync/isync, and leave through ml_restore.
+ // Registers written here: r3, r4, r6-r10, r12, LR (saved/restored around the bl), cr0, cr6.
+ // NOTE(review): ml_set_physical and ml_restore are defined elsewhere; the registers they
+ // use and the state ml_restore relies on are not visible in this section - confirm there.
+_dcache_incoherent_io_flush64:
+ rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
+ rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
+ mr r4,r5 ; Move count
+
+ // here with r3=addr, r4=count
+ mfsprg r10,2 // r10 <- processor feature flags
+ andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
+ mtcrf 0x02,r10 // move pf64Bit bit to CR6
+ subi r8,r9,1 // r8 <- (linesize-1)
+
+ bf-- pf64Bitb,cache_iof_not64 // This is not a 64-bit machine
+
+ // r3 >> 31 equals 1 exactly when r3 is in 0x80000000-0xFFFFFFFF
+ srdi r12,r3,31 // Slide bit 32 to bit 63
+ cmpldi r12,1 // Are we in the I/O mapped area?
+ beqlr-- // No cache ops allowed here...
+
+cache_iof_not64:
+ // LR is parked in r12 because the bl below overwrites it
+ mflr r12 // save return address
+ bl EXT(ml_set_physical) // turn on physical addressing
+ mtlr r12 // restore return address
+
+ // get r3=first cache line, r4=first line not in set, r6=byte count
+ add r7,r3,r4 // point to 1st byte not to operate on
+ andc r3,r3,r8 // r3 <- 1st line to operate on
+ add r4,r7,r8 // round up
+ andc r4,r4,r8 // r4 <- 1st line not to operate on
+ sub. r6,r4,r3 // r6 <- byte count to operate on
+ beq-- cache_iof_exit // nothing to do
+
+ // DCBF loop
+ // r6 counts down from the total byte count by one line per pass; each pass
+ // flushes the line at r3+r6, so lines are visited from highest to lowest.
+cache_iof_5:
+ sub. r6,r6,r9 // more to go?
+ dcbf r6,r3 // flush next line
+ bne cache_iof_5 // loop if more to go
+
+cache_iof_last_line: // label is not referenced within this routine
+ sync // flush last line
+ isync
+
+ // restore MSR
+cache_iof_exit:
+ b EXT(ml_restore) // restore MSR and return
+