X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d7e50217d7adf6e52786a38bcaa4cd698cb9a79e..de355530ae67247cbd0da700edb3a2a1dae884c2:/osfmk/ppc/cache.s

diff --git a/osfmk/ppc/cache.s b/osfmk/ppc/cache.s
index 94d9fe0ca..0f3157714 100644
--- a/osfmk/ppc/cache.s
+++ b/osfmk/ppc/cache.s
@@ -3,22 +3,19 @@
 *
 * @APPLE_LICENSE_HEADER_START@
 *
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
 *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
@@ -26,27 +23,82 @@
 * @OSF_COPYRIGHT@
 */

+#include 
+
 #include 
 #include 
-#include 
 #include 
+#include 
+#include 

-/* These routines run in 32 or 64-bit addressing, and handle
- * 32 and 128 byte caches. They do not use compare instructions
- * on addresses, since compares are 32/64-bit-mode-specific.
+/*
+ * extern void sync_cache(vm_offset_t pa, unsigned count);
+ *
+ * sync_cache takes a physical address and count to sync, thus
+ * must not be called for multiple virtual pages.
+ *
+ * it writes out the data cache and invalidates the instruction
+ * cache for the address range in question
 */

-#define	kDcbf	0x1
-#define	kDcbfb	31
-#define	kDcbi	0x2
-#define	kDcbib	30
-#define	kIcbi	0x4
-#define	kIcbib	29
+ENTRY(sync_cache, TAG_NO_FRAME_USED)
+
+	/* Switch off data translations */
+	mfmsr	r6
+	rlwinm	r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+	rlwinm	r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+	rlwinm	r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+	mtmsr	r7
+	isync
+
+	/* Check to see if the address is aligned. */
+	add	r8, r3,r4
+	andi.	r8,r8,(CACHE_LINE_SIZE-1)
+	beq-	.L_sync_check
+	addi	r4,r4,CACHE_LINE_SIZE
+	li	r7,(CACHE_LINE_SIZE-1)	/* Align buffer & count - avoid overflow problems */
+	andc	r4,r4,r7
+	andc	r3,r3,r7
+
+.L_sync_check:
+	cmpwi	r4, CACHE_LINE_SIZE
+	ble	.L_sync_one_line
+
+	/* Make ctr hold count of how many times we should loop */
+	addi	r8, r4, (CACHE_LINE_SIZE-1)
+	srwi	r8, r8, CACHE_LINE_POW2
+	mtctr	r8
+
+	/* loop to flush the data cache */
+.L_sync_data_loop:
+	subic	r4, r4, CACHE_LINE_SIZE
+	dcbf	r3, r4
+	bdnz	.L_sync_data_loop
+
+	sync
+	mtctr	r8
+
+	/* loop to invalidate the instruction cache */
+.L_sync_inval_loop:
+	icbi	r3, r4
+	addic	r4, r4, CACHE_LINE_SIZE
+	bdnz	.L_sync_inval_loop
+
+.L_sync_cache_done:
+	sync			/* Finish physical writes */
+	mtmsr	r6		/* Restore original translations */
+	isync			/* Ensure data translations are on */
+	blr
+
+.L_sync_one_line:
+	dcbf	0,r3
+	sync
+	icbi	0,r3
+	b	.L_sync_cache_done
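The plus-side sync_cache above is the classic PowerPC sequence for making freshly written bytes visible and executable: write dirty data lines back with dcbf, sync, invalidate the matching instruction-cache lines with icbi, then sync and isync. A minimal C sketch of that algorithm follows, assuming a 32-byte line and GCC-style inline assembly on a PowerPC target; the real routine additionally clears MSR[DR] so that r3 is treated as a physical address, a step plain C cannot express and which is omitted here.

    #define CACHE_LINE_SIZE 32u

    /* Sketch only: same line set as sync_cache, ascending order. */
    static void sync_cache_sketch(unsigned long addr, unsigned long count)
    {
        unsigned long start = addr & ~(unsigned long)(CACHE_LINE_SIZE - 1);
        unsigned long end   = (addr + count + CACHE_LINE_SIZE - 1) &
                              ~(unsigned long)(CACHE_LINE_SIZE - 1);
        unsigned long line;

        for (line = start; line < end; line += CACHE_LINE_SIZE)
            __asm__ volatile ("dcbf 0,%0" : : "r" (line) : "memory");
        __asm__ volatile ("sync" : : : "memory");   /* flushes reach memory  */

        for (line = start; line < end; line += CACHE_LINE_SIZE)
            __asm__ volatile ("icbi 0,%0" : : "r" (line) : "memory");
        __asm__ volatile ("sync" : : : "memory");
        __asm__ volatile ("isync" : : : "memory");  /* drop prefetched insns */
    }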
 
 /*
 * extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
- * extern void flush_dcache64(addr64_t addr, unsigned count, boolean phys);
 *
 * flush_dcache takes a virtual or physical address and count to flush
 * and (can be called for multiple virtual pages).
@@ -57,222 +109,172 @@
 * if 'phys' is non-zero then physical addresses will be used
 */
 
+ENTRY(flush_dcache, TAG_NO_FRAME_USED)
+
+	/* optionally switch off data translations */
+
+	cmpwi	r5, 0
+	mfmsr	r6
+	beq+	0f
+	rlwinm	r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+	rlwinm	r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+	rlwinm	r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+	mtmsr	r7
+	isync
+0:
+
+	/* Check to see if the address is aligned. */
+	add	r8, r3,r4
+	andi.	r8,r8,(CACHE_LINE_SIZE-1)
+	beq-	.L_flush_dcache_check
+	addi	r4,r4,CACHE_LINE_SIZE
+	li	r7,(CACHE_LINE_SIZE-1)	/* Align buffer & count - avoid overflow problems */
+	andc	r4,r4,r7
+	andc	r3,r3,r7
-
-	.text
-	.align	5
-	.globl	_flush_dcache
-_flush_dcache:
-	li	r0,kDcbf		// use DCBF instruction
-	rlwinm	r3,r3,0,0,31		// truncate address in case this is a 64-bit machine
-	b	cache_op_join		// join common code
+.L_flush_dcache_check:
+	cmpwi	r4, CACHE_LINE_SIZE
+	ble	.L_flush_dcache_one_line
+
+	/* Make ctr hold count of how many times we should loop */
+	addi	r8, r4, (CACHE_LINE_SIZE-1)
+	srwi	r8, r8, CACHE_LINE_POW2
+	mtctr	r8
 
-	.align	5
-	.globl	_flush_dcache64
-_flush_dcache64:
-	rlwinm	r3,r3,0,1,0		; Duplicate high half of long long paddr into top of reg
-	li	r0,kDcbf		// use DCBF instruction
-	rlwimi	r3,r4,0,0,31		; Combine bottom of long long to full 64-bits
-	mr	r4,r5			; Move count
-	mr	r5,r6			; Move physical flag
-	b	cache_op_join		// join common code
+.L_flush_dcache_flush_loop:
+	subic	r4, r4, CACHE_LINE_SIZE
+	dcbf	r3, r4
+	bdnz	.L_flush_dcache_flush_loop
+
+.L_flush_dcache_done:
+	/* Sync restore msr if it was modified */
+	cmpwi	r5, 0
+	sync			/* make sure invalidates have completed */
+	beq+	0f
+	mtmsr	r6		/* Restore original translations */
+	isync			/* Ensure data translations are on */
+0:
+	blr
+
+.L_flush_dcache_one_line:
+	xor	r4,r4,r4
+	dcbf	0,r3
+	b	.L_flush_dcache_done
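Note how the plus-side loop setup computes its trip count: iterations = ceil(count / line size), done with an add and a shift, after which the loop walks the buffer top-down, decrementing the byte offset in r4 before each dcbf r3,r4 (base plus offset). The same arithmetic in C, as an illustrative sketch only; the "b" constraint is GCC's PowerPC base-register class, which keeps the base register out of r0, since r0 in the RA slot of dcbf is read as a literal zero:

    #include <stddef.h>

    #define CACHE_LINE_POW2 5                       /* log2 of the line size */
    #define CACHE_LINE_SIZE (1u << CACHE_LINE_POW2)

    /* Mirrors the setup above: "addi r8,r4,SIZE-1; srwi r8,r8,POW2"
     * then the descending "subic r4,r4,SIZE; dcbf r3,r4" loop. */
    static void flush_dcache_sketch(char *base, size_t count)
    {
        size_t lines  = (count + CACHE_LINE_SIZE - 1) >> CACHE_LINE_POW2;
        size_t offset = lines << CACHE_LINE_POW2;

        while (lines--) {
            offset -= CACHE_LINE_SIZE;
            __asm__ volatile ("dcbf %0,%1" : : "b" (base), "r" (offset) : "memory");
        }
        __asm__ volatile ("sync" : : : "memory");   /* wait for write-backs */
    }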
 
 /*
 * extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);
- * extern void invalidate_dcache64(addr64_t va, unsigned count, boolean phys);
 *
 * invalidate_dcache takes a virtual or physical address and count to
 * invalidate and (can be called for multiple virtual pages).
 *
 * it invalidates the data cache for the address range in question
 */
-
-	.globl	_invalidate_dcache
-_invalidate_dcache:
-	li	r0,kDcbi		// use DCBI instruction
-	rlwinm	r3,r3,0,0,31		// truncate address in case this is a 64-bit machine
-	b	cache_op_join		// join common code
-
-
-	.align	5
-	.globl	_invalidate_dcache64
-_invalidate_dcache64:
-	rlwinm	r3,r3,0,1,0		; Duplicate high half of long long paddr into top of reg
-	li	r0,kDcbi		// use DCBI instruction
-	rlwimi	r3,r4,0,0,31		; Combine bottom of long long to full 64-bits
-	mr	r4,r5			; Move count
-	mr	r5,r6			; Move physical flag
-	b	cache_op_join		// join common code
+
+ENTRY(invalidate_dcache, TAG_NO_FRAME_USED)
+
+	/* optionally switch off data translations */
+
+	cmpwi	r5, 0
+	mfmsr	r6
+	beq+	0f
+	rlwinm	r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+	rlwinm	r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+	rlwinm	r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+	mtmsr	r7
+	isync
+0:
+
+	/* Check to see if the address is aligned. */
+	add	r8, r3,r4
+	andi.	r8,r8,(CACHE_LINE_SIZE-1)
+	beq-	.L_invalidate_dcache_check
+	addi	r4,r4,CACHE_LINE_SIZE
+	li	r7,(CACHE_LINE_SIZE-1)	/* Align buffer & count - avoid overflow problems */
+	andc	r4,r4,r7
+	andc	r3,r3,r7
+
+.L_invalidate_dcache_check:
+	cmpwi	r4, CACHE_LINE_SIZE
+	ble	.L_invalidate_dcache_one_line
+
+	/* Make ctr hold count of how many times we should loop */
+	addi	r8, r4, (CACHE_LINE_SIZE-1)
+	srwi	r8, r8, CACHE_LINE_POW2
+	mtctr	r8
+
+.L_invalidate_dcache_invalidate_loop:
+	subic	r4, r4, CACHE_LINE_SIZE
+	dcbi	r3, r4
+	bdnz	.L_invalidate_dcache_invalidate_loop
+
+.L_invalidate_dcache_done:
+	/* Sync restore msr if it was modified */
+	cmpwi	r5, 0
+	sync			/* make sure invalidates have completed */
+	beq+	0f
+	mtmsr	r6		/* Restore original translations */
+	isync			/* Ensure data translations are on */
+0:
+	blr
+
+.L_invalidate_dcache_one_line:
+	xor	r4,r4,r4
+	dcbi	0,r3
+	b	.L_invalidate_dcache_done
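Unlike flush_dcache, the routine above uses dcbi, which discards modified lines without writing them back; it is destructive (and supervisor-only), so it is safe only when the cached contents are known to be stale. A hypothetical caller, sketched in C against the prototype in the comment; the driver function, the typedefs, and the flag value are illustrative and not from this file:

    typedef unsigned long vm_offset_t;   /* matches the prototype above */
    typedef int boolean;

    extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);

    /* Hypothetical DMA-receive completion path: any cached copies of
     * the buffer predate the device's writes, so discard them unread. */
    void dma_rx_complete(void *buf, unsigned len)
    {
        invalidate_dcache((vm_offset_t)buf, len, 0);  /* 0: virtual address */
        /* ...the freshly DMA'd bytes can now be read through the cache. */
    }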
 
 /*
 * extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys);
- * extern void invalidate_icache64(addr64_t addr, unsigned cnt, boolean phys);
 *
 * invalidate_icache takes a virtual or physical address and
 * count to invalidate, (can be called for multiple virtual pages).
 *
 * it invalidates the instruction cache for the address range in question.
 */
-
-	.globl	_invalidate_icache
-_invalidate_icache:
-	li	r0,kIcbi		// use ICBI instruction
-	rlwinm	r3,r3,0,0,31		// truncate address in case this is a 64-bit machine
-	b	cache_op_join		// join common code
-
-
-	.align	5
-	.globl	_invalidate_icache64
-_invalidate_icache64:
-	rlwinm	r3,r3,0,1,0		; Duplicate high half of long long paddr into top of reg
-	li	r0,kIcbi		// use ICBI instruction
-	rlwimi	r3,r4,0,0,31		; Combine bottom of long long to full 64-bits
-	mr	r4,r5			; Move count
-	mr	r5,r6			; Move physical flag
-	b	cache_op_join		// join common code
-
-/*
- * extern void sync_ppage(ppnum_t pa);
- *
- * sync_ppage takes a physical page number
- *
- * it writes out the data cache and invalidates the instruction
- * cache for the address range in question
- */
-
-	.globl	_sync_ppage
-	.align	5
-_sync_ppage:				// Should be the most commonly called routine, by far
-	mfsprg	r2,2
-	li	r0,kDcbf+kIcbi		// we need to dcbf and then icbi
-	mtcrf	0x02,r2			; Move pf64Bit to cr6
-	li	r5,1			// set flag for physical addresses
-	li	r4,4096			; Set page size
-	bt++	pf64Bitb,spp64		; Skip if 64-bit (only they take the hint)
-	rlwinm	r3,r3,12,0,19		; Convert to physical address - 32-bit
-	b	cache_op_join		; Join up....
-
-spp64:	sldi	r3,r3,12		; Convert to physical address - 64-bit
-	b	cache_op_join		; Join up....
-
+ENTRY(invalidate_icache, TAG_NO_FRAME_USED)
+
+	/* optionally switch off data translations */
+
+	cmpwi	r5, 0
+	mfmsr	r6
+	beq+	0f
+	rlwinm	r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
+	rlwinm	r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
+	rlwinm	r7, r6, 0, MSR_DR_BIT+1, MSR_DR_BIT-1
+	mtmsr	r7
+	isync
+0:
 
-/*
- * extern void sync_cache_virtual(vm_offset_t addr, unsigned count);
- *
- * Like "sync_cache", except it takes a virtual address and byte count.
- * It flushes the data cache, invalidates the I cache, and sync's.
- */
-
-	.globl	_sync_cache_virtual
-	.align	5
-_sync_cache_virtual:
-	li	r0,kDcbf+kIcbi		// we need to dcbf and then icbi
-	li	r5,0			// set flag for virtual addresses
-	b	cache_op_join		// join common code
-
-
-/*
- * extern void sync_cache(vm_offset_t pa, unsigned count);
- * extern void sync_cache64(addr64_t pa, unsigned count);
- *
- * sync_cache takes a physical address and count to sync, thus
- * must not be called for multiple virtual pages.
- *
- * it writes out the data cache and invalidates the instruction
- * cache for the address range in question
- */
+	/* Check to see if the address is aligned. */
+	add	r8, r3,r4
+	andi.	r8,r8,(CACHE_LINE_SIZE-1)
+	beq-	.L_invalidate_icache_check
+	addi	r4,r4,CACHE_LINE_SIZE
+	li	r7,(CACHE_LINE_SIZE-1)	/* Align buffer & count - avoid overflow problems */
+	andc	r4,r4,r7
+	andc	r3,r3,r7
+
+.L_invalidate_icache_check:
+	cmpwi	r4, CACHE_LINE_SIZE
+	ble	.L_invalidate_icache_one_line
+
+	/* Make ctr hold count of how many times we should loop */
+	addi	r8, r4, (CACHE_LINE_SIZE-1)
+	srwi	r8, r8, CACHE_LINE_POW2
+	mtctr	r8
+
+.L_invalidate_icache_invalidate_loop:
+	subic	r4, r4, CACHE_LINE_SIZE
+	icbi	r3, r4
+	bdnz	.L_invalidate_icache_invalidate_loop
 
-	.globl	_sync_cache
-	.align	5
-_sync_cache:
-	li	r0,kDcbf+kIcbi		// we need to dcbf and then icbi
-	li	r5,1			// set flag for physical addresses
-	rlwinm	r3,r3,0,0,31		// truncate address in case this is a 64-bit machine
-	b	cache_op_join		// join common code
-
-	.globl	_sync_cache64
-	.align	5
-_sync_cache64:
-	rlwinm	r3,r3,0,1,0		; Duplicate high half of long long paddr into top of reg
-	li	r0,kDcbf+kIcbi		// we need to dcbf and then icbi
-	rlwimi	r3,r4,0,0,31		; Combine bottom of long long to full 64-bits
-	mr	r4,r5			; Copy over the length
-	li	r5,1			// set flag for physical addresses
-
-
-	// Common code to handle the cache operations.
-
-cache_op_join:				// here with r3=addr, r4=count, r5=phys flag, r0=bits
-	mfsprg	r10,2			// r10 <- processor feature flags
-	cmpwi	cr5,r5,0		// using physical addresses?
-	mtcrf	0x01,r0			// move kDcbf, kDcbi, and kIcbi bits to CR7
-	andi.	r9,r10,pf32Byte+pf128Byte	// r9 <- cache line size
-	mtcrf	0x02,r10		// move pf64Bit bit to CR6
-	subi	r8,r9,1			// r8 <- (linesize-1)
-	beq--	cr5,cache_op_2		// skip if using virtual addresses
-
-	bf--	pf64Bitb,cache_op_not64	// This is not a 64-bit machine
-
-	srdi	r12,r3,31		// Slide bit 32 to bit 63
-	cmpldi	r12,1			// Are we in the I/O mapped area?
-	beqlr--				// No cache ops allowed here...
-
-cache_op_not64:
-	mflr	r12			// save return address
-	bl	EXT(ml_set_physical)	// turn on physical addressing
-	mtlr	r12			// restore return address
-
-	// get r3=first cache line, r4=first line not in set, r6=byte count
-
-cache_op_2:
-	add	r7,r3,r4		// point to 1st byte not to operate on
-	andc	r3,r3,r8		// r3 <- 1st line to operate on
-	add	r4,r7,r8		// round up
-	andc	r4,r4,r8		// r4 <- 1st line not to operate on
-	sub.	r6,r4,r3		// r6 <- byte count to operate on
-	beq--	cache_op_exit		// nothing to do
-	bf--	kDcbfb,cache_op_6	// no need to dcbf
-
-
-	// DCBF loop
-
-cache_op_5:
-	sub.	r6,r6,r9		// more to go?
-	dcbf	r6,r3			// flush next line to RAM
-	bne	cache_op_5		// loop if more to go
-	sync				// make sure the data reaches RAM
-	sub	r6,r4,r3		// reset count
-
-
-	// ICBI loop
-
-cache_op_6:
-	bf--	kIcbib,cache_op_8	// no need to icbi
-cache_op_7:
-	sub.	r6,r6,r9		// more to go?
-	icbi	r6,r3			// invalidate next line
-	bne	cache_op_7
-	sub	r6,r4,r3		// reset count
-	isync
-	sync
-
-
-	// DCBI loop
-
-cache_op_8:
-	bf++	kDcbib,cache_op_exit	// no need to dcbi
-cache_op_9:
-	sub.	r6,r6,r9		// more to go?
-	dcbi	r6,r3			// invalidate next line
-	bne	cache_op_9
-	sync
-
-
-	// restore MSR iff necessary and done
-
-cache_op_exit:
-	beqlr--	cr5			// if using virtual addresses, no need to restore MSR
-	b	EXT(ml_restore)		// restore MSR and return
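The minus-side block that ends here is the design this patch removes: every entry point loads an operation bitmask into r0 (kDcbf, kDcbi, kIcbi, defined near the top of the minus-side file), cache_op_join transfers that mask into the condition register with mtcrf, and bf/bt tests on single CR bits (kDcbfb, kIcbib, kDcbib) skip whole loops, with the line size (32 or 128 bytes, pf32Byte/pf128Byte) probed from the per-processor feature flags. A rough C restatement of that dispatch, for orientation only; linesize (a power of two) stands in for the feature-flag probe, and GCC-style inline assembly on a PowerPC target is assumed:

    #define kDcbf 0x1   /* write dirty data lines back to memory   */
    #define kDcbi 0x2   /* discard data lines without writing back */
    #define kIcbi 0x4   /* invalidate instruction-cache lines      */

    static void cache_op_sketch(unsigned long addr, unsigned long len,
                                unsigned op, unsigned long linesize)
    {
        unsigned long first = addr & ~(linesize - 1);
        unsigned long last  = (addr + len + linesize - 1) & ~(linesize - 1);
        unsigned long p;

        if (op & kDcbf) {                  /* cache_op_5: write-back loop */
            for (p = first; p < last; p += linesize)
                __asm__ volatile ("dcbf 0,%0" : : "r" (p) : "memory");
            __asm__ volatile ("sync" : : : "memory");
        }
        if (op & kIcbi) {                  /* cache_op_7: icbi loop */
            for (p = first; p < last; p += linesize)
                __asm__ volatile ("icbi 0,%0" : : "r" (p) : "memory");
            __asm__ volatile ("isync" : : : "memory");
            __asm__ volatile ("sync" : : : "memory");
        }
        if (op & kDcbi) {                  /* cache_op_9: discard loop */
            for (p = first; p < last; p += linesize)
                __asm__ volatile ("dcbi 0,%0" : : "r" (p) : "memory");
            __asm__ volatile ("sync" : : : "memory");
        }
    }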
+.L_invalidate_icache_done:
+	sync			/* make sure invalidates have completed */
+	mtmsr	r6		/* Restore original translations */
+	isync			/* Ensure data translations are on */
+	blr
+
+.L_invalidate_icache_one_line:
+	xor	r4,r4,r4
+	icbi	0,r3
+	b	.L_invalidate_icache_done
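Net effect of the patch: the common cache_op_join path and the 64-bit entry points (flush_dcache64, invalidate_dcache64, invalidate_icache64, sync_cache64, along with sync_ppage and sync_cache_virtual) disappear in favor of one self-contained, CACHE_LINE_SIZE-based loop per routine. The C contract of the surviving entry points is unchanged. Because sync_cache takes a physical address, the header comment's warning applies: a caller must keep the range within one virtual page, since adjacent virtual pages need not be physically contiguous. A hypothetical use, sketched in C (the caller function is illustrative; only the prototype comes from the file's comment):

    typedef unsigned long vm_offset_t;

    /* Prototype from the header comment of the plus-side file. */
    extern void sync_cache(vm_offset_t pa, unsigned count);

    /* Hypothetical caller: after patching instructions in place, push
     * the new bytes out of the D-cache and drop stale I-cache lines
     * before any CPU executes them.  'code_pa' is the physical address
     * of the patched range, which must not cross a page boundary. */
    void publish_patched_code(vm_offset_t code_pa, unsigned len)
    {
        sync_cache(code_pa, len);   /* dcbf+sync, then icbi+sync+isync */
    }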