X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/cb3231590a3c94ab4375e2228bd5e86b0cf1ad7e..HEAD:/osfmk/arm64/tlb.h?ds=sidebyside

diff --git a/osfmk/arm64/tlb.h b/osfmk/arm64/tlb.h
index eb1face77..aa3a73c8e 100644
--- a/osfmk/arm64/tlb.h
+++ b/osfmk/arm64/tlb.h
@@ -55,6 +55,13 @@ sync_tlb_flush(void)
 	__builtin_arm_isb(ISB_SY);
 }
 
+static inline void
+sync_tlb_flush_local(void)
+{
+	__builtin_arm_dsb(DSB_NSH);
+	__builtin_arm_isb(ISB_SY);
+}
+
 // flush_mmu_tlb: full TLB flush on all cores
 static inline void
 flush_mmu_tlb_async(void)
@@ -86,37 +93,39 @@ flush_core_tlb(void)
 // flush_mmu_tlb_allentries_async: flush entries that map VA range, all ASIDS, all cores
 // start and end are in units of 4K pages.
 static inline void
-flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end)
+flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size)
 {
 #if __ARM_16K_PG__
-	start = start & ~0x3ULL;
+	if (pmap_page_size == 16384) {
+		start = start & ~0x3ULL;
 
-	/*
-	 * The code below is not necessarily correct. From an overview of
-	 * the client code, the expected contract for TLB flushes is that
-	 * we will expand from an "address, length" pair to "start address,
-	 * end address" in the course of a TLB flush. This suggests that
-	 * a flush for "X, X+4" is actually only asking for a flush of a
-	 * single 16KB page. At the same time, we'd like to be prepared
-	 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
-	 * number to a 16KB page boundary. This should deal correctly with
-	 * unaligned inputs.
-	 *
-	 * If our expectations about client behavior are wrong however, this
-	 * will lead to occasional TLB corruption on platforms with 16KB
-	 * pages.
-	 */
-	end = (end + 0x3ULL) & ~0x3ULL;
+		/*
+		 * The code below is not necessarily correct. From an overview of
+		 * the client code, the expected contract for TLB flushes is that
+		 * we will expand from an "address, length" pair to "start address,
+		 * end address" in the course of a TLB flush. This suggests that
+		 * a flush for "X, X+4" is actually only asking for a flush of a
+		 * single 16KB page. At the same time, we'd like to be prepared
+		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
+		 * number to a 16KB page boundary. This should deal correctly with
+		 * unaligned inputs.
+		 *
+		 * If our expectations about client behavior are wrong however, this
+		 * will lead to occasional TLB corruption on platforms with 16KB
+		 * pages.
+		 */
+		end = (end + 0x3ULL) & ~0x3ULL;
+	}
 #endif // __ARM_16K_PG__
-	for (; start < end; start += (ARM_PGBYTES / 4096)) {
+	for (; start < end; start += (pmap_page_size / 4096)) {
 		asm volatile ("tlbi vaae1is, %0" : : "r"(start));
 	}
 }
 
 static inline void
-flush_mmu_tlb_allentries(uint64_t start, uint64_t end)
+flush_mmu_tlb_allentries(uint64_t start, uint64_t end, uint64_t pmap_page_size)
 {
-	flush_mmu_tlb_allentries_async(start, end);
+	flush_mmu_tlb_allentries_async(start, end, pmap_page_size);
 	sync_tlb_flush();
 }
 
@@ -149,27 +158,29 @@ flush_mmu_tlb_entry(uint64_t val)
 // start and end must have the ASID in the high 16 bits, with the VA in units of 4K in the lowest bits
 // Will also flush global entries that match the VA range
 static inline void
-flush_mmu_tlb_entries_async(uint64_t start, uint64_t end)
+flush_mmu_tlb_entries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size)
 {
 #if __ARM_16K_PG__
-	start = start & ~0x3ULL;
+	if (pmap_page_size == 16384) {
+		start = start & ~0x3ULL;
 
-	/*
-	 * The code below is not necessarily correct. From an overview of
-	 * the client code, the expected contract for TLB flushes is that
-	 * we will expand from an "address, length" pair to "start address,
-	 * end address" in the course of a TLB flush. This suggests that
-	 * a flush for "X, X+4" is actually only asking for a flush of a
-	 * single 16KB page. At the same time, we'd like to be prepared
-	 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
-	 * number to a 16KB page boundary. This should deal correctly with
-	 * unaligned inputs.
-	 *
-	 * If our expectations about client behavior are wrong however, this
-	 * will lead to occasional TLB corruption on platforms with 16KB
-	 * pages.
-	 */
-	end = (end + 0x3ULL) & ~0x3ULL;
+		/*
+		 * The code below is not necessarily correct. From an overview of
+		 * the client code, the expected contract for TLB flushes is that
+		 * we will expand from an "address, length" pair to "start address,
+		 * end address" in the course of a TLB flush. This suggests that
+		 * a flush for "X, X+4" is actually only asking for a flush of a
+		 * single 16KB page. At the same time, we'd like to be prepared
+		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
+		 * number to a 16KB page boundary. This should deal correctly with
+		 * unaligned inputs.
+		 *
+		 * If our expectations about client behavior are wrong however, this
+		 * will lead to occasional TLB corruption on platforms with 16KB
+		 * pages.
+		 */
+		end = (end + 0x3ULL) & ~0x3ULL;
+	}
 #endif // __ARM_16K_PG__
 #if __ARM_KERNEL_PROTECT__
 	uint64_t asid = start >> TLBI_ASID_SHIFT;
@@ -178,30 +189,30 @@ flush_mmu_tlb_entries_async(uint64_t start, uint64_t end)
 	 * ASID scheme, this means we should flush all ASIDs.
 	 */
 	if (asid == 0) {
-		for (; start < end; start += (ARM_PGBYTES / 4096)) {
+		for (; start < end; start += (pmap_page_size / 4096)) {
 			asm volatile ("tlbi vaae1is, %0" : : "r"(start));
 		}
 		return;
 	}
 	start = start | (1ULL << TLBI_ASID_SHIFT);
 	end = end | (1ULL << TLBI_ASID_SHIFT);
-	for (; start < end; start += (ARM_PGBYTES / 4096)) {
+	for (; start < end; start += (pmap_page_size / 4096)) {
 		start = start & ~(1ULL << TLBI_ASID_SHIFT);
 		asm volatile ("tlbi vae1is, %0" : : "r"(start));
 		start = start | (1ULL << TLBI_ASID_SHIFT);
 		asm volatile ("tlbi vae1is, %0" : : "r"(start));
 	}
 #else
-	for (; start < end; start += (ARM_PGBYTES / 4096)) {
+	for (; start < end; start += (pmap_page_size / 4096)) {
 		asm volatile ("tlbi vae1is, %0" : : "r"(start));
 	}
 #endif /* __ARM_KERNEL_PROTECT__ */
 }
 
 static inline void
-flush_mmu_tlb_entries(uint64_t start, uint64_t end)
+flush_mmu_tlb_entries(uint64_t start, uint64_t end, uint64_t pmap_page_size)
 {
-	flush_mmu_tlb_entries_async(start, end);
+	flush_mmu_tlb_entries_async(start, end, pmap_page_size);
 	sync_tlb_flush();
 }
 
@@ -270,8 +281,8 @@ flush_core_tlb_asid(uint64_t val)
 #error __ARM_RANGE_TLBI__ + __ARM_KERNEL_PROTECT__ is not currently supported
 #endif
 
-#define ARM64_16K_TLB_RANGE_PAGES (1ULL << 21)
-#define rtlbi_addr(x) (((x) >> RTLBI_ADDR_SHIFT) & RTLBI_ADDR_MASK)
+#define ARM64_TLB_RANGE_PAGES (1ULL << 21)
+#define rtlbi_addr(x, shift) (((x) >> (shift)) & RTLBI_ADDR_MASK)
 #define rtlbi_scale(x) ((uint64_t)(x) << RTLBI_SCALE_SHIFT)
 #define rtlbi_num(x) ((uint64_t)(x) << RTLBI_NUM_SHIFT)
 
@@ -280,17 +291,17 @@ flush_core_tlb_asid(uint64_t val)
  * pass to any of the TLBI by range methods.
  */
 static inline uint64_t
-generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va)
+generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va, uint64_t pmap_page_shift)
 {
 	/**
 	 * Per the armv8.4 RTLBI extension spec, the range encoded in the rtlbi register operand is defined by:
 	 * BaseADDR <= VA < BaseADDR+((NUM+1)*2^(5*SCALE+1) * Translation_Granule_Size)
 	 */
-	unsigned order = (sizeof(npages) * 8) - __builtin_clz(npages - 1) - 1;
+	unsigned order = (unsigned)(sizeof(npages) * 8) - (unsigned)__builtin_clz(npages - 1) - 1;
 	unsigned scale = ((order ? order : 1) - 1) / 5;
 	unsigned granule = 1 << ((5 * scale) + 1);
 	unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1;
-	return tlbi_asid(asid) | RTLBI_TG | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va);
+	return tlbi_asid(asid) | RTLBI_TG(pmap_page_shift) | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va, pmap_page_shift);
 }
 
 // flush_mmu_tlb_range: flush TLB entries that map a VA range using a single instruction
@@ -325,6 +336,22 @@ flush_mmu_tlb_allrange(uint64_t val)
 	sync_tlb_flush();
 }
 
+// flush_core_tlb_allrange: flush TLB entries that map a VA range using a single instruction, local core only
+// The argument should be encoded according to generate_rtlbi_param().
+// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
+static inline void
+flush_core_tlb_allrange_async(uint64_t val)
+{
+	asm volatile ("tlbi rvaae1, %0" : : "r"(val));
+}
+
+static inline void
+flush_core_tlb_allrange(uint64_t val)
+{
+	flush_core_tlb_allrange_async(val);
+	sync_tlb_flush();
+}
+
 #endif // __ARM_RANGE_TLBI__
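
The 16K rounding that this diff moves under the `if (pmap_page_size == 16384)` check can be exercised on its own. The sketch below is illustrative only: it is not part of tlb.h, it merely mirrors the add-3-then-truncate arithmetic and the `pmap_page_size / 4096` stride described in the comment, and the function and variable names are invented for the example. It assumes pmap_page_size is either 4096 or 16384.

/*
 * Standalone sketch (not from the header) of the start/end rounding that
 * flush_mmu_tlb_allentries_async() applies when pmap_page_size == 16384.
 * start/end are in units of 4K pages, so one 16K pmap page spans four units;
 * the stride pmap_page_size / 4096 then steps one pmap page per iteration.
 */
#include <stdint.h>
#include <stdio.h>

static void
show_flush_range(uint64_t start, uint64_t end, uint64_t pmap_page_size)
{
	if (pmap_page_size == 16384) {
		start = start & ~0x3ULL;        /* truncate start to a 16K boundary */
		end = (end + 0x3ULL) & ~0x3ULL; /* round a possibly unaligned end up */
	}
	for (; start < end; start += (pmap_page_size / 4096)) {
		printf("  would issue a TLBI for 4K-page index %llu\n",
		    (unsigned long long)start);
	}
}

int
main(void)
{
	/* A flush of [X, X+4) in 4K units is a single 16K page: one TLBI. */
	printf("16K pmap, [8, 12):\n");
	show_flush_range(8, 12, 16384);

	/* A "bad input" (X, X+3) still collapses to the same single 16K page. */
	printf("16K pmap, [8, 11):\n");
	show_flush_range(8, 11, 16384);

	/* With 4K pmap pages there is no rounding; each 4K unit is flushed. */
	printf("4K pmap, [8, 12):\n");
	show_flush_range(8, 12, 4096);
	return 0;
}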
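Similarly, the SCALE/NUM selection in generate_rtlbi_param() follows the quoted ARMv8.4 rule BaseADDR <= VA < BaseADDR + ((NUM+1) * 2^(5*SCALE+1) * Translation_Granule_Size). The standalone sketch below is again illustrative, with the tlbi_asid()/RTLBI_* register packing omitted and the helper name invented; it reproduces just that arithmetic so the chosen SCALE/NUM can be checked to cover npages.

/*
 * Standalone sketch (not from the header) of the SCALE/NUM computation in
 * generate_rtlbi_param(). Only the range math is reproduced; the coverage
 * printed is (NUM + 1) * 2^(5*SCALE + 1) pages, which should be >= npages.
 */
#include <stdint.h>
#include <stdio.h>

static void
rtlbi_range_params(uint32_t npages, unsigned *scale_out, unsigned *num_out)
{
	/* npages must be >= 2 here: npages - 1 == 0 would make __builtin_clz undefined */
	unsigned order = (unsigned)(sizeof(npages) * 8) - (unsigned)__builtin_clz(npages - 1) - 1;
	unsigned scale = ((order ? order : 1) - 1) / 5;
	unsigned granule = 1u << ((5 * scale) + 1);
	unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1;
	*scale_out = scale;
	*num_out = num;
}

int
main(void)
{
	uint32_t examples[] = { 2, 17, 64, 4096 };
	for (unsigned i = 0; i < sizeof(examples) / sizeof(examples[0]); i++) {
		unsigned scale, num;
		rtlbi_range_params(examples[i], &scale, &num);
		/* pages covered by the encoding: (NUM + 1) * 2^(5*SCALE + 1) */
		uint64_t covered = (uint64_t)(num + 1) << ((5 * scale) + 1);
		printf("npages=%u -> SCALE=%u NUM=%u covers %llu pages\n",
		    (unsigned)examples[i], scale, num, (unsigned long long)covered);
	}
	return 0;
}

For example, npages = 4096 yields SCALE = 2 and NUM = 1, i.e. 2 * 2^11 = 4096 pages covered, while npages = 17 yields SCALE = 0 and NUM = 8, covering 18 pages.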