/*
- * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>
+#include <sys/errno.h>
#include <machine/atomic.h>
#include <machine/thread.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
-#endif
+#endif // CONFIG_PGTRACE
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
+#include <arm64/amcc_rorgn.h>
+#endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#endif
#include <pexpert/device_tree.h>
#include <ptrauth.h>
#endif
+#ifdef CONFIG_XNUPOST
+#include <tests/xnupost.h>
+#endif
+
+
+#if HIBERNATION
+#include <IOKit/IOHibernatePrivate.h>
+#endif /* HIBERNATION */
+
#define PMAP_TT_L0_LEVEL 0x0
#define PMAP_TT_L1_LEVEL 0x1
#define PMAP_TT_L2_LEVEL 0x2
#define PMAP_TT_L3_LEVEL 0x3
-#if (__ARM_VMSA__ == 7)
-#define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
+
+#ifdef __ARM64_PMAP_SUBPAGE_L1__
+#if (__ARM_VMSA__ <= 7)
+#error This is not supported for old-style page tables
+#endif
+#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
+#else
+#if (__ARM_VMSA__ <= 7)
+#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
#else
-#define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
+#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
+#endif
#endif
-#define PMAP_TT_LEAF_LEVEL PMAP_TT_MAX_LEVEL
-#define PMAP_TT_TWIG_LEVEL (PMAP_TT_MAX_LEVEL - 1)
+
+extern u_int32_t random(void); /* from <libkern/libkern.h> */
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
-static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
-static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
+static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
struct page_table_ops {
bool (*alloc_id)(pmap_t pmap);
void (*free_id)(pmap_t pmap);
- void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
- void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
+ void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
void (*flush_tlb_async)(pmap_t pmap);
pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};
.alloc_id = alloc_asid,
.free_id = free_asid,
.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
- .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
.flush_tlb_async = flush_mmu_tlb_full_asid_async,
.wimg_to_pte = wimg_to_pte,
};
const uintptr_t ap_xn;
const uintptr_t ap_x;
const unsigned int pta_root_level;
+ const unsigned int pta_sharedpage_level;
const unsigned int pta_max_level;
+#if __ARM_MIXED_PAGE_SIZE__
+ const uint64_t pta_tcr_value;
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
+ const uint64_t pta_page_size;
+ const uint64_t pta_page_shift;
};
const struct page_table_attr pmap_pt_attr_4k = {
.pta_level_info = pmap_table_level_info_4k,
- .pta_root_level = PMAP_TT_L1_LEVEL,
+ .pta_root_level = (T0SZ_BOOT - 16) / 9,
+#if __ARM_MIXED_PAGE_SIZE__
+ .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
+#else /* __ARM_MIXED_PAGE_SIZE__ */
+#if __ARM_16K_PG__
+ .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
+#else /* __ARM_16K_PG__ */
+ .pta_sharedpage_level = PMAP_TT_L1_LEVEL,
+#endif /* __ARM_16K_PG__ */
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
.pta_max_level = PMAP_TT_L3_LEVEL,
.pta_ops = &native_pt_ops,
.ap_ro = ARM_PTE_AP(AP_RORO),
.ap_rwna = ARM_PTE_AP(AP_RWNA),
.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
.ap_x = ARM_PTE_PNX,
+#if __ARM_MIXED_PAGE_SIZE__
+ .pta_tcr_value = TCR_EL1_4KB,
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
+ .pta_page_size = 4096,
+ .pta_page_shift = 12,
};
const struct page_table_attr pmap_pt_attr_16k = {
.pta_level_info = pmap_table_level_info_16k,
.pta_root_level = PMAP_TT_L1_LEVEL,
+ .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
.pta_max_level = PMAP_TT_L3_LEVEL,
.pta_ops = &native_pt_ops,
.ap_ro = ARM_PTE_AP(AP_RORO),
.ap_rwna = ARM_PTE_AP(AP_RWNA),
.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
.ap_x = ARM_PTE_PNX,
+#if __ARM_MIXED_PAGE_SIZE__
+ .pta_tcr_value = TCR_EL1_16KB,
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
+ .pta_page_size = 16384,
+ .pta_page_shift = 14,
};
#if __ARM_16K_PG__
#if (__ARM_VMSA__ > 7)
static inline uint64_t
+pt_attr_page_size(const pt_attr_t * const pt_attr)
+{
+ return pt_attr->pta_page_size;
+}
+
+__unused static inline uint64_t
pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
{
return pt_attr->pta_level_info[level].size;
return pt_attr->pta_level_info[level].shift;
}
-__unused static inline uint64_t
+static inline uint64_t
pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
return pt_attr->pta_level_info[level].offmask;
}
+__unused static inline uint64_t
+pt_attr_ln_pt_offmask(const pt_attr_t * const pt_attr, unsigned int level)
+{
+ return pt_attr_ln_offmask(pt_attr, level);
+}
+
+__unused static inline uint64_t
+pt_attr_ln_index_mask(const pt_attr_t * const pt_attr, unsigned int level)
+{
+ return pt_attr->pta_level_info[level].index_mask;
+}
+
static inline unsigned int
pt_attr_twig_level(const pt_attr_t * const pt_attr)
{
return pt_attr->pta_root_level;
}
+/**
+ * This is the level at which to copy a pt_entry from the sharedpage_pmap into
+ * the user pmap. Typically L1 for 4K pages, and L2 for 16K pages. In this way,
+ * the sharedpage's L2/L3 page tables are reused in every 4K task, whereas only
+ * the L3 page table is reused in 16K tasks.
+ */
+static inline unsigned int
+pt_attr_sharedpage_level(const pt_attr_t * const pt_attr)
+{
+ return pt_attr->pta_sharedpage_level;
+}
+
static __unused inline uint64_t
pt_attr_leaf_size(const pt_attr_t * const pt_attr)
{
}
#else /* (__ARM_VMSA__ > 7) */
+static inline uint64_t
+pt_attr_page_size(__unused const pt_attr_t * const pt_attr)
+{
+ return PAGE_SIZE;
+}
+
+__unused static inline unsigned int
+pt_attr_root_level(__unused const pt_attr_t * const pt_attr)
+{
+ return PMAP_TT_L1_LEVEL;
+}
+
+__unused static inline unsigned int
+pt_attr_sharedpage_level(__unused const pt_attr_t * const pt_attr)
+{
+ return PMAP_TT_L1_LEVEL;
+}
static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
return ARM_PTE_NX;
}
+static inline uintptr_t
+pt_attr_leaf_x(__unused const pt_attr_t * const pt_attr)
+{
+ return ARM_PTE_PNX;
+}
+
+__unused static inline uintptr_t
+pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
+{
+ if (level == PMAP_TT_L1_LEVEL) {
+ return ARM_TT_L1_OFFMASK;
+ } else if (level == PMAP_TT_L2_LEVEL) {
+ return ARM_TT_L2_OFFMASK;
+ }
+
+ return 0;
+}
+
+static inline uintptr_t
+pt_attr_ln_pt_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
+{
+ if (level == PMAP_TT_L1_LEVEL) {
+ return ARM_TT_L1_PT_OFFMASK;
+ } else if (level == PMAP_TT_L2_LEVEL) {
+ return ARM_TT_L2_OFFMASK;
+ }
+
+ return 0;
+}
+
#endif /* (__ARM_VMSA__ > 7) */
+static inline unsigned int
+pt_attr_leaf_level(const pt_attr_t * const pt_attr)
+{
+ return pt_attr_twig_level(pt_attr) + 1;
+}
+
+
static inline void
pmap_sync_tlb(bool strong __unused)
{
#endif /* DEVELOPMENT || DEBUG */
+#ifdef PLATFORM_BridgeOS
+static struct pmap_legacy_trust_cache *pmap_legacy_trust_caches MARK_AS_PMAP_DATA = NULL;
+#endif
+static struct pmap_image4_trust_cache *pmap_image4_trust_caches MARK_AS_PMAP_DATA = NULL;
+
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_loaded_trust_caches_lock, 0);
+
+
+/*
+ * Represents a TLB range that will be flushed before exiting
+ * the PPL.
+ * Used by phys_attribute_clear_range to defer flushing pages in
+ * this range until the end of the operation.
+ */
+typedef struct pmap_tlb_flush_range {
+ pmap_t ptfr_pmap;
+ vm_map_address_t ptfr_start;
+ vm_map_address_t ptfr_end;
+ bool ptfr_flush_needed;
+} pmap_tlb_flush_range_t;
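+/*
+ * Illustrative sketch (assumed usage pattern, not taken from a specific
+ * caller): a deferred flush is accumulated into a local range and issued
+ * once at the end of the operation, e.g.:
+ *
+ *   pmap_tlb_flush_range_t flush_range = {
+ *           .ptfr_pmap = pmap,
+ *           .ptfr_start = start,
+ *           .ptfr_end = end,
+ *           .ptfr_flush_needed = false,
+ *   };
+ *   ... clear attributes, setting flush_range.ptfr_flush_needed whenever a
+ *       mapping actually changes ...
+ *   if (flush_range.ptfr_flush_needed) {
+ *           ... flush [ptfr_start, ptfr_end) for ptfr_pmap and sync ...
+ *   }
+ */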
+
+#if XNU_MONITOR
+/*
+ * PPL External References.
+ */
+extern vm_offset_t segPPLDATAB;
+extern unsigned long segSizePPLDATA;
+extern vm_offset_t segPPLTEXTB;
+extern unsigned long segSizePPLTEXT;
+extern vm_offset_t segPPLDATACONSTB;
+extern unsigned long segSizePPLDATACONST;
+
+
+/*
+ * PPL Global Variables
+ */
+
+#if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
+/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
+SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
+#else
+const boolean_t pmap_ppl_disable = FALSE;
+#endif
+
+/* Indicates if the PPL has started applying APRR. */
+boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
+
+/*
+ * The PPL cannot invoke the kernel in order to allocate memory, so we must
+ * maintain a list of free pages that the PPL owns. The kernel can give the PPL
+ * additional pages.
+ */
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ppl_free_page_lock, 0);
+void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
+uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
+uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
+
+struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA = {0};
+
+extern void *pmap_stacks_start;
+extern void *pmap_stacks_end;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
+
+/* Allocation data/locks for pmap structures. */
+#if XNU_MONITOR
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_free_list_lock, 0);
+#endif
+SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
+SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
+SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
+pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
+
+/* Allocation data/locks/structs for task ledger structures. */
+#define PMAP_LEDGER_DATA_BYTES \
+ (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
+
+/*
+ * The maximum number of ledgers allowed is the maximum number of tasks
+ * allowed on the system plus some slack, i.e. ~10% of total tasks = 200.
+ */
+#define MAX_PMAP_LEDGERS (pmap_max_asids + 200)
+#define PMAP_ARRAY_SIZE (pmap_max_asids)
+
+typedef struct pmap_ledger_data {
+ char pld_data[PMAP_LEDGER_DATA_BYTES];
+} pmap_ledger_data_t;
+
+typedef struct pmap_ledger {
+ union {
+ struct pmap_ledger_data ple_data;
+ struct pmap_ledger * next;
+ };
+
+ struct pmap_ledger ** back_ptr;
+} pmap_ledger_t;
+
+SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ledger_lock, 0);
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
+SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
+SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
+uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
+pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
+
+#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
+#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
+
+static inline void
+pmap_check_ledger_fields(ledger_t ledger)
+{
+ if (ledger == NULL) {
+ return;
+ }
+
+ thread_t cur_thread = current_thread();
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
+}
+
+#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
+
+#else /* XNU_MONITOR */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
-
-#if DEVELOPMENT || DEBUG
-int panic_on_unsigned_execute = 0;
-#endif /* DEVELOPMENT || DEBUG */
+#endif /* !XNU_MONITOR */
/* Virtual memory region for early allocation */
#define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
+extern uint8_t bootstrap_pagetables[];
+
extern unsigned int not_in_kdp;
extern vm_offset_t first_avail;
extern vm_offset_t virtual_space_end; /* End of kernel address space */
extern vm_offset_t static_memory_end;
+extern const vm_map_address_t physmap_base;
+extern const vm_map_address_t physmap_end;
+
extern int maxproc, hard_maxproc;
+vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;
+
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
-struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
-vm_object_t pmap_object = &pmap_object_store;
+struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED; /* store pt pages */
+SECURITY_READ_ONLY_LATE(vm_object_t) pmap_object = &pmap_object_store;
-static struct zone *pmap_zone; /* zone of pmap structures */
+static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone; /* zone of pmap structures */
-decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
-decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
unsigned int pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
-decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pt_pages_lock, 0);
queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
-decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_pages_lock, 0);
typedef struct page_free_entry {
struct page_free_entry *next;
int allow_data_exec = 0; /* No apps may execute data */
int allow_stack_exec = 0; /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
+unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
+unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1; /* enable no-execute protection */
const int allow_data_exec = 0; /* No apps may execute data */
const int allow_stack_exec = 0; /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
-/*
- * pv_entry_t - structure to track the active mappings for a given page
- */
-typedef struct pv_entry {
- struct pv_entry *pve_next; /* next alias */
- pt_entry_t *pve_ptep; /* page table entry */
-}
-#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
-/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
- * are 32-bit:
- * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
- * pt_desc.
+/**
+ * This variable is set to true during hibernation entry to protect pmap data
+ * structures during image copying, and reset to false on hibernation exit.
*/
-__attribute__ ((aligned(8))) pv_entry_t;
+bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
+
+/* Macro used to ensure that pmap data structures aren't modified during hibernation image copying. */
+#if HIBERNATION
+#define ASSERT_NOT_HIBERNATING() (assertf(!hib_entry_pmap_lockdown, \
+ "Attempted to modify PMAP data structures after hibernation image copying has begun."))
#else
-pv_entry_t;
-#endif
+#define ASSERT_NOT_HIBERNATING()
+#endif /* HIBERNATION */
#define PV_ENTRY_NULL ((pv_entry_t *) 0)
SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
-pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
-pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
-decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
-decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);
+pv_free_list_t pv_free MARK_AS_PMAP_DATA = {0};
+pv_free_list_t pv_kern_free MARK_AS_PMAP_DATA = {0};
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_free_list_lock, 0);
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_kern_free_list_lock, 0);
-decl_simple_lock_data(, phys_backup_lock);
+SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
/*
* pt_desc - structure to keep info on page assigned to page tables
*/
#if (__ARM_VMSA__ == 7)
-#define PT_INDEX_MAX 1
-#else
-#if (ARM_PGSHIFT == 14)
-#define PT_INDEX_MAX 1
+#define PT_INDEX_MAX 1
+#else /* (__ARM_VMSA__ != 7) */
+
+#if __ARM_MIXED_PAGE_SIZE__
+#define PT_INDEX_MAX (ARM_PGBYTES / 4096)
+#elif (ARM_PGSHIFT == 14)
+#define PT_INDEX_MAX 1
+#elif (ARM_PGSHIFT == 12)
+#define PT_INDEX_MAX 4
#else
-#define PT_INDEX_MAX 4
-#endif
-#endif
+#error Unsupported ARM_PGSHIFT
+#endif /* (ARM_PGSHIFT != 14) */
+
+#endif /* (__ARM_VMSA__ != 7) */
#define PT_DESC_REFCOUNT 0x4000U
#define PT_DESC_IOMMU_REFCOUNT 0x8000U
+typedef struct {
+ /*
+ * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
+ * For leaf pagetables, should reflect the number of non-empty PTEs
+ * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
+ */
+ unsigned short refcnt;
+ /*
+ * For non-leaf pagetables, should be 0
+ * For leaf pagetables, should reflect the number of wired entries
+ * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
+ */
+ unsigned short wiredcnt;
+ vm_offset_t va;
+} ptd_info_t;
+
typedef struct pt_desc {
queue_chain_t pt_page;
union {
struct pmap *pmap;
};
- /*
- * Locate this struct towards the end of the pt_desc; our long term
- * goal is to make this a VLA to avoid wasting memory if we don't need
- * multiple entries.
- */
- struct {
- /*
- * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
- * For leaf pagetables, should reflect the number of non-empty PTEs
- * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
- */
- unsigned short refcnt;
- /*
- * For non-leaf pagetables, should be 0
- * For leaf pagetables, should reflect the number of wired entries
- * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
- */
- unsigned short wiredcnt;
- vm_offset_t va;
- } ptd_info[PT_INDEX_MAX];
+ ptd_info_t ptd_info[PT_INDEX_MAX];
} pt_desc_t;
#define PP_ATTR_REFFAULT 0x1000
#define PP_ATTR_MODFAULT 0x2000
+#if XNU_MONITOR
+/*
+ * Denotes that a page is owned by the PPL. This is modified/checked with the
+ * PVH lock held, to avoid ownership related races. This does not need to be a
+ * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
+ * put the bit.
+ */
+#define PP_ATTR_MONITOR 0x4000
+
+/*
+ * Denotes that a page *cannot* be owned by the PPL. This is required in order
+ * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
+ * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
+ * parameters and in so doing stage a write gadget against the PPL.
+ */
+#define PP_ATTR_NO_MONITOR 0x8000
+
+/*
+ * All of the bits owned by the PPL; kernel requests to set or clear these bits
+ * are illegal.
+ */
+#define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
+#endif
-SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
+SECURITY_READ_ONLY_LATE(volatile pp_attr_t*) pp_attr_table;
+/**
+ * The layout of this structure needs to map 1-to-1 with the pmap-io-range device
+ * tree nodes. Astris (through the LowGlobals) also depends on the consistency
+ * of this structure.
+ */
typedef struct pmap_io_range {
uint64_t addr;
uint64_t len;
#define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
+ #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
+ #define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29) // Pages in this range need to be included in the hibernation image
uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
uint32_t signature; // 4CC
} __attribute__((packed)) pmap_io_range_t;
-SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
+SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table = (pmap_io_range_t*)0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
-SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
-SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
-
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
#if defined(__arm64__)
+# ifdef XNU_TARGET_OS_OSX
+SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
+# else
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
-#endif
+# endif
+#endif /* __arm64__ */
-#define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
-_Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
- "VASID bits can't be represented by an 8-bit integer");
+#if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
+SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
+#else
+SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
+#endif
-decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
-static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
+MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
+SECURITY_READ_ONLY_LATE(static uint32_t) pmap_max_asids = 0;
+SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
+SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
+SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
+static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
+static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
+static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
-#if (__ARM_VMSA__ > 7)
-SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
+#if (__ARM_VMSA__ > 7)
+#if __ARM_MIXED_PAGE_SIZE__
+SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
+#endif
+SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
#endif
+#if XNU_MONITOR
+/*
+ * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
+ * and a PV page; in essence, twice as many pages as may be necessary to satisfy
+ * a single pmap_enter request.
+ */
+#define PMAP_MIN_FREE_PPL_PAGES 8
+#endif
#define pa_index(pa) \
(atop((pa) - vm_first_phys))
#define pte_is_wired(pte) \
(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
-#define pte_set_wired(ptep, wired) \
- do { \
- SInt16 *ptd_wiredcnt_ptr; \
- ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
- if (wired) { \
- *ptep |= ARM_PTE_WIRED; \
- OSAddAtomic16(1, ptd_wiredcnt_ptr); \
- } else { \
- *ptep &= ~ARM_PTE_WIRED; \
- OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
- } \
- } while(0)
-
#define pte_was_writeable(pte) \
(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
#define ARM_TT_PT_INDEX_MASK ARM_PGMASK
#if (__ARM_VMSA__ == 7)
-#define ARM_PT_DESC_INDEX_MASK 0x00000
-#define ARM_PT_DESC_INDEX_SHIFT 0
/*
* Shift value used for reconstructing the virtual address for a PTE.
*/
#define ARM_TT_PT_ADDR_SHIFT (10U)
-#define ptep_get_va(ptep) \
- ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
-
#define ptep_get_pmap(ptep) \
((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
#else
#if (ARM_PGSHIFT == 12)
-#define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
-#define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
/*
* Shift value used for reconstructing the virtual address for a PTE.
*/
#define ARM_TT_PT_ADDR_SHIFT (9ULL)
#else
-#define ARM_PT_DESC_INDEX_MASK (0x00000)
-#define ARM_PT_DESC_INDEX_SHIFT (0)
/*
* Shift value used for reconstructing the virtual address for a PTE.
*/
#define ARM_TT_PT_ADDR_SHIFT (11ULL)
#endif
-
-#define ARM_PT_DESC_INDEX(ptep) \
- (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
-
-#define ptep_get_va(ptep) \
- ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
-
#define ptep_get_pmap(ptep) \
((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
#endif
-#define ARM_PT_DESC_INDEX(ptep) \
- (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
-
#define ptep_get_ptd(ptep) \
((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
#define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC (1ULL << 60)
#define PVH_FLAG_LOCKDOWN (1ULL << 59)
-#define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
+#define PVH_FLAG_HASHED (1ULL << 58) /* Used to mark that a page has been hashed into the hibernation image. */
+#define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN | PVH_FLAG_HASHED)
#else /* !__arm64__ */
/* PPATTR Define Macros */
-#define ppattr_set_bits(h, b) \
- do { \
- while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
- } while (0)
-
-#define ppattr_clear_bits(h, b) \
- do { \
- while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
- } while (0)
+#define ppattr_set_bits(h, b) os_atomic_or((h), (pp_attr_t)(b), acq_rel)
+#define ppattr_clear_bits(h, b) os_atomic_andnot((h), (pp_attr_t)(b), acq_rel)
#define ppattr_test_bits(h, b) \
- ((*(pp_attr_t *)(h) & (b)) == (b))
+ ((*(h) & (pp_attr_t)(b)) == (pp_attr_t)(b))
#define pa_set_bits(x, b) \
do { \
#define pa_clear_reference(x) \
pa_clear_bits(x, PP_ATTR_REFERENCED)
+#if XNU_MONITOR
+#define pa_set_monitor(x) \
+ pa_set_bits((x), PP_ATTR_MONITOR)
+
+#define pa_clear_monitor(x) \
+ pa_clear_bits((x), PP_ATTR_MONITOR)
+
+#define pa_test_monitor(x) \
+ pa_test_bits((x), PP_ATTR_MONITOR)
+
+#define pa_set_no_monitor(x) \
+ pa_set_bits((x), PP_ATTR_NO_MONITOR)
+
+#define pa_clear_no_monitor(x) \
+ pa_clear_bits((x), PP_ATTR_NO_MONITOR)
+
+#define pa_test_no_monitor(x) \
+ pa_test_bits((x), PP_ATTR_NO_MONITOR)
+#endif
#define IS_INTERNAL_PAGE(pai) \
ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#endif
+
+static inline ptd_info_t *
+ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
+{
+ assert(ptd->ptd_info[0].refcnt != PT_DESC_IOMMU_REFCOUNT);
+#if PT_INDEX_MAX == 1
+ #pragma unused(ttep)
+ return &ptd->ptd_info[0];
+#else
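+	/*
+	 * With subpage page tables (PT_INDEX_MAX > 1), several hardware page
+	 * tables share a single kernel page; derive which ptd_info[] slot the
+	 * given table-entry pointer belongs to from its offset within that page.
+	 */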
+ uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
+ vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
+ unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
+ assert(ttep_index < PT_INDEX_MAX);
+ return &ptd->ptd_info[ttep_index];
+#endif
+}
+
+static inline ptd_info_t *
+ptep_get_info(const pt_entry_t *ptep)
+{
+ return ptd_get_info(ptep_get_ptd(ptep), ptep);
+}
+
+static inline vm_map_address_t
+ptep_get_va(const pt_entry_t *ptep)
+{
+ pv_entry_t **pv_h;
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptep_get_pmap(ptep));
+	pv_h = pai_to_pvh(pa_index(ml_static_vtop(((vm_offset_t)ptep))));
+
+ assert(pvh_test_type(pv_h, PVH_TYPE_PTDP));
+ pt_desc_t *ptdp = (pt_desc_t *)(pvh_list(pv_h));
+
+ vm_map_address_t va = ptd_get_info(ptdp, ptep)->va;
+ vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
+
+ va += (ptep_index << pt_attr_leaf_shift(pt_attr));
+
+ return va;
+}
+
+static inline void
+pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
+{
+ if (wired) {
+ *ptep |= ARM_PTE_WIRED;
+ } else {
+ *ptep &= ~ARM_PTE_WIRED;
+ }
+ /*
+ * Do not track wired page count for kernel pagetable pages. Kernel mappings are
+ * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
+ * never reclaimed.
+ */
+ if (pmap == kernel_pmap) {
+ return;
+ }
+ unsigned short *ptd_wiredcnt_ptr;
+ ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
+ if (wired) {
+ os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
+ } else {
+ unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
+ if (__improbable(prev_wired == 0)) {
+ panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
+ }
+ }
+}
+
/*
* Lock on pmap system
*/
-lck_grp_t pmap_lck_grp;
+lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
+
+static inline void
+pmap_lock_init(pmap_t pmap)
+{
+ lck_rw_init(&pmap->rwlock, &pmap_lck_grp, 0);
+ pmap->rwlock.lck_rw_can_sleep = FALSE;
+}
+
+static inline void
+pmap_lock_destroy(pmap_t pmap)
+{
+ lck_rw_destroy(&pmap->rwlock, &pmap_lck_grp);
+}
+
+static inline void
+pmap_lock(pmap_t pmap)
+{
+ #if !XNU_MONITOR
+ mp_disable_preemption();
+ #endif
+ lck_rw_lock_exclusive(&pmap->rwlock);
+}
+
+static inline void
+pmap_lock_ro(pmap_t pmap)
+{
+ #if !XNU_MONITOR
+ mp_disable_preemption();
+ #endif
+ lck_rw_lock_shared(&pmap->rwlock);
+}
-#define PMAP_LOCK_INIT(pmap) { \
- simple_lock_init(&(pmap)->lock, 0); \
- }
+static inline void
+pmap_unlock(pmap_t pmap)
+{
+ lck_rw_unlock_exclusive(&pmap->rwlock);
+ #if !XNU_MONITOR
+ mp_enable_preemption();
+ #endif
+}
-#define PMAP_LOCK(pmap) { \
- pmap_simple_lock(&(pmap)->lock); \
+static inline void
+pmap_unlock_ro(pmap_t pmap)
+{
+ lck_rw_unlock_shared(&pmap->rwlock);
+ #if !XNU_MONITOR
+ mp_enable_preemption();
+ #endif
}
-#define PMAP_UNLOCK(pmap) { \
- pmap_simple_unlock(&(pmap)->lock); \
+static inline bool
+pmap_try_lock(pmap_t pmap)
+{
+ bool ret;
+
+ #if !XNU_MONITOR
+ mp_disable_preemption();
+ #endif
+ ret = lck_rw_try_lock_exclusive(&pmap->rwlock);
+ if (!ret) {
+ #if !XNU_MONITOR
+ mp_enable_preemption();
+ #endif
+ }
+
+ return ret;
}
+//assert that ONLY READ lock is held
+__unused static inline void
+pmap_assert_locked_r(__unused pmap_t pmap)
+{
#if MACH_ASSERT
-#define PMAP_ASSERT_LOCKED(pmap) { \
- simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
+ lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_SHARED);
+#else
+ (void)pmap;
+#endif
}
+//assert that ONLY WRITE lock is held
+__unused static inline void
+pmap_assert_locked_w(__unused pmap_t pmap)
+{
+#if MACH_ASSERT
+ lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_EXCLUSIVE);
#else
-#define PMAP_ASSERT_LOCKED(pmap)
+ (void)pmap;
+#endif
+}
+
+//assert that either READ or WRITE lock is held
+__unused static inline void
+pmap_assert_locked_any(__unused pmap_t pmap)
+{
+#if MACH_ASSERT
+ lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_HELD);
#endif
+}
+
#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
} while (0)
#define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap); \
pmap_sync_tlb(strong); \
}
#define current_pmap() \
(vm_map_pmap(current_thread()->map))
+#if XNU_MONITOR
+/*
+ * PPL-related macros.
+ */
+#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
+ (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
+ ((_ptr_) < (typeof(_ptr_))_array_end_) && \
+ !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
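+
+/*
+ * That is, the pointer must lie within [_array_begin_, _array_end_) and be
+ * exactly element-aligned relative to the array base, so a forged pointer
+ * into the middle of an element is rejected.
+ */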
-#define VALIDATE_USER_PMAP(x)
-#define VALIDATE_PMAP(x)
-#define VALIDATE_LEDGER(x)
+#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
+#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
-#if DEVELOPMENT || DEBUG
+#define VALIDATE_PMAP(x) \
+ if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
+ panic("%s: invalid pmap %p", __func__, (x));
-/*
- * Trace levels are controlled by a bitmask in which each
+#define VALIDATE_LEDGER_PTR(x) \
+ if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
+ panic("%s: invalid ledger ptr %p", __func__, (x));
+
+#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
+
+static uint64_t
+pmap_ledger_validate(void * ledger)
+{
+ uint64_t array_index;
+ pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
+ VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
+ array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
+
+ if (array_index >= MAX_PMAP_LEDGERS) {
+ panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
+ }
+
+ pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
+
+ if (__improbable(ledger_ptr != ledger)) {
+ panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
+ }
+
+ return array_index;
+}
+
+#else /* XNU_MONITOR */
+
+#define VALIDATE_PMAP(x) assert((x) != NULL);
+
+#endif /* XNU_MONITOR */
+
+#if DEVELOPMENT || DEBUG
+
+/*
+ * Trace levels are controlled by a bitmask in which each
* level can be enabled/disabled by the (1<<level) position
* in the boot arg
+ * Level 0: PPL extension functionality
* Level 1: pmap lifecycle (create/destroy/switch)
* Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
- * Level 3: internal state management (tte/attributes/fast-fault)
+ * Level 3: internal state management (attributes/fast-fault)
+ * Level 4-7: TTE traces for paging levels 0-3. TTBs are traced at level 4.
*/
SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
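+/*
+ * Example (illustrative only): a boot-arg mask of ((1 << 1) | (1 << 2)) == 0x6
+ * enables the pmap lifecycle and mapping lifecycle traces while leaving the
+ * other levels disabled.
+ */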
if (__improbable((1 << (level)) & pmap_trace_mask)) { \
KDBG_RELEASE(__VA_ARGS__); \
}
-#else
+#else /* DEVELOPMENT || DEBUG */
#define PMAP_TRACE(level, ...)
-#endif
+#endif /* DEVELOPMENT || DEBUG */
/*
* Internal function prototypes (forward declarations).
*/
-static void pv_init(
- void);
+typedef enum {
+ PV_ALLOC_SUCCESS,
+ PV_ALLOC_RETRY,
+ PV_ALLOC_FAIL
+} pv_alloc_return_t;
-static boolean_t pv_alloc(
+static pv_alloc_return_t pv_alloc(
pmap_t pmap,
unsigned int pai,
pv_entry_t **pvepp);
-static void pv_free(
- pv_entry_t *pvep);
-
-static void pv_list_free(
- pv_entry_t *pvehp,
- pv_entry_t *pvetp,
- unsigned int cnt);
-
static void ptd_bootstrap(
pt_desc_t *ptdp, unsigned int ptd_cnt);
-static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
+static inline pt_desc_t *ptd_alloc_unlinked(void);
-static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
+static pt_desc_t *ptd_alloc(pmap_t pmap);
static void ptd_deallocate(pt_desc_t *ptdp);
static void ptd_init(
pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
-static void pmap_zone_init(
- void);
-
static void pmap_set_reference(
ppnum_t pn);
-ppnum_t pmap_vtophys(
+pmap_paddr_t pmap_vtophys(
pmap_t pmap, addr64_t va);
void pmap_switch_user_ttb(
pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
static int pmap_remove_range(
- pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
+ pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
static int pmap_remove_range_options(
- pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
+ pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, vm_map_address_t *, bool *, int);
static tt_entry_t *pmap_tt1_allocate(
pmap_t, vm_size_t, unsigned int);
#define PMAP_TT_ALLOCATE_NOWAIT 0x1
static void pmap_tte_deallocate(
- pmap_t, tt_entry_t *, unsigned int);
-
-#ifdef __ARM64_PMAP_SUBPAGE_L1__
-#if (__ARM_VMSA__ <= 7)
-#error This is not supported for old-style page tables
-#endif /* (__ARM_VMSA__ <= 7) */
-#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
-#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
-#if (__ARM_VMSA__ <= 7)
-#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
-#else /* (__ARM_VMSA__ > 7) */
-#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
-#endif /* (__ARM_VMSA__ > 7) */
-#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
+ pmap_t, vm_offset_t, vm_offset_t, bool, tt_entry_t *, unsigned int);
const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
pmap_is_64bit(pmap_t);
-#endif
+#endif /* (__ARM_VMSA__ > 7) */
+
static inline tt_entry_t *pmap_tte(
pmap_t, vm_map_address_t);
static void pmap_update_cache_attributes_locked(
ppnum_t, unsigned);
-boolean_t arm_clear_fast_fault(
+static boolean_t arm_clear_fast_fault(
ppnum_t ppnum,
vm_prot_t fault_type);
static pmap_paddr_t pmap_pages_reclaim(
void);
-static kern_return_t pmap_pages_alloc(
+static kern_return_t pmap_pages_alloc_zeroed(
pmap_paddr_t *pa,
unsigned size,
unsigned option);
static void pmap_trim_subord(pmap_t subord);
+/*
+ * Temporary prototypes, while we wait for pmap_enter to move to taking an
+ * address instead of a page number.
+ */
+static kern_return_t
+pmap_enter_addr(
+ pmap_t pmap,
+ vm_map_address_t v,
+ pmap_paddr_t pa,
+ vm_prot_t prot,
+ vm_prot_t fault_type,
+ unsigned int flags,
+ boolean_t wired);
+
+kern_return_t
+pmap_enter_options_addr(
+ pmap_t pmap,
+ vm_map_address_t v,
+ pmap_paddr_t pa,
+ vm_prot_t prot,
+ vm_prot_t fault_type,
+ unsigned int flags,
+ boolean_t wired,
+ unsigned int options,
+ __unused void *arg);
+
+#ifdef CONFIG_XNUPOST
+kern_return_t pmap_test(void);
+#endif /* CONFIG_XNUPOST */
+
+#if XNU_MONITOR
+static pmap_paddr_t pmap_alloc_page_for_kern(unsigned int options);
+static void pmap_alloc_page_for_ppl(unsigned int options);
+
+
+/*
+ * This macro generates prototypes for the *_internal functions, which
+ * represent the PPL interface. When the PPL is enabled, this will also
+ * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
+ * the entrypoints.
+ */
+#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
+
+#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
+ static __return_type __function_name##_internal __function_args; \
+ extern __return_type __function_name##_ppl __function_args; \
+ __asm__ (".text \n" \
+ ".align 2 \n" \
+ ".globl " #__assembly_function_name "\n" \
+ #__assembly_function_name ":\n" \
+ "mov x15, " #__function_index "\n" \
+ "b _aprr_ppl_enter\n")
+
+#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
+ PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
+
+#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
+ PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
+#else /* XNU_MONITOR */
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
static __return_type __function_name##_internal __function_args
+#endif /* XNU_MONITOR */
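+
+/*
+ * For illustration (a sketch of the expansion under the XNU_MONITOR case
+ * above): PMAP_SUPPORT_PROTOTYPES(void, pmap_switch, (pmap_t pmap),
+ * PMAP_SWITCH_INDEX) declares a static pmap_switch_internal() implementation
+ * and an external pmap_switch_ppl() entry point, and emits a small assembly
+ * stub that loads PMAP_SWITCH_INDEX into x15 and branches to _aprr_ppl_enter,
+ * which dispatches to the corresponding entry in the PPL handler table.
+ */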
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
vm_prot_t allow_mode,
int options), ARM_FORCE_FAST_FAULT_INDEX);
-PMAP_SUPPORT_PROTOTYPES(
- kern_return_t,
- mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
+MARK_AS_PMAP_TEXT static boolean_t
+arm_force_fast_fault_with_flush_range(
+ ppnum_t ppnum,
+ vm_prot_t allow_mode,
+ int options,
+ pmap_tlb_flush_range_t *flush_range);
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
- mapping_replenish, (void), MAPPING_REPLENISH_INDEX);
+ mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
PMAP_SUPPORT_PROTOTYPES(
boolean_t,
pmap_t,
pmap_create_options, (ledger_t ledger,
vm_map_size_t size,
- unsigned int flags), PMAP_CREATE_INDEX);
+ unsigned int flags,
+ kern_return_t * kr), PMAP_CREATE_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
kern_return_t,
pmap_enter_options, (pmap_t pmap,
vm_map_address_t v,
- ppnum_t pn,
+ pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
- vm_offset_t,
- pmap_extract, (pmap_t pmap,
- vm_map_address_t va), PMAP_EXTRACT_INDEX);
-
-PMAP_SUPPORT_PROTOTYPES(
- ppnum_t,
- pmap_find_phys, (pmap_t pmap,
- addr64_t va), PMAP_FIND_PHYS_INDEX);
+ pmap_paddr_t,
+ pmap_find_pa, (pmap_t pmap,
+ addr64_t va), PMAP_FIND_PA_INDEX);
#if (__ARM_VMSA__ > 7)
PMAP_SUPPORT_PROTOTYPES(
pmap_nest, (pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size), PMAP_NEST_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_page_protect_options, (ppnum_t ppnum,
vm_prot_t prot,
- unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
+ unsigned int options,
+ void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
- void,
+ vm_map_address_t,
pmap_protect_options, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
PMAP_SUPPORT_PROTOTYPES(
- int,
+ vm_map_address_t,
pmap_remove_options, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
void,
pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
-#if MACH_ASSERT
+#if MACH_ASSERT || XNU_MONITOR
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_set_process, (pmap_t pmap,
uint64_t size,
unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
+#endif
PMAP_SUPPORT_PROTOTYPES(
void,
phys_attribute_set, (ppnum_t pn,
unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_mark_page_as_ppl_page, (pmap_paddr_t pa, bool initially_free), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
+#endif
PMAP_SUPPORT_PROTOTYPES(
void,
int options,
void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
+#if __ARM_RANGE_TLBI__
+PMAP_SUPPORT_PROTOTYPES(
+ vm_map_address_t,
+ phys_attribute_clear_range, (pmap_t pmap,
+ vm_map_address_t start,
+ vm_map_address_t end,
+ unsigned int bits,
+ unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
+#endif /* __ARM_RANGE_TLBI__ */
+
+
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
void,
pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ uint64_t,
+ pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
+#endif
+
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
+#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
+#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
+
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_trim, (pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size), PMAP_TRIM_INDEX);
+#if HAS_APPLE_PAC
+PMAP_SUPPORT_PROTOTYPES(
+ void *,
+ pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
+PMAP_SUPPORT_PROTOTYPES(
+ void *,
+ pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
+#endif /* HAS_APPLE_PAC */
+
+PMAP_SUPPORT_PROTOTYPES(
+ bool,
+ pmap_is_trust_cache_loaded, (const uuid_t uuid), PMAP_IS_TRUST_CACHE_LOADED_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ uint32_t,
+ pmap_lookup_in_static_trust_cache, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ bool,
+ pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
+ PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);
+PMAP_SUPPORT_PROTOTYPES(
+ bool,
+ pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
+ PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
+
+#if XNU_MONITOR
+static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
+#endif
void pmap_footprint_suspend(vm_map_t map,
boolean_t suspend);
boolean_t suspend),
PMAP_FOOTPRINT_SUSPEND_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_ledger_alloc_init, (size_t),
+ PMAP_LEDGER_ALLOC_INIT_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ ledger_t,
+ pmap_ledger_alloc, (void),
+ PMAP_LEDGER_ALLOC_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_ledger_free, (ledger_t),
+ PMAP_LEDGER_FREE_INDEX);
+#endif
+
+
+
#if CONFIG_PGTRACE
boolean_t pgtrace_enabled = 0;
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#endif
+#if DEVELOPMENT || DEBUG
+PMAP_SUPPORT_PROTOTYPES(
+ kern_return_t,
+ pmap_test_text_corruption, (pmap_paddr_t),
+ PMAP_TEST_TEXT_CORRUPTION_INDEX);
+#endif /* DEVELOPMENT || DEBUG */
+
#if (__ARM_VMSA__ > 7)
/*
* The low global vector page is mapped at a fixed alias.
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
-int pt_fake_zone_index = -1; /* index of pmap fake zone */
+#if XNU_MONITOR
+
+#if __has_feature(ptrauth_calls)
+#define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
+#else
+#define __ptrauth_ppl_handler
+#endif
+
+/*
+ * Table of function pointers used for PPL dispatch.
+ */
+const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
+ [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
+ [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
+ [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
+ [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
+ [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
+ [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
+ [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
+ [PMAP_CREATE_INDEX] = pmap_create_options_internal,
+ [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
+ [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
+ [PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
+ [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
+ [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
+ [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
+ [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
+ [PMAP_NEST_INDEX] = pmap_nest_internal,
+ [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
+ [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
+ [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
+ [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
+ [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
+ [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
+ [PMAP_RETURN_INDEX] = pmap_return_internal,
+ [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
+ [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
+ [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
+ [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
+ [PMAP_SWITCH_INDEX] = pmap_switch_internal,
+ [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
+ [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
+ [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
+ [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
+ [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
+ [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
+ [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
+ [PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
+ [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
+ [PMAP_IS_TRUST_CACHE_LOADED_INDEX] = pmap_is_trust_cache_loaded_internal,
+ [PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
+ [PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
+ [PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
+ [PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
+ [PMAP_TRIM_INDEX] = pmap_trim_internal,
+ [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
+ [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
+ [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
+#if HAS_APPLE_PAC
+ [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
+ [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
+#endif /* HAS_APPLE_PAC */
+#if __ARM_RANGE_TLBI__
+ [PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
+#endif /* __ARM_RANGE_TLBI__ */
+#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
+ [PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
+#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
+#if DEVELOPMENT || DEBUG
+ [PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
+#endif /* DEVELOPMENT || DEBUG */
+};
+#endif
/*
{
pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
+#if XNU_MONITOR
+ /* Verify cacheline-aligned */
+ assert(((vm_offset_t)pmap_cpu_data & ((1 << MAX_L2_CLINE) - 1)) == 0);
+ if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
+ panic("%s: pmap_cpu_data->cpu_number=%u, "
+ "cpu_number=%u",
+ __FUNCTION__, pmap_cpu_data->cpu_number,
+ cpu_number);
+ }
+#endif
pmap_cpu_data->cpu_number = cpu_number;
}
void
pmap_cpu_data_init(void)
{
+#if XNU_MONITOR
+ pmap_cpu_data_init_ppl(cpu_number());
+#else
pmap_cpu_data_init_internal(cpu_number());
+#endif
}
static void
pmap_cpu_data_array_init(void)
{
+#if XNU_MONITOR
+ unsigned int i = 0;
+ pmap_paddr_t ppl_cpu_save_area_cur = 0;
+ pt_entry_t template, *pte_p;
+ vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
+ assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
+ pmap_stacks_start_pa = avail_start;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
+ assert(cur_va < (vm_offset_t)pmap_stacks_end);
+ pte_p = pmap_pte(kernel_pmap, cur_va);
+ assert(*pte_p == ARM_PTE_EMPTY);
+ template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
+ ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
+#if __ARM_KERNEL_PROTECT__
+ template |= ARM_PTE_NG;
+#endif /* __ARM_KERNEL_PROTECT__ */
+ WRITE_PTE(pte_p, template);
+ __builtin_arm_isb(ISB_SY);
+ avail_start += ARM_PGBYTES;
+ }
+#if KASAN
+ kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
+#endif
+ pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
+ pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
+ pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
+ stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
+ }
+ sync_tlb_flush();
+ pmap_stacks_end_pa = avail_start;
+
+ ppl_cpu_save_area_start = avail_start;
+ ppl_cpu_save_area_end = ppl_cpu_save_area_start;
+ ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
+ avail_start += PAGE_SIZE;
+ ppl_cpu_save_area_end = avail_start;
+ }
+
+ pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
+ ppl_cpu_save_area_cur += sizeof(arm_context_t);
+ }
+#endif
pmap_cpu_data_init();
}
{
pmap_cpu_data_t * pmap_cpu_data = NULL;
+#if XNU_MONITOR
+ extern pmap_cpu_data_t* ml_get_ppl_cpu_data(void);
+ pmap_cpu_data = ml_get_ppl_cpu_data();
+#else
pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
+#endif
return pmap_cpu_data;
}
+#if __arm64__
+/*
+ * Disable interrupts and return previous state.
+ *
+ * The PPL has its own interrupt state facility, separate from
+ * ml_set_interrupts_enable(), since that function is not part of the
+ * PPL and so does things like manipulating untrusted data and
+ * taking ASTs.
+ *
+ * @return The previous interrupt state, to be restored with
+ * pmap_interrupts_restore().
+ */
+static uint64_t __attribute__((warn_unused_result)) __used
+pmap_interrupts_disable(void)
+{
+ uint64_t state = __builtin_arm_rsr64("DAIF");
+
+ if ((state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE) {
+ __builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);
+ }
+ return state;
+}
-/* TODO */
-pmap_paddr_t
-pmap_pages_reclaim(
- void)
+/*
+ * Restore previous interrupt state.
+ *
+ * @param state The previous interrupt state to restore.
+ */
+static void __used
+pmap_interrupts_restore(uint64_t state)
{
- boolean_t found_page;
- unsigned i;
- pt_desc_t *ptdp;
+ // no unknown bits?
+ assert((state & ~DAIF_ALL) == 0);
+
+ if (state != DAIF_STANDARD_DISABLE) {
+ __builtin_arm_wsr64("DAIF", state);
+ }
+}
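+
+/*
+ * Typical usage sketch (assumed pattern for PPL-side critical sections):
+ *
+ *   uint64_t intr_state = pmap_interrupts_disable();
+ *   ... work that must not be interrupted or take ASTs ...
+ *   pmap_interrupts_restore(intr_state);
+ */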
+
+/*
+ * Query interrupt state.
+ *
+ * ml_get_interrupts_enabled() is safe enough at the time of writing
+ * this comment, but because it is not considered part of the PPL it
+ * could change without notice, and because it presently only checks
+ * DAIF_IRQ, we have our own version.
+ *
+ * @return true if interrupts are enabled (not fully disabled).
+ */
+
+static bool __attribute__((warn_unused_result)) __used
+pmap_interrupts_enabled(void)
+{
+ return (__builtin_arm_rsr64("DAIF") & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE;
+}
+#endif /* __arm64__ */
+
+#if XNU_MONITOR
+/*
+ * pmap_set_range_xprr_perm takes a range (specified using start and end) that
+ * falls within the physical aperture. All mappings within this range have
+ * their protections changed from those specified by the expected_perm to those
+ * specified by the new_perm.
+ */
+static void
+pmap_set_range_xprr_perm(vm_address_t start,
+ vm_address_t end,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+#if (__ARM_VMSA__ == 7)
+#error This function is not supported on older ARM hardware
+#else
+ pmap_t pmap = NULL;
+
+ vm_address_t va = 0;
+ vm_address_t tte_start = 0;
+ vm_address_t tte_end = 0;
+
+ tt_entry_t *tte_p = NULL;
+ pt_entry_t *pte_p = NULL;
+ pt_entry_t *cpte_p = NULL;
+ pt_entry_t *bpte_p = NULL;
+ pt_entry_t *epte_p = NULL;
+
+ tt_entry_t tte = 0;
+ pt_entry_t cpte = 0;
+ pt_entry_t template = 0;
+
+ pmap = kernel_pmap;
+
+ va = start;
/*
- * pmap_pages_reclaim() is returning a page by freeing an active pt page.
- * To be eligible, a pt page is assigned to a user pmap. It doesn't have any wired pte
- * entry and it contains at least one valid pte entry.
- *
- * In a loop, check for a page in the reclaimed pt page list.
- * if one is present, unlink that page and return the physical page address.
- * Otherwise, scan the pt page list for an eligible pt page to reclaim.
- * If found, invoke pmap_remove_range() on its pmap and address range then
- * deallocates that pt page. This will end up adding the pt page to the
- * reclaimed pt page list.
- * If no eligible page were found in the pt page list, panic.
+ * Validate our arguments; any invalid argument will be grounds for a
+ * panic.
*/
+ if ((start | end) % ARM_PGBYTES) {
+ panic("%s: start or end not page aligned, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
- pmap_simple_lock(&pmap_pages_lock);
- pmap_pages_request_count++;
- pmap_pages_request_acum++;
+ if (start > end) {
+ panic("%s: start > end, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
- while (1) {
- if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
- page_free_entry_t *page_entry;
+ bool in_physmap = (start >= physmap_base) && (end < physmap_end);
+ bool in_static = (start >= gVirtBase) && (end < static_memory_end);
- page_entry = pmap_pages_reclaim_list;
- pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
- pmap_simple_unlock(&pmap_pages_lock);
+ if (!(in_physmap || in_static)) {
+ panic("%s: address not in static region or physical aperture, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
- return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
+ if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
+ panic("%s: invalid XPRR index, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ /*
+ * Walk over the PTEs for the given range, and set the protections on
+ * those PTEs.
+ */
+ while (va < end) {
+ tte_start = va;
+ tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
+
+ if (tte_end > end) {
+ tte_end = end;
}
- pmap_simple_unlock(&pmap_pages_lock);
+ tte_p = pmap_tte(pmap, va);
- pmap_simple_lock(&pt_pages_lock);
- ptdp = (pt_desc_t *)queue_first(&pt_page_list);
- found_page = FALSE;
+ /*
+ * The physical aperture should not have holes.
+ * The physical aperture should be contiguous.
+ * Do not make eye contact with the physical aperture.
+ */
+ if (tte_p == NULL) {
+ panic("%s: physical aperture tte is NULL, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ tte = *tte_p;
+
+ if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
+ /*
+ * Walk over the given L3 page table page and update the
+ * PTEs.
+ */
+ pte_p = (pt_entry_t *)ttetokv(tte);
+ bpte_p = &pte_p[pte_index(pmap, native_pt_attr, va)];
+ epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
+
+ for (cpte_p = bpte_p; cpte_p < epte_p;
+ cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
+ int pai = (int)pa_index(pte_to_pa(*cpte_p));
+ LOCK_PVH(pai);
+ cpte = *cpte_p;
+
+ /*
+ * Every PTE involved should be valid, should
+ * not have the hint bit set, and should have
+ * the expected APRR index.
+ */
+ if ((cpte & ARM_PTE_TYPE_MASK) ==
+ ARM_PTE_TYPE_FAULT) {
+ panic("%s: physical aperture PTE is invalid, va=%p, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)va,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ UNLOCK_PVH(pai);
+ continue;
+ }
+
+ if (cpte & ARM_PTE_HINT_MASK) {
+ panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)va, cpte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if (pte_to_xprr_perm(cpte) != expected_perm) {
+ panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ pte_to_xprr_perm(cpte), cpte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ template = cpte;
+ template &= ~ARM_PTE_XPRR_MASK;
+ template |= xprr_perm_to_pte(new_perm);
+
+ WRITE_PTE_STRONG(cpte_p, template);
+ UNLOCK_PVH(pai);
+ }
+ } else {
+ panic("%s: tte=0x%llx is not a table type entry, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ tte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ va = tte_end;
+ }
+
+ PMAP_UPDATE_TLBS(pmap, start, end, false);
+#endif /* (__ARM_VMSA__ == 7) */
+}
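
For reference, the loop above walks the range one "twig"-sized (last-level table) block at a time, snapping to twig boundaries and clipping the final block. A minimal stand-alone sketch of that chunking pattern (TWIG_SIZE is a made-up stand-in for pt_attr_twig_size(); this is not pmap code):

#include <assert.h>
#include <stdint.h>

#define TWIG_SIZE 0x200000ULL   /* made-up stand-in for pt_attr_twig_size() */
#define TWIG_MASK (TWIG_SIZE - 1)

int
main(void)
{
	uint64_t start = 0x1ff000, end = 0x401000;
	uint64_t va = start;
	unsigned chunks = 0;

	while (va < end) {
		uint64_t chunk_end = (va + TWIG_SIZE) & ~TWIG_MASK; /* next twig boundary */
		if (chunk_end > end) {
			chunk_end = end;                            /* clip the final block */
		}
		/* ...operate on [va, chunk_end) here... */
		chunks++;
		va = chunk_end;
	}
	assert(chunks == 3); /* [0x1ff000,0x200000), [0x200000,0x400000), [0x400000,0x401000) */
	return 0;
}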
+
+/*
+ * A convenience function for setting protections on a single page.
+ */
+static inline void
+pmap_set_xprr_perm(vm_address_t page_kva,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+ pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
+}
+#endif /* XNU_MONITOR */
+
+
+/*
+ * pmap_pages_reclaim(): return a page by freeing an active pagetable page.
+ * To be eligible, a pt page must be assigned to a non-kernel pmap.
+ * It must not have any wired PTEs and must contain at least one valid PTE.
+ * If no eligible page is found in the pt page list, return 0.
+ */
+pmap_paddr_t
+pmap_pages_reclaim(
+ void)
+{
+ boolean_t found_page;
+ unsigned i;
+ pt_desc_t *ptdp;
+
+ /*
+ * In a loop, check for a page in the reclaimed pt page list.
+ * If one is present, unlink that page and return its physical address.
+ * Otherwise, scan the pt page list for an eligible pt page to reclaim.
+ * If one is found, invoke pmap_remove_range() on its pmap and address range,
+ * then deallocate that pt page. This will end up adding the pt page to the
+ * reclaimed pt page list.
+ */
+
+ pmap_simple_lock(&pmap_pages_lock);
+ pmap_pages_request_count++;
+ pmap_pages_request_acum++;
+
+ while (1) {
+ if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
+ page_free_entry_t *page_entry;
+
+ page_entry = pmap_pages_reclaim_list;
+ pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
+ pmap_simple_unlock(&pmap_pages_lock);
+
+ return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
+ }
+
+ pmap_simple_unlock(&pmap_pages_lock);
+
+ pmap_simple_lock(&pt_pages_lock);
+ ptdp = (pt_desc_t *)queue_first(&pt_page_list);
+ found_page = FALSE;
while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
if ((ptdp->pmap->nested == FALSE)
- && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
+ && (pmap_try_lock(ptdp->pmap))) {
assert(ptdp->pmap != kernel_pmap);
unsigned refcnt_acc = 0;
unsigned wiredcnt_acc = 0;
* with it while we do that. */
break;
}
- pmap_simple_unlock(&ptdp->pmap->lock);
+ pmap_unlock(ptdp->pmap);
}
ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
}
if (!found_page) {
- panic("%s: No eligible page in pt_page_list", __FUNCTION__);
+ pmap_simple_unlock(&pt_pages_lock);
+ return (pmap_paddr_t)0;
} else {
- int remove_count = 0;
bool need_strong_sync = false;
vm_map_address_t va;
pmap_t pmap;
pt_entry_t *bpte, *epte;
pt_entry_t *pte_p;
tt_entry_t *tte_p;
- uint32_t rmv_spte = 0;
pmap_simple_unlock(&pt_pages_lock);
pmap = ptdp->pmap;
- PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
+ pmap_assert_locked_w(pmap); // pmap write lock should be held from loop above
- __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- for (i = 0; i < PT_INDEX_MAX; i++) {
+ for (i = 0; i < (PAGE_SIZE / pt_attr_page_size(pt_attr)); i++) {
va = ptdp->ptd_info[i].va;
/* If the VA is bogus, this may represent an unallocated region
&& ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
pte_p = (pt_entry_t *) ttetokv(*tte_p);
bpte = &pte_p[pte_index(pmap, pt_attr, va)];
- epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
+ epte = bpte + pt_attr_page_size(pt_attr) / sizeof(pt_entry_t);
/*
* Use PMAP_OPTIONS_REMOVE to clear any
* "compressed" markers and update the
* which could cause the counter to drift
* more and more.
*/
- remove_count += pmap_remove_range_options(
- pmap, va, bpte, epte,
- &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
- if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
- panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
+ pmap_remove_range_options(
+ pmap, va, bpte, epte, NULL,
+ &need_strong_sync, PMAP_OPTIONS_REMOVE);
+ if (ptd_get_info(ptdp, pte_p)->refcnt != 0) {
+ panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptd_get_info(ptdp, pte_p)->refcnt);
}
- pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);
-
- if (remove_count > 0) {
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
- } else {
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
- }
+ pmap_tte_deallocate(pmap, va, va + (size_t)pt_attr_leaf_table_size(pt_attr), need_strong_sync,
+ tte_p, pt_attr_twig_level(pt_attr));
}
}
// Undo the lock we grabbed when we found ptdp above
- PMAP_UNLOCK(pmap);
- pmap_sync_tlb(need_strong_sync);
+ pmap_unlock(pmap);
}
pmap_simple_lock(&pmap_pages_lock);
}
}
-
-static kern_return_t
-pmap_pages_alloc(
- pmap_paddr_t *pa,
- unsigned size,
- unsigned option)
+#if XNU_MONITOR
+/*
+ * Return a PPL page to the free list.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_give_free_ppl_page(pmap_paddr_t paddr)
{
- vm_page_t m = VM_PAGE_NULL, m_prev;
+ assert((paddr & ARM_PGMASK) == 0);
+ void ** new_head = (void **)phystokv(paddr);
+ pmap_simple_lock(&pmap_ppl_free_page_lock);
- if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
- assert(size == PAGE_SIZE);
- *pa = pmap_pages_reclaim();
- return KERN_SUCCESS;
- }
- if (size == PAGE_SIZE) {
- while ((m = vm_page_grab()) == VM_PAGE_NULL) {
- if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
- return KERN_RESOURCE_SHORTAGE;
- }
+ void * cur_head = pmap_ppl_free_page_list;
+ *new_head = cur_head;
+ pmap_ppl_free_page_list = new_head;
+ pmap_ppl_free_page_count++;
- VM_PAGE_WAIT();
- }
- vm_page_lock_queues();
- vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
- vm_page_unlock_queues();
- }
- if (size == 2 * PAGE_SIZE) {
- while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
- if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
- return KERN_RESOURCE_SHORTAGE;
- }
+ pmap_simple_unlock(&pmap_ppl_free_page_lock);
+}
- VM_PAGE_WAIT();
- }
- }
+/*
+ * Get a PPL page from the free list.
+ */
+MARK_AS_PMAP_TEXT static pmap_paddr_t
+pmap_get_free_ppl_page(void)
+{
+ pmap_paddr_t result = 0;
- *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
+ pmap_simple_lock(&pmap_ppl_free_page_lock);
- vm_object_lock(pmap_object);
- while (m != VM_PAGE_NULL) {
- vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
- m_prev = m;
- m = NEXT_PAGE(m_prev);
- *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
+ if (pmap_ppl_free_page_list != NULL) {
+ void ** new_head = NULL;
+ new_head = *((void**)pmap_ppl_free_page_list);
+ result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
+ pmap_ppl_free_page_list = new_head;
+ pmap_ppl_free_page_count--;
+ } else {
+ result = 0L;
}
- vm_object_unlock(pmap_object);
- OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
- OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
+ pmap_simple_unlock(&pmap_ppl_free_page_lock);
+ assert((result & ARM_PGMASK) == 0);
- return KERN_SUCCESS;
+ return result;
}
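
The PPL page free list above is intrusive: each free page stores the pointer to the next free page in its own first word, so no side metadata is required. A minimal user-space sketch of the technique (the names and page size below are invented for illustration and are not pmap symbols):

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE 4096

static void *free_list_head = NULL;             /* plays the role of pmap_ppl_free_page_list */

static void
free_list_push(void *page)
{
	*(void **)page = free_list_head;        /* link through the page itself */
	free_list_head = page;
}

static void *
free_list_pop(void)
{
	void *page = free_list_head;
	if (page != NULL) {
		free_list_head = *(void **)page;
	}
	return page;
}

int
main(void)
{
	void *a = aligned_alloc(FAKE_PAGE_SIZE, FAKE_PAGE_SIZE);
	void *b = aligned_alloc(FAKE_PAGE_SIZE, FAKE_PAGE_SIZE);

	free_list_push(a);
	free_list_push(b);
	assert(free_list_pop() == b);           /* LIFO order, like the PPL free list */
	assert(free_list_pop() == a);
	assert(free_list_pop() == NULL);

	free(a);
	free(b);
	return 0;
}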
-
-static void
-pmap_pages_free(
- pmap_paddr_t pa,
- unsigned size)
+/*
+ * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
+ * as PPL-owned and only allowing the PPL to write to it.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa, bool initially_free)
{
- pmap_simple_lock(&pmap_pages_lock);
+ vm_offset_t kva = 0;
+ unsigned int pai = 0;
+ pp_attr_t attr;
- if (pmap_pages_request_count != 0) {
- page_free_entry_t *page_entry;
+ /*
+ * Mark each page that we allocate as belonging to the monitor, as we
+ * intend to use it for monitor-y stuff (page tables, table pages, that
+ * sort of thing).
+ */
+ if (!pa_valid(pa)) {
+ panic("%s: bad address, "
+ "pa=%p",
+ __func__,
+ (void *)pa);
+ }
- pmap_pages_request_count--;
- page_entry = (page_free_entry_t *)phystokv(pa);
- page_entry->next = pmap_pages_reclaim_list;
- pmap_pages_reclaim_list = page_entry;
- pmap_simple_unlock(&pmap_pages_lock);
+ pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
- return;
+ /* A page that the PPL already owns can't be given to the PPL. */
+ if (pa_test_monitor(pa)) {
+ panic("%s: page already belongs to PPL, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
+ }
+ /* The page cannot be mapped outside of the physical aperture. */
+ if (!pmap_verify_free((ppnum_t)atop(pa))) {
+ panic("%s: page is not free, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
}
- pmap_simple_unlock(&pmap_pages_lock);
+ do {
+ attr = pp_attr_table[pai];
+ if (attr & PP_ATTR_NO_MONITOR) {
+ panic("%s: page excluded from PPL, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
+ }
+ } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
- vm_page_t m;
- pmap_paddr_t pa_max;
+ UNLOCK_PVH(pai);
- OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
+ kva = phystokv(pa);
+ pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
- for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
- vm_object_lock(pmap_object);
- m = vm_page_lookup(pmap_object, (pa - gPhysBase));
- assert(m != VM_PAGE_NULL);
- assert(VM_PAGE_WIRED(m));
- vm_page_lock_queues();
- vm_page_free(m);
- vm_page_unlock_queues();
- vm_object_unlock(pmap_object);
+ if (initially_free) {
+ pmap_give_free_ppl_page(pa);
}
}
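
The do/while above is a standard compare-and-swap retry loop: read the attribute word, validate it, and attempt to publish the updated value, retrying if another CPU changed the word in between. A small stand-alone sketch with C11 atomics standing in for OSCompareAndSwap16 (the flag value and names are illustrative only):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define FLAG_MONITOR 0x0100u   /* invented flag bit for the example */

static _Atomic uint16_t attr_word;

static void
set_flag(_Atomic uint16_t *word, uint16_t flag)
{
	uint16_t old = atomic_load(word);
	/* On failure, 'old' is reloaded with the current value and we retry. */
	while (!atomic_compare_exchange_weak(word, &old, (uint16_t)(old | flag))) {
		;
	}
}

int
main(void)
{
	atomic_store(&attr_word, 0x0003);
	set_flag(&attr_word, FLAG_MONITOR);
	assert(atomic_load(&attr_word) == 0x0103);
	return 0;
}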
-static inline void
-PMAP_ZINFO_PALLOC(
- pmap_t pmap, int bytes)
+static void
+pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
{
- pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
+ pmap_mark_page_as_ppl_page_ppl(pa, true);
}
-static inline void
-PMAP_ZINFO_PFREE(
- pmap_t pmap,
- int bytes)
+MARK_AS_PMAP_TEXT static void
+pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
{
- pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
-}
+ vm_offset_t kva = 0;
+ unsigned int pai = 0;
-static inline void
-pmap_tt_ledger_credit(
- pmap_t pmap,
- vm_size_t size)
-{
- if (pmap != kernel_pmap) {
- pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
- pmap_ledger_credit(pmap, task_ledgers.page_table, size);
- }
-}
+ pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
-static inline void
-pmap_tt_ledger_debit(
- pmap_t pmap,
- vm_size_t size)
-{
- if (pmap != kernel_pmap) {
- pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
- pmap_ledger_debit(pmap, task_ledgers.page_table, size);
+ if (!pa_test_monitor(pa)) {
+ panic("%s: page is not a PPL page, "
+ "pa=%p",
+ __FUNCTION__,
+ (void *)pa);
}
-}
-static bool
-alloc_asid(pmap_t pmap)
-{
- int vasid;
- uint16_t hw_asid;
+ pa_clear_monitor(pa);
+ UNLOCK_PVH(pai);
- pmap_simple_lock(&asid_lock);
- vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
- if (vasid < 0) {
- pmap_simple_unlock(&asid_lock);
- return false;
- }
- assert(vasid < MAX_ASID);
- bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
- pmap_simple_unlock(&asid_lock);
- // bitmap_first() returns highest-order bits first, but a 0-based scheme works
- // slightly better with the collision detection scheme used by pmap_switch_internal().
- vasid = MAX_ASID - 1 - vasid;
- hw_asid = vasid % MAX_HW_ASID;
- pmap->sw_asid = vasid / MAX_HW_ASID;
- hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
-#if __ARM_KERNEL_PROTECT__
- hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
-#endif
- pmap->hw_asid = hw_asid;
- return true;
+ kva = phystokv(pa);
+ pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
}
-static void
-free_asid(pmap_t pmap)
+MARK_AS_PMAP_TEXT static pmap_paddr_t
+pmap_release_ppl_pages_to_kernel_internal(void)
{
- unsigned int vasid;
- uint16_t hw_asid = pmap->hw_asid;
- assert(hw_asid != 0); // Should not try to free kernel ASID
+ pmap_paddr_t pa = 0;
-#if __ARM_KERNEL_PROTECT__
- hw_asid >>= 1;
-#endif
- hw_asid -= 1;
+ if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
+ goto done;
+ }
- vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
- vasid = MAX_ASID - 1 - vasid;
+ pa = pmap_get_free_ppl_page();
- pmap_simple_lock(&asid_lock);
- assert(!bitmap_test(&asid_bitmap[0], vasid));
- bitmap_set(&asid_bitmap[0], vasid);
- pmap_simple_unlock(&asid_lock);
-}
+ if (!pa) {
+ goto done;
+ }
+ pmap_mark_page_as_kernel_page(pa);
-#ifndef PMAP_PV_LOAD_FACTOR
-#define PMAP_PV_LOAD_FACTOR 1
-#endif
+done:
+ return pa;
+}
-#define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
-#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
-#define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
-#define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
-#define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
-#define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
+static uint64_t
+pmap_release_ppl_pages_to_kernel(void)
+{
+ pmap_paddr_t pa = 0;
+ vm_page_t m = VM_PAGE_NULL;
+ vm_page_t local_freeq = VM_PAGE_NULL;
+ uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
-uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
-uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
-uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
+ while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
+ pa = pmap_release_ppl_pages_to_kernel_ppl();
-uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
-uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
-uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
-uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
+ if (!pa) {
+ break;
+ }
-thread_t mapping_replenish_thread;
-event_t mapping_replenish_event;
-event_t pmap_user_pv_throttle_event;
-volatile uint32_t mappingrecurse = 0;
+ /* If we retrieved a page, add it to the free queue. */
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, (pa - gPhysBase));
+ assert(m != VM_PAGE_NULL);
+ assert(VM_PAGE_WIRED(m));
-uint64_t pmap_pv_throttle_stat;
-uint64_t pmap_pv_throttled_waiters;
+ m->vmp_busy = TRUE;
+ m->vmp_snext = local_freeq;
+ local_freeq = m;
+ pmap_ppl_pages_returned_to_kernel_count++;
+ pmap_ppl_pages_returned_to_kernel_count_total++;
-unsigned pmap_mapping_thread_wakeups;
-unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
-unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
-unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
+ vm_object_unlock(pmap_object);
+ }
+ if (local_freeq) {
+ /* We need to hold the object lock for freeing pages. */
+ vm_object_lock(pmap_object);
+ vm_page_free_list(local_freeq, TRUE);
+ vm_object_unlock(pmap_object);
+ }
-static void
-pv_init(
- void)
-{
- simple_lock_init(&pv_free_list_lock, 0);
- simple_lock_init(&pv_kern_free_list_lock, 0);
- pv_free_list = PV_ENTRY_NULL;
- pv_free_count = 0x0U;
- pv_kern_free_list = PV_ENTRY_NULL;
- pv_kern_free_count = 0x0U;
+ return pmap_ppl_pages_returned_to_kernel_count;
}
+#endif /* XNU_MONITOR */
-static inline void PV_ALLOC(pv_entry_t **pv_ep);
-static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
-static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
-static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
-
-static inline void pmap_pv_throttle(pmap_t p);
-
-static boolean_t
-pv_alloc(
- pmap_t pmap,
- unsigned int pai,
- pv_entry_t **pvepp)
+static inline void
+pmap_enqueue_pages(vm_page_t m)
{
- if (pmap != NULL) {
- PMAP_ASSERT_LOCKED(pmap);
+ vm_page_t m_prev;
+ vm_object_lock(pmap_object);
+ while (m != VM_PAGE_NULL) {
+ vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
+ m_prev = m;
+ m = NEXT_PAGE(m_prev);
+ *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
}
- ASSERT_PVH_LOCKED(pai);
- PV_ALLOC(pvepp);
- if (PV_ENTRY_NULL == *pvepp) {
- if ((pmap == NULL) || (kernel_pmap == pmap)) {
- PV_KERN_ALLOC(pvepp);
-
- if (PV_ENTRY_NULL == *pvepp) {
- pv_entry_t *pv_e;
- pv_entry_t *pv_eh;
- pv_entry_t *pv_et;
- int pv_cnt;
- unsigned j;
- pmap_paddr_t pa;
- kern_return_t ret;
+ vm_object_unlock(pmap_object);
+}
- UNLOCK_PVH(pai);
- if (pmap != NULL) {
- PMAP_UNLOCK(pmap);
- }
+static kern_return_t
+pmap_pages_alloc_zeroed(
+ pmap_paddr_t *pa,
+ unsigned size,
+ unsigned option)
+{
+#if XNU_MONITOR
+ ASSERT_NOT_HIBERNATING();
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+ if (size != PAGE_SIZE) {
+ panic("%s: size != PAGE_SIZE, "
+ "pa=%p, size=%u, option=%u",
+ __FUNCTION__,
+ pa, size, option);
+ }
- if (ret == KERN_RESOURCE_SHORTAGE) {
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
- }
- if (ret != KERN_SUCCESS) {
- panic("%s: failed to alloc page for kernel, ret=%d, "
- "pmap=%p, pai=%u, pvepp=%p",
- __FUNCTION__, ret,
- pmap, pai, pvepp);
- }
+ assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
- pv_page_count++;
+ *pa = pmap_get_free_ppl_page();
- pv_e = (pv_entry_t *)phystokv(pa);
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
- *pvepp = pv_e;
- pv_e++;
+ if ((*pa == 0) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
+ *pa = pmap_pages_reclaim();
+ }
- for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
+ if (*pa == 0) {
+ return KERN_RESOURCE_SHORTAGE;
+ } else {
+ bzero((void*)phystokv(*pa), size);
+ return KERN_SUCCESS;
+ }
+#else
+ vm_page_t m = VM_PAGE_NULL;
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
- }
- PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
- if (pmap != NULL) {
- PMAP_LOCK(pmap);
- }
- LOCK_PVH(pai);
- return FALSE;
+ thread_t self = current_thread();
+ // We qualify to allocate reserved memory
+ uint16_t thread_options = self->options;
+ self->options |= TH_OPT_VMPRIV;
+ if (__probable(size == PAGE_SIZE)) {
+ while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+ if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
+ break;
}
- } else {
- UNLOCK_PVH(pai);
- PMAP_UNLOCK(pmap);
- pmap_pv_throttle(pmap);
- {
- pv_entry_t *pv_e;
- pv_entry_t *pv_eh;
- pv_entry_t *pv_et;
- int pv_cnt;
- unsigned j;
- pmap_paddr_t pa;
- kern_return_t ret;
-
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
-
- if (ret != KERN_SUCCESS) {
- panic("%s: failed to alloc page, ret=%d, "
- "pmap=%p, pai=%u, pvepp=%p",
- __FUNCTION__, ret,
- pmap, pai, pvepp);
- }
- pv_page_count++;
+ VM_PAGE_WAIT();
+ }
+ if (m != VM_PAGE_NULL) {
+ vm_page_lock_queues();
+ vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
+ vm_page_unlock_queues();
+ }
+ } else if (size == 2 * PAGE_SIZE) {
+ while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
+ if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
+ break;
+ }
- pv_e = (pv_entry_t *)phystokv(pa);
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
- *pvepp = pv_e;
- pv_e++;
+ VM_PAGE_WAIT();
+ }
+ } else {
+ panic("%s: invalid size %u", __func__, size);
+ }
- for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
+ self->options = thread_options;
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
- }
- PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
- }
- PMAP_LOCK(pmap);
- LOCK_PVH(pai);
- return FALSE;
+ if ((m == VM_PAGE_NULL) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
+ assert(size == PAGE_SIZE);
+ *pa = pmap_pages_reclaim();
+ if (*pa != 0) {
+ bzero((void*)phystokv(*pa), size);
+ return KERN_SUCCESS;
}
}
- assert(PV_ENTRY_NULL != *pvepp);
- return TRUE;
+
+ if (m == VM_PAGE_NULL) {
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
+
+ pmap_enqueue_pages(m);
+
+ OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
+ OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
+
+ bzero((void*)phystokv(*pa), size);
+ return KERN_SUCCESS;
+#endif
}
-static void
-pv_free(
- pv_entry_t *pvep)
+#if XNU_MONITOR
+static pmap_paddr_t
+pmap_alloc_page_for_kern(unsigned int options)
{
- PV_FREE_LIST(pvep, pvep, 1);
+ pmap_paddr_t paddr;
+ vm_page_t m;
+
+ while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+ if (options & PMAP_PAGES_ALLOCATE_NOWAIT) {
+ return 0;
+ }
+ VM_PAGE_WAIT();
+ }
+
+ vm_page_lock_queues();
+ vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
+ vm_page_unlock_queues();
+
+ paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
+
+ if (__improbable(paddr == 0)) {
+ panic("%s: paddr is 0", __func__);
+ }
+
+ pmap_enqueue_pages(m);
+
+ OSAddAtomic(1, &inuse_pmap_pages_count);
+ OSAddAtomic64(1, &alloc_pmap_pages_count);
+
+ return paddr;
}
static void
-pv_list_free(
- pv_entry_t *pvehp,
- pv_entry_t *pvetp,
- unsigned int cnt)
+pmap_alloc_page_for_ppl(unsigned int options)
{
- PV_FREE_LIST(pvehp, pvetp, cnt);
+ thread_t self = current_thread();
+ // We qualify to allocate reserved memory
+ uint16_t thread_options = self->options;
+ self->options |= TH_OPT_VMPRIV;
+ pmap_paddr_t paddr = pmap_alloc_page_for_kern(options);
+ self->options = thread_options;
+ if (paddr != 0) {
+ pmap_mark_page_as_ppl_page(paddr);
+ }
}
-static inline void
-pv_water_mark_check(void)
+static pmap_t
+pmap_alloc_pmap(void)
{
- if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
- if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
- thread_wakeup(&mapping_replenish_event);
+ pmap_t pmap = PMAP_NULL;
+
+ pmap_simple_lock(&pmap_free_list_lock);
+
+ if (pmap_free_list != PMAP_NULL) {
+ pmap = pmap_free_list;
+ pmap_free_list = *((pmap_t *)pmap);
+
+ if (!PMAP_PTR_IS_VALID(pmap)) {
+ panic("%s: allocated pmap is not valid, pmap=%p",
+ __FUNCTION__, pmap);
}
}
+
+ pmap_simple_unlock(&pmap_free_list_lock);
+
+ return pmap;
}
-static inline void
-PV_ALLOC(pv_entry_t **pv_ep)
+static void
+pmap_free_pmap(pmap_t pmap)
{
- assert(*pv_ep == PV_ENTRY_NULL);
- pmap_simple_lock(&pv_free_list_lock);
- /*
- * If the kernel reserved pool is low, let non-kernel mappings allocate
- * synchronously, possibly subject to a throttle.
- */
- if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
- pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
- (*pv_ep)->pve_next = PV_ENTRY_NULL;
- pv_free_count--;
+ if (!PMAP_PTR_IS_VALID(pmap)) {
+ panic("%s: pmap is not valid, "
+ "pmap=%p",
+ __FUNCTION__,
+ pmap);
}
- pmap_simple_unlock(&pv_free_list_lock);
+ pmap_simple_lock(&pmap_free_list_lock);
+ *((pmap_t *)pmap) = pmap_free_list;
+ pmap_free_list = pmap;
+ pmap_simple_unlock(&pmap_free_list_lock);
+}
+
+static void
+pmap_bootstrap_pmap_free_list(void)
+{
+ pmap_t cur_head = PMAP_NULL;
+ unsigned long i = 0;
+
+ simple_lock_init(&pmap_free_list_lock, 0);
+
+ for (i = 0; i < pmap_array_count; i++) {
+ *((pmap_t *)(&pmap_array[i])) = cur_head;
+ cur_head = &pmap_array[i];
+ }
+
+ pmap_free_list = cur_head;
+}
+#endif /* XNU_MONITOR */
+
+static void
+pmap_pages_free(
+ pmap_paddr_t pa,
+ unsigned size)
+{
+ if (__improbable(pmap_pages_request_count != 0)) {
+ page_free_entry_t *page_entry;
+
+ pmap_simple_lock(&pmap_pages_lock);
+
+ if (pmap_pages_request_count != 0) {
+ pmap_pages_request_count--;
+ page_entry = (page_free_entry_t *)phystokv(pa);
+ page_entry->next = pmap_pages_reclaim_list;
+ pmap_pages_reclaim_list = page_entry;
+ pmap_simple_unlock(&pmap_pages_lock);
+ return;
+ }
+
+ pmap_simple_unlock(&pmap_pages_lock);
+ }
+
+#if XNU_MONITOR
+ (void)size;
+
+ pmap_give_free_ppl_page(pa);
+#else
+ vm_page_t m;
+ pmap_paddr_t pa_max;
+
+ OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
+
+ for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, (pa - gPhysBase));
+ assert(m != VM_PAGE_NULL);
+ assert(VM_PAGE_WIRED(m));
+ vm_page_lock_queues();
+ vm_page_free(m);
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+ }
+#endif
}
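
The unlocked peek at pmap_pages_request_count followed by a re-check under pmap_pages_lock is the usual double-checked fast path: the common case (no outstanding request) avoids the lock entirely, and the lock is only taken to confirm and act on a positive result. A stand-alone sketch of the pattern, with a pthread mutex standing in for the simple lock (all names are invented):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned request_count = 0;

static bool
try_satisfy_request(void)
{
	bool satisfied = false;

	if (request_count != 0) {               /* unlocked fast-path check */
		pthread_mutex_lock(&lock);
		if (request_count != 0) {       /* re-validate under the lock */
			request_count--;
			satisfied = true;
		}
		pthread_mutex_unlock(&lock);
	}
	return satisfied;
}

int
main(void)
{
	request_count = 1;
	return try_satisfy_request() ? 0 : 1;
}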
static inline void
-PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
+PMAP_ZINFO_PALLOC(
+ pmap_t pmap, int bytes)
{
- pmap_simple_lock(&pv_free_list_lock);
- pv_et->pve_next = (pv_entry_t *)pv_free_list;
- pv_free_list = pv_eh;
- pv_free_count += pv_cnt;
- pmap_simple_unlock(&pv_free_list_lock);
+ pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}
static inline void
-PV_KERN_ALLOC(pv_entry_t **pv_e)
+PMAP_ZINFO_PFREE(
+ pmap_t pmap,
+ int bytes)
{
- assert(*pv_e == PV_ENTRY_NULL);
- pmap_simple_lock(&pv_kern_free_list_lock);
+ pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
+}
- if ((*pv_e = pv_kern_free_list) != 0) {
- pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
- (*pv_e)->pve_next = PV_ENTRY_NULL;
- pv_kern_free_count--;
- pmap_kern_reserve_alloc_stat++;
+static inline void
+pmap_tt_ledger_credit(
+ pmap_t pmap,
+ vm_size_t size)
+{
+ if (pmap != kernel_pmap) {
+ pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
+ pmap_ledger_credit(pmap, task_ledgers.page_table, size);
}
-
- pmap_simple_unlock(&pv_kern_free_list_lock);
}
static inline void
-PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
+pmap_tt_ledger_debit(
+ pmap_t pmap,
+ vm_size_t size)
{
- pmap_simple_lock(&pv_kern_free_list_lock);
- pv_et->pve_next = pv_kern_free_list;
- pv_kern_free_list = pv_eh;
- pv_kern_free_count += pv_cnt;
- pmap_simple_unlock(&pv_kern_free_list_lock);
+ if (pmap != kernel_pmap) {
+ pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
+ pmap_ledger_debit(pmap, task_ledgers.page_table, size);
+ }
}
static inline void
-pmap_pv_throttle(__unused pmap_t p)
-{
- assert(p != kernel_pmap);
- /* Apply throttle on non-kernel mappings */
- if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
- pmap_pv_throttle_stat++;
- /* This doesn't need to be strictly accurate, merely a hint
- * to eliminate the timeout when the reserve is replenished.
- */
- pmap_pv_throttled_waiters++;
- assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
- thread_block(THREAD_CONTINUE_NULL);
+pmap_update_plru(uint16_t asid_index)
+{
+ if (__probable(pmap_asid_plru)) {
+ unsigned plru_index = asid_index >> 6;
+ if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
+ asid_plru_generation[plru_index] = ++asid_plru_gencount;
+ asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
+ }
}
}
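
pmap_update_plru() above drives a generation-based pseudo-LRU: allocating an ASID clears its bit, and once every bit in a 64-entry chunk has been cleared the chunk is "aged" by bumping its generation and refilling the bitmap, so the allocator prefers chunks that have gone longest without wrapping. A single-chunk, single-threaded sketch of the idea (no atomics, constants invented):

#include <stdint.h>
#include <stdio.h>

static uint64_t plru_bitmap = UINT64_MAX;   /* one bit per entry: 1 = not recently used */
static uint64_t plru_generation = 0;

static void
plru_note_use(unsigned index)
{
	plru_bitmap &= ~(1ULL << (index & 63));
	if (plru_bitmap == 0) {
		/* Chunk exhausted: start a new generation and mark everything reusable. */
		plru_generation++;
		plru_bitmap = UINT64_MAX;
	}
}

int
main(void)
{
	for (unsigned i = 0; i < 64; i++) {
		plru_note_use(i);
	}
	printf("generation=%llu bitmap=%016llx\n",
	    (unsigned long long)plru_generation, (unsigned long long)plru_bitmap);
	return 0;
}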
-/*
- * Creates a target number of free pv_entry_t objects for the kernel free list
- * and the general free list.
- */
-MARK_AS_PMAP_TEXT static kern_return_t
-mapping_free_prime_internal(void)
+static bool
+alloc_asid(pmap_t pmap)
{
- unsigned j;
- pmap_paddr_t pa;
- kern_return_t ret;
- pv_entry_t *pv_e;
- pv_entry_t *pv_eh;
- pv_entry_t *pv_et;
- int pv_cnt;
- int alloc_options = 0;
- int needed_pv_cnt = 0;
- int target_pv_free_cnt = 0;
+ int vasid = -1;
+ uint16_t hw_asid;
- SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
- SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
+ pmap_simple_lock(&asid_lock);
- if (mapping_free_prime_internal_done) {
- return KERN_FAILURE;
+ if (__probable(pmap_asid_plru)) {
+ unsigned plru_index = 0;
+ uint64_t lowest_gen = asid_plru_generation[0];
+ uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
+ for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
+ if (asid_plru_generation[i] < lowest_gen) {
+ plru_index = i;
+ lowest_gen = asid_plru_generation[i];
+ lowest_gen_bitmap = asid_plru_bitmap[i];
+ }
+ }
+
+ for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
+ uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
+ if (temp_plru) {
+ vasid = (plru_index << 6) + lsb_first(temp_plru);
+#if DEVELOPMENT || DEBUG
+ ++pmap_asid_hits;
+#endif
+ break;
+ }
+ }
+ }
+ if (__improbable(vasid < 0)) {
+ // bitmap_first() returns highest-order bits first, but a 0-based scheme works
+ // slightly better with the collision detection scheme used by pmap_switch_internal().
+ vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
+#if DEVELOPMENT || DEBUG
+ ++pmap_asid_misses;
+#endif
}
+ if (__improbable(vasid < 0)) {
+ pmap_simple_unlock(&asid_lock);
+ return false;
+ }
+ assert((uint32_t)vasid < pmap_max_asids);
+ assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
+ bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
+ pmap_simple_unlock(&asid_lock);
+ hw_asid = vasid % asid_chunk_size;
+ pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
+ if (__improbable(hw_asid == MAX_HW_ASIDS)) {
+ /* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
+ * reassign to a reserved VASID. */
+ assert(pmap->sw_asid < UINT8_MAX);
+ pmap->sw_asid = UINT8_MAX;
+ /* Allocate from the high end of the hardware ASID range to reduce the likelihood of
+ * aliasing with vital system processes, which are likely to have lower ASIDs. */
+ hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
+ assert(hw_asid < MAX_HW_ASIDS);
+ }
+ pmap_update_plru(hw_asid);
+ hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
+#if __ARM_KERNEL_PROTECT__
+ hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
+#endif
+ pmap->hw_asid = hw_asid;
+ return true;
+}
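
The virtual ASID handed out above is split into a hardware ASID and a software "epoch" by dividing by the chunk size, and free_asid() reverses the same arithmetic. A tiny worked example (the chunk size and vasid value are made up, and the MAX_HW_ASIDS reassignment special case is ignored):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	const unsigned asid_chunk = 256;                   /* stand-in for asid_chunk_size */
	const unsigned vasid = 777;                        /* some allocated virtual ASID */

	uint16_t hw_asid = (uint16_t)(vasid % asid_chunk); /* 777 % 256 == 9 */
	uint8_t  sw_asid = (uint8_t)(vasid / asid_chunk);  /* 777 / 256 == 3 */

	/* The pair uniquely reconstructs the virtual ASID, which is what free_asid() relies on. */
	assert((unsigned)sw_asid * asid_chunk + hw_asid == vasid);
	return 0;
}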
- if (!mapping_free_prime_internal_called) {
- mapping_free_prime_internal_called = TRUE;
+static void
+free_asid(pmap_t pmap)
+{
+ unsigned int vasid;
+ uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
+ if (__improbable(hw_asid == 0)) {
+ return;
+ }
- pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
+#if __ARM_KERNEL_PROTECT__
+ hw_asid >>= 1;
+#endif
+ hw_asid -= 1;
- /* Alterable via sysctl */
- pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
+ if (__improbable(pmap->sw_asid == UINT8_MAX)) {
+ vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
+ } else {
+ vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
+ }
- pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
- pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
+ if (__probable(pmap_asid_plru)) {
+ os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
}
+ pmap_simple_lock(&asid_lock);
+ assert(!bitmap_test(&asid_bitmap[0], vasid));
+ bitmap_set(&asid_bitmap[0], vasid);
+ pmap_simple_unlock(&asid_lock);
+}
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
- target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;
- /*
- * We don't take the lock to read pv_free_count, as we should not be
- * invoking this from a multithreaded context.
- */
- needed_pv_cnt = target_pv_free_cnt - pv_free_count;
+#if XNU_MONITOR
- if (needed_pv_cnt > target_pv_free_cnt) {
- needed_pv_cnt = 0;
- }
+/*
+ * Increase the padding for PPL devices to accommodate increased
+ * mapping pressure from IOMMUs. This isn't strictly necessary, but
+ * will reduce the need to retry mappings due to PV allocation failure.
+ */
- while (pv_cnt < needed_pv_cnt) {
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
+#define PV_LOW_WATER_MARK_DEFAULT (0x400)
+#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x400)
+#define PV_ALLOC_CHUNK_INITIAL (0x400)
+#define PV_KERN_ALLOC_CHUNK_INITIAL (0x400)
+#define PV_CPU_MIN (0x80)
+#define PV_CPU_MAX (0x400)
- assert(ret == KERN_SUCCESS);
+#else
- pv_page_count++;
+#define PV_LOW_WATER_MARK_DEFAULT (0x200)
+#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200)
+#define PV_ALLOC_CHUNK_INITIAL (0x200)
+#define PV_KERN_ALLOC_CHUNK_INITIAL (0x200)
+#define PV_CPU_MIN (0x40)
+#define PV_CPU_MAX (0x200)
- pv_e = (pv_entry_t *)phystokv(pa);
+#endif
- for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
+#define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
+#define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
- }
+uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
+
+uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA = PV_KERN_LOW_WATER_MARK_DEFAULT;
+uint32_t pv_alloc_initial_target MARK_AS_PMAP_DATA = PV_ALLOC_INITIAL_TARGET;
+uint32_t pv_kern_alloc_initial_target MARK_AS_PMAP_DATA = PV_KERN_ALLOC_INITIAL_TARGET;
+
+unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
+unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
+
+static inline void pv_list_alloc(pv_entry_t **pv_ep);
+static inline void pv_list_kern_alloc(pv_entry_t **pv_e);
+static inline void pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
+
+static pv_alloc_return_t
+pv_alloc(
+ pmap_t pmap,
+ unsigned int pai,
+ pv_entry_t **pvepp)
+{
+ if (pmap != NULL) {
+ pmap_assert_locked_w(pmap);
+ }
+ ASSERT_PVH_LOCKED(pai);
+ pv_list_alloc(pvepp);
+ if (PV_ENTRY_NULL != *pvepp) {
+ return PV_ALLOC_SUCCESS;
}
+#if XNU_MONITOR
+ unsigned alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT;
+#else
+ unsigned alloc_flags = 0;
+#endif
+ if ((pmap == NULL) || (kernel_pmap == pmap)) {
+ pv_list_kern_alloc(pvepp);
- if (pv_cnt) {
- PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
+ if (PV_ENTRY_NULL != *pvepp) {
+ return PV_ALLOC_SUCCESS;
+ }
+ alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT;
}
+ pv_entry_t *pv_e;
+ pv_entry_t *pv_eh;
+ pv_entry_t *pv_et;
+ int pv_cnt;
+ pmap_paddr_t pa;
+ kern_return_t ret;
+ pv_alloc_return_t pv_status = PV_ALLOC_RETRY;
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
- target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;
+ UNLOCK_PVH(pai);
+ if (pmap != NULL) {
+ pmap_unlock(pmap);
+ }
- /*
- * We don't take the lock to read pv_kern_free_count, as we should not
- * be invoking this from a multithreaded context.
- */
- needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;
+ ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, alloc_flags);
- if (needed_pv_cnt > target_pv_free_cnt) {
- needed_pv_cnt = 0;
+ if (ret != KERN_SUCCESS) {
+ pv_status = PV_ALLOC_FAIL;
+ goto pv_alloc_cleanup;
}
- while (pv_cnt < needed_pv_cnt) {
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
+ pv_page_count++;
- assert(ret == KERN_SUCCESS);
- pv_page_count++;
+ pv_e = (pv_entry_t *)phystokv(pa);
+ *pvepp = pv_e;
+ pv_cnt = (PAGE_SIZE / sizeof(pv_entry_t)) - 1;
+ pv_eh = pv_e + 1;
+ pv_et = &pv_e[pv_cnt];
+
+ pv_list_free(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
+pv_alloc_cleanup:
+ if (pmap != NULL) {
+ pmap_lock(pmap);
+ }
+ LOCK_PVH(pai);
+ return pv_status;
+}
- pv_e = (pv_entry_t *)phystokv(pa);
+static inline void
+pv_free_entry(
+ pv_entry_t *pvep)
+{
+ pv_list_free(pvep, pvep, 1, pv_kern_low_water_mark);
+}
- for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
+static inline void
+pv_free_list_alloc(pv_free_list_t *free_list, pv_entry_t **pv_ep)
+{
+ assert(((free_list->list != NULL) && (free_list->count > 0)) ||
+ ((free_list->list == NULL) && (free_list->count == 0)));
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
+ if ((*pv_ep = free_list->list) != NULL) {
+ pv_entry_t *pv_e = *pv_ep;
+ if ((pv_e->pve_next == NULL) && (free_list->count > 1)) {
+ free_list->list = pv_e + 1;
+ } else {
+ free_list->list = pv_e->pve_next;
+ pv_e->pve_next = PV_ENTRY_NULL;
}
+ free_list->count--;
}
+}
- if (pv_cnt) {
- PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
+static inline void
+pv_list_alloc(pv_entry_t **pv_ep)
+{
+ assert(*pv_ep == PV_ENTRY_NULL);
+#if !XNU_MONITOR
+ mp_disable_preemption();
+#endif
+ pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
+ pv_free_list_alloc(&pmap_cpu_data->pv_free, pv_ep);
+#if !XNU_MONITOR
+ mp_enable_preemption();
+#endif
+ if (*pv_ep != PV_ENTRY_NULL) {
+ return;
+ }
+#if !XNU_MONITOR
+ if (pv_kern_free.count < pv_kern_low_water_mark) {
+ /*
+ * If the kernel reserved pool is low, let non-kernel mappings wait for a page
+ * from the VM.
+ */
+ return;
}
+#endif
+ pmap_simple_lock(&pv_free_list_lock);
+ pv_free_list_alloc(&pv_free, pv_ep);
+ pmap_simple_unlock(&pv_free_list_lock);
+}
- mapping_free_prime_internal_done = TRUE;
- return KERN_SUCCESS;
+static inline void
+pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
+{
+ if (pv_cnt == 1) {
+ bool limit_exceeded = false;
+#if !XNU_MONITOR
+ mp_disable_preemption();
+#endif
+ pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
+ pv_et->pve_next = pmap_cpu_data->pv_free.list;
+ pmap_cpu_data->pv_free.list = pv_eh;
+ if (pmap_cpu_data->pv_free.count == PV_CPU_MIN) {
+ pmap_cpu_data->pv_free_tail = pv_et;
+ }
+ pmap_cpu_data->pv_free.count += pv_cnt;
+ if (__improbable(pmap_cpu_data->pv_free.count > PV_CPU_MAX)) {
+ pv_et = pmap_cpu_data->pv_free_tail;
+ pv_cnt = pmap_cpu_data->pv_free.count - PV_CPU_MIN;
+ pmap_cpu_data->pv_free.list = pmap_cpu_data->pv_free_tail->pve_next;
+ pmap_cpu_data->pv_free.count = PV_CPU_MIN;
+ limit_exceeded = true;
+ }
+#if !XNU_MONITOR
+ mp_enable_preemption();
+#endif
+ if (__probable(!limit_exceeded)) {
+ return;
+ }
+ }
+ if (__improbable(pv_kern_free.count < kern_target)) {
+ pmap_simple_lock(&pv_kern_free_list_lock);
+ pv_et->pve_next = pv_kern_free.list;
+ pv_kern_free.list = pv_eh;
+ pv_kern_free.count += pv_cnt;
+ pmap_simple_unlock(&pv_kern_free_list_lock);
+ } else {
+ pmap_simple_lock(&pv_free_list_lock);
+ pv_et->pve_next = pv_free.list;
+ pv_free.list = pv_eh;
+ pv_free.count += pv_cnt;
+ pmap_simple_unlock(&pv_free_list_lock);
+ }
}
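
pv_list_free() above keeps a small per-CPU cache of free PV entries and, once the cache grows past PV_CPU_MAX, spills the excess back to a global list, so each CPU keeps a hot working set without hoarding entries. A single-threaded sketch of that bounded-cache-with-spill idea (the kernel disables preemption and keeps a tail pointer so it can spill in bulk; both are simplified away here, and every name is invented):

#include <assert.h>
#include <stddef.h>

#define CPU_MAX 4   /* stand-in for PV_CPU_MAX */

typedef struct entry {
	struct entry *next;
} entry_t;

static entry_t *cpu_list = NULL;
static int cpu_count = 0;
static entry_t *global_list = NULL;
static int global_count = 0;

static void
entry_free(entry_t *e)
{
	e->next = cpu_list;
	cpu_list = e;
	if (++cpu_count > CPU_MAX) {
		/* Over the high-water mark: move one entry to the global list. */
		entry_t *spill = cpu_list;
		cpu_list = spill->next;
		cpu_count--;
		spill->next = global_list;
		global_list = spill;
		global_count++;
	}
}

int
main(void)
{
	static entry_t pool[6];

	for (int i = 0; i < 6; i++) {
		entry_free(&pool[i]);
	}
	assert(cpu_count == CPU_MAX && global_count == 2);
	return 0;
}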
-void
-mapping_free_prime(void)
+static inline void
+pv_list_kern_alloc(pv_entry_t **pv_ep)
{
- kern_return_t kr = KERN_FAILURE;
-
- kr = mapping_free_prime_internal();
-
- if (kr != KERN_SUCCESS) {
- panic("%s: failed, kr=%d",
- __FUNCTION__, kr);
+ assert(*pv_ep == PV_ENTRY_NULL);
+ pmap_simple_lock(&pv_kern_free_list_lock);
+ if (pv_kern_free.count > 0) {
+ pmap_kern_reserve_alloc_stat++;
}
+ pv_free_list_alloc(&pv_kern_free, pv_ep);
+ pmap_simple_unlock(&pv_kern_free_list_lock);
}
-void mapping_replenish(void);
-
void
mapping_adjust(void)
{
- kern_return_t mres;
-
- mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
- if (mres != KERN_SUCCESS) {
- panic("%s: mapping_replenish thread creation failed",
- __FUNCTION__);
- }
- thread_deallocate(mapping_replenish_thread);
+ // Not implemented for arm/arm64
}
/*
* Fills the kernel and general PV free lists back up to their low watermarks.
*/
MARK_AS_PMAP_TEXT static kern_return_t
-mapping_replenish_internal(void)
+mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
{
- pv_entry_t *pv_e;
pv_entry_t *pv_eh;
pv_entry_t *pv_et;
int pv_cnt;
- unsigned j;
pmap_paddr_t pa;
kern_return_t ret = KERN_SUCCESS;
- while (pv_kern_free_count < pv_kern_low_water_mark) {
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
-
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
- assert(ret == KERN_SUCCESS);
-
- pv_page_count++;
-
- pv_e = (pv_entry_t *)phystokv(pa);
-
- for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
-
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
+ while ((pv_free.count < user_target_count) || (pv_kern_free.count < kern_target_count)) {
+#if XNU_MONITOR
+ if ((ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
+ return ret;
}
- pmap_kernel_reserve_replenish_stat += pv_cnt;
- PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
- }
-
- while (pv_free_count < pv_low_water_mark) {
- pv_cnt = 0;
- pv_eh = pv_et = PV_ENTRY_NULL;
-
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
+#else
+ ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
assert(ret == KERN_SUCCESS);
+#endif
pv_page_count++;
- pv_e = (pv_entry_t *)phystokv(pa);
+ pv_eh = (pv_entry_t *)phystokv(pa);
+ pv_cnt = PAGE_SIZE / sizeof(pv_entry_t);
+ pv_et = &pv_eh[pv_cnt - 1];
- for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
- pv_e->pve_next = pv_eh;
- pv_eh = pv_e;
-
- if (pv_et == PV_ENTRY_NULL) {
- pv_et = pv_e;
- }
- pv_cnt++;
- pv_e++;
- }
- pmap_user_reserve_replenish_stat += pv_cnt;
- PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
+ pmap_reserve_replenish_stat += pv_cnt;
+ pv_list_free(pv_eh, pv_et, pv_cnt, kern_target_count);
}
return ret;
}
/*
- * Continuation function that keeps the PV free lists from running out of free
- * elements.
+ * Creates a target number of free pv_entry_t objects for the kernel free list
+ * and the general free list.
*/
-__attribute__((noreturn))
+MARK_AS_PMAP_TEXT static kern_return_t
+mapping_free_prime_internal(void)
+{
+ return mapping_replenish_internal(pv_kern_alloc_initial_target, pv_alloc_initial_target);
+}
+
void
-mapping_replenish(void)
+mapping_free_prime(void)
{
- kern_return_t kr;
+ kern_return_t kr = KERN_FAILURE;
- /* We qualify for VM privileges...*/
- current_thread()->options |= TH_OPT_VMPRIV;
+#if XNU_MONITOR
+ unsigned int i = 0;
- for (;;) {
- kr = mapping_replenish_internal();
+ /*
+ * Allocate the needed PPL pages up front, to minimize the chance that
+ * we will need to call into the PPL multiple times.
+ */
+ for (i = 0; i < pv_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
+ pmap_alloc_page_for_ppl(0);
+ }
- if (kr != KERN_SUCCESS) {
- panic("%s: failed, kr=%d", __FUNCTION__, kr);
- }
+ for (i = 0; i < pv_kern_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
+ pmap_alloc_page_for_ppl(0);
+ }
- /*
- * Wake threads throttled while the kernel reserve was being replenished.
- */
- if (pmap_pv_throttled_waiters) {
- pmap_pv_throttled_waiters = 0;
- thread_wakeup(&pmap_user_pv_throttle_event);
- }
+ while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl(0);
+ }
+#else
+ kr = mapping_free_prime_internal();
+#endif
- /* Check if the kernel pool has been depleted since the
- * first pass, to reduce refill latency.
- */
- if (pv_kern_free_count < pv_kern_low_water_mark) {
- continue;
- }
- /* Block sans continuation to avoid yielding kernel stack */
- assert_wait(&mapping_replenish_event, THREAD_UNINT);
- mappingrecurse = 0;
- thread_block(THREAD_CONTINUE_NULL);
- pmap_mapping_thread_wakeups++;
+ if (kr != KERN_SUCCESS) {
+ panic("%s: failed, kr=%d",
+ __FUNCTION__, kr);
}
}
-
static void
ptd_bootstrap(
pt_desc_t *ptdp,
unsigned int ptd_cnt)
{
simple_lock_init(&ptd_free_list_lock, 0);
- while (ptd_cnt != 0) {
- (*(void **)ptdp) = (void *)ptd_free_list;
- ptd_free_list = ptdp;
- ptdp++;
- ptd_cnt--;
- ptd_free_count++;
- }
+ // Region represented by ptdp should be cleared by pmap_bootstrap()
+ *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
+ ptd_free_list = ptdp;
+ ptd_free_count += ptd_cnt;
ptd_preboot = FALSE;
}
static pt_desc_t*
-ptd_alloc_unlinked(bool reclaim)
+ptd_alloc_unlinked(void)
{
pt_desc_t *ptdp;
unsigned i;
pmap_simple_lock(&ptd_free_list_lock);
}
+ assert(((ptd_free_list != NULL) && (ptd_free_count > 0)) ||
+ ((ptd_free_list == NULL) && (ptd_free_count == 0)));
+
if (ptd_free_count == 0) {
- unsigned int ptd_cnt;
- pt_desc_t *ptdp_next;
+ unsigned int ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
if (ptd_preboot) {
ptdp = (pt_desc_t *)avail_start;
- avail_start += ARM_PGBYTES;
- ptdp_next = ptdp;
- ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
+ avail_start += PAGE_SIZE;
+ bzero(ptdp, PAGE_SIZE);
} else {
pmap_paddr_t pa;
- kern_return_t ret;
pmap_simple_unlock(&ptd_free_list_lock);
- if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
- if (reclaim) {
- ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
- assert(ret == KERN_SUCCESS);
- } else {
- return NULL;
- }
+ if (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
+ return NULL;
}
ptdp = (pt_desc_t *)phystokv(pa);
pmap_simple_lock(&ptd_free_list_lock);
- ptdp_next = ptdp;
- ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
}
- while (ptd_cnt != 0) {
- (*(void **)ptdp_next) = (void *)ptd_free_list;
- ptd_free_list = ptdp_next;
- ptdp_next++;
- ptd_cnt--;
- ptd_free_count++;
- }
+ *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
+ ptd_free_list = ptdp;
+ ptd_free_count += ptd_cnt;
}
if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
+ if ((ptd_free_list == NULL) && (ptd_free_count > 1)) {
+ ptd_free_list = ptdp + 1;
+ }
ptd_free_count--;
} else {
panic("%s: out of ptd entry",
}
static inline pt_desc_t*
-ptd_alloc(pmap_t pmap, bool reclaim)
+ptd_alloc(pmap_t pmap)
{
- pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
+ pt_desc_t *ptdp = ptd_alloc_unlinked();
if (ptdp == NULL) {
return NULL;
unsigned int level,
pt_entry_t *pte_p)
{
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
if (ptdp->pmap != pmap) {
panic("%s: pmap mismatch, "
"ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
ptdp, pmap, (void*)va, level, pte_p);
}
-#if (__ARM_VMSA__ == 7)
- assert(level == 2);
- ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
-#else
- assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
- ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
-#endif
- if (level < PMAP_TT_MAX_LEVEL) {
- ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
+ assert(level > pt_attr_root_level(pt_attr));
+ ptd_info_t *ptd_info = ptd_get_info(ptdp, pte_p);
+ ptd_info->va = (vm_offset_t) va & ~pt_attr_ln_pt_offmask(pt_attr, level - 1);
+
+ if (level < pt_attr_leaf_level(pt_attr)) {
+ ptd_info->refcnt = PT_DESC_REFCOUNT;
}
}
if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
return PT_ENTRY_NULL;
}
- ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
+ ptp = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_addr, addr);
return ptp;
}
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
+static void
+pmap_compute_pv_targets(void)
+{
+ DTEntry entry;
+ void const *prop = NULL;
+ int err;
+ unsigned int prop_size;
+
+ err = SecureDTLookupEntry(NULL, "/defaults", &entry);
+ assert(err == kSuccess);
+
+ if (kSuccess == SecureDTGetProperty(entry, "pmap-pv-count", &prop, &prop_size)) {
+ if (prop_size != sizeof(pv_alloc_initial_target)) {
+ panic("pmap-pv-count property is not a 32-bit integer");
+ }
+ pv_alloc_initial_target = *((uint32_t const *)prop);
+ }
+
+ if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-count", &prop, &prop_size)) {
+ if (prop_size != sizeof(pv_kern_alloc_initial_target)) {
+ panic("pmap-kern-pv-count property is not a 32-bit integer");
+ }
+ pv_kern_alloc_initial_target = *((uint32_t const *)prop);
+ }
+
+ if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-min", &prop, &prop_size)) {
+ if (prop_size != sizeof(pv_kern_low_water_mark)) {
+ panic("pmap-kern-pv-min property is not a 32-bit integer");
+ }
+ pv_kern_low_water_mark = *((uint32_t const *)prop);
+ }
+}
+
+
+static uint32_t
+pmap_compute_max_asids(void)
+{
+ DTEntry entry;
+ void const *prop = NULL;
+ uint32_t max_asids;
+ int err;
+ unsigned int prop_size;
+
+ err = SecureDTLookupEntry(NULL, "/defaults", &entry);
+ assert(err == kSuccess);
+
+ if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
+ /* TODO: consider allowing maxproc limits to be scaled earlier so that
+ * we can choose a more flexible default value here. */
+ return MAX_ASIDS;
+ }
+
+ if (prop_size != sizeof(max_asids)) {
+ panic("pmap-max-asids property is not a 32-bit integer");
+ }
+
+ max_asids = *((uint32_t const *)prop);
+ /* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
+ max_asids = (max_asids + 63) & ~63UL;
+
+ if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
+ /* currently capped by size of pmap->sw_asid */
+ panic("pmap-max-asids too large");
+ }
+ if (max_asids == 0) {
+ panic("pmap-max-asids cannot be zero");
+ }
+ return max_asids;
+}
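
The rounding of pmap-max-asids to a multiple of 64 above uses the usual (x + 63) & ~63 trick, which works for any power-of-two alignment. A quick sketch:

#include <assert.h>
#include <stdint.h>

static uint32_t
round_up_64(uint32_t x)
{
	return (x + 63) & ~63u;
}

int
main(void)
{
	assert(round_up_64(1) == 64);
	assert(round_up_64(64) == 64);
	assert(round_up_64(65) == 128);
	return 0;
}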
+
+
static vm_size_t
pmap_compute_io_rgns(void)
{
DTEntry entry;
- pmap_io_range_t *ranges;
+ pmap_io_range_t const *ranges;
uint64_t rgn_end;
- void *prop = NULL;
+ void const *prop = NULL;
int err;
unsigned int prop_size;
- err = DTLookupEntry(NULL, "/defaults", &entry);
+ err = SecureDTLookupEntry(NULL, "/defaults", &entry);
assert(err == kSuccess);
- if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
+ if (kSuccess != SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
return 0;
}
pmap_load_io_rgns(void)
{
DTEntry entry;
- pmap_io_range_t *ranges;
- void *prop = NULL;
+ pmap_io_range_t const *ranges;
+ void const *prop = NULL;
int err;
unsigned int prop_size;
return;
}
- err = DTLookupEntry(NULL, "/defaults", &entry);
+ err = SecureDTLookupEntry(NULL, "/defaults", &entry);
assert(err == kSuccess);
- err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
+ err = SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
assert(err == kSuccess);
ranges = prop;
vm_size_t ptd_root_table_size;
vm_size_t pp_attr_table_size;
vm_size_t io_attr_table_size;
+ vm_size_t asid_table_size;
unsigned int npages;
vm_map_offset_t maxoffset;
lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
+#if XNU_MONITOR
+
+#if DEVELOPMENT || DEBUG
+ PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
+#endif
+
+#if CONFIG_CSR_FROM_DT
+ if (csr_unsafe_kernel_text) {
+ pmap_ppl_disable = true;
+ }
+#endif /* CONFIG_CSR_FROM_DT */
+
+#endif /* XNU_MONITOR */
#if DEVELOPMENT || DEBUG
if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
#endif
kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
- kernel_pmap->nested_region_grand_addr = 0x0ULL;
- kernel_pmap->nested_region_subord_addr = 0x0ULL;
+#if ARM_PARAMETERIZED_PMAP
+ kernel_pmap->pmap_pt_attr = native_pt_attr;
+#endif /* ARM_PARAMETERIZED_PMAP */
+
+ kernel_pmap->nested_region_addr = 0x0ULL;
kernel_pmap->nested_region_size = 0x0ULL;
kernel_pmap->nested_region_asid_bitmap = NULL;
kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
kernel_pmap->hw_asid = 0;
kernel_pmap->sw_asid = 0;
- PMAP_LOCK_INIT(kernel_pmap);
+ pmap_lock_init(kernel_pmap);
memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
/* allocate space for and initialize the bookkeeping structures */
pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
// allocate enough initial PTDs to map twice the available physical memory
ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
+ pmap_max_asids = pmap_compute_max_asids();
+ pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
+ PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
+ /* Align the range of available hardware ASIDs to a multiple of 64 to enable the
+ * masking used by the PLRU scheme. This means we must handle the case in which
+ * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
+ _Static_assert(sizeof(asid_plru_bitmap[0]) == sizeof(uint64_t), "bitmap_t is not a 64-bit integer");
+ _Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
+ asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);
+
+ asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);
+
+ pmap_compute_pv_targets();
pmap_struct_start = avail_start;
pv_head_table = (pv_entry_t **) phystokv(avail_start);
avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
ptd_root_table = (pt_desc_t *)phystokv(avail_start);
- avail_start = round_page(avail_start + ptd_root_table_size);
+ avail_start = PMAP_ALIGN(avail_start + ptd_root_table_size, __alignof(bitmap_t));
+ asid_bitmap = (bitmap_t*)phystokv(avail_start);
+ avail_start = round_page(avail_start + asid_table_size);
memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
pmap_load_io_rgns();
ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
+#if XNU_MONITOR
+ pmap_array_begin = (void *)phystokv(avail_start);
+ pmap_array = pmap_array_begin;
+ avail_start += round_page(PMAP_ARRAY_SIZE * sizeof(struct pmap));
+ pmap_array_end = (void *)phystokv(avail_start);
+
+ pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
+
+ pmap_bootstrap_pmap_free_list();
+
+ pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
+ pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
+ avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
+ pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
+
+ pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
+ pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
+ avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
+ pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
+#endif /* XNU_MONITOR */
pmap_cpu_data_array_init();
vm_first_phys = gPhysBase;
vm_last_phys = trunc_page(avail_end);
- simple_lock_init(&pmaps_lock, 0);
- simple_lock_init(&asid_lock, 0);
- simple_lock_init(&tt1_lock, 0);
queue_init(&map_pmap_list);
queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
free_page_size_tt_list = TT_FREE_ENTRY_NULL;
free_tt_count = 0;
free_tt_max = 0;
- simple_lock_init(&pt_pages_lock, 0);
queue_init(&pt_page_list);
- simple_lock_init(&pmap_pages_lock, 0);
pmap_pages_request_count = 0;
pmap_pages_request_acum = 0;
pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
virtual_space_start = vstart;
virtual_space_end = VM_MAX_KERNEL_ADDRESS;
- bitmap_full(&asid_bitmap[0], MAX_ASID);
+ bitmap_full(&asid_bitmap[0], pmap_max_asids);
+ bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
+ // Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
+ asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);
}
#endif
-#if DEVELOPMENT || DEBUG
- PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
-#endif /* DEVELOPMENT || DEBUG */
-
- pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
- pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
-
- simple_lock_init(&phys_backup_lock, 0);
+ PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));
#if MACH_ASSERT
#endif /* KASAN */
}
+#if XNU_MONITOR
+
+static inline void
+pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
+{
+ pmap_paddr_t cur_pa;
+ for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
+ assert(pa_valid(cur_pa));
+ pa_set_monitor(cur_pa);
+ }
+}
+
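+/*
+ * Mark a physical range as PPL-owned and transition its physical-aperture
+ * mappings from expected_perm to new_perm in the xPRR permission scheme.
+ */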
+static void
+pa_set_range_xprr_perm(pmap_paddr_t start_pa,
+ pmap_paddr_t end_pa,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+ vm_offset_t start_va = phystokv(start_pa);
+ vm_offset_t end_va = start_va + (end_pa - start_pa);
+
+ pa_set_range_monitor(start_pa, end_pa);
+ pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
+}
+
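+/*
+ * Walk the physical pages backing the kernelcache and tag each one with
+ * PVH_FLAG_LOCKDOWN so that its mappings can no longer be removed or remapped.
+ */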
+static void
+pmap_lockdown_kc(void)
+{
+ extern vm_offset_t vm_kernelcache_base;
+ extern vm_offset_t vm_kernelcache_top;
+ pmap_paddr_t start_pa = kvtophys(vm_kernelcache_base);
+ pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
+ pmap_paddr_t cur_pa = start_pa;
+ vm_offset_t cur_va = vm_kernelcache_base;
+ while (cur_pa < end_pa) {
+ vm_size_t range_size = end_pa - cur_pa;
+ vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
+ if (ptov_va != cur_va) {
+ /*
+ * If the physical address maps back to a virtual address that is non-linear
+ * w.r.t. the kernelcache, that means it corresponds to memory that will be
+ * reclaimed by the OS and should therefore not be locked down.
+ */
+ cur_pa += range_size;
+ cur_va += range_size;
+ continue;
+ }
+ unsigned int pai = (unsigned int)pa_index(cur_pa);
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+
+ if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("pai %d already locked down", pai);
+ }
+ pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN);
+ cur_pa += ARM_PGBYTES;
+ cur_va += ARM_PGBYTES;
+ }
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+ extern uint64_t ctrr_ro_test;
+ extern uint64_t ctrr_nx_test;
+ pmap_paddr_t exclude_pages[] = {kvtophys((vm_offset_t)&ctrr_ro_test), kvtophys((vm_offset_t)&ctrr_nx_test)};
+ for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
+ pv_entry_t **pv_h = pai_to_pvh(pa_index(exclude_pages[i]));
+ pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN);
+ }
+#endif
+}
+
+void
+pmap_static_allocations_done(void)
+{
+ pmap_paddr_t monitor_start_pa;
+ pmap_paddr_t monitor_end_pa;
+
+ /*
+ * Protect the bootstrap (V=P and V->P) page tables.
+ *
+ * These bootstrap allocations will be used primarily for page tables.
+ * If we wish to secure the page tables, we need to start by marking
+ * these bootstrap allocations as pages that we want to protect.
+ */
+ monitor_start_pa = kvtophys((vm_offset_t)&bootstrap_pagetables);
+ monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;
+
+	/* The bootstrap page tables are mapped RW at bootstrap. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);
+
+ /*
+ * We use avail_start as a pointer to the first address that has not
+ * been reserved for bootstrap, so we know which pages to give to the
+ * virtual memory layer.
+ */
+ monitor_start_pa = BootArgs->topOfKernelData;
+ monitor_end_pa = avail_start;
+
+ /* The other bootstrap allocations are mapped RW at bootstrap. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+
+ /*
+ * The RO page tables are mapped RW in arm_vm_init() and later restricted
+ * to RO in arm_vm_prot_finalize(), which is called after this function.
+ * Here we only need to mark the underlying physical pages as PPL-owned to ensure
+ * they can't be allocated for other uses. We don't need a special xPRR
+ * protection index, as there is no PPL_RO index, and these pages are ultimately
+ * protected by KTRR/CTRR. Furthermore, use of PPL_RW for these pages would
+ * expose us to a functional issue on H11 devices where CTRR shifts the APRR
+ * lookup table index to USER_XO before APRR is applied, leading the hardware
+	 * to believe we are dealing with a user XO page upon performing a translation.
+ */
+ monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
+ monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
+ pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
+
+ monitor_start_pa = kvtophys(segPPLDATAB);
+ monitor_end_pa = monitor_start_pa + segSizePPLDATA;
+
+ /* PPL data is RW for the PPL, RO for the kernel. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+
+ monitor_start_pa = kvtophys(segPPLTEXTB);
+ monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
+
+ /* PPL text is RX for the PPL, RO for the kernel. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
+
+
+ /*
+	 * To support DTrace, the save areas for the PPL must be writable,
+	 * because DTrace will try to update register state.
+ */
+ if (pmap_ppl_disable) {
+ vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
+ vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
+
+ pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
+ }
+
+
+ if (segSizePPLDATACONST > 0) {
+ monitor_start_pa = kvtophys(segPPLDATACONSTB);
+ monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
+
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
+ }
+
+ /*
+ * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
+ * precaution. The real RW mappings are at a different location with guard pages.
+ */
+ pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
+
+ /* Prevent remapping of the kernelcache */
+ pmap_lockdown_kc();
+}
+
+
+void
+pmap_lockdown_ppl(void)
+{
+ /* Mark the PPL as being locked down. */
+
+#error "XPRR configuration error"
+
+}
+#endif /* XNU_MONITOR */
void
pmap_virtual_space(
)
{
boolean_t ret = FALSE;
-#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
if (region_select == 0) {
/*
* In this config, the bootstrap mappings should occupy their own L2
#else
#error Unsupported configuration
#endif
+#if defined(ARM_LARGE_MEMORY)
+ *size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
+#else
*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
+#endif
ret = TRUE;
}
#else
*startp = gVirtBase & 0xFFFFFFFFFE000000;
*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
} else {
- *startp = gVirtBase & 0xFFFFFFFFFF800000;
- *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
+ *startp = gVirtBase & 0xFFFFFFFFFF800000;
+ *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
+ }
+#endif
+ ret = TRUE;
+ }
+ if (region_select == 1) {
+ *startp = VREGION1_START;
+ *size = VREGION1_SIZE;
+ ret = TRUE;
+ }
+#if (__ARM_VMSA__ > 7)
+ /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
+ if (!TEST_PAGE_SIZE_4K) {
+ low_global_vr_mask = 0xFFFFFFFFFE000000;
+ low_global_vr_size = 0x2000000;
+ } else {
+ low_global_vr_mask = 0xFFFFFFFFFF800000;
+ low_global_vr_size = 0x800000;
+ }
+
+ if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
+ *startp = LOW_GLOBAL_BASE_ADDRESS;
+ *size = low_global_vr_size;
+ ret = TRUE;
+ }
+
+ if (region_select == 3) {
+ /* In this config, we allow the bootstrap mappings to occupy the same
+ * page table pages as the heap.
+ */
+ *startp = VM_MIN_KERNEL_ADDRESS;
+ *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
+ ret = TRUE;
+ }
+#endif
+#endif
+ return ret;
+}
+
+/*
+ * Routines to track and allocate physical pages during early boot.
+ * On most systems this memory runs from first_avail through avail_end
+ * with no gaps.
+ *
+ * However, if the system supports ECC and bad_ram_pages_count > 0, we
+ * need to be careful to skip those pages.
+ */
+static unsigned int avail_page_count = 0;
+static bool need_ram_ranges_init = true;
+
+#if defined(__arm64__)
+pmap_paddr_t *bad_ram_pages = NULL;
+unsigned int bad_ram_pages_count = 0;
+
+/*
+ * We use this sub-range of bad_ram_pages for pmap_next_page()
+ */
+static pmap_paddr_t *skip_pages;
+static unsigned int skip_pages_count = 0;
+
+#define MAX_BAD_RAM_PAGE_COUNT 64
+static pmap_paddr_t bad_ram_pages_arr[MAX_BAD_RAM_PAGE_COUNT];
+
+/*
+ * XXX - temporary code to get the bad pages array from boot-args.
+ * Expects a comma-separated list of offsets from the start of
+ * physical memory to be considered bad.
+ *
+ * This will eventually be replaced by data provided by iBoot.
+ */
+static void
+parse_bad_ram_pages_boot_arg(void)
+{
+ char buf[256] = {0};
+ char *s = buf;
+ char *end;
+ int count = 0;
+ pmap_paddr_t num;
+ extern uint64_t strtouq(const char *, char **, int);
+
+ if (!PE_parse_boot_arg_str("bad_ram_pages", buf, sizeof(buf))) {
+ goto done;
+ }
+
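+	/*
+	 * Parse up to MAX_BAD_RAM_PAGE_COUNT comma-separated offsets; each entry
+	 * is truncated to a page boundary and rebased onto the start of DRAM.
+	 */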
+ while (*s && count < MAX_BAD_RAM_PAGE_COUNT) {
+ num = (pmap_paddr_t)strtouq(s, &end, 0);
+ if (num == 0) {
+ break;
+ }
+ num &= ~PAGE_MASK;
+
+ bad_ram_pages_arr[count++] = gDramBase + num;
+
+ if (*end != ',') {
+ break;
+ }
+
+ s = end + 1;
+ }
+
+done:
+ bad_ram_pages = bad_ram_pages_arr;
+ bad_ram_pages_count = count;
+}
+
+/*
+ * Comparison routine for qsort of array of physical addresses.
+ */
+static int
+pmap_paddr_cmp(void *a, void *b)
+{
+ pmap_paddr_t *x = a;
+ pmap_paddr_t *y = b;
+ if (*x < *y) {
+ return -1;
+ }
+ return *x > *y;
+}
+#endif /* defined(__arm64__) */
+
+/*
+ * Look up ppn in the sorted bad_ram_pages array.
+ */
+bool
+pmap_is_bad_ram(__unused ppnum_t ppn)
+{
+#if defined(__arm64__)
+ pmap_paddr_t pa = ptoa(ppn);
+ int low = 0;
+ int high = bad_ram_pages_count - 1;
+ int mid;
+
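+	/* Binary search the sorted bad_ram_pages array for this page's physical address. */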
+ while (low <= high) {
+ mid = (low + high) / 2;
+ if (bad_ram_pages[mid] < pa) {
+ low = mid + 1;
+ } else if (bad_ram_pages[mid] > pa) {
+ high = mid - 1;
+ } else {
+ return true;
}
-#endif
- ret = TRUE;
- }
- if (region_select == 1) {
- *startp = VREGION1_START;
- *size = VREGION1_SIZE;
- ret = TRUE;
- }
-#if (__ARM_VMSA__ > 7)
- /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
- if (!TEST_PAGE_SIZE_4K) {
- low_global_vr_mask = 0xFFFFFFFFFE000000;
- low_global_vr_size = 0x2000000;
- } else {
- low_global_vr_mask = 0xFFFFFFFFFF800000;
- low_global_vr_size = 0x800000;
}
+#endif /* defined(__arm64__) */
+ return false;
+}
- if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
- *startp = LOW_GLOBAL_BASE_ADDRESS;
- *size = low_global_vr_size;
- ret = TRUE;
- }
+/*
+ * Initialize the count of available pages. If we have bad_ram_pages, then sort the list of them.
+ * No lock is needed here, as this code runs while kernel boot is still single-threaded.
+ */
+static void
+initialize_ram_ranges(void)
+{
+ pmap_paddr_t first = first_avail;
+ pmap_paddr_t end = avail_end;
- if (region_select == 3) {
- /* In this config, we allow the bootstrap mappings to occupy the same
- * page table pages as the heap.
- */
- *startp = VM_MIN_KERNEL_ADDRESS;
- *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
- ret = TRUE;
+ assert(first <= end);
+ assert(first == (first & ~PAGE_MASK));
+ assert(end == (end & ~PAGE_MASK));
+ avail_page_count = atop(end - first);
+
+#if defined(__arm64__)
+ /*
+	 * XXX Temporary code for testing, until there is iBoot support.
+	 *
+	 * Parse the list of known bad pages from a boot-arg.
+ */
+ parse_bad_ram_pages_boot_arg();
+
+ /*
+ * Sort and filter the bad pages list and adjust avail_page_count.
+ */
+ if (bad_ram_pages_count != 0) {
+ qsort(bad_ram_pages, bad_ram_pages_count, sizeof(*bad_ram_pages), (cmpfunc_t)pmap_paddr_cmp);
+ skip_pages = bad_ram_pages;
+ skip_pages_count = bad_ram_pages_count;
+
+ /* ignore any pages before first */
+ while (skip_pages_count > 0 && skip_pages[0] < first) {
+ --skip_pages_count;
+ ++skip_pages;
+ }
+
+ /* ignore any pages at or after end */
+ while (skip_pages_count > 0 && skip_pages[skip_pages_count - 1] >= end) {
+ --skip_pages_count;
+ }
+
+ avail_page_count -= skip_pages_count;
}
-#endif
-#endif
- return ret;
+#endif /* defined(__arm64__) */
+ need_ram_ranges_init = false;
}
unsigned int
pmap_free_pages(
void)
{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
+ return avail_page_count;
+}
+
+unsigned int
+pmap_free_pages_span(
+ void)
+{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
return (unsigned int)atop(avail_end - first_avail);
}
pmap_next_page(
ppnum_t *pnum)
{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
+
+#if defined(__arm64__)
+ /*
+ * Skip over any known bad pages.
+ */
+ while (skip_pages_count > 0 && first_avail == skip_pages[0]) {
+ first_avail += PAGE_SIZE;
+ ++skip_pages;
+ --skip_pages_count;
+ }
+#endif /* defined(__arm64__) */
+
if (first_avail != avail_end) {
*pnum = (ppnum_t)atop(first_avail);
first_avail += PAGE_SIZE;
+ assert(avail_page_count > 0);
+ --avail_page_count;
return TRUE;
}
+ assert(avail_page_count == 0);
return FALSE;
}
+void
+pmap_retire_page(
+ __unused ppnum_t pnum)
+{
+	/* XXX TBD - mark the page as unusable in pmap data structures */
+}
+
/*
* Initialize the pmap module.
pmap_initialized = TRUE;
- pmap_zone_init();
+ /*
+ * Create the zone of physical maps
+ * and the physical-to-virtual entries.
+ */
+ pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
+ ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);
/*
_vm_object_allocate(mem_size, pmap_object);
pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
- pv_init();
-
/*
* The values of [hard_]maxproc may have been scaled, make sure
- * they are still less than the value of MAX_ASID.
+ * they are still less than the value of pmap_max_asids.
*/
- if (maxproc > MAX_ASID) {
- maxproc = MAX_ASID;
+ if ((uint32_t)maxproc > pmap_max_asids) {
+ maxproc = pmap_max_asids;
}
- if (hard_maxproc > MAX_ASID) {
- hard_maxproc = MAX_ASID;
+ if ((uint32_t)hard_maxproc > pmap_max_asids) {
+ hard_maxproc = pmap_max_asids;
}
#if CONFIG_PGTRACE
#endif
-/*
- * Initialize zones used by pmap.
- */
+#if XNU_MONITOR
+MARK_AS_PMAP_TEXT static void
+pmap_ledger_alloc_init_internal(size_t size)
+{
+ pmap_simple_lock(&pmap_ledger_lock);
+
+ if (pmap_ledger_alloc_initialized) {
+ panic("%s: already initialized, "
+ "size=%lu",
+ __func__,
+ size);
+ }
+
+ if ((size > sizeof(pmap_ledger_data_t)) ||
+ ((sizeof(pmap_ledger_data_t) - size) % sizeof(struct ledger_entry))) {
+ panic("%s: size mismatch, expected %lu, "
+ "size=%lu",
+ __func__, PMAP_LEDGER_DATA_BYTES,
+ size);
+ }
+
+ pmap_ledger_alloc_initialized = true;
+
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+MARK_AS_PMAP_TEXT static ledger_t
+pmap_ledger_alloc_internal(void)
+{
+ pmap_paddr_t paddr;
+ uint64_t vaddr, vstart, vend;
+ uint64_t index;
+
+ ledger_t new_ledger;
+ uint64_t array_index;
+
+ pmap_simple_lock(&pmap_ledger_lock);
+ if (pmap_ledger_free_list == NULL) {
+ paddr = pmap_get_free_ppl_page();
+
+ if (paddr == 0) {
+ pmap_simple_unlock(&pmap_ledger_lock);
+ return NULL;
+ }
+
+ vstart = phystokv(paddr);
+ vend = vstart + PAGE_SIZE;
+
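+		/*
+		 * Carve the new PPL page into pmap_ledger_t-sized entries, record each
+		 * one in the pointer array so it can later be validated, and push it
+		 * onto the free list.
+		 */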
+ for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
+ pmap_ledger_t *free_ledger;
+
+ index = pmap_ledger_ptr_array_free_index++;
+
+ if (index >= MAX_PMAP_LEDGERS) {
+ panic("%s: pmap_ledger_ptr_array is full, index=%llu",
+ __func__, index);
+ }
+
+ free_ledger = (pmap_ledger_t*)vaddr;
+
+ pmap_ledger_ptr_array[index] = free_ledger;
+ free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
+
+ free_ledger->next = pmap_ledger_free_list;
+ pmap_ledger_free_list = free_ledger;
+ }
+
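+		/* Return the page to kernel-RW in the physical aperture so the ledger entries can be updated outside the PPL. */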
+ pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
+ }
+
+ new_ledger = (ledger_t)pmap_ledger_free_list;
+ pmap_ledger_free_list = pmap_ledger_free_list->next;
+
+ array_index = pmap_ledger_validate(new_ledger);
+ os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
+
+ pmap_simple_unlock(&pmap_ledger_lock);
+
+ return new_ledger;
+}
+
+MARK_AS_PMAP_TEXT static void
+pmap_ledger_free_internal(ledger_t ledger)
+{
+ pmap_ledger_t* free_ledger;
+
+ free_ledger = (pmap_ledger_t*)ledger;
+
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+
+ if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
+ panic("%s: ledger still referenced, "
+ "ledger=%p",
+ __func__,
+ ledger);
+ }
+
+ free_ledger->next = pmap_ledger_free_list;
+ pmap_ledger_free_list = free_ledger;
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+
static void
-pmap_zone_init(
- void)
+pmap_ledger_retain(ledger_t ledger)
{
- /*
- * Create the zone of physical maps
- * and the physical-to-virtual entries.
- */
- pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
- PAGE_SIZE, "pmap");
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+ os_ref_retain(&pmap_ledger_refcnt[array_index]);
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+static void
+pmap_ledger_release(ledger_t ledger)
+{
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+ os_ref_release_live(&pmap_ledger_refcnt[array_index]);
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+void
+pmap_ledger_alloc_init(size_t size)
+{
+ pmap_ledger_alloc_init_ppl(size);
+}
+
+ledger_t
+pmap_ledger_alloc(void)
+{
+ ledger_t retval = NULL;
+
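+	/* If the PPL has no free ledger entries, donate a page to the PPL and retry. */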
+ while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
+ pmap_alloc_page_for_ppl(0);
+ }
+
+ return retval;
}
+void
+pmap_ledger_free(ledger_t ledger)
+{
+ pmap_ledger_free_ppl(ledger);
+}
+#else /* XNU_MONITOR */
__dead2
void
pmap_ledger_alloc_init(size_t size)
"ledger=%p",
__func__, ledger);
}
+#endif /* XNU_MONITOR */
+
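+/*
+ * Return the size, in bytes, of the given pmap's root translation table:
+ * the number of root-level entries for the pmap's page-table geometry on
+ * ARMv8, or the fixed PMAP_ROOT_ALLOC_SIZE on ARMv7.
+ */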
+static vm_size_t
+pmap_root_alloc_size(pmap_t pmap)
+{
+#if (__ARM_VMSA__ > 7)
+#pragma unused(pmap)
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ unsigned int root_level = pt_attr_root_level(pt_attr);
+ return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
+#else
+ (void)pmap;
+ return PMAP_ROOT_ALLOC_SIZE;
+#endif
+}
/*
* Create and return a physical map.
pmap_create_options_internal(
ledger_t ledger,
vm_map_size_t size,
- unsigned int flags)
+ unsigned int flags,
+ kern_return_t *kr)
{
unsigned i;
unsigned tte_index_max;
#if defined(HAS_APPLE_PAC)
bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */
+ kern_return_t local_kr = KERN_SUCCESS;
/*
* A software use-only map doesn't even need a pmap.
return PMAP_NULL;
}
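+	/* Reject any creation flags this implementation does not recognize. */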
+ if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
+ return PMAP_NULL;
+ }
+
+#if XNU_MONITOR
+ if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
+ local_kr = KERN_NO_SPACE;
+ goto pmap_create_fail;
+ }
+
+ if (ledger) {
+ pmap_ledger_validate(ledger);
+ pmap_ledger_retain(ledger);
+ }
+#else
/*
* Allocate a pmap struct from the pmap_zone. Then allocate
* the translation table of the right size for the pmap.
*/
if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
- return PMAP_NULL;
+ local_kr = KERN_RESOURCE_SHORTAGE;
+ goto pmap_create_fail;
}
+#endif
+
+ p->ledger = ledger;
+
+
+ p->pmap_vm_map_cs_enforced = false;
if (flags & PMAP_CREATE_64BIT) {
p->min = MACH_VM_MIN_ADDRESS;
p->min = VM_MIN_ADDRESS;
p->max = VM_MAX_ADDRESS;
}
-
#if defined(HAS_APPLE_PAC)
p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */
p->nested_pmap = PMAP_NULL;
#if ARM_PARAMETERIZED_PMAP
+ /* Default to the native pt_attr */
p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */
+#if __ARM_MIXED_PAGE_SIZE__
+ if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
+ p->pmap_pt_attr = &pmap_pt_attr_4k;
+ }
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
if (!pmap_get_pt_ops(p)->alloc_id(p)) {
+ local_kr = KERN_NO_SPACE;
goto id_alloc_fail;
}
-
-
- p->ledger = ledger;
-
- PMAP_LOCK_INIT(p);
+ pmap_lock_init(p);
memset((void *) &p->stats, 0, sizeof(p->stats));
p->tt_entry_free = (tt_entry_t *)0;
- tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);
+ tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));
#if (__ARM_VMSA__ == 7)
p->tte_index_max = tte_index_max;
#endif
- p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
+#if XNU_MONITOR
+ p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
+#else
+ p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
+#endif
if (!(p->tte)) {
+ local_kr = KERN_RESOURCE_SHORTAGE;
goto tt1_alloc_fail;
}
p->ttep = ml_static_vtop((vm_offset_t)p->tte);
- PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
+ PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
/* nullify the translation table */
for (i = 0; i < tte_index_max; i++) {
/*
* initialize the rest of the structure
*/
- p->nested_region_grand_addr = 0x0ULL;
- p->nested_region_subord_addr = 0x0ULL;
+ p->nested_region_addr = 0x0ULL;
p->nested_region_size = 0x0ULL;
p->nested_region_asid_bitmap = NULL;
p->nested_region_asid_bitmap_size = 0x0UL;
tt1_alloc_fail:
pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
+#if XNU_MONITOR
+ pmap_free_pmap(p);
+
+ if (ledger) {
+ pmap_ledger_release(ledger);
+ }
+#else
zfree(pmap_zone, p);
+#endif
+pmap_create_fail:
+#if XNU_MONITOR
+ pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
+#endif
+ *kr = local_kr;
+#if XNU_MONITOR
+ pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
+#endif
return PMAP_NULL;
}
unsigned int flags)
{
pmap_t pmap;
+ kern_return_t kr = KERN_SUCCESS;
PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
ledger_reference(ledger);
- pmap = pmap_create_options_internal(ledger, size, flags);
+#if XNU_MONITOR
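+		/* On a resource shortage, donate a page to the PPL and retry the call. */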
+ for (;;) {
+ pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
+ if (kr != KERN_RESOURCE_SHORTAGE) {
+ break;
+ }
+ assert(pmap == PMAP_NULL);
+ pmap_alloc_page_for_ppl(0);
+ kr = KERN_SUCCESS;
+ }
+#else
+ pmap = pmap_create_options_internal(ledger, size, flags, &kr);
+#endif
if (pmap == PMAP_NULL) {
ledger_dereference(ledger);
return pmap;
}
-#if MACH_ASSERT
+#if XNU_MONITOR
+/*
+ * This symbol remains in place when the PPL is enabled so that the dispatch
+ * table does not change from development to release configurations.
+ */
+#endif
+#if MACH_ASSERT || XNU_MONITOR
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
__unused pmap_t pmap,
}
#endif /* MACH_ASSERT */
}
-#endif /* MACH_ASSERT*/
+#endif /* MACH_ASSERT || XNU_MONITOR */
#if MACH_ASSERT
void
int pid,
char *procname)
{
+#if XNU_MONITOR
+ pmap_set_process_ppl(pmap, pid, procname);
+#else
pmap_set_process_internal(pmap, pid, procname);
+#endif
}
#endif /* MACH_ASSERT */
+#if (__ARM_VMSA__ > 7)
+/*
+ * pmap_deallocate_all_leaf_tts:
+ *
+ * Recursive function for deallocating all leaf TTEs. Walks the given TT,
+ * removing and deallocating all TTEs.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
+{
+ tt_entry_t tte = ARM_TTE_EMPTY;
+ tt_entry_t * ttep = NULL;
+ tt_entry_t * last_ttep = NULL;
+
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
+ assert(level < pt_attr_leaf_level(pt_attr));
+
+ last_ttep = &first_ttep[ttn_index(pmap, pt_attr, ~0, level)];
+
+ for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
+ tte = *ttep;
+
+ if (!(tte & ARM_TTE_VALID)) {
+ continue;
+ }
+
+ if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
+ panic("%s: found block mapping, ttep=%p, tte=%p, "
+ "pmap=%p, first_ttep=%p, level=%u",
+ __FUNCTION__, ttep, (void *)tte,
+ pmap, first_ttep, level);
+ }
+
+ /* Must be valid, type table */
+ if (level < pt_attr_twig_level(pt_attr)) {
+ /* If we haven't reached the twig level, recurse to the next level. */
+ pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
+ }
+
+ /* Remove the TTE. */
+ pmap_lock(pmap);
+ pmap_tte_deallocate(pmap, 0, 0, false, ttep, level);
+ pmap_unlock(pmap);
+ }
+}
+#endif /* (__ARM_VMSA__ > 7) */
+
/*
* We maintain stats and ledgers so that a task's physical footprint is:
* phys_footprint = ((internal - alternate_accounting)
* where "alternate_accounting" includes "iokit" and "purgeable" memory.
*/
-
/*
* Retire the given physical map from service.
* Should only be called if the map contains
panic("pmap %p: attempt to destroy kernel pmap", pmap);
}
- pt_entry_t *ttep;
-
#if (__ARM_VMSA__ > 7)
pmap_unmap_sharedpage(pmap);
#endif /* (__ARM_VMSA__ > 7) */
*/
#if (__ARM_VMSA__ == 7)
unsigned int i = 0;
+ pt_entry_t *ttep;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
for (i = 0; i < pmap->tte_index_max; i++) {
ttep = &pmap->tte[i];
if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
- pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
+ pmap_tte_deallocate(pmap, 0, 0, false, ttep, PMAP_TT_L1_LEVEL);
}
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
#else /* (__ARM_VMSA__ == 7) */
- vm_map_address_t c;
- unsigned int level;
-
- for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
- for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
- ttep = pmap_ttne(pmap, level, c);
-
- if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
- PMAP_LOCK(pmap);
- pmap_tte_deallocate(pmap, ttep, level);
- PMAP_UNLOCK(pmap);
- }
- }
- }
+ pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));
#endif /* (__ARM_VMSA__ == 7) */
pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
pmap->tte_index_max = 0;
#else /* (__ARM_VMSA__ == 7) */
- pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
+ pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
#endif /* (__ARM_VMSA__ == 7) */
pmap->tte = (tt_entry_t *) NULL;
pmap->ttep = 0;
assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
- pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
- sync_tlb_flush();
+ if (__improbable(pmap->nested)) {
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap);
+ sync_tlb_flush();
+ } else {
+ pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
+ sync_tlb_flush();
+ /* return its asid to the pool */
+ pmap_get_pt_ops(pmap)->free_id(pmap);
+ /* release the reference we hold on the nested pmap */
+ pmap_destroy_internal(pmap->nested_pmap);
+ }
- /* return its asid to the pool */
- pmap_get_pt_ops(pmap)->free_id(pmap);
pmap_check_ledgers(pmap);
if (pmap->nested_region_asid_bitmap) {
- kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
+#else
+ kheap_free(KHEAP_DATA_BUFFERS, pmap->nested_region_asid_bitmap,
+ pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
}
+#if XNU_MONITOR
+ if (pmap->ledger) {
+ pmap_ledger_release(pmap->ledger);
+ }
+
+ pmap_lock_destroy(pmap);
+ pmap_free_pmap(pmap);
+#else
+ pmap_lock_destroy(pmap);
zfree(pmap_zone, pmap);
+#endif
}
void
ledger = pmap->ledger;
+#if XNU_MONITOR
+ pmap_destroy_ppl(pmap);
+
+ pmap_check_ledger_fields(ledger);
+#else
pmap_destroy_internal(pmap);
+#endif
ledger_dereference(ledger);
pmap_reference(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_reference_ppl(pmap);
+#else
pmap_reference_internal(pmap);
+#endif
}
static tt_entry_t *
vm_address_t va_end;
kern_return_t ret;
+ if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
+ size = PAGE_SIZE;
+ }
+
pmap_simple_lock(&tt1_lock);
if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
free_page_size_tt_count--;
return (tt_entry_t *)tt1;
}
- ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
+ ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
if (ret == KERN_RESOURCE_SHORTAGE) {
return (tt_entry_t *)0;
}
+#if XNU_MONITOR
+ assert(pa);
+#endif
if (size < PAGE_SIZE) {
va = phystokv(pa) + size;
{
tt_free_entry_t *tt_entry;
+ if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
+ size = PAGE_SIZE;
+ }
+
tt_entry = (tt_free_entry_t *)tt;
assert(not_in_kdp);
pmap_simple_lock(&tt1_lock);
pmap_paddr_t pa;
*ttp = NULL;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
- tt_free_entry_t *tt_free_next;
+ tt_free_entry_t *tt_free_cur, *tt_free_next;
- tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
- *ttp = (tt_entry_t *)pmap->tt_entry_free;
+ tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
+ tt_free_next = tt_free_cur->next;
+ tt_free_cur->next = NULL;
+ *ttp = (tt_entry_t *)tt_free_cur;
pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
if (*ttp == NULL) {
pt_desc_t *ptdp;
/*
* Allocate a VM page for the level x page table entries.
*/
- while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
+ while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
if (options & PMAP_OPTIONS_NOWAIT) {
return KERN_RESOURCE_SHORTAGE;
}
VM_PAGE_WAIT();
}
- while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
+ while ((ptdp = ptd_alloc(pmap)) == NULL) {
if (options & PMAP_OPTIONS_NOWAIT) {
pmap_pages_free(pa, PAGE_SIZE);
return KERN_RESOURCE_SHORTAGE;
VM_PAGE_WAIT();
}
- if (level < PMAP_TT_MAX_LEVEL) {
+ if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
OSAddAtomic64(1, &alloc_ttepages_count);
OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
} else {
pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
- __unreachable_ok_push
- if (TEST_PAGE_RATIO_4) {
+ uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
+ if (PAGE_SIZE > pmap_page_size) {
vm_address_t va;
vm_address_t va_end;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
- for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
+ for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
pmap->tt_entry_free = (tt_entry_t *)va;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
}
- __unreachable_ok_pop
*ttp = (tt_entry_t *)phystokv(pa);
}
+#if XNU_MONITOR
+ assert(*ttp);
+#endif
return KERN_SUCCESS;
}
unsigned int level)
{
pt_desc_t *ptdp;
+ ptd_info_t *ptd_info;
unsigned pt_acc_cnt;
- unsigned i, max_pt_index = PAGE_RATIO;
+ unsigned i;
vm_offset_t free_page = 0;
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
ptdp = ptep_get_ptd((vm_offset_t)ttp);
+ ptd_info = ptd_get_info(ptdp, ttp);
- ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
+ ptd_info->va = (vm_offset_t)-1;
- if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
- ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
+ if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
+ ptd_info->refcnt = 0;
}
- if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
- panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
+ if (ptd_info->refcnt != 0) {
+ panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptd_info->refcnt);
}
- ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
+ ptd_info->refcnt = 0;
for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
pt_acc_cnt += ptdp->ptd_info[i].refcnt;
pmap->tt_entry_free = ttp;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
if (free_page != 0) {
ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
- if (level < PMAP_TT_MAX_LEVEL) {
+ if (level < pt_attr_leaf_level(pt_attr)) {
OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
} else {
OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
}
}
+/**
+ * Safely clear out a translation table entry.
+ *
+ * @note If the TTE to clear out points to a leaf table, then that leaf table
+ * must have a refcnt of zero before the TTE can be removed.
+ *
+ * @param pmap The pmap containing the page table whose TTE is being removed.
+ * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
+ * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
+ * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
+ * @param ttep Pointer to the TTE that should be cleared out.
+ * @param level The level of the page table that contains the TTE to be removed.
+ */
static void
pmap_tte_remove(
pmap_t pmap,
+ vm_offset_t va_start,
+ vm_offset_t va_end,
+ bool need_strong_sync,
tt_entry_t *ttep,
unsigned int level)
{
tt_entry_t tte = *ttep;
- if (tte == 0) {
- panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
+ if (__improbable(tte == 0)) {
+ panic("%s: null tt_entry ttep==%p", __func__, ttep);
}
- if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
- panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
- tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
+ if (__improbable((level == pt_attr_twig_level(pmap_get_pt_attr(pmap))) &&
+ (ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt != 0))) {
+ panic("%s: non-zero pagetable refcount: pmap=%p ttep=%p ptd=%p refcnt=0x%x", __func__,
+ pmap, ttep, tte_get_ptd(tte), ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt);
}
-#if (__ARM_VMSA__ == 7)
+#if (__ARM_VMSA__ == 7)
{
tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
unsigned i;
#else
*ttep = (tt_entry_t) 0;
FLUSH_PTE_STRONG(ttep);
+#endif /* (__ARM_VMSA__ == 7) */
+ // If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
+ if (va_end > va_start) {
+#if (__ARM_VMSA__ == 7)
+ // Ensure intermediate translations are flushed for each 1MB block
+ flush_mmu_tlb_entry_async((va_start & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#endif
+ PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync);
+ }
}
+/**
+ * Given a pointer to an entry within a `level` page table, delete the
+ * page table at `level` + 1 that is represented by that entry. For instance,
+ * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
+ * contains the PA of the L3 table, and `level` would be "2".
+ *
+ * @note If the table getting deallocated is a leaf table, then that leaf table
+ * must have a refcnt of zero before getting deallocated. All other levels
+ * must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
+ *
+ * @param pmap The pmap that owns the page table to be deallocated.
+ * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
+ * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
+ * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
+ * @param ttep Pointer to the `level` TTE to remove.
+ * @param level The level of the table that contains an entry pointing to the
+ * table to be removed. The deallocated page table will be a
+ * `level` + 1 table (so if `level` is 2, then an L3 table will be
+ * deleted).
+ */
static void
pmap_tte_deallocate(
pmap_t pmap,
+ vm_offset_t va_start,
+ vm_offset_t va_end,
+ bool need_strong_sync,
tt_entry_t *ttep,
unsigned int level)
{
pmap_paddr_t pa;
tt_entry_t tte;
- PMAP_ASSERT_LOCKED(pmap);
+ pmap_assert_locked_w(pmap);
tte = *ttep;
-#if MACH_ASSERT
+#if MACH_ASSERT
if (tte_get_ptd(tte)->pmap != pmap) {
- panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
- tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
+ panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
+ __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
}
-#endif
+#endif /* MACH_ASSERT */
- pmap_tte_remove(pmap, ttep, level);
+ pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);
if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
-#if MACH_ASSERT
- {
- pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
- unsigned i;
-
- for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
- if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
- panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
- (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
- } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
- panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
- (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
- }
+ uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
+#if MACH_ASSERT
+ pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));
+
+ for (unsigned i = 0; i < (pmap_page_size / sizeof(*pte_p)); i++, pte_p++) {
+ if (__improbable(ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
+ panic_plain("%s: Found compressed mapping in soon to be deleted "
+ "L%d table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
+ __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
+ } else if (__improbable(((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT)) {
+ panic_plain("%s: Found valid mapping in soon to be deleted L%d "
+ "table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
+ __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
}
}
-#endif
- PMAP_UNLOCK(pmap);
+#endif /* MACH_ASSERT */
+ pmap_unlock(pmap);
/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
* aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
- pa = tte_to_pa(tte) & ~ARM_PGMASK;
+ pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
}
}
* entirely within one pte-page. This is NOT checked.
* Assumes that the pte-page exists.
*
- * Returns the number of PTE changed, and sets *rmv_cnt
- * to the number of SPTE changed.
+ * Returns the number of PTEs changed.
*/
static int
pmap_remove_range(
pmap_t pmap,
vm_map_address_t va,
pt_entry_t *bpte,
- pt_entry_t *epte,
- uint32_t *rmv_cnt)
+ pt_entry_t *epte)
{
bool need_strong_sync = false;
- int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
+ int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
&need_strong_sync, PMAP_OPTIONS_REMOVE);
if (num_changed > 0) {
- PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
+ PMAP_UPDATE_TLBS(pmap, va,
+ va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync);
}
return num_changed;
}
pv_entry_t **pv_h, **pve_pp;
pv_entry_t *pve_p;
+ ASSERT_NOT_HIBERNATING();
ASSERT_PVH_LOCKED(pai);
pv_h = pai_to_pvh(pai);
vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
+ }
+#endif
if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
- if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
+ if (__improbable((cpte != pvh_ptep(pv_h)))) {
panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
}
if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
pve_p = PVE_NEXT_PTR(pve_next(pve_p));
}
- if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
+ if (__improbable((pve_p == PV_ENTRY_NULL))) {
panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
}
}
pvh_remove(pv_h, pve_pp, pve_p);
- pv_free(pve_p);
+ pv_free_entry(pve_p);
if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
pvh_set_flags(pv_h, pvh_flags);
}
vm_map_address_t va,
pt_entry_t *bpte,
pt_entry_t *epte,
- uint32_t *rmv_cnt,
+ vm_map_address_t *eva,
bool *need_strong_sync __unused,
int options)
{
pt_entry_t *cpte;
+ size_t npages = 0;
int num_removed, num_unwired;
int num_pte_changed;
int pai = 0;
int num_external, num_internal, num_reusable;
int num_alt_internal;
uint64_t num_compressed, num_alt_compressed;
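+	/*
+	 * Batched refcount delta for this page table; applied to the PTD with a
+	 * single atomic operation at the end rather than once per PTE.
+	 */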
+ int16_t refcnt = 0;
+
+ pmap_assert_locked_w(pmap);
+
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
- PMAP_ASSERT_LOCKED(pmap);
+ if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
+ panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
+ }
num_removed = 0;
num_unwired = 0;
num_alt_compressed = 0;
for (cpte = bpte; cpte < epte;
- cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
+ cpte += PAGE_RATIO, va += pmap_page_size) {
pt_entry_t spte;
boolean_t managed = FALSE;
- spte = *cpte;
+ /*
+ * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
+ * so we need to be as aggressive as possible in checking for preemption when we can.
+ */
+ if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
+ *eva = va;
+ break;
+ }
+ spte = *((volatile pt_entry_t*)cpte);
#if CONFIG_PGTRACE
if (pgtrace_enabled) {
* our "compressed" markers,
* so let's update it here.
*/
- if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
- panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
- }
- spte = *cpte;
+ --refcnt;
+ spte = *((volatile pt_entry_t*)cpte);
}
/*
* It may be possible for the pte to transition from managed
//assert(!ARM_PTE_IS_COMPRESSED(spte));
pa = pte_to_pa(spte);
if (!pa_valid(pa)) {
+#if XNU_MONITOR
+ unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
+#endif
+#if XNU_MONITOR
+ if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
+ (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
+ panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
+ __func__, (uint64_t)pa);
+ }
+#endif
break;
}
pai = (int)pa_index(pa);
LOCK_PVH(pai);
- spte = *cpte;
+ spte = *((volatile pt_entry_t*)cpte);
pa = pte_to_pa(spte);
if (pai == (int)pa_index(pa)) {
managed = TRUE;
assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
#if MACH_ASSERT
if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
- panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
- cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
+ panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
+ cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
}
#endif
WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
(pmap != kernel_pmap)) {
assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
- if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
- panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
- }
- if (rmv_cnt) {
- (*rmv_cnt)++;
- }
+ --refcnt;
}
if (pte_is_wired(spte)) {
- pte_set_wired(cpte, 0);
+ pte_set_wired(pmap, cpte, 0);
num_unwired++;
}
/*
* Update the counts
*/
OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
- pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
+ pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);
if (pmap != kernel_pmap) {
- /* sanity checks... */
-#if MACH_ASSERT
- if (pmap->stats.internal < num_internal) {
- if ((!pmap_stats_assert ||
- !pmap->pmap_stats_assert)) {
- printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
- pmap->pmap_pid,
- pmap->pmap_procname,
- pmap,
- (uint64_t) va,
- bpte,
- epte,
- options,
- num_internal,
- num_removed,
- num_unwired,
- num_external,
- num_reusable,
- num_compressed,
- num_alt_internal,
- num_alt_compressed,
- num_pte_changed,
- pmap->stats.internal,
- pmap->stats.reusable);
- } else {
- panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
- pmap->pmap_pid,
- pmap->pmap_procname,
- pmap,
- (uint64_t) va,
- bpte,
- epte,
- options,
- num_internal,
- num_removed,
- num_unwired,
- num_external,
- num_reusable,
- num_compressed,
- num_alt_internal,
- num_alt_compressed,
- num_pte_changed,
- pmap->stats.internal,
- pmap->stats.reusable);
- }
+ if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
+ panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
}
-#endif /* MACH_ASSERT */
- PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
- pmap,
- "pmap=%p num_external=%d stats.external=%d",
- pmap, num_external, pmap->stats.external);
- PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
- pmap,
- "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
- pmap,
- num_internal, pmap->stats.internal,
- num_reusable, pmap->stats.reusable);
- PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
- pmap,
- "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
- pmap,
- num_internal, pmap->stats.internal,
- num_reusable, pmap->stats.reusable);
- PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
- pmap,
- "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
- pmap, num_compressed, num_alt_compressed,
- pmap->stats.compressed);
-
/* update pmap stats... */
OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
if (num_external) {
- OSAddAtomic(-num_external, &pmap->stats.external);
+ __assert_only int32_t orig_external = OSAddAtomic(-num_external, &pmap->stats.external);
+ PMAP_STATS_ASSERTF(orig_external >= num_external,
+ pmap,
+ "pmap=%p bpte=%p epte=%p num_external=%d stats.external=%d",
+ pmap, bpte, epte, num_external, orig_external);
}
if (num_internal) {
- OSAddAtomic(-num_internal, &pmap->stats.internal);
+ __assert_only int32_t orig_internal = OSAddAtomic(-num_internal, &pmap->stats.internal);
+ PMAP_STATS_ASSERTF(orig_internal >= num_internal,
+ pmap,
+ "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
+ pmap, bpte, epte,
+ num_internal, orig_internal,
+ num_reusable, pmap->stats.reusable);
}
if (num_reusable) {
- OSAddAtomic(-num_reusable, &pmap->stats.reusable);
+ __assert_only int32_t orig_reusable = OSAddAtomic(-num_reusable, &pmap->stats.reusable);
+ PMAP_STATS_ASSERTF(orig_reusable >= num_reusable,
+ pmap,
+ "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
+ pmap, bpte, epte,
+ num_internal, pmap->stats.internal,
+ num_reusable, orig_reusable);
}
if (num_compressed) {
- OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
+ __assert_only uint64_t orig_compressed = OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
+ PMAP_STATS_ASSERTF(orig_compressed >= num_compressed,
+ pmap,
+ "pmap=%p bpte=%p epte=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
+ pmap, bpte, epte, num_compressed, num_alt_compressed,
+ orig_compressed);
}
/* ... and ledgers */
- pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
- pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
- pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
+ pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
/* make needed adjustments to phys_footprint */
pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
- machine_ptob((num_internal -
+ ((num_internal -
num_alt_internal) +
(num_compressed -
- num_alt_compressed)));
+ num_alt_compressed)) * pmap_page_size);
}
/* flush the ptable entries we have written */
pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
-MARK_AS_PMAP_TEXT static int
+MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_remove_options_internal(
pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
int options)
{
- int remove_count = 0;
+ vm_map_address_t eva = end;
pt_entry_t *bpte, *epte;
pt_entry_t *pte_p;
tt_entry_t *tte_p;
- uint32_t rmv_spte = 0;
+ int remove_count = 0;
bool need_strong_sync = false;
- bool flush_tte = false;
if (__improbable(end < start)) {
panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
tte_p = pmap_tte(pmap, start);
if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
pte_p = (pt_entry_t *) ttetokv(*tte_p);
- bpte = &pte_p[ptenum(start)];
+ bpte = &pte_p[pte_index(pmap, pt_attr, start)];
epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
- remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
- &rmv_spte, &need_strong_sync, options);
+ remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
+ &need_strong_sync, options);
- if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
- (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
- pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
- flush_tte = true;
+ if ((pmap != kernel_pmap) && (pmap->nested == FALSE) && (ptep_get_info(pte_p)->refcnt == 0)) {
+ pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
+ remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
}
}
done:
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
if (remove_count > 0) {
- PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
- } else if (flush_tte > 0) {
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
- sync_tlb_flush();
+ PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync);
}
- return remove_count;
+ return eva;
}
void
vm_map_address_t end,
int options)
{
- int remove_count = 0;
vm_map_address_t va;
if (pmap == PMAP_NULL) {
VM_KERNEL_ADDRHIDE(end));
#if MACH_ASSERT
- if ((start | end) & PAGE_MASK) {
+ if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
pmap, (uint64_t)start, (uint64_t)end);
}
pmap, (uint64_t)start, (uint64_t)end);
}
#endif
+ assert(get_preemption_level() == 0);
/*
* Invalidate the translation buffer first
l = end;
}
- remove_count += pmap_remove_options_internal(pmap, va, l, options);
+#if XNU_MONITOR
+ va = pmap_remove_options_ppl(pmap, va, l, options);
- va = l;
+ pmap_ledger_check_balance(pmap);
+#else
+ va = pmap_remove_options_internal(pmap, va, l, options);
+#endif
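+		/* The call may stop short of 'l' if preemption is pending; the next iteration resumes from the address it returned. */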
}
PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
{
pmap_switch(pmap);
#if __ARM_USER_PROTECT__
- if (pmap->tte_index_max == NTTES) {
- thread->machine.uptw_ttc = 2;
- } else {
- thread->machine.uptw_ttc = 1;
- }
thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
thread->machine.asid = pmap->hw_asid;
#endif
}
static void
-pmap_flush_core_tlb_asid(pmap_t pmap)
+pmap_flush_core_tlb_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
- flush_core_tlb_asid(pmap->hw_asid);
+ flush_core_tlb_asid_async(pmap->hw_asid);
+#else
+ flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
+#endif
+}
+
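+/*
+ * Return true if the user TTB currently holds the invalid/empty table (ARMv8)
+ * or the kernel table (ARMv7), i.e. no user address space is installed and no
+ * break-before-make sequence is needed before switching.
+ */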
+static inline bool
+pmap_user_ttb_is_clear(void)
+{
+#if (__ARM_VMSA__ > 7)
+ return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
#else
- flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
+ return get_mmu_ttb() == kernel_pmap->ttep;
#endif
}
VALIDATE_PMAP(pmap);
pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
uint16_t asid_index = pmap->hw_asid;
- boolean_t do_asid_flush = FALSE;
+ bool do_asid_flush = false;
+ if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
+ panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
+ }
#if __ARM_KERNEL_PROTECT__
asid_index >>= 1;
#endif
-#if (__ARM_VMSA__ > 7)
- pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
+#if (__ARM_VMSA__ > 7)
+ pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
+ __unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
+ __unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
+ __unused vm_map_offset_t last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
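+	/*
+	 * Switching to a different nested (shared region) pmap requires flushing
+	 * the previous region's global TLB entries, which in turn requires
+	 * clearing the user TTB first (break-before-make).
+	 */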
+ bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
+ bool break_before_make = do_shared_region_flush;
+#else
+ bool do_shared_region_flush = false;
+ bool break_before_make = false;
#endif
-#if MAX_ASID > MAX_HW_ASID
- if (asid_index > 0) {
+ if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
asid_index -= 1;
+ pmap_update_plru(asid_index);
+
/* Paranoia. */
- assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
+ assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));
/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
- uint8_t asid_high_bits = pmap->sw_asid;
- uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];
+ uint8_t new_sw_asid = pmap->sw_asid;
+ uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];
- if (asid_high_bits != last_asid_high_bits) {
+ if (new_sw_asid != last_sw_asid) {
/*
* If the virtual ASID of the new pmap does not match the virtual ASID
* last seen on this CPU for the physical ASID (that was a mouthful),
* then this switch runs the risk of aliasing. We need to flush the
			 * TLB for this physical ASID in this case.
*/
- cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
- do_asid_flush = TRUE;
+ cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
+ do_asid_flush = true;
+ break_before_make = true;
}
}
-#endif /* MAX_ASID > MAX_HW_ASID */
- pmap_switch_user_ttb_internal(pmap);
+#if __ARM_MIXED_PAGE_SIZE__
+ if (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr()) {
+ break_before_make = true;
+ }
+#endif
+ if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
+ PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
+ pmap_clear_user_ttb_internal();
+ }
#if (__ARM_VMSA__ > 7)
/* If we're switching to a different nested pmap (i.e. shared region), we'll need
* to flush the userspace mappings for that region. Those mappings are global
* and will not be protected by the ASID. It should also be cheaper to flush the
* entire local TLB rather than to do a broadcast MMU flush by VA region. */
- if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
- flush_core_tlb();
- } else
-#endif
- if (do_asid_flush) {
- pmap_flush_core_tlb_asid(pmap);
+ if (__improbable(do_shared_region_flush)) {
+#if __ARM_RANGE_TLBI__
+ uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
+ vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;
+
+ /* NOTE: here we flush the global TLB entries for the previous nested region only.
+ * There may still be non-global entries that overlap with the incoming pmap's
+ * nested region. On Apple SoCs at least, this is acceptable. Those non-global entries
+ * must necessarily belong to a different ASID than the incoming pmap, or they would
+ * be flushed in the do_asid_flush case below. This will prevent them from conflicting
+ * with the incoming pmap's nested region. However, the ARMv8 ARM is not crystal clear
+ * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
+ * to consider additional invalidation here in the future. */
+ if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
+ flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
+ } else {
+ do_asid_flush = false;
+ flush_core_tlb_async();
+ }
+#else
+ do_asid_flush = false;
+ flush_core_tlb_async();
+#endif // __ARM_RANGE_TLBI__
+ }
+#endif // (__ARM_VMSA__ > 7)
+ if (__improbable(do_asid_flush)) {
+ pmap_flush_core_tlb_asid_async(pmap);
#if DEVELOPMENT || DEBUG
os_atomic_inc(&pmap_asid_flushes, relaxed);
#endif
}
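+ /* Wait for any TLB invalidations issued above to complete before installing the new TTB. */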
+ if (__improbable(do_asid_flush || do_shared_region_flush)) {
+ sync_tlb_flush();
+ }
+
+ pmap_switch_user_ttb_internal(pmap);
}
void
pmap_t pmap)
{
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
+#if XNU_MONITOR
+ pmap_switch_ppl(pmap);
+#else
pmap_switch_internal(pmap);
+#endif
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
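+/*
+ * Validate that the given pmap is a genuine pmap object: under XNU_MONITOR this is
+ * checked by VALIDATE_PMAP(); otherwise, any non-kernel pmap must come from the pmap zone.
+ */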
+void
+pmap_require(pmap_t pmap)
+{
+#if XNU_MONITOR
+ VALIDATE_PMAP(pmap);
+#else
+ if (pmap != kernel_pmap) {
+ zone_id_require(ZONE_ID_PMAP, sizeof(struct pmap), pmap);
+ }
+#endif
+}
+
void
pmap_page_protect(
ppnum_t ppnum,
* page.
*/
MARK_AS_PMAP_TEXT static void
-pmap_page_protect_options_internal(
+pmap_page_protect_options_with_flush_range(
ppnum_t ppnum,
vm_prot_t prot,
- unsigned int options)
+ unsigned int options,
+ pmap_tlb_flush_range_t *flush_range)
{
pmap_paddr_t phys = ptoa(ppnum);
pv_entry_t **pv_h;
remove = FALSE;
break;
default:
+ /* The PPL security model requires that we flush TLBs before we exit if the page may be recycled. */
+ options = options & ~PMAP_OPTIONS_NOFLUSH;
remove = TRUE;
break;
}
pv_h = pai_to_pvh(pai);
pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN))) {
+ panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
+ }
+#endif
pte_p = PT_ENTRY_NULL;
pve_p = PV_ENTRY_NULL;
}
while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
- vm_map_address_t va;
- pmap_t pmap;
- pt_entry_t tmplate;
- boolean_t update = FALSE;
+ vm_map_address_t va = 0;
+ pmap_t pmap = NULL;
+ pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
+ boolean_t update = FALSE;
if (pve_p != PV_ENTRY_NULL) {
pte_p = pve_get_ptep(pve_p);
#ifdef PVH_FLAG_IOMMU
if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
+#if XNU_MONITOR
+ if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
+ ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
+ }
+#endif
if (remove) {
if (options & PMAP_OPTIONS_COMPRESSOR) {
panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
#else
if ((prot & VM_PROT_EXECUTE))
#endif
- { set_NX = FALSE;} else {
+ {
+ set_NX = FALSE;
+ } else {
set_NX = TRUE;
}
/* Remove the mapping if new protection is NONE */
if (remove) {
boolean_t is_altacct = FALSE;
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ pt_entry_t spte = *pte_p;
if (IS_ALTACCT_PAGE(pai, pve_p)) {
is_altacct = TRUE;
is_altacct = FALSE;
}
- if (pte_is_wired(*pte_p)) {
- pte_set_wired(pte_p, 0);
+ if (pte_is_wired(spte)) {
+ pte_set_wired(pmap, pte_p, 0);
+ spte = *pte_p;
if (pmap != kernel_pmap) {
- pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
}
}
- if (*pte_p != ARM_PTE_TYPE_FAULT &&
+ if (spte != ARM_PTE_TYPE_FAULT &&
pmap != kernel_pmap &&
(options & PMAP_OPTIONS_COMPRESSOR) &&
IS_INTERNAL_PAGE(pai)) {
tmplate = ARM_PTE_TYPE_FAULT;
}
- if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
- tmplate == ARM_PTE_TYPE_FAULT &&
+ /**
+ * The entry must be written before the refcnt is decremented to
+ * prevent use-after-free races with code paths that deallocate page
+ * tables based on a zero refcnt.
+ */
+ if (spte != tmplate) {
+ WRITE_PTE_STRONG(pte_p, tmplate);
+ update = TRUE;
+ }
+
+ if ((spte != ARM_PTE_TYPE_FAULT) &&
+ (tmplate == ARM_PTE_TYPE_FAULT) &&
(pmap != kernel_pmap)) {
- if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
+ if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(pte_p)->refcnt)) <= 0) {
panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
}
}
- if (*pte_p != tmplate) {
- WRITE_PTE_STRONG(pte_p, tmplate);
- update = TRUE;
- }
pvh_cnt++;
- pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
#if MACH_ASSERT
if (IS_REUSABLE_PAGE(pai) &&
IS_INTERNAL_PAGE(pai) &&
!is_altacct) {
- PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
- OSAddAtomic(-1, &pmap->stats.reusable);
+ __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
+ PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
} else if (IS_INTERNAL_PAGE(pai)) {
- PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
- OSAddAtomic(-1, &pmap->stats.internal);
+ __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
+ PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
} else {
- PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
- OSAddAtomic(-1, &pmap->stats.external);
+ __assert_only int32_t orig_external = OSAddAtomic(-1, &pmap->stats.external);
+ PMAP_STATS_ASSERTF(orig_external > 0, pmap, "stats.external %d", orig_external);
}
if ((options & PMAP_OPTIONS_COMPRESSOR) &&
IS_INTERNAL_PAGE(pai)) {
if (IS_ALTACCT_PAGE(pai, pve_p)) {
assert(IS_INTERNAL_PAGE(pai));
- pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
if (options & PMAP_OPTIONS_COMPRESSOR) {
- pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
- pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
}
/*
} else if (IS_REUSABLE_PAGE(pai)) {
assert(IS_INTERNAL_PAGE(pai));
if (options & PMAP_OPTIONS_COMPRESSOR) {
- pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
/* was not in footprint, but is now */
- pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
}
} else if (IS_INTERNAL_PAGE(pai)) {
- pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
/*
* Update all stats related to physical footprint, which only
* This removal is only being done so we can send this page to
* the compressor; therefore it mustn't affect total task footprint.
*/
- pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
} else {
/*
* This internal page isn't going to the compressor, so adjust stats to keep
* phys_footprint up to date.
*/
- pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
}
} else {
/* external page: no impact on ledgers */
if (*pte_p != ARM_PTE_TYPE_FAULT &&
!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
*pte_p != tmplate) {
- WRITE_PTE_STRONG(pte_p, tmplate);
+ if (options & PMAP_OPTIONS_NOFLUSH) {
+ WRITE_PTE_FAST(pte_p, tmplate);
+ } else {
+ WRITE_PTE_STRONG(pte_p, tmplate);
+ }
update = TRUE;
}
}
/* Invalidate TLBs for all CPUs using it */
- if (update) {
+ if (update && !(options & PMAP_OPTIONS_NOFLUSH)) {
+ if (remove || !flush_range ||
+ ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
+ pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
+ }
tlb_flush_needed = TRUE;
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
}
#ifdef PVH_FLAG_IOMMU
pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
}
#endif
- if (tlb_flush_needed) {
- sync_tlb_flush();
- }
-
/* if we removed a bunch of entries, take care of them now */
if (remove) {
if (new_pve_p != PV_ENTRY_NULL) {
}
}
+ if (flush_range && tlb_flush_needed) {
+ if (!remove) {
+ flush_range->ptfr_flush_needed = true;
+ tlb_flush_needed = FALSE;
+ }
+ }
+
+ /*
+ * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
+ * lock to allow the backing pages to be repurposed. This is a security precaution, aimed
+ * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
+ * a page to be repurposed while it is still live in the TLBs.
+ */
+ if (remove && tlb_flush_needed) {
+ sync_tlb_flush();
+ }
+
UNLOCK_PVH(pai);
+ if (!remove && tlb_flush_needed) {
+ sync_tlb_flush();
+ }
+
if (remove && (pvet_p != PV_ENTRY_NULL)) {
- pv_list_free(pveh_p, pvet_p, pvh_cnt);
+ pv_list_free(pveh_p, pvet_p, pvh_cnt, pv_kern_low_water_mark);
+ }
+}
+
+MARK_AS_PMAP_TEXT static void
+pmap_page_protect_options_internal(
+ ppnum_t ppnum,
+ vm_prot_t prot,
+ unsigned int options,
+ void *arg)
+{
+ if (arg != NULL) {
+ /*
+ * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
+ * ultimately be flushed. The nature of ARM TLB maintenance is such that we can flush the
+ * TLBs much more precisely if we do so inline with the pagetable updates, and the PPL
+ * security model requires that we not exit the PPL without performing the required TLB
+ * flushes anyway. In that case, force the flush to take place.
+ */
+ options &= ~PMAP_OPTIONS_NOFLUSH;
}
+ pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
}
void
ppnum_t ppnum,
vm_prot_t prot,
unsigned int options,
- __unused void *arg)
+ void *arg)
{
pmap_paddr_t phys = ptoa(ppnum);
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
- pmap_page_protect_options_internal(ppnum, prot, options);
+#if XNU_MONITOR
+ pmap_page_protect_options_ppl(ppnum, prot, options, arg);
+#else
+ pmap_page_protect_options_internal(ppnum, prot, options, arg);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
+
+#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
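+/*
+ * Mark a user pmap so that JOP (user pointer authentication) is disabled for threads
+ * running with it. Must not be called with the kernel pmap.
+ */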
+MARK_AS_PMAP_TEXT void
+pmap_disable_user_jop_internal(pmap_t pmap)
+{
+ if (pmap == kernel_pmap) {
+ panic("%s: called with kernel_pmap\n", __func__);
+ }
+ VALIDATE_PMAP(pmap);
+ pmap->disable_jop = true;
+}
+
+void
+pmap_disable_user_jop(pmap_t pmap)
+{
+#if XNU_MONITOR
+ pmap_disable_user_jop_ppl(pmap);
+#else
+ pmap_disable_user_jop_internal(pmap);
+#endif
+}
+#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
+
/*
* Indicates if the pmap layer enforces some additional restrictions on the
* given set of protections.
*/
bool
-pmap_has_prot_policy(__unused vm_prot_t prot)
+pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
- return FALSE;
+ return false;
}
/*
pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
-MARK_AS_PMAP_TEXT static void
+MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_protect_options_internal(
pmap_t pmap,
vm_map_address_t start,
boolean_t should_have_removed = FALSE;
bool need_strong_sync = false;
- if (__improbable(end < start)) {
- panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
+ if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
+ panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
}
#if DEVELOPMENT || DEBUG
#if (__ARM_VMSA__ > 7)
case VM_PROT_EXECUTE:
set_XO = TRUE;
- /* fall through */
+ OS_FALLTHROUGH;
#endif
case VM_PROT_READ:
case VM_PROT_READ | VM_PROT_EXECUTE:
break;
case VM_PROT_READ | VM_PROT_WRITE:
case VM_PROT_ALL:
- return; /* nothing to do */
+ return end; /* nothing to do */
default:
should_have_removed = TRUE;
}
set_NX = TRUE;
}
+ const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
+ vm_map_address_t va = start;
+ unsigned int npages = 0;
+
VALIDATE_PMAP(pmap);
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
+
tte_p = pmap_tte(pmap, start);
if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
bpte_p = (pt_entry_t *) ttetokv(*tte_p);
- bpte_p = &bpte_p[ptenum(start)];
- epte_p = bpte_p + arm_atop(end - start);
+ bpte_p = &bpte_p[pte_index(pmap, pt_attr, start)];
+ epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
pte_p = bpte_p;
for (pte_p = bpte_p;
pte_p < epte_p;
- pte_p += PAGE_SIZE / ARM_PGBYTES) {
+ pte_p += PAGE_RATIO, va += pmap_page_size) {
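+ /*
+ * Check for pending preemption at a fixed page interval and stop early if needed;
+ * the caller resumes the operation from the VA returned by this function.
+ */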
+ ++npages;
+ if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
+ pmap_pending_preemption())) {
+ break;
+ }
pt_entry_t spte;
#if DEVELOPMENT || DEBUG
boolean_t force_write = FALSE;
#endif
- spte = *pte_p;
+ spte = *((volatile pt_entry_t*)pte_p);
if ((spte == ARM_PTE_TYPE_FAULT) ||
ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
}
pai = (int)pa_index(pa);
LOCK_PVH(pai);
- spte = *pte_p;
+ spte = *((volatile pt_entry_t*)pte_p);
pa = pte_to_pa(spte);
if (pai == (int)pa_index(pa)) {
managed = TRUE;
UNLOCK_PVH(pai);
}
}
- FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
- PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
+ FLUSH_PTE_RANGE_STRONG(bpte_p, pte_p);
+ PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync);
+ } else {
+ va = end;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
+ return va;
}
void
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- if ((b | e) & PAGE_MASK) {
+ if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
pmap, (uint64_t)b, (uint64_t)e);
}
+ assert(get_preemption_level() == 0);
+
#if DEVELOPMENT || DEBUG
if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
l = e;
}
- pmap_protect_options_internal(pmap, beg, l, prot, options, args);
-
- beg = l;
+#if XNU_MONITOR
+ beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
+#else
+ beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
+#endif
}
PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
return KERN_SUCCESS;
}
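+/*
+ * Enter a mapping at virtual address v for the given physical address with default
+ * options; thin wrapper around pmap_enter_options_addr().
+ */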
+kern_return_t
+pmap_enter_addr(
+ pmap_t pmap,
+ vm_map_address_t v,
+ pmap_paddr_t pa,
+ vm_prot_t prot,
+ vm_prot_t fault_type,
+ unsigned int flags,
+ boolean_t wired)
+{
+ return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
+}
+
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
unsigned int flags,
boolean_t wired)
{
- return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
+ return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
}
-
static inline void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
- SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
+ SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
if (pte & ARM_PTE_WIRED) {
OSAddAtomic16(1, ptd_wiredcnt_ptr);
- pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
} else {
OSAddAtomic16(-1, ptd_wiredcnt_ptr);
- pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
}
}
if (*pte_p != ARM_PTE_TYPE_FAULT &&
!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
WRITE_PTE_STRONG(pte_p, pte);
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
+ PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false);
} else {
WRITE_PTE(pte_p, pte);
__builtin_arm_isb(ISB_SY);
}
- PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
+ PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
+ VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), pte);
}
MARK_AS_PMAP_TEXT static pt_entry_t
return pte;
}
-static boolean_t
+static pv_alloc_return_t
pmap_enter_pv(
pmap_t pmap,
pt_entry_t *pte_p,
pv_h = pai_to_pvh(pai);
boolean_t first_cpu_mapping;
+ ASSERT_NOT_HIBERNATING();
ASSERT_PVH_LOCKED(pai);
vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
+ }
+#endif
#ifdef PVH_FLAG_CPU
/* An IOMMU mapping may already be present for a page that hasn't yet
CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
}
} else {
+ pv_alloc_return_t ret;
if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
pt_entry_t *pte1_p;
*/
pte1_p = pvh_ptep(pv_h);
pvh_set_flags(pv_h, pvh_flags);
- if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
- return FALSE;
+ if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
+ return ret;
}
pve_set_ptep(*pve_p, pte1_p);
* add it to the list for this physical page.
*/
pvh_set_flags(pv_h, pvh_flags);
- if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
- return FALSE;
+ if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
+ return ret;
}
pve_set_ptep(*pve_p, pte_p);
pvh_set_flags(pv_h, pvh_flags);
- return TRUE;
+ return PV_ALLOC_SUCCESS;
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_enter_options_internal(
pmap_t pmap,
vm_map_address_t v,
- ppnum_t pn,
+ pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
unsigned int options)
{
- pmap_paddr_t pa = ptoa(pn);
+ ppnum_t pn = (ppnum_t)atop(pa);
pt_entry_t pte;
pt_entry_t spte;
pt_entry_t *pte_p;
VALIDATE_PMAP(pmap);
+#if XNU_MONITOR
+ if (__improbable((options & PMAP_OPTIONS_NOWAIT) == 0)) {
+ panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
+ }
+#endif
+
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- if ((v) & PAGE_MASK) {
+ if ((v) & pt_attr_leaf_offmask(pt_attr)) {
panic("pmap_enter_options() pmap %p v 0x%llx\n",
pmap, (uint64_t)v);
}
- if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
- panic("pmap_enter_options(): WX request on kernel_pmap");
+ if ((pa) & pt_attr_leaf_offmask(pt_attr)) {
+ panic("pmap_enter_options() pmap %p pa 0x%llx\n",
+ pmap, (uint64_t)pa);
+ }
+
+ if ((prot & VM_PROT_EXECUTE) && (pmap == kernel_pmap)) {
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+ extern vm_offset_t ctrr_test_page;
+ if (__probable(v != ctrr_test_page))
+#endif
+ panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
}
#if DEVELOPMENT || DEBUG
#else
if ((prot & VM_PROT_EXECUTE))
#endif
- { set_NX = FALSE;} else {
+ {
+ set_NX = FALSE;
+ } else {
set_NX = TRUE;
}
was_compressed = FALSE;
was_alt_compressed = FALSE;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
/*
* Expand pmap to include this pte. Assume that
*/
while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
/* Must unlock to expand the pmap. */
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
- kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
+ kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
if (kr != KERN_SUCCESS) {
return kr;
}
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
}
if (options & PMAP_OPTIONS_NOENTER) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
return KERN_SUCCESS;
}
spte = *pte_p;
- if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
+ if (ARM_PTE_IS_COMPRESSED(spte, pte_p) && !refcnt_updated) {
/*
* "pmap" should be locked at this point, so this should
* not race with another pmap_enter() or pmap_remove_range().
/* one less "compressed" */
OSAddAtomic64(-1, &pmap->stats.compressed);
pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
- PAGE_SIZE);
+ pt_attr_page_size(pt_attr) * PAGE_RATIO);
was_compressed = TRUE;
if (spte & ARM_PTE_COMPRESSED_ALT) {
was_alt_compressed = TRUE;
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
} else {
/* was part of the footprint */
- pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+ pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
}
/* clear "compressed" marker */
}
if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
- pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
+ pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO);
}
pte = pa_to_pte(pa) | ARM_PTE_TYPE;
- /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
- * wired memory statistics for user pmaps, but kernel PTEs are assumed
- * to be wired in nearly all cases. For VM layer functionality, the wired
- * count in vm_page_t is sufficient. */
- if (wired && pmap != kernel_pmap) {
+ if (wired) {
pte |= ARM_PTE_WIRED;
}
if (!pmap->nested) {
pte |= ARM_PTE_NG;
} else if ((pmap->nested_region_asid_bitmap)
- && (v >= pmap->nested_region_subord_addr)
- && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
- unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
+ && (v >= pmap->nested_region_addr)
+ && (v < (pmap->nested_region_addr + pmap->nested_region_size))) {
+ unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
if ((pmap->nested_region_asid_bitmap)
&& testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
vm_map_address_t nest_vaddr;
pt_entry_t *nest_pte_p;
- nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
+ nest_vaddr = v;
- if ((nest_vaddr >= pmap->nested_region_subord_addr)
- && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
+ if ((nest_vaddr >= pmap->nested_region_addr)
+ && (nest_vaddr < (pmap->nested_region_addr + pmap->nested_region_size))
&& ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
&& (*nest_pte_p != ARM_PTE_TYPE_FAULT)
&& (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
&& (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
- unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
+ unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
if ((pmap->nested_pmap->nested_region_asid_bitmap)
&& !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
#endif
if (prot & VM_PROT_WRITE) {
if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
+ assert(!pmap->nested); /* no write access in a nested pmap */
if (fault_type & VM_PROT_WRITE) {
if (set_XO) {
pte |= pt_attr_leaf_rwna(pt_attr);
} else {
pte |= pt_attr_leaf_ro(pt_attr);
}
- pa_set_bits(pa, PP_ATTR_REFERENCED);
+ /*
+ * Mark the page as MODFAULT so that a subsequent write
+ * may be handled through arm_fast_fault().
+ */
+ pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODFAULT);
pte_set_was_writeable(pte, true);
}
} else {
volatile uint16_t *refcnt = NULL;
volatile uint16_t *wiredcnt = NULL;
if (pmap != kernel_pmap) {
- refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
- wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
+ ptd_info_t *ptd_info = ptep_get_info(pte_p);
+ refcnt = &ptd_info->refcnt;
+ wiredcnt = &ptd_info->wiredcnt;
/* Bump the wired count to keep the PTE page from being reclaimed. We need this because
* we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
* a new PV entry. */
pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
+#if XNU_MONITOR
+ /* The regular old kernel is not allowed to remap PPL pages. */
+ if (__improbable(pa_test_monitor(pa))) {
+ panic("%s: page belongs to PPL, "
+ "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
+ __FUNCTION__,
+ pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
+ }
+
+ if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN)) {
+ panic("%s: page locked down, "
+ "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
+ __FUNCTION__,
+ pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
+ }
+#endif
if (pte == *pte_p) {
UNLOCK_PVH(pai);
goto Pmap_enter_retry;
}
- if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
+ pv_alloc_return_t pv_status = pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct);
+ if (pv_status == PV_ALLOC_RETRY) {
goto Pmap_enter_loop;
+ } else if (pv_status == PV_ALLOC_FAIL) {
+ UNLOCK_PVH(pai);
+ kr = KERN_RESOURCE_SHORTAGE;
+ goto Pmap_enter_cleanup;
}
pmap_enter_pte(pmap, pte_p, pte, v);
UNLOCK_PVH(pai);
if (pmap != kernel_pmap) {
- pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
if (is_internal) {
/*
* Make corresponding adjustments to
* phys_footprint statistics.
*/
- pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
if (is_altacct) {
/*
* If this page is internal and
* is 0. That means: don't
* touch phys_footprint here.
*/
- pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
} else {
- pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+ pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
}
}
}
pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
+#if XNU_MONITOR
+ if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
+ uint64_t xprr_perm = pte_to_xprr_perm(pte);
+ switch (xprr_perm) {
+ case XPRR_KERN_RO_PERM:
+ break;
+ case XPRR_KERN_RW_PERM:
+ pte &= ~ARM_PTE_XPRR_MASK;
+ pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
+ break;
+ default:
+ panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
+ }
+ }
+#endif
pmap_enter_pte(pmap, pte_p, pte, v);
}
#if CONFIG_PGTRACE
if (pgtrace_enabled) {
// Clone and invalidate original mapping if eligible
- for (int i = 0; i < PAGE_RATIO; i++) {
- pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
- }
+ pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES, 0, 0);
}
-#endif
+#endif /* CONFIG_PGTRACE */
if (pve_p != PV_ENTRY_NULL) {
- pv_free(pve_p);
+ pv_free_entry(pve_p);
+ }
+
+ if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
+ panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
+ }
+
+ pmap_unlock(pmap);
+
+ return kr;
+}
+
+kern_return_t
+pmap_enter_options_addr(
+ pmap_t pmap,
+ vm_map_address_t v,
+ pmap_paddr_t pa,
+ vm_prot_t prot,
+ vm_prot_t fault_type,
+ unsigned int flags,
+ boolean_t wired,
+ unsigned int options,
+ __unused void *arg)
+{
+ kern_return_t kr = KERN_FAILURE;
+
+
+ PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
+ VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);
+
+
+#if XNU_MONITOR
+ /*
+ * If NOWAIT was not requested, loop until the enter does not
+ * fail due to lack of resources.
+ */
+ while ((kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl((options & PMAP_OPTIONS_NOWAIT) ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
+ if (options & PMAP_OPTIONS_NOWAIT) {
+ break;
+ }
}
- if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
- panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
- }
+ pmap_ledger_check_balance(pmap);
+#else
+ kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
+#endif
- PMAP_UNLOCK(pmap);
+ PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
return kr;
}
unsigned int options,
__unused void *arg)
{
- kern_return_t kr = KERN_FAILURE;
-
- PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
- VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
-
- kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
- pv_water_mark_check();
-
- PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
-
- return kr;
+ return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
}
/*
pt_entry_t *pte_p;
pmap_paddr_t pa;
- /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
- * wired memory statistics for user pmaps, but kernel PTEs are assumed
- * to be wired in nearly all cases. For VM layer functionality, the wired
- * count in vm_page_t is sufficient. */
- if (pmap == kernel_pmap) {
- return;
- }
- VALIDATE_USER_PMAP(pmap);
+ VALIDATE_PMAP(pmap);
+
+ pmap_lock(pmap);
+
+ const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
- PMAP_LOCK(pmap);
pte_p = pmap_pte(pmap, v);
- assert(pte_p != PT_ENTRY_NULL);
- pa = pte_to_pa(*pte_p);
+ if (pte_p == PT_ENTRY_NULL) {
+ if (!wired) {
+ /*
+ * The PTE may have already been cleared by a disconnect/remove operation, and the L3 table
+ * may have been freed by a remove operation.
+ */
+ goto pmap_change_wiring_return;
+ } else {
+ panic("%s: Attempt to wire nonexistent PTE for pmap %p", __func__, pmap);
+ }
+ }
+ /*
+ * Use volatile loads to prevent the compiler from collapsing references to 'pa' back to loads of pte_p
+ * until we've grabbed the final PVH lock; PTE contents may change during this time.
+ */
+ pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
while (pa_valid(pa)) {
pmap_paddr_t new_pa;
LOCK_PVH((int)pa_index(pa));
- new_pa = pte_to_pa(*pte_p);
+ new_pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
if (pa == new_pa) {
break;
pa = new_pa;
}
- if (wired && !pte_is_wired(*pte_p)) {
- pte_set_wired(pte_p, wired);
- OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
- pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
- } else if (!wired && pte_is_wired(*pte_p)) {
- PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
- pte_set_wired(pte_p, wired);
- OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
- pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+ /* PTE checks must be performed after acquiring the PVH lock (if applicable for the PA) */
+ if ((*pte_p == ARM_PTE_EMPTY) || (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
+ if (!wired) {
+ /* PTE cleared by prior remove/disconnect operation */
+ goto pmap_change_wiring_cleanup;
+ } else {
+ panic("%s: Attempt to wire empty/compressed PTE %p (=0x%llx) for pmap %p",
+ __func__, pte_p, (uint64_t)*pte_p, pmap);
+ }
+ }
+
+ assertf((*pte_p & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", pte_p, (uint64_t)*pte_p);
+ if (wired != pte_is_wired(*pte_p)) {
+ pte_set_wired(pmap, pte_p, wired);
+ if (pmap != kernel_pmap) {
+ if (wired) {
+ OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
+ pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ } else if (!wired) {
+ __assert_only int32_t orig_wired = OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
+ PMAP_STATS_ASSERTF(orig_wired > 0, pmap, "stats.wired_count %d", orig_wired);
+ pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ }
+ }
}
+pmap_change_wiring_cleanup:
if (pa_valid(pa)) {
UNLOCK_PVH((int)pa_index(pa));
}
- PMAP_UNLOCK(pmap);
+pmap_change_wiring_return:
+ pmap_unlock(pmap);
}
void
vm_map_address_t v,
boolean_t wired)
{
+#if XNU_MONITOR
+ pmap_change_wiring_ppl(pmap, v, wired);
+
+ pmap_ledger_check_balance(pmap);
+#else
pmap_change_wiring_internal(pmap, v, wired);
+#endif
}
-MARK_AS_PMAP_TEXT static ppnum_t
-pmap_find_phys_internal(
+MARK_AS_PMAP_TEXT static pmap_paddr_t
+pmap_find_pa_internal(
pmap_t pmap,
addr64_t va)
{
- ppnum_t ppn = 0;
+ pmap_paddr_t pa = 0;
VALIDATE_PMAP(pmap);
if (pmap != kernel_pmap) {
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
}
- ppn = pmap_vtophys(pmap, va);
+ pa = pmap_vtophys(pmap, va);
if (pmap != kernel_pmap) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
}
- return ppn;
+ return pa;
}
-ppnum_t
-pmap_find_phys(
- pmap_t pmap,
- addr64_t va)
+pmap_paddr_t
+pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
{
- pmap_paddr_t pa = 0;
+ pmap_paddr_t pa = 0;
if (pmap == kernel_pmap) {
pa = mmu_kvtop(va);
} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
+ /*
+ * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
+ * translation even if PAN would prevent kernel access through the translation.
+ * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
+ */
pa = mmu_uvtop(va);
}
+ return pa;
+}
- if (pa) {
- return (ppnum_t)(pa >> PAGE_SHIFT);
+pmap_paddr_t
+pmap_find_pa(
+ pmap_t pmap,
+ addr64_t va)
+{
+ pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);
+
+ if (pa != 0) {
+ return pa;
}
if (not_in_kdp) {
- return pmap_find_phys_internal(pmap, va);
+#if XNU_MONITOR
+ return pmap_find_pa_ppl(pmap, va);
+#else
+ return pmap_find_pa_internal(pmap, va);
+#endif
} else {
return pmap_vtophys(pmap, va);
}
}
+ppnum_t
+pmap_find_phys_nofault(
+ pmap_t pmap,
+ addr64_t va)
+{
+ ppnum_t ppn;
+ ppn = atop(pmap_find_pa_nofault(pmap, va));
+ return ppn;
+}
+
+ppnum_t
+pmap_find_phys(
+ pmap_t pmap,
+ addr64_t va)
+{
+ ppnum_t ppn;
+ ppn = atop(pmap_find_pa(pmap, va));
+ return ppn;
+}
+
+
pmap_paddr_t
kvtophys(
vm_offset_t va)
if (pa) {
return pa;
}
- pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
- if (pa) {
- pa |= (va & PAGE_MASK);
- }
-
- return (pmap_paddr_t)pa;
+ return pmap_vtophys(kernel_pmap, va);
}
-ppnum_t
+pmap_paddr_t
pmap_vtophys(
pmap_t pmap,
addr64_t va)
return 0;
}
-#if (__ARM_VMSA__ == 7)
+#if (__ARM_VMSA__ == 7)
tt_entry_t *tte_p, tte;
pt_entry_t *pte_p;
- ppnum_t ppn;
+ pmap_paddr_t pa;
tte_p = pmap_tte(pmap, va);
if (tte_p == (tt_entry_t *) NULL) {
- return (ppnum_t) 0;
+ return (pmap_paddr_t) 0;
}
tte = *tte_p;
if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
- pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
- ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
+ pte_p = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, va);
+ pa = pte_to_pa(*pte_p) | (va & ARM_PGMASK);
#if DEVELOPMENT || DEBUG
- if (ppn != 0 &&
+ if (atop(pa) != 0 &&
ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
- pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
+ pmap, va, pte_p, (uint64_t) (*pte_p), atop(pa));
}
#endif /* DEVELOPMENT || DEBUG */
} else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
- ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
+ pa = suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK);
} else {
- ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
+ pa = sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK);
}
} else {
- ppn = 0;
+ pa = 0;
}
#else
- tt_entry_t *ttp;
- tt_entry_t tte;
- ppnum_t ppn = 0;
-
- __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
-
- /* Level 0 currently unused */
-
- /* Get first-level (1GB) entry */
- ttp = pmap_tt1e(pmap, va);
- tte = *ttp;
- if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
- return ppn;
- }
-
- tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];
-
- if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
- return ppn;
- }
+ tt_entry_t * ttp = NULL;
+ tt_entry_t * ttep = NULL;
+ tt_entry_t tte = ARM_TTE_EMPTY;
+ pmap_paddr_t pa = 0;
+ unsigned int cur_level;
- if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
- ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
- return ppn;
- }
- tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
- ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
-#endif
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- return ppn;
-}
+ ttp = pmap->tte;
-MARK_AS_PMAP_TEXT static vm_offset_t
-pmap_extract_internal(
- pmap_t pmap,
- vm_map_address_t va)
-{
- pmap_paddr_t pa = 0;
- ppnum_t ppn = 0;
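+ /*
+ * Walk the translation tables from the configured root level down toward the leaf
+ * level, stopping at the first block/page mapping or invalid entry.
+ */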
+ for (cur_level = pt_attr_root_level(pt_attr); cur_level <= pt_attr_leaf_level(pt_attr); cur_level++) {
+ ttep = &ttp[ttn_index(pmap, pt_attr, va, cur_level)];
- if (pmap == NULL) {
- return 0;
- }
+ tte = *ttep;
- VALIDATE_PMAP(pmap);
+ const uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
+ const uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
+ const uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
+ const uint64_t offmask = pt_attr->pta_level_info[cur_level].offmask;
- PMAP_LOCK(pmap);
+ if ((tte & valid_mask) != valid_mask) {
+ return (pmap_paddr_t) 0;
+ }
- ppn = pmap_vtophys(pmap, va);
+ /* This detects both leaf entries and intermediate block mappings. */
+ if ((tte & type_mask) == type_block) {
+ pa = ((tte & ARM_TTE_PA_MASK & ~offmask) | (va & offmask));
+ break;
+ }
- if (ppn != 0) {
- pa = ptoa(ppn) | ((va) & PAGE_MASK);
+ ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
}
-
- PMAP_UNLOCK(pmap);
+#endif
return pa;
}
-/*
- * Routine: pmap_extract
- * Function:
- * Extract the physical page address associated
- * with the given map/virtual_address pair.
- *
- */
-vm_offset_t
-pmap_extract(
- pmap_t pmap,
- vm_map_address_t va)
-{
- pmap_paddr_t pa = 0;
-
- if (pmap == kernel_pmap) {
- pa = mmu_kvtop(va);
- } else if (pmap == vm_map_pmap(current_thread()->map)) {
- pa = mmu_uvtop(va);
- }
-
- if (pa) {
- return pa;
- }
-
- return pmap_extract_internal(pmap, va);
-}
-
/*
* pmap_init_pte_page - Initialize a page table page.
*/
pt_entry_t *pte_p,
vm_offset_t va,
unsigned int ttlevel,
- boolean_t alloc_ptd,
- boolean_t clear)
+ boolean_t alloc_ptd)
{
pt_desc_t *ptdp = NULL;
vm_offset_t *pvh;
* on 4KB hardware, we may already have allocated a page table descriptor for a
* bootstrap request, so we check for an existing PTD here.
*/
- ptdp = ptd_alloc(pmap, true);
+ ptdp = ptd_alloc(pmap);
+ if (ptdp == NULL) {
+ panic("%s: unable to allocate PTD", __func__);
+ }
pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
} else {
panic("pmap_init_pte_page(): pte_p %p", pte_p);
panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
}
- if (clear) {
- bzero(pte_p, ARM_PGBYTES);
- // below barrier ensures the page zeroing is visible to PTW before
- // it is linked to the PTE of previous level
- __builtin_arm_dmb(DMB_ISHST);
- }
+ // The barrier below ensures that previous updates to the page are visible to the
+ // page table walker before the page is linked into the PTE of the previous level.
+ __builtin_arm_dmb(DMB_ISHST);
ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
{
tt_entry_t *tte_next_p;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
pa = 0;
if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
return KERN_SUCCESS;
}
tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
tte_p = &pmap->tte[ttenum(v)];
*tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
FLUSH_PTE(tte_p);
- PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
+ PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
return KERN_SUCCESS;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
}
v = v & ~ARM_TT_L1_PT_OFFMASK;
VM_PAGE_WAIT();
}
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
/*
* See if someone else expanded us first
*/
if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
tt_entry_t *tte_next_p;
- pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
+ pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
pa = kvtophys((vm_offset_t)tt_p);
tte_p = &pmap->tte[ttenum(v)];
for (i = 0, tte_next_p = tte_p; i < 4; i++) {
*tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
- PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
+ PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
tte_next_p++;
pa = pa + 0x400;
pa = 0x0ULL;
tt_p = (tt_entry_t *)NULL;
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
if (tt_p != (tt_entry_t *)NULL) {
pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
tt_p = (tt_entry_t *)NULL;
tt_p = (tt_entry_t *)NULL;
for (; ttlevel < level; ttlevel++) {
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
if (options & PMAP_OPTIONS_NOWAIT) {
return KERN_RESOURCE_SHORTAGE;
}
+#if XNU_MONITOR
+ panic("%s: failed to allocate tt, "
+ "pmap=%p, v=%p, options=0x%x, level=%u",
+ __FUNCTION__,
+ pmap, (void *)v, options, level);
+#else
VM_PAGE_WAIT();
+#endif
}
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
- pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
+ pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
pa = kvtophys((vm_offset_t)tt_p);
tte_p = pmap_ttne(pmap, ttlevel, v);
*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
- PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
+ PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
pa = 0x0ULL;
tt_p = (tt_entry_t *)NULL;
}
+ pmap_unlock(pmap);
+ } else {
+ pmap_unlock_ro(pmap);
}
- PMAP_UNLOCK(pmap);
-
if (tt_p != (tt_entry_t *)NULL) {
pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
tt_p = (tt_entry_t *)NULL;
}
#if 0
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
/* TODO: Scan for vm page assigned to top level page tables with no reference */
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
#endif
return;
pmap_gc(
void)
{
+#if XNU_MONITOR
+ /*
+ * We cannot invoke the scheduler from the PPL, so for now we elide the
+ * GC logic if the PPL is enabled.
+ */
+#endif
+#if !XNU_MONITOR
pmap_t pmap, pmap_next;
boolean_t gc_wait;
}
pmap_simple_unlock(&pmaps_lock);
}
+#endif
}
/*
uint64_t
pmap_release_pages_fast(void)
{
+#if XNU_MONITOR
+ return pmap_release_ppl_pages_to_kernel();
+#else /* XNU_MONITOR */
return 0;
+#endif
}
/*
boolean_t
coredumpok(
vm_map_t map,
- vm_offset_t va)
+ mach_vm_offset_t va)
{
pt_entry_t *pte_p;
pt_entry_t spte;
* attributes alone.
*/
MARK_AS_PMAP_TEXT static void
-phys_attribute_clear_internal(
+phys_attribute_clear_with_flush_range(
ppnum_t pn,
unsigned int bits,
int options,
- void *arg)
+ void *arg,
+ pmap_tlb_flush_range_t *flush_range)
{
pmap_paddr_t pa = ptoa(pn);
vm_prot_t allow_mode = VM_PROT_ALL;
+#if XNU_MONITOR
+ if (__improbable(bits & PP_ATTR_PPL_OWNED_BITS)) {
+ panic("%s: illegal request, "
+ "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
+ __FUNCTION__,
+ pn, bits, options, arg, flush_range);
+ }
+#endif
+ if ((arg != NULL) || (flush_range != NULL)) {
+ options = options & ~PMAP_OPTIONS_NOFLUSH;
+ }
- if ((bits & PP_ATTR_MODIFIED) &&
- (options & PMAP_OPTIONS_NOFLUSH) &&
- (arg == NULL)) {
- panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
+ if (__improbable((bits & PP_ATTR_MODIFIED) &&
+ (options & PMAP_OPTIONS_NOFLUSH))) {
+ panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
"should not clear 'modified' without flushing TLBs\n",
- pn, bits, options, arg);
+ pn, bits, options, arg, flush_range);
}
assert(pn != vm_page_fictitious_addr);
if (options & PMAP_OPTIONS_CLEAR_WRITE) {
assert(bits == PP_ATTR_MODIFIED);
- pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
+ pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), options, flush_range);
/*
* We short circuit this case; it should not need to
* invoke arm_force_fast_fault, so just clear the modified bit.
return;
}
- if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
+ if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
pa_clear_bits(pa, bits);
}
- return;
}
+MARK_AS_PMAP_TEXT static void
+phys_attribute_clear_internal(
+ ppnum_t pn,
+ unsigned int bits,
+ int options,
+ void *arg)
+{
+ phys_attribute_clear_with_flush_range(pn, bits, options, arg, NULL);
+}
+
+#if __ARM_RANGE_TLBI__
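+/*
+ * Clear the given attribute bits for every managed page mapped in [start, end), which
+ * must not span more than one twig-level entry. Returns the VA at which the walk
+ * stopped; this may be before 'end' if preemption is pending.
+ */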
+MARK_AS_PMAP_TEXT static vm_map_address_t
+phys_attribute_clear_twig_internal(
+ pmap_t pmap,
+ vm_map_address_t start,
+ vm_map_address_t end,
+ unsigned int bits,
+ unsigned int options,
+ pmap_tlb_flush_range_t *flush_range)
+{
+ pmap_assert_locked_r(pmap);
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ assert(end >= start);
+ assert((end - start) <= pt_attr_twig_size(pt_attr));
+ const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
+ vm_map_address_t va = start;
+ pt_entry_t *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
+ tt_entry_t *tte_p;
+ tte_p = pmap_tte(pmap, start);
+ unsigned int npages = 0;
+
+ if (tte_p == (tt_entry_t *) NULL) {
+ return end;
+ }
+
+ if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
+ pte_p = (pt_entry_t *) ttetokv(*tte_p);
+
+ start_pte_p = &pte_p[pte_index(pmap, pt_attr, start)];
+ end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
+ assert(end_pte_p >= start_pte_p);
+ for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++, va += pmap_page_size) {
+ if (__improbable(npages++ && pmap_pending_preemption())) {
+ return va;
+ }
+ pmap_paddr_t pa = pte_to_pa(*((volatile pt_entry_t*)curr_pte_p));
+ if (pa_valid(pa)) {
+ ppnum_t pn = (ppnum_t) atop(pa);
+ phys_attribute_clear_with_flush_range(pn, bits, options, NULL, flush_range);
+ }
+ }
+ }
+ return end;
+}
+
+MARK_AS_PMAP_TEXT static vm_map_address_t
+phys_attribute_clear_range_internal(
+ pmap_t pmap,
+ vm_map_address_t start,
+ vm_map_address_t end,
+ unsigned int bits,
+ unsigned int options)
+{
+ if (__improbable(end < start)) {
+ panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
+ }
+ VALIDATE_PMAP(pmap);
+
+ vm_map_address_t va = start;
+ pmap_tlb_flush_range_t flush_range = {
+ .ptfr_pmap = pmap,
+ .ptfr_start = start,
+ .ptfr_end = end,
+ .ptfr_flush_needed = false
+ };
+
+ pmap_lock_ro(pmap);
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
+ while (va < end) {
+ vm_map_address_t curr_end;
+
+ curr_end = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
+ if (curr_end > end) {
+ curr_end = end;
+ }
+
+ va = phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
+ if ((va < curr_end) || pmap_pending_preemption()) {
+ break;
+ }
+ }
+ pmap_unlock_ro(pmap);
+ if (flush_range.ptfr_flush_needed) {
+ flush_range.ptfr_end = va;
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(
+ flush_range.ptfr_start,
+ flush_range.ptfr_end - flush_range.ptfr_start,
+ flush_range.ptfr_pmap);
+ sync_tlb_flush();
+ }
+ return va;
+}
+
+static void
+phys_attribute_clear_range(
+ pmap_t pmap,
+ vm_map_address_t start,
+ vm_map_address_t end,
+ unsigned int bits,
+ unsigned int options)
+{
+ assert(get_preemption_level() == 0);
+
+ PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);
+
+ while (start < end) {
+#if XNU_MONITOR
+ start = phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
+#else
+ start = phys_attribute_clear_range_internal(pmap, start, end, bits, options);
+#endif
+ }
+
+ PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
+}
+#endif /* __ARM_RANGE_TLBI__ */
+
static void
phys_attribute_clear(
ppnum_t pn,
*/
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
+#if XNU_MONITOR
+ phys_attribute_clear_ppl(pn, bits, options, arg);
+#else
phys_attribute_clear_internal(pn, bits, options, arg);
+#endif
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
pmap_paddr_t pa = ptoa(pn);
assert(pn != vm_page_fictitious_addr);
+#if XNU_MONITOR
+ if (bits & PP_ATTR_PPL_OWNED_BITS) {
+ panic("%s: illegal request, "
+ "pn=%u, bits=%#x",
+ __FUNCTION__,
+ pn, bits);
+ }
+#endif
- pa_set_bits(pa, bits);
+ pa_set_bits(pa, (uint16_t)bits);
return;
}
ppnum_t pn,
unsigned int bits)
{
+#if XNU_MONITOR
+ phys_attribute_set_ppl(pn, bits);
+#else
phys_attribute_set_internal(pn, bits);
+#endif
}
| ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}
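+/* Translate VM_MEM_MODIFIED/VM_MEM_REFERENCED mask bits into the corresponding PP_ATTR_* bits. */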
+static inline unsigned int
+pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
+{
+ return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
+ ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
+}
+
/*
* pmap_clear_refmod(phys, mask)
* clears the referenced and modified bits as specified by the mask
{
unsigned int bits;
- bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
- ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
+ bits = pmap_clear_refmod_mask_to_modified_bits(mask);
phys_attribute_clear(pn, bits, options, arg);
}
+/*
+ * Perform pmap_clear_refmod_options on a virtual address range.
+ * The operation is performed in bulk, and TLB flushes are coalesced
+ * where possible.
+ *
+ * Returns true if the operation is supported on this platform.
+ * If this function returns false, the operation is not supported and
+ * nothing has been modified in the pmap.
+ */
+bool
+pmap_clear_refmod_range_options(
+ pmap_t pmap __unused,
+ vm_map_address_t start __unused,
+ vm_map_address_t end __unused,
+ unsigned int mask __unused,
+ unsigned int options __unused)
+{
+#if __ARM_RANGE_TLBI__
+ unsigned int bits;
+ bits = pmap_clear_refmod_mask_to_modified_bits(mask);
+ phys_attribute_clear_range(pmap, start, end, bits, options);
+ return true;
+#else /* __ARM_RANGE_TLBI__ */
+#pragma unused(pmap, start, end, mask, options)
+ /*
+ * This operation allows the VM to bulk modify refmod bits on a virtually
+ * contiguous range of addresses. This is a large performance improvement on
+ * platforms that support ranged TLBI instructions; on older platforms we can
+ * only flush per-page or flush the entire ASID, so we currently support this
+ * operation only on platforms with ranged TLBI. On other platforms, the VM
+ * must modify the bits on a per-page basis.
+ */
+ return false;
+#endif /* __ARM_RANGE_TLBI__ */
+}
+
void
pmap_clear_refmod(
ppnum_t pn,
#endif
}
+#if XNU_MONITOR
+boolean_t
+pmap_is_monitor(ppnum_t pn)
+{
+ assert(pa_valid(ptoa(pn)));
+ return phys_attribute_test(pn, PP_ATTR_MONITOR);
+}
+#endif
void
pmap_lock_phys_page(ppnum_t pn)
{
+#if !XNU_MONITOR
int pai;
pmap_paddr_t phys = ptoa(pn);
pai = (int)pa_index(phys);
LOCK_PVH(pai);
} else
+#else
+ (void)pn;
+#endif
{ simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
}
void
pmap_unlock_phys_page(ppnum_t pn)
{
+#if !XNU_MONITOR
int pai;
pmap_paddr_t phys = ptoa(pn);
pai = (int)pa_index(phys);
UNLOCK_PVH(pai);
} else
+#else
+ (void)pn;
+#endif
{ simple_unlock(&phys_backup_lock);}
}
if (ttbr0_val != ttbr1_val) {
panic("Misaligned ttbr0 %08X\n", ttbr0_val);
}
- }
-#endif
- if (pmap->tte_index_max == NTTES) {
- /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
- __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
- __builtin_arm_isb(ISB_SY);
-#if !__ARM_USER_PROTECT__
- set_mmu_ttb(pmap->ttep);
-#endif
- } else {
-#if !__ARM_USER_PROTECT__
- set_mmu_ttb(pmap->ttep);
-#endif
- /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
- __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
- __builtin_arm_isb(ISB_SY);
-#if MACH_ASSERT && __ARM_USER_PROTECT__
if (pmap->ttep & 0x1000) {
panic("Misaligned ttbr0 %08X\n", pmap->ttep);
}
-#endif
}
-
+#endif
#if !__ARM_USER_PROTECT__
+ set_mmu_ttb(pmap->ttep);
set_context_id(pmap->hw_asid);
#endif
if (pmap != kernel_pmap) {
cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
+ cpu_data_ptr->cpu_nested_pmap_attr = (cpu_data_ptr->cpu_nested_pmap == NULL) ?
+ NULL : pmap_get_pt_attr(cpu_data_ptr->cpu_nested_pmap);
+ cpu_data_ptr->cpu_nested_region_addr = pmap->nested_region_addr;
+ cpu_data_ptr->cpu_nested_region_size = pmap->nested_region_size;
}
- if (pmap == kernel_pmap) {
- pmap_clear_user_ttb_internal();
- } else {
- set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
+
+#if __ARM_MIXED_PAGE_SIZE__
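+ /* Reprogram TCR if the incoming pmap uses a different page-size configuration. */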
+ if ((pmap != kernel_pmap) && (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr())) {
+ set_tcr(pmap_get_pt_attr(pmap)->pta_tcr_value);
}
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
-#if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
- if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
- uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
- bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
- if (!jop_enabled && !pmap->disable_jop) {
- // turn on JOP
- sctlr |= SCTLR_JOP_KEYS_ENABLED;
- __builtin_arm_wsr64("SCTLR_EL1", sctlr);
- // no ISB necessary because this won't take effect until eret returns to EL0
- } else if (jop_enabled && pmap->disable_jop) {
- // turn off JOP
- sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
- __builtin_arm_wsr64("SCTLR_EL1", sctlr);
- }
+ if (pmap != kernel_pmap) {
+ set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
+ } else if (!pmap_user_ttb_is_clear()) {
+ pmap_clear_user_ttb_internal();
}
-#endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
+
#endif /* (__ARM_VMSA__ == 7) */
}
pmap_t pmap)
{
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
+#if XNU_MONITOR
+ pmap_switch_user_ttb_ppl(pmap);
+#else
pmap_switch_user_ttb_internal(pmap);
+#endif
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
}
void
pmap_clear_user_ttb(void)
{
+ PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
+#if XNU_MONITOR
+ pmap_clear_user_ttb_ppl();
+#else
pmap_clear_user_ttb_internal();
+#endif
+ PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
}
-/*
- * Routine: arm_force_fast_fault
- *
- * Function:
- * Force all mappings for this page to fault according
- * to the access modes allowed, so we can gather ref/modify
- * bits again.
- */
MARK_AS_PMAP_TEXT static boolean_t
-arm_force_fast_fault_internal(
+arm_force_fast_fault_with_flush_range(
ppnum_t ppnum,
vm_prot_t allow_mode,
- int options)
+ int options,
+ pmap_tlb_flush_range_t *flush_range)
{
pmap_paddr_t phys = ptoa(ppnum);
pv_entry_t *pve_p;
boolean_t tlb_flush_needed = FALSE;
boolean_t ref_fault;
boolean_t mod_fault;
+ boolean_t clear_write_fault = FALSE;
+ boolean_t ref_aliases_mod = FALSE;
+ bool mustsynch = ((options & PMAP_OPTIONS_FF_LOCKED) == 0);
assert(ppnum != vm_page_fictitious_addr);
ref_fault = FALSE;
mod_fault = FALSE;
pai = (int)pa_index(phys);
- LOCK_PVH(pai);
+ if (__probable(mustsynch)) {
+ LOCK_PVH(pai);
+ }
pv_h = pai_to_pvh(pai);
pte_p = PT_ENTRY_NULL;
}
pmap = ptep_get_pmap(pte_p);
+ const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
va = ptep_get_va(pte_p);
assert(va >= pmap->min && va < pmap->max);
- if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
+ /* update pmap stats and ledgers */
+ if (IS_ALTACCT_PAGE(pai, pve_p)) {
+ /*
+ * We do not track "reusable" status for
+ * "alternate accounting" mappings.
+ */
+ } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
+ is_reusable &&
+ is_internal &&
+ pmap != kernel_pmap) {
+ /* one less "reusable" */
+ __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
+ PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
+ /* one more "internal" */
+ __assert_only int32_t orig_internal = OSAddAtomic(+1, &pmap->stats.internal);
+ PMAP_STATS_PEAK(pmap->stats.internal);
+ PMAP_STATS_ASSERTF(orig_internal >= 0, pmap, "stats.internal %d", orig_internal);
+ pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ assert(!IS_ALTACCT_PAGE(pai, pve_p));
+ assert(IS_INTERNAL_PAGE(pai));
+ pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+
+ /*
+ * Since the page is being marked non-reusable, we assume that it will be
+ * modified soon. Avoid the cost of another trap to handle the fast
+ * fault when we next write to this page.
+ */
+ clear_write_fault = TRUE;
+ } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
+ !is_reusable &&
+ is_internal &&
+ pmap != kernel_pmap) {
+ /* one more "reusable" */
+ __assert_only int32_t orig_reusable = OSAddAtomic(+1, &pmap->stats.reusable);
+ PMAP_STATS_PEAK(pmap->stats.reusable);
+ PMAP_STATS_ASSERTF(orig_reusable >= 0, pmap, "stats.reusable %d", orig_reusable);
+ /* one less "internal" */
+ __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
+ PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
+ pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ assert(!IS_ALTACCT_PAGE(pai, pve_p));
+ assert(IS_INTERNAL_PAGE(pai));
+ pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ }
+
+ bool wiredskip = pte_is_wired(*pte_p) &&
+ ((options & PMAP_OPTIONS_FF_WIRED) == 0);
+
+ if (wiredskip) {
result = FALSE;
- break;
+ goto fff_skip_pve;
}
spte = *pte_p;
mod_fault = TRUE;
}
} else {
- if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
- tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
+ if ((tmplate & ARM_PTE_APMASK) == pt_attr_leaf_rw(pt_attr)) {
+ tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
pte_set_was_writeable(tmplate, true);
update_pte = TRUE;
mod_fault = TRUE;
}
}
-
- if (update_pte) {
- if (*pte_p != ARM_PTE_TYPE_FAULT &&
- !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
- WRITE_PTE_STRONG(pte_p, tmplate);
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
- tlb_flush_needed = TRUE;
- } else {
- WRITE_PTE(pte_p, tmplate);
- __builtin_arm_isb(ISB_SY);
+#if MACH_ASSERT && XNU_MONITOR
+ if (is_pte_xprr_protected(pmap, spte)) {
+ if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
+ panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
+ "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
+ __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
+ ppnum, options, allow_mode);
}
}
+#endif /* MACH_ASSERT && XNU_MONITOR */
- /* update pmap stats and ledgers */
- if (IS_ALTACCT_PAGE(pai, pve_p)) {
- /*
- * We do not track "reusable" status for
- * "alternate accounting" mappings.
- */
- } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
- is_reusable &&
- is_internal &&
- pmap != kernel_pmap) {
- /* one less "reusable" */
- PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
- OSAddAtomic(-1, &pmap->stats.reusable);
- /* one more "internal" */
- OSAddAtomic(+1, &pmap->stats.internal);
- PMAP_STATS_PEAK(pmap->stats.internal);
- PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
- pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
- assert(!IS_ALTACCT_PAGE(pai, pve_p));
- assert(IS_INTERNAL_PAGE(pai));
- pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
-
- /*
- * Avoid the cost of another trap to handle the fast
- * fault when we next write to this page: let's just
- * handle that now since we already have all the
- * necessary information.
- */
- {
- arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
+ if (result && update_pte) {
+ if (options & PMAP_OPTIONS_NOFLUSH) {
+ WRITE_PTE_FAST(pte_p, tmplate);
+ } else {
+ WRITE_PTE_STRONG(pte_p, tmplate);
+ if (!flush_range ||
+ ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
+ pt_attr_page_size(pt_attr) * PAGE_RATIO, pmap);
+ }
+ tlb_flush_needed = TRUE;
}
- } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
- !is_reusable &&
- is_internal &&
- pmap != kernel_pmap) {
- /* one more "reusable" */
- OSAddAtomic(+1, &pmap->stats.reusable);
- PMAP_STATS_PEAK(pmap->stats.reusable);
- PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
- /* one less "internal" */
- PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
- OSAddAtomic(-1, &pmap->stats.internal);
- pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
- assert(!IS_ALTACCT_PAGE(pai, pve_p));
- assert(IS_INTERNAL_PAGE(pai));
- pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
}
-#ifdef PVH_FLAG_IOMMU
fff_skip_pve:
-#endif
pte_p = PT_ENTRY_NULL;
if (pve_p != PV_ENTRY_NULL) {
pve_p = PVE_NEXT_PTR(pve_next(pve_p));
}
}
+ /*
+ * If we are using the same approach for ref and mod
+ * faults on this PTE, do not clear the write fault;
+ * this would cause both ref and mod to be set on the
+ * page again, and prevent us from taking ANY read/write
+ * fault on the mapping.
+ */
+ if (clear_write_fault && !ref_aliases_mod) {
+ arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
+ }
if (tlb_flush_needed) {
- sync_tlb_flush();
+ if (flush_range) {
+ /* Delayed flush. Signal to the caller that the flush is needed. */
+ flush_range->ptfr_flush_needed = true;
+ } else {
+ sync_tlb_flush();
+ }
}
/* update global "reusable" status for this page */
if (ref_fault) {
SET_REFFAULT_PAGE(pai);
}
-
- UNLOCK_PVH(pai);
+ if (__probable(mustsynch)) {
+ UNLOCK_PVH(pai);
+ }
return result;
}
+MARK_AS_PMAP_TEXT static boolean_t
+arm_force_fast_fault_internal(
+ ppnum_t ppnum,
+ vm_prot_t allow_mode,
+ int options)
+{
+ if (__improbable((options & (PMAP_OPTIONS_FF_LOCKED | PMAP_OPTIONS_NOFLUSH)) != 0)) {
+ panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
+ }
+ return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
+}
+
+/*
+ * Routine: arm_force_fast_fault
+ *
+ * Function:
+ * Force all mappings for this page to fault according
+ * to the access modes allowed, so we can gather ref/modify
+ * bits again.
+ */
+
boolean_t
arm_force_fast_fault(
ppnum_t ppnum,
return FALSE; /* Not a managed page. */
}
+#if XNU_MONITOR
+ return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
+#else
return arm_force_fast_fault_internal(ppnum, allow_mode, options);
+#endif
}
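+
+/*
+ * Usage sketch (hypothetical caller, not taken from this file): the VM layer
+ * can re-arm ref/mod tracking for a page by revoking access so that the next
+ * touch takes a fast fault and the bits can be gathered again, e.g.:
+ *
+ *	(void) arm_force_fast_fault(pn, VM_PROT_NONE, 0);   // re-fault on any access
+ *	(void) arm_force_fast_fault(pn, VM_PROT_READ, 0);   // re-fault only on the next write
+ */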
/*
* Clear pending force fault for all mappings for this page based on
* the observed fault type, update ref/modify bits.
*/
-boolean_t
+MARK_AS_PMAP_TEXT static boolean_t
arm_clear_fast_fault(
ppnum_t ppnum,
vm_prot_t fault_type)
if (pmap == kernel_pmap) {
tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
} else {
+ assert(!pmap->nested); /* no write access in a nested pmap */
tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
}
}
}
}
+#if MACH_ASSERT && XNU_MONITOR
+ if (is_pte_xprr_protected(pmap, spte)) {
+ if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
+ panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
+ "ppnum=0x%x, fault_type=0x%x",
+ __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
+ ppnum, fault_type);
+ }
+ }
+#endif /* MACH_ASSERT && XNU_MONITOR */
if (spte != tmplate) {
if (spte != ARM_PTE_TYPE_FAULT) {
WRITE_PTE_STRONG(pte_p, tmplate);
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
+ pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
tlb_flush_needed = TRUE;
} else {
WRITE_PTE(pte_p, tmplate);
pmap_paddr_t pa;
VALIDATE_PMAP(pmap);
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
/*
* If the entry doesn't exist, is completely invalid, or is already
ptep = pmap_pte(pmap, va);
if (ptep != PT_ENTRY_NULL) {
while (true) {
- spte = *ptep;
+ spte = *((volatile pt_entry_t*)ptep);
pa = pte_to_pa(spte);
if ((spte == ARM_PTE_TYPE_FAULT) ||
ARM_PTE_IS_COMPRESSED(spte, ptep)) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
if (!pa_valid(pa)) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
+#if XNU_MONITOR
+ if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
+ return KERN_PROTECTION_FAILURE;
+ } else
+#endif
return result;
}
pai = (int)pa_index(pa);
break;
}
} else {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
- if ((IS_REFFAULT_PAGE(pai)) ||
- ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
+ if ((result != KERN_SUCCESS) &&
+ ((IS_REFFAULT_PAGE(pai)) || ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)))) {
/*
* An attempted access will always clear ref/mod fault state, as
* appropriate for the fault type. arm_clear_fast_fault will
}
}
+ /*
+ * If the PTE already has sufficient permissions, we can report the fault as handled.
+ * This may happen, for example, if multiple threads trigger roughly simultaneous faults
+	 * on mappings of the same page.
+ */
+ if ((result == KERN_FAILURE) && (spte & ARM_PTE_AF)) {
+ uintptr_t ap_ro, ap_rw, ap_x;
+ if (pmap == kernel_pmap) {
+ ap_ro = ARM_PTE_AP(AP_RONA);
+ ap_rw = ARM_PTE_AP(AP_RWNA);
+ ap_x = ARM_PTE_NX;
+ } else {
+ ap_ro = pt_attr_leaf_ro(pmap_get_pt_attr(pmap));
+ ap_rw = pt_attr_leaf_rw(pmap_get_pt_attr(pmap));
+ ap_x = pt_attr_leaf_x(pmap_get_pt_attr(pmap));
+ }
+ /*
+ * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
+ * hardware they may be xPRR-protected, in which case they'll be handled
+ * by the is_pte_xprr_protected() case above. Additionally, the exception
+ * handling path currently does not call arm_fast_fault() without at least
+ * VM_PROT_READ in fault_type.
+ */
+ if (((spte & ARM_PTE_APMASK) == ap_rw) ||
+ (!(fault_type & VM_PROT_WRITE) && ((spte & ARM_PTE_APMASK) == ap_ro))) {
+ if (!(fault_type & VM_PROT_EXECUTE) || ((spte & ARM_PTE_XMASK) == ap_x)) {
+ result = KERN_SUCCESS;
+ }
+ }
+ }
+
UNLOCK_PVH(pai);
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
}
#endif
+#if XNU_MONITOR
+ result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
+#else
result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
+#endif
#if (__ARM_VMSA__ == 7)
done:
bzero_phys((addr64_t) (ptoa(pn) + offset), len);
}
-
-/*
- * nop in current arm implementation
- */
-void
-inval_copy_windows(
- __unused thread_t t)
-{
-}
-
void
pmap_map_globals(
void)
vm_offset_t cpu_copywindow_vaddr = 0;
bool need_strong_sync = false;
+#if XNU_MONITOR
+ unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
+ need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
+#endif
+
+#if XNU_MONITOR
+#ifdef __ARM_COHERENT_IO__
+ if (__improbable(pa_valid(ptoa(pn)) && !pmap_ppl_disable)) {
+ panic("%s: attempted to map a managed page, "
+ "pn=%u, prot=0x%x, wimg_bits=0x%x",
+ __FUNCTION__,
+ pn, prot, wimg_bits);
+ }
+ if (__improbable((cacheattr & PP_ATTR_MONITOR) && (prot != VM_PROT_READ) && !pmap_ppl_disable)) {
+ panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__, (uint64_t)ptoa(pn));
+ }
+#else /* __ARM_COHERENT_IO__ */
+#error CPU copy windows are not properly supported with both the PPL and incoherent IO
+#endif /* __ARM_COHERENT_IO__ */
+#endif /* XNU_MONITOR */
cpu_num = pmap_cpu_data->cpu_number;
for (i = 0; i < CPUWINDOWS_MAX; i++) {
vm_prot_t prot,
unsigned int wimg_bits)
{
+#if XNU_MONITOR
+ return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
+#else
return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
+#endif
+}
+
+MARK_AS_PMAP_TEXT static void
+pmap_unmap_cpu_windows_copy_internal(
+ unsigned int index)
+{
+ pt_entry_t *ptep;
+ unsigned int cpu_num;
+ vm_offset_t cpu_copywindow_vaddr = 0;
+ pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
+
+ cpu_num = pmap_cpu_data->cpu_number;
+
+ cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
+ /* Issue full-system DSB to ensure prior operations on the per-CPU window
+ * (which are likely to have been on I/O memory) are complete before
+ * tearing down the mapping. */
+ __builtin_arm_dsb(DSB_SY);
+ ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
+ WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
+ PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
+}
+
+void
+pmap_unmap_cpu_windows_copy(
+ unsigned int index)
+{
+#if XNU_MONITOR
+ return pmap_unmap_cpu_windows_copy_ppl(index);
+#else
+ return pmap_unmap_cpu_windows_copy_internal(index);
+#endif
+}
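+
+/*
+ * Typical copy-window lifecycle (a sketch; the caller shown here is
+ * hypothetical and lives outside this file). The window is per-CPU, so the
+ * caller is expected to keep preemption disabled between map and unmap:
+ *
+ *	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ | VM_PROT_WRITE, wimg_bits);
+ *	vm_offset_t va = pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, index);
+ *	// ... access the physical page through 'va' ...
+ *	pmap_unmap_cpu_windows_copy(index);
+ */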
+
+#if XNU_MONITOR
+
+MARK_AS_PMAP_TEXT void
+pmap_invoke_with_page(
+ ppnum_t page_number,
+ void *ctx,
+ void (*callback)(void *ctx, ppnum_t page_number, const void *page))
+{
+ #pragma unused(page_number, ctx, callback)
+}
+
+/*
+ * Loop over every pmap_io_range (I/O ranges marked as owned by
+ * the PPL in the device tree) and conditionally call callback() on each range
+ * that needs to be included in the hibernation image.
+ *
+ * @param ctx Will be passed as-is into the callback method. Use NULL if no
+ * context is needed in the callback.
+ * @param callback Invoked on each I/O range whose attributes include the
+ *                 PMAP_IO_RANGE_NEEDS_HIBERNATING flag.
+ */
+MARK_AS_PMAP_TEXT void
+pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
+{
+ for (unsigned int i = 0; i < num_io_rgns; ++i) {
+ if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
+ callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
+ }
+ }
+}
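+
+/*
+ * Minimal callback sketch (hypothetical, for illustration only): a consumer
+ * matching the contract above could, for example, total the bytes of
+ * PPL-owned I/O memory that must be included in the hibernation image:
+ *
+ *	static void
+ *	count_hib_io_bytes(void *ctx, uint64_t addr, uint64_t len)
+ *	{
+ *		(void)addr;
+ *		*(uint64_t *)ctx += len;
+ *	}
+ *
+ *	uint64_t total = 0;
+ *	pmap_hibernate_invoke(&total, count_hib_io_bytes);
+ */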
+
+/**
+ * Set the HASHED pv_head_table flag for the passed in physical page if it's a
+ * PPL-owned page. Otherwise, do nothing.
+ *
+ * @param addr Physical address of the page to set the HASHED flag on.
+ */
+MARK_AS_PMAP_TEXT void
+pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
+{
+ /* Ignore non-managed kernel memory. */
+ if (!pa_valid(addr)) {
+ return;
+ }
+
+ const int pai = (int)pa_index(addr);
+ if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ /* Mark that the PPL-owned page has been hashed into the hibernation image. */
+ LOCK_PVH(pai);
+ pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
+ UNLOCK_PVH(pai);
+ }
+}
+
+/**
+ * Loop through every physical page in the system and clear out the HASHED flag
+ * on every PPL-owned page. That flag is used to keep track of which pages have
+ * been hashed into the hibernation image during the hibernation entry process.
+ *
+ * The HASHED flag needs to be cleared between hibernation cycles because the
+ * pv_head_table and pp_attr_table might have been copied into the hibernation
+ * image with the HASHED flag already set on certain pages. Clearing the flag
+ * ensures that the enforcement of all PPL-owned memory being hashed into the
+ * hibernation image can't be compromised across hibernation cycles.
+ */
+MARK_AS_PMAP_TEXT void
+pmap_clear_ppl_hashed_flag_all(void)
+{
+ const int last_index = (int)pa_index(vm_last_phys);
+ pv_entry_t **pv_h = NULL;
+
+ for (int pai = 0; pai < last_index; ++pai) {
+ pv_h = pai_to_pvh(pai);
+
+		/* Test for PPL-owned pages that have the HASHED flag set in their pv_head_table entries. */
+ if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
+ (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
+ LOCK_PVH(pai);
+ pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
+ UNLOCK_PVH(pai);
+ }
+ }
}
-MARK_AS_PMAP_TEXT static void
-pmap_unmap_cpu_windows_copy_internal(
- unsigned int index)
+/**
+ * Enforce that all PPL-owned pages were hashed into the hibernation image. The
+ * ppl_hib driver will call this after all wired pages have been copied into the
+ * hibernation image.
+ */
+MARK_AS_PMAP_TEXT void
+pmap_check_ppl_hashed_flag_all(void)
{
- pt_entry_t *ptep;
- unsigned int cpu_num;
- vm_offset_t cpu_copywindow_vaddr = 0;
- pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
+ const int last_index = (int)pa_index(vm_last_phys);
+ pv_entry_t **pv_h = NULL;
- cpu_num = pmap_cpu_data->cpu_number;
+ for (int pai = 0; pai < last_index; ++pai) {
+ pv_h = pai_to_pvh(pai);
- cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
- /* Issue full-system DSB to ensure prior operations on the per-CPU window
- * (which are likely to have been on I/O memory) are complete before
- * tearing down the mapping. */
- __builtin_arm_dsb(DSB_SY);
- ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
- WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
- PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
-}
+ /**
+	 * The PMAP stacks are explicitly not saved into the image, so skip checking
+	 * the pages that contain them.
+ */
+ const bool is_pmap_stack = (pai >= (int)pa_index(pmap_stacks_start_pa)) &&
+ (pai < (int)pa_index(pmap_stacks_end_pa));
-void
-pmap_unmap_cpu_windows_copy(
- unsigned int index)
-{
- return pmap_unmap_cpu_windows_copy_internal(index);
+ if (!is_pmap_stack &&
+ (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
+ !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
+ panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
+ }
+ }
}
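+
+/*
+ * Rough ordering of the PVH_FLAG_HASHED helpers across one hibernation cycle,
+ * as implied by the comments above (the actual driver entry points live in
+ * the ppl_hib code, outside this file):
+ *
+ *	pmap_clear_ppl_hashed_flag_all();      // reset any state left from a prior cycle
+ *	// ... pages are hashed into the image; pmap_set_ppl_hashed_flag(pa) per PPL page ...
+ *	pmap_check_ppl_hashed_flag_all();      // after all wired pages are copied: enforce coverage
+ */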
+#endif /* XNU_MONITOR */
+
/*
* Indicate that a pmap is intended to be used as a nested pmap
* within one or more larger address spaces. This must be set
{
VALIDATE_PMAP(pmap);
pmap->nested = TRUE;
+ pmap_get_pt_ops(pmap)->free_id(pmap);
}
void
pmap_set_nested(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_set_nested_ppl(pmap);
+#else
pmap_set_nested_internal(pmap);
+#endif
}
/*
pmap, (void*)start, (void*)end);
}
- nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_subord_addr;
+ nested_region_start = pmap->nested_region_addr;
nested_region_end = nested_region_start + pmap->nested_region_size;
if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
adjusted_end = end & ~adjust_offmask;
- bool modified = false;
/* Iterate over the range, trying to remove TTEs. */
for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
tte_p = pmap_tte(pmap, cur);
if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
pte_p = (pt_entry_t *) ttetokv(*tte_p);
- if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
+ if ((ptep_get_info(pte_p)->refcnt == 0) &&
(pmap != kernel_pmap)) {
if (pmap->nested == TRUE) {
/* Deallocate for the nested map. */
- pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
+ pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
} else {
/* Just remove for the parent map. */
- pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
+ pmap_tte_remove(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
}
-
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
- modified = true;
}
}
done:
- PMAP_UNLOCK(pmap);
- }
-
- if (modified) {
- sync_tlb_flush();
+ pmap_unlock(pmap);
}
#if (__ARM_VMSA__ > 7)
/* Remove empty L2 TTs. */
- adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
- adjusted_end = end & ~ARM_TT_L1_OFFMASK;
+ adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
+ adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);
- for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
+ for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
/* For each L1 entry in our range... */
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
bool remove_tt1e = true;
tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
tt_entry_t tt1e;
if (tt1e_p == NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
continue;
}
tt1e = *tt1e_p;
if (tt1e == ARM_TTE_TYPE_FAULT) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
continue;
}
tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
- tt2e_end = &tt2e_start[TTE_PGENTRIES];
+ tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];
for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
}
if (remove_tt1e) {
- pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
- PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
+ pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tt1e_p, PMAP_TT_L1_LEVEL);
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
}
#endif /* (__ARM_VMSA__ > 7) */
}
/*
- * pmap_trim_internal(grand, subord, vstart, nstart, size)
+ * pmap_trim_internal(grand, subord, vstart, size)
*
* grand = pmap subord is nested in
* subord = nested pmap
* vstart = start of the used range in grand
- * nstart = start of the used range in nstart
* size = size of the used range
*
* Attempts to trim the shared region page tables down to only cover the given
pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size)
{
- addr64_t vend, nend;
+ addr64_t vend;
addr64_t adjust_offmask;
if (__improbable(os_add_overflow(vstart, size, &vend))) {
panic("%s: grand addr wraps around, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
- }
-
- if (__improbable(os_add_overflow(nstart, size, &nend))) {
- panic("%s: nested addr wraps around, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
VALIDATE_PMAP(grand);
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
- PMAP_LOCK(subord);
+ pmap_lock(subord);
- if (!subord->nested) {
+ if (__improbable(!subord->nested)) {
panic("%s: subord is not nestable, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
- if (grand->nested) {
+ if (__improbable(grand->nested)) {
panic("%s: grand is nestable, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
- if (grand->nested_pmap != subord) {
+ if (__improbable(grand->nested_pmap != subord)) {
panic("%s: grand->nested != subord, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
- if (size != 0) {
- if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
- panic("%s: grand range not in nested region, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
- }
-
- if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
- panic("%s: subord range not in nested region, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
- }
+ if (__improbable((size != 0) &&
+ ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
+ panic("%s: grand range not in nested region, "
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
if (!grand->nested_bounds_set) {
/* Inherit the bounds from subord. */
- grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
- grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
+ grand->nested_region_true_start = subord->nested_region_true_start;
+ grand->nested_region_true_end = subord->nested_region_true_end;
grand->nested_bounds_set = true;
}
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
return;
}
if ((!subord->nested_bounds_set) && size) {
adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
- subord->nested_region_true_start = nstart;
- subord->nested_region_true_end = nend;
+ subord->nested_region_true_start = vstart;
+ subord->nested_region_true_end = vend;
subord->nested_region_true_start &= ~adjust_offmask;
if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
panic("%s: padded true end wraps around, "
- "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
- __func__, grand, subord, (void*)vstart, (void*)nstart, size);
+ "grand=%p, subord=%p, vstart=%p, size=%#llx",
+ __func__, grand, subord, (void*)vstart, size);
}
subord->nested_region_true_end &= ~adjust_offmask;
if (subord->nested_bounds_set) {
/* Inherit the bounds from subord. */
- grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
- grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
+ grand->nested_region_true_start = subord->nested_region_true_start;
+ grand->nested_region_true_end = subord->nested_region_true_end;
grand->nested_bounds_set = true;
/* If we know the bounds, we can trim the pmap. */
grand->nested_has_no_bounds_ref = false;
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
} else {
/* Don't trim if we don't know the bounds. */
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
return;
}
/* Trim grand to only cover the given range. */
- pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
- pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
+ pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
+ pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));
/* Try to trim subord. */
pmap_trim_subord(subord);
{
if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
/* If we have a no bounds ref, we need to drop it. */
- PMAP_LOCK(pmap->nested_pmap);
+ pmap_lock_ro(pmap->nested_pmap);
pmap->nested_has_no_bounds_ref = false;
boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
- vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
- vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
- PMAP_UNLOCK(pmap->nested_pmap);
+ vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
+ vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
+ pmap_unlock_ro(pmap->nested_pmap);
if (nested_bounds_set) {
- pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
- pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
+ pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
+ pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
}
/*
* Try trimming the nested pmap, in case we had the
{
bool contract_subord = false;
- PMAP_LOCK(subord);
+ pmap_lock(subord);
subord->nested_no_bounds_refcnt--;
contract_subord = true;
}
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
if (contract_subord) {
- pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
- pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
+ pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
+ pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
}
}
pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size)
{
- pmap_trim_internal(grand, subord, vstart, nstart, size);
+#if XNU_MONITOR
+ pmap_trim_ppl(grand, subord, vstart, size);
+
+ pmap_ledger_check_balance(grand);
+ pmap_ledger_check_balance(subord);
+#else
+ pmap_trim_internal(grand, subord, vstart, size);
+#endif
+}
+
+#if HAS_APPLE_PAC
+static void *
+pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
+{
+ void *res = NULL;
+ uint64_t current_intr_state = pmap_interrupts_disable();
+
+ uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
+ switch (key) {
+ case ptrauth_key_asia:
+ res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
+ break;
+ case ptrauth_key_asda:
+ res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
+ break;
+ default:
+ panic("attempt to sign user pointer without process independent key");
+ }
+ ml_disable_user_jop_key(jop_key, saved_jop_state);
+
+ pmap_interrupts_restore(current_intr_state);
+
+ return res;
+}
+
+void *
+pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
+{
+ return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
+}
+
+static void *
+pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
+{
+ if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
+ panic("attempt to auth user pointer without process independent key");
+ }
+
+ void *res = NULL;
+ uint64_t current_intr_state = pmap_interrupts_disable();
+
+ uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
+ res = ml_auth_ptr_unchecked(value, key, discriminator);
+ ml_disable_user_jop_key(jop_key, saved_jop_state);
+
+ pmap_interrupts_restore(current_intr_state);
+
+ return res;
}
+void *
+pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
+{
+ return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
+}
+#endif /* HAS_APPLE_PAC */
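+
+/*
+ * Round-trip sketch (hypothetical values, illustration only): signing and then
+ * authenticating a user pointer with the same key, discriminator, and JOP key
+ * recovers the original pointer; the helpers above differ from the bare
+ * ptrauth operations only in temporarily switching to the task's user JOP key
+ * with interrupts disabled.
+ *
+ *	void *signed_ptr = pmap_sign_user_ptr(raw_ptr, ptrauth_key_asia, disc, jop_key);
+ *	void *checked = pmap_auth_user_ptr(signed_ptr, ptrauth_key_asia, disc, jop_key);
+ *	// checked == raw_ptr when the signature was valid
+ */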
/*
* kern_return_t pmap_nest(grand, subord, vstart, size)
* grand = the pmap that we will nest subord into
* subord = the pmap that goes into the grand
* vstart = start of range in pmap to be inserted
- * nstart = start of range in pmap nested pmap
* size = Size of nest area (up to 16TB)
*
* Inserts a pmap into another. This is used to implement shared segments.
pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size)
{
kern_return_t kr = KERN_FAILURE;
- vm_map_offset_t vaddr, nvaddr;
+ vm_map_offset_t vaddr;
tt_entry_t *stte_p;
tt_entry_t *gtte_p;
unsigned int i;
unsigned int num_tte;
unsigned int nested_region_asid_bitmap_size;
unsigned int* nested_region_asid_bitmap;
- int expand_options = 0;
+ int expand_options = 0;
+ bool deref_subord = true;
+ pmap_t __ptrauth_only subord_addr;
- addr64_t vend, nend;
+ addr64_t vend;
if (__improbable(os_add_overflow(vstart, size, &vend))) {
panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
}
- if (__improbable(os_add_overflow(nstart, size, &nend))) {
- panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
- }
VALIDATE_PMAP(grand);
- VALIDATE_PMAP(subord);
+ pmap_reference_internal(subord); // This call will also validate subord
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
assert(pmap_get_pt_attr(subord) == pt_attr);
+#if XNU_MONITOR
+ expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
+#endif
- if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
- panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
+ if (__improbable(((size | vstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
+ panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx\n", grand, vstart, size);
}
- if (!subord->nested) {
+ if (__improbable(!subord->nested)) {
panic("%s: subordinate pmap %p is not nestable", __func__, subord);
}
- if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
- panic("pmap_nest() pmap %p has a nested pmap\n", grand);
- }
-
if (subord->nested_region_asid_bitmap == NULL) {
nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
- nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
- bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
+#if XNU_MONITOR
+ pmap_paddr_t pa = 0;
+
+ if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
+ panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
+ "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
+ __FUNCTION__, nested_region_asid_bitmap_size,
+ grand, subord, vstart, size);
+ }
+
+ kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ if (kr != KERN_SUCCESS) {
+ goto nest_cleanup;
+ }
+
+ assert(pa);
+
+ nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
+#else
+ nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
+ nested_region_asid_bitmap_size * sizeof(unsigned int),
+ Z_WAITOK | Z_ZERO);
+#endif
- PMAP_LOCK(subord);
+ pmap_lock(subord);
if (subord->nested_region_asid_bitmap == NULL) {
- subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
- subord->nested_region_subord_addr = nstart;
+ subord->nested_region_addr = vstart;
subord->nested_region_size = (mach_vm_offset_t) size;
+
+ /**
+ * Ensure that the rest of the subord->nested_region_* fields are
+ * initialized and visible before setting the nested_region_asid_bitmap
+ * field (which is used as the flag to say that the rest are initialized).
+ */
+ __builtin_arm_dmb(DMB_ISHST);
+ subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
nested_region_asid_bitmap = NULL;
}
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
if (nested_region_asid_bitmap != NULL) {
- kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
+#else
+ kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
+ nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
}
}
- if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
+
+ /**
+ * Ensure subsequent reads of the subord->nested_region_* fields don't get
+ * speculated before their initialization.
+ */
+ __builtin_arm_dmb(DMB_ISHLD);
+
+ if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
uint64_t new_size;
unsigned int new_nested_region_asid_bitmap_size;
unsigned int* new_nested_region_asid_bitmap;
nested_region_asid_bitmap = NULL;
nested_region_asid_bitmap_size = 0;
- new_size = nend - subord->nested_region_subord_addr;
+ new_size = vend - subord->nested_region_addr;
/* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
- new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
- PMAP_LOCK(subord);
+#if XNU_MONITOR
+ pmap_paddr_t pa = 0;
+
+ if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
+ panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
+ "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
+ __FUNCTION__, new_nested_region_asid_bitmap_size,
+ grand, subord, vstart, new_size);
+ }
+
+ kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ if (kr != KERN_SUCCESS) {
+ goto nest_cleanup;
+ }
+
+ assert(pa);
+
+ new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
+#else
+ new_nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
+ new_nested_region_asid_bitmap_size * sizeof(unsigned int),
+ Z_WAITOK | Z_ZERO);
+#endif
+ pmap_lock(subord);
if (subord->nested_region_size < new_size) {
- bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
- bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
+ bcopy(subord->nested_region_asid_bitmap,
+ new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
subord->nested_region_size = new_size;
new_nested_region_asid_bitmap = NULL;
}
- PMAP_UNLOCK(subord);
- if (nested_region_asid_bitmap != NULL)
- { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
- if (new_nested_region_asid_bitmap != NULL)
- { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
+ pmap_unlock(subord);
+ if (nested_region_asid_bitmap != NULL) {
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
+#else
+ kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
+ nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
+ }
+ if (new_nested_region_asid_bitmap != NULL) {
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
+#else
+ kheap_free(KHEAP_DATA_BUFFERS, new_nested_region_asid_bitmap,
+ new_nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
+ }
}
- PMAP_LOCK(subord);
- if (grand->nested_pmap == PMAP_NULL) {
- grand->nested_pmap = subord;
+ pmap_lock(subord);
+
+#if __has_feature(ptrauth_calls)
+ subord_addr = ptrauth_sign_unauthenticated(subord,
+ ptrauth_key_process_independent_data,
+ ptrauth_blend_discriminator(&grand->nested_pmap, ptrauth_string_discriminator("pmap.nested_pmap")));
+#else
+ subord_addr = subord;
+#endif // __has_feature(ptrauth_calls)
+
+ if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord_addr, relaxed)) {
+ /*
+ * If this is grand's first nesting operation, keep the reference on subord.
+ * It will be released by pmap_destroy_internal() when grand is destroyed.
+ */
+ deref_subord = false;
if (!subord->nested_bounds_set) {
/*
subord->nested_no_bounds_refcnt++;
}
- grand->nested_region_grand_addr = vstart;
- grand->nested_region_subord_addr = nstart;
+ grand->nested_region_addr = vstart;
grand->nested_region_size = (mach_vm_offset_t) size;
} else {
- if ((grand->nested_region_grand_addr > vstart)) {
+ if (__improbable(grand->nested_pmap != subord)) {
+ panic("pmap_nest() pmap %p has a nested pmap\n", grand);
+ } else if (__improbable(grand->nested_region_addr > vstart)) {
panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
- } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
- grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
+ } else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
+ grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
}
}
#if (__ARM_VMSA__ == 7)
- nvaddr = (vm_map_offset_t) nstart;
vaddr = (vm_map_offset_t) vstart;
num_tte = size >> ARM_TT_L1_SHIFT;
for (i = 0; i < num_tte; i++) {
- if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
+ if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
goto expand_next;
}
- stte_p = pmap_tte(subord, nvaddr);
+ stte_p = pmap_tte(subord, vaddr);
if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
- PMAP_UNLOCK(subord);
- kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
+ pmap_unlock(subord);
+ kr = pmap_expand(subord, vaddr, expand_options, PMAP_TT_L2_LEVEL);
if (kr != KERN_SUCCESS) {
- PMAP_LOCK(grand);
+ pmap_lock(grand);
goto done;
}
- PMAP_LOCK(subord);
+ pmap_lock(subord);
}
- PMAP_UNLOCK(subord);
- PMAP_LOCK(grand);
+ pmap_unlock(subord);
+ pmap_lock(grand);
stte_p = pmap_tte(grand, vaddr);
if (stte_p == (tt_entry_t *)NULL) {
- PMAP_UNLOCK(grand);
+ pmap_unlock(grand);
kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
if (kr != KERN_SUCCESS) {
- PMAP_LOCK(grand);
+ pmap_lock(grand);
goto done;
}
} else {
- PMAP_UNLOCK(grand);
+ pmap_unlock(grand);
kr = KERN_SUCCESS;
}
- PMAP_LOCK(subord);
+ pmap_lock(subord);
expand_next:
- nvaddr += ARM_TT_L1_SIZE;
vaddr += ARM_TT_L1_SIZE;
}
#else
- nvaddr = (vm_map_offset_t) nstart;
+ vaddr = (vm_map_offset_t) vstart;
num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
for (i = 0; i < num_tte; i++) {
- if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
+ if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
goto expand_next;
}
- stte_p = pmap_tte(subord, nvaddr);
+ stte_p = pmap_tte(subord, vaddr);
if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
- PMAP_UNLOCK(subord);
- kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);
+ pmap_unlock(subord);
+ kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));
if (kr != KERN_SUCCESS) {
- PMAP_LOCK(grand);
+ pmap_lock(grand);
goto done;
}
- PMAP_LOCK(subord);
+ pmap_lock(subord);
}
expand_next:
- nvaddr += pt_attr_twig_size(pt_attr);
+ vaddr += pt_attr_twig_size(pt_attr);
}
#endif
- PMAP_UNLOCK(subord);
+ pmap_unlock(subord);
/*
* copy tte's from subord pmap into grand pmap
*/
- PMAP_LOCK(grand);
- nvaddr = (vm_map_offset_t) nstart;
+ pmap_lock(grand);
vaddr = (vm_map_offset_t) vstart;
#if (__ARM_VMSA__ == 7)
for (i = 0; i < num_tte; i++) {
- if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
+ if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
goto nest_next;
}
- stte_p = pmap_tte(subord, nvaddr);
+ stte_p = pmap_tte(subord, vaddr);
gtte_p = pmap_tte(grand, vaddr);
*gtte_p = *stte_p;
nest_next:
- nvaddr += ARM_TT_L1_SIZE;
vaddr += ARM_TT_L1_SIZE;
}
#else
for (i = 0; i < num_tte; i++) {
- if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
+ if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
goto nest_next;
}
- stte_p = pmap_tte(subord, nvaddr);
+ stte_p = pmap_tte(subord, vaddr);
gtte_p = pmap_tte(grand, vaddr);
if (gtte_p == PT_ENTRY_NULL) {
- PMAP_UNLOCK(grand);
- kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
- PMAP_LOCK(grand);
+ pmap_unlock(grand);
+ kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
+ pmap_lock(grand);
if (kr != KERN_SUCCESS) {
goto done;
nest_next:
vaddr += pt_attr_twig_size(pt_attr);
- nvaddr += pt_attr_twig_size(pt_attr);
}
#endif
stte_p = pmap_tte(grand, vstart);
FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
-
-#if (__ARM_VMSA__ > 7)
- /*
- * check for overflow on LP64 arch
- */
- assert((size & 0xFFFFFFFF00000000ULL) == 0);
-#endif
PMAP_UPDATE_TLBS(grand, vstart, vend, false);
- PMAP_UNLOCK(grand);
+ pmap_unlock(grand);
+#if XNU_MONITOR
+nest_cleanup:
+#endif
+ if (deref_subord) {
+ pmap_destroy_internal(subord);
+ }
return kr;
}
pmap_t grand,
pmap_t subord,
addr64_t vstart,
- addr64_t nstart,
uint64_t size)
{
kern_return_t kr = KERN_FAILURE;
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
VM_KERNEL_ADDRHIDE(vstart));
- kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
+#if XNU_MONITOR
+ while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl(0);
+ }
+
+ pmap_ledger_check_balance(grand);
+ pmap_ledger_check_balance(subord);
+#else
+ kr = pmap_nest_internal(grand, subord, vstart, size);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
panic("%s: %p has no nested pmap", __func__, grand);
}
- if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
+ if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
}
- PMAP_LOCK(grand->nested_pmap);
+ pmap_lock(grand->nested_pmap);
- start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
- start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
+ start = vaddr;
+ start_index = (unsigned int)((vaddr - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
if ((*cpte != ARM_PTE_TYPE_FAULT)
&& (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
- spte = *cpte;
+ spte = *((volatile pt_entry_t*)cpte);
while (!managed) {
pa = pte_to_pa(spte);
if (!pa_valid(pa)) {
}
pai = (int)pa_index(pa);
LOCK_PVH(pai);
- spte = *cpte;
+ spte = *((volatile pt_entry_t*)cpte);
pa = pte_to_pa(spte);
if (pai == (int)pa_index(pa)) {
managed = TRUE;
}
FLUSH_PTE_RANGE_STRONG(bpte, epte);
- flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
}
+ flush_mmu_tlb_region_asid_async(vaddr, (unsigned)size, grand->nested_pmap);
sync_tlb_flush();
- PMAP_UNLOCK(grand->nested_pmap);
+ pmap_unlock(grand->nested_pmap);
}
- PMAP_LOCK(grand);
+ pmap_lock(grand);
/*
* invalidate all pdes for segment at vaddr in pmap grand
FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
PMAP_UPDATE_TLBS(grand, start, vend, false);
- PMAP_UNLOCK(grand);
+ pmap_unlock(grand);
return KERN_SUCCESS;
}
PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
+#if XNU_MONITOR
+ kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
+#else
kr = pmap_unnest_options_internal(grand, vaddr, size, option);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
}
#endif
-void
-pt_fake_zone_init(
- int zone_index)
-{
- pt_fake_zone_index = zone_index;
-}
-
-void
-pt_fake_zone_info(
- int *count,
- vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
- uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
-{
- *count = inuse_pmap_pages_count;
- *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
- *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
- *elem_size = PAGE_SIZE;
- *alloc_size = PAGE_SIZE;
- *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
-
- *collectable = 1;
- *exhaustable = 0;
- *caller_acct = 1;
-}
-
/*
* flush a range of hardware TLB entries.
* NOTE: assumes the smallest TLB entry in use will be for
#if __ARM_RANGE_TLBI__
#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
-#define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_16K_TLB_RANGE_PAGES
+#define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_TLB_RANGE_PAGES
#else
#define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
#endif // __ARM_RANGE_TLBI__
static void
flush_mmu_tlb_region_asid_async(
vm_offset_t va,
- unsigned length,
+ size_t length,
pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
flush_mmu_tlb_entries_async(va, end);
#else
- unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
+ unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
+ const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
+ ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
uint32_t asid;
asid = pmap->hw_asid;
}
#if __ARM_RANGE_TLBI__
if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
- va = generate_rtlbi_param(npages, asid, va);
+ va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
if (pmap->nested == TRUE) {
flush_mmu_tlb_allrange_async(va);
} else {
#endif
vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
va = tlbi_asid(asid) | tlbi_addr(va);
+
if (pmap->nested == TRUE) {
- flush_mmu_tlb_allentries_async(va, end);
+ flush_mmu_tlb_allentries_async(va, end, pmap_page_size);
} else {
- flush_mmu_tlb_entries_async(va, end);
+ flush_mmu_tlb_entries_async(va, end, pmap_page_size);
}
#endif
}
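+
+/*
+ * Flush-strategy summary (a sketch based on the thresholds defined above):
+ * very large ranges fall back to flushing the entire ASID, ranges of more than
+ * ARM64_RANGE_TLB_FLUSH_THRESHOLD pages use a single range TLBI when
+ * __ARM_RANGE_TLBI__ is available, and smaller ranges invalidate each page in
+ * [va, va + length) individually.
+ */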
-MARK_AS_PMAP_TEXT static void
-flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
-{
-#if (__ARM_VMSA__ == 7)
- flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
-#else
- flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
-#endif
-}
-
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (__improbable(pa_test_monitor(paddr))) {
+ panic("%s invoked on PPL page 0x%08x", __func__, pn);
+ }
+#endif
pmap_update_cache_attributes_locked(pn, new_cacheattr);
#if __ARM_PTE_PHYSMAP__
unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
if (cacheattr != VM_WIMG_DEFAULT) {
+#if XNU_MONITOR
+ pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
+#else
pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
+#endif
}
#endif
return (void*)phystokv(ptoa(pn));
#if __ARM_PTE_PHYSMAP__
unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
if (cacheattr != VM_WIMG_DEFAULT) {
+#if XNU_MONITOR
+ pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
+#else
pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
+#endif
}
#endif
}
if (doit) {
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (pa_test_monitor(paddr)) {
+ panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
+ }
+#endif
}
do {
boolean_t doit,
unsigned int *res)
{
+#if XNU_MONITOR
+ return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
+#else
return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
+#endif
}
MARK_AS_PMAP_TEXT static void
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (external && pa_test_monitor(paddr)) {
+ panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
+ } else if (!external && !pa_test_monitor(paddr)) {
+ panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
+ }
+#endif
do {
pp_attr_current = pp_attr_table[pai];
ppnum_t pn,
unsigned int cacheattr)
{
+#if XNU_MONITOR
+ pmap_set_cache_attributes_ppl(pn, cacheattr);
+#else
pmap_set_cache_attributes_internal(pn, cacheattr);
+#endif
}
MARK_AS_PMAP_TEXT void
PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
+ if (pmap_panic_dev_wimg_on_managed) {
+ switch (attributes & VM_WIMG_MASK) {
+ case VM_WIMG_IO: // nGnRnE
+ case VM_WIMG_POSTED: // nGnRE
+ /* supported on DRAM, but slow, so we disallow */
+
+ case VM_WIMG_POSTED_REORDERED: // nGRE
+ case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
+ /* unsupported on DRAM */
+
+ panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
+ __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
+ break;
+
+ default:
+ /* not device type memory, all good */
+
+ break;
+ }
+ }
+
#if __ARM_PTE_PHYSMAP__
vm_offset_t kva = phystokv(phys);
pte_p = pmap_pte(kernel_pmap, kva);
tmplate = *pte_p;
tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
+#if XNU_MONITOR
+ tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
+#else
tmplate |= wimg_to_pte(attributes);
+#endif
#if (__ARM_VMSA__ > 7)
if (tmplate & ARM_PTE_HINT_MASK) {
panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
WRITE_PTE_STRONG(pte_p, tmplate);
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
+ pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
+ pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
tlb_flush_needed = TRUE;
#ifdef PVH_FLAG_IOMMU
}
}
if (tlb_flush_needed) {
- sync_tlb_flush();
+ pmap_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
}
PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
}
-#if (__ARM_VMSA__ == 7)
-vm_map_address_t
-pmap_create_sharedpage(
- void)
+#if (__ARM_VMSA__ == 7)
+void
+pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
+ vm_map_address_t *user_commpage_addr)
{
pmap_paddr_t pa;
kern_return_t kr;
- (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
- memset((char *) phystokv(pa), 0, PAGE_SIZE);
+ assert(kernel_data_addr != NULL);
+ assert(kernel_text_addr != NULL);
+ assert(user_commpage_addr != NULL);
+
+ (void) pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
assert(kr == KERN_SUCCESS);
- return (vm_map_address_t)phystokv(pa);
+ *kernel_data_addr = phystokv(pa);
+	// There is no PFZ on 32-bit ARM, so the text and user commpage addresses are always 0
+ *kernel_text_addr = 0;
+ *user_commpage_addr = 0;
}
-#else
+
+#else /* __ARM_VMSA__ == 7 */
+
static void
pmap_update_tt3e(
pmap_t pmap,
| ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
| ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
-vm_map_address_t
-pmap_create_sharedpage(
- void
- )
+/* Note absence of non-global bit and no-execute bit. */
+#define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
+ | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
+ | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
+ | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
+
+void
+pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
+ vm_map_address_t *user_text_addr)
{
- kern_return_t kr;
- pmap_paddr_t pa = 0;
+ kern_return_t kr;
+ pmap_paddr_t data_pa = 0; // data address
+ pmap_paddr_t text_pa = 0; // text address
+ *kernel_data_addr = 0;
+ *kernel_text_addr = 0;
+ *user_text_addr = 0;
- (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
+#if XNU_MONITOR
+ data_pa = pmap_alloc_page_for_kern(0);
+ assert(data_pa);
+ memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
+#if CONFIG_ARM_PFZ
+ text_pa = pmap_alloc_page_for_kern(0);
+ assert(text_pa);
+ memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
+#endif
+
+#else /* XNU_MONITOR */
+ (void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
+#if CONFIG_ARM_PFZ
+ (void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
+#endif
- memset((char *) phystokv(pa), 0, PAGE_SIZE);
+#endif /* XNU_MONITOR */
#ifdef CONFIG_XNUPOST
/*
* The kernel pmap maintains a user accessible mapping of the commpage
* to test PAN.
*/
- kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(data_pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
assert(kr == KERN_SUCCESS);
/*
* create a dedicated pmap for the shared page. We forcibly nest the
* translation tables from this pmap into other pmaps. The level we
* will nest at depends on the MMU configuration (page size, TTBR range,
- * etc).
+ * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
*
* Note that this is NOT "the nested pmap" (which is used to nest the
* shared cache).
* Note that we update parameters of the entry for our unique needs (NG
* entry, etc.).
*/
- sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
- assert(sharedpage_pmap != NULL);
+ sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
+ assert(sharedpage_pmap_default != NULL);
+
+ /* The user 64-bit mapping... */
+ kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ assert(kr == KERN_SUCCESS);
+ pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
+#if CONFIG_ARM_PFZ
+ /* User mapping of comm page text section for 64 bit mapping only
+ *
+ * We don't insert it into the 32 bit mapping because we don't want 32 bit
+ * user processes to get this page mapped in; they should never call into
+ * this page.
+ *
+ * The data comm page is in a pre-reserved L3 VA range and the text commpage
+ * is slid in the same L3 as the data commpage. It is either beyond the
+ * maximum user VA or is pre-reserved in vm_map_exec(). This means that it
+ * is reserved and unavailable to mach VM for future mappings.
+ */
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
+ int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;
+
+ vm_map_address_t commpage_text_va = 0;
+
+ do {
+ int text_leaf_index = random() % num_ptes;
+
+ // Generate a VA for the commpage text with the same root and twig index as the
+ // data comm page, but with the new leaf index we've just generated.
+ commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
+ commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
+ } while (commpage_text_va == _COMM_PAGE64_BASE_ADDRESS); // Try again if we collide (should be unlikely)
+
+ // Assert that this is empty
+ __assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
+ assert(ptep != PT_ENTRY_NULL);
+ assert(*ptep == ARM_TTE_EMPTY);
+
+ // At this point, we've found the address we want to insert our comm page at
+ kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ assert(kr == KERN_SUCCESS);
+ // Mark it as a global R/X page so that per-ASID TLB invalidations don't evict it
+ pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);
+
+ *user_text_addr = commpage_text_va;
+#endif
+
+ /* ...and the user 32-bit mapping. */
+ kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ assert(kr == KERN_SUCCESS);
+ pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
+
+#if __ARM_MIXED_PAGE_SIZE__
+ /**
+ * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
+ * new set of page tables that point to the exact same 16K shared page as
+ * before. Only the first 4K of the 16K shared page is mapped since that's
+ * the only part that contains relevant data.
+ */
+ sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
+ assert(sharedpage_pmap_4k != NULL);
/* The user 64-bit mapping... */
- kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
assert(kr == KERN_SUCCESS);
- pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
+ pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
/* ...and the user 32-bit mapping. */
- kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
+ kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
assert(kr == KERN_SUCCESS);
- pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
+ pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
+
+#endif
/* For manipulation in kernel, go straight to physical page */
- return (vm_map_address_t)phystokv(pa);
+ *kernel_data_addr = phystokv(data_pa);
+ *kernel_text_addr = (text_pa) ? phystokv(text_pa) : 0;
+
+ return;
}
+
/*
* Asserts to ensure that the TTEs we nest to map the shared page do not overlap
- * with user controlled TTEs.
+ * with user controlled TTEs for regions that aren't explicitly reserved by the
+ * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
*/
#if (ARM_PGSHIFT == 14)
-static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
-static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
#else
#error Nested shared page mapping is unsupported on this config
vm_offset_t sharedpage_vaddr;
pt_entry_t *ttep, *src_ttep;
int options = 0;
+ pmap_t sharedpage_pmap = sharedpage_pmap_default;
+
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
+
+#if __ARM_MIXED_PAGE_SIZE__
+#if !__ARM_16K_PG__
+ /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
+ #error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
+#endif /* !__ARM_16K_PG__ */
+
+ /* Choose the correct shared page pmap to use. */
+ const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
+ if (pmap_page_size == 16384) {
+ sharedpage_pmap = sharedpage_pmap_default;
+ } else if (pmap_page_size == 4096) {
+ sharedpage_pmap = sharedpage_pmap_4k;
+ } else {
+ panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
+ }
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
VALIDATE_PMAP(pmap);
+#if XNU_MONITOR
+ options |= PMAP_OPTIONS_NOWAIT;
+#endif /* XNU_MONITOR */
#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
}
- PMAP_LOCK(pmap);
+
+ pmap_lock(pmap);
/*
- * For 4KB pages, we can force the commpage to nest at the level one
- * page table, as each entry is 1GB (i.e, there will be no overlap
- * with regular userspace mappings). For 16KB pages, each level one
- * entry is 64GB, so we must go to the second level entry (32MB) in
- * order to nest.
+ * For 4KB pages, we either "nest" at the level one page table (1GB) or level
+ * two (2MB) depending on the address space layout. For 16KB pages, each level
+ * one entry is 64GB, so we must go to the second level entry (32MB) in order
+ * to "nest".
+ *
+ * Note: This is not "nesting" in the shared cache sense. This definition of
+ * nesting just means inserting pointers to pre-allocated tables inside of
+ * the passed in pmap to allow us to share page tables (which map the shared
+ * page) for every task. This saves at least one page of memory per process
+ * compared to creating new page tables in every process for mapping the
+ * shared page.
*/
-#if (ARM_PGSHIFT == 12)
- (void)options;
-
- /* Just slam in the L1 entry. */
- ttep = pmap_tt1e(pmap, sharedpage_vaddr);
-
- if (*ttep != ARM_PTE_EMPTY) {
- panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
- }
- src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
-#elif (ARM_PGSHIFT == 14)
- /* Allocate for the L2 entry if necessary, and slam it into place. */
- /*
- * As long as we are use a three level page table, the first level
- * should always exist, so we don't need to check for it.
+ /**
+ * Allocate the twig page tables if needed, and slam a pointer to the shared
+ * page's tables into place.
*/
- while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
- PMAP_UNLOCK(pmap);
+ while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
+ pmap_unlock(pmap);
- kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
+ kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);
if (kr != KERN_SUCCESS) {
+#if XNU_MONITOR
+ if (kr == KERN_RESOURCE_SHORTAGE) {
+ return kr;
+ } else
+#endif
{
panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
}
}
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
}
- ttep = pmap_tt2e(pmap, sharedpage_vaddr);
-
if (*ttep != ARM_PTE_EMPTY) {
panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
}
- src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
-#endif
+ src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);
- *ttep = *src_ttep;
+ *ttep = *src_ttep;
FLUSH_PTE_STRONG(ttep);
- /* TODO: Should we flush in the 64-bit case? */
- flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
-
-#if (ARM_PGSHIFT == 12)
- flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
-#elif (ARM_PGSHIFT == 14)
- flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
-#endif
- sync_tlb_flush();
-
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
return kr;
}
{
pt_entry_t *ttep;
vm_offset_t sharedpage_vaddr;
+ pmap_t sharedpage_pmap = sharedpage_pmap_default;
+
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
+
+#if __ARM_MIXED_PAGE_SIZE__
+#if !__ARM_16K_PG__
+ /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
+ #error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
+#endif /* !__ARM_16K_PG__ */
+
+ /* Choose the correct shared page pmap to use. */
+ const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
+ if (pmap_page_size == 16384) {
+ sharedpage_pmap = sharedpage_pmap_default;
+ } else if (pmap_page_size == 4096) {
+ sharedpage_pmap = sharedpage_pmap_4k;
+ } else {
+ panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
+ }
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
}
-#if (ARM_PGSHIFT == 12)
- ttep = pmap_tt1e(pmap, sharedpage_vaddr);
-
- if (ttep == NULL) {
- return;
- }
- /* It had better be mapped to the shared page */
- if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
- panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
- }
-#elif (ARM_PGSHIFT == 14)
- ttep = pmap_tt2e(pmap, sharedpage_vaddr);
+ ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);
if (ttep == NULL) {
return;
}
- /* It had better be mapped to the shared page */
- if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
+ /* It had better be mapped to the shared page. */
+ if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
}
-#endif
*ttep = ARM_TTE_EMPTY;
- flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
+ FLUSH_PTE_STRONG(ttep);
-#if (ARM_PGSHIFT == 12)
- flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
-#elif (ARM_PGSHIFT == 14)
- flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
-#endif
+ flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap);
sync_tlb_flush();
}
pmap_insert_sharedpage(
pmap_t pmap)
{
+#if XNU_MONITOR
+ kern_return_t kr = KERN_FAILURE;
+
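+ /* The PPL reports KERN_RESOURCE_SHORTAGE when it needs another page table page; donate one and retry. */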
+ while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl(0);
+ }
+
+ pmap_ledger_check_balance(pmap);
+
+ if (kr != KERN_SUCCESS) {
+ panic("%s: failed to insert the shared page, kr=%d, "
+ "pmap=%p",
+ __FUNCTION__, kr,
+ pmap);
+ }
+#else
pmap_insert_sharedpage_internal(pmap);
+#endif
}
static boolean_t
return pmap->is_64bit;
}
+bool
+pmap_is_exotic(
+ pmap_t pmap __unused)
+{
+ return false;
+}
+
#endif
/* ARMTODO -- an implementation that accounts for
return pa_valid(ptoa(pn));
}
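+/* Returns TRUE if the physical page lies in a bootloader carve-out I/O range rather than in kernel-managed memory. */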
+boolean_t
+pmap_bootloader_page(
+ ppnum_t pn)
+{
+ pmap_paddr_t paddr = ptoa(pn);
+
+ if (pa_valid(paddr)) {
+ return FALSE;
+ }
+ pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
+ return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
+}
+
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
pmap_t pmap,
unsigned int initial_not_in_kdp = not_in_kdp;
if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
}
#if (__ARM_VMSA__ == 7)
if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
}
return TRUE;
}
for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
if (*pte_p != ARM_PTE_EMPTY) {
if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
}
return FALSE;
}
}
if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
}
return TRUE;
vm_map_offset_t va_start,
vm_map_offset_t va_end)
{
+#if XNU_MONITOR
+ return pmap_is_empty_ppl(pmap, va_start, va_end);
+#else
return pmap_is_empty_internal(pmap, va_start, va_end);
+#endif
}
vm_map_offset_t
vm_map_offset_t max_offset_ret = 0;
#if defined(__arm64__)
- const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
+ #define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
+ _Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
+ "Minimum address space size outside allowable range");
+ const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
max_offset_ret = arm64_pmap_max_offset_default;
} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
return;
}
+#if XNU_MONITOR
+
+/*
+ * Enforce that the address range described by kva and nbytes is not currently
+ * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
+ * unintentionally writing to PPL-owned memory.
+ */
+static void
+pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
+{
+ vm_offset_t end;
+ if (os_add_overflow(kva, nbytes, &end)) {
+ panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
+ }
+ for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
+ pmap_paddr_t pa = kvtophys(ckva);
+ if (!pa_valid(pa)) {
+ panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
+ pp_attr_t attr;
+ unsigned int pai = (unsigned int)pa_index(pa);
+ if (ckva == phystokv(pa)) {
+ panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
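+ /* Atomically set PP_ATTR_NO_MONITOR so the page cannot be handed to the PPL while it is pinned. */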
+ do {
+ attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
+ if (attr & PP_ATTR_MONITOR) {
+ panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
+ }
+ } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
+ }
+}
+
+static void
+pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
+{
+ vm_offset_t end;
+ if (os_add_overflow(kva, nbytes, &end)) {
+ panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
+ }
+ for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
+ pmap_paddr_t pa = kvtophys(ckva);
+ if (!pa_valid(pa)) {
+ panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
+ if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
+ panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
+ }
+ assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
+ pa_clear_no_monitor(pa);
+ }
+}
+
+/*
+ * Lock down a page, making all mappings read-only, and preventing
+ * further mappings or removal of this particular kva's mapping.
+ * Effectively, it makes the page at kva immutable.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_ppl_lockdown_page(vm_address_t kva)
+{
+ pmap_paddr_t pa = kvtophys(kva);
+ unsigned int pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ if (__improbable(pa_test_monitor(pa))) {
+ panic("%#lx: page %llx belongs to PPL", kva, pa);
+ }
+
+ if (__improbable(pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC))) {
+ panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
+ }
+
+ pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
+
+ if (pte_p == PT_ENTRY_NULL) {
+ panic("%#lx: NULL pte", kva);
+ }
+
+ pt_entry_t tmplate = *pte_p;
+ if (__improbable((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA))) {
+ panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
+ }
+
+ pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
+
+ pmap_set_ptov_ap(pai, AP_RONA, FALSE);
+
+ UNLOCK_PVH(pai);
+
+ pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0, NULL);
+}
+
+/*
+ * Release a page from being locked down to the PPL, making it writable
+ * to the kernel once again.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_ppl_unlockdown_page(vm_address_t kva)
+{
+ pmap_paddr_t pa = kvtophys(kva);
+ unsigned int pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+
+ if (__improbable(!(pvh_flags & PVH_FLAG_LOCKDOWN))) {
+ panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
+ }
+
+ pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
+ pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
+ UNLOCK_PVH(pai);
+}
+
+#else /* XNU_MONITOR */
static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
+#endif /* !XNU_MONITOR */
#define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
VALIDATE_PMAP(pmap);
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
/* Ensure that this request is valid, and addresses exactly one TTE. */
- if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
- panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
+ if (__improbable((start % pt_attr_page_size(pt_attr)) ||
+ (end % pt_attr_page_size(pt_attr)))) {
+ panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
}
- if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
+ if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
}
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
tte_p = pmap_tte(pmap, start);
if (tte_p == (tt_entry_t *) NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
return PMAP_RESIDENT_INVALID;
}
if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
- __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
pte_p = (pt_entry_t *) ttetokv(*tte_p);
bpte = &pte_p[pte_index(pmap, pt_attr, start)];
epte = &pte_p[pte_index(pmap, pt_attr, end)];
for (; bpte < epte; bpte++) {
if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
- compressed_bytes += ARM_PGBYTES;
+ compressed_bytes += pt_attr_page_size(pt_attr);
} else if (pa_valid(pte_to_pa(*bpte))) {
- resident_bytes += ARM_PGBYTES;
+ resident_bytes += pt_attr_page_size(pt_attr);
}
}
}
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
if (compressed_bytes_p) {
pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
if (l > end) {
l = end;
}
+#if XNU_MONITOR
+ resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
+#else
resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
+#endif
if (resident_bytes == PMAP_RESIDENT_INVALID) {
break;
}
#define PMAP_PGTRACE_LOCK(p) \
do { \
- *(p) = ml_set_interrupts_enabled(false); \
+ *(p) = pmap_interrupts_disable(); \
if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
- ml_set_interrupts_enabled(*(p)); \
+ pmap_interrupts_restore(*(p)); \
} while (true)
#define PMAP_PGTRACE_UNLOCK(p) \
do { \
simple_unlock(&(pmap_pgtrace.lock)); \
- ml_set_interrupts_enabled(*(p)); \
+ pmap_interrupts_restore(*(p)); \
} while (0)
#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
pmap_pgtrace_page_state_t state;
} pmap_pgtrace_page_t;
+typedef struct {
+ queue_chain_t chain;
+ pmap_t pmap;
+ vm_map_offset_t va;
+} pmap_va_t;
+
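+/* Zone views backing the pgtrace bookkeeping allocations (replacing the previous raw kalloc/kfree usage). */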
+static ZONE_VIEW_DEFINE(ZV_PMAP_VA, "pmap va",
+ KHEAP_ID_DEFAULT, sizeof(pmap_va_t));
+
+static ZONE_VIEW_DEFINE(ZV_PMAP_PGTRACE, "pmap pgtrace",
+ KHEAP_ID_DEFAULT, sizeof(pmap_pgtrace_page_t));
+
static struct {
/*
* pages - list of tracing page info
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
- bool ints;
+ uint64_t ints;
queue_head_t *q = &(pmap_pgtrace.pages);
pmap_paddr_t pa_page;
pt_entry_t *ptep, *cptep;
pmap_pgtrace_page_t *p;
bool found = false;
- PMAP_ASSERT_LOCKED(pmap);
+ pmap_assert_locked_w(pmap);
assert(va_page == arm_trunc_page(va_page));
PMAP_PGTRACE_LOCK(&ints);
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
- bool ints, found = false;
+ uint64_t ints, found = false;
pmap_pgtrace_page_t *p;
pt_entry_t *ptep;
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
- bool ints;
+ uint64_t ints;
pmap_pgtrace_page_t *p;
pt_entry_t *ptep;
// sanitize maps in waste
queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
if (map->cloned == true) {
- PMAP_LOCK(map->pmap);
+ pmap_lock(map->pmap);
// restore back original pte
ptep = pmap_pte(map->pmap, map->ova);
PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
}
- PMAP_UNLOCK(map->pmap);
+ pmap_unlock(map->pmap);
}
map->pmap = NULL;
while (cur_page <= end_page) {
vm_map_offset_t add = 0;
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
// skip uninterested space
if (pmap == kernel_pmap &&
add = ARM_PGBYTES;
unlock_continue:
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
//overflow
if (cur_page + add < cur_page) {
pt_entry_t *ptep;
pmap_t pmap;
- typedef struct {
- queue_chain_t chain;
- pmap_t pmap;
- vm_map_offset_t va;
- } pmap_va_t;
-
queue_head_t pmapvaq;
pmap_va_t *pmapva;
ptep = pvh_ptep(pvh);
pmap = ptep_get_pmap(ptep);
- pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
+ pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
pmapva->pmap = pmap;
pmapva->va = ptep_get_va(ptep);
ptep = pve_get_ptep(pvep);
pmap = ptep_get_pmap(ptep);
- pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
+ pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
pmapva->pmap = pmap;
pmapva->va = ptep_get_va(ptep);
// clone them while making sure mapping still exists
queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
- PMAP_LOCK(pmapva->pmap);
+ pmap_lock(pmapva->pmap);
ptep = pmap_pte(pmapva->pmap, pmapva->va);
if (pte_to_pa(*ptep) == pa) {
if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
ret++;
}
}
- PMAP_UNLOCK(pmapva->pmap);
+ pmap_unlock(pmapva->pmap);
- kfree(pmapva, sizeof(pmap_va_t));
+ zfree(ZV_PMAP_VA, pmapva);
}
return ret;
queue_head_t *mapwaste;
pmap_pgtrace_map_t *map;
- p = kalloc(sizeof(pmap_pgtrace_page_t));
+ p = zalloc(ZV_PMAP_PGTRACE);
assert(p);
p->state = UNDEFINED;
vm_map_unlock(kernel_map);
// fill default clone page info and add to pool
- map = kalloc(sizeof(pmap_pgtrace_map_t));
+ map = zalloc(ZV_PMAP_PGTRACE);
for (int j = 0; j < 3; j++) {
vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
// pre-expand pmap while preemption enabled
- kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
+ kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_L3_LEVEL);
if (kr != KERN_SUCCESS) {
panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
}
while (!queue_empty(mapq)) {
queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
- kfree(map, sizeof(pmap_pgtrace_map_t));
+ zfree(ZV_PMAP_PGTRACE, map);
}
while (!queue_empty(mappool)) {
queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
- kfree(map, sizeof(pmap_pgtrace_map_t));
+ zfree(ZV_PMAP_PGTRACE, map);
}
while (!queue_empty(mapwaste)) {
queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
- kfree(map, sizeof(pmap_pgtrace_map_t));
+ zfree(ZV_PMAP_PGTRACE, map);
}
- kfree(p, sizeof(pmap_pgtrace_page_t));
+ zfree(ZV_PMAP_PGTRACE, p);
}
// construct page infos with the given address range
int ret = 0;
pt_entry_t *ptep;
queue_head_t *q = &(pmap_pgtrace.pages);
- bool ints;
+ uint64_t ints;
vm_map_offset_t cur_page, end_page;
if (start > end) {
// keep lock orders in pmap, kernel_pmap and pgtrace lock
if (pmap != NULL) {
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
}
if (pmap != kernel_pmap) {
- PMAP_LOCK(kernel_pmap);
+ pmap_lock_ro(kernel_pmap);
}
// addresses are physical if pmap is null
// unlock locks
PMAP_PGTRACE_UNLOCK(&ints);
if (pmap != kernel_pmap) {
- PMAP_UNLOCK(kernel_pmap);
+ pmap_unlock_ro(kernel_pmap);
}
if (pmap != NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock_ro(pmap);
}
// now clone it
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
int ret = 0;
- bool ints;
+ uint64_t ints;
queue_head_t *q = &(pmap_pgtrace.pages);
pmap_pgtrace_page_t *p;
vm_map_offset_t cur_page, end_page;
if (pmap == NULL) {
pa_page = cur_page;
} else {
- PMAP_LOCK(pmap);
+ pmap_lock(pmap);
ptep = pmap_pte(pmap, cur_page);
if (ptep == NULL) {
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
goto cont;
}
pa_page = pte_to_pa(*ptep);
- PMAP_UNLOCK(pmap);
+ pmap_unlock(pmap);
}
// remove all clones and validate
pt_entry_t *ptep;
pgtrace_run_result_t res;
pmap_pgtrace_page_t *p;
- bool ints, found = false;
+ uint64_t ints, found = false;
pmap_paddr_t pa;
// Quick check if we are interested
}
#endif
+/**
+ * The minimum shared region nesting size is used by the VM to determine when to
+ * break up large mappings into nested regions. The smallest size these mappings
+ * can be broken into is determined by the page table level at which the regions
+ * are nested and by the size of the page tables.
+ *
+ * For instance, if a region is nested at L2 for a process using 16KB page
+ * tables, then the minimum nesting size would be 32MB (the size of an L2 block
+ * entry).
+ *
+ * @param pmap The target pmap; its page table geometry (16KB or 4KB pages)
+ * determines the block size.
+ */
+uint64_t
+pmap_shared_region_size_min(__unused pmap_t pmap)
+{
+#if (__ARM_VMSA__ > 7)
+ const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+
+ /**
+ * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
+ * 4KB pages). This means that a target pmap will contain L2 entries that
+ * point to shared L3 page tables in the shared region pmap.
+ */
+ return pt_attr_twig_size(pt_attr);
+
+#else
+ return ARM_NESTING_SIZE_MIN;
+#endif
+}
+
+/**
+ * The concept of a nesting size maximum was made to accommodate restrictions in
+ * place for nesting regions on PowerPC. There are no restrictions on maximum
+ * nesting sizes on x86/armv7/armv8, so this should eventually be removed.
+ *
+ * TODO: <rdar://problem/65247502> Completely remove pmap_nesting_size_max()
+ */
+uint64_t
+pmap_nesting_size_max(__unused pmap_t pmap)
+{
+ return ARM_NESTING_SIZE_MAX;
+}
+
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
#endif
}
+MARK_AS_PMAP_TEXT void
+pmap_set_vm_map_cs_enforced_internal(
+ pmap_t pmap,
+ bool new_value)
+{
+ VALIDATE_PMAP(pmap);
+ pmap->pmap_vm_map_cs_enforced = new_value;
+}
+
+void
+pmap_set_vm_map_cs_enforced(
+ pmap_t pmap,
+ bool new_value)
+{
+#if XNU_MONITOR
+ pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
+#else
+ pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
+#endif
+}
+
+extern int cs_process_enforcement_enable;
+bool
+pmap_get_vm_map_cs_enforced(
+ pmap_t pmap)
+{
+ if (cs_process_enforcement_enable) {
+ return true;
+ }
+ return pmap->pmap_vm_map_cs_enforced;
+}
+
MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
__unused pmap_t pmap)
pmap_set_jit_entitled(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_set_jit_entitled_ppl(pmap);
+#else
pmap_set_jit_entitled_internal(pmap);
+#endif
+}
+
+bool
+pmap_get_jit_entitled(
+ __unused pmap_t pmap)
+{
+ return false;
}
MARK_AS_PMAP_TEXT static kern_return_t
disp = 0;
VALIDATE_PMAP(pmap);
- PMAP_LOCK(pmap);
+ pmap_lock_ro(pmap);
pte = pmap_pte(pmap, va);
if (pte == PT_ENTRY_NULL) {
goto done;
}
- pa = pte_to_pa(*pte);
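+ /* Read the PTE through a volatile pointer so a single, consistent snapshot is taken. */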
+ pa = pte_to_pa(*((volatile pt_entry_t*)pte));
if (pa == 0) {
if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
disp |= PMAP_QUERY_PAGE_COMPRESSED;
if (!pa_valid(pa)) {
goto done;
}
- LOCK_PVH(pai);
- pv_h = pai_to_pvh(pai);
- pve_p = PV_ENTRY_NULL;
- if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
- pve_p = pvh_list(pv_h);
- while (pve_p != PV_ENTRY_NULL &&
- pve_get_ptep(pve_p) != pte) {
- pve_p = PVE_NEXT_PTR(pve_next(pve_p));
+ LOCK_PVH(pai);
+ pv_h = pai_to_pvh(pai);
+ pve_p = PV_ENTRY_NULL;
+ if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
+ pve_p = pvh_list(pv_h);
+ while (pve_p != PV_ENTRY_NULL &&
+ pve_get_ptep(pve_p) != pte) {
+ pve_p = PVE_NEXT_PTR(pve_next(pve_p));
+ }
+ }
+ if (IS_ALTACCT_PAGE(pai, pve_p)) {
+ disp |= PMAP_QUERY_PAGE_ALTACCT;
+ } else if (IS_REUSABLE_PAGE(pai)) {
+ disp |= PMAP_QUERY_PAGE_REUSABLE;
+ } else if (IS_INTERNAL_PAGE(pai)) {
+ disp |= PMAP_QUERY_PAGE_INTERNAL;
+ }
+ UNLOCK_PVH(pai);
+ }
+
+done:
+ pmap_unlock_ro(pmap);
+ pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
+ *disp_p = disp;
+ pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+pmap_query_page_info(
+ pmap_t pmap,
+ vm_map_offset_t va,
+ int *disp_p)
+{
+#if XNU_MONITOR
+ return pmap_query_page_info_ppl(pmap, va, disp_p);
+#else
+ return pmap_query_page_info_internal(pmap, va, disp_p);
+#endif
+}
+
+MARK_AS_PMAP_TEXT kern_return_t
+pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
+{
+
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+pmap_return(boolean_t do_panic, boolean_t do_recurse)
+{
+#if XNU_MONITOR
+ return pmap_return_ppl(do_panic, do_recurse);
+#else
+ return pmap_return_internal(do_panic, do_recurse);
+#endif
+}
+
+
+
+
+kern_return_t
+pmap_load_legacy_trust_cache(struct pmap_legacy_trust_cache __unused *trust_cache,
+ const vm_size_t __unused trust_cache_len)
+{
+ // Unsupported
+ return KERN_NOT_SUPPORTED;
+}
+
+pmap_tc_ret_t
+pmap_load_image4_trust_cache(struct pmap_image4_trust_cache __unused *trust_cache,
+ const vm_size_t __unused trust_cache_len,
+ uint8_t const * __unused img4_manifest,
+ const vm_size_t __unused img4_manifest_buffer_len,
+ const vm_size_t __unused img4_manifest_actual_len,
+ bool __unused dry_run)
+{
+ // Unsupported
+ return PMAP_TC_UNKNOWN_FORMAT;
+}
+
+bool
+pmap_in_ppl(void)
+{
+ // Unsupported
+ return false;
+}
+
+void
+pmap_lockdown_image4_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
+{
+ // Unsupported
+}
+
+void *
+pmap_claim_reserved_ppl_page(void)
+{
+ // Unsupported
+ return NULL;
+}
+
+void
+pmap_free_reserved_ppl_page(void __unused *kva)
+{
+ // Unsupported
+}
+
+
+MARK_AS_PMAP_TEXT static bool
+pmap_is_trust_cache_loaded_internal(const uuid_t uuid)
+{
+ bool found = false;
+
+ pmap_simple_lock(&pmap_loaded_trust_caches_lock);
+
+ for (struct pmap_image4_trust_cache const *c = pmap_image4_trust_caches; c != NULL; c = c->next) {
+ if (bcmp(uuid, c->module->uuid, sizeof(uuid_t)) == 0) {
+ found = true;
+ goto done;
+ }
+ }
+
+#ifdef PLATFORM_BridgeOS
+ for (struct pmap_legacy_trust_cache const *c = pmap_legacy_trust_caches; c != NULL; c = c->next) {
+ if (bcmp(uuid, c->uuid, sizeof(uuid_t)) == 0) {
+ found = true;
+ goto done;
+ }
+ }
+#endif
+
+done:
+ pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
+ return found;
+}
+
+bool
+pmap_is_trust_cache_loaded(const uuid_t uuid)
+{
+#if XNU_MONITOR
+ return pmap_is_trust_cache_loaded_ppl(uuid);
+#else
+ return pmap_is_trust_cache_loaded_internal(uuid);
+#endif
+}
+
+MARK_AS_PMAP_TEXT static bool
+pmap_lookup_in_loaded_trust_caches_internal(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ struct pmap_image4_trust_cache const *cache = NULL;
+#ifdef PLATFORM_BridgeOS
+ struct pmap_legacy_trust_cache const *legacy = NULL;
+#endif
+
+ pmap_simple_lock(&pmap_loaded_trust_caches_lock);
+
+ for (cache = pmap_image4_trust_caches; cache != NULL; cache = cache->next) {
+ uint8_t hash_type = 0, flags = 0;
+
+ if (lookup_in_trust_cache_module(cache->module, cdhash, &hash_type, &flags)) {
+ goto done;
+ }
+ }
+
+#ifdef PLATFORM_BridgeOS
+ for (legacy = pmap_legacy_trust_caches; legacy != NULL; legacy = legacy->next) {
+ for (uint32_t i = 0; i < legacy->num_hashes; i++) {
+ if (bcmp(legacy->hashes[i], cdhash, CS_CDHASH_LEN) == 0) {
+ goto done;
}
}
- if (IS_ALTACCT_PAGE(pai, pve_p)) {
- disp |= PMAP_QUERY_PAGE_ALTACCT;
- } else if (IS_REUSABLE_PAGE(pai)) {
- disp |= PMAP_QUERY_PAGE_REUSABLE;
- } else if (IS_INTERNAL_PAGE(pai)) {
- disp |= PMAP_QUERY_PAGE_INTERNAL;
- }
- UNLOCK_PVH(pai);
}
+#endif
done:
- PMAP_UNLOCK(pmap);
- pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
- *disp_p = disp;
- pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
- return KERN_SUCCESS;
+ pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
+
+ if (cache != NULL) {
+ return true;
+#ifdef PLATFORM_BridgeOS
+ } else if (legacy != NULL) {
+ return true;
+#endif
+ }
+
+ return false;
}
-kern_return_t
-pmap_query_page_info(
- pmap_t pmap,
- vm_map_offset_t va,
- int *disp_p)
+bool
+pmap_lookup_in_loaded_trust_caches(const uint8_t cdhash[CS_CDHASH_LEN])
{
- return pmap_query_page_info_internal(pmap, va, disp_p);
+#if XNU_MONITOR
+ return pmap_lookup_in_loaded_trust_caches_ppl(cdhash);
+#else
+ return pmap_lookup_in_loaded_trust_caches_internal(cdhash);
+#endif
}
-MARK_AS_PMAP_TEXT kern_return_t
-pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
+MARK_AS_PMAP_TEXT static uint32_t
+pmap_lookup_in_static_trust_cache_internal(const uint8_t cdhash[CS_CDHASH_LEN])
{
+ // Awkward indirection, because the PPL macros currently force their functions to be static.
+ return lookup_in_static_trust_cache(cdhash);
+}
- return KERN_SUCCESS;
+uint32_t
+pmap_lookup_in_static_trust_cache(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+#if XNU_MONITOR
+ return pmap_lookup_in_static_trust_cache_ppl(cdhash);
+#else
+ return pmap_lookup_in_static_trust_cache_internal(cdhash);
+#endif
}
-kern_return_t
-pmap_return(boolean_t do_panic, boolean_t do_recurse)
+MARK_AS_PMAP_TEXT static void
+pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
{
- return pmap_return_internal(do_panic, do_recurse);
+ pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
+ memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
+ pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+ pmap_cs_log("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash[0], cdhash[1], cdhash[2], cdhash[4]);
}
+MARK_AS_PMAP_TEXT static bool
+pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ bool match = false;
+
+ pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
+ if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
+ match = true;
+ }
+ pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+ if (match) {
+ pmap_cs_log("Matched Compilation Service CDHash through the PPL");
+ }
+
+ return match;
+}
+void
+pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+#if XNU_MONITOR
+ pmap_set_compilation_service_cdhash_ppl(cdhash);
+#else
+ pmap_set_compilation_service_cdhash_internal(cdhash);
+#endif
+}
+bool
+pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+#if XNU_MONITOR
+ return pmap_match_compilation_service_cdhash_ppl(cdhash);
+#else
+ return pmap_match_compilation_service_cdhash_internal(cdhash);
+#endif
+}
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
vm_map_t map,
boolean_t suspend)
{
+#if XNU_MONITOR
+ pmap_footprint_suspend_ppl(map, suspend);
+#else
pmap_footprint_suspend_internal(map, suspend);
+#endif
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
uint64_t end_va;
};
-static size_t
+static kern_return_t
pmap_dump_page_tables_recurse(pmap_t pmap,
const tt_entry_t *ttp,
unsigned int cur_level,
+ unsigned int level_mask,
uint64_t start_va,
- void *bufp,
- void *buf_end)
+ void *buf_start,
+ void *buf_end,
+ size_t *bytes_copied)
{
- size_t bytes_used = 0;
- uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);
uint64_t size = pt_attr->pta_level_info[cur_level].size;
uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
- if (cur_level == arm64_root_pgtable_level) {
- num_entries = arm64_root_pgtable_num_ttes;
+ void *bufp = (uint8_t*)buf_start + *bytes_copied;
+
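+ /* The root table may not span a full page (e.g. with sub-page L1), so size it from the pmap's root allocation. */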
+ if (cur_level == pt_attr_root_level(pt_attr)) {
+ num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
}
uint64_t tt_size = num_entries * sizeof(tt_entry_t);
const tt_entry_t *tt_end = &ttp[num_entries];
if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
- return 0;
+ return KERN_INSUFFICIENT_BUFFER_SIZE;
}
- struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
- header->pa = ml_static_vtop((vm_offset_t)ttp);
- header->num_entries = num_entries;
- header->start_va = start_va;
- header->end_va = start_va + (num_entries * size);
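+ /* Only copy out tables at the levels selected by the caller-provided level_mask. */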
+ if (level_mask & (1U << cur_level)) {
+ struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
+ header->pa = ml_static_vtop((vm_offset_t)ttp);
+ header->num_entries = num_entries;
+ header->start_va = start_va;
+ header->end_va = start_va + (num_entries * size);
- bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
- bytes_used += (sizeof(*header) + tt_size);
+ bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
+ *bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
+ }
uint64_t current_va = start_va;
for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
if ((tte & type_mask) == type_block) {
continue;
} else {
- if (cur_level >= PMAP_TT_MAX_LEVEL) {
+ if (cur_level >= pt_attr_leaf_level(pt_attr)) {
panic("%s: corrupt entry %#llx at %p, "
"ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
__FUNCTION__, tte, ttep,
const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
- size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
+ kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
+ level_mask, current_va, buf_start, buf_end, bytes_copied);
- if (recurse_result == 0) {
- return 0;
+ if (recurse_result != KERN_SUCCESS) {
+ return recurse_result;
}
-
- bytes_used += recurse_result;
}
}
- return bytes_used;
+ return KERN_SUCCESS;
}
-size_t
-pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
+kern_return_t
+pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
{
if (not_in_kdp) {
panic("pmap_dump_page_tables must only be called from kernel debugger context");
}
- return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
+ return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
+ level_mask, pmap->min, bufp, buf_end, bytes_copied);
}
#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
-size_t
-pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
+kern_return_t
+pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
+ unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
- return (size_t)-1;
+ return KERN_NOT_SUPPORTED;
}
-
#endif /* !defined(__arm64__) */
+
+
+#ifdef CONFIG_XNUPOST
+#ifdef __arm64__
+static volatile bool pmap_test_took_fault = false;
+
+static bool
+pmap_test_fault_handler(arm_saved_state_t * state)
+{
+ bool retval = false;
+ uint32_t esr = get_saved_state_esr(state);
+ esr_exception_class_t class = ESR_EC(esr);
+ fault_status_t fsc = ISS_IA_FSC(ESR_ISS(esr));
+
+ if ((class == ESR_EC_DABORT_EL1) &&
+ ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
+ pmap_test_took_fault = true;
+ /* Return to the instruction immediately after the faulting access. */
+ set_saved_state_pc(state, get_saved_state_pc(state) + 4);
+ retval = true;
+ }
+
+ return retval;
+}
+
+static bool
+pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
+{
+ /*
+ * We're switching pmaps without using the normal thread mechanism;
+ * disable interrupts and preemption to avoid any unexpected memory
+ * accesses.
+ */
+ uint64_t old_int_state = pmap_interrupts_disable();
+ pmap_t old_pmap = current_pmap();
+ mp_disable_preemption();
+ pmap_switch(pmap);
+
+ pmap_test_took_fault = false;
+
+ /* Disable PAN; pmap shouldn't be the kernel pmap. */
+#if __ARM_PAN_AVAILABLE__
+ __builtin_arm_wsr("pan", 0);
+#endif /* __ARM_PAN_AVAILABLE__ */
+ ml_expect_fault_begin(pmap_test_fault_handler, va);
+
+ if (is_write) {
+ *((volatile uint64_t*)(va)) = 0xdec0de;
+ } else {
+ volatile uint64_t tmp = *((volatile uint64_t*)(va));
+ (void)tmp;
+ }
+
+ /* Save the fault bool, and undo the gross stuff we did. */
+ bool took_fault = pmap_test_took_fault;
+ ml_expect_fault_end();
+#if __ARM_PAN_AVAILABLE__
+ __builtin_arm_wsr("pan", 1);
+#endif /* __ARM_PAN_AVAILABLE__ */
+
+ pmap_switch(old_pmap);
+ mp_enable_preemption();
+ pmap_interrupts_restore(old_int_state);
+ bool retval = (took_fault == should_fault);
+ return retval;
+}
+
+static bool
+pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
+{
+ bool retval = pmap_test_access(pmap, va, should_fault, false);
+
+ if (!retval) {
+ T_FAIL("%s: %s, "
+ "pmap=%p, va=%p, should_fault=%u",
+ __func__, should_fault ? "did not fault" : "faulted",
+ pmap, (void*)va, (unsigned)should_fault);
+ }
+
+ return retval;
+}
+
+static bool
+pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
+{
+ bool retval = pmap_test_access(pmap, va, should_fault, true);
+
+ if (!retval) {
+ T_FAIL("%s: %s, "
+ "pmap=%p, va=%p, should_fault=%u",
+ __func__, should_fault ? "did not fault" : "faulted",
+ pmap, (void*)va, (unsigned)should_fault);
+ }
+
+ return retval;
+}
+
+static bool
+pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
+{
+ unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
+ unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));
+
+ bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));
+
+ if (!retval) {
+ T_FAIL("%s: bits=%u, "
+ "pa=%p, should_be_set=%u",
+ __func__, bits,
+ (void*)pa, should_be_set);
+ }
+
+ return retval;
+}
+
+static __attribute__((noinline)) bool
+pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
+{
+ bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
+ return retval;
+}
+
+static int
+pmap_test_test_config(unsigned int flags)
+{
+ T_LOG("running pmap_test_test_config flags=0x%X", flags);
+ unsigned int map_count = 0;
+ unsigned long page_ratio = 0;
+ pmap_t pmap = pmap_create_options(NULL, 0, flags);
+
+ if (!pmap) {
+ panic("Failed to allocate pmap");
+ }
+
+ __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
+ uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
+ uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
+ uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);
+
+ if (pmap_page_size <= native_page_size) {
+ page_ratio = native_page_size / pmap_page_size;
+ } else {
+ /*
+ * A page_ratio of less than 1 (pmap page size larger than the native
+ * page size) is not currently supported by the pmap layer; panic.
+ */
+ panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu, "
+     "flags=%u",
+     __func__, native_page_size, pmap_page_size,
+     flags);
+ }
+
+ if (PAGE_RATIO > 1) {
+ /*
+ * The kernel is deliberately pretending to have 16KB pages.
+ * The pmap layer has code that supports this, so pretend the
+ * page size is larger than it is.
+ */
+ pmap_page_size = PAGE_SIZE;
+ native_page_size = PAGE_SIZE;
+ }
+
+ /*
+ * Get two pages from the VM; one to be mapped wired, and one to be
+ * mapped nonwired.
+ */
+ vm_page_t unwired_vm_page = vm_page_grab();
+ vm_page_t wired_vm_page = vm_page_grab();
+
+ if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
+ panic("Failed to grab VM pages");
+ }
+
+ ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
+ ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);
+
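+ /* Downgrade all existing mappings of this page to read-only. */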
+ pmap_paddr_t pa = ptoa(pn);
+ pmap_paddr_t wired_pa = ptoa(wired_pn);
+
+ /*
+ * We'll start mappings at the second twig TT. This keeps us from only
+ * using the first entry in each TT, which would trivially be address
+ * 0; one of the things we will need to test is retrieving the VA for
+ * a given PTE.
+ */
+ vm_map_address_t va_base = pmap_twig_size;
+ vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);
+
+ if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
+ /*
+ * Not exactly a functional failure, but this test relies on
+ * there being a spare PTE slot we can use to pin the TT.
+ */
+ panic("Cannot pin translation table");
+ }
+
+ /*
+ * Create the wired mapping; this will prevent the pmap layer from
+ * reclaiming our test TTs, which would interfere with this test
+ * ("interfere" -> "make it panic").
+ */
+ pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);
+
+ /*
+ * Create read-only mappings of the nonwired page; if the pmap does
+ * not use the same page size as the kernel, create multiple mappings
+ * so that the kernel page is fully mapped.
+ */
+ for (map_count = 0; map_count < page_ratio; map_count++) {
+ pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
+ }
+
+ /* Validate that all the PTEs have the expected PA and VA. */
+ for (map_count = 0; map_count < page_ratio; map_count++) {
+ pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));
+
+ if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
+ T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
+ (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
+ }
+
+ if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
+ T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
+ (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
+ }
+ }
+
+ T_LOG("Validate that reads to our mapping do not fault.");
+ pmap_test_read(pmap, va_base, false);
+
+ T_LOG("Validate that writes to our mapping fault.");
+ pmap_test_write(pmap, va_base, true);
+
+ T_LOG("Make the first mapping writable.");
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
+
+ T_LOG("Validate that writes to our mapping do not fault.");
+ pmap_test_write(pmap, va_base, false);
+
+
+ T_LOG("Make the first mapping XO.");
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);
+
+ T_LOG("Validate that reads to our mapping do not fault.");
+ pmap_test_read(pmap, va_base, false);
+
+ T_LOG("Validate that writes to our mapping fault.");
+ pmap_test_write(pmap, va_base, true);
+
+
+ /*
+ * For page ratios of greater than 1: validate that writes to the other
+ * mappings still fault. Remove the mappings afterwards (we're done
+ * with page ratio testing).
+ */
+ for (map_count = 1; map_count < page_ratio; map_count++) {
+ pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
+ pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
+ }
+
+ T_LOG("Mark the page unreferenced and unmodified.");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ pmap_test_check_refmod(pa, 0);
+
+ /*
+ * Begin testing the ref/mod state machine. Re-enter the mapping with
+ * different protection/fault_type settings, and confirm that the
+ * ref/mod state matches our expectations at each step.
+ */
+ T_LOG("!ref/!mod: read, no fault. Expect ref/!mod");
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
+
+ T_LOG("!ref/!mod: read, read fault. Expect ref/!mod");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
+
+ T_LOG("!ref/!mod: rw, read fault. Expect ref/!mod");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
+
+ T_LOG("ref/!mod: rw, read fault. Expect ref/!mod");
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
+
+ T_LOG("!ref/!mod: rw, rw fault. Expect ref/mod");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
+
+ /*
+ * Shared memory testing; we'll have two mappings; one read-only,
+ * one read-write.
+ */
+ vm_map_address_t rw_base = va_base;
+ vm_map_address_t ro_base = va_base + pmap_page_size;
+
+ pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
+ pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
+
+ /*
+ * Test that we take faults as expected for unreferenced/unmodified
+ * pages. Also test the arm_fast_fault interface, to ensure that
+ * mapping permissions change as expected.
+ */
+ T_LOG("!ref/!mod: expect no access");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ pmap_test_read_write(pmap, ro_base, false, false);
+ pmap_test_read_write(pmap, rw_base, false, false);
+
+ T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
+ arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, false);
+
+ T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
+ arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, true);
+
+ T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
+ pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
+ pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, true);
+
+ T_LOG("RW protect both mappings; should not change protections.");
+ pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
+ pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, true);
+
+ T_LOG("Read protect both mappings; RW mapping should become RO.");
+ pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
+ pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, false);
+
+ T_LOG("RW protect the page; mappings should not change protections.");
+ pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
+ pmap_page_protect(pn, VM_PROT_ALL);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, true);
+
+ T_LOG("Read protect the page; RW mapping should become RO.");
+ pmap_page_protect(pn, VM_PROT_READ);
+ pmap_test_read_write(pmap, ro_base, true, false);
+ pmap_test_read_write(pmap, rw_base, true, false);
+
+ T_LOG("Validate that disconnect removes all known mappings of the page.");
+ pmap_disconnect(pn);
+ if (!pmap_verify_free(pn)) {
+ T_FAIL("Page still has mappings");
+ }
+
+ T_LOG("Remove the wired mapping, so we can tear down the test map.");
+ pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
+ pmap_destroy(pmap);
+
+ T_LOG("Release the pages back to the VM.");
+ vm_page_lock_queues();
+ vm_page_free(unwired_vm_page);
+ vm_page_free(wired_vm_page);
+ vm_page_unlock_queues();
+
+ T_LOG("Testing successful!");
+ return 0;
+}
+#endif /* __arm64__ */
+
+kern_return_t
+pmap_test(void)
+{
+ T_LOG("Starting pmap_tests");
+#ifdef __arm64__
+ int flags = 0;
+ flags |= PMAP_CREATE_64BIT;
+
+#if __ARM_MIXED_PAGE_SIZE__
+ T_LOG("Testing VM_PAGE_SIZE_4KB");
+ pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
+ T_LOG("Testing VM_PAGE_SIZE_16KB");
+ pmap_test_test_config(flags);
+#else /* __ARM_MIXED_PAGE_SIZE__ */
+ pmap_test_test_config(flags);
+#endif /* __ARM_MIXED_PAGE_SIZE__ */
+
+#endif /* __arm64__ */
+ T_PASS("completed pmap_test successfully");
+ return KERN_SUCCESS;
+}
+#endif /* CONFIG_XNUPOST */
+
+/*
+ * The following function should never make it to RELEASE code, since
+ * it provides a way to get the PPL to modify text pages.
+ */
+#if DEVELOPMENT || DEBUG
+
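+/* Permanently-undefined (UDF) instruction encodings for A32 and Thumb, used to force an undefined-instruction exception. */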
+#define ARM_UNDEFINED_INSN 0xe7f000f0
+#define ARM_UNDEFINED_INSN_THUMB 0xde00
+
+/**
+ * Forcibly overwrite executable text with an illegal instruction.
+ *
+ * @note Only used for xnu unit testing.
+ *
+ * @param pa The physical address to corrupt.
+ *
+ * @return KERN_SUCCESS on success.
+ */
+kern_return_t
+pmap_test_text_corruption(pmap_paddr_t pa)
+{
+#if XNU_MONITOR
+ return pmap_test_text_corruption_ppl(pa);
+#else /* XNU_MONITOR */
+ return pmap_test_text_corruption_internal(pa);
+#endif /* XNU_MONITOR */
+}
+
+MARK_AS_PMAP_TEXT kern_return_t
+pmap_test_text_corruption_internal(pmap_paddr_t pa)
+{
+ vm_offset_t va = phystokv(pa);
+ unsigned int pai = pa_index(pa);
+
+ assert(pa_valid(pa));
+
+ LOCK_PVH(pai);
+
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+ assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
+#if defined(PVH_FLAG_EXEC)
+ const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;
+
+ if (need_ap_twiddle) {
+ pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
+ }
+#endif /* defined(PVH_FLAG_EXEC) */
+
+ /*
+ * The low bit in an instruction address indicates a THUMB instruction
+ */
+ if (va & 1) {
+ va &= ~(vm_offset_t)1;
+ *(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
+ } else {
+ *(uint32_t *)va = ARM_UNDEFINED_INSN;
+ }
+
+#if defined(PVH_FLAG_EXEC)
+ if (need_ap_twiddle) {
+ pmap_set_ptov_ap(pai, AP_RONA, FALSE);
+ }
+#endif /* defined(PVH_FLAG_EXEC) */
+
+ InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));
+
+ UNLOCK_PVH(pai);
+
+ return KERN_SUCCESS;
+}
+
+#endif /* DEVELOPMENT || DEBUG */