X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c6bf4f310a33a9262d455ea4d3f0630b1255e3fe..HEAD:/osfmk/arm64/machine_routines.c diff --git a/osfmk/arm64/machine_routines.c b/osfmk/arm64/machine_routines.c index 037f34c13..3b616122b 100644 --- a/osfmk/arm64/machine_routines.c +++ b/osfmk/arm64/machine_routines.c @@ -42,83 +42,133 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include +#include +#include +#include #include #include #include #include +#if HIBERNATION +#include +#endif /* HIBERNATION */ #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) -#include +#include #endif #include +/** + * On supported hardware, debuggable builds make the HID bits read-only + * without locking them. This lets people manually modify HID bits while + * debugging, since they can use a debugging tool to first reset the HID + * bits back to read/write. However it will still catch xnu changes that + * accidentally write to HID bits after they've been made read-only. + */ +#if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG) +#define USE_TWO_STAGE_SPR_LOCK +#endif + #if KPC #include #endif +#define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT) +#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT) + #if HAS_CLUSTER static uint8_t cluster_initialized = 0; #endif - -static int max_cpus_initialized = 0; -#define MAX_CPUS_SET 0x1 -#define MAX_CPUS_WAIT 0x2 - uint32_t LockTimeOut; uint32_t LockTimeOutUsec; uint64_t TLockTimeOut; uint64_t MutexSpin; -boolean_t is_clock_configured = FALSE; +uint64_t low_MutexSpin; +int64_t high_MutexSpin; -uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */ +static uint64_t ml_wfe_hint_max_interval; +#define MAX_WFE_HINT_INTERVAL_US (500ULL) -#if CONFIG_NONFATAL_ASSERTS -extern int mach_assert; -#endif -extern volatile uint32_t debug_enabled; +/* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */ +TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0); extern vm_offset_t segLOWEST; extern vm_offset_t segLOWESTTEXT; extern vm_offset_t segLASTB; extern unsigned long segSizeLAST; +/* ARM64 specific bounds; used to test for presence in the kernelcache. */ +extern vm_offset_t vm_kernelcache_base; +extern vm_offset_t vm_kernelcache_top; + #if defined(HAS_IPI) unsigned int gFastIPI = 1; #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */ -static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault; +static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout", + kDeferredIPITimerDefault); #endif /* defined(HAS_IPI) */ -void machine_conf(void); - thread_t Idle_context(void); -SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... 
MAX_CPUS - 1] = (uint32_t)-1}; -SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0; -SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1; -SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0; -SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP; +SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS]; +SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS]; +SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = { + .version = CPU_TOPOLOGY_VERSION, + .cpus = topology_cpu_array, + .clusters = topology_cluster_array, +}; +/** + * Represents the offset of each cluster within a hypothetical array of MAX_CPUS + * entries of an arbitrary data type. This is intended for use by specialized consumers + * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1), + * as follows: + * hypothetical_array[cluster_offsets[AFF1] + AFF0] + * Most consumers should instead use general-purpose facilities such as PERCPU or + * ml_get_cpu_number(). + */ +SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1]; + +SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX; -SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX; +extern uint32_t lockdown_done; -lockdown_handler_t lockdown_handler; -void *lockdown_this; -lck_mtx_t lockdown_handler_lck; -lck_grp_t *lockdown_handler_grp; -int lockdown_done; +/** + * Represents regions of virtual address space that should be reserved + * (pre-mapped) in each user address space. + */ +SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = { + { + .vmrr_name = "GPU Carveout", + .vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS, + .vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS) + }, + /* + * Reserve the virtual memory space representing the commpage nesting region + * to prevent user processes from allocating memory within it. The actual + * page table entries for the commpage are inserted by vm_commpage_enter(). + * This vm_map_enter() just prevents userspace from allocating/deallocating + * anything within the entire commpage nested region. + */ + { + .vmrr_name = "commpage nesting", + .vmrr_addr = _COMM_PAGE64_NESTING_START, + .vmrr_size = _COMM_PAGE64_NESTING_SIZE + } +}; -void ml_lockdown_init(void); -void ml_lockdown_run_handler(void); uint32_t get_arm_cpu_version(void); #if defined(HAS_IPI) @@ -132,17 +182,17 @@ ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type) * to a single CPU. Otherwise we may migrate between choosing which * IPI mechanism to use and issuing the IPI. 
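(Illustrative aside, not part of the diff.) The new MPIDR_CPU_ID/MPIDR_CLUSTER_ID helpers and the local-versus-global IPI choice reduce to simple field extraction from MPIDR_EL1. The standalone sketch below assumes the architectural affinity layout (AFF0 in bits 7:0, AFF1 in bits 15:8) and uses made-up MPIDR values; it is not kernel code.

#include <stdint.h>
#include <stdio.h>

#define AFF0_MASK  0x00ffULL
#define AFF1_MASK  0xff00ULL
#define CPU_ID(m)      (((m) & AFF0_MASK))
#define CLUSTER_ID(m)  (((m) & AFF1_MASK) >> 8)

int
main(void)
{
    uint64_t local_mpidr  = 0x0102;  /* cluster 1, core 2 (hypothetical) */
    uint64_t target_mpidr = 0x0100;  /* cluster 1, core 0 (hypothetical) */

    if (CLUSTER_ID(local_mpidr) == CLUSTER_ID(target_mpidr)) {
        /* Same cluster: only the core number goes into the "local" IPI register. */
        printf("local IPI, payload core id = %llu\n",
            (unsigned long long)CPU_ID(target_mpidr));
    } else {
        /* Cross-cluster: the cluster id is shifted into bits 23:16 of the payload
         * (IPI_RR_TARGET_CLUSTER_SHIFT == 16 in the diff). */
        uint64_t payload = (CLUSTER_ID(target_mpidr) << 16) | CPU_ID(target_mpidr);
        printf("global IPI, payload = 0x%llx\n", (unsigned long long)payload);
    }
    return 0;
}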
*/ MRS(local_mpidr, "MPIDR_EL1"); - if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) { - uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK); - MSR(ARM64_REG_IPI_RR_LOCAL, x); + if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) { + uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr); + MSR("S3_5_C15_C0_0", x); } else { #define IPI_RR_TARGET_CLUSTER_SHIFT 16 - uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK); - MSR(ARM64_REG_IPI_RR_GLOBAL, x); + uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr); + MSR("S3_5_C15_C0_1", x); } #else - uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK); - MSR(ARM64_REG_IPI_RR, x); + uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr); + MSR("S3_5_C15_C0_1", x); #endif } #endif @@ -186,7 +236,7 @@ ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs) /* update deferred_ipi_timer_ns with the new clamped value */ absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns); - MSR(ARM64_REG_IPI_CR, abstime); + MSR("S3_5_C15_C3_1", abstime); #else (void)nanosecs; panic("Platform does not support ACC Fast IPI"); @@ -232,23 +282,14 @@ ml_cpu_signal_retract(unsigned int cpu_mpidr __unused) void machine_idle(void) { - __builtin_arm_wsr("DAIFSet", (DAIFSC_IRQF | DAIFSC_FIQF)); + /* Interrupts are expected to be masked on entry or re-entry via + * Idle_load_context() + */ + assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF); Idle_context(); __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF)); } -void -init_vfp(void) -{ - return; -} - -boolean_t -get_vfp_enabled(void) -{ - return TRUE; -} - void OSSynchronizeIO(void) { @@ -312,10 +353,21 @@ get_arm_cpu_version(void) return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4)); } +bool +ml_feature_supported(uint32_t feature_bit) +{ + uint64_t aidr_el1_value = 0; + + MRS(aidr_el1_value, "AIDR_EL1"); + + + return aidr_el1_value & feature_bit; +} + /* * user_cont_hwclock_allowed() * - * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0) + * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0) * as a continuous time source (e.g. from mach_continuous_time) */ boolean_t @@ -335,331 +387,15 @@ user_timebase_type(void) return USER_TIMEBASE_SPEC; } -boolean_t -arm64_wfe_allowed(void) -{ - return TRUE; -} - -#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) - -uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0; -uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0; -vm_offset_t amcc_base; - -static void assert_unlocked(void); -static void assert_amcc_cache_disabled(void); -static void lock_amcc(void); -static void lock_mmu(uint64_t begin, uint64_t end); - -void -rorgn_stash_range(void) -{ -#if DEVELOPMENT || DEBUG - boolean_t rorgn_disable = FALSE; - - PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable)); - - if (rorgn_disable) { - /* take early out if boot arg present, don't query any machine registers to avoid - * dependency on amcc DT entry - */ - return; - } -#endif - - /* Get the AMC values, and stash them into rorgn_begin, rorgn_end. - * gPhysBase is the base of DRAM managed by xnu. 
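(Illustrative aside, not part of the diff.) The removed rorgn_stash_range() converts the AMCC RO-region registers, which hold 16KB page numbers relative to DRAM_BASE, back into physical addresses. The standalone sketch below assumes AMCC_PGSHIFT is 14 (16KB pages) and uses a made-up register value.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t gPhysBase = 0x801234000ULL;              /* hypothetical kernel DRAM base */
    uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL;  /* truncate to a 512MB granule */
    uint32_t rorgn_base_pages = 0x100;                /* hypothetical register value */
    unsigned amcc_pgshift = 14;                       /* 16KB AMCC page size (assumed) */

    /* PA = (page number from DRAM_BASE) << AMCC_PGSHIFT, plus DRAM_BASE itself. */
    uint64_t rorgn_begin_pa = ((uint64_t)rorgn_base_pages << amcc_pgshift) + dram_base;

    printf("dram_base = 0x%llx, rorgn_begin = 0x%llx\n",
        (unsigned long long)dram_base, (unsigned long long)rorgn_begin_pa);
    return 0;
}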
we need DRAM_BASE as - * the AMCC RO region begin/end registers are in units of 16KB page - * numbers from DRAM_BASE so we'll truncate gPhysBase at 512MB granule - * and assert the value is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64. - */ - - uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL; /* 512MB */ - assert(dram_base == 0x800000000ULL); - -#if defined(KERNEL_INTEGRITY_KTRR) - uint64_t soc_base = 0; - DTEntry entryP = NULL; - uintptr_t *reg_prop = NULL; - uint32_t prop_size = 0; - int rc; - - soc_base = pe_arm_get_soc_base_phys(); - rc = DTFindEntry("name", "mcc", &entryP); - assert(rc == kSuccess); - rc = DTGetProperty(entryP, "reg", (void **)®_prop, &prop_size); - assert(rc == kSuccess); - amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1)); -#elif defined(KERNEL_INTEGRITY_CTRR) - /* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */ -#define TEMP_AMCC_BASE_PA 0x200000000ULL -#define TEMP_AMCC_SZ 0x100000 - amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ); -#else -#error "KERNEL_INTEGRITY config error" -#endif - -#if defined(KERNEL_INTEGRITY_KTRR) - assert(rRORGNENDADDR > rRORGNBASEADDR); - rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base; - rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base; -#elif defined(KERNEL_INTEGRITY_CTRR) - rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR); - rorgn_end = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR); - assert(rorgn_end > rorgn_begin); - - for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) { - uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR); - uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR); - if (!(begin == rorgn_begin && end == rorgn_end)) { -#if DEVELOPMENT || DEBUG - panic("iboot programmed CTRR bounds are inconsistent"); -#else - panic("Inconsistent memory configuration"); -#endif - } - } - - // convert from page number from DRAM base to PA - rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base; - rorgn_end = (rorgn_end << AMCC_PGSHIFT) + dram_base; - -#else -#error KERNEL_INTEGRITY config error -#endif /* defined (KERNEL_INTEGRITY_KTRR) */ -} - -static void -assert_unlocked() -{ - uint64_t ktrr_lock = 0; - uint32_t rorgn_lock = 0; - - assert(amcc_base); -#if defined(KERNEL_INTEGRITY_KTRR) - rorgn_lock = rRORGNLOCK; - ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1); -#elif defined(KERNEL_INTEGRITY_CTRR) - for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) { - rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK); - } - ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1); -#else -#error KERNEL_INTEGRITY config error -#endif /* defined(KERNEL_INTEGRITY_KTRR) */ - - assert(!ktrr_lock); - assert(!rorgn_lock); -} - -static void -lock_amcc() -{ -#if defined(KERNEL_INTEGRITY_KTRR) - rRORGNLOCK = 1; - __builtin_arm_isb(ISB_SY); -#elif defined(KERNEL_INTEGRITY_CTRR) - /* lockdown planes in reverse order as plane 0 should be locked last */ - for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) { - rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1; - rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1; - __builtin_arm_isb(ISB_SY); - } -#else -#error KERNEL_INTEGRITY config error -#endif -} - -static void -lock_mmu(uint64_t begin, uint64_t end) -{ -#if defined(KERNEL_INTEGRITY_KTRR) - - __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin); - __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end); - __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL); - - /* flush TLB */ - - __builtin_arm_isb(ISB_SY); - flush_mmu_tlb(); - -#elif defined 
(KERNEL_INTEGRITY_CTRR) - /* this will lock the entire bootstrap cluster. non bootstrap clusters - * will be locked by respective cluster master in start.s */ - - __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin); - __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end); - -#if !defined(APPLEVORTEX) - /* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */ - __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */ - flush_mmu_tlb(); -#endif /* !defined(APPLEVORTEX) */ - - __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT); - __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL); - - uint64_t current_el = __builtin_arm_rsr64("CurrentEL"); - if (current_el == PSR64_MODE_EL2) { - // CTRR v2 has explicit registers for cluster config. they can only be written in EL2 - - __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin); - __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end); - __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT); - __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL); - } - - __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */ -#if defined(APPLEVORTEX) - flush_mmu_tlb(); -#endif /* defined(APPLEVORTEX) */ - -#else /* defined(KERNEL_INTEGRITY_KTRR) */ -#error KERNEL_INTEGRITY config error -#endif /* defined(KERNEL_INTEGRITY_KTRR) */ -} - -static void -assert_amcc_cache_disabled() -{ -#if defined(KERNEL_INTEGRITY_KTRR) - assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */ -#elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006)) - /* - * T8006 differentiates between data and tag ways being powered up, so - * make sure to check that both are zero on its single memory plane. 
- */ - assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) & - (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0); -#elif defined (KERNEL_INTEGRITY_CTRR) - for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) { - assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0); - } -#else -#error KERNEL_INTEGRITY config error -#endif -} - -/* - * void rorgn_lockdown(void) - * - * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked - * - * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in - * start.s:start_cpu() for subsequent wake/resume of all cores - */ -void -rorgn_lockdown(void) -{ - vm_offset_t ktrr_begin, ktrr_end; - unsigned long last_segsz; - -#if DEVELOPMENT || DEBUG - boolean_t ktrr_disable = FALSE; - - PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable)); - - if (ktrr_disable) { - /* - * take early out if boot arg present, since we may not have amcc DT entry present - * we can't assert that iboot hasn't programmed the RO region lockdown registers - */ - goto out; - } -#endif /* DEVELOPMENT || DEBUG */ - - assert_unlocked(); - - /* [x] - Use final method of determining all kernel text range or expect crashes */ - ktrr_begin = segLOWEST; - assert(ktrr_begin && gVirtBase && gPhysBase); - - ktrr_begin = kvtophys(ktrr_begin); - - ktrr_end = kvtophys(segLASTB); - last_segsz = segSizeLAST; -#if defined(KERNEL_INTEGRITY_KTRR) - /* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */ - ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK; - /* ensure that iboot and xnu agree on the ktrr range */ - assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz)); - /* assert that __LAST segment containing privileged insns is only a single page */ - assert(last_segsz == PAGE_SIZE); -#elif defined(KERNEL_INTEGRITY_CTRR) - ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK; - /* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making - * __pinst no execute because PXN applies with MMU off in CTRR. 
*/ - assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end); -#endif - - -#if DEBUG || DEVELOPMENT - printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end); -#endif - - /* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */ - - assert_amcc_cache_disabled(); - - CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin), - (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK)); - - lock_amcc(); - - lock_mmu(ktrr_begin, ktrr_end); - -#if DEVELOPMENT || DEBUG -out: -#endif - -#if defined(KERNEL_INTEGRITY_CTRR) - { - /* wake any threads blocked on cluster master lockdown */ - cpu_data_t *cdp; - uint64_t mpidr_el1_value; - - cdp = getCpuDatap(); - MRS(mpidr_el1_value, "MPIDR_EL1"); - cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT; - assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__); - ctrr_cluster_locked[cdp->cpu_cluster_id] = 1; - thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]); - } -#endif - /* now we can run lockdown handler */ - ml_lockdown_run_handler(); -} - -#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */ - void machine_startup(__unused boot_args * args) { - int boot_arg; - #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG) if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) { gFastIPI = 1; } - - PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns)); #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/ -#if CONFIG_NONFATAL_ASSERTS - PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert)); -#endif - - if (PE_parse_boot_argn("preempt", &boot_arg, sizeof(boot_arg))) { - default_preemption_rate = boot_arg; - } - if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof(boot_arg))) { - default_bg_preemption_rate = boot_arg; - } - - PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof(yield_delay_us)); - machine_conf(); /* @@ -669,21 +405,27 @@ machine_startup(__unused boot_args * args) /* NOTREACHED */ } +typedef void (*invalidate_fn_t)(void); + +static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL; + +void set_invalidate_hmac_function(invalidate_fn_t fn); + void -machine_lockdown_preflight(void) +set_invalidate_hmac_function(invalidate_fn_t fn) { -#if CONFIG_KERNEL_INTEGRITY - -#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) - rorgn_stash_range(); -#endif + if (NULL != invalidate_hmac_function) { + panic("Invalidate HMAC function already set"); + } -#endif + invalidate_hmac_function = fn; } void machine_lockdown(void) { + arm_vm_prot_finalize(PE_state.bootArgs); + #if CONFIG_KERNEL_INTEGRITY #if KERNEL_INTEGRITY_WT /* Watchtower @@ -714,8 +456,16 @@ machine_lockdown(void) #endif /* CONFIG_KERNEL_INTEGRITY */ + + + if (NULL != invalidate_hmac_function) { + invalidate_hmac_function(); + } + + lockdown_done = 1; } + char * machine_boot_info( __unused char *buf, @@ -724,26 +474,6 @@ machine_boot_info( return PE_boot_args(); } -void -machine_conf(void) -{ - /* - * This is known to be inaccurate. 
mem_size should always be capped at 2 GB - */ - machine_info.memory_size = (uint32_t)mem_size; -} - -void -machine_init(void) -{ - debug_log_init(); - clock_config(); - is_clock_configured = TRUE; - if (debug_enabled) { - pmap_map_globals(); - } -} - void slave_machine_init(__unused void *param) { @@ -764,46 +494,6 @@ machine_processor_shutdown( return Shutdown_context(doshutdown, processor); } -/* - * Routine: ml_init_max_cpus - * Function: - */ -void -ml_init_max_cpus(unsigned int max_cpus) -{ - boolean_t current_state; - - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - machine_info.max_cpus = max_cpus; - machine_info.physical_cpu_max = max_cpus; - machine_info.logical_cpu_max = max_cpus; - if (max_cpus_initialized == MAX_CPUS_WAIT) { - thread_wakeup((event_t) &max_cpus_initialized); - } - max_cpus_initialized = MAX_CPUS_SET; - } - (void) ml_set_interrupts_enabled(current_state); -} - -/* - * Routine: ml_get_max_cpus - * Function: - */ -unsigned int -ml_get_max_cpus(void) -{ - boolean_t current_state; - - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - max_cpus_initialized = MAX_CPUS_WAIT; - assert_wait((event_t) &max_cpus_initialized, THREAD_UNINT); - (void) thread_block(THREAD_CONTINUE_NULL); - } - (void) ml_set_interrupts_enabled(current_state); - return machine_info.max_cpus; -} /* * Routine: ml_init_lock_timeout @@ -841,6 +531,30 @@ ml_init_lock_timeout(void) nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime); } MutexSpin = abstime; + low_MutexSpin = MutexSpin; + + + /* + * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but + * real_ncpus is not set at this time + * + * NOTE: active spinning is disabled in arm. It can be activated + * by setting high_MutexSpin through the sysctl. + */ + high_MutexSpin = low_MutexSpin; + + nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval); +} + +/* + * This is called when all of the ml_processor_info_t structures have been + * initialized and all the processors have been started through processor_start(). + * + * Required by the scheduler subsystem. 
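(Illustrative aside, not part of the diff.) The nanoseconds_to_absolutetime() calls in ml_init_lock_timeout() reduce to abstime = ns * timebase_hz / NSEC_PER_SEC. The standalone sketch below assumes a 24 MHz timebase purely for the arithmetic; the real frequency comes from gPEClockFrequencyInfo.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_USEC 1000ULL

static uint64_t
ns_to_abstime(uint64_t ns, uint64_t timebase_hz)
{
    return ns * timebase_hz / NSEC_PER_SEC;
}

int
main(void)
{
    uint64_t timebase_hz = 24000000ULL;  /* assumed 24 MHz timebase */

    /* The 10us MutexSpin default and the 500us WFE hint cap, in timebase ticks. */
    printf("MutexSpin    = %llu ticks\n",
        (unsigned long long)ns_to_abstime(10 * NSEC_PER_USEC, timebase_hz));
    printf("wfe_hint_max = %llu ticks\n",
        (unsigned long long)ns_to_abstime(500 * NSEC_PER_USEC, timebase_hz));
    return 0;
}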
+ */ +void +ml_cpu_init_completed(void) +{ } /* @@ -999,10 +713,7 @@ ml_install_interrupt_handler( cpu_data_ptr->interrupt_handler = handler; cpu_data_ptr->interrupt_refCon = refCon; - cpu_data_ptr->interrupts_enabled = TRUE; (void) ml_set_interrupts_enabled(current_state); - - initialize_screen(NULL, kPEAcquireScreen); } /* @@ -1046,6 +757,85 @@ ml_init_timebase( } } +#define ML_READPROP_MANDATORY UINT64_MAX + +static uint64_t +ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value) +{ + void const *prop; + unsigned int propSize; + + if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) { + if (propSize == sizeof(uint8_t)) { + return *((uint8_t const *)prop); + } else if (propSize == sizeof(uint16_t)) { + return *((uint16_t const *)prop); + } else if (propSize == sizeof(uint32_t)) { + return *((uint32_t const *)prop); + } else if (propSize == sizeof(uint64_t)) { + return *((uint64_t const *)prop); + } else { + panic("CPU property '%s' has bad size %u", propertyName, propSize); + } + } else { + if (default_value == ML_READPROP_MANDATORY) { + panic("Missing mandatory property '%s'", propertyName); + } + return default_value; + } +} + +static boolean_t +ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr) +{ + uint64_t const *prop; + unsigned int propSize; + + if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) { + return FALSE; + } + + if (propSize != sizeof(uint64_t) * 2) { + panic("Wrong property size for %s", propertyName); + } + + *pa_ptr = prop[0]; + *len_ptr = prop[1]; + return TRUE; +} + +static boolean_t +ml_is_boot_cpu(const DTEntry entry) +{ + void const *prop; + unsigned int propSize; + + if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) { + panic("unable to retrieve state for cpu"); + } + + if (strncmp((char const *)prop, "running", propSize) == 0) { + return TRUE; + } else { + return FALSE; + } +} + +static void +ml_read_chip_revision(unsigned int *rev __unused) +{ + // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds +#ifdef APPLE_ARM64_ARCH_FAMILY + DTEntry entryP; + + if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) { + *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN); + } else { + *rev = CPU_VERSION_UNKNOWN; + } +#endif +} + void ml_parse_cpu_topology(void) { @@ -1054,59 +844,148 @@ ml_parse_cpu_topology(void) uint32_t cpu_boot_arg; int err; + int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1]; + int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1]; cpu_boot_arg = MAX_CPUS; - PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg)); - err = DTLookupEntry(NULL, "/cpus", &entry); + err = SecureDTLookupEntry(NULL, "/cpus", &entry); assert(err == kSuccess); - err = DTInitEntryIterator(entry, &iter); + err = SecureDTInitEntryIterator(entry, &iter); assert(err == kSuccess); - while (kSuccess == DTIterateEntries(&iter, &child)) { - unsigned int propSize; - void *prop = NULL; - int cpu_id = avail_cpus++; + for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) { + cluster_offsets[i] = -1; + cluster_phys_to_logical[i] = -1; + cluster_max_cpu_phys_id[i] = 0; + } + + while (kSuccess == SecureDTIterateEntries(&iter, &child)) { + boolean_t is_boot_cpu = ml_is_boot_cpu(child); - if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize)) { - cpu_id = *((int32_t*)prop); + // If the number of CPUs is constrained by the cpus= boot-arg, and 
the boot CPU hasn't + // been added to the topology struct yet, and we only have one slot left, then skip + // every other non-boot CPU in order to leave room for the boot CPU. + // + // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[] + // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted. + if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) { + continue; + } + if (topology_info.num_cpus >= cpu_boot_arg) { + break; } - assert(cpu_id < MAX_CPUS); - assert(cpu_phys_ids[cpu_id] == (uint32_t)-1); + ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus]; - if (boot_cpu == -1) { - if (kSuccess != DTGetProperty(child, "state", &prop, &propSize)) { - panic("unable to retrieve state for cpu %d", cpu_id); - } + cpu->cpu_id = topology_info.num_cpus++; + assert(cpu->cpu_id < MAX_CPUS); + topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id); - if (strncmp((char*)prop, "running", propSize) == 0) { - boot_cpu = cpu_id; - } - } - if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize)) { - panic("unable to retrieve physical ID for cpu %d", cpu_id); + cpu->die_id = (int)ml_readprop(child, "die-id", 0); + topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id); + + cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY); + + cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0); + cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0); + cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0); + cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0); + cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0); + + ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len); + ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len); + ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len); + cpu->cluster_type = CLUSTER_TYPE_SMP; + + int cluster_type = (int)ml_readprop(child, "cluster-type", 0); + if (cluster_type == 'E') { + cpu->cluster_type = CLUSTER_TYPE_E; + } else if (cluster_type == 'P') { + cpu->cluster_type = CLUSTER_TYPE_P; } - cpu_phys_ids[cpu_id] = *((uint32_t*)prop); + /* + * Since we want to keep a linear cluster ID space, we cannot just rely + * on the value provided by EDT. Instead, use the MPIDR value to see if we have + * seen this exact cluster before. If so, then reuse that cluster ID for this CPU. + */ +#if HAS_CLUSTER + uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id); +#else + uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P); +#endif + assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID); + cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ? 
+ topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]); + + assert(cpu->cluster_id < MAX_CPU_CLUSTERS); + + ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id]; + if (cluster->num_cpus == 0) { + assert(topology_info.num_clusters < MAX_CPU_CLUSTERS); + + topology_info.num_clusters++; + topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id); + + cluster->cluster_id = cpu->cluster_id; + cluster->cluster_type = cpu->cluster_type; + cluster->first_cpu_id = cpu->cpu_id; + assert(cluster_phys_to_logical[phys_cluster_id] == -1); + cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id; + + // Since we don't have a per-cluster EDT node, this is repeated in each CPU node. + // If we wind up with a bunch of these, we might want to create separate per-cluster + // EDT nodes and have the CPU nodes reference them through a phandle. + ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len); + ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len); + } - if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg))) { - max_cpu_number = cpu_id; +#if HAS_CLUSTER + if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) { + cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id); } - } +#endif - if (avail_cpus > cpu_boot_arg) { - avail_cpus = cpu_boot_arg; - } + cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id)); + cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id)); + + cluster->num_cpus++; + cluster->cpu_mask |= 1ULL << cpu->cpu_id; - if (avail_cpus == 0) { - panic("No cpus found!"); + if (is_boot_cpu) { + assert(topology_info.boot_cpu == NULL); + topology_info.boot_cpu = cpu; + topology_info.boot_cluster = cluster; + } } - if (boot_cpu == -1) { - panic("unable to determine boot cpu!"); +#if HAS_CLUSTER + /* + * Build the cluster offset array, ensuring that the region reserved + * for each physical cluster contains enough entries to be indexed + * by the maximum physical CPU ID (AFF0) within the cluster. + */ + unsigned int cur_cluster_offset = 0; + for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) { + if (cluster_phys_to_logical[i] != -1) { + cluster_offsets[i] = cur_cluster_offset; + cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1); + } } + assert(cur_cluster_offset <= MAX_CPUS); +#else + /* + * For H10, there are really 2 physical clusters, but they are not separated + * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering + * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing, + * treat H10 and earlier devices as though they contain a single cluster. + */ + cluster_offsets[0] = 0; +#endif + assert(topology_info.boot_cpu != NULL); + ml_read_chip_revision(&topology_info.chip_revision); /* * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may @@ -1116,95 +995,162 @@ ml_parse_cpu_topology(void) * per-cpu data object. 
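(Illustrative aside, not part of the diff.) The loop above packs the physical clusters back to back so that hypothetical_array[cluster_offsets[AFF1] + AFF0] is a valid per-CPU slot. The cluster sizes in the standalone sketch below are made up; the kernel derives them from the device tree.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Hypothetical topology: physical cluster 0 has cores 0..3,
     * physical cluster 1 has cores 0..1. */
    int64_t max_cpu_phys_id[2] = { 3, 1 };
    int64_t cluster_offsets[2];

    int64_t offset = 0;
    for (int i = 0; i < 2; i++) {
        cluster_offsets[i] = offset;
        offset += max_cpu_phys_id[i] + 1;   /* reserve one slot per possible AFF0 */
    }

    /* MPIDR with AFF1 = 1, AFF0 = 1 lands in slot cluster_offsets[1] + 1 == 5. */
    uint64_t mpidr = 0x0101;
    uint64_t aff0 = mpidr & 0xff;
    uint64_t aff1 = (mpidr >> 8) & 0xff;
    printf("per-CPU slot = %lld\n",
        (long long)(cluster_offsets[aff1] + (int64_t)aff0));
    return 0;
}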
*/ assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0); - __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu); + __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id); +} + +const ml_topology_info_t * +ml_get_topology_info(void) +{ + return &topology_info; +} + +void +ml_map_cpu_pio(void) +{ + unsigned int i; + + for (i = 0; i < topology_info.num_cpus; i++) { + ml_topology_cpu_t *cpu = &topology_info.cpus[i]; + if (cpu->cpu_IMPL_pa) { + cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len); + cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len); + } + if (cpu->cpu_UTTDBG_pa) { + cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len); + } + } + + for (i = 0; i < topology_info.num_clusters; i++) { + ml_topology_cluster_t *cluster = &topology_info.clusters[i]; + if (cluster->acc_IMPL_pa) { + cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len); + } + if (cluster->cpm_IMPL_pa) { + cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len); + } + } } unsigned int ml_get_cpu_count(void) { - return avail_cpus; + return topology_info.num_cpus; +} + +unsigned int +ml_get_cluster_count(void) +{ + return topology_info.num_clusters; } int ml_get_boot_cpu_number(void) { - return boot_cpu; + return topology_info.boot_cpu->cpu_id; } cluster_type_t ml_get_boot_cluster(void) { - return boot_cluster; + return topology_info.boot_cluster->cluster_type; } int ml_get_cpu_number(uint32_t phys_id) { - for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) { - if (cpu_phys_ids[log_id] == phys_id) { - return log_id; + phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK; + + for (unsigned i = 0; i < topology_info.num_cpus; i++) { + if (topology_info.cpus[i].phys_id == phys_id) { + return i; } } + return -1; } +int +ml_get_cluster_number(uint32_t phys_id) +{ + int cpu_id = ml_get_cpu_number(phys_id); + if (cpu_id < 0) { + return -1; + } + + ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id]; + + return cpu->cluster_id; +} + +unsigned int +ml_get_cpu_number_local(void) +{ + uint64_t mpidr_el1_value = 0; + unsigned cpu_id; + + /* We identify the CPU based on the constant bits of MPIDR_EL1. */ + MRS(mpidr_el1_value, "MPIDR_EL1"); + cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value); + + assert(cpu_id <= (unsigned int)ml_get_max_cpu_number()); + + return cpu_id; +} + +int +ml_get_cluster_number_local() +{ + uint64_t mpidr_el1_value = 0; + unsigned cluster_id; + + /* We identify the cluster based on the constant bits of MPIDR_EL1. 
*/ + MRS(mpidr_el1_value, "MPIDR_EL1"); + cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value); + + assert(cluster_id <= (unsigned int)ml_get_max_cluster_number()); + + return cluster_id; +} + int ml_get_max_cpu_number(void) { - return max_cpu_number; + return topology_info.max_cpu_id; } +int +ml_get_max_cluster_number(void) +{ + return topology_info.max_cluster_id; +} + +unsigned int +ml_get_first_cpu_id(unsigned int cluster_id) +{ + return topology_info.clusters[cluster_id].first_cpu_id; +} void ml_lockdown_init() { - lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL); - assert(lockdown_handler_grp != NULL); - - lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL); - -#if defined(KERNEL_INTEGRITY_CTRR) - init_ctrr_cpu_start_lock(); +#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) + rorgn_stash_range(); #endif } kern_return_t ml_lockdown_handler_register(lockdown_handler_t f, void *this) { - if (lockdown_handler || !f) { + if (!f) { return KERN_FAILURE; } - lck_mtx_lock(&lockdown_handler_lck); - lockdown_handler = f; - lockdown_this = this; - -#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)) - lockdown_done = 1; - lockdown_handler(this); -#else - if (lockdown_done) { - lockdown_handler(this); - } -#endif - lck_mtx_unlock(&lockdown_handler_lck); + assert(lockdown_done); + f(this); // XXX: f this whole function return KERN_SUCCESS; } -void -ml_lockdown_run_handler() -{ - lck_mtx_lock(&lockdown_handler_lck); - assert(!lockdown_done); - - lockdown_done = 1; - if (lockdown_handler) { - lockdown_handler(lockdown_this); - } - lck_mtx_unlock(&lockdown_handler_lck); -} - kern_return_t ml_processor_register(ml_processor_info_t *in_processor_info, processor_t *processor_out, ipi_handler_t *ipi_handler_out, @@ -1219,7 +1165,7 @@ ml_processor_register(ml_processor_info_t *in_processor_info, return KERN_FAILURE; } - if ((unsigned int)OSIncrementAtomic((SInt32*)®_cpu_count) >= avail_cpus) { + if ((unsigned)OSIncrementAtomic((SInt32*)®_cpu_count) >= topology_info.num_cpus) { return KERN_FAILURE; } @@ -1232,7 +1178,7 @@ ml_processor_register(ml_processor_info_t *in_processor_info, is_boot_cpu = TRUE; } - assert(in_processor_info->log_id < MAX_CPUS); + assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number()); this_cpu_datap->cpu_id = in_processor_info->cpu_id; @@ -1242,22 +1188,22 @@ ml_processor_register(ml_processor_info_t *in_processor_info, } if (!is_boot_cpu) { - this_cpu_datap->cpu_number = in_processor_info->log_id; + this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id); if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) { goto processor_register_error; } } - this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle; - this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch; + this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle; + this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch; nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency); this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr); - this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer; + this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer; this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon; - this_cpu_datap->platform_error_handler = (void *) 
in_processor_info->platform_error_handler; + this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler; this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr; this_cpu_datap->cpu_phys_id = in_processor_info->phys_id; this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty; @@ -1275,13 +1221,50 @@ ml_processor_register(ml_processor_info_t *in_processor_info, this_cpu_datap->cluster_master = is_boot_cpu; #endif /* HAS_CLUSTER */ +#if !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) + { + /* Workaround for the existing scheduler + * code, which only supports a limited number of psets. + * + * To get around that limitation, we distribute all cores into + * two psets according to their cluster type, instead of + * having a dedicated pset per cluster ID. + */ + + pset_cluster_type_t pset_cluster_type; + + /* For this workaround, we don't expect seeing anything else + * than E or P clusters. */ + switch (in_processor_info->cluster_type) { + case CLUSTER_TYPE_E: + pset_cluster_type = PSET_AMP_E; + break; + case CLUSTER_TYPE_P: + pset_cluster_type = PSET_AMP_P; + break; + default: + panic("unknown/unsupported cluster type %d", in_processor_info->cluster_type); + } + + pset = pset_find_first_by_cluster_type(pset_cluster_type); + + if (pset == NULL) { + panic("no pset for cluster type %d/%d", in_processor_info->cluster_type, pset_cluster_type); + } + + kprintf("%s>chosen pset with cluster id %d cluster type %d for core:\n", + __FUNCTION__, pset->pset_cluster_id, pset->pset_cluster_type); + } +#else /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */ pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor)); +#endif /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */ + assert(pset != NULL); kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type); + processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap); if (!is_boot_cpu) { - processor_init((struct processor *)this_cpu_datap->cpu_processor, - this_cpu_datap->cpu_number, pset); + processor_init(processor, this_cpu_datap->cpu_number, pset); if (this_cpu_datap->cpu_l2_access_penalty) { /* @@ -1290,12 +1273,11 @@ ml_processor_register(ml_processor_info_t *in_processor_info, * scheduler, so that threads use the cores with better L2 * preferentially. */ - processor_set_primary(this_cpu_datap->cpu_processor, - master_processor); + processor_set_primary(processor, master_processor); } } - *processor_out = this_cpu_datap->cpu_processor; + *processor_out = processor; *ipi_handler_out = cpu_signal_handler; #if CPMU_AIC_PMI && MONOTONIC *pmi_handler_out = mt_cpmu_aic_pmi; @@ -1410,6 +1392,13 @@ ml_io_map_wcomb( return io_map(phys_addr, size, VM_WIMG_WCOMB); } +void +ml_io_unmap(vm_offset_t addr, vm_size_t sz) +{ + pmap_remove(kernel_pmap, addr, addr + sz); + kmem_free(kernel_map, addr, sz); +} + /* boot memory allocation */ vm_offset_t ml_static_malloc( @@ -1437,14 +1426,30 @@ vm_offset_t ml_static_slide( vm_offset_t vaddr) { - return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase); + vm_offset_t slid_vaddr = vaddr + vm_kernel_slide; + + if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) { + /* This is only intended for use on kernelcache addresses. 
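(Illustrative aside, not part of the diff.) ml_static_slide()/ml_static_unslide() now reject anything outside the kernelcache window and otherwise just add or subtract vm_kernel_slide. All constants in the standalone sketch below are hypothetical.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t vm_kernel_slide     = 0x0000000010000000ULL;  /* hypothetical slide */
    uint64_t vm_kernelcache_base = 0xfffffff010000000ULL;  /* hypothetical bounds */
    uint64_t vm_kernelcache_top  = 0xfffffff030000000ULL;

    uint64_t unslid = 0xfffffff000004000ULL;     /* pre-slide address */
    uint64_t slid   = unslid + vm_kernel_slide;  /* what ml_static_slide computes */

    if (slid < vm_kernelcache_base || slid >= vm_kernelcache_top) {
        printf("not a kernelcache address -> 0\n");
    } else {
        /* Round trip: unsliding returns the original address. */
        printf("slid = 0x%llx, unslid again = 0x%llx\n",
            (unsigned long long)slid,
            (unsigned long long)(slid - vm_kernel_slide));
    }
    return 0;
}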
*/ + return 0; + } + + /* + * Because the address is in the kernelcache, we can do a simple + * slide calculation. + */ + return slid_vaddr; } vm_offset_t ml_static_unslide( vm_offset_t vaddr) { - return ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide; + if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) { + /* This is only intended for use on kernelcache addresses. */ + return 0; + } + + return vaddr - vm_kernel_slide; } extern tt_entry_t *arm_kva_to_tte(vm_offset_t va); @@ -1471,6 +1476,9 @@ ml_static_protect( if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) { panic("ml_static_protect(): WX request on %p", (void *) vaddr); } + if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) { + panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr); + } /* Set up the protection bits, and block bits so we can validate block mappings. */ if (new_prot & VM_PROT_WRITE) { @@ -1499,8 +1507,8 @@ ml_static_protect( pt_entry_t ptmp; #if XNU_MONITOR - assert(!TEST_PAGE_RATIO_4); assert(!pmap_is_monitor(ppn)); + assert(!TEST_PAGE_RATIO_4); #endif tte2 = arm_kva_to_tte(vaddr_cur); @@ -1552,7 +1560,6 @@ ml_static_protect( } } else { ptmp = *pte_p; - /* We only need to update the page tables if the protections do not match. */ if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) { ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot; @@ -1579,11 +1586,25 @@ ml_static_protect( void ml_static_mfree( vm_offset_t vaddr, - vm_size_t size) + vm_size_t size) { - vm_offset_t vaddr_cur; - ppnum_t ppn; - uint32_t freed_pages = 0; + vm_offset_t vaddr_cur; + ppnum_t ppn; + uint32_t freed_pages = 0; + uint32_t bad_page_cnt = 0; + uint32_t freed_kernelcache_pages = 0; + +#if defined(__arm64__) && (DEVELOPMENT || DEBUG) + /* For testing hitting a bad ram page */ + static int count = 0; + static int bad_at_cnt = -1; + static bool first = true; + + if (first) { + (void)PE_parse_boot_argn("bad_static_mfree", &bad_at_cnt, sizeof(bad_at_cnt)); + first = false; + } +#endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */ /* It is acceptable (if bad) to fail to free. */ if (vaddr < VM_MIN_KERNEL_ADDRESS) { @@ -1607,24 +1628,33 @@ ml_static_mfree( panic("Failed ml_static_mfree on %p", (void *) vaddr_cur); } -#if 0 - /* - * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme - * relies on the persistence of these mappings for all time. 
- */ - // pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE)); -#endif +#if defined(__arm64__) + bool is_bad = pmap_is_bad_ram(ppn); +#if DEVELOPMENT || DEBUG + is_bad |= (count++ == bad_at_cnt); +#endif /* DEVELOPMENT || DEBUG */ + + if (is_bad) { + ++bad_page_cnt; + vm_page_create_retired(ppn); + continue; + } +#endif /* defined(__arm64__) */ vm_page_create(ppn, (ppn + 1)); freed_pages++; + if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) { + freed_kernelcache_pages++; + } } } vm_page_lockspin_queues(); vm_page_wire_count -= freed_pages; vm_page_wire_count_initial -= freed_pages; + vm_page_kernelcache_count -= freed_kernelcache_pages; vm_page_unlock_queues(); #if DEBUG - kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn); + kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt); #endif } @@ -1822,9 +1852,9 @@ ml_set_decrementer(uint32_t dec_value) cdp->cpu_decrementer = dec_value; if (cdp->cpu_set_decrementer_func) { - ((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value); + cdp->cpu_set_decrementer_func(dec_value); } else { - __asm__ volatile ("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value)); + __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value); } } @@ -1834,10 +1864,10 @@ ml_get_hwclock() uint64_t timebase; // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2 - // "Reads of CNTPCT[_EL0] can occur speculatively and out of order relative + // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative // to other instructions executed on the same processor." __builtin_arm_isb(ISB_SY); - timebase = __builtin_arm_rsr64("CNTPCT_EL0"); + timebase = __builtin_arm_rsr64("CNTVCT_EL0"); return timebase; } @@ -1848,6 +1878,25 @@ ml_get_timebase() return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase; } +/* + * Get the speculative timebase without an ISB. + */ +uint64_t +ml_get_speculative_timebase() +{ + uint64_t timebase; + + timebase = __builtin_arm_rsr64("CNTVCT_EL0"); + + return timebase + getCpuDatap()->cpu_base_timebase; +} + +uint64_t +ml_get_timebase_entropy(void) +{ + return ml_get_speculative_timebase(); +} + uint32_t ml_get_decrementer() { @@ -1857,11 +1906,11 @@ ml_get_decrementer() assert(ml_get_interrupts_enabled() == FALSE); if (cdp->cpu_get_decrementer_func) { - dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)(); + dec = cdp->cpu_get_decrementer_func(); } else { uint64_t wide_val; - __asm__ volatile ("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val)); + wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0"); dec = (uint32_t)wide_val; assert(wide_val == (uint64_t)dec); } @@ -1872,24 +1921,8 @@ ml_get_decrementer() boolean_t ml_get_timer_pending() { - uint64_t cntp_ctl; - - __asm__ volatile ("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl)); - return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE; -} - -boolean_t -ml_wants_panic_trap_to_debugger(void) -{ - boolean_t result = FALSE; -#if XNU_MONITOR - /* - * This looks racey, but if we are in the PPL, preemption will be - * disabled. - */ - result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down); -#endif - return result; + uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0"); + return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? 
TRUE : FALSE; } static void @@ -1907,7 +1940,7 @@ cache_trap_error(thread_t thread, vm_map_address_t fault_addr) } static void -cache_trap_recover() +cache_trap_recover(void) { vm_map_address_t fault_addr; @@ -1920,7 +1953,8 @@ static void set_cache_trap_recover(thread_t thread) { #if defined(HAS_APPLE_PAC) - thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover, + void *fun = &cache_trap_recover; + thread->recover = (vm_address_t)ptrauth_auth_and_resign(fun, ptrauth_key_function_pointer, 0, ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER)); #else /* defined(HAS_APPLE_PAC) */ @@ -2053,13 +2087,13 @@ _enable_timebase_event_stream(uint32_t bit_index) /* * If the SOC supports it (and it isn't broken), enable - * EL0 access to the physical timebase register. + * EL0 access to the timebase registers. */ if (user_timebase_type() != USER_TIMEBASE_NONE) { - cntkctl |= CNTKCTL_EL1_PL0PCTEN; + cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN); } - __asm__ volatile ("msr CNTKCTL_EL1, %0" : : "r"(cntkctl)); + __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl); } /* @@ -2068,31 +2102,48 @@ _enable_timebase_event_stream(uint32_t bit_index) static void _enable_virtual_timer(void) { - uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */ + uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */ - __asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl)); + __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl); + /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */ + __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED); } -uint64_t events_per_sec = 0; - void fiq_context_init(boolean_t enable_fiq __unused) { - _enable_timebase_event_stream(fiq_eventi); - /* Interrupts still disabled. */ assert(ml_get_interrupts_enabled() == FALSE); _enable_virtual_timer(); } void -fiq_context_bootstrap(boolean_t enable_fiq) +wfe_timeout_init(void) +{ + _enable_timebase_event_stream(arm64_eventi); +} + +void +wfe_timeout_configure(void) { -#if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837) /* Could fill in our own ops here, if we needed them */ - uint64_t ticks_per_sec, ticks_per_event; + uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0; uint32_t bit_index; + if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) { + if (events_per_sec <= 0) { + events_per_sec = 1; + } else if (events_per_sec > USEC_PER_SEC) { + events_per_sec = USEC_PER_SEC; + } + } else { +#if defined(ARM_BOARD_WFE_TIMEOUT_NS) + events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS; +#else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */ + /* Default to 1usec (or as close as we can get) */ + events_per_sec = USEC_PER_SEC; +#endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */ + } ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz; ticks_per_event = ticks_per_sec / events_per_sec; bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */ @@ -2112,11 +2163,8 @@ fiq_context_bootstrap(boolean_t enable_fiq) bit_index--; } - fiq_eventi = bit_index; -#else -#error Need a board configuration. 
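(Illustrative aside, not part of the diff.) wfe_timeout_configure() picks the timebase bit whose toggling drives the WFE event stream: ticks_per_event = ticks_per_sec / events_per_sec, then the highest set bit via flsll() - 1. The standalone sketch below assumes a 24 MHz timebase and the default of roughly one event per microsecond.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t ticks_per_sec   = 24000000ULL;  /* assumed timebase frequency */
    uint64_t events_per_sec  = 1000000ULL;   /* default: ~1 event per usec */
    uint64_t ticks_per_event = ticks_per_sec / events_per_sec;  /* 24 ticks */

    /* flsll(ticks_per_event) - 1 == index of the highest set bit (bit 4 for 24). */
    uint32_t bit_index = 63 - (uint32_t)__builtin_clzll(ticks_per_event);

    /* The kernel then adjusts this index (partly elided in the diff context)
     * before programming it as the event-stream divider. */
    printf("ticks_per_event = %llu, highest set bit = %u\n",
        (unsigned long long)ticks_per_event, bit_index);
    return 0;
}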
-#endif - fiq_context_init(enable_fiq); + arm64_eventi = bit_index; + wfe_timeout_init(); } boolean_t @@ -2172,13 +2220,11 @@ ml_energy_stat(thread_t t) void ml_gpu_stat_update(__unused uint64_t gpu_ns_delta) { -#if CONFIG_EMBEDDED /* * For now: update the resource coalition stats of the * current thread's coalition */ task_coalition_update_gpu_stats(current_task(), gpu_ns_delta); -#endif } uint64_t @@ -2187,7 +2233,8 @@ ml_gpu_stat(__unused thread_t t) return 0; } -#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME +#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT + static void timer_state_event(boolean_t switch_to_kernel) { @@ -2196,8 +2243,8 @@ timer_state_event(boolean_t switch_to_kernel) return; } - processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data; - uint64_t now = ml_get_timebase(); + processor_t pd = current_processor(); + uint64_t now = ml_get_speculative_timebase(); timer_stop(pd->current_state, now); pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state; @@ -2219,7 +2266,7 @@ timer_state_event_kernel_to_user(void) { timer_state_event(FALSE); } -#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */ +#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */ /* * The following are required for parts of the kernel @@ -2296,14 +2343,14 @@ ex_cb_invoke( #if defined(HAS_APPLE_PAC) void -ml_task_set_disable_user_jop(task_t task, boolean_t disable_user_jop) +ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop) { assert(task); task->disable_user_jop = disable_user_jop; } void -ml_thread_set_disable_user_jop(thread_t thread, boolean_t disable_user_jop) +ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop) { assert(thread); thread->machine.disable_user_jop = disable_user_jop; @@ -2318,35 +2365,180 @@ ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit) task->rop_pid = early_random(); } } -#endif /* defined(HAS_APPLE_PAC) */ +/** + * jop_pid may be inherited from the parent task or generated inside the shared + * region. Unfortunately these two parameters are available at very different + * times during task creation, so we need to split this into two steps. + */ +void +ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit) +{ + if (inherit) { + task->jop_pid = parent_task->jop_pid; + } else { + task->jop_pid = ml_default_jop_pid(); + } +} + +void +ml_task_set_jop_pid_from_shared_region(task_t task) +{ + vm_shared_region_t sr = vm_shared_region_get(task); + /* + * If there's no shared region, we can assign the key arbitrarily. This + * typically happens when Mach-O image activation failed part of the way + * through, and this task is in the middle of dying with SIGKILL anyway. + */ + if (__improbable(!sr)) { + task->jop_pid = early_random(); + return; + } + vm_shared_region_deallocate(sr); + + /* + * Similarly we have to worry about jetsam having killed the task and + * already cleared the shared_region_id. 
+ */ + task_lock(task); + if (task->shared_region_id != NULL) { + task->jop_pid = shared_region_find_key(task->shared_region_id); + } else { + task->jop_pid = early_random(); + } + task_unlock(task); +} + +void +ml_thread_set_jop_pid(thread_t thread, task_t task) +{ + thread->machine.jop_pid = task->jop_pid; +} +#endif /* defined(HAS_APPLE_PAC) */ #if defined(HAS_APPLE_PAC) +#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \ + asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier)); /* * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data * instrinsic when you don't want to trap on auth fail. * */ - void * ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier) { switch (key & 0x3) { case ptrauth_key_asia: - asm volatile ("autia %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier)); + _ml_auth_ptr_unchecked(ptr, ia, modifier); break; case ptrauth_key_asib: - asm volatile ("autib %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier)); + _ml_auth_ptr_unchecked(ptr, ib, modifier); break; case ptrauth_key_asda: - asm volatile ("autda %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier)); + _ml_auth_ptr_unchecked(ptr, da, modifier); break; case ptrauth_key_asdb: - asm volatile ("autdb %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier)); + _ml_auth_ptr_unchecked(ptr, db, modifier); break; } return ptr; } #endif /* defined(HAS_APPLE_PAC) */ + +#ifdef CONFIG_XNUPOST +void +ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr) +{ + thread_t thread = current_thread(); + thread->machine.expected_fault_handler = expected_fault_handler; + thread->machine.expected_fault_addr = expected_fault_addr; +} + +void +ml_expect_fault_end(void) +{ + thread_t thread = current_thread(); + thread->machine.expected_fault_handler = NULL; + thread->machine.expected_fault_addr = 0; +} +#endif /* CONFIG_XNUPOST */ + +void +ml_hibernate_active_pre(void) +{ +#if HIBERNATION + if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { + + hibernate_rebuild_vm_structs(); + } +#endif /* HIBERNATION */ +} + +void +ml_hibernate_active_post(void) +{ +#if HIBERNATION + if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { + hibernate_machine_init(); + hibernate_vm_lock_end(); + current_cpu_datap()->cpu_hibernate = 0; + } +#endif /* HIBERNATION */ +} + +/** + * Return back a machine-dependent array of address space regions that should be + * reserved by the VM (pre-mapped in the address space). This will prevent user + * processes from allocating or deallocating from within these regions. + * + * @param vm_is64bit True if the process has a 64-bit address space. + * @param regions An out parameter representing an array of regions to reserve. + * + * @return The number of reserved regions returned through `regions`. + */ +size_t +ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions) +{ + assert(regions != NULL); + + /** + * Reserved regions only apply to 64-bit address spaces. This is because + * we only expect to grow the maximum user VA address on 64-bit address spaces + * (we've essentially already reached the max for 32-bit spaces). The reserved + * regions should safely fall outside of the max user VA for 32-bit processes. + */ + if (vm_is64bit) { + *regions = vm_reserved_regions; + return ARRAY_COUNT(vm_reserved_regions); + } else { + /* Don't reserve any VA regions on arm64_32 processes. 
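(Illustrative aside, not part of the diff.) A hypothetical caller of ml_get_vm_reserved_regions(), defined here, might walk the returned array as below; in xnu the VM layer consumes it when constructing a 64-bit user map, so this loop is only a sketch in kernel context and is not buildable standalone. The function name is made up.

static void
dump_reserved_regions_sketch(bool vm_is64bit)
{
    struct vm_reserved_region *regions = NULL;
    size_t count = ml_get_vm_reserved_regions(vm_is64bit, &regions);

    /* For arm64_32 processes, count is 0 and regions stays NULL. */
    for (size_t i = 0; i < count; i++) {
        printf("reserved region '%s': addr 0x%llx size 0x%llx\n",
            regions[i].vmrr_name,
            (unsigned long long)regions[i].vmrr_addr,
            (unsigned long long)regions[i].vmrr_size);
    }
}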
*/ + *regions = NULL; + return 0; + } +} +/* These WFE recommendations are expected to be updated on a relatively + * infrequent cadence, possibly from a different cluster, hence + * false cacheline sharing isn't expected to be material + */ +static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS]; + +uint32_t +ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags) +{ + assert(wfe_cluster_id < MAX_CPU_CLUSTERS); + assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval); + os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed); + return 0; /* Success */ +} + +uint64_t +ml_cluster_wfe_timeout(uint32_t wfe_cluster_id) +{ + /* This and its consumer does not synchronize vis-a-vis updates + * of the recommendation; races are acceptable. + */ + uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed); + return wfet; +}
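(Illustrative aside, not part of the diff.) A hypothetical client of the per-cluster WFE recommendation interface defined above; the function name and interval are made up, and the interval must stay at or below ml_wfe_hint_max_interval or the assert in ml_update_cluster_wfe_recommendation() fires. This is a kernel-context sketch, not standalone code.

static void
wfe_recommendation_sketch(void)
{
    uint32_t cluster_id = 0;            /* hypothetical cluster */
    uint64_t recommended_ticks = 1200;  /* hypothetical abstime interval */

    (void)ml_update_cluster_wfe_recommendation(cluster_id, recommended_ticks, 0);

    /* Readers sample the recommendation with relaxed ordering; a slightly
     * stale value is acceptable by design, per the comment above. */
    uint64_t wfet = ml_cluster_wfe_timeout(cluster_id);
    kprintf("cluster %u WFE timeout: %llu ticks\n",
        cluster_id, (unsigned long long)wfet);
}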