#include <arm/cpu_capabilities.h>
#include <console/serial_protos.h>
#include <kern/machine.h>
+#include <kern/misc_protos.h>
#include <prng/random.h>
#include <kern/startup.h>
#include <kern/thread.h>
#include <kern/timer_queue.h>
#include <mach/machine.h>
#include <machine/atomic.h>
+#include <machine/config.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>
+#include <vm/vm_shared_region.h>
+#include <vm/vm_map.h>
+#include <sys/codesign.h>
#include <sys/kdebug.h>
#include <kern/coalition.h>
#include <pexpert/device_tree.h>
#include <IOKit/IOPlatformExpert.h>
+#if HIBERNATION
+#include <IOKit/IOHibernatePrivate.h>
+#endif /* HIBERNATION */
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
-#include <libkern/kernel_mach_header.h>
+#include <arm64/amcc_rorgn.h>
#endif
#include <libkern/section_keywords.h>
+/**
+ * On supported hardware, debuggable builds make the HID bits read-only
+ * without locking them. This lets people manually modify HID bits while
+ * debugging, since they can use a debugging tool to first reset the HID
+ * bits back to read/write. However it will still catch xnu changes that
+ * accidentally write to HID bits after they've been made read-only.
+ */
+#if HAS_TWO_STAGE_SPR_LOCK && (DEVELOPMENT || DEBUG)
+#define USE_TWO_STAGE_SPR_LOCK
+#endif
+
#if KPC
#include <kern/kpc.h>
#endif
+#define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
+#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
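+/*
+ * For illustration: an MPIDR_EL1 value of 0x0102 has Aff1 == 1 and Aff0 == 2,
+ * so MPIDR_CLUSTER_ID() evaluates to 1 and MPIDR_CPU_ID() evaluates to 2,
+ * i.e. physical CPU 2 within physical cluster 1.
+ */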
+
#if HAS_CLUSTER
static uint8_t cluster_initialized = 0;
#endif
-
-static int max_cpus_initialized = 0;
-#define MAX_CPUS_SET 0x1
-#define MAX_CPUS_WAIT 0x2
-
uint32_t LockTimeOut;
uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;
uint64_t MutexSpin;
-boolean_t is_clock_configured = FALSE;
+uint64_t low_MutexSpin;
+int64_t high_MutexSpin;
-uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
+static uint64_t ml_wfe_hint_max_interval;
+#define MAX_WFE_HINT_INTERVAL_US (500ULL)
-#if CONFIG_NONFATAL_ASSERTS
-extern int mach_assert;
-#endif
-extern volatile uint32_t debug_enabled;
+/* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
+TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
extern vm_offset_t segLOWEST;
extern vm_offset_t segLOWESTTEXT;
extern vm_offset_t segLASTB;
extern unsigned long segSizeLAST;
+/* ARM64 specific bounds; used to test for presence in the kernelcache. */
+extern vm_offset_t vm_kernelcache_base;
+extern vm_offset_t vm_kernelcache_top;
+
#if defined(HAS_IPI)
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
-static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault;
+static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
+ kDeferredIPITimerDefault);
#endif /* defined(HAS_IPI) */
-void machine_conf(void);
-
thread_t Idle_context(void);
-SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... MAX_CPUS - 1] = (uint32_t)-1};
-SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0;
-SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1;
-SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0;
-SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP;
+SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
+SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
+SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
+ .version = CPU_TOPOLOGY_VERSION,
+ .cpus = topology_cpu_array,
+ .clusters = topology_cluster_array,
+};
+/**
+ * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
+ * entries of an arbitrary data type. This is intended for use by specialized consumers
+ * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
+ * as follows:
+ * hypothetical_array[cluster_offsets[AFF1] + AFF0]
+ * Most consumers should instead use general-purpose facilities such as PERCPU or
+ * ml_get_cpu_number().
+ */
+SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
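+/*
+ * For example, on a hypothetical part whose cluster 0 spans AFF0 values 0-3
+ * and whose cluster 1 spans AFF0 values 0-1, cluster_offsets would hold
+ * { 0, 4 }, so the CPU with AFF1 == 1 / AFF0 == 1 would use index
+ * cluster_offsets[1] + 1 == 5 into the hypothetical array.
+ */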
+
+SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
-SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX;
+extern uint32_t lockdown_done;
-lockdown_handler_t lockdown_handler;
-void *lockdown_this;
-lck_mtx_t lockdown_handler_lck;
-lck_grp_t *lockdown_handler_grp;
-int lockdown_done;
+/**
+ * Represents regions of virtual address space that should be reserved
+ * (pre-mapped) in each user address space.
+ */
+SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
+ {
+ .vmrr_name = "GPU Carveout",
+ .vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
+ .vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
+ },
+ /*
+ * Reserve the virtual memory space representing the commpage nesting region
+ * to prevent user processes from allocating memory within it. The actual
+ * page table entries for the commpage are inserted by vm_commpage_enter().
+ * This vm_map_enter() just prevents userspace from allocating/deallocating
+ * anything within the entire commpage nested region.
+ */
+ {
+ .vmrr_name = "commpage nesting",
+ .vmrr_addr = _COMM_PAGE64_NESTING_START,
+ .vmrr_size = _COMM_PAGE64_NESTING_SIZE
+ }
+};
-void ml_lockdown_init(void);
-void ml_lockdown_run_handler(void);
uint32_t get_arm_cpu_version(void);
#if defined(HAS_IPI)
* to a single CPU. Otherwise we may migrate between choosing which
* IPI mechanism to use and issuing the IPI. */
MRS(local_mpidr, "MPIDR_EL1");
- if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) {
- uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
- MSR(ARM64_REG_IPI_RR_LOCAL, x);
+ if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
+ uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
+ MSR("S3_5_C15_C0_0", x);
} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
- uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK);
- MSR(ARM64_REG_IPI_RR_GLOBAL, x);
+ uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
+ MSR("S3_5_C15_C0_1", x);
}
#else
- uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
- MSR(ARM64_REG_IPI_RR, x);
+ uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
+ MSR("S3_5_C15_C0_1", x);
#endif
}
#endif
/* update deferred_ipi_timer_ns with the new clamped value */
absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
- MSR(ARM64_REG_IPI_CR, abstime);
+ MSR("S3_5_C15_C3_1", abstime);
#else
(void)nanosecs;
panic("Platform does not support ACC Fast IPI");
void
machine_idle(void)
{
- __builtin_arm_wsr("DAIFSet", (DAIFSC_IRQF | DAIFSC_FIQF));
+ /* Interrupts are expected to be masked on entry or re-entry via
+ * Idle_load_context()
+ */
+ assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
Idle_context();
__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
}
-void
-init_vfp(void)
-{
- return;
-}
-
-boolean_t
-get_vfp_enabled(void)
-{
- return TRUE;
-}
-
void
OSSynchronizeIO(void)
{
return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
}
+bool
+ml_feature_supported(uint32_t feature_bit)
+{
+ uint64_t aidr_el1_value = 0;
+
+ MRS(aidr_el1_value, "AIDR_EL1");
+
+
+ return aidr_el1_value & feature_bit;
+}
+
/*
* user_cont_hwclock_allowed()
*
- * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0)
+ * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
* as a continuous time source (e.g. from mach_continuous_time)
*/
boolean_t
return USER_TIMEBASE_SPEC;
}
-boolean_t
-arm64_wfe_allowed(void)
-{
- return TRUE;
-}
-
-#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
-
-uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
-uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0;
-vm_offset_t amcc_base;
-
-static void assert_unlocked(void);
-static void assert_amcc_cache_disabled(void);
-static void lock_amcc(void);
-static void lock_mmu(uint64_t begin, uint64_t end);
-
-void
-rorgn_stash_range(void)
-{
-#if DEVELOPMENT || DEBUG
- boolean_t rorgn_disable = FALSE;
-
- PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable));
-
- if (rorgn_disable) {
- /* take early out if boot arg present, don't query any machine registers to avoid
- * dependency on amcc DT entry
- */
- return;
- }
-#endif
-
- /* Get the AMC values, and stash them into rorgn_begin, rorgn_end.
- * gPhysBase is the base of DRAM managed by xnu. we need DRAM_BASE as
- * the AMCC RO region begin/end registers are in units of 16KB page
- * numbers from DRAM_BASE so we'll truncate gPhysBase at 512MB granule
- * and assert the value is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64.
- */
-
- uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL; /* 512MB */
- assert(dram_base == 0x800000000ULL);
-
-#if defined(KERNEL_INTEGRITY_KTRR)
- uint64_t soc_base = 0;
- DTEntry entryP = NULL;
- uintptr_t *reg_prop = NULL;
- uint32_t prop_size = 0;
- int rc;
-
- soc_base = pe_arm_get_soc_base_phys();
- rc = DTFindEntry("name", "mcc", &entryP);
- assert(rc == kSuccess);
- rc = DTGetProperty(entryP, "reg", (void **)®_prop, &prop_size);
- assert(rc == kSuccess);
- amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
-#elif defined(KERNEL_INTEGRITY_CTRR)
- /* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */
-#define TEMP_AMCC_BASE_PA 0x200000000ULL
-#define TEMP_AMCC_SZ 0x100000
- amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ);
-#else
-#error "KERNEL_INTEGRITY config error"
-#endif
-
-#if defined(KERNEL_INTEGRITY_KTRR)
- assert(rRORGNENDADDR > rRORGNBASEADDR);
- rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
- rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
-#elif defined(KERNEL_INTEGRITY_CTRR)
- rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR);
- rorgn_end = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR);
- assert(rorgn_end > rorgn_begin);
-
- for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
- uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR);
- uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR);
- if (!(begin == rorgn_begin && end == rorgn_end)) {
-#if DEVELOPMENT || DEBUG
- panic("iboot programmed CTRR bounds are inconsistent");
-#else
- panic("Inconsistent memory configuration");
-#endif
- }
- }
-
- // convert from page number from DRAM base to PA
- rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base;
- rorgn_end = (rorgn_end << AMCC_PGSHIFT) + dram_base;
-
-#else
-#error KERNEL_INTEGRITY config error
-#endif /* defined (KERNEL_INTEGRITY_KTRR) */
-}
-
-static void
-assert_unlocked()
-{
- uint64_t ktrr_lock = 0;
- uint32_t rorgn_lock = 0;
-
- assert(amcc_base);
-#if defined(KERNEL_INTEGRITY_KTRR)
- rorgn_lock = rRORGNLOCK;
- ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
-#elif defined(KERNEL_INTEGRITY_CTRR)
- for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
- rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK);
- }
- ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);
-#else
-#error KERNEL_INTEGRITY config error
-#endif /* defined(KERNEL_INTEGRITY_KTRR) */
-
- assert(!ktrr_lock);
- assert(!rorgn_lock);
-}
-
-static void
-lock_amcc()
-{
-#if defined(KERNEL_INTEGRITY_KTRR)
- rRORGNLOCK = 1;
- __builtin_arm_isb(ISB_SY);
-#elif defined(KERNEL_INTEGRITY_CTRR)
- /* lockdown planes in reverse order as plane 0 should be locked last */
- for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
- rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1;
- rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1;
- __builtin_arm_isb(ISB_SY);
- }
-#else
-#error KERNEL_INTEGRITY config error
-#endif
-}
-
-static void
-lock_mmu(uint64_t begin, uint64_t end)
-{
-#if defined(KERNEL_INTEGRITY_KTRR)
-
- __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
- __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
- __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);
-
- /* flush TLB */
-
- __builtin_arm_isb(ISB_SY);
- flush_mmu_tlb();
-
-#elif defined (KERNEL_INTEGRITY_CTRR)
- /* this will lock the entire bootstrap cluster. non bootstrap clusters
- * will be locked by respective cluster master in start.s */
-
- __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
- __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);
-
-#if !defined(APPLEVORTEX)
- /* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */
- __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
- flush_mmu_tlb();
-#endif /* !defined(APPLEVORTEX) */
-
- __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
- __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);
-
- uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
- if (current_el == PSR64_MODE_EL2) {
- // CTRR v2 has explicit registers for cluster config. they can only be written in EL2
-
- __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
- __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
- __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
- __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
- }
-
- __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
-#if defined(APPLEVORTEX)
- flush_mmu_tlb();
-#endif /* defined(APPLEVORTEX) */
-
-#else /* defined(KERNEL_INTEGRITY_KTRR) */
-#error KERNEL_INTEGRITY config error
-#endif /* defined(KERNEL_INTEGRITY_KTRR) */
-}
-
-static void
-assert_amcc_cache_disabled()
-{
-#if defined(KERNEL_INTEGRITY_KTRR)
- assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
-#elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006))
- /*
- * T8006 differentiates between data and tag ways being powered up, so
- * make sure to check that both are zero on its single memory plane.
- */
- assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) &
- (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0);
-#elif defined (KERNEL_INTEGRITY_CTRR)
- for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
- assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0);
- }
-#else
-#error KERNEL_INTEGRITY config error
-#endif
-}
-
-/*
- * void rorgn_lockdown(void)
- *
- * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked
- *
- * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in
- * start.s:start_cpu() for subsequent wake/resume of all cores
- */
-void
-rorgn_lockdown(void)
-{
- vm_offset_t ktrr_begin, ktrr_end;
- unsigned long last_segsz;
-
-#if DEVELOPMENT || DEBUG
- boolean_t ktrr_disable = FALSE;
-
- PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable));
-
- if (ktrr_disable) {
- /*
- * take early out if boot arg present, since we may not have amcc DT entry present
- * we can't assert that iboot hasn't programmed the RO region lockdown registers
- */
- goto out;
- }
-#endif /* DEVELOPMENT || DEBUG */
-
- assert_unlocked();
-
- /* [x] - Use final method of determining all kernel text range or expect crashes */
- ktrr_begin = segLOWEST;
- assert(ktrr_begin && gVirtBase && gPhysBase);
-
- ktrr_begin = kvtophys(ktrr_begin);
-
- ktrr_end = kvtophys(segLASTB);
- last_segsz = segSizeLAST;
-#if defined(KERNEL_INTEGRITY_KTRR)
- /* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */
- ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK;
- /* ensure that iboot and xnu agree on the ktrr range */
- assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
- /* assert that __LAST segment containing privileged insns is only a single page */
- assert(last_segsz == PAGE_SIZE);
-#elif defined(KERNEL_INTEGRITY_CTRR)
- ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK;
- /* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making
- * __pinst no execute because PXN applies with MMU off in CTRR. */
- assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end);
-#endif
-
-
-#if DEBUG || DEVELOPMENT
- printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end);
-#endif
-
- /* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */
-
- assert_amcc_cache_disabled();
-
- CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin),
- (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK));
-
- lock_amcc();
-
- lock_mmu(ktrr_begin, ktrr_end);
-
-#if DEVELOPMENT || DEBUG
-out:
-#endif
-
-#if defined(KERNEL_INTEGRITY_CTRR)
- {
- /* wake any threads blocked on cluster master lockdown */
- cpu_data_t *cdp;
- uint64_t mpidr_el1_value;
-
- cdp = getCpuDatap();
- MRS(mpidr_el1_value, "MPIDR_EL1");
- cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
- assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__);
- ctrr_cluster_locked[cdp->cpu_cluster_id] = 1;
- thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]);
- }
-#endif
- /* now we can run lockdown handler */
- ml_lockdown_run_handler();
-}
-
-#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
-
void
machine_startup(__unused boot_args * args)
{
- int boot_arg;
-
#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
gFastIPI = 1;
}
-
- PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns));
#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
-#if CONFIG_NONFATAL_ASSERTS
- PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert));
-#endif
-
- if (PE_parse_boot_argn("preempt", &boot_arg, sizeof(boot_arg))) {
- default_preemption_rate = boot_arg;
- }
- if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof(boot_arg))) {
- default_bg_preemption_rate = boot_arg;
- }
-
- PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof(yield_delay_us));
-
machine_conf();
/*
/* NOTREACHED */
}
+typedef void (*invalidate_fn_t)(void);
+
+static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;
+
+void set_invalidate_hmac_function(invalidate_fn_t fn);
+
void
-machine_lockdown_preflight(void)
+set_invalidate_hmac_function(invalidate_fn_t fn)
{
-#if CONFIG_KERNEL_INTEGRITY
-
-#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
- rorgn_stash_range();
-#endif
+ if (NULL != invalidate_hmac_function) {
+ panic("Invalidate HMAC function already set");
+ }
-#endif
+ invalidate_hmac_function = fn;
}
void
machine_lockdown(void)
{
+ arm_vm_prot_finalize(PE_state.bootArgs);
+
#if CONFIG_KERNEL_INTEGRITY
#if KERNEL_INTEGRITY_WT
/* Watchtower
#endif /* CONFIG_KERNEL_INTEGRITY */
+
+
+ if (NULL != invalidate_hmac_function) {
+ invalidate_hmac_function();
+ }
+
+ lockdown_done = 1;
}
+
char *
machine_boot_info(
__unused char *buf,
return PE_boot_args();
}
-void
-machine_conf(void)
-{
- /*
- * This is known to be inaccurate. mem_size should always be capped at 2 GB
- */
- machine_info.memory_size = (uint32_t)mem_size;
-}
-
-void
-machine_init(void)
-{
- debug_log_init();
- clock_config();
- is_clock_configured = TRUE;
- if (debug_enabled) {
- pmap_map_globals();
- }
-}
-
void
slave_machine_init(__unused void *param)
{
return Shutdown_context(doshutdown, processor);
}
-/*
- * Routine: ml_init_max_cpus
- * Function:
- */
-void
-ml_init_max_cpus(unsigned int max_cpus)
-{
- boolean_t current_state;
-
- current_state = ml_set_interrupts_enabled(FALSE);
- if (max_cpus_initialized != MAX_CPUS_SET) {
- machine_info.max_cpus = max_cpus;
- machine_info.physical_cpu_max = max_cpus;
- machine_info.logical_cpu_max = max_cpus;
- if (max_cpus_initialized == MAX_CPUS_WAIT) {
- thread_wakeup((event_t) &max_cpus_initialized);
- }
- max_cpus_initialized = MAX_CPUS_SET;
- }
- (void) ml_set_interrupts_enabled(current_state);
-}
-
-/*
- * Routine: ml_get_max_cpus
- * Function:
- */
-unsigned int
-ml_get_max_cpus(void)
-{
- boolean_t current_state;
-
- current_state = ml_set_interrupts_enabled(FALSE);
- if (max_cpus_initialized != MAX_CPUS_SET) {
- max_cpus_initialized = MAX_CPUS_WAIT;
- assert_wait((event_t) &max_cpus_initialized, THREAD_UNINT);
- (void) thread_block(THREAD_CONTINUE_NULL);
- }
- (void) ml_set_interrupts_enabled(current_state);
- return machine_info.max_cpus;
-}
/*
* Routine: ml_init_lock_timeout
nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
}
MutexSpin = abstime;
+ low_MutexSpin = MutexSpin;
+
+
+ /*
+ * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+ * real_ncpus is not set at this time
+ *
+ * NOTE: active spinning is disabled in arm. It can be activated
+ * by setting high_MutexSpin through the sysctl.
+ */
+ high_MutexSpin = low_MutexSpin;
+
+ nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
+}
+
+/*
+ * This is called when all of the ml_processor_info_t structures have been
+ * initialized and all the processors have been started through processor_start().
+ *
+ * Required by the scheduler subsystem.
+ */
+void
+ml_cpu_init_completed(void)
+{
}
/*
cpu_data_ptr->interrupt_handler = handler;
cpu_data_ptr->interrupt_refCon = refCon;
- cpu_data_ptr->interrupts_enabled = TRUE;
(void) ml_set_interrupts_enabled(current_state);
-
- initialize_screen(NULL, kPEAcquireScreen);
}
/*
}
}
+#define ML_READPROP_MANDATORY UINT64_MAX
+
+static uint64_t
+ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
+{
+ void const *prop;
+ unsigned int propSize;
+
+ if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
+ if (propSize == sizeof(uint8_t)) {
+ return *((uint8_t const *)prop);
+ } else if (propSize == sizeof(uint16_t)) {
+ return *((uint16_t const *)prop);
+ } else if (propSize == sizeof(uint32_t)) {
+ return *((uint32_t const *)prop);
+ } else if (propSize == sizeof(uint64_t)) {
+ return *((uint64_t const *)prop);
+ } else {
+ panic("CPU property '%s' has bad size %u", propertyName, propSize);
+ }
+ } else {
+ if (default_value == ML_READPROP_MANDATORY) {
+ panic("Missing mandatory property '%s'", propertyName);
+ }
+ return default_value;
+ }
+}
+
+static boolean_t
+ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
+{
+ uint64_t const *prop;
+ unsigned int propSize;
+
+ if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
+ return FALSE;
+ }
+
+ if (propSize != sizeof(uint64_t) * 2) {
+ panic("Wrong property size for %s", propertyName);
+ }
+
+ *pa_ptr = prop[0];
+ *len_ptr = prop[1];
+ return TRUE;
+}
+
+static boolean_t
+ml_is_boot_cpu(const DTEntry entry)
+{
+ void const *prop;
+ unsigned int propSize;
+
+ if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
+ panic("unable to retrieve state for cpu");
+ }
+
+ if (strncmp((char const *)prop, "running", propSize) == 0) {
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+static void
+ml_read_chip_revision(unsigned int *rev __unused)
+{
+ // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
+#ifdef APPLE_ARM64_ARCH_FAMILY
+ DTEntry entryP;
+
+ if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
+ *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
+ } else {
+ *rev = CPU_VERSION_UNKNOWN;
+ }
+#endif
+}
+
void
ml_parse_cpu_topology(void)
{
uint32_t cpu_boot_arg;
int err;
+ int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
+ int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
cpu_boot_arg = MAX_CPUS;
-
PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
- err = DTLookupEntry(NULL, "/cpus", &entry);
+ err = SecureDTLookupEntry(NULL, "/cpus", &entry);
assert(err == kSuccess);
- err = DTInitEntryIterator(entry, &iter);
+ err = SecureDTInitEntryIterator(entry, &iter);
assert(err == kSuccess);
- while (kSuccess == DTIterateEntries(&iter, &child)) {
- unsigned int propSize;
- void *prop = NULL;
- int cpu_id = avail_cpus++;
+ for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
+ cluster_offsets[i] = -1;
+ cluster_phys_to_logical[i] = -1;
+ cluster_max_cpu_phys_id[i] = 0;
+ }
+
+ while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
+ boolean_t is_boot_cpu = ml_is_boot_cpu(child);
- if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize)) {
- cpu_id = *((int32_t*)prop);
+ // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
+ // been added to the topology struct yet, and we only have one slot left, then skip
+ // every other non-boot CPU in order to leave room for the boot CPU.
+ //
+ // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
+ // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
+ if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
+ continue;
+ }
+ if (topology_info.num_cpus >= cpu_boot_arg) {
+ break;
}
- assert(cpu_id < MAX_CPUS);
- assert(cpu_phys_ids[cpu_id] == (uint32_t)-1);
+ ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
- if (boot_cpu == -1) {
- if (kSuccess != DTGetProperty(child, "state", &prop, &propSize)) {
- panic("unable to retrieve state for cpu %d", cpu_id);
- }
+ cpu->cpu_id = topology_info.num_cpus++;
+ assert(cpu->cpu_id < MAX_CPUS);
+ topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
- if (strncmp((char*)prop, "running", propSize) == 0) {
- boot_cpu = cpu_id;
- }
- }
- if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize)) {
- panic("unable to retrieve physical ID for cpu %d", cpu_id);
+ cpu->die_id = (int)ml_readprop(child, "die-id", 0);
+ topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);
+
+ cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
+
+ cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
+ cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
+ cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
+ cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
+ cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
+
+ ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
+ ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
+ ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
+ cpu->cluster_type = CLUSTER_TYPE_SMP;
+
+ int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
+ if (cluster_type == 'E') {
+ cpu->cluster_type = CLUSTER_TYPE_E;
+ } else if (cluster_type == 'P') {
+ cpu->cluster_type = CLUSTER_TYPE_P;
}
- cpu_phys_ids[cpu_id] = *((uint32_t*)prop);
+ /*
+ * Since we want to keep a linear cluster ID space, we cannot just rely
+ * on the value provided by EDT. Instead, use the MPIDR value to see if we have
+ * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
+ */
+#if HAS_CLUSTER
+ uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
+#else
+ uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
+#endif
+ assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
+ cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
+ topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
+
+ assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
+
+ ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
+ if (cluster->num_cpus == 0) {
+ assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
+
+ topology_info.num_clusters++;
+ topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
+
+ cluster->cluster_id = cpu->cluster_id;
+ cluster->cluster_type = cpu->cluster_type;
+ cluster->first_cpu_id = cpu->cpu_id;
+ assert(cluster_phys_to_logical[phys_cluster_id] == -1);
+ cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
+
+ // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
+ // If we wind up with a bunch of these, we might want to create separate per-cluster
+ // EDT nodes and have the CPU nodes reference them through a phandle.
+ ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
+ ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
+ }
- if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg))) {
- max_cpu_number = cpu_id;
+#if HAS_CLUSTER
+ if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
+ cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
}
- }
+#endif
- if (avail_cpus > cpu_boot_arg) {
- avail_cpus = cpu_boot_arg;
- }
+ cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
+ cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
+
+ cluster->num_cpus++;
+ cluster->cpu_mask |= 1ULL << cpu->cpu_id;
- if (avail_cpus == 0) {
- panic("No cpus found!");
+ if (is_boot_cpu) {
+ assert(topology_info.boot_cpu == NULL);
+ topology_info.boot_cpu = cpu;
+ topology_info.boot_cluster = cluster;
+ }
}
- if (boot_cpu == -1) {
- panic("unable to determine boot cpu!");
+#if HAS_CLUSTER
+ /*
+ * Build the cluster offset array, ensuring that the region reserved
+ * for each physical cluster contains enough entries to be indexed
+ * by the maximum physical CPU ID (AFF0) within the cluster.
+ */
+ unsigned int cur_cluster_offset = 0;
+ for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
+ if (cluster_phys_to_logical[i] != -1) {
+ cluster_offsets[i] = cur_cluster_offset;
+ cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
+ }
}
+ assert(cur_cluster_offset <= MAX_CPUS);
+#else
+ /*
+ * For H10, there are really 2 physical clusters, but they are not separated
+ * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
+ * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
+ * treat H10 and earlier devices as though they contain a single cluster.
+ */
+ cluster_offsets[0] = 0;
+#endif
+ assert(topology_info.boot_cpu != NULL);
+ ml_read_chip_revision(&topology_info.chip_revision);
/*
* Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
* per-cpu data object.
*/
assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
- __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu);
+ __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
+}
+
+const ml_topology_info_t *
+ml_get_topology_info(void)
+{
+ return &topology_info;
+}
+
+void
+ml_map_cpu_pio(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < topology_info.num_cpus; i++) {
+ ml_topology_cpu_t *cpu = &topology_info.cpus[i];
+ if (cpu->cpu_IMPL_pa) {
+ cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
+ cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
+ }
+ if (cpu->cpu_UTTDBG_pa) {
+ cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
+ }
+ }
+
+ for (i = 0; i < topology_info.num_clusters; i++) {
+ ml_topology_cluster_t *cluster = &topology_info.clusters[i];
+ if (cluster->acc_IMPL_pa) {
+ cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
+ }
+ if (cluster->cpm_IMPL_pa) {
+ cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
+ }
+ }
}
unsigned int
ml_get_cpu_count(void)
{
- return avail_cpus;
+ return topology_info.num_cpus;
+}
+
+unsigned int
+ml_get_cluster_count(void)
+{
+ return topology_info.num_clusters;
}
int
ml_get_boot_cpu_number(void)
{
- return boot_cpu;
+ return topology_info.boot_cpu->cpu_id;
}
cluster_type_t
ml_get_boot_cluster(void)
{
- return boot_cluster;
+ return topology_info.boot_cluster->cluster_type;
}
int
ml_get_cpu_number(uint32_t phys_id)
{
- for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) {
- if (cpu_phys_ids[log_id] == phys_id) {
- return log_id;
+ phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
+
+ for (unsigned i = 0; i < topology_info.num_cpus; i++) {
+ if (topology_info.cpus[i].phys_id == phys_id) {
+ return i;
}
}
+
return -1;
}
+int
+ml_get_cluster_number(uint32_t phys_id)
+{
+ int cpu_id = ml_get_cpu_number(phys_id);
+ if (cpu_id < 0) {
+ return -1;
+ }
+
+ ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
+
+ return cpu->cluster_id;
+}
+
+unsigned int
+ml_get_cpu_number_local(void)
+{
+ uint64_t mpidr_el1_value = 0;
+ unsigned cpu_id;
+
+ /* We identify the CPU based on the constant bits of MPIDR_EL1. */
+ MRS(mpidr_el1_value, "MPIDR_EL1");
+ cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);
+
+ assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());
+
+ return cpu_id;
+}
+
+int
+ml_get_cluster_number_local()
+{
+ uint64_t mpidr_el1_value = 0;
+ unsigned cluster_id;
+
+ /* We identify the cluster based on the constant bits of MPIDR_EL1. */
+ MRS(mpidr_el1_value, "MPIDR_EL1");
+ cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);
+
+ assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());
+
+ return cluster_id;
+}
+
int
ml_get_max_cpu_number(void)
{
- return max_cpu_number;
+ return topology_info.max_cpu_id;
}
+int
+ml_get_max_cluster_number(void)
+{
+ return topology_info.max_cluster_id;
+}
+
+unsigned int
+ml_get_first_cpu_id(unsigned int cluster_id)
+{
+ return topology_info.clusters[cluster_id].first_cpu_id;
+}
void
ml_lockdown_init()
{
- lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL);
- assert(lockdown_handler_grp != NULL);
-
- lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);
-
-#if defined(KERNEL_INTEGRITY_CTRR)
- init_ctrr_cpu_start_lock();
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
+ rorgn_stash_range();
#endif
}
kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f, void *this)
{
- if (lockdown_handler || !f) {
+ if (!f) {
return KERN_FAILURE;
}
- lck_mtx_lock(&lockdown_handler_lck);
- lockdown_handler = f;
- lockdown_this = this;
-
-#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
- lockdown_done = 1;
- lockdown_handler(this);
-#else
- if (lockdown_done) {
- lockdown_handler(this);
- }
-#endif
- lck_mtx_unlock(&lockdown_handler_lck);
+ assert(lockdown_done);
+ f(this); // XXX: f this whole function
return KERN_SUCCESS;
}
-void
-ml_lockdown_run_handler()
-{
- lck_mtx_lock(&lockdown_handler_lck);
- assert(!lockdown_done);
-
- lockdown_done = 1;
- if (lockdown_handler) {
- lockdown_handler(lockdown_this);
- }
- lck_mtx_unlock(&lockdown_handler_lck);
-}
-
kern_return_t
ml_processor_register(ml_processor_info_t *in_processor_info,
processor_t *processor_out, ipi_handler_t *ipi_handler_out,
return KERN_FAILURE;
}
- if ((unsigned int)OSIncrementAtomic((SInt32*)®_cpu_count) >= avail_cpus) {
+ if ((unsigned)OSIncrementAtomic((SInt32*)®_cpu_count) >= topology_info.num_cpus) {
return KERN_FAILURE;
}
is_boot_cpu = TRUE;
}
- assert(in_processor_info->log_id < MAX_CPUS);
+ assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
this_cpu_datap->cpu_id = in_processor_info->cpu_id;
}
if (!is_boot_cpu) {
- this_cpu_datap->cpu_number = in_processor_info->log_id;
+ this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
goto processor_register_error;
}
}
- this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle;
- this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch;
+ this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
+ this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
- this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer;
+ this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
- this_cpu_datap->platform_error_handler = (void *) in_processor_info->platform_error_handler;
+ this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
this_cpu_datap->cluster_master = is_boot_cpu;
#endif /* HAS_CLUSTER */
+#if !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2)
+ {
+ /* Workaround for the existing scheduler
+ * code, which only supports a limited number of psets.
+ *
+ * To get around that limitation, we distribute all cores into
+ * two psets according to their cluster type, instead of
+ * having a dedicated pset per cluster ID.
+ */
+
+ pset_cluster_type_t pset_cluster_type;
+
+ /* For this workaround, we don't expect seeing anything else
+ * than E or P clusters. */
+ switch (in_processor_info->cluster_type) {
+ case CLUSTER_TYPE_E:
+ pset_cluster_type = PSET_AMP_E;
+ break;
+ case CLUSTER_TYPE_P:
+ pset_cluster_type = PSET_AMP_P;
+ break;
+ default:
+ panic("unknown/unsupported cluster type %d", in_processor_info->cluster_type);
+ }
+
+ pset = pset_find_first_by_cluster_type(pset_cluster_type);
+
+ if (pset == NULL) {
+ panic("no pset for cluster type %d/%d", in_processor_info->cluster_type, pset_cluster_type);
+ }
+
+ kprintf("%s>chosen pset with cluster id %d cluster type %d for core:\n",
+ __FUNCTION__, pset->pset_cluster_id, pset->pset_cluster_type);
+ }
+#else /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
+#endif /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
+
assert(pset != NULL);
kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
+ processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
if (!is_boot_cpu) {
- processor_init((struct processor *)this_cpu_datap->cpu_processor,
- this_cpu_datap->cpu_number, pset);
+ processor_init(processor, this_cpu_datap->cpu_number, pset);
if (this_cpu_datap->cpu_l2_access_penalty) {
/*
* scheduler, so that threads use the cores with better L2
* preferentially.
*/
- processor_set_primary(this_cpu_datap->cpu_processor,
- master_processor);
+ processor_set_primary(processor, master_processor);
}
}
- *processor_out = this_cpu_datap->cpu_processor;
+ *processor_out = processor;
*ipi_handler_out = cpu_signal_handler;
#if CPMU_AIC_PMI && MONOTONIC
*pmi_handler_out = mt_cpmu_aic_pmi;
return io_map(phys_addr, size, VM_WIMG_WCOMB);
}
+void
+ml_io_unmap(vm_offset_t addr, vm_size_t sz)
+{
+ pmap_remove(kernel_pmap, addr, addr + sz);
+ kmem_free(kernel_map, addr, sz);
+}
+
/* boot memory allocation */
vm_offset_t
ml_static_malloc(
ml_static_slide(
vm_offset_t vaddr)
{
- return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase);
+ vm_offset_t slid_vaddr = vaddr + vm_kernel_slide;
+
+ if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) {
+ /* This is only intended for use on kernelcache addresses. */
+ return 0;
+ }
+
+ /*
+ * Because the address is in the kernelcache, we can do a simple
+ * slide calculation.
+ */
+ return slid_vaddr;
}
vm_offset_t
ml_static_unslide(
vm_offset_t vaddr)
{
- return ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide;
+ if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) {
+ /* This is only intended for use on kernelcache addresses. */
+ return 0;
+ }
+
+ return vaddr - vm_kernel_slide;
}
extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
panic("ml_static_protect(): WX request on %p", (void *) vaddr);
}
+ if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
+ panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
+ }
/* Set up the protection bits, and block bits so we can validate block mappings. */
if (new_prot & VM_PROT_WRITE) {
pt_entry_t ptmp;
#if XNU_MONITOR
- assert(!TEST_PAGE_RATIO_4);
assert(!pmap_is_monitor(ppn));
+ assert(!TEST_PAGE_RATIO_4);
#endif
tte2 = arm_kva_to_tte(vaddr_cur);
}
} else {
ptmp = *pte_p;
-
/* We only need to update the page tables if the protections do not match. */
if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
void
ml_static_mfree(
vm_offset_t vaddr,
- vm_size_t size)
+ vm_size_t size)
{
- vm_offset_t vaddr_cur;
- ppnum_t ppn;
- uint32_t freed_pages = 0;
+ vm_offset_t vaddr_cur;
+ ppnum_t ppn;
+ uint32_t freed_pages = 0;
+ uint32_t bad_page_cnt = 0;
+ uint32_t freed_kernelcache_pages = 0;
+
+#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
+ /* For testing hitting a bad ram page */
+ static int count = 0;
+ static int bad_at_cnt = -1;
+ static bool first = true;
+
+ if (first) {
+ (void)PE_parse_boot_argn("bad_static_mfree", &bad_at_cnt, sizeof(bad_at_cnt));
+ first = false;
+ }
+#endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
/* It is acceptable (if bad) to fail to free. */
if (vaddr < VM_MIN_KERNEL_ADDRESS) {
panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
}
-#if 0
- /*
- * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme
- * relies on the persistence of these mappings for all time.
- */
- // pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE));
-#endif
+#if defined(__arm64__)
+ bool is_bad = pmap_is_bad_ram(ppn);
+#if DEVELOPMENT || DEBUG
+ is_bad |= (count++ == bad_at_cnt);
+#endif /* DEVELOPMENT || DEBUG */
+
+ if (is_bad) {
+ ++bad_page_cnt;
+ vm_page_create_retired(ppn);
+ continue;
+ }
+#endif /* defined(__arm64__) */
vm_page_create(ppn, (ppn + 1));
freed_pages++;
+ if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
+ freed_kernelcache_pages++;
+ }
}
}
vm_page_lockspin_queues();
vm_page_wire_count -= freed_pages;
vm_page_wire_count_initial -= freed_pages;
+ vm_page_kernelcache_count -= freed_kernelcache_pages;
vm_page_unlock_queues();
#if DEBUG
- kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+ kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
#endif
}
cdp->cpu_decrementer = dec_value;
if (cdp->cpu_set_decrementer_func) {
- ((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value);
+ cdp->cpu_set_decrementer_func(dec_value);
} else {
- __asm__ volatile ("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value));
+ __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
}
}
uint64_t timebase;
// ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
- // "Reads of CNTPCT[_EL0] can occur speculatively and out of order relative
+ // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
// to other instructions executed on the same processor."
__builtin_arm_isb(ISB_SY);
- timebase = __builtin_arm_rsr64("CNTPCT_EL0");
+ timebase = __builtin_arm_rsr64("CNTVCT_EL0");
return timebase;
}
return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
}
+/*
+ * Get the speculative timebase without an ISB.
+ */
+uint64_t
+ml_get_speculative_timebase()
+{
+ uint64_t timebase;
+
+ timebase = __builtin_arm_rsr64("CNTVCT_EL0");
+
+ return timebase + getCpuDatap()->cpu_base_timebase;
+}
+
+uint64_t
+ml_get_timebase_entropy(void)
+{
+ return ml_get_speculative_timebase();
+}
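+
+/*
+ * Note: because no ISB is issued, these speculative reads may complete out of
+ * order with respect to surrounding instructions. That is acceptable for the
+ * entropy hook above and for coarse timestamps; callers that need an ordered
+ * read of the timebase should use ml_get_timebase() instead.
+ */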
+
uint32_t
ml_get_decrementer()
{
assert(ml_get_interrupts_enabled() == FALSE);
if (cdp->cpu_get_decrementer_func) {
- dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)();
+ dec = cdp->cpu_get_decrementer_func();
} else {
uint64_t wide_val;
- __asm__ volatile ("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val));
+ wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
dec = (uint32_t)wide_val;
assert(wide_val == (uint64_t)dec);
}
boolean_t
ml_get_timer_pending()
{
- uint64_t cntp_ctl;
-
- __asm__ volatile ("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl));
- return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
-}
-
-boolean_t
-ml_wants_panic_trap_to_debugger(void)
-{
- boolean_t result = FALSE;
-#if XNU_MONITOR
- /*
- * This looks racey, but if we are in the PPL, preemption will be
- * disabled.
- */
- result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down);
-#endif
- return result;
+ uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
+ return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
}
static void
}
static void
-cache_trap_recover()
+cache_trap_recover(void)
{
vm_map_address_t fault_addr;
set_cache_trap_recover(thread_t thread)
{
#if defined(HAS_APPLE_PAC)
- thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
+ void *fun = &cache_trap_recover;
+ thread->recover = (vm_address_t)ptrauth_auth_and_resign(fun,
ptrauth_key_function_pointer, 0,
ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
#else /* defined(HAS_APPLE_PAC) */
/*
* If the SOC supports it (and it isn't broken), enable
- * EL0 access to the physical timebase register.
+ * EL0 access to the timebase registers.
*/
if (user_timebase_type() != USER_TIMEBASE_NONE) {
- cntkctl |= CNTKCTL_EL1_PL0PCTEN;
+ cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
}
- __asm__ volatile ("msr CNTKCTL_EL1, %0" : : "r"(cntkctl));
+ __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
}
/*
static void
_enable_virtual_timer(void)
{
- uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
+ uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
- __asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl));
+ __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
+ /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
+ __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
}
-uint64_t events_per_sec = 0;
-
void
fiq_context_init(boolean_t enable_fiq __unused)
{
- _enable_timebase_event_stream(fiq_eventi);
-
/* Interrupts still disabled. */
assert(ml_get_interrupts_enabled() == FALSE);
_enable_virtual_timer();
}
void
-fiq_context_bootstrap(boolean_t enable_fiq)
+wfe_timeout_init(void)
+{
+ _enable_timebase_event_stream(arm64_eventi);
+}
+
+void
+wfe_timeout_configure(void)
{
-#if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837)
/* Could fill in our own ops here, if we needed them */
- uint64_t ticks_per_sec, ticks_per_event;
+ uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
uint32_t bit_index;
+ if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
+ if (events_per_sec <= 0) {
+ events_per_sec = 1;
+ } else if (events_per_sec > USEC_PER_SEC) {
+ events_per_sec = USEC_PER_SEC;
+ }
+ } else {
+#if defined(ARM_BOARD_WFE_TIMEOUT_NS)
+ events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS;
+#else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
+ /* Default to 1usec (or as close as we can get) */
+ events_per_sec = USEC_PER_SEC;
+#endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
+ }
ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
ticks_per_event = ticks_per_sec / events_per_sec;
bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
bit_index--;
}
- fiq_eventi = bit_index;
-#else
-#error Need a board configuration.
-#endif
- fiq_context_init(enable_fiq);
+ arm64_eventi = bit_index;
+ wfe_timeout_init();
}
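
/*
 * Worked example (assuming a 24 MHz timebase and the default of one event per
 * microsecond): ticks_per_event = 24000000 / 1000000 = 24 and
 * flsll(24) - 1 = 4, so the timebase event stream ends up keyed to a low-order
 * timebase bit that toggles on the order of once per microsecond.
 */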
boolean_t
void
ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
{
-#if CONFIG_EMBEDDED
/*
* For now: update the resource coalition stats of the
* current thread's coalition
*/
task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
-#endif
}
uint64_t
return 0;
}
-#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
+#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
+
static void
timer_state_event(boolean_t switch_to_kernel)
{
return;
}
- processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data;
- uint64_t now = ml_get_timebase();
+ processor_t pd = current_processor();
+ uint64_t now = ml_get_speculative_timebase();
timer_stop(pd->current_state, now);
pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
{
timer_state_event(FALSE);
}
-#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */
+#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
/*
* The following are required for parts of the kernel
#if defined(HAS_APPLE_PAC)
void
-ml_task_set_disable_user_jop(task_t task, boolean_t disable_user_jop)
+ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
{
assert(task);
task->disable_user_jop = disable_user_jop;
}
void
-ml_thread_set_disable_user_jop(thread_t thread, boolean_t disable_user_jop)
+ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
{
assert(thread);
thread->machine.disable_user_jop = disable_user_jop;
task->rop_pid = early_random();
}
}
-#endif /* defined(HAS_APPLE_PAC) */
+/**
+ * jop_pid may be inherited from the parent task or generated inside the shared
+ * region. Unfortunately these two parameters are available at very different
+ * times during task creation, so we need to split this into two steps.
+ */
+void
+ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
+{
+ if (inherit) {
+ task->jop_pid = parent_task->jop_pid;
+ } else {
+ task->jop_pid = ml_default_jop_pid();
+ }
+}
+
+void
+ml_task_set_jop_pid_from_shared_region(task_t task)
+{
+ vm_shared_region_t sr = vm_shared_region_get(task);
+ /*
+ * If there's no shared region, we can assign the key arbitrarily. This
+ * typically happens when Mach-O image activation failed part of the way
+ * through, and this task is in the middle of dying with SIGKILL anyway.
+ */
+ if (__improbable(!sr)) {
+ task->jop_pid = early_random();
+ return;
+ }
+ vm_shared_region_deallocate(sr);
+
+ /*
+ * Similarly we have to worry about jetsam having killed the task and
+ * already cleared the shared_region_id.
+ */
+ task_lock(task);
+ if (task->shared_region_id != NULL) {
+ task->jop_pid = shared_region_find_key(task->shared_region_id);
+ } else {
+ task->jop_pid = early_random();
+ }
+ task_unlock(task);
+}
+
+void
+ml_thread_set_jop_pid(thread_t thread, task_t task)
+{
+ thread->machine.jop_pid = task->jop_pid;
+}
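+
+/*
+ * A plausible calling sequence for the above: ml_task_set_jop_pid() at task
+ * creation (inheriting from the parent or taking the default key),
+ * ml_task_set_jop_pid_from_shared_region() once the task has been associated
+ * with a shared region, and ml_thread_set_jop_pid() as each thread is created.
+ */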
+#endif /* defined(HAS_APPLE_PAC) */
#if defined(HAS_APPLE_PAC)
+#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
+ asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
/*
* ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
 * intrinsic when you don't want to trap on auth fail.
*
*/
-
void *
ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
{
switch (key & 0x3) {
case ptrauth_key_asia:
- asm volatile ("autia %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
+ _ml_auth_ptr_unchecked(ptr, ia, modifier);
break;
case ptrauth_key_asib:
- asm volatile ("autib %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
+ _ml_auth_ptr_unchecked(ptr, ib, modifier);
break;
case ptrauth_key_asda:
- asm volatile ("autda %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
+ _ml_auth_ptr_unchecked(ptr, da, modifier);
break;
case ptrauth_key_asdb:
- asm volatile ("autdb %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
+ _ml_auth_ptr_unchecked(ptr, db, modifier);
break;
}
return ptr;
}
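
/*
 * Usage sketch: p = ml_auth_ptr_unchecked(p, ptrauth_key_asda, modifier);
 * unlike the compiler intrinsic, no explicit failure check is emitted, so on
 * hardware where a failed aut* does not itself fault the caller simply gets
 * back a poisoned pointer that will fault if it is ever dereferenced.
 */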
#endif /* defined(HAS_APPLE_PAC) */
+
+#ifdef CONFIG_XNUPOST
+void
+ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
+{
+ thread_t thread = current_thread();
+ thread->machine.expected_fault_handler = expected_fault_handler;
+ thread->machine.expected_fault_addr = expected_fault_addr;
+}
+
+void
+ml_expect_fault_end(void)
+{
+ thread_t thread = current_thread();
+ thread->machine.expected_fault_handler = NULL;
+ thread->machine.expected_fault_addr = 0;
+}
+#endif /* CONFIG_XNUPOST */
+
+void
+ml_hibernate_active_pre(void)
+{
+#if HIBERNATION
+ if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
+
+ hibernate_rebuild_vm_structs();
+ }
+#endif /* HIBERNATION */
+}
+
+void
+ml_hibernate_active_post(void)
+{
+#if HIBERNATION
+ if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
+ hibernate_machine_init();
+ hibernate_vm_lock_end();
+ current_cpu_datap()->cpu_hibernate = 0;
+ }
+#endif /* HIBERNATION */
+}
+
+/**
+ * Return a machine-dependent array of address space regions that should be
+ * reserved by the VM (pre-mapped in the address space). This will prevent user
+ * processes from allocating or deallocating from within these regions.
+ *
+ * @param vm_is64bit True if the process has a 64-bit address space.
+ * @param regions An out parameter representing an array of regions to reserve.
+ *
+ * @return The number of reserved regions returned through `regions`.
+ */
+size_t
+ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
+{
+ assert(regions != NULL);
+
+ /**
+ * Reserved regions only apply to 64-bit address spaces. This is because
+ * we only expect to grow the maximum user VA address on 64-bit address spaces
+ * (we've essentially already reached the max for 32-bit spaces). The reserved
+ * regions should safely fall outside of the max user VA for 32-bit processes.
+ */
+ if (vm_is64bit) {
+ *regions = vm_reserved_regions;
+ return ARRAY_COUNT(vm_reserved_regions);
+ } else {
+ /* Don't reserve any VA regions on arm64_32 processes. */
+ *regions = NULL;
+ return 0;
+ }
+}
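+
+/*
+ * A consumer in the VM layer might use this roughly as follows (sketch only;
+ * vm_map_enter_reserved_region() is a hypothetical helper):
+ *
+ *     struct vm_reserved_region *regions;
+ *     size_t count = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
+ *     for (size_t i = 0; i < count; i++) {
+ *         vm_map_enter_reserved_region(map, regions[i].vmrr_addr,
+ *             regions[i].vmrr_size, regions[i].vmrr_name);
+ *     }
+ */
+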
+/* These WFE recommendations are expected to be updated on a relatively
+ * infrequent cadence, possibly from a different cluster, hence
+ * false cacheline sharing isn't expected to be material
+ */
+static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
+
+uint32_t
+ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
+{
+ assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
+ assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
+ os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
+ return 0; /* Success */
+}
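+
+/*
+ * ml_update_cluster_wfe_recommendation() above is the producer side, presumably
+ * driven by power-management policy, while ml_cluster_wfe_timeout() below is
+ * consulted on the WFE/idle path to bound how long a core waits in WFE before
+ * rechecking for work.
+ */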
+
+uint64_t
+ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
+{
+	/* This and its consumer do not synchronize vis-a-vis updates
+ * of the recommendation; races are acceptable.
+ */
+ uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
+ return wfet;
+}