+static void
+sched_timer_deadline_tracking_init(void)
+{
+ nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
+ nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
+}
+
+#if __arm__ || __arm64__
+
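+/*
+ * One bit per CPU in the recommendation bitmask: bit n set means CPU n is
+ * recommended for scheduling. ALL_CORES_RECOMMENDED sets every bit.
+ */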
+uint32_t perfcontrol_requested_recommended_cores = ALL_CORES_RECOMMENDED;
+uint32_t perfcontrol_requested_recommended_core_count = MAX_CPUS;
+bool perfcontrol_failsafe_active = false;
+bool perfcontrol_sleep_override = false;
+
+uint64_t perfcontrol_failsafe_maintenance_runnable_time;
+uint64_t perfcontrol_failsafe_activation_time;
+uint64_t perfcontrol_failsafe_deactivation_time;
+
+/* Data about who likely caused the failsafe and how long they ran */
+#define FAILSAFE_NAME_LEN 33 /* (2*MAXCOMLEN)+1 from size of p_name */
+char perfcontrol_failsafe_name[FAILSAFE_NAME_LEN];
+int perfcontrol_failsafe_pid;
+uint64_t perfcontrol_failsafe_tid;
+uint64_t perfcontrol_failsafe_thread_timer_at_start;
+uint64_t perfcontrol_failsafe_thread_timer_last_seen;
+uint32_t perfcontrol_failsafe_recommended_at_trigger;
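+
+/*
+ * All of the failsafe state above is written under
+ * sched_recommended_cores_lock at splsched(); the unlocked reads in
+ * sched_consider_recommended_cores() are the racy pre-checks called out
+ * in its TODOs.
+ */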
+
+/*
+ * Perf controller calls here to update the recommended core bitmask.
+ * If the failsafe is active, we don't immediately apply the new value.
+ * Instead, we store the new request and use it after the failsafe deactivates.
+ *
+ * If the failsafe is not active, immediately apply the update.
+ *
+ * No scheduler locks are held, no other locks that the scheduler might
+ * depend on are held, and interrupts are enabled.
+ *
+ * The prototype currently lives in osfmk/arm/machine_routines.h.
+ */
+void
+sched_perfcontrol_update_recommended_cores(uint32_t recommended_cores)
+{
+ assert(preemption_enabled());
+
+ spl_t s = splsched();
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ perfcontrol_requested_recommended_cores = recommended_cores;
+ perfcontrol_requested_recommended_core_count = __builtin_popcountll(recommended_cores);
+
+ if ((perfcontrol_failsafe_active == false) && (perfcontrol_sleep_override == false)) {
+ sched_update_recommended_cores(perfcontrol_requested_recommended_cores & usercontrol_requested_recommended_cores);
+ } else {
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_REC_CORES_FAILSAFE) | DBG_FUNC_NONE,
+ perfcontrol_requested_recommended_cores,
+ sched_maintenance_thread->last_made_runnable_time, 0, 0, 0);
+ }
+
+ simple_unlock(&sched_recommended_cores_lock);
+ splx(s);
+}
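+
+/*
+ * Illustrative caller of sched_perfcontrol_update_recommended_cores()
+ * (hypothetical mask values, not taken from any real perf controller):
+ * a thermal event might restrict scheduling to cores 0 and 1, then lift
+ * the restriction once the event passes:
+ *
+ *	sched_perfcontrol_update_recommended_cores(0x3);
+ *	...
+ *	sched_perfcontrol_update_recommended_cores(ALL_CORES_RECOMMENDED);
+ */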
+
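+/*
+ * Pin the recommendation to all cores for the duration of system sleep.
+ * The perf controller's latest request is preserved and reapplied on wake
+ * by sched_restore_recommended_cores_after_sleep().
+ */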
+void
+sched_override_recommended_cores_for_sleep(void)
+{
+ spl_t s = splsched();
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ if (perfcontrol_sleep_override == false) {
+ perfcontrol_sleep_override = true;
+ sched_update_recommended_cores(ALL_CORES_RECOMMENDED);
+ }
+
+ simple_unlock(&sched_recommended_cores_lock);
+ splx(s);
+}
+
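+/*
+ * Drop the sleep override on wake and reapply the combined perf controller
+ * and user recommendation.
+ */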
+void
+sched_restore_recommended_cores_after_sleep(void)
+{
+ spl_t s = splsched();
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ if (perfcontrol_sleep_override == true) {
+ perfcontrol_sleep_override = false;
+ sched_update_recommended_cores(perfcontrol_requested_recommended_cores & usercontrol_requested_recommended_cores);
+ }
+
+ simple_unlock(&sched_recommended_cores_lock);
+ splx(s);
+}
+
+/*
+ * Consider whether we need to activate the recommended cores failsafe
+ *
+ * Called from the quantum timer interrupt context of a realtime thread.
+ * No scheduler locks are held, and interrupts are disabled.
+ */
+void
+sched_consider_recommended_cores(uint64_t ctime, thread_t cur_thread)
+{
+ /*
+ * Check whether a realtime thread is starving the system and whether
+ * bringing up non-recommended cores would help
+ *
+ * TODO: Is this the correct check for recommended == possible cores?
+ * TODO: Validate the checks without the relevant lock are OK.
+ */
+
+ if (__improbable(perfcontrol_failsafe_active == true)) {
+ /* keep track of how long the responsible thread runs */
+
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ if (perfcontrol_failsafe_active == true &&
+ cur_thread->thread_id == perfcontrol_failsafe_tid) {
+ perfcontrol_failsafe_thread_timer_last_seen = timer_grab(&cur_thread->user_timer) +
+ timer_grab(&cur_thread->system_timer);
+ }
+
+ simple_unlock(&sched_recommended_cores_lock);
+
+ /* we're already trying to solve the problem, so bail */
+ return;
+ }
+
+ /* The failsafe won't help if there are no more processors to enable */
+ if (__probable(perfcontrol_requested_recommended_core_count >= processor_count)) {
+ return;
+ }
+
+ uint64_t too_long_ago = ctime - perfcontrol_failsafe_starvation_threshold;
+
+ /* Use the maintenance thread as our canary in the coal mine */
+ thread_t m_thread = sched_maintenance_thread;
+
+ /* If it doesn't look bad, nothing to see here */
+ if (__probable(m_thread->last_made_runnable_time >= too_long_ago)) {
+ return;
+ }
+
+ /* It looks bad, take the lock to be sure */
+ thread_lock(m_thread);
+
+ if (m_thread->runq == PROCESSOR_NULL ||
+ (m_thread->state & (TH_RUN | TH_WAIT)) != TH_RUN ||
+ m_thread->last_made_runnable_time >= too_long_ago) {
+ /*
+ * Maintenance thread is either on cpu or blocked, and
+ * therefore wouldn't benefit from more cores
+ */
+ thread_unlock(m_thread);
+ return;
+ }
+
+ uint64_t maintenance_runnable_time = m_thread->last_made_runnable_time;
+
+ thread_unlock(m_thread);
+
+ /*
+ * There are cores disabled at perfcontrol's recommendation, but the
+ * system is so overloaded that the maintenance thread can't run.
+ * That likely means that perfcontrol can't run either, so it can't fix
+ * the recommendation. We have to kick in a failsafe to keep from starving.
+ *
+ * When the maintenance thread has been starved for too long,
+ * ignore the recommendation from perfcontrol and light up all the cores.
+ *
+ * TODO: Consider weird states like boot, sleep, or debugger
+ */
+
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ if (perfcontrol_failsafe_active == true) {
+ simple_unlock(&sched_recommended_cores_lock);
+ return;
+ }
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_REC_CORES_FAILSAFE) | DBG_FUNC_START,
+ perfcontrol_requested_recommended_cores, maintenance_runnable_time, 0, 0, 0);
+
+ perfcontrol_failsafe_active = true;
+ perfcontrol_failsafe_activation_time = mach_absolute_time();
+ perfcontrol_failsafe_maintenance_runnable_time = maintenance_runnable_time;
+ perfcontrol_failsafe_recommended_at_trigger = perfcontrol_requested_recommended_cores;
+
+ /* Capture some data about who screwed up (assuming that the thread on core is at fault) */
+ task_t task = cur_thread->task;
+ perfcontrol_failsafe_pid = task_pid(task);
+ strlcpy(perfcontrol_failsafe_name, proc_name_address(task->bsd_info), sizeof(perfcontrol_failsafe_name));
+
+ perfcontrol_failsafe_tid = cur_thread->thread_id;
+
+ /* Blame the thread for time it has run recently */
+ uint64_t recent_computation = (ctime - cur_thread->computation_epoch) + cur_thread->computation_metered;
+
+ uint64_t last_seen = timer_grab(&cur_thread->user_timer) + timer_grab(&cur_thread->system_timer);
+
+ /* Compute the start time of the bad behavior in terms of the thread's on core time */
+ perfcontrol_failsafe_thread_timer_at_start = last_seen - recent_computation;
+ perfcontrol_failsafe_thread_timer_last_seen = last_seen;
+
+ /* Ignore the previously recommended core configuration */
+ sched_update_recommended_cores(ALL_CORES_RECOMMENDED);
+
+ simple_unlock(&sched_recommended_cores_lock);
+}
+
+/*
+ * Now that our bacon has been saved by the failsafe, consider whether to turn it off
+ *
+ * Runs in the context of the maintenance thread, no locks held
+ */
+static void
+sched_recommended_cores_maintenance(void)
+{
+ /* Common case - no failsafe, nothing to be done here */
+ if (__probable(perfcontrol_failsafe_active == false)) {
+ return;
+ }
+
+ uint64_t ctime = mach_absolute_time();
+
+ bool print_diagnostic = false;
+ char p_name[FAILSAFE_NAME_LEN] = "";
+
+ spl_t s = splsched();
+ simple_lock(&sched_recommended_cores_lock, LCK_GRP_NULL);
+
+ /* Check again, under the lock, to avoid races */
+ if (perfcontrol_failsafe_active == false) {
+ goto out;
+ }
+
+ /*
+ * Ensure that the other cores get another few ticks to run some threads.
+ * Without this hysteresis, the maintenance thread would be the first to
+ * run, and it would immediately turn the other cores back off.
+ */
+ if ((ctime - perfcontrol_failsafe_activation_time) < perfcontrol_failsafe_starvation_threshold) {
+ goto out;
+ }
+
+ /* Capture some diagnostic state under the lock so we can print it out later */
+
+ int pid = perfcontrol_failsafe_pid;
+ uint64_t tid = perfcontrol_failsafe_tid;
+
+ uint64_t thread_usage = perfcontrol_failsafe_thread_timer_last_seen -
+ perfcontrol_failsafe_thread_timer_at_start;
+ uint32_t rec_cores_before = perfcontrol_failsafe_recommended_at_trigger;
+ uint32_t rec_cores_after = perfcontrol_requested_recommended_cores;
+ uint64_t failsafe_duration = ctime - perfcontrol_failsafe_activation_time;
+ strlcpy(p_name, perfcontrol_failsafe_name, sizeof(p_name));
+
+ print_diagnostic = true;
+
+ /* Deactivate the failsafe and reinstate the requested recommendation settings */
+
+ perfcontrol_failsafe_deactivation_time = ctime;
+ perfcontrol_failsafe_active = false;
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_REC_CORES_FAILSAFE) | DBG_FUNC_END,
+ perfcontrol_requested_recommended_cores, failsafe_duration, 0, 0, 0);
+
+ sched_update_recommended_cores(perfcontrol_requested_recommended_cores & usercontrol_requested_recommended_cores);
+
+out:
+ simple_unlock(&sched_recommended_cores_lock);
+ splx(s);
+
+ if (print_diagnostic) {
+ uint64_t failsafe_duration_ms = 0, thread_usage_ms = 0;
+
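+ /* Convert mach absolute time to ns, then divide the ns value down to ms */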
+ absolutetime_to_nanoseconds(failsafe_duration, &failsafe_duration_ms);
+ failsafe_duration_ms = failsafe_duration_ms / NSEC_PER_MSEC;
+
+ absolutetime_to_nanoseconds(thread_usage, &thread_usage_ms);
+ thread_usage_ms = thread_usage_ms / NSEC_PER_MSEC;
+
+ printf("recommended core failsafe kicked in for %lld ms "
+ "likely due to %s[%d] thread 0x%llx spending "
+ "%lld ms on cpu at realtime priority - "
+ "new recommendation: 0x%x -> 0x%x\n",
+ failsafe_duration_ms, p_name, pid, tid, thread_usage_ms,
+ rec_cores_before, rec_cores_after);
+ }
+}
+
+#endif /* __arm__ || __arm64__ */
+
+kern_return_t
+sched_processor_enable(processor_t processor, boolean_t enable)