/* apple/xnu (xnu-7195.60.75) - osfmk/kern/sched_amp_common.c */

/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <kern/sched_amp_common.h>
#include <stdatomic.h>

#if __AMP__

/* Exported globals */
processor_set_t ecore_set = NULL;
processor_set_t pcore_set = NULL;

static struct processor_set pset1;
static struct pset_node pset_node1;

#if DEVELOPMENT || DEBUG
bool system_ecore_only = false;
#endif /* DEVELOPMENT || DEBUG */

/*
 * sched_amp_init()
 *
 * Initialize the pcore_set and ecore_set globals which describe the
 * P/E processor sets.
 */
void
sched_amp_init(void)
{
    pset_init(&pset1, &pset_node1);
    pset_node1.psets = &pset1;
    pset_node0.node_list = &pset_node1;

    if (ml_get_boot_cluster() == CLUSTER_TYPE_P) {
        pcore_set = &pset0;
        ecore_set = &pset1;
    } else {
        ecore_set = &pset0;
        pcore_set = &pset1;
    }

    ecore_set->pset_cluster_type = PSET_AMP_E;
    ecore_set->pset_cluster_id = 0;

    pcore_set->pset_cluster_type = PSET_AMP_P;
    pcore_set->pset_cluster_id = 1;

#if DEVELOPMENT || DEBUG
    if (PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
        system_ecore_only = true;
    }
#endif /* DEVELOPMENT || DEBUG */

    sched_timeshare_init();
}

/* Spill threshold load average is ncpus in pset + (sched_amp_spill_count/(1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
int sched_amp_spill_count = 3;
int sched_amp_idle_steal = 1;
int sched_amp_spill_steal = 1;
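
/*
 * Illustrative example (editor's note, not in the original source): the pset
 * load average and this threshold share the same fixed-point format, shifted
 * left by PSET_LOAD_FRACTIONAL_SHIFT. Assuming a shift of 4 (16 units per CPU
 * of load) and a 4-CPU P-cluster with all CPUs recommended,
 * sched_amp_spill_threshold() below would return (4 << 4) + 3 = 67, i.e.
 * spill is considered once the cluster's load average exceeds roughly 4.2
 * CPUs' worth of runnable work.
 */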

/*
 * We see performance gains from sending immediate IPIs to P-cores to run
 * P-eligible threads, and fewer P-to-E migrations when deferred IPIs are
 * used for spill.
 */
int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;

/*
 * sched_perfcontrol_inherit_recommendation_from_tg changes the AMP
 * scheduling policy away from the default and allows the policy to be
 * modified at run-time.
 *
 * Once modified from the default, the policy toggles between "follow
 * thread group" and "restrict to E".
 */

_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util = SCHED_PERFCTL_POLICY_DEFAULT;
_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg = SCHED_PERFCTL_POLICY_DEFAULT;

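/*
 * Illustrative sketch (editor's note, not in the original source): consumers
 * of these knobs read them with a relaxed atomic load and fall back to the
 * default E-only behavior, along the lines of:
 *
 *     if (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
 *         // default policy: utility work stays on E-cores only
 *     } else {
 *         // policy was adjusted at run-time; scheduling may span both clusters
 *     }
 *
 * sched_amp_qos_max_parallelism() below uses exactly this pattern to widen
 * the reported parallelism for utility/bg once the policy is overridden.
 */
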
/*
 * sched_amp_spill_threshold()
 *
 * Routine to calculate the spill threshold, which decides if the cluster should spill.
 */
int
sched_amp_spill_threshold(processor_set_t pset)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
}

/*
 * pset_signal_spill()
 *
 * Routine to signal a running/idle CPU to cause a spill onto that CPU.
 * Called with pset locked, returns unlocked
 */
void
pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
{
    processor_t processor;
    sched_ipi_type_t ipi_type = SCHED_IPI_NONE;

    uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
    for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
        processor = processor_array[cpuid];
        if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);

            processor->deadline = UINT64_MAX;
            pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);

            if (processor == current_processor()) {
                bit_set(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id);
            } else {
                ipi_type = sched_ipi_action(processor, NULL, true, SCHED_IPI_EVENT_SPILL);
            }
            pset_unlock(pset);
            sched_ipi_perform(processor, ipi_type);
            return;
        }
    }

    processor_t ast_processor = NULL;
    uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor = processor_array[cpuid];
        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* Already running a spilled P-core recommended thread */
            continue;
        }
        if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
            /* Already received a spill signal */
            continue;
        }
        if (processor->current_pri >= spilled_thread_priority) {
            /* Already running a higher or equal priority thread */
            continue;
        }

        /* Found a suitable processor */
        bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
        KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
        if (processor == current_processor()) {
            ast_on(AST_PREEMPT);
        }
        ipi_type = sched_ipi_action(processor, NULL, false, SCHED_IPI_EVENT_SPILL);
        if (ipi_type != SCHED_IPI_NONE) {
            ast_processor = processor;
        }
        break;
    }

    pset_unlock(pset);
    sched_ipi_perform(ast_processor, ipi_type);
}

/*
 * pset_should_accept_spilled_thread()
 *
 * Routine to decide if pset should accept spilled threads.
 * This function must be safe to call (to use as a hint) without holding the pset lock.
 */
bool
pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
{
    if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        return true;
    }

    uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);

    for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
        processor_t processor = processor_array[cpuid];

        if (processor->current_recommended_pset_type == PSET_AMP_P) {
            /* This processor is already running a spilled thread */
            continue;
        }

        if (processor->current_pri < spilled_thread_priority) {
            return true;
        }
    }

    return false;
}

/*
 * should_spill_to_ecores()
 *
 * Spill policy is implemented here
 */
bool
should_spill_to_ecores(processor_set_t nset, thread_t thread)
{
    if (nset->pset_cluster_type == PSET_AMP_E) {
        /* Not relevant if ecores already preferred */
        return false;
    }

    if (!pset_is_recommended(ecore_set)) {
        /* E cores must be recommended */
        return false;
    }

    if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
        return false;
    }

    if (thread->sched_pri >= BASEPRI_RTQUEUES) {
        /* Never spill realtime threads */
        return false;
    }

    if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
        /* Don't spill if the P-cluster still has idle cores */
        return false;
    }

    if ((sched_get_pset_load_average(nset, 0) >= sched_amp_spill_threshold(nset)) && /* There is already a load on P cores */
        pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) {           /* and the E cores can accept the spilled thread */
        return true;
    }

    return false;
}

/*
 * sched_amp_check_spill()
 *
 * Routine to check if the thread should be spilled and signal the pset if needed.
 */
void
sched_amp_check_spill(processor_set_t pset, thread_t thread)
{
    /* pset is unlocked */

    /* Bound threads don't call this function */
    assert(thread->bound_processor == PROCESSOR_NULL);

    if (should_spill_to_ecores(pset, thread)) {
        pset_lock(ecore_set);

        pset_signal_spill(ecore_set, thread->sched_pri);
        /* returns with ecore_set unlocked */
    }
}

/*
 * sched_amp_steal_threshold()
 *
 * Routine to calculate the steal threshold
 */
int
sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
{
    int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

    return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
}

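/*
 * Illustrative example (editor's note, not in the original source): using the
 * same fixed-point convention as the spill threshold, a fully recommended
 * 4-CPU E-cluster yields a steal threshold of
 * (4 << PSET_LOAD_FRACTIONAL_SHIFT) + 1, i.e. one fractional unit of load
 * above one runnable thread per CPU, whether or not a spill is pending
 * (sched_amp_spill_steal and sched_amp_idle_steal both default to 1).
 */
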
/*
 * sched_amp_steal_thread_enabled()
 *
 * Thread stealing is enabled only for the E-cluster, and only while the
 * P-cluster has online processors.
 */
bool
sched_amp_steal_thread_enabled(processor_set_t pset)
{
    return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set->online_processor_count > 0);
}

/*
 * sched_amp_balance()
 *
 * Invoked with pset locked, returns with pset unlocked
 */
void
sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
{
    assert(cprocessor == current_processor());

    pset_unlock(cpset);

    if (cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
        return;
    }

    /*
     * cprocessor is an idle, recommended P core processor.
     * Look for P-eligible threads that have spilled to an E core
     * and coax them to come back.
     */

    processor_set_t pset = ecore_set;

    pset_lock(pset);

    processor_t eprocessor;
    uint64_t ast_processor_map = 0;

    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        eprocessor = processor_array[cpuid];
        if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
            (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
                assert(eprocessor != cprocessor);
            }
        }
    }

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }
}

/*
 * Helper function for sched_amp_thread_group_recommendation_change()
 * Find all the cores in the pset running threads from the thread_group tg
 * and send them a rebalance interrupt.
 */
void
sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
{
    assert(pset->pset_cluster_type == PSET_AMP_E);
    uint64_t ast_processor_map = 0;
    sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};

    spl_t s = splsched();
    pset_lock(pset);

    uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
    for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
        processor_t eprocessor = processor_array[cpuid];
        if (eprocessor->current_thread_group == tg) {
            ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
            if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
                bit_set(ast_processor_map, eprocessor->cpu_id);
            } else if (eprocessor == current_processor()) {
                ast_on(AST_PREEMPT);
                bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
            }
        }
    }

    KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);

    pset_unlock(pset);

    for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
        processor_t ast_processor = processor_array[cpuid];
        sched_ipi_perform(ast_processor, ipi_type[cpuid]);
    }

    splx(s);
}

/*
 * sched_amp_ipi_policy()
 */
sched_ipi_type_t
sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
{
    processor_set_t pset = dst->processor_set;
    assert(bit_test(pset->pending_AST_URGENT_cpu_mask, dst->cpu_id) == false);
    assert(dst != current_processor());

    boolean_t deferred_ipi_supported = false;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    deferred_ipi_supported = true;
#endif /* CONFIG_SCHED_DEFERRED_AST */

    switch (event) {
    case SCHED_IPI_EVENT_SPILL:
        /* For the spill event, use deferred IPIs if sched_amp_spill_deferred_ipi is set */
        if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
            return sched_ipi_deferred_policy(pset, dst, event);
        }
        break;
    case SCHED_IPI_EVENT_PREEMPT:
        /* For preemption, the default policy is to use deferred IPIs
         * for non-RT P-core preemption. Override that behavior if
         * sched_amp_pcores_preempt_immediate_ipi is set.
         */
        if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
            if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
                return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
            }
        }
        break;
    default:
        break;
    }
    /* Default back to the global policy for all other scenarios */
    return sched_ipi_policy(dst, thread, dst_idle, event);
}

/*
 * sched_amp_qos_max_parallelism()
 */
uint32_t
sched_amp_qos_max_parallelism(int qos, uint64_t options)
{
    uint32_t ecount = ecore_set->cpu_set_count;
    uint32_t pcount = pcore_set->cpu_set_count;

    if (options & QOS_PARALLELISM_REALTIME) {
        /* For realtime threads on AMP, we want to limit the width to
         * just the P-cores since we do not spill/rebalance RT threads.
         */
        return pcount;
    }

    /*
     * The default AMP scheduler policy is to run utility and bg
     * threads on E-cores only. Run-time policy adjustment unlocks the
     * ability of utility and bg threads to be scheduled based on
     * run-time conditions.
     */
    switch (qos) {
    case THREAD_QOS_UTILITY:
        return (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) ? ecount : (ecount + pcount);
    case THREAD_QOS_BACKGROUND:
    case THREAD_QOS_MAINTENANCE:
        return (os_atomic_load(&sched_perfctl_policy_bg, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) ? ecount : (ecount + pcount);
    default:
        return ecount + pcount;
    }
}
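
/*
 * Illustrative example (editor's note, not in the original source): on a
 * hypothetical system with 4 E-cores and 4 P-cores, the function above
 * reports 4 for QOS_PARALLELISM_REALTIME (P-cores only), 4 for utility/bg
 * while the corresponding sched_perfctl policy is still
 * SCHED_PERFCTL_POLICY_DEFAULT (E-cores only), and 8 otherwise.
 */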

pset_node_t
sched_amp_choose_node(thread_t thread)
{
    if (recommended_pset_type(thread) == PSET_AMP_P) {
        return pcore_set->node;
    } else {
        return ecore_set->node;
    }
}

/*
 * sched_amp_rt_runq()
 */
rt_queue_t
sched_amp_rt_runq(processor_set_t pset)
{
    return &pset->rt_runq;
}

/*
 * sched_amp_rt_init()
 */
void
sched_amp_rt_init(processor_set_t pset)
{
    pset_rt_init(pset);
}

/*
 * sched_amp_rt_queue_shutdown()
 */
void
sched_amp_rt_queue_shutdown(processor_t processor)
{
    processor_set_t pset = processor->processor_set;
    thread_t thread;
    queue_head_t tqueue;

    pset_lock(pset);

    /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
    if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
        pset_unlock(pset);
        return;
    }

    queue_init(&tqueue);

    while (rt_runq_count(pset) > 0) {
        thread = qe_dequeue_head(&pset->rt_runq.queue, struct thread, runq_links);
        thread->runq = PROCESSOR_NULL;
        SCHED_STATS_RUNQ_CHANGE(&pset->rt_runq.runq_stats,
            os_atomic_load(&pset->rt_runq.count, relaxed));
        rt_runq_count_decr(pset);
        enqueue_tail(&tqueue, &thread->runq_links);
    }
    sched_update_pset_load_average(pset, 0);
    pset_unlock(pset);

    qe_foreach_element_safe(thread, &tqueue, runq_links) {
        remqueue(&thread->runq_links);

        thread_lock(thread);

        thread_setrun(thread, SCHED_TAILQ);

        thread_unlock(thread);
    }
}

/*
 * sched_amp_rt_runq_scan()
 *
 * Assumes RT lock is not held, and acquires splsched/rt_lock itself
 */
void
sched_amp_rt_runq_scan(sched_update_scan_context_t scan_context)
{
    thread_t thread;

    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;

    spl_t s = splsched();
    do {
        while (pset != NULL) {
            pset_lock(pset);

            qe_foreach_element_safe(thread, &pset->rt_runq.queue, runq_links) {
                if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
                    scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
                }
            }

            pset_unlock(pset);

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
    splx(s);
}

/*
 * sched_amp_rt_runq_count_sum()
 */
int64_t
sched_amp_rt_runq_count_sum(void)
{
    pset_node_t node = &pset_node0;
    processor_set_t pset = node->psets;
    int64_t count = 0;

    do {
        while (pset != NULL) {
            count += pset->rt_runq.runq_stats.count_sum;

            pset = pset->pset_list;
        }
    } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));

    return count;
}

#endif /* __AMP__ */