/*
 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/machine.h>
#include <kern/coalition.h>
#include <sys/errno.h>
#include <kern/queue.h>
#include <kern/locks.h>
#include <kern/thread_group.h>
#include <kern/sched_clutch.h>

#if CONFIG_THREAD_GROUPS

#define CACHELINE_SIZE (1 << MMU_CLINE)

struct thread_group {
	uint64_t tg_id;
	char tg_name[THREAD_GROUP_MAXNAME];
	struct os_refcnt tg_refcount;
	uint32_t tg_flags;
	cluster_type_t tg_recommendation;
	queue_chain_t tg_queue_chain;
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch tg_sched_clutch;
#endif /* CONFIG_SCHED_CLUTCH */
	// 16 bytes of padding here
	uint8_t tg_machine_data[] __attribute__((aligned(CACHELINE_SIZE)));
} __attribute__((aligned(8)));
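
/*
 * Layout note: tg_machine_data is a variable-length, cacheline-aligned tail
 * handed to the machine layer (CLPC) as per-group scratch space; its size is
 * chosen at boot in thread_group_init(). A minimal sketch of how a machine
 * layer might use it (the machine-side struct and field names here are
 * hypothetical, not part of this file):
 *
 *	struct machine_tg_state {
 *		uint64_t mt_last_update;
 *	};
 *
 *	void
 *	machine_thread_group_init(struct thread_group *tg)
 *	{
 *		struct machine_tg_state *mt = thread_group_get_machine_data(tg);
 *		assert(thread_group_machine_data_size() >= sizeof(*mt));
 *		bzero(mt, sizeof(*mt));
 *	}
 */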

static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
static uint32_t tg_count;
static queue_head_t tg_queue;
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
static LCK_SPIN_DECLARE(tg_flags_update_lock, &tg_lck_grp);

static uint64_t tg_next_id = 0;
static uint32_t tg_size;
static uint32_t tg_machine_data_size;
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_adaptive;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
int tg_set_by_bankvoucher;

static bool thread_group_retain_try(struct thread_group *tg);

/*
 * Initialize thread groups at boot
 */
void
thread_group_init(void)
{
	// Get thread group structure extension from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
			tg_machine_data_size = 8;
		}
	}

	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
			tg_set_by_bankvoucher = 1;
		}
	}

	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
	if (tg_size % CACHELINE_SIZE) {
		tg_size += CACHELINE_SIZE - (tg_size % CACHELINE_SIZE);
	}
	tg_machine_data_size = tg_size - sizeof(struct thread_group);
	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
	assert(offsetof(struct thread_group, tg_machine_data) % CACHELINE_SIZE == 0);
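	/*
	 * Worked example (hypothetical numbers): with a 64-byte cacheline and
	 * sizeof(struct thread_group) == 192, a requested 8 extra bytes gives
	 * tg_size = 200, which rounds up to 256; tg_machine_data_size is then
	 * recomputed as 256 - 192 = 64, so the machine layer receives the full
	 * cacheline-padded tail.
	 */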
	tg_zone = zone_create("thread_groups", tg_size, ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);

	queue_head_init(tg_queue);
	tg_system = thread_group_create_and_retain();
	thread_group_set_name(tg_system, "system");
	tg_background = thread_group_create_and_retain();
	thread_group_set_name(tg_background, "background");
	tg_adaptive = thread_group_create_and_retain();
	thread_group_set_name(tg_adaptive, "adaptive");
	tg_vm = thread_group_create_and_retain();
	thread_group_set_name(tg_vm, "VM");
	tg_io_storage = thread_group_create_and_retain();
	thread_group_set_name(tg_io_storage, "io storage");
	tg_perf_controller = thread_group_create_and_retain();
	thread_group_set_name(tg_perf_controller, "perf_controller");

	/*
	 * If CLPC is disabled, it would recommend SMP for all thread groups.
	 * In that mode, the scheduler would like to restrict the kernel thread
	 * groups to the E-cluster while all other thread groups are run on the
	 * P-cluster. To identify the kernel thread groups, mark them with a
	 * special flag THREAD_GROUP_FLAGS_SMP_RESTRICT which is looked at by
	 * recommended_pset_type().
	 */
	tg_system->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_vm->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_io_storage->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_perf_controller->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
}

#if CONFIG_SCHED_CLUTCH
/*
 * sched_clutch_for_thread
 *
 * The routine provides a back linkage from the thread to the
 * sched_clutch it belongs to. This relationship is based on the
 * thread group membership of the thread. Since that membership is
 * changed from the thread context with the thread lock held, this
 * linkage should be looked at only with the thread lock held or
 * when the thread cannot be running (e.g. the thread is in the
 * runq and being removed as part of thread_select()).
 */
sched_clutch_t
sched_clutch_for_thread(thread_t thread)
{
	assert(thread->thread_group != NULL);
	return &(thread->thread_group->tg_sched_clutch);
}

sched_clutch_t
sched_clutch_for_thread_group(struct thread_group *thread_group)
{
	return &(thread_group->tg_sched_clutch);
}

/*
 * Translate the TG flags to a priority boost for the sched_clutch.
 * This priority boost will apply to the entire clutch represented
 * by the thread group.
 */
static void
sched_clutch_update_tg_flags(sched_clutch_t clutch, uint8_t flags)
{
	sched_clutch_tg_priority_t sc_tg_pri = 0;
	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
	} else {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
	}
	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
}

#endif /* CONFIG_SCHED_CLUTCH */

/*
 * Use a spinlock to protect all thread group flag updates.
 * The lock should not have heavy contention since these flag updates should
 * be infrequent. If this lock has contention issues, it should be changed to
 * a per thread-group lock.
 *
 * The lock protects the flags field in the thread_group structure. It is also
 * held while doing callouts to CLPC to reflect these flag changes.
 */

void
thread_group_flags_update_lock(void)
{
	lck_spin_lock_grp(&tg_flags_update_lock, &tg_lck_grp);
}

void
thread_group_flags_update_unlock(void)
{
	lck_spin_unlock(&tg_flags_update_lock);
}

/*
 * Inform platform code about already existing thread groups
 * or ask it to free state for all thread groups
 */
void
thread_group_resync(boolean_t create)
{
	struct thread_group *tg;

	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (create) {
			machine_thread_group_init(tg);
		} else {
			machine_thread_group_deinit(tg);
		}
	}
	lck_mtx_unlock(&tg_lock);
}

/*
 * Create a new thread group and take a reference on it.
 */
struct thread_group *
thread_group_create_and_retain(void)
{
	struct thread_group *tg;

	tg = (struct thread_group *)zalloc(tg_zone);
	if (tg == NULL) {
		panic("thread group zone over commit");
	}
	assert((uintptr_t)tg % CACHELINE_SIZE == 0);
	bzero(tg, sizeof(struct thread_group));

#if CONFIG_SCHED_CLUTCH
	/*
	 * The clutch scheduler maintains a bunch of runqs per thread group. For
	 * each thread group it maintains a sched_clutch structure. The lifetime
	 * of that structure is tied directly to the lifetime of the thread group.
	 */
	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);

	/*
	 * Since the thread group flags are used to determine any priority promotions
	 * for the threads in the thread group, initialize them to 0.
	 */
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), 0);

#endif /* CONFIG_SCHED_CLUTCH */

	lck_mtx_lock(&tg_lock);
	tg->tg_id = tg_next_id++;
	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
	os_ref_init(&tg->tg_refcount, NULL);
	tg_count++;
	enqueue_tail(&tg_queue, &tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);

	// call machine layer init before this thread group becomes visible
	machine_thread_group_init(tg);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), tg->tg_id);

	return tg;
}
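
/*
 * Usage sketch (illustrative only, not an existing kernel caller): a
 * subsystem that wants a private group would typically do
 *
 *	struct thread_group *tg = thread_group_create_and_retain();
 *	thread_group_set_name(tg, "my_subsystem");	// name is hypothetical
 *	...
 *	thread_group_release(tg);	// drops the reference taken at creation
 *
 * The group stays on tg_queue and visible to the machine layer until its
 * last reference is released.
 */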

/*
 * Point newly created thread to its home thread group
 */
void
thread_group_init_thread(thread_t t, task_t task)
{
	struct thread_group *tg = task_coalition_get_thread_group(task);
	t->thread_group = tg;
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    THREAD_GROUP_INVALID, tg->tg_id, (uintptr_t)thread_tid(t));
}

/*
 * Set thread group name
 */
void
thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
{
	if (name == NULL) {
		return;
	}
	if (!thread_group_retain_try(tg)) {
		return;
	}
	if (tg->tg_name[0] == '\0') {
		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
#if defined(__LP64__)
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
		    );
#else /* defined(__LP64__) */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
		    );
#endif /* defined(__LP64__) */
	}
	thread_group_release(tg);
}

void
thread_group_set_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

void
thread_group_clear_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

/*
 * Set thread group flags and perform related actions.
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_set_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_set_flags: Invalid flags %llu", flags);
	}

	if ((tg->tg_flags & flags) == flags) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags |= flags;
	machine_thread_group_flags_update(tg, tg->tg_flags);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}

/*
 * Clear thread group flags and perform related actions
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_clear_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_clear_flags: Invalid flags %llu", flags);
	}

	if ((tg->tg_flags & flags) == 0) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags &= ~flags;
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}


/*
 * Find a thread group with the specified name and take a new reference on it.
 */
struct thread_group *
thread_group_find_by_name_and_retain(char *name)
{
	struct thread_group *result = NULL;

	if (name == NULL) {
		return NULL;
	}

	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_system);
	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_background);
	} else if (strncmp("adaptive", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_adaptive);
	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_perf_controller);
	}

	struct thread_group *tg;
	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
		    thread_group_retain_try(tg)) {
			result = tg;
			break;
		}
	}
	lck_mtx_unlock(&tg_lock);
	return result;
}
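
/*
 * Lookup sketch (illustrative): callers own the reference returned by the
 * find-and-retain routines and must pair it with thread_group_release().
 *
 *	struct thread_group *tg = thread_group_find_by_name_and_retain("adaptive");
 *	if (tg != NULL) {
 *		uint64_t id = thread_group_get_id(tg);
 *		...
 *		thread_group_release(tg);
 *	}
 */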

/*
 * Find thread group with specified ID and add new reference to it.
 */
struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)
{
	struct thread_group *tg = NULL;
	struct thread_group *result = NULL;

	switch (id) {
	case THREAD_GROUP_SYSTEM:
		result = tg_system;
		thread_group_retain(tg_system);
		break;
	case THREAD_GROUP_BACKGROUND:
		result = tg_background;
		thread_group_retain(tg_background);
		break;
	case THREAD_GROUP_ADAPTIVE:
		result = tg_adaptive;
		thread_group_retain(tg_adaptive);
		break;
	case THREAD_GROUP_VM:
		result = tg_vm;
		thread_group_retain(tg_vm);
		break;
	case THREAD_GROUP_IO_STORAGE:
		result = tg_io_storage;
		thread_group_retain(tg_io_storage);
		break;
	case THREAD_GROUP_PERF_CONTROLLER:
		result = tg_perf_controller;
		thread_group_retain(tg_perf_controller);
		break;
	default:
		lck_mtx_lock(&tg_lock);
		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
			if (tg->tg_id == id && thread_group_retain_try(tg)) {
				result = tg;
				break;
			}
		}
		lck_mtx_unlock(&tg_lock);
	}
	return result;
}

/*
 * Add new reference to specified thread group
 */
struct thread_group *
thread_group_retain(struct thread_group *tg)
{
	os_ref_retain(&tg->tg_refcount);
	return tg;
}

/*
 * Similar to thread_group_retain, but fails for thread groups with a
 * zero reference count. Returns true if retained successfully.
 */
static bool
thread_group_retain_try(struct thread_group *tg)
{
	return os_ref_retain_try(&tg->tg_refcount);
}
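
/*
 * Note: the lookup paths above use thread_group_retain_try() while holding
 * tg_lock because a group whose refcount has just hit zero may still be on
 * tg_queue until thread_group_release() reacquires tg_lock to remove it;
 * the try-variant refuses to resurrect such a group.
 */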

/*
 * Drop a reference to specified thread group
 */
void
thread_group_release(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		lck_mtx_lock(&tg_lock);
		tg_count--;
		remqueue(&tg->tg_queue_chain);
		lck_mtx_unlock(&tg_lock);
		static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 2), "thread group name is too short");
		static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
		    );
#else /* defined(__LP64__) */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
		    );
#endif /* defined(__LP64__) */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
#if CONFIG_SCHED_CLUTCH
		sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
		machine_thread_group_deinit(tg);
		zfree(tg_zone, tg);
	}
}

/*
 * Get thread's current thread group
 */
inline struct thread_group *
thread_group_get(thread_t t)
{
	return t->thread_group;
}

struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(t->task);
}

#if CONFIG_SCHED_AUTO_JOIN

/*
 * thread_set_thread_group_auto_join()
 *
 * Sets the thread group of a thread based on auto-join rules.
 *
 * Preconditions:
 * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
 * - Thread must be locked by the caller already
 */
static void
thread_set_thread_group_auto_join(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
{
	assert(t->runq == PROCESSOR_NULL);
	t->thread_group = tg;

	/*
	 * If the thread group is being changed for the current thread, callout to
	 * CLPC to update the thread's information at that layer. This makes sure CLPC
	 * has consistent state when the current thread is going off-core.
	 */
	if (t == current_thread()) {
		uint64_t ctime = mach_approximate_time();
		uint64_t arg1, arg2;
		machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
		machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
	}
}

#endif /* CONFIG_SCHED_AUTO_JOIN */

/*
 * thread_set_thread_group_explicit()
 *
 * Sets the thread group of a thread based on default non auto-join rules.
 *
 * Preconditions:
 * - Thread must be the current thread
 * - Caller must not have the thread locked
 * - Interrupts must be disabled
 */
static void
thread_set_thread_group_explicit(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
{
	assert(t == current_thread());
	/*
	 * In the clutch scheduler world, the runq membership of the thread
	 * is based on its thread group membership and its scheduling bucket.
	 * In order to synchronize with the priority (and therefore bucket)
	 * getting updated concurrently, it is important to perform the
	 * thread group change also under the thread lock.
	 */
	thread_lock(t);
	t->thread_group = tg;

#if CONFIG_SCHED_CLUTCH
	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
	sched_clutch_t new_clutch = (tg) ? &(tg->tg_sched_clutch) : NULL;
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
	}
#endif /* CONFIG_SCHED_CLUTCH */

	thread_unlock(t);

	uint64_t ctime = mach_approximate_time();
	uint64_t arg1, arg2;
	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
}

/*
 * thread_set_thread_group()
 *
 * Overrides the current home thread group with an override group. However,
 * an adopted work interval overrides the override. Does not take a reference
 * on the group, so caller must guarantee group lifetime lasts as long as the
 * group is set.
 *
 * The thread group is set according to a hierarchy:
 *
 * 1) work interval specified group (explicit API)
 * 2) Auto-join thread group (wakeup tracking for special work intervals)
 * 3) bank voucher carried group (implicitly set)
 * 4) coalition default thread group (ambient)
 */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	if (tg == NULL) {
		/* when removing an override, revert to home group */
		tg = home_tg;
	}

	spl_t s = splsched();

	old_tg = t->thread_group;

	if (old_tg != tg) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    t->thread_group ? t->thread_group->tg_id : 0,
		    tg->tg_id, (uintptr_t)thread_tid(t), home_tg->tg_id);

		/*
		 * Based on whether this is a change due to auto-join, the join does
		 * different things and has different expectations.
		 */
		if (auto_join) {
#if CONFIG_SCHED_AUTO_JOIN
			/*
			 * set thread group with auto-join rules. This has the
			 * implicit assumption that the thread lock is already held.
			 * Also this could happen to any thread (current or thread
			 * being context switched).
			 */
			thread_set_thread_group_auto_join(t, tg, old_tg);
#else /* CONFIG_SCHED_AUTO_JOIN */
			panic("Auto-Join unsupported on this platform");
#endif /* CONFIG_SCHED_AUTO_JOIN */
		} else {
			/*
			 * set thread group with the explicit join rules. This has
			 * the implicit assumption that the thread is not locked. Also
			 * this would be done only to the current thread.
			 */
			thread_set_thread_group_explicit(t, tg, old_tg);
		}
	}

	splx(s);
}
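
/*
 * Override/restore sketch (illustrative; variable names are hypothetical):
 * callers install an override group and later pass NULL to drop it, which
 * reverts the thread to its home (coalition) group. A bank-voucher adoption
 * path would look roughly like:
 *
 *	thread_group_set_bank(t, voucher_tg);	// adopt the voucher's group
 *	...
 *	thread_group_set_bank(t, NULL);		// revert to the home group
 *
 * No reference is taken on the override group, so its lifetime must cover
 * the entire window in which it is set.
 */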

void
thread_group_set_bank(thread_t t, struct thread_group *tg)
{
	/* work interval group overrides any bank override group */
	if (t->th_work_interval) {
		return;
	}

	/* boot arg disables groups in bank */
	if (tg_set_by_bankvoucher == FALSE) {
		return;
	}

	thread_set_thread_group(t, tg, false);
}

/*
 * thread_set_work_interval_thread_group()
 *
 * Sets the thread's group to the work interval thread group.
 * If auto_join == true, the thread group is being overridden through scheduler
 * auto-join policies.
 *
 * Preconditions for the auto-join case:
 * - t is not current_thread and t should be locked.
 * - t should not be running on a remote core; thread context switching is a valid state for this.
 */
void
thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
{
	if (tg == NULL) {
		/*
		 * when removing a work interval override, fall back
		 * to the current voucher override.
		 *
		 * In the auto_join case, the thread is already locked by the caller so
		 * it's unsafe to get the thread group from the current voucher (since
		 * that might require taking the task lock and ivac lock). However, the
		 * auto-join policy does not allow threads to switch thread groups based
		 * on voucher overrides.
		 *
		 * For the normal case, lookup the thread group from the currently adopted
		 * voucher and use that as the fallback tg.
		 */

		if (auto_join == false) {
			tg = thread_get_current_voucher_thread_group(t);
		}
	}

	thread_set_thread_group(t, tg, auto_join);
}

inline cluster_type_t
thread_group_recommendation(struct thread_group *tg)
{
	if (tg == NULL) {
		return CLUSTER_TYPE_SMP;
	} else {
		return tg->tg_recommendation;
	}
}

inline uint64_t
thread_group_get_id(struct thread_group *tg)
{
	return tg->tg_id;
}

uint32_t
thread_group_count(void)
{
	return tg_count;
}

/*
 * Can only be called while tg cannot be destroyed
 */
inline const char*
thread_group_get_name(struct thread_group *tg)
{
	return tg->tg_name;
}

inline void *
thread_group_get_machine_data(struct thread_group *tg)
{
	return &tg->tg_machine_data;
}

inline uint32_t
thread_group_machine_data_size(void)
{
	return tg_machine_data_size;
}

kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
{
	struct thread_group *tg;
	int i = 0;
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
			return KERN_FAILURE;
		}
		callout(arg, i, tg);
		i++;
	}
	return KERN_SUCCESS;
}
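
/*
 * Iteration sketch (illustrative; the callout name and destination array are
 * hypothetical): stackshot supplies a thread_group_iterate_fn_t that receives
 * (arg, index, group), matching the callout(arg, i, tg) call above.
 *
 *	static void
 *	record_tg(void *arg, int i, struct thread_group *tg)
 *	{
 *		((uint64_t *)arg)[i] = thread_group_get_id(tg);
 *	}
 *	...
 *	kern_return_t kr = thread_group_iterate_stackshot(record_tg, ids);
 *
 * The walk takes no locks, so it is only suitable for the stackshot/debugger
 * context where other CPUs are not mutating tg_queue.
 */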

void
thread_group_join_io_storage(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg, false);
}

void
thread_group_join_perf_controller(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg, false);
}

void
thread_group_vm_add(void)
{
	assert(tg_vm != NULL);
	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM), false);
}

uint32_t
thread_group_get_flags(struct thread_group *tg)
{
	return tg->tg_flags;
}

/*
 * Returns whether the thread group is restricted to the E-cluster when CLPC is
 * turned off.
 */
boolean_t
thread_group_smp_restricted(struct thread_group *tg)
{
	if (tg->tg_flags & THREAD_GROUP_FLAGS_SMP_RESTRICT) {
		return true;
	} else {
		return false;
	}
}

void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
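
/*
 * Reader-side sketch (illustrative): code that consumes the recommendation,
 * such as a pset selection path, would pair the atomic store above with a
 * load of the same field, e.g.
 *
 *	cluster_type_t rec = os_atomic_load(&tg->tg_recommendation, relaxed);
 *
 * Relaxed ordering is presumed sufficient here because the recommendation is
 * advisory placement state rather than a synchronization flag.
 */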

#if CONFIG_SCHED_EDGE

int sched_edge_restrict_ut = 1;
int sched_edge_restrict_bg = 1;

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	/*
	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned
	 * off, so it should never recommend CLUSTER_TYPE_SMP for thread groups.
	 */
	assert(new_recommendation != CLUSTER_TYPE_SMP);
	/*
	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG.
	 * Until the new CLPC routine is adopted, fake out the call from the old CLPC interface.
	 */
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
	/*
	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
	 */
	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	/* For UT & BG QoS, set the recommendation only if they haven't been restricted via sysctls */
	if (!sched_edge_restrict_ut) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	if (!sched_edge_restrict_bg) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	sched_perfcontrol_preferred_cluster_options_t options = 0;
	if (new_recommendation == CLUSTER_TYPE_P) {
		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
	}
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}
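
/*
 * Pointer-recovery note: CLPC only ever sees the tg_machine_data pointer
 * handed out by thread_group_get_machine_data(), so these entry points
 * recover the owning thread_group by subtracting
 * offsetof(struct thread_group, tg_machine_data), i.e. the inverse of
 *
 *	void *machine_data = thread_group_get_machine_data(tg);
 *
 * which simply returns &tg->tg_machine_data.
 */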

void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
    uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
		[TH_BUCKET_FIXPRI] = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
	};
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}

#else /* CONFIG_SCHED_EDGE */

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
}

void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}

#endif /* CONFIG_SCHED_EDGE */

#endif /* CONFIG_THREAD_GROUPS */