Commit | Line | Data |
---|---|---|
d9a64523 | 1 | /* |
f427ee49 | 2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
d9a64523 A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995-2018 Apple, Inc. All Rights Reserved */ | |
29 | ||
30 | #include <sys/cdefs.h> | |
31 | ||
d9a64523 A |
32 | #include <kern/assert.h> |
33 | #include <kern/ast.h> | |
34 | #include <kern/clock.h> | |
35 | #include <kern/cpu_data.h> | |
36 | #include <kern/kern_types.h> | |
37 | #include <kern/policy_internal.h> | |
38 | #include <kern/processor.h> | |
0a7de745 | 39 | #include <kern/sched_prim.h> /* for thread_exception_return */ |
d9a64523 A |
40 | #include <kern/task.h> |
41 | #include <kern/thread.h> | |
42 | #include <kern/zalloc.h> | |
43 | #include <mach/kern_return.h> | |
44 | #include <mach/mach_param.h> | |
45 | #include <mach/mach_port.h> | |
46 | #include <mach/mach_types.h> | |
47 | #include <mach/mach_vm.h> | |
48 | #include <mach/sync_policy.h> | |
49 | #include <mach/task.h> | |
50 | #include <mach/thread_act.h> /* for thread_resume */ | |
51 | #include <mach/thread_policy.h> | |
52 | #include <mach/thread_status.h> | |
53 | #include <mach/vm_prot.h> | |
54 | #include <mach/vm_statistics.h> | |
55 | #include <machine/atomic.h> | |
56 | #include <machine/machine_routines.h> | |
57 | #include <vm/vm_map.h> | |
58 | #include <vm/vm_protos.h> | |
59 | ||
60 | #include <sys/eventvar.h> | |
61 | #include <sys/kdebug.h> | |
62 | #include <sys/kernel.h> | |
63 | #include <sys/lock.h> | |
64 | #include <sys/param.h> | |
0a7de745 | 65 | #include <sys/proc_info.h> /* for fill_procworkqueue */ |
d9a64523 A |
66 | #include <sys/proc_internal.h> |
67 | #include <sys/pthread_shims.h> | |
68 | #include <sys/resourcevar.h> | |
69 | #include <sys/signalvar.h> | |
70 | #include <sys/sysctl.h> | |
71 | #include <sys/sysproto.h> | |
72 | #include <sys/systm.h> | |
73 | #include <sys/ulock.h> /* for ulock_owner_value_to_port_name */ | |
74 | ||
75 | #include <pthread/bsdthread_private.h> | |
76 | #include <pthread/workqueue_syscalls.h> | |
77 | #include <pthread/workqueue_internal.h> | |
78 | #include <pthread/workqueue_trace.h> | |
79 | ||
80 | #include <os/log.h> | |
81 | ||
d9a64523 | 82 | static void workq_unpark_continue(void *uth, wait_result_t wr) __dead2; |
cb323159 A |
83 | static void workq_schedule_creator(proc_t p, struct workqueue *wq, |
84 | workq_kern_threadreq_flags_t flags); | |
d9a64523 A |
85 | |
86 | static bool workq_threadreq_admissible(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 87 | workq_threadreq_t req); |
d9a64523 A |
88 | |
89 | static uint32_t workq_constrained_allowance(struct workqueue *wq, | |
0a7de745 | 90 | thread_qos_t at_qos, struct uthread *uth, bool may_start_timer); |
d9a64523 A |
91 | |
92 | static bool workq_thread_is_busy(uint64_t cur_ts, | |
0a7de745 | 93 | _Atomic uint64_t *lastblocked_tsp); |
d9a64523 A |
94 | |
95 | static int workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS; | |
96 | ||
97 | #pragma mark globals | |
98 | ||
99 | struct workq_usec_var { | |
100 | uint32_t usecs; | |
101 | uint64_t abstime; | |
102 | }; | |
103 | ||
104 | #define WORKQ_SYSCTL_USECS(var, init) \ | |
0a7de745 A |
105 | static struct workq_usec_var var = { .usecs = init }; \ |
106 | SYSCTL_OID(_kern, OID_AUTO, var##_usecs, \ | |
107 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &var, 0, \ | |
108 | workq_sysctl_handle_usecs, "I", "") | |
d9a64523 | 109 | |
f427ee49 | 110 | static LCK_GRP_DECLARE(workq_lck_grp, "workq"); |
d9a64523 A |
111 | os_refgrp_decl(static, workq_refgrp, "workq", NULL); |
112 | ||
f427ee49 A |
113 | static ZONE_DECLARE(workq_zone_workqueue, "workq.wq", |
114 | sizeof(struct workqueue), ZC_NONE); | |
115 | static ZONE_DECLARE(workq_zone_threadreq, "workq.threadreq", | |
116 | sizeof(struct workq_threadreq_s), ZC_CACHING); | |
117 | ||
cb323159 | 118 | static struct mpsc_daemon_queue workq_deallocate_queue; |
d9a64523 | 119 | |
0a7de745 | 120 | WORKQ_SYSCTL_USECS(wq_stalled_window, WQ_STALLED_WINDOW_USECS); |
d9a64523 A |
121 | WORKQ_SYSCTL_USECS(wq_reduce_pool_window, WQ_REDUCE_POOL_WINDOW_USECS); |
122 | WORKQ_SYSCTL_USECS(wq_max_timer_interval, WQ_MAX_TIMER_INTERVAL_USECS); | |
123 | static uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; | |
124 | static uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; | |
125 | static uint32_t wq_init_constrained_limit = 1; | |
126 | static uint16_t wq_death_max_load; | |
127 | static uint32_t wq_max_parallelism[WORKQ_NUM_QOS_BUCKETS]; | |
128 | ||
129 | #pragma mark sysctls | |
130 | ||
131 | static int | |
132 | workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS | |
133 | { | |
134 | #pragma unused(arg2) | |
135 | struct workq_usec_var *v = arg1; | |
136 | int error = sysctl_handle_int(oidp, &v->usecs, 0, req); | |
0a7de745 | 137 | if (error || !req->newptr) { |
d9a64523 | 138 | return error; |
0a7de745 | 139 | } |
d9a64523 | 140 | clock_interval_to_absolutetime_interval(v->usecs, NSEC_PER_USEC, |
0a7de745 | 141 | &v->abstime); |
d9a64523 A |
142 | return 0; |
143 | } | |
144 | ||
145 | SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
0a7de745 | 146 | &wq_max_threads, 0, ""); |
d9a64523 A |
147 | |
148 | SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
0a7de745 | 149 | &wq_max_constrained_threads, 0, ""); |
d9a64523 A |
150 | |
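The SYSCTL_INT declarations above (and the WORKQ_SYSCTL_USECS macro before them) publish these tunables under the kern namespace. A minimal user-space sketch of reading one of them, assuming the standard sysctlbyname(3) interface; the OID name is taken from the SYSCTL_INT declaration above:

```c
/* Illustrative user-space sketch, not part of this kernel file. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t max_threads = 0;
	size_t len = sizeof(max_threads);

	/* "kern.wq_max_threads" follows from SYSCTL_INT(_kern, ..., wq_max_threads, ...) above */
	if (sysctlbyname("kern.wq_max_threads", &max_threads, &len, NULL, 0) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	printf("kern.wq_max_threads = %u\n", max_threads);
	return 0;
}
```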
151 | #pragma mark p_wqptr | |
152 | ||
153 | #define WQPTR_IS_INITING_VALUE ((struct workqueue *)~(uintptr_t)0) | |
154 | ||
155 | static struct workqueue * | |
156 | proc_get_wqptr_fast(struct proc *p) | |
157 | { | |
158 | return os_atomic_load(&p->p_wqptr, relaxed); | |
159 | } | |
160 | ||
161 | static struct workqueue * | |
162 | proc_get_wqptr(struct proc *p) | |
163 | { | |
164 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
165 | return wq == WQPTR_IS_INITING_VALUE ? NULL : wq; | |
166 | } | |
167 | ||
168 | static void | |
169 | proc_set_wqptr(struct proc *p, struct workqueue *wq) | |
170 | { | |
171 | wq = os_atomic_xchg(&p->p_wqptr, wq, release); | |
172 | if (wq == WQPTR_IS_INITING_VALUE) { | |
173 | proc_lock(p); | |
174 | thread_wakeup(&p->p_wqptr); | |
175 | proc_unlock(p); | |
176 | } | |
177 | } | |
178 | ||
179 | static bool | |
180 | proc_init_wqptr_or_wait(struct proc *p) | |
181 | { | |
182 | struct workqueue *wq; | |
183 | ||
184 | proc_lock(p); | |
cb323159 | 185 | wq = os_atomic_load(&p->p_wqptr, relaxed); |
d9a64523 A |
186 | |
187 | if (wq == NULL) { | |
cb323159 | 188 | os_atomic_store(&p->p_wqptr, WQPTR_IS_INITING_VALUE, relaxed); |
d9a64523 A |
189 | proc_unlock(p); |
190 | return true; | |
191 | } | |
192 | ||
193 | if (wq == WQPTR_IS_INITING_VALUE) { | |
194 | assert_wait(&p->p_wqptr, THREAD_UNINT); | |
195 | proc_unlock(p); | |
196 | thread_block(THREAD_CONTINUE_NULL); | |
197 | } else { | |
198 | proc_unlock(p); | |
199 | } | |
200 | return false; | |
201 | } | |
202 | ||
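proc_init_wqptr_or_wait() implements an "initialize once or wait for the initializer" protocol on p_wqptr: the winner sees NULL, publishes the WQPTR_IS_INITING_VALUE sentinel and returns true; losers sleep on &p->p_wqptr until proc_set_wqptr() wakes them. A rough user-space analog of the same pattern, using a mutex and condition variable in place of the proc lock and assert_wait/thread_wakeup (illustrative only, not the kernel mechanism):

```c
/* User-space analog of the init-or-wait protocol above (illustrative). */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static void *wqptr;                       /* NULL, INITING, or the real object */
#define INITING ((void *)~(uintptr_t)0)   /* mirrors WQPTR_IS_INITING_VALUE */

static void *
get_or_init(void *(*make)(void))
{
	pthread_mutex_lock(&lock);
	while (wqptr == NULL || wqptr == INITING) {
		if (wqptr == NULL) {
			wqptr = INITING;                 /* we won: publish the sentinel */
			pthread_mutex_unlock(&lock);
			void *obj = make();              /* initialize outside the lock */
			pthread_mutex_lock(&lock);
			wqptr = obj;                     /* proc_set_wqptr() */
			pthread_cond_broadcast(&cond);   /* thread_wakeup() */
		} else {
			pthread_cond_wait(&cond, &lock); /* initializer in flight: wait */
		}
	}
	void *result = wqptr;
	pthread_mutex_unlock(&lock);
	return result;
}

static void *
make_object(void)
{
	static int object;   /* stands in for the real workqueue allocation */
	return &object;
}

int
main(void)
{
	return get_or_init(make_object) != NULL ? 0 : 1;
}
```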
203 | static inline event_t | |
204 | workq_parked_wait_event(struct uthread *uth) | |
205 | { | |
206 | return (event_t)&uth->uu_workq_stackaddr; | |
207 | } | |
208 | ||
209 | static inline void | |
210 | workq_thread_wakeup(struct uthread *uth) | |
211 | { | |
cb323159 | 212 | thread_wakeup_thread(workq_parked_wait_event(uth), uth->uu_thread); |
d9a64523 A |
213 | } |
214 | ||
215 | #pragma mark wq_thactive | |
216 | ||
217 | #if defined(__LP64__) | |
218 | // Layout is: | |
219 | // 127 - 115 : 13 bits of zeroes | |
220 | // 114 - 112 : best QoS among all pending constrained requests | |
221 | // 111 - 0 : MGR, AUI, UI, IN, DF, UT, BG+MT buckets every 16 bits | |
222 | #define WQ_THACTIVE_BUCKET_WIDTH 16 | |
223 | #define WQ_THACTIVE_QOS_SHIFT (7 * WQ_THACTIVE_BUCKET_WIDTH) | |
224 | #else | |
225 | // Layout is: | |
226 | // 63 - 61 : best QoS among all pending constrained requests | |
227 | // 60 : Manager bucket (0 or 1) | |
228 | // 59 - 0 : AUI, UI, IN, DF, UT, BG+MT buckets every 10 bits | |
229 | #define WQ_THACTIVE_BUCKET_WIDTH 10 | |
230 | #define WQ_THACTIVE_QOS_SHIFT (6 * WQ_THACTIVE_BUCKET_WIDTH + 1) | |
231 | #endif | |
232 | #define WQ_THACTIVE_BUCKET_MASK ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1) | |
233 | #define WQ_THACTIVE_BUCKET_HALF (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1)) | |
234 | ||
235 | static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3, | |
0a7de745 | 236 | "Make sure we have space to encode a QoS"); |
d9a64523 A |
237 | |
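The packed wq_thactive_t value described above decodes with plain shifts and masks. A stand-alone sketch of the LP64 layout (16-bit buckets, best constrained QoS in the bits above the seven buckets); the constants mirror the listing and the 128-bit type relies on the Clang/GCC __int128 extension:

```c
/* Illustrative decoding of the LP64 wq_thactive layout (not kernel code). */
#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 wq_thactive_t;     /* Clang/GCC extension */

#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)

static uint32_t
bucket_count(wq_thactive_t v, int bucket)
{
	return (uint32_t)(v >> (bucket * WQ_THACTIVE_BUCKET_WIDTH)) &
	    WQ_THACTIVE_BUCKET_MASK;
}

int
main(void)
{
	wq_thactive_t v = 0;

	/* two active threads in bucket 3, best constrained QoS of 5 on top */
	v += (wq_thactive_t)2 << (3 * WQ_THACTIVE_BUCKET_WIDTH);
	v += (wq_thactive_t)5 << WQ_THACTIVE_QOS_SHIFT;

	assert(bucket_count(v, 3) == 2);
	assert(bucket_count(v, 0) == 0);
	assert((uint32_t)(v >> WQ_THACTIVE_QOS_SHIFT) == 5);
	return 0;
}
```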
238 | static inline wq_thactive_t | |
239 | _wq_thactive(struct workqueue *wq) | |
240 | { | |
cb323159 | 241 | return os_atomic_load_wide(&wq->wq_thactive, relaxed); |
d9a64523 A |
242 | } |
243 | ||
244 | static inline int | |
245 | _wq_bucket(thread_qos_t qos) | |
246 | { | |
247 | // Map both BG and MT to the same bucket by over-shifting down and | |
248 | // clamping MT and BG together. | |
249 | switch (qos) { | |
250 | case THREAD_QOS_MAINTENANCE: | |
251 | return 0; | |
252 | default: | |
253 | return qos - 2; | |
254 | } | |
255 | } | |
256 | ||
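The net effect of _wq_bucket() is that MAINTENANCE and BACKGROUND share bucket 0 while every higher QoS level n lands in bucket n - 2. A tiny stand-alone check of that mapping; the local enum values are assumptions for illustration, following the MT, BG, UT, DF, IN, UI, AUI order in the layout comment above:

```c
/* Illustrative check of the bucket mapping (local enum, not kernel types). */
#include <assert.h>

enum { QOS_MT = 1, QOS_BG, QOS_UT, QOS_DF, QOS_IN, QOS_UI, QOS_AUI };

static int
wq_bucket(int qos)
{
	/* MT and BG collapse into bucket 0; everything else shifts down by 2 */
	return qos == QOS_MT ? 0 : qos - 2;
}

int
main(void)
{
	assert(wq_bucket(QOS_MT) == 0 && wq_bucket(QOS_BG) == 0);
	assert(wq_bucket(QOS_UT) == 1);
	assert(wq_bucket(QOS_AUI) == 5);   /* QoS buckets 0..5; MGR is handled separately */
	return 0;
}
```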
257 | #define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \ | |
f427ee49 | 258 | ((thread_qos_t)((tha) >> WQ_THACTIVE_QOS_SHIFT)) |
d9a64523 A |
259 | |
260 | static inline thread_qos_t | |
261 | _wq_thactive_best_constrained_req_qos(struct workqueue *wq) | |
262 | { | |
263 | // Avoid expensive atomic operations: the three bits we're loading are in | |
264 | // a single byte, and always updated under the workqueue lock | |
265 | wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive; | |
266 | return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v); | |
267 | } | |
268 | ||
269 | static void | |
270 | _wq_thactive_refresh_best_constrained_req_qos(struct workqueue *wq) | |
271 | { | |
272 | thread_qos_t old_qos, new_qos; | |
273 | workq_threadreq_t req; | |
274 | ||
275 | req = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 276 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
277 | new_qos = req ? req->tr_qos : THREAD_QOS_UNSPECIFIED; |
278 | old_qos = _wq_thactive_best_constrained_req_qos(wq); | |
279 | if (old_qos != new_qos) { | |
280 | long delta = (long)new_qos - (long)old_qos; | |
281 | wq_thactive_t v = (wq_thactive_t)delta << WQ_THACTIVE_QOS_SHIFT; | |
282 | /* | |
283 | * We can do an atomic add relative to the initial load because updates | |
284 | * to this qos are always serialized under the workqueue lock. | |
285 | */ | |
286 | v = os_atomic_add(&wq->wq_thactive, v, relaxed); | |
287 | #ifdef __LP64__ | |
288 | WQ_TRACE_WQ(TRACE_wq_thactive_update, wq, (uint64_t)v, | |
0a7de745 | 289 | (uint64_t)(v >> 64), 0, 0); |
d9a64523 A |
290 | #else |
291 | WQ_TRACE_WQ(TRACE_wq_thactive_update, wq, v, 0, 0, 0); | |
292 | #endif | |
293 | } | |
294 | } | |
295 | ||
296 | static inline wq_thactive_t | |
297 | _wq_thactive_offset_for_qos(thread_qos_t qos) | |
298 | { | |
299 | return (wq_thactive_t)1 << (_wq_bucket(qos) * WQ_THACTIVE_BUCKET_WIDTH); | |
300 | } | |
301 | ||
302 | static inline wq_thactive_t | |
303 | _wq_thactive_inc(struct workqueue *wq, thread_qos_t qos) | |
304 | { | |
305 | wq_thactive_t v = _wq_thactive_offset_for_qos(qos); | |
306 | return os_atomic_add_orig(&wq->wq_thactive, v, relaxed); | |
307 | } | |
308 | ||
309 | static inline wq_thactive_t | |
310 | _wq_thactive_dec(struct workqueue *wq, thread_qos_t qos) | |
311 | { | |
312 | wq_thactive_t v = _wq_thactive_offset_for_qos(qos); | |
313 | return os_atomic_sub_orig(&wq->wq_thactive, v, relaxed); | |
314 | } | |
315 | ||
316 | static inline void | |
317 | _wq_thactive_move(struct workqueue *wq, | |
0a7de745 | 318 | thread_qos_t old_qos, thread_qos_t new_qos) |
d9a64523 A |
319 | { |
320 | wq_thactive_t v = _wq_thactive_offset_for_qos(new_qos) - | |
0a7de745 | 321 | _wq_thactive_offset_for_qos(old_qos); |
cb323159 | 322 | os_atomic_add(&wq->wq_thactive, v, relaxed); |
d9a64523 A |
323 | wq->wq_thscheduled_count[_wq_bucket(old_qos)]--; |
324 | wq->wq_thscheduled_count[_wq_bucket(new_qos)]++; | |
325 | } | |
326 | ||
327 | static inline uint32_t | |
328 | _wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v, | |
0a7de745 | 329 | thread_qos_t qos, uint32_t *busycount, uint32_t *max_busycount) |
d9a64523 A |
330 | { |
331 | uint32_t count = 0, active; | |
332 | uint64_t curtime; | |
333 | ||
334 | assert(WORKQ_THREAD_QOS_MIN <= qos && qos <= WORKQ_THREAD_QOS_MAX); | |
335 | ||
336 | if (busycount) { | |
337 | curtime = mach_absolute_time(); | |
338 | *busycount = 0; | |
339 | } | |
340 | if (max_busycount) { | |
341 | *max_busycount = THREAD_QOS_LAST - qos; | |
342 | } | |
343 | ||
344 | int i = _wq_bucket(qos); | |
345 | v >>= i * WQ_THACTIVE_BUCKET_WIDTH; | |
346 | for (; i < WORKQ_NUM_QOS_BUCKETS; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) { | |
347 | active = v & WQ_THACTIVE_BUCKET_MASK; | |
348 | count += active; | |
349 | ||
350 | if (busycount && wq->wq_thscheduled_count[i] > active) { | |
351 | if (workq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) { | |
352 | /* | |
353 | * We only consider the last blocked thread for a given bucket | |
354 | * as busy because we don't want to take the list lock in each | |
355 | * sched callback. However this is an approximation that could | |
356 | * contribute to thread creation storms. | |
357 | */ | |
358 | (*busycount)++; | |
359 | } | |
360 | } | |
361 | } | |
362 | ||
363 | return count; | |
364 | } | |
365 | ||
366 | #pragma mark wq_flags | |
367 | ||
368 | static inline uint32_t | |
369 | _wq_flags(struct workqueue *wq) | |
370 | { | |
371 | return os_atomic_load(&wq->wq_flags, relaxed); | |
372 | } | |
373 | ||
374 | static inline bool | |
375 | _wq_exiting(struct workqueue *wq) | |
376 | { | |
377 | return _wq_flags(wq) & WQ_EXITING; | |
378 | } | |
379 | ||
380 | bool | |
381 | workq_is_exiting(struct proc *p) | |
382 | { | |
383 | struct workqueue *wq = proc_get_wqptr(p); | |
384 | return !wq || _wq_exiting(wq); | |
385 | } | |
386 | ||
d9a64523 A |
387 | #pragma mark workqueue lock |
388 | ||
389 | static bool | |
390 | workq_lock_spin_is_acquired_kdp(struct workqueue *wq) | |
391 | { | |
392 | return kdp_lck_spin_is_acquired(&wq->wq_lock); | |
393 | } | |
394 | ||
395 | static inline void | |
396 | workq_lock_spin(struct workqueue *wq) | |
397 | { | |
f427ee49 | 398 | lck_spin_lock_grp(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
399 | } |
400 | ||
401 | static inline void | |
402 | workq_lock_held(__assert_only struct workqueue *wq) | |
403 | { | |
404 | LCK_SPIN_ASSERT(&wq->wq_lock, LCK_ASSERT_OWNED); | |
405 | } | |
406 | ||
407 | static inline bool | |
408 | workq_lock_try(struct workqueue *wq) | |
409 | { | |
f427ee49 | 410 | return lck_spin_try_lock_grp(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
411 | } |
412 | ||
413 | static inline void | |
414 | workq_unlock(struct workqueue *wq) | |
415 | { | |
416 | lck_spin_unlock(&wq->wq_lock); | |
417 | } | |
418 | ||
419 | #pragma mark idle thread lists | |
420 | ||
421 | #define WORKQ_POLICY_INIT(qos) \ | |
0a7de745 | 422 | (struct uu_workq_policy){ .qos_req = qos, .qos_bucket = qos } |
d9a64523 A |
423 | |
424 | static inline thread_qos_t | |
425 | workq_pri_bucket(struct uu_workq_policy req) | |
426 | { | |
427 | return MAX(MAX(req.qos_req, req.qos_max), req.qos_override); | |
428 | } | |
429 | ||
430 | static inline thread_qos_t | |
431 | workq_pri_override(struct uu_workq_policy req) | |
432 | { | |
433 | return MAX(workq_pri_bucket(req), req.qos_bucket); | |
434 | } | |
435 | ||
436 | static inline bool | |
437 | workq_thread_needs_params_change(workq_threadreq_t req, struct uthread *uth) | |
438 | { | |
439 | workq_threadreq_param_t cur_trp, req_trp = { }; | |
440 | ||
441 | cur_trp.trp_value = uth->uu_save.uus_workq_park_data.workloop_params; | |
cb323159 | 442 | if (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS) { |
d9a64523 A |
443 | req_trp = kqueue_threadreq_workloop_param(req); |
444 | } | |
445 | ||
446 | /* | |
447 | * CPU percent flags are handled separately to policy changes, so ignore | |
448 | * them for all of these checks. | |
449 | */ | |
450 | uint16_t cur_flags = (cur_trp.trp_flags & ~TRP_CPUPERCENT); | |
451 | uint16_t req_flags = (req_trp.trp_flags & ~TRP_CPUPERCENT); | |
452 | ||
453 | if (!req_flags && !cur_flags) { | |
454 | return false; | |
455 | } | |
456 | ||
457 | if (req_flags != cur_flags) { | |
458 | return true; | |
459 | } | |
460 | ||
461 | if ((req_flags & TRP_PRIORITY) && req_trp.trp_pri != cur_trp.trp_pri) { | |
462 | return true; | |
463 | } | |
464 | ||
4ba76501 | 465 | if ((req_flags & TRP_POLICY) && req_trp.trp_pol != cur_trp.trp_pol) { |
d9a64523 A |
466 | return true; |
467 | } | |
468 | ||
469 | return false; | |
470 | } | |
471 | ||
472 | static inline bool | |
473 | workq_thread_needs_priority_change(workq_threadreq_t req, struct uthread *uth) | |
474 | { | |
475 | if (workq_thread_needs_params_change(req, uth)) { | |
476 | return true; | |
477 | } | |
478 | ||
479 | return req->tr_qos != workq_pri_override(uth->uu_workq_pri); | |
480 | } | |
481 | ||
482 | static void | |
483 | workq_thread_update_bucket(proc_t p, struct workqueue *wq, struct uthread *uth, | |
0a7de745 A |
484 | struct uu_workq_policy old_pri, struct uu_workq_policy new_pri, |
485 | bool force_run) | |
d9a64523 A |
486 | { |
487 | thread_qos_t old_bucket = old_pri.qos_bucket; | |
488 | thread_qos_t new_bucket = workq_pri_bucket(new_pri); | |
489 | ||
490 | if (old_bucket != new_bucket) { | |
491 | _wq_thactive_move(wq, old_bucket, new_bucket); | |
492 | } | |
493 | ||
494 | new_pri.qos_bucket = new_bucket; | |
495 | uth->uu_workq_pri = new_pri; | |
496 | ||
497 | if (workq_pri_override(old_pri) != new_bucket) { | |
498 | thread_set_workq_override(uth->uu_thread, new_bucket); | |
499 | } | |
500 | ||
501 | if (wq->wq_reqcount && (old_bucket > new_bucket || force_run)) { | |
502 | int flags = WORKQ_THREADREQ_CAN_CREATE_THREADS; | |
503 | if (old_bucket > new_bucket) { | |
504 | /* | |
505 | * When lowering our bucket, we may unblock a thread request, | |
506 | * but we can't drop our priority before we have evaluated | |
507 | * whether this is the case, and if we ever drop the workqueue lock | |
508 | * that would cause a priority inversion. | |
509 | * | |
510 | * We hence have to disallow thread creation in that case. | |
511 | */ | |
512 | flags = 0; | |
513 | } | |
514 | workq_schedule_creator(p, wq, flags); | |
515 | } | |
516 | } | |
517 | ||
518 | /* | |
519 | * Sets/resets the cpu percent limits on the current thread. We can't set | |
520 | * these limits from outside of the current thread, so this function needs | |
521 | * to be called when we're executing on the intended thread. | |
522 | */ | |
523 | static void | |
524 | workq_thread_reset_cpupercent(workq_threadreq_t req, struct uthread *uth) | |
525 | { | |
526 | assert(uth == current_uthread()); | |
527 | workq_threadreq_param_t trp = { }; | |
528 | ||
cb323159 | 529 | if (req && (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)) { |
d9a64523 A |
530 | trp = kqueue_threadreq_workloop_param(req); |
531 | } | |
532 | ||
533 | if (uth->uu_workq_flags & UT_WORKQ_CPUPERCENT) { | |
534 | /* | |
535 | * Going through disable when we have an existing CPU percent limit | |
536 | * set will force the ledger to refill the token bucket of the current | |
537 | * thread. Removing any penalty applied by previous thread use. | |
538 | */ | |
539 | thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0); | |
540 | uth->uu_workq_flags &= ~UT_WORKQ_CPUPERCENT; | |
541 | } | |
542 | ||
543 | if (trp.trp_flags & TRP_CPUPERCENT) { | |
544 | thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, trp.trp_cpupercent, | |
0a7de745 | 545 | (uint64_t)trp.trp_refillms * NSEC_PER_SEC); |
d9a64523 A |
546 | uth->uu_workq_flags |= UT_WORKQ_CPUPERCENT; |
547 | } | |
548 | } | |
549 | ||
550 | static void | |
551 | workq_thread_reset_pri(struct workqueue *wq, struct uthread *uth, | |
cb323159 | 552 | workq_threadreq_t req, bool unpark) |
d9a64523 A |
553 | { |
554 | thread_t th = uth->uu_thread; | |
555 | thread_qos_t qos = req ? req->tr_qos : WORKQ_THREAD_QOS_CLEANUP; | |
556 | workq_threadreq_param_t trp = { }; | |
557 | int priority = 31; | |
558 | int policy = POLICY_TIMESHARE; | |
559 | ||
cb323159 | 560 | if (req && (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)) { |
d9a64523 A |
561 | trp = kqueue_threadreq_workloop_param(req); |
562 | } | |
563 | ||
564 | uth->uu_workq_pri = WORKQ_POLICY_INIT(qos); | |
565 | uth->uu_workq_flags &= ~UT_WORKQ_OUTSIDE_QOS; | |
d9a64523 | 566 | |
cb323159 A |
567 | if (unpark) { |
568 | uth->uu_save.uus_workq_park_data.workloop_params = trp.trp_value; | |
569 | // qos sent out to userspace (may differ from uu_workq_pri on param threads) | |
570 | uth->uu_save.uus_workq_park_data.qos = qos; | |
571 | } | |
d9a64523 A |
572 | |
573 | if (qos == WORKQ_THREAD_QOS_MANAGER) { | |
574 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
575 | assert(trp.trp_value == 0); // manager qos and thread policy don't mix | |
576 | ||
577 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
578 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
579 | thread_set_workq_pri(th, THREAD_QOS_UNSPECIFIED, mgr_pri, | |
0a7de745 | 580 | POLICY_TIMESHARE); |
d9a64523 A |
581 | return; |
582 | } | |
583 | ||
584 | qos = _pthread_priority_thread_qos(mgr_pri); | |
585 | } else { | |
586 | if (trp.trp_flags & TRP_PRIORITY) { | |
587 | qos = THREAD_QOS_UNSPECIFIED; | |
588 | priority = trp.trp_pri; | |
589 | uth->uu_workq_flags |= UT_WORKQ_OUTSIDE_QOS; | |
590 | } | |
591 | ||
592 | if (trp.trp_flags & TRP_POLICY) { | |
593 | policy = trp.trp_pol; | |
594 | } | |
595 | } | |
596 | ||
597 | thread_set_workq_pri(th, qos, priority, policy); | |
598 | } | |
599 | ||
600 | /* | |
601 | * Called by kevent with the NOTE_WL_THREAD_REQUEST knote lock held, | |
602 | * every time a servicer is being told about a new max QoS. | |
603 | */ | |
604 | void | |
cb323159 | 605 | workq_thread_set_max_qos(struct proc *p, workq_threadreq_t kqr) |
d9a64523 A |
606 | { |
607 | struct uu_workq_policy old_pri, new_pri; | |
cb323159 | 608 | struct uthread *uth = current_uthread(); |
d9a64523 | 609 | struct workqueue *wq = proc_get_wqptr_fast(p); |
cb323159 | 610 | thread_qos_t qos = kqr->tr_kq_qos_index; |
d9a64523 | 611 | |
0a7de745 | 612 | if (uth->uu_workq_pri.qos_max == qos) { |
d9a64523 | 613 | return; |
0a7de745 | 614 | } |
d9a64523 A |
615 | |
616 | workq_lock_spin(wq); | |
617 | old_pri = new_pri = uth->uu_workq_pri; | |
618 | new_pri.qos_max = qos; | |
619 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
620 | workq_unlock(wq); | |
621 | } | |
622 | ||
623 | #pragma mark idle threads accounting and handling | |
624 | ||
625 | static inline struct uthread * | |
626 | workq_oldest_killable_idle_thread(struct workqueue *wq) | |
627 | { | |
628 | struct uthread *uth = TAILQ_LAST(&wq->wq_thidlelist, workq_uthread_head); | |
629 | ||
630 | if (uth && !uth->uu_save.uus_workq_park_data.has_stack) { | |
631 | uth = TAILQ_PREV(uth, workq_uthread_head, uu_workq_entry); | |
632 | if (uth) { | |
633 | assert(uth->uu_save.uus_workq_park_data.has_stack); | |
634 | } | |
635 | } | |
636 | return uth; | |
637 | } | |
638 | ||
639 | static inline uint64_t | |
640 | workq_kill_delay_for_idle_thread(struct workqueue *wq) | |
641 | { | |
642 | uint64_t delay = wq_reduce_pool_window.abstime; | |
643 | uint16_t idle = wq->wq_thidlecount; | |
644 | ||
645 | /* | |
646 | * If we have fewer than wq_death_max_load threads, use a 5s timer. | |
647 | * | |
648 | * For the next wq_max_constrained_threads ones, decay linearly | |
649 | * from 5s to 50ms. | |
650 | */ | |
651 | if (idle <= wq_death_max_load) { | |
652 | return delay; | |
653 | } | |
654 | ||
655 | if (wq_max_constrained_threads > idle - wq_death_max_load) { | |
656 | delay *= (wq_max_constrained_threads - (idle - wq_death_max_load)); | |
657 | } | |
658 | return delay / wq_max_constrained_threads; | |
659 | } | |
660 | ||
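A worked example of the decay performed by workq_kill_delay_for_idle_thread(): the first wq_death_max_load idle threads keep the full window, and each extra idle thread shrinks the delay linearly until it bottoms out at window / wq_max_constrained_threads. The window and thread counts below are illustrative values, not the kernel defaults:

```c
/* Stand-alone sketch of the idle-thread kill-delay decay above. */
#include <stdint.h>
#include <stdio.h>

static uint64_t
kill_delay(uint64_t window, uint16_t idle, uint16_t death_max_load,
    uint32_t max_constrained)
{
	uint64_t delay = window;

	if (idle <= death_max_load) {
		return delay;
	}
	if (max_constrained > idle - death_max_load) {
		delay *= (max_constrained - (idle - death_max_load));
	}
	return delay / max_constrained;
}

int
main(void)
{
	uint64_t window = 5000000;   /* 5s in usecs, per the comment above */

	/* death_max_load = 5, max_constrained = 64: both hypothetical here */
	for (uint16_t idle = 5; idle <= 69; idle += 16) {
		printf("idle=%2u -> delay=%llu usecs\n", (unsigned)idle,
		    (unsigned long long)kill_delay(window, idle, 5, 64));
	}
	/* prints 5000000, 3750000, 2500000, 1250000, 78125 */
	return 0;
}
```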
661 | static inline bool | |
662 | workq_should_kill_idle_thread(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 663 | uint64_t now) |
d9a64523 A |
664 | { |
665 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
666 | return now - uth->uu_save.uus_workq_park_data.idle_stamp > delay; | |
667 | } | |
668 | ||
669 | static void | |
670 | workq_death_call_schedule(struct workqueue *wq, uint64_t deadline) | |
671 | { | |
672 | uint32_t wq_flags = os_atomic_load(&wq->wq_flags, relaxed); | |
673 | ||
674 | if (wq_flags & (WQ_EXITING | WQ_DEATH_CALL_SCHEDULED)) { | |
675 | return; | |
676 | } | |
677 | os_atomic_or(&wq->wq_flags, WQ_DEATH_CALL_SCHEDULED, relaxed); | |
678 | ||
679 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_NONE, wq, 1, 0, 0, 0); | |
680 | ||
681 | /* | |
682 | * <rdar://problem/13139182> Due to how long term timers work, the leeway | |
683 | * can't be too short, so use 500ms which is long enough that we will not | |
684 | * wake up the CPU for killing threads, but short enough that it doesn't | |
685 | * fall into long-term timer list shenanigans. | |
686 | */ | |
687 | thread_call_enter_delayed_with_leeway(wq->wq_death_call, NULL, deadline, | |
0a7de745 A |
688 | wq_reduce_pool_window.abstime / 10, |
689 | THREAD_CALL_DELAY_LEEWAY | THREAD_CALL_DELAY_USER_BACKGROUND); | |
d9a64523 A |
690 | } |
691 | ||
692 | /* | |
693 | * `decrement` is set to the number of threads that are no longer dying: | |
694 | * - because they have been resuscitated just in time (workq_pop_idle_thread) | |
695 | * - or have been killed (workq_thread_terminate). | |
696 | */ | |
697 | static void | |
698 | workq_death_policy_evaluate(struct workqueue *wq, uint16_t decrement) | |
699 | { | |
700 | struct uthread *uth; | |
701 | ||
702 | assert(wq->wq_thdying_count >= decrement); | |
0a7de745 | 703 | if ((wq->wq_thdying_count -= decrement) > 0) { |
d9a64523 | 704 | return; |
0a7de745 | 705 | } |
d9a64523 | 706 | |
0a7de745 | 707 | if (wq->wq_thidlecount <= 1) { |
d9a64523 | 708 | return; |
0a7de745 | 709 | } |
d9a64523 | 710 | |
0a7de745 | 711 | if ((uth = workq_oldest_killable_idle_thread(wq)) == NULL) { |
d9a64523 | 712 | return; |
0a7de745 | 713 | } |
d9a64523 A |
714 | |
715 | uint64_t now = mach_absolute_time(); | |
716 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
717 | ||
718 | if (now - uth->uu_save.uus_workq_park_data.idle_stamp > delay) { | |
719 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_START, | |
0a7de745 | 720 | wq, wq->wq_thidlecount, 0, 0, 0); |
d9a64523 A |
721 | wq->wq_thdying_count++; |
722 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
cb323159 A |
723 | if ((uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) == 0) { |
724 | workq_thread_wakeup(uth); | |
725 | } | |
d9a64523 A |
726 | return; |
727 | } | |
728 | ||
729 | workq_death_call_schedule(wq, | |
0a7de745 | 730 | uth->uu_save.uus_workq_park_data.idle_stamp + delay); |
d9a64523 A |
731 | } |
732 | ||
733 | void | |
734 | workq_thread_terminate(struct proc *p, struct uthread *uth) | |
735 | { | |
736 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
737 | ||
738 | workq_lock_spin(wq); | |
739 | TAILQ_REMOVE(&wq->wq_thrunlist, uth, uu_workq_entry); | |
740 | if (uth->uu_workq_flags & UT_WORKQ_DYING) { | |
741 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_END, | |
0a7de745 | 742 | wq, wq->wq_thidlecount, 0, 0, 0); |
d9a64523 A |
743 | workq_death_policy_evaluate(wq, 1); |
744 | } | |
745 | if (wq->wq_nthreads-- == wq_max_threads) { | |
746 | /* | |
747 | * We got under the thread limit again, which may have prevented | |
748 | * thread creation from happening, redrive if there are pending requests | |
749 | */ | |
750 | if (wq->wq_reqcount) { | |
751 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
752 | } | |
753 | } | |
754 | workq_unlock(wq); | |
755 | ||
756 | thread_deallocate(uth->uu_thread); | |
757 | } | |
758 | ||
759 | static void | |
760 | workq_kill_old_threads_call(void *param0, void *param1 __unused) | |
761 | { | |
762 | struct workqueue *wq = param0; | |
763 | ||
764 | workq_lock_spin(wq); | |
765 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_START, wq, 0, 0, 0, 0); | |
cb323159 | 766 | os_atomic_andnot(&wq->wq_flags, WQ_DEATH_CALL_SCHEDULED, relaxed); |
d9a64523 A |
767 | workq_death_policy_evaluate(wq, 0); |
768 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_END, wq, 0, 0, 0, 0); | |
769 | workq_unlock(wq); | |
770 | } | |
771 | ||
772 | static struct uthread * | |
cb323159 A |
773 | workq_pop_idle_thread(struct workqueue *wq, uint8_t uu_flags, |
774 | bool *needs_wakeup) | |
d9a64523 A |
775 | { |
776 | struct uthread *uth; | |
777 | ||
778 | if ((uth = TAILQ_FIRST(&wq->wq_thidlelist))) { | |
779 | TAILQ_REMOVE(&wq->wq_thidlelist, uth, uu_workq_entry); | |
780 | } else { | |
781 | uth = TAILQ_FIRST(&wq->wq_thnewlist); | |
782 | TAILQ_REMOVE(&wq->wq_thnewlist, uth, uu_workq_entry); | |
783 | } | |
784 | TAILQ_INSERT_TAIL(&wq->wq_thrunlist, uth, uu_workq_entry); | |
785 | ||
786 | assert((uth->uu_workq_flags & UT_WORKQ_RUNNING) == 0); | |
cb323159 A |
787 | uth->uu_workq_flags |= UT_WORKQ_RUNNING | uu_flags; |
788 | if ((uu_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
789 | wq->wq_constrained_threads_scheduled++; | |
790 | } | |
d9a64523 A |
791 | wq->wq_threads_scheduled++; |
792 | wq->wq_thidlecount--; | |
793 | ||
794 | if (__improbable(uth->uu_workq_flags & UT_WORKQ_DYING)) { | |
795 | uth->uu_workq_flags ^= UT_WORKQ_DYING; | |
796 | workq_death_policy_evaluate(wq, 1); | |
cb323159 A |
797 | *needs_wakeup = false; |
798 | } else if (uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) { | |
799 | *needs_wakeup = false; | |
800 | } else { | |
801 | *needs_wakeup = true; | |
d9a64523 A |
802 | } |
803 | return uth; | |
804 | } | |
805 | ||
806 | /* | |
807 | * Called by thread_create_workq_waiting() during thread initialization, before | |
808 | * assert_wait, before the thread has been started. | |
809 | */ | |
810 | event_t | |
811 | workq_thread_init_and_wq_lock(task_t task, thread_t th) | |
812 | { | |
813 | struct uthread *uth = get_bsdthread_info(th); | |
814 | ||
815 | uth->uu_workq_flags = UT_WORKQ_NEW; | |
816 | uth->uu_workq_pri = WORKQ_POLICY_INIT(THREAD_QOS_LEGACY); | |
817 | uth->uu_workq_thport = MACH_PORT_NULL; | |
818 | uth->uu_workq_stackaddr = 0; | |
cb323159 | 819 | uth->uu_workq_pthread_kill_allowed = 0; |
d9a64523 A |
820 | |
821 | thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE); | |
822 | thread_reset_workq_qos(th, THREAD_QOS_LEGACY); | |
823 | ||
824 | workq_lock_spin(proc_get_wqptr_fast(get_bsdtask_info(task))); | |
825 | return workq_parked_wait_event(uth); | |
826 | } | |
827 | ||
828 | /** | |
829 | * Try to add a new workqueue thread. | |
830 | * | |
831 | * - called with workq lock held | |
832 | * - dropped and retaken around thread creation | |
833 | * - return with workq lock held | |
834 | */ | |
835 | static bool | |
836 | workq_add_new_idle_thread(proc_t p, struct workqueue *wq) | |
837 | { | |
838 | mach_vm_offset_t th_stackaddr; | |
839 | kern_return_t kret; | |
840 | thread_t th; | |
841 | ||
842 | wq->wq_nthreads++; | |
843 | ||
844 | workq_unlock(wq); | |
845 | ||
846 | vm_map_t vmap = get_task_map(p->task); | |
847 | ||
848 | kret = pthread_functions->workq_create_threadstack(p, vmap, &th_stackaddr); | |
849 | if (kret != KERN_SUCCESS) { | |
850 | WQ_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, | |
0a7de745 | 851 | kret, 1, 0, 0); |
d9a64523 A |
852 | goto out; |
853 | } | |
854 | ||
855 | kret = thread_create_workq_waiting(p->task, workq_unpark_continue, &th); | |
856 | if (kret != KERN_SUCCESS) { | |
857 | WQ_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, | |
0a7de745 | 858 | kret, 0, 0, 0); |
d9a64523 A |
859 | pthread_functions->workq_destroy_threadstack(p, vmap, th_stackaddr); |
860 | goto out; | |
861 | } | |
862 | ||
863 | // thread_create_workq_waiting() will return with the wq lock held | |
864 | // on success, because it calls workq_thread_init_and_wq_lock() above | |
865 | ||
866 | struct uthread *uth = get_bsdthread_info(th); | |
867 | ||
868 | wq->wq_creations++; | |
869 | wq->wq_thidlecount++; | |
f427ee49 | 870 | uth->uu_workq_stackaddr = (user_addr_t)th_stackaddr; |
d9a64523 A |
871 | TAILQ_INSERT_TAIL(&wq->wq_thnewlist, uth, uu_workq_entry); |
872 | ||
873 | WQ_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
874 | return true; | |
875 | ||
876 | out: | |
877 | workq_lock_spin(wq); | |
878 | /* | |
879 | * Do not redrive here if we went under wq_max_threads again; | |
880 | * it is the responsibility of the callers of this function | |
881 | * to do so when it fails. | |
882 | */ | |
883 | wq->wq_nthreads--; | |
884 | return false; | |
885 | } | |
886 | ||
887 | #define WORKQ_UNPARK_FOR_DEATH_WAS_IDLE 0x1 | |
888 | ||
889 | __attribute__((noreturn, noinline)) | |
890 | static void | |
891 | workq_unpark_for_death_and_unlock(proc_t p, struct workqueue *wq, | |
cb323159 | 892 | struct uthread *uth, uint32_t death_flags, uint32_t setup_flags) |
d9a64523 A |
893 | { |
894 | thread_qos_t qos = workq_pri_override(uth->uu_workq_pri); | |
895 | bool first_use = uth->uu_workq_flags & UT_WORKQ_NEW; | |
896 | ||
897 | if (qos > WORKQ_THREAD_QOS_CLEANUP) { | |
cb323159 | 898 | workq_thread_reset_pri(wq, uth, NULL, /*unpark*/ true); |
d9a64523 A |
899 | qos = WORKQ_THREAD_QOS_CLEANUP; |
900 | } | |
901 | ||
902 | workq_thread_reset_cpupercent(NULL, uth); | |
903 | ||
904 | if (death_flags & WORKQ_UNPARK_FOR_DEATH_WAS_IDLE) { | |
905 | wq->wq_thidlecount--; | |
906 | if (first_use) { | |
907 | TAILQ_REMOVE(&wq->wq_thnewlist, uth, uu_workq_entry); | |
908 | } else { | |
909 | TAILQ_REMOVE(&wq->wq_thidlelist, uth, uu_workq_entry); | |
910 | } | |
911 | } | |
912 | TAILQ_INSERT_TAIL(&wq->wq_thrunlist, uth, uu_workq_entry); | |
913 | ||
914 | workq_unlock(wq); | |
915 | ||
cb323159 A |
916 | if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { |
917 | __assert_only kern_return_t kr; | |
918 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
919 | assert(kr == KERN_SUCCESS); | |
920 | } | |
921 | ||
d9a64523 | 922 | uint32_t flags = WQ_FLAG_THREAD_NEWSPI | qos | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
923 | thread_t th = uth->uu_thread; |
924 | vm_map_t vmap = get_task_map(p->task); | |
925 | ||
0a7de745 A |
926 | if (!first_use) { |
927 | flags |= WQ_FLAG_THREAD_REUSE; | |
928 | } | |
d9a64523 A |
929 | |
930 | pthread_functions->workq_setup_thread(p, th, vmap, uth->uu_workq_stackaddr, | |
cb323159 | 931 | uth->uu_workq_thport, 0, WQ_SETUP_EXIT_THREAD, flags); |
d9a64523 A |
932 | __builtin_unreachable(); |
933 | } | |
934 | ||
935 | bool | |
936 | workq_is_current_thread_updating_turnstile(struct workqueue *wq) | |
937 | { | |
938 | return wq->wq_turnstile_updater == current_thread(); | |
939 | } | |
940 | ||
941 | __attribute__((always_inline)) | |
942 | static inline void | |
943 | workq_perform_turnstile_operation_locked(struct workqueue *wq, | |
0a7de745 | 944 | void (^operation)(void)) |
d9a64523 A |
945 | { |
946 | workq_lock_held(wq); | |
947 | wq->wq_turnstile_updater = current_thread(); | |
948 | operation(); | |
949 | wq->wq_turnstile_updater = THREAD_NULL; | |
950 | } | |
951 | ||
952 | static void | |
953 | workq_turnstile_update_inheritor(struct workqueue *wq, | |
0a7de745 A |
954 | turnstile_inheritor_t inheritor, |
955 | turnstile_update_flags_t flags) | |
d9a64523 | 956 | { |
cb323159 A |
957 | if (wq->wq_inheritor == inheritor) { |
958 | return; | |
959 | } | |
960 | wq->wq_inheritor = inheritor; | |
d9a64523 A |
961 | workq_perform_turnstile_operation_locked(wq, ^{ |
962 | turnstile_update_inheritor(wq->wq_turnstile, inheritor, | |
0a7de745 | 963 | flags | TURNSTILE_IMMEDIATE_UPDATE); |
d9a64523 | 964 | turnstile_update_inheritor_complete(wq->wq_turnstile, |
0a7de745 | 965 | TURNSTILE_INTERLOCK_HELD); |
d9a64523 A |
966 | }); |
967 | } | |
968 | ||
969 | static void | |
cb323159 A |
970 | workq_push_idle_thread(proc_t p, struct workqueue *wq, struct uthread *uth, |
971 | uint32_t setup_flags) | |
d9a64523 A |
972 | { |
973 | uint64_t now = mach_absolute_time(); | |
cb323159 | 974 | bool is_creator = (uth == wq->wq_creator); |
d9a64523 | 975 | |
d9a64523 A |
976 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { |
977 | wq->wq_constrained_threads_scheduled--; | |
978 | } | |
cb323159 | 979 | uth->uu_workq_flags &= ~(UT_WORKQ_RUNNING | UT_WORKQ_OVERCOMMIT); |
d9a64523 A |
980 | TAILQ_REMOVE(&wq->wq_thrunlist, uth, uu_workq_entry); |
981 | wq->wq_threads_scheduled--; | |
982 | ||
cb323159 A |
983 | if (is_creator) { |
984 | wq->wq_creator = NULL; | |
d9a64523 | 985 | WQ_TRACE_WQ(TRACE_wq_creator_select, wq, 3, 0, |
0a7de745 | 986 | uth->uu_save.uus_workq_park_data.yields, 0); |
cb323159 A |
987 | } |
988 | ||
989 | if (wq->wq_inheritor == uth->uu_thread) { | |
990 | assert(wq->wq_creator == NULL); | |
d9a64523 A |
991 | if (wq->wq_reqcount) { |
992 | workq_turnstile_update_inheritor(wq, wq, TURNSTILE_INHERITOR_WORKQ); | |
993 | } else { | |
994 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
995 | } | |
cb323159 A |
996 | } |
997 | ||
998 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
999 | assert(is_creator || (_wq_flags(wq) & WQ_EXITING)); | |
1000 | TAILQ_INSERT_TAIL(&wq->wq_thnewlist, uth, uu_workq_entry); | |
1001 | wq->wq_thidlecount++; | |
1002 | return; | |
1003 | } | |
1004 | ||
1005 | if (!is_creator) { | |
d9a64523 A |
1006 | _wq_thactive_dec(wq, uth->uu_workq_pri.qos_bucket); |
1007 | wq->wq_thscheduled_count[_wq_bucket(uth->uu_workq_pri.qos_bucket)]--; | |
d9a64523 A |
1008 | uth->uu_workq_flags |= UT_WORKQ_IDLE_CLEANUP; |
1009 | } | |
1010 | ||
1011 | uth->uu_save.uus_workq_park_data.idle_stamp = now; | |
1012 | ||
1013 | struct uthread *oldest = workq_oldest_killable_idle_thread(wq); | |
1014 | uint16_t cur_idle = wq->wq_thidlecount; | |
1015 | ||
1016 | if (cur_idle >= wq_max_constrained_threads || | |
0a7de745 A |
1017 | (wq->wq_thdying_count == 0 && oldest && |
1018 | workq_should_kill_idle_thread(wq, oldest, now))) { | |
d9a64523 A |
1019 | /* |
1020 | * Immediately kill threads if we have too many of them. | |
1021 | * | |
1022 | * And swap "place" with the oldest one we'd have woken up. | |
1023 | * This is a relatively desperate situation where we really | |
1024 | * need to kill threads quickly, and it's better to kill | |
1025 | * the one that's currently on core than to context switch. | |
1026 | */ | |
1027 | if (oldest) { | |
1028 | oldest->uu_save.uus_workq_park_data.idle_stamp = now; | |
1029 | TAILQ_REMOVE(&wq->wq_thidlelist, oldest, uu_workq_entry); | |
1030 | TAILQ_INSERT_HEAD(&wq->wq_thidlelist, oldest, uu_workq_entry); | |
1031 | } | |
1032 | ||
1033 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_START, | |
0a7de745 | 1034 | wq, cur_idle, 0, 0, 0); |
d9a64523 A |
1035 | wq->wq_thdying_count++; |
1036 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
1037 | uth->uu_workq_flags &= ~UT_WORKQ_IDLE_CLEANUP; | |
cb323159 | 1038 | workq_unpark_for_death_and_unlock(p, wq, uth, 0, setup_flags); |
d9a64523 A |
1039 | __builtin_unreachable(); |
1040 | } | |
1041 | ||
1042 | struct uthread *tail = TAILQ_LAST(&wq->wq_thidlelist, workq_uthread_head); | |
1043 | ||
1044 | cur_idle += 1; | |
1045 | wq->wq_thidlecount = cur_idle; | |
1046 | ||
1047 | if (cur_idle >= wq_death_max_load && tail && | |
0a7de745 | 1048 | tail->uu_save.uus_workq_park_data.has_stack) { |
d9a64523 A |
1049 | uth->uu_save.uus_workq_park_data.has_stack = false; |
1050 | TAILQ_INSERT_TAIL(&wq->wq_thidlelist, uth, uu_workq_entry); | |
1051 | } else { | |
1052 | uth->uu_save.uus_workq_park_data.has_stack = true; | |
1053 | TAILQ_INSERT_HEAD(&wq->wq_thidlelist, uth, uu_workq_entry); | |
1054 | } | |
1055 | ||
1056 | if (!tail) { | |
1057 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
1058 | workq_death_call_schedule(wq, now + delay); | |
1059 | } | |
1060 | } | |
1061 | ||
1062 | #pragma mark thread requests | |
1063 | ||
1064 | static inline int | |
1065 | workq_priority_for_req(workq_threadreq_t req) | |
1066 | { | |
1067 | thread_qos_t qos = req->tr_qos; | |
1068 | ||
cb323159 | 1069 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 A |
1070 | workq_threadreq_param_t trp = kqueue_threadreq_workloop_param(req); |
1071 | assert(trp.trp_flags & TRP_PRIORITY); | |
1072 | return trp.trp_pri; | |
1073 | } | |
1074 | return thread_workq_pri_for_qos(qos); | |
1075 | } | |
1076 | ||
f427ee49 | 1077 | static inline struct priority_queue_sched_max * |
d9a64523 A |
1078 | workq_priority_queue_for_req(struct workqueue *wq, workq_threadreq_t req) |
1079 | { | |
cb323159 | 1080 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 | 1081 | return &wq->wq_special_queue; |
cb323159 | 1082 | } else if (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { |
d9a64523 A |
1083 | return &wq->wq_overcommit_queue; |
1084 | } else { | |
1085 | return &wq->wq_constrained_queue; | |
1086 | } | |
1087 | } | |
1088 | ||
1089 | /* | |
1090 | * returns true if the enqueued request is the highest priority item | |
1091 | * in its priority queue. | |
1092 | */ | |
1093 | static bool | |
1094 | workq_threadreq_enqueue(struct workqueue *wq, workq_threadreq_t req) | |
1095 | { | |
cb323159 | 1096 | assert(req->tr_state == WORKQ_TR_STATE_NEW); |
d9a64523 | 1097 | |
cb323159 | 1098 | req->tr_state = WORKQ_TR_STATE_QUEUED; |
d9a64523 A |
1099 | wq->wq_reqcount += req->tr_count; |
1100 | ||
1101 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
1102 | assert(wq->wq_event_manager_threadreq == NULL); | |
cb323159 | 1103 | assert(req->tr_flags & WORKQ_TR_FLAG_KEVENT); |
d9a64523 A |
1104 | assert(req->tr_count == 1); |
1105 | wq->wq_event_manager_threadreq = req; | |
1106 | return true; | |
1107 | } | |
f427ee49 A |
1108 | |
1109 | struct priority_queue_sched_max *q = workq_priority_queue_for_req(wq, req); | |
1110 | priority_queue_entry_set_sched_pri(q, &req->tr_entry, | |
1111 | workq_priority_for_req(req), false); | |
1112 | ||
1113 | if (priority_queue_insert(q, &req->tr_entry)) { | |
cb323159 | 1114 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
1115 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
1116 | } | |
1117 | return true; | |
1118 | } | |
1119 | return false; | |
1120 | } | |
1121 | ||
1122 | /* | |
1123 | * returns true if the dequeued request was the highest priority item | |
1124 | * in its priority queue. | |
1125 | */ | |
1126 | static bool | |
1127 | workq_threadreq_dequeue(struct workqueue *wq, workq_threadreq_t req) | |
1128 | { | |
1129 | wq->wq_reqcount--; | |
1130 | ||
1131 | if (--req->tr_count == 0) { | |
1132 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
1133 | assert(wq->wq_event_manager_threadreq == req); | |
1134 | assert(req->tr_count == 0); | |
1135 | wq->wq_event_manager_threadreq = NULL; | |
1136 | return true; | |
1137 | } | |
1138 | if (priority_queue_remove(workq_priority_queue_for_req(wq, req), | |
f427ee49 | 1139 | &req->tr_entry)) { |
cb323159 | 1140 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
1141 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
1142 | } | |
1143 | return true; | |
1144 | } | |
1145 | } | |
1146 | return false; | |
1147 | } | |
1148 | ||
1149 | static void | |
1150 | workq_threadreq_destroy(proc_t p, workq_threadreq_t req) | |
1151 | { | |
cb323159 A |
1152 | req->tr_state = WORKQ_TR_STATE_CANCELED; |
1153 | if (req->tr_flags & (WORKQ_TR_FLAG_WORKLOOP | WORKQ_TR_FLAG_KEVENT)) { | |
d9a64523 A |
1154 | kqueue_threadreq_cancel(p, req); |
1155 | } else { | |
1156 | zfree(workq_zone_threadreq, req); | |
1157 | } | |
1158 | } | |
1159 | ||
d9a64523 A |
1160 | #pragma mark workqueue thread creation thread calls |
1161 | ||
1162 | static inline bool | |
1163 | workq_thread_call_prepost(struct workqueue *wq, uint32_t sched, uint32_t pend, | |
0a7de745 | 1164 | uint32_t fail_mask) |
d9a64523 A |
1165 | { |
1166 | uint32_t old_flags, new_flags; | |
1167 | ||
1168 | os_atomic_rmw_loop(&wq->wq_flags, old_flags, new_flags, acquire, { | |
1169 | if (__improbable(old_flags & (WQ_EXITING | sched | pend | fail_mask))) { | |
0a7de745 | 1170 | os_atomic_rmw_loop_give_up(return false); |
d9a64523 A |
1171 | } |
1172 | if (__improbable(old_flags & WQ_PROC_SUSPENDED)) { | |
0a7de745 | 1173 | new_flags = old_flags | pend; |
d9a64523 | 1174 | } else { |
0a7de745 | 1175 | new_flags = old_flags | sched; |
d9a64523 A |
1176 | } |
1177 | }); | |
1178 | ||
1179 | return (old_flags & WQ_PROC_SUSPENDED) == 0; | |
1180 | } | |
1181 | ||
1182 | #define WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART 0x1 | |
1183 | ||
1184 | static bool | |
1185 | workq_schedule_delayed_thread_creation(struct workqueue *wq, int flags) | |
1186 | { | |
1187 | assert(!preemption_enabled()); | |
1188 | ||
1189 | if (!workq_thread_call_prepost(wq, WQ_DELAYED_CALL_SCHEDULED, | |
0a7de745 A |
1190 | WQ_DELAYED_CALL_PENDED, WQ_IMMEDIATE_CALL_PENDED | |
1191 | WQ_IMMEDIATE_CALL_SCHEDULED)) { | |
d9a64523 A |
1192 | return false; |
1193 | } | |
1194 | ||
1195 | uint64_t now = mach_absolute_time(); | |
1196 | ||
1197 | if (flags & WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART) { | |
1198 | /* do not change the window */ | |
1199 | } else if (now - wq->wq_thread_call_last_run <= wq->wq_timer_interval) { | |
1200 | wq->wq_timer_interval *= 2; | |
1201 | if (wq->wq_timer_interval > wq_max_timer_interval.abstime) { | |
f427ee49 | 1202 | wq->wq_timer_interval = (uint32_t)wq_max_timer_interval.abstime; |
d9a64523 A |
1203 | } |
1204 | } else if (now - wq->wq_thread_call_last_run > 2 * wq->wq_timer_interval) { | |
1205 | wq->wq_timer_interval /= 2; | |
1206 | if (wq->wq_timer_interval < wq_stalled_window.abstime) { | |
f427ee49 | 1207 | wq->wq_timer_interval = (uint32_t)wq_stalled_window.abstime; |
d9a64523 A |
1208 | } |
1209 | } | |
1210 | ||
1211 | WQ_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, | |
0a7de745 | 1212 | _wq_flags(wq), wq->wq_timer_interval, 0); |
d9a64523 A |
1213 | |
1214 | thread_call_t call = wq->wq_delayed_call; | |
1215 | uintptr_t arg = WQ_DELAYED_CALL_SCHEDULED; | |
1216 | uint64_t deadline = now + wq->wq_timer_interval; | |
1217 | if (thread_call_enter1_delayed(call, (void *)arg, deadline)) { | |
1218 | panic("delayed_call was already enqueued"); | |
1219 | } | |
1220 | return true; | |
1221 | } | |
1222 | ||
1223 | static void | |
1224 | workq_schedule_immediate_thread_creation(struct workqueue *wq) | |
1225 | { | |
1226 | assert(!preemption_enabled()); | |
1227 | ||
1228 | if (workq_thread_call_prepost(wq, WQ_IMMEDIATE_CALL_SCHEDULED, | |
0a7de745 | 1229 | WQ_IMMEDIATE_CALL_PENDED, 0)) { |
d9a64523 | 1230 | WQ_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, |
0a7de745 | 1231 | _wq_flags(wq), 0, 0); |
d9a64523 A |
1232 | |
1233 | uintptr_t arg = WQ_IMMEDIATE_CALL_SCHEDULED; | |
1234 | if (thread_call_enter1(wq->wq_immediate_call, (void *)arg)) { | |
1235 | panic("immediate_call was already enqueued"); | |
1236 | } | |
1237 | } | |
1238 | } | |
1239 | ||
1240 | void | |
1241 | workq_proc_suspended(struct proc *p) | |
1242 | { | |
1243 | struct workqueue *wq = proc_get_wqptr(p); | |
1244 | ||
0a7de745 A |
1245 | if (wq) { |
1246 | os_atomic_or(&wq->wq_flags, WQ_PROC_SUSPENDED, relaxed); | |
1247 | } | |
d9a64523 A |
1248 | } |
1249 | ||
1250 | void | |
1251 | workq_proc_resumed(struct proc *p) | |
1252 | { | |
1253 | struct workqueue *wq = proc_get_wqptr(p); | |
1254 | uint32_t wq_flags; | |
1255 | ||
0a7de745 A |
1256 | if (!wq) { |
1257 | return; | |
1258 | } | |
d9a64523 | 1259 | |
cb323159 A |
1260 | wq_flags = os_atomic_andnot_orig(&wq->wq_flags, WQ_PROC_SUSPENDED | |
1261 | WQ_DELAYED_CALL_PENDED | WQ_IMMEDIATE_CALL_PENDED, relaxed); | |
d9a64523 A |
1262 | if ((wq_flags & WQ_EXITING) == 0) { |
1263 | disable_preemption(); | |
1264 | if (wq_flags & WQ_IMMEDIATE_CALL_PENDED) { | |
1265 | workq_schedule_immediate_thread_creation(wq); | |
1266 | } else if (wq_flags & WQ_DELAYED_CALL_PENDED) { | |
1267 | workq_schedule_delayed_thread_creation(wq, | |
0a7de745 | 1268 | WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART); |
d9a64523 A |
1269 | } |
1270 | enable_preemption(); | |
1271 | } | |
1272 | } | |
1273 | ||
1274 | /** | |
1275 | * returns whether lastblocked_tsp is within wq_stalled_window usecs of now | |
1276 | */ | |
1277 | static bool | |
1278 | workq_thread_is_busy(uint64_t now, _Atomic uint64_t *lastblocked_tsp) | |
1279 | { | |
cb323159 | 1280 | uint64_t lastblocked_ts = os_atomic_load_wide(lastblocked_tsp, relaxed); |
d9a64523 A |
1281 | if (now <= lastblocked_ts) { |
1282 | /* | |
1283 | * Because the update of the timestamp when a thread blocks | |
1284 | * isn't serialized against us looking at it (i.e. we don't hold | |
1285 | * the workq lock), it's possible to have a timestamp that matches | |
1286 | * the current time or that even looks to be in the future relative | |
1287 | * to when we grabbed the current time... | |
1288 | * | |
1289 | * Just treat this as a busy thread since it must have just blocked. | |
1290 | */ | |
1291 | return true; | |
1292 | } | |
1293 | return (now - lastblocked_ts) < wq_stalled_window.abstime; | |
1294 | } | |
1295 | ||
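A user-space analog of the check above: a bucket's last-blocked thread counts as busy if it blocked within the stalled window, and a timestamp that appears to be in the future is treated as "just blocked". It uses the public mach_absolute_time()/mach_timebase_info() APIs; the 10ms window is an illustrative value, not the kernel default:

```c
/* Illustrative user-space analog of the stalled-window busy check. */
#include <mach/mach_time.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
is_busy(uint64_t now, uint64_t lastblocked_ts, uint64_t window_abs)
{
	if (now <= lastblocked_ts) {
		return true;   /* racy "future" timestamp: it just blocked */
	}
	return (now - lastblocked_ts) < window_abs;
}

int
main(void)
{
	mach_timebase_info_data_t tb;
	mach_timebase_info(&tb);

	/* convert 10ms to mach absolute time units: ns * denom / numer */
	uint64_t window_abs = UINT64_C(10000000) * tb.denom / tb.numer;
	uint64_t now = mach_absolute_time();

	printf("just blocked:     %d\n", is_busy(now + 1, now, window_abs));              /* 1 */
	printf("blocked long ago: %d\n", is_busy(now + 4 * window_abs, now, window_abs)); /* 0 */
	return 0;
}
```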
1296 | static void | |
1297 | workq_add_new_threads_call(void *_p, void *flags) | |
1298 | { | |
1299 | proc_t p = _p; | |
1300 | struct workqueue *wq = proc_get_wqptr(p); | |
1301 | uint32_t my_flag = (uint32_t)(uintptr_t)flags; | |
1302 | ||
1303 | /* | |
1304 | * workq_exit() will set the workqueue to NULL before | |
1305 | * it cancels thread calls. | |
1306 | */ | |
0a7de745 A |
1307 | if (!wq) { |
1308 | return; | |
1309 | } | |
d9a64523 A |
1310 | |
1311 | assert((my_flag == WQ_DELAYED_CALL_SCHEDULED) || | |
0a7de745 | 1312 | (my_flag == WQ_IMMEDIATE_CALL_SCHEDULED)); |
d9a64523 A |
1313 | |
1314 | WQ_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, _wq_flags(wq), | |
0a7de745 | 1315 | wq->wq_nthreads, wq->wq_thidlecount, 0); |
d9a64523 A |
1316 | |
1317 | workq_lock_spin(wq); | |
1318 | ||
1319 | wq->wq_thread_call_last_run = mach_absolute_time(); | |
cb323159 | 1320 | os_atomic_andnot(&wq->wq_flags, my_flag, release); |
d9a64523 A |
1321 | |
1322 | /* This can drop the workqueue lock, and take it again */ | |
1323 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
1324 | ||
1325 | workq_unlock(wq); | |
1326 | ||
1327 | WQ_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, | |
0a7de745 | 1328 | wq->wq_nthreads, wq->wq_thidlecount, 0); |
d9a64523 A |
1329 | } |
1330 | ||
1331 | #pragma mark thread state tracking | |
1332 | ||
1333 | static void | |
1334 | workq_sched_callback(int type, thread_t thread) | |
1335 | { | |
1336 | struct uthread *uth = get_bsdthread_info(thread); | |
1337 | proc_t proc = get_bsdtask_info(get_threadtask(thread)); | |
1338 | struct workqueue *wq = proc_get_wqptr(proc); | |
1339 | thread_qos_t req_qos, qos = uth->uu_workq_pri.qos_bucket; | |
1340 | wq_thactive_t old_thactive; | |
1341 | bool start_timer = false; | |
1342 | ||
1343 | if (qos == WORKQ_THREAD_QOS_MANAGER) { | |
1344 | return; | |
1345 | } | |
1346 | ||
1347 | switch (type) { | |
1348 | case SCHED_CALL_BLOCK: | |
1349 | old_thactive = _wq_thactive_dec(wq, qos); | |
1350 | req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); | |
1351 | ||
1352 | /* | |
1353 | * Remember the timestamp of the last thread that blocked in this | |
1354 | * bucket, it is used by admission checks to ignore one thread | |
1355 | * being inactive if this timestamp is recent enough. | |
1356 | * | |
1357 | * If we collide with another thread trying to update the | |
1358 | * last_blocked (really unlikely since another thread would have to | |
1359 | * get scheduled and then block after we start down this path), it's | |
1360 | * not a problem. Either timestamp is adequate, so no need to retry | |
1361 | */ | |
cb323159 | 1362 | os_atomic_store_wide(&wq->wq_lastblocked_ts[_wq_bucket(qos)], |
0a7de745 | 1363 | thread_last_run_time(thread), relaxed); |
d9a64523 A |
1364 | |
1365 | if (req_qos == THREAD_QOS_UNSPECIFIED) { | |
1366 | /* | |
1367 | * No pending request at the moment we could unblock, move on. | |
1368 | */ | |
1369 | } else if (qos < req_qos) { | |
1370 | /* | |
1371 | * The blocking thread is at a lower QoS than the highest currently | |
1372 | * pending constrained request, nothing has to be redriven | |
1373 | */ | |
1374 | } else { | |
1375 | uint32_t max_busycount, old_req_count; | |
1376 | old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, | |
0a7de745 | 1377 | req_qos, NULL, &max_busycount); |
d9a64523 A |
1378 | /* |
1379 | * If it is possible that may_start_constrained_thread had refused | |
1380 | * admission due to being over the max concurrency, we may need to | |
1381 | * spin up a new thread. | |
1382 | * | |
1383 | * We take into account the maximum number of busy threads | |
1384 | * that can affect may_start_constrained_thread as looking at the | |
1385 | * actual number may_start_constrained_thread will see is racy. | |
1386 | * | |
1387 | * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is | |
1388 | * between NCPU (4) and NCPU - 2 (2) we need to redrive. | |
1389 | */ | |
1390 | uint32_t conc = wq_max_parallelism[_wq_bucket(qos)]; | |
1391 | if (old_req_count <= conc && conc <= old_req_count + max_busycount) { | |
1392 | start_timer = workq_schedule_delayed_thread_creation(wq, 0); | |
1393 | } | |
1394 | } | |
1395 | if (__improbable(kdebug_enable)) { | |
1396 | __unused uint32_t old = _wq_thactive_aggregate_downto_qos(wq, | |
0a7de745 | 1397 | old_thactive, qos, NULL, NULL); |
d9a64523 | 1398 | WQ_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, |
0a7de745 A |
1399 | old - 1, qos | (req_qos << 8), |
1400 | wq->wq_reqcount << 1 | start_timer, 0); | |
d9a64523 A |
1401 | } |
1402 | break; | |
1403 | ||
1404 | case SCHED_CALL_UNBLOCK: | |
1405 | /* | |
1406 | * we cannot take the workqueue_lock here... | |
1407 | * an UNBLOCK can occur from a timer event which | |
1408 | * is run from an interrupt context... if the workqueue_lock | |
1409 | * is already held by this processor, we'll deadlock... | |
1410 | * the thread lock for the thread being UNBLOCKED | |
1411 | * is also held | |
1412 | */ | |
1413 | old_thactive = _wq_thactive_inc(wq, qos); | |
1414 | if (__improbable(kdebug_enable)) { | |
1415 | __unused uint32_t old = _wq_thactive_aggregate_downto_qos(wq, | |
0a7de745 | 1416 | old_thactive, qos, NULL, NULL); |
d9a64523 A |
1417 | req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); |
1418 | WQ_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, | |
0a7de745 A |
1419 | old + 1, qos | (req_qos << 8), |
1420 | wq->wq_threads_scheduled, 0); | |
d9a64523 A |
1421 | } |
1422 | break; | |
1423 | } | |
1424 | } | |
1425 | ||
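The SCHED_CALL_BLOCK comment above reduces to a simple window test: a redrive is needed when the old request count is at most the allowed concurrency and the concurrency is at most the old count plus the maximum possible busy threads. A stand-alone sketch of that window for the NCPU = 4 example in the comment, assuming a request level where max_busycount comes out to 2:

```c
/* Illustrative check of the redrive window from the comment above. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
needs_redrive(uint32_t old_req_count, uint32_t conc, uint32_t max_busycount)
{
	return old_req_count <= conc && conc <= old_req_count + max_busycount;
}

int
main(void)
{
	/* conc = NCPU = 4, max_busycount = 2: redrive for old counts 2..4 */
	assert(!needs_redrive(1, 4, 2));   /* even if all busy, still under limit */
	assert(needs_redrive(2, 4, 2));
	assert(needs_redrive(3, 4, 2));
	assert(needs_redrive(4, 4, 2));
	assert(!needs_redrive(5, 4, 2));   /* already over the concurrency limit */
	return 0;
}
```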
1426 | #pragma mark workq lifecycle | |
1427 | ||
1428 | void | |
1429 | workq_reference(struct workqueue *wq) | |
1430 | { | |
1431 | os_ref_retain(&wq->wq_refcnt); | |
1432 | } | |
1433 | ||
cb323159 A |
1434 | static void |
1435 | workq_deallocate_queue_invoke(mpsc_queue_chain_t e, | |
1436 | __assert_only mpsc_daemon_queue_t dq) | |
d9a64523 | 1437 | { |
cb323159 | 1438 | struct workqueue *wq; |
d9a64523 A |
1439 | struct turnstile *ts; |
1440 | ||
cb323159 A |
1441 | wq = mpsc_queue_element(e, struct workqueue, wq_destroy_link); |
1442 | assert(dq == &workq_deallocate_queue); | |
1443 | ||
1444 | turnstile_complete((uintptr_t)wq, &wq->wq_turnstile, &ts, TURNSTILE_WORKQS); | |
d9a64523 A |
1445 | assert(ts); |
1446 | turnstile_cleanup(); | |
1447 | turnstile_deallocate(ts); | |
1448 | ||
f427ee49 | 1449 | lck_spin_destroy(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
1450 | zfree(workq_zone_workqueue, wq); |
1451 | } | |
1452 | ||
1453 | static void | |
1454 | workq_deallocate(struct workqueue *wq) | |
1455 | { | |
1456 | if (os_ref_release_relaxed(&wq->wq_refcnt) == 0) { | |
cb323159 A |
1457 | workq_deallocate_queue_invoke(&wq->wq_destroy_link, |
1458 | &workq_deallocate_queue); | |
d9a64523 A |
1459 | } |
1460 | } | |
1461 | ||
1462 | void | |
1463 | workq_deallocate_safe(struct workqueue *wq) | |
1464 | { | |
1465 | if (__improbable(os_ref_release_relaxed(&wq->wq_refcnt) == 0)) { | |
cb323159 A |
1466 | mpsc_daemon_enqueue(&workq_deallocate_queue, &wq->wq_destroy_link, |
1467 | MPSC_QUEUE_DISABLE_PREEMPTION); | |
d9a64523 A |
1468 | } |
1469 | } | |
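The two release paths above differ only in where the final teardown runs: workq_deallocate() may run workq_deallocate_queue_invoke() inline, while workq_deallocate_safe() defers it through the MPSC daemon queue because the caller may be in a context where destroying the turnstile and lock is not safe. A minimal userspace analogue of that split, using C11 atomics; the type and function names are made up for illustration:

#include <stdatomic.h>

struct object {
        atomic_uint refcnt;
};

/* Drop a reference and destroy inline when it was the last one. */
static void
object_release(struct object *o, void (*destroy)(struct object *))
{
        if (atomic_fetch_sub_explicit(&o->refcnt, 1, memory_order_release) == 1) {
                atomic_thread_fence(memory_order_acquire);
                destroy(o);             /* analogue of workq_deallocate() */
        }
}

/* Drop a reference, but hand the last one to a worker for destruction. */
static void
object_release_safe(struct object *o, void (*defer)(struct object *))
{
        if (atomic_fetch_sub_explicit(&o->refcnt, 1, memory_order_release) == 1) {
                defer(o);               /* analogue of the MPSC daemon queue */
        }
}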
1470 | ||
1471 | /** | |
1472 | * Set up per-process state for the workqueue. | |
1473 | */ | |
1474 | int | |
1475 | workq_open(struct proc *p, __unused struct workq_open_args *uap, | |
0a7de745 | 1476 | __unused int32_t *retval) |
d9a64523 A |
1477 | { |
1478 | struct workqueue *wq; | |
1479 | int error = 0; | |
1480 | ||
1481 | if ((p->p_lflag & P_LREGISTER) == 0) { | |
1482 | return EINVAL; | |
1483 | } | |
1484 | ||
1485 | if (wq_init_constrained_limit) { | |
f427ee49 | 1486 | uint32_t limit, num_cpus = ml_wait_max_cpus(); |
d9a64523 A |
1487 | |
1488 | /* | |
1489 | * set up the limit for the constrained pool; | |
1490 | * this is a virtual pool in that we don't | |
1491 | * maintain it on a separate idle and run list | |
1492 | */ | |
1493 | limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR; | |
1494 | ||
0a7de745 | 1495 | if (limit > wq_max_constrained_threads) { |
d9a64523 | 1496 | wq_max_constrained_threads = limit; |
0a7de745 | 1497 | } |
d9a64523 A |
1498 | |
1499 | if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) { | |
1500 | wq_max_threads = WQ_THACTIVE_BUCKET_HALF; | |
1501 | } | |
1502 | if (wq_max_threads > CONFIG_THREAD_MAX - 20) { | |
1503 | wq_max_threads = CONFIG_THREAD_MAX - 20; | |
1504 | } | |
1505 | ||
1506 | wq_death_max_load = (uint16_t)fls(num_cpus) + 1; | |
1507 | ||
1508 | for (thread_qos_t qos = WORKQ_THREAD_QOS_MIN; qos <= WORKQ_THREAD_QOS_MAX; qos++) { | |
1509 | wq_max_parallelism[_wq_bucket(qos)] = | |
0a7de745 | 1510 | qos_max_parallelism(qos, QOS_PARALLELISM_COUNT_LOGICAL); |
d9a64523 A |
1511 | } |
1512 | ||
1513 | wq_init_constrained_limit = 0; | |
1514 | } | |
1515 | ||
1516 | if (proc_get_wqptr(p) == NULL) { | |
1517 | if (proc_init_wqptr_or_wait(p) == FALSE) { | |
1518 | assert(proc_get_wqptr(p) != NULL); | |
1519 | goto out; | |
1520 | } | |
1521 | ||
1522 | wq = (struct workqueue *)zalloc(workq_zone_workqueue); | |
1523 | bzero(wq, sizeof(struct workqueue)); | |
1524 | ||
1525 | os_ref_init_count(&wq->wq_refcnt, &workq_refgrp, 1); | |
1526 | ||
1527 | // Start the event manager at the priority hinted at by the policy engine | |
1528 | thread_qos_t mgr_priority_hint = task_get_default_manager_qos(current_task()); | |
1529 | pthread_priority_t pp = _pthread_priority_make_from_thread_qos(mgr_priority_hint, 0, 0); | |
1530 | wq->wq_event_manager_priority = (uint32_t)pp; | |
f427ee49 | 1531 | wq->wq_timer_interval = (uint32_t)wq_stalled_window.abstime; |
d9a64523 A |
1532 | wq->wq_proc = p; |
1533 | turnstile_prepare((uintptr_t)wq, &wq->wq_turnstile, turnstile_alloc(), | |
0a7de745 | 1534 | TURNSTILE_WORKQS); |
d9a64523 A |
1535 | |
1536 | TAILQ_INIT(&wq->wq_thrunlist); | |
1537 | TAILQ_INIT(&wq->wq_thnewlist); | |
1538 | TAILQ_INIT(&wq->wq_thidlelist); | |
f427ee49 A |
1539 | priority_queue_init(&wq->wq_overcommit_queue); |
1540 | priority_queue_init(&wq->wq_constrained_queue); | |
1541 | priority_queue_init(&wq->wq_special_queue); | |
d9a64523 | 1542 | |
c3c9b80d A |
1543 | /* We are only using the delayed thread call for the constrained pool, | |
1544 | * which can't have work at >= UI QoS, and so we can be fine with a | |
1545 | * UI QoS thread call. | |
1546 | */ | |
1547 | wq->wq_delayed_call = thread_call_allocate_with_qos( | |
1548 | workq_add_new_threads_call, p, THREAD_QOS_USER_INTERACTIVE, | |
0a7de745 | 1549 | THREAD_CALL_OPTIONS_ONCE); |
d9a64523 | 1550 | wq->wq_immediate_call = thread_call_allocate_with_options( |
0a7de745 A |
1551 | workq_add_new_threads_call, p, THREAD_CALL_PRIORITY_KERNEL, |
1552 | THREAD_CALL_OPTIONS_ONCE); | |
d9a64523 | 1553 | wq->wq_death_call = thread_call_allocate_with_options( |
0a7de745 A |
1554 | workq_kill_old_threads_call, wq, |
1555 | THREAD_CALL_PRIORITY_USER, THREAD_CALL_OPTIONS_ONCE); | |
d9a64523 | 1556 | |
f427ee49 | 1557 | lck_spin_init(&wq->wq_lock, &workq_lck_grp, LCK_ATTR_NULL); |
d9a64523 A |
1558 | |
1559 | WQ_TRACE_WQ(TRACE_wq_create | DBG_FUNC_NONE, wq, | |
0a7de745 | 1560 | VM_KERNEL_ADDRHIDE(wq), 0, 0, 0); |
d9a64523 A |
1561 | proc_set_wqptr(p, wq); |
1562 | } | |
1563 | out: | |
1564 | ||
1565 | return error; | |
1566 | } | |
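The one-time limit setup inside workq_open() is plain arithmetic on the CPU count. A worked sketch of the same computations, with the kernel constants replaced by placeholder values purely so the snippet stands alone (the real values are defined elsewhere in xnu and may differ; fls() is taken from <strings.h> here, while the kernel uses its own):

#include <stdint.h>
#include <strings.h>    /* fls() */

/* Placeholder values, not the real kernel constants. */
#define WORKQUEUE_CONSTRAINED_FACTOR    8
#define WQ_THACTIVE_BUCKET_HALF         127
#define CONFIG_THREAD_MAX               2560

static void
wq_compute_limits(uint32_t num_cpus, uint32_t *max_constrained,
    uint32_t *max_threads, uint16_t *death_max_load)
{
        /* the constrained pool scales with the CPU count, but never shrinks */
        uint32_t limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
        if (limit > *max_constrained) {
                *max_constrained = limit;
        }

        /* clamp the total thread cap to what the accounting can represent */
        if (*max_threads > WQ_THACTIVE_BUCKET_HALF) {
                *max_threads = WQ_THACTIVE_BUCKET_HALF;
        }
        if (*max_threads > CONFIG_THREAD_MAX - 20) {
                *max_threads = CONFIG_THREAD_MAX - 20;
        }

        /* the death load threshold grows with log2 of the CPU count */
        *death_max_load = (uint16_t)(fls((int)num_cpus) + 1);
}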
1567 | ||
1568 | /* | |
1569 | * Routine: workq_mark_exiting | |
1570 | * | |
1571 | * Function: Mark the work queue such that new threads will not be added to the | |
1572 | * work queue after we return. | |
1573 | * | |
1574 | * Conditions: Called against the current process. | |
1575 | */ | |
1576 | void | |
1577 | workq_mark_exiting(struct proc *p) | |
1578 | { | |
1579 | struct workqueue *wq = proc_get_wqptr(p); | |
1580 | uint32_t wq_flags; | |
1581 | workq_threadreq_t mgr_req; | |
1582 | ||
0a7de745 A |
1583 | if (!wq) { |
1584 | return; | |
1585 | } | |
d9a64523 | 1586 | |
0a7de745 | 1587 | WQ_TRACE_WQ(TRACE_wq_pthread_exit | DBG_FUNC_START, wq, 0, 0, 0, 0); |
d9a64523 A |
1588 | |
1589 | workq_lock_spin(wq); | |
1590 | ||
1591 | wq_flags = os_atomic_or_orig(&wq->wq_flags, WQ_EXITING, relaxed); | |
1592 | if (__improbable(wq_flags & WQ_EXITING)) { | |
1593 | panic("workq_mark_exiting called twice"); | |
1594 | } | |
1595 | ||
1596 | /* | |
1597 | * Opportunistically try to cancel thread calls that are likely in flight. | |
1598 | * workq_exit() will do the proper cleanup. | |
1599 | */ | |
1600 | if (wq_flags & WQ_IMMEDIATE_CALL_SCHEDULED) { | |
1601 | thread_call_cancel(wq->wq_immediate_call); | |
1602 | } | |
1603 | if (wq_flags & WQ_DELAYED_CALL_SCHEDULED) { | |
1604 | thread_call_cancel(wq->wq_delayed_call); | |
1605 | } | |
1606 | if (wq_flags & WQ_DEATH_CALL_SCHEDULED) { | |
1607 | thread_call_cancel(wq->wq_death_call); | |
1608 | } | |
1609 | ||
1610 | mgr_req = wq->wq_event_manager_threadreq; | |
1611 | wq->wq_event_manager_threadreq = NULL; | |
1612 | wq->wq_reqcount = 0; /* workq_schedule_creator must not look at queues */ | |
cb323159 A |
1613 | wq->wq_creator = NULL; |
1614 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
d9a64523 A |
1615 | |
1616 | workq_unlock(wq); | |
1617 | ||
1618 | if (mgr_req) { | |
1619 | kqueue_threadreq_cancel(p, mgr_req); | |
1620 | } | |
1621 | /* | |
1622 | * No one touches the priority queues once WQ_EXITING is set. | |
1623 | * It is hence safe to do the teardown without holding any lock. | |
1624 | */ | |
1625 | priority_queue_destroy(&wq->wq_overcommit_queue, | |
f427ee49 | 1626 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1627 | workq_threadreq_destroy(p, e); |
1628 | }); | |
1629 | priority_queue_destroy(&wq->wq_constrained_queue, | |
f427ee49 | 1630 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1631 | workq_threadreq_destroy(p, e); |
1632 | }); | |
1633 | priority_queue_destroy(&wq->wq_special_queue, | |
f427ee49 | 1634 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1635 | workq_threadreq_destroy(p, e); |
1636 | }); | |
1637 | ||
0a7de745 | 1638 | WQ_TRACE(TRACE_wq_pthread_exit | DBG_FUNC_END, 0, 0, 0, 0, 0); |
d9a64523 A |
1639 | } |
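workq_mark_exiting() leans on the fetch-or returning the previous flag word: one atomic operation both publishes WQ_EXITING and detects a second call. A minimal standalone sketch of the same idiom with C11 atomics (not kernel code; the names are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_EXITING    0x1u

static _Atomic unsigned int obj_flags;

static void
mark_exiting(void)
{
        unsigned int prev = atomic_fetch_or_explicit(&obj_flags, FLAG_EXITING,
            memory_order_relaxed);
        if (prev & FLAG_EXITING) {
                /* the kernel panics here: "workq_mark_exiting called twice" */
                fprintf(stderr, "mark_exiting called twice\n");
                abort();
        }
}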
1640 | ||
1641 | /* | |
1642 | * Routine: workq_exit | |
1643 | * | |
1644 | * Function: clean up the work queue structure(s) now that there are no threads | |
1645 | * left running inside the work queue (except possibly current_thread). | |
1646 | * | |
1647 | * Conditions: Called by the last thread in the process. | |
1648 | * Called against current process. | |
1649 | */ | |
1650 | void | |
1651 | workq_exit(struct proc *p) | |
1652 | { | |
1653 | struct workqueue *wq; | |
1654 | struct uthread *uth, *tmp; | |
1655 | ||
1656 | wq = os_atomic_xchg(&p->p_wqptr, NULL, relaxed); | |
1657 | if (wq != NULL) { | |
1658 | thread_t th = current_thread(); | |
1659 | ||
0a7de745 | 1660 | WQ_TRACE_WQ(TRACE_wq_workqueue_exit | DBG_FUNC_START, wq, 0, 0, 0, 0); |
d9a64523 A |
1661 | |
1662 | if (thread_get_tag(th) & THREAD_TAG_WORKQUEUE) { | |
1663 | /* | |
1664 | * <rdar://problem/40111515> Make sure we will no longer call the | |
1665 | * sched call, if we ever block this thread, which the cancel_wait | |
1666 | * below can do. | |
1667 | */ | |
1668 | thread_sched_call(th, NULL); | |
1669 | } | |
1670 | ||
1671 | /* | |
1672 | * Thread calls are always scheduled by the proc itself or under the | |
1673 | * workqueue spinlock if WQ_EXITING is not yet set. | |
1674 | * | |
1675 | * Either way, when this runs, the proc has no threads left besides | |
1676 | * the one running this very code, so we know no thread call can be | |
1677 | * dispatched anymore. | |
1678 | */ | |
1679 | thread_call_cancel_wait(wq->wq_delayed_call); | |
1680 | thread_call_cancel_wait(wq->wq_immediate_call); | |
1681 | thread_call_cancel_wait(wq->wq_death_call); | |
1682 | thread_call_free(wq->wq_delayed_call); | |
1683 | thread_call_free(wq->wq_immediate_call); | |
1684 | thread_call_free(wq->wq_death_call); | |
1685 | ||
1686 | /* | |
1687 | * Clean up workqueue data structures for threads that exited and | |
1688 | * didn't get a chance to clean up after themselves. | |
1689 | * | |
1690 | * idle/new threads should have been interrupted and died on their own | |
1691 | */ | |
1692 | TAILQ_FOREACH_SAFE(uth, &wq->wq_thrunlist, uu_workq_entry, tmp) { | |
1693 | thread_sched_call(uth->uu_thread, NULL); | |
1694 | thread_deallocate(uth->uu_thread); | |
1695 | } | |
1696 | assert(TAILQ_EMPTY(&wq->wq_thnewlist)); | |
1697 | assert(TAILQ_EMPTY(&wq->wq_thidlelist)); | |
1698 | ||
1699 | WQ_TRACE_WQ(TRACE_wq_destroy | DBG_FUNC_END, wq, | |
0a7de745 | 1700 | VM_KERNEL_ADDRHIDE(wq), 0, 0, 0); |
d9a64523 A |
1701 | |
1702 | workq_deallocate(wq); | |
1703 | ||
0a7de745 | 1704 | WQ_TRACE(TRACE_wq_workqueue_exit | DBG_FUNC_END, 0, 0, 0, 0, 0); |
d9a64523 A |
1705 | } |
1706 | } | |
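The run-list walk in workq_exit() uses TAILQ_FOREACH_SAFE because the loop body releases the element it is looking at. A small self-contained example of the same <sys/queue.h> idiom; the node type here is invented for illustration:

#include <stdlib.h>
#include <sys/queue.h>

struct node {
        TAILQ_ENTRY(node) link;
        int value;
};
TAILQ_HEAD(node_list, node);

/*
 * Drain the list. The _SAFE variant caches the next element before the
 * body runs, so freeing the current element does not break the walk.
 */
static void
node_list_drain(struct node_list *head)
{
        struct node *n, *tmp;

        TAILQ_FOREACH_SAFE(n, head, link, tmp) {
                TAILQ_REMOVE(head, n, link);
                free(n);
        }
}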
1707 | ||
1708 | ||
1709 | #pragma mark bsd thread control | |
1710 | ||
1711 | static bool | |
1712 | _pthread_priority_to_policy(pthread_priority_t priority, | |
0a7de745 | 1713 | thread_qos_policy_data_t *data) |
d9a64523 A |
1714 | { |
1715 | data->qos_tier = _pthread_priority_thread_qos(priority); | |
1716 | data->tier_importance = _pthread_priority_relpri(priority); | |
1717 | if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || | |
0a7de745 | 1718 | data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { |
d9a64523 A |
1719 | return false; |
1720 | } | |
1721 | return true; | |
1722 | } | |
1723 | ||
1724 | static int | |
1725 | bsdthread_set_self(proc_t p, thread_t th, pthread_priority_t priority, | |
0a7de745 | 1726 | mach_port_name_t voucher, enum workq_set_self_flags flags) |
d9a64523 A |
1727 | { |
1728 | struct uthread *uth = get_bsdthread_info(th); | |
1729 | struct workqueue *wq = proc_get_wqptr(p); | |
1730 | ||
1731 | kern_return_t kr; | |
1732 | int unbind_rv = 0, qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0; | |
1733 | bool is_wq_thread = (thread_get_tag(th) & THREAD_TAG_WORKQUEUE); | |
1734 | ||
1735 | if (flags & WORKQ_SET_SELF_WQ_KEVENT_UNBIND) { | |
1736 | if (!is_wq_thread) { | |
1737 | unbind_rv = EINVAL; | |
1738 | goto qos; | |
1739 | } | |
1740 | ||
1741 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
1742 | unbind_rv = EINVAL; | |
1743 | goto qos; | |
1744 | } | |
1745 | ||
cb323159 | 1746 | workq_threadreq_t kqr = uth->uu_kqr_bound; |
d9a64523 A |
1747 | if (kqr == NULL) { |
1748 | unbind_rv = EALREADY; | |
1749 | goto qos; | |
1750 | } | |
1751 | ||
cb323159 | 1752 | if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
d9a64523 A |
1753 | unbind_rv = EINVAL; |
1754 | goto qos; | |
1755 | } | |
1756 | ||
cb323159 | 1757 | kqueue_threadreq_unbind(p, kqr); |
d9a64523 A |
1758 | } |
1759 | ||
1760 | qos: | |
1761 | if (flags & WORKQ_SET_SELF_QOS_FLAG) { | |
1762 | thread_qos_policy_data_t new_policy; | |
1763 | ||
1764 | if (!_pthread_priority_to_policy(priority, &new_policy)) { | |
1765 | qos_rv = EINVAL; | |
1766 | goto voucher; | |
1767 | } | |
1768 | ||
1769 | if (!is_wq_thread) { | |
1770 | /* | |
1771 | * Threads opted out of QoS can't change QoS | |
1772 | */ | |
1773 | if (!thread_has_qos_policy(th)) { | |
1774 | qos_rv = EPERM; | |
1775 | goto voucher; | |
1776 | } | |
cb323159 A |
1777 | } else if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER || |
1778 | uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_ABOVEUI) { | |
d9a64523 | 1779 | /* |
cb323159 | 1780 | * Workqueue manager threads or threads above UI can't change QoS |
d9a64523 A |
1781 | */ |
1782 | qos_rv = EINVAL; | |
1783 | goto voucher; | |
1784 | } else { | |
1785 | /* | |
1786 | * For workqueue threads, possibly adjust buckets and redrive thread | |
1787 | * requests. | |
1788 | */ | |
1789 | bool old_overcommit = uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT; | |
1790 | bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; | |
1791 | struct uu_workq_policy old_pri, new_pri; | |
1792 | bool force_run = false; | |
1793 | ||
1794 | workq_lock_spin(wq); | |
1795 | ||
1796 | if (old_overcommit != new_overcommit) { | |
1797 | uth->uu_workq_flags ^= UT_WORKQ_OVERCOMMIT; | |
1798 | if (old_overcommit) { | |
1799 | wq->wq_constrained_threads_scheduled++; | |
1800 | } else if (wq->wq_constrained_threads_scheduled-- == | |
0a7de745 | 1801 | wq_max_constrained_threads) { |
d9a64523 A |
1802 | force_run = true; |
1803 | } | |
1804 | } | |
1805 | ||
1806 | old_pri = new_pri = uth->uu_workq_pri; | |
f427ee49 | 1807 | new_pri.qos_req = (thread_qos_t)new_policy.qos_tier; |
d9a64523 A |
1808 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, force_run); |
1809 | workq_unlock(wq); | |
1810 | } | |
1811 | ||
1812 | kr = thread_policy_set_internal(th, THREAD_QOS_POLICY, | |
0a7de745 | 1813 | (thread_policy_t)&new_policy, THREAD_QOS_POLICY_COUNT); |
d9a64523 A |
1814 | if (kr != KERN_SUCCESS) { |
1815 | qos_rv = EINVAL; | |
1816 | } | |
1817 | } | |
1818 | ||
1819 | voucher: | |
1820 | if (flags & WORKQ_SET_SELF_VOUCHER_FLAG) { | |
1821 | kr = thread_set_voucher_name(voucher); | |
1822 | if (kr != KERN_SUCCESS) { | |
1823 | voucher_rv = ENOENT; | |
1824 | goto fixedpri; | |
1825 | } | |
1826 | } | |
1827 | ||
1828 | fixedpri: | |
0a7de745 A |
1829 | if (qos_rv) { |
1830 | goto done; | |
1831 | } | |
d9a64523 A |
1832 | if (flags & WORKQ_SET_SELF_FIXEDPRIORITY_FLAG) { |
1833 | thread_extended_policy_data_t extpol = {.timeshare = 0}; | |
1834 | ||
1835 | if (is_wq_thread) { | |
1836 | /* Not allowed on workqueue threads */ | |
1837 | fixedpri_rv = ENOTSUP; | |
1838 | goto done; | |
1839 | } | |
1840 | ||
1841 | kr = thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, | |
0a7de745 | 1842 | (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
d9a64523 A |
1843 | if (kr != KERN_SUCCESS) { |
1844 | fixedpri_rv = EINVAL; | |
1845 | goto done; | |
1846 | } | |
1847 | } else if (flags & WORKQ_SET_SELF_TIMESHARE_FLAG) { | |
1848 | thread_extended_policy_data_t extpol = {.timeshare = 1}; | |
1849 | ||
1850 | if (is_wq_thread) { | |
1851 | /* Not allowed on workqueue threads */ | |
1852 | fixedpri_rv = ENOTSUP; | |
1853 | goto done; | |
1854 | } | |
1855 | ||
1856 | kr = thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, | |
0a7de745 | 1857 | (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
d9a64523 A |
1858 | if (kr != KERN_SUCCESS) { |
1859 | fixedpri_rv = EINVAL; | |
1860 | goto done; | |
1861 | } | |
1862 | } | |
1863 | ||
1864 | done: | |
1865 | if (qos_rv && voucher_rv) { | |
1866 | /* Both failed, give that a unique error. */ | |
1867 | return EBADMSG; | |
1868 | } | |
1869 | ||
1870 | if (unbind_rv) { | |
1871 | return unbind_rv; | |
1872 | } | |
1873 | ||
1874 | if (qos_rv) { | |
1875 | return qos_rv; | |
1876 | } | |
1877 | ||
1878 | if (voucher_rv) { | |
1879 | return voucher_rv; | |
1880 | } | |
1881 | ||
1882 | if (fixedpri_rv) { | |
1883 | return fixedpri_rv; | |
1884 | } | |
1885 | ||
c6bf4f31 | 1886 | |
d9a64523 A |
1887 | return 0; |
1888 | } | |
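The done: label above folds four partial results into a single errno with a fixed precedence, reserving a distinct code for the case where both the QoS and the voucher updates failed. The same folding, written as a hypothetical standalone helper for clarity:

#include <errno.h>

static int
fold_set_self_errors(int unbind_rv, int qos_rv, int voucher_rv,
    int fixedpri_rv)
{
        if (qos_rv && voucher_rv) {
                return EBADMSG;         /* both failed: give that a unique error */
        }
        if (unbind_rv) {
                return unbind_rv;
        }
        if (qos_rv) {
                return qos_rv;
        }
        if (voucher_rv) {
                return voucher_rv;
        }
        return fixedpri_rv;             /* 0 when every requested change succeeded */
}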
1889 | ||
1890 | static int | |
1891 | bsdthread_add_explicit_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1892 | pthread_priority_t pp, user_addr_t resource) |
d9a64523 A |
1893 | { |
1894 | thread_qos_t qos = _pthread_priority_thread_qos(pp); | |
1895 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
1896 | return EINVAL; | |
1897 | } | |
1898 | ||
cb323159 A |
1899 | thread_t th = port_name_to_thread(kport, |
1900 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1901 | if (th == THREAD_NULL) { |
1902 | return ESRCH; | |
1903 | } | |
1904 | ||
1905 | int rv = proc_thread_qos_add_override(p->task, th, 0, qos, TRUE, | |
0a7de745 | 1906 | resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); |
d9a64523 A |
1907 | |
1908 | thread_deallocate(th); | |
1909 | return rv; | |
1910 | } | |
1911 | ||
1912 | static int | |
1913 | bsdthread_remove_explicit_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1914 | user_addr_t resource) |
d9a64523 | 1915 | { |
cb323159 A |
1916 | thread_t th = port_name_to_thread(kport, |
1917 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1918 | if (th == THREAD_NULL) { |
1919 | return ESRCH; | |
1920 | } | |
1921 | ||
1922 | int rv = proc_thread_qos_remove_override(p->task, th, 0, resource, | |
0a7de745 | 1923 | THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); |
d9a64523 A |
1924 | |
1925 | thread_deallocate(th); | |
1926 | return rv; | |
1927 | } | |
1928 | ||
1929 | static int | |
1930 | workq_thread_add_dispatch_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1931 | pthread_priority_t pp, user_addr_t ulock_addr) |
d9a64523 A |
1932 | { |
1933 | struct uu_workq_policy old_pri, new_pri; | |
1934 | struct workqueue *wq = proc_get_wqptr(p); | |
1935 | ||
1936 | thread_qos_t qos_override = _pthread_priority_thread_qos(pp); | |
1937 | if (qos_override == THREAD_QOS_UNSPECIFIED) { | |
1938 | return EINVAL; | |
1939 | } | |
1940 | ||
cb323159 A |
1941 | thread_t thread = port_name_to_thread(kport, |
1942 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1943 | if (thread == THREAD_NULL) { |
1944 | return ESRCH; | |
1945 | } | |
1946 | ||
1947 | struct uthread *uth = get_bsdthread_info(thread); | |
1948 | if ((thread_get_tag(thread) & THREAD_TAG_WORKQUEUE) == 0) { | |
1949 | thread_deallocate(thread); | |
1950 | return EPERM; | |
1951 | } | |
1952 | ||
1953 | WQ_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, | |
0a7de745 | 1954 | wq, thread_tid(thread), 1, pp, 0); |
d9a64523 A |
1955 | |
1956 | thread_mtx_lock(thread); | |
1957 | ||
1958 | if (ulock_addr) { | |
cb323159 | 1959 | uint32_t val; |
d9a64523 A |
1960 | int rc; |
1961 | /* | |
1962 | * Workaround lack of explicit support for 'no-fault copyin' | |
1963 | * <rdar://problem/24999882>, as disabling preemption prevents paging in | |
1964 | */ | |
1965 | disable_preemption(); | |
cb323159 | 1966 | rc = copyin_atomic32(ulock_addr, &val); |
d9a64523 | 1967 | enable_preemption(); |
cb323159 | 1968 | if (rc == 0 && ulock_owner_value_to_port_name(val) != kport) { |
d9a64523 A |
1969 | goto out; |
1970 | } | |
1971 | } | |
1972 | ||
1973 | workq_lock_spin(wq); | |
1974 | ||
1975 | old_pri = uth->uu_workq_pri; | |
1976 | if (old_pri.qos_override >= qos_override) { | |
1977 | /* Nothing to do */ | |
1978 | } else if (thread == current_thread()) { | |
1979 | new_pri = old_pri; | |
1980 | new_pri.qos_override = qos_override; | |
1981 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
1982 | } else { | |
1983 | uth->uu_workq_pri.qos_override = qos_override; | |
1984 | if (qos_override > workq_pri_override(old_pri)) { | |
1985 | thread_set_workq_override(thread, qos_override); | |
1986 | } | |
1987 | } | |
1988 | ||
1989 | workq_unlock(wq); | |
1990 | ||
1991 | out: | |
1992 | thread_mtx_unlock(thread); | |
1993 | thread_deallocate(thread); | |
1994 | return 0; | |
1995 | } | |
1996 | ||
1997 | static int | |
1998 | workq_thread_reset_dispatch_override(proc_t p, thread_t thread) | |
1999 | { | |
2000 | struct uu_workq_policy old_pri, new_pri; | |
2001 | struct workqueue *wq = proc_get_wqptr(p); | |
2002 | struct uthread *uth = get_bsdthread_info(thread); | |
2003 | ||
2004 | if ((thread_get_tag(thread) & THREAD_TAG_WORKQUEUE) == 0) { | |
2005 | return EPERM; | |
2006 | } | |
2007 | ||
2008 | WQ_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
2009 | ||
2010 | workq_lock_spin(wq); | |
2011 | old_pri = new_pri = uth->uu_workq_pri; | |
2012 | new_pri.qos_override = THREAD_QOS_UNSPECIFIED; | |
2013 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
2014 | workq_unlock(wq); | |
2015 | return 0; | |
2016 | } | |
2017 | ||
cb323159 A |
2018 | static int |
2019 | workq_thread_allow_kill(__unused proc_t p, thread_t thread, bool enable) | |
2020 | { | |
2021 | if (!(thread_get_tag(thread) & THREAD_TAG_WORKQUEUE)) { | |
2022 | // If the thread isn't a workqueue thread, don't set the | |
2023 | // kill_allowed bit; however, we still need to return 0 | |
2024 | // instead of an error code since this code is executed | |
2025 | // on the abort path which needs to not depend on the | |
2026 | // pthread_t (returning an error depends on pthread_t via | |
2027 | // cerror_nocancel) | |
2028 | return 0; | |
2029 | } | |
2030 | struct uthread *uth = get_bsdthread_info(thread); | |
2031 | uth->uu_workq_pthread_kill_allowed = enable; | |
2032 | return 0; | |
2033 | } | |
2034 | ||
d9a64523 A |
2035 | static int |
2036 | bsdthread_get_max_parallelism(thread_qos_t qos, unsigned long flags, | |
0a7de745 | 2037 | int *retval) |
d9a64523 A |
2038 | { |
2039 | static_assert(QOS_PARALLELISM_COUNT_LOGICAL == | |
0a7de745 | 2040 | _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical"); |
d9a64523 | 2041 | static_assert(QOS_PARALLELISM_REALTIME == |
0a7de745 | 2042 | _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime"); |
d9a64523 A |
2043 | |
2044 | if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) { | |
2045 | return EINVAL; | |
2046 | } | |
2047 | ||
2048 | if (flags & QOS_PARALLELISM_REALTIME) { | |
2049 | if (qos) { | |
2050 | return EINVAL; | |
2051 | } | |
2052 | } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) { | |
2053 | return EINVAL; | |
2054 | } | |
2055 | ||
2056 | *retval = qos_max_parallelism(qos, flags); | |
2057 | return 0; | |
2058 | } | |
2059 | ||
2060 | #define ENSURE_UNUSED(arg) \ | |
0a7de745 | 2061 | ({ if ((arg) != 0) { return EINVAL; } }) |
d9a64523 A |
2062 | |
2063 | int | |
2064 | bsdthread_ctl(struct proc *p, struct bsdthread_ctl_args *uap, int *retval) | |
2065 | { | |
2066 | switch (uap->cmd) { | |
2067 | case BSDTHREAD_CTL_QOS_OVERRIDE_START: | |
2068 | return bsdthread_add_explicit_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2069 | (pthread_priority_t)uap->arg2, uap->arg3); |
d9a64523 A |
2070 | case BSDTHREAD_CTL_QOS_OVERRIDE_END: |
2071 | ENSURE_UNUSED(uap->arg3); | |
2072 | return bsdthread_remove_explicit_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2073 | (user_addr_t)uap->arg2); |
d9a64523 A |
2074 | |
2075 | case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH: | |
2076 | return workq_thread_add_dispatch_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2077 | (pthread_priority_t)uap->arg2, uap->arg3); |
d9a64523 A |
2078 | case BSDTHREAD_CTL_QOS_OVERRIDE_RESET: |
2079 | return workq_thread_reset_dispatch_override(p, current_thread()); | |
2080 | ||
2081 | case BSDTHREAD_CTL_SET_SELF: | |
2082 | return bsdthread_set_self(p, current_thread(), | |
0a7de745 A |
2083 | (pthread_priority_t)uap->arg1, (mach_port_name_t)uap->arg2, |
2084 | (enum workq_set_self_flags)uap->arg3); | |
d9a64523 A |
2085 | |
2086 | case BSDTHREAD_CTL_QOS_MAX_PARALLELISM: | |
2087 | ENSURE_UNUSED(uap->arg3); | |
2088 | return bsdthread_get_max_parallelism((thread_qos_t)uap->arg1, | |
0a7de745 | 2089 | (unsigned long)uap->arg2, retval); |
cb323159 A |
2090 | case BSDTHREAD_CTL_WORKQ_ALLOW_KILL: |
2091 | ENSURE_UNUSED(uap->arg2); | |
2092 | ENSURE_UNUSED(uap->arg3); | |
2093 | return workq_thread_allow_kill(p, current_thread(), (bool)uap->arg1); | |
d9a64523 A |
2094 | |
2095 | case BSDTHREAD_CTL_SET_QOS: | |
2096 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD: | |
2097 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET: | |
2098 | /* no longer supported */ | |
2099 | return ENOTSUP; | |
2100 | ||
2101 | default: | |
2102 | return EINVAL; | |
2103 | } | |
2104 | } | |
2105 | ||
2106 | #pragma mark workqueue thread manipulation | |
2107 | ||
cb323159 A |
2108 | static void __dead2 |
2109 | workq_unpark_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
2110 | struct uthread *uth, uint32_t setup_flags); | |
2111 | ||
d9a64523 A |
2112 | static void __dead2 |
2113 | workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
cb323159 | 2114 | struct uthread *uth, uint32_t setup_flags); |
d9a64523 A |
2115 | |
2116 | static void workq_setup_and_run(proc_t p, struct uthread *uth, int flags) __dead2; | |
2117 | ||
2118 | #if KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD | |
2119 | static inline uint64_t | |
2120 | workq_trace_req_id(workq_threadreq_t req) | |
2121 | { | |
2122 | struct kqworkloop *kqwl; | |
cb323159 A |
2123 | if (req->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
2124 | kqwl = __container_of(req, struct kqworkloop, kqwl_request); | |
d9a64523 A |
2125 | return kqwl->kqwl_dynamicid; |
2126 | } | |
2127 | ||
2128 | return VM_KERNEL_ADDRHIDE(req); | |
2129 | } | |
2130 | #endif | |
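workq_trace_req_id() recovers the enclosing kqworkloop from the embedded request with a container-of computation (__container_of above). A standalone sketch of that idiom with invented types:

#include <stddef.h>

struct request {
        int state;
};

struct workloop {
        unsigned long long id;
        struct request req;     /* embedded member */
};

/* Recover a pointer to the enclosing structure from one of its members. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static unsigned long long
workloop_id_from_request(struct request *r)
{
        return container_of(r, struct workloop, req)->id;
}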
2131 | ||
2132 | /** | |
2133 | * Entry point for libdispatch to ask for threads | |
2134 | */ | |
2135 | static int | |
2136 | workq_reqthreads(struct proc *p, uint32_t reqcount, pthread_priority_t pp) | |
2137 | { | |
2138 | thread_qos_t qos = _pthread_priority_thread_qos(pp); | |
2139 | struct workqueue *wq = proc_get_wqptr(p); | |
2140 | uint32_t unpaced, upcall_flags = WQ_FLAG_THREAD_NEWSPI; | |
2141 | ||
2142 | if (wq == NULL || reqcount <= 0 || reqcount > UINT16_MAX || | |
0a7de745 | 2143 | qos == THREAD_QOS_UNSPECIFIED) { |
d9a64523 A |
2144 | return EINVAL; |
2145 | } | |
2146 | ||
2147 | WQ_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, | |
0a7de745 | 2148 | wq, reqcount, pp, 0, 0); |
d9a64523 A |
2149 | |
2150 | workq_threadreq_t req = zalloc(workq_zone_threadreq); | |
2151 | priority_queue_entry_init(&req->tr_entry); | |
cb323159 | 2152 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2153 | req->tr_flags = 0; |
2154 | req->tr_qos = qos; | |
2155 | ||
2156 | if (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) { | |
cb323159 | 2157 | req->tr_flags |= WORKQ_TR_FLAG_OVERCOMMIT; |
d9a64523 A |
2158 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; |
2159 | } | |
2160 | ||
2161 | WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE, | |
0a7de745 | 2162 | wq, workq_trace_req_id(req), req->tr_qos, reqcount, 0); |
d9a64523 A |
2163 | |
2164 | workq_lock_spin(wq); | |
2165 | do { | |
2166 | if (_wq_exiting(wq)) { | |
2167 | goto exiting; | |
2168 | } | |
2169 | ||
2170 | /* | |
2171 | * When userspace is asking for parallelism, wake up to (reqcount - 1) | |
2172 | * threads without pacing, to inform the scheduler of that workload. | |
2173 | * | |
2174 | * The last requests, or the ones that failed the admission checks, are | |
2175 | * enqueued and go through the regular creator codepath. | |
2176 | * | |
2177 | * If there aren't enough threads, add one, but re-evaluate everything | |
2178 | * as conditions may now have changed. | |
2179 | */ | |
cb323159 | 2180 | if (reqcount > 1 && (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2181 | unpaced = workq_constrained_allowance(wq, qos, NULL, false); |
2182 | if (unpaced >= reqcount - 1) { | |
2183 | unpaced = reqcount - 1; | |
2184 | } | |
2185 | } else { | |
2186 | unpaced = reqcount - 1; | |
2187 | } | |
2188 | ||
2189 | /* | |
2190 | * This path does not currently handle custom workloop parameters | |
2191 | * when creating threads for parallelism. | |
2192 | */ | |
cb323159 | 2193 | assert(!(req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)); |
d9a64523 A |
2194 | |
2195 | /* | |
2196 | * This is a trimmed down version of workq_threadreq_bind_and_unlock() | |
2197 | */ | |
2198 | while (unpaced > 0 && wq->wq_thidlecount) { | |
cb323159 A |
2199 | struct uthread *uth; |
2200 | bool needs_wakeup; | |
2201 | uint8_t uu_flags = UT_WORKQ_EARLY_BOUND; | |
2202 | ||
2203 | if (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
2204 | uu_flags |= UT_WORKQ_OVERCOMMIT; | |
2205 | } | |
2206 | ||
2207 | uth = workq_pop_idle_thread(wq, uu_flags, &needs_wakeup); | |
d9a64523 A |
2208 | |
2209 | _wq_thactive_inc(wq, qos); | |
2210 | wq->wq_thscheduled_count[_wq_bucket(qos)]++; | |
cb323159 | 2211 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); |
d9a64523 A |
2212 | wq->wq_fulfilled++; |
2213 | ||
d9a64523 A |
2214 | uth->uu_save.uus_workq_park_data.upcall_flags = upcall_flags; |
2215 | uth->uu_save.uus_workq_park_data.thread_request = req; | |
cb323159 A |
2216 | if (needs_wakeup) { |
2217 | workq_thread_wakeup(uth); | |
2218 | } | |
d9a64523 A |
2219 | unpaced--; |
2220 | reqcount--; | |
2221 | } | |
2222 | } while (unpaced && wq->wq_nthreads < wq_max_threads && | |
0a7de745 | 2223 | workq_add_new_idle_thread(p, wq)); |
d9a64523 A |
2224 | |
2225 | if (_wq_exiting(wq)) { | |
2226 | goto exiting; | |
2227 | } | |
2228 | ||
f427ee49 | 2229 | req->tr_count = (uint16_t)reqcount; |
d9a64523 A |
2230 | if (workq_threadreq_enqueue(wq, req)) { |
2231 | /* This can drop the workqueue lock, and take it again */ | |
2232 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
2233 | } | |
2234 | workq_unlock(wq); | |
2235 | return 0; | |
2236 | ||
2237 | exiting: | |
2238 | workq_unlock(wq); | |
2239 | zfree(workq_zone_threadreq, req); | |
2240 | return ECANCELED; | |
2241 | } | |
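The pacing decision in workq_reqthreads() reduces to: overcommit requests may wake up to reqcount - 1 threads immediately, constrained requests are additionally capped by the admission allowance, and the remainder is enqueued for the creator. A minimal sketch of that split as a hypothetical helper:

#include <stdbool.h>
#include <stdint.h>

/*
 * How many threads to wake without pacing for a request of `reqcount`
 * threads; the last one (at least) always goes through the creator.
 */
static uint32_t
unpaced_wakeups(uint32_t reqcount, bool overcommit, uint32_t allowance)
{
        if (reqcount <= 1) {
                return 0;
        }
        if (overcommit) {
                return reqcount - 1;
        }
        return allowance < reqcount - 1 ? allowance : reqcount - 1;
}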
2242 | ||
2243 | bool | |
cb323159 A |
2244 | workq_kern_threadreq_initiate(struct proc *p, workq_threadreq_t req, |
2245 | struct turnstile *workloop_ts, thread_qos_t qos, | |
2246 | workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
2247 | { |
2248 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
d9a64523 | 2249 | struct uthread *uth = NULL; |
d9a64523 | 2250 | |
cb323159 | 2251 | assert(req->tr_flags & (WORKQ_TR_FLAG_WORKLOOP | WORKQ_TR_FLAG_KEVENT)); |
d9a64523 | 2252 | |
cb323159 | 2253 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 | 2254 | workq_threadreq_param_t trp = kqueue_threadreq_workloop_param(req); |
cb323159 A |
2255 | qos = thread_workq_qos_for_pri(trp.trp_pri); |
2256 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
2257 | qos = WORKQ_THREAD_QOS_ABOVEUI; | |
d9a64523 | 2258 | } |
d9a64523 A |
2259 | } |
2260 | ||
cb323159 | 2261 | assert(req->tr_state == WORKQ_TR_STATE_IDLE); |
d9a64523 A |
2262 | priority_queue_entry_init(&req->tr_entry); |
2263 | req->tr_count = 1; | |
cb323159 | 2264 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2265 | req->tr_qos = qos; |
2266 | ||
2267 | WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE, wq, | |
0a7de745 | 2268 | workq_trace_req_id(req), qos, 1, 0); |
d9a64523 A |
2269 | |
2270 | if (flags & WORKQ_THREADREQ_ATTEMPT_REBIND) { | |
2271 | /* | |
2272 | * we're called back synchronously from the context of | |
2273 | * kqueue_threadreq_unbind from within workq_thread_return(), | |
2274 | * so we can try to match up this thread with this request! | |
2275 | */ | |
2276 | uth = current_uthread(); | |
2277 | assert(uth->uu_kqr_bound == NULL); | |
2278 | } | |
2279 | ||
2280 | workq_lock_spin(wq); | |
2281 | if (_wq_exiting(wq)) { | |
cb323159 | 2282 | req->tr_state = WORKQ_TR_STATE_IDLE; |
d9a64523 A |
2283 | workq_unlock(wq); |
2284 | return false; | |
2285 | } | |
2286 | ||
2287 | if (uth && workq_threadreq_admissible(wq, uth, req)) { | |
2288 | assert(uth != wq->wq_creator); | |
cb323159 A |
2289 | if (uth->uu_workq_pri.qos_bucket != req->tr_qos) { |
2290 | _wq_thactive_move(wq, uth->uu_workq_pri.qos_bucket, req->tr_qos); | |
2291 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ false); | |
2292 | } | |
2293 | /* | |
2294 | * We're called from workq_kern_threadreq_initiate() | |
2295 | * due to an unbind, with the kq req held. | |
2296 | */ | |
2297 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
2298 | workq_trace_req_id(req), 0, 0, 0); | |
2299 | wq->wq_fulfilled++; | |
2300 | kqueue_threadreq_bind(p, req, uth->uu_thread, 0); | |
d9a64523 A |
2301 | } else { |
2302 | if (workloop_ts) { | |
2303 | workq_perform_turnstile_operation_locked(wq, ^{ | |
2304 | turnstile_update_inheritor(workloop_ts, wq->wq_turnstile, | |
0a7de745 | 2305 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); |
d9a64523 | 2306 | turnstile_update_inheritor_complete(workloop_ts, |
0a7de745 | 2307 | TURNSTILE_INTERLOCK_HELD); |
d9a64523 A |
2308 | }); |
2309 | } | |
2310 | if (workq_threadreq_enqueue(wq, req)) { | |
2311 | workq_schedule_creator(p, wq, flags); | |
2312 | } | |
d9a64523 A |
2313 | } |
2314 | ||
cb323159 A |
2315 | workq_unlock(wq); |
2316 | ||
d9a64523 A |
2317 | return true; |
2318 | } | |
2319 | ||
2320 | void | |
cb323159 A |
2321 | workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req, |
2322 | thread_qos_t qos, workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
2323 | { |
2324 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
cb323159 | 2325 | bool make_overcommit = false; |
d9a64523 | 2326 | |
cb323159 | 2327 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 A |
2328 | /* Requests outside-of-QoS shouldn't accept modify operations */ |
2329 | return; | |
2330 | } | |
2331 | ||
2332 | workq_lock_spin(wq); | |
2333 | ||
2334 | assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER); | |
cb323159 | 2335 | assert(req->tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)); |
d9a64523 | 2336 | |
cb323159 A |
2337 | if (req->tr_state == WORKQ_TR_STATE_BINDING) { |
2338 | kqueue_threadreq_bind(p, req, req->tr_thread, 0); | |
d9a64523 A |
2339 | workq_unlock(wq); |
2340 | return; | |
2341 | } | |
2342 | ||
cb323159 A |
2343 | if (flags & WORKQ_THREADREQ_MAKE_OVERCOMMIT) { |
2344 | make_overcommit = (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0; | |
2345 | } | |
d9a64523 | 2346 | |
cb323159 | 2347 | if (_wq_exiting(wq) || (req->tr_qos == qos && !make_overcommit)) { |
d9a64523 A |
2348 | workq_unlock(wq); |
2349 | return; | |
2350 | } | |
2351 | ||
2352 | assert(req->tr_count == 1); | |
cb323159 | 2353 | if (req->tr_state != WORKQ_TR_STATE_QUEUED) { |
d9a64523 A |
2354 | panic("Invalid thread request (%p) state %d", req, req->tr_state); |
2355 | } | |
2356 | ||
2357 | WQ_TRACE_WQ(TRACE_wq_thread_request_modify | DBG_FUNC_NONE, wq, | |
0a7de745 | 2358 | workq_trace_req_id(req), qos, 0, 0); |
d9a64523 | 2359 | |
f427ee49 | 2360 | struct priority_queue_sched_max *pq = workq_priority_queue_for_req(wq, req); |
d9a64523 A |
2361 | workq_threadreq_t req_max; |
2362 | ||
2363 | /* | |
2364 | * Stage 1: Dequeue the request from its priority queue. | |
2365 | * | |
2366 | * If we dequeue the root item of the constrained priority queue, | |
2367 | * maintain the best constrained request qos invariant. | |
2368 | */ | |
f427ee49 | 2369 | if (priority_queue_remove(pq, &req->tr_entry)) { |
cb323159 | 2370 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2371 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
2372 | } | |
2373 | } | |
2374 | ||
2375 | /* | |
2376 | * Stage 2: Apply changes to the thread request | |
2377 | * | |
2378 | * If the item will not become the root of the priority queue it belongs to, | |
2379 | * then we need to wait in line: just enqueue and return quickly. | |
2380 | */ | |
cb323159 A |
2381 | if (__improbable(make_overcommit)) { |
2382 | req->tr_flags ^= WORKQ_TR_FLAG_OVERCOMMIT; | |
d9a64523 A |
2383 | pq = workq_priority_queue_for_req(wq, req); |
2384 | } | |
2385 | req->tr_qos = qos; | |
2386 | ||
2387 | req_max = priority_queue_max(pq, struct workq_threadreq_s, tr_entry); | |
2388 | if (req_max && req_max->tr_qos >= qos) { | |
f427ee49 A |
2389 | priority_queue_entry_set_sched_pri(pq, &req->tr_entry, |
2390 | workq_priority_for_req(req), false); | |
2391 | priority_queue_insert(pq, &req->tr_entry); | |
d9a64523 A |
2392 | workq_unlock(wq); |
2393 | return; | |
2394 | } | |
2395 | ||
2396 | /* | |
2397 | * Stage 3: Reevaluate whether we should run the thread request. | |
2398 | * | |
2399 | * Pretend the thread request is new again: | |
2400 | * - adjust wq_reqcount to not count it anymore. | |
cb323159 | 2401 | * - make its state WORKQ_TR_STATE_NEW (so that workq_threadreq_bind_and_unlock |
d9a64523 A |
2402 | * properly attempts a synchronous bind) |
2403 | */ | |
2404 | wq->wq_reqcount--; | |
cb323159 | 2405 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2406 | if (workq_threadreq_enqueue(wq, req)) { |
2407 | workq_schedule_creator(p, wq, flags); | |
2408 | } | |
2409 | workq_unlock(wq); | |
2410 | } | |
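Because the priority queue used here has no increase/decrease-key primitive, workq_kern_threadreq_modify() changes a request's priority by dequeuing it, mutating it, and re-enqueuing it, then re-checks the maximum to decide whether a redrive is needed. The same pattern on a deliberately simple array-backed queue (illustrative only, not the priority_queue API used above):

#include <stdbool.h>
#include <stdint.h>

#define PQ_CAP  64

/* Unordered array; the "queue" part is the linear max scan. */
struct simple_pq {
        uint8_t key[PQ_CAP];
        int n;
};

static void
pq_insert(struct simple_pq *pq, uint8_t key)
{
        pq->key[pq->n++] = key;
}

static void
pq_remove(struct simple_pq *pq, int idx)
{
        pq->key[idx] = pq->key[--pq->n];
}

static bool
pq_max(const struct simple_pq *pq, uint8_t *out)
{
        if (pq->n == 0) {
                return false;
        }
        uint8_t m = pq->key[0];
        for (int i = 1; i < pq->n; i++) {
                if (pq->key[i] > m) {
                        m = pq->key[i];
                }
        }
        *out = m;
        return true;
}

/*
 * Stage 1: dequeue.  Stage 2: mutate and re-enqueue.  Stage 3 (deciding
 * whether the new maximum warrants action, via pq_max()) is left to the
 * caller, just as workq_kern_threadreq_modify() re-evaluates the request.
 */
static void
pq_change_key(struct simple_pq *pq, int idx, uint8_t new_key)
{
        pq_remove(pq, idx);
        pq_insert(pq, new_key);
}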
2411 | ||
2412 | void | |
2413 | workq_kern_threadreq_lock(struct proc *p) | |
2414 | { | |
2415 | workq_lock_spin(proc_get_wqptr_fast(p)); | |
2416 | } | |
2417 | ||
2418 | void | |
2419 | workq_kern_threadreq_unlock(struct proc *p) | |
2420 | { | |
2421 | workq_unlock(proc_get_wqptr_fast(p)); | |
2422 | } | |
2423 | ||
2424 | void | |
cb323159 | 2425 | workq_kern_threadreq_update_inheritor(struct proc *p, workq_threadreq_t req, |
0a7de745 A |
2426 | thread_t owner, struct turnstile *wl_ts, |
2427 | turnstile_update_flags_t flags) | |
d9a64523 A |
2428 | { |
2429 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
d9a64523 A |
2430 | turnstile_inheritor_t inheritor; |
2431 | ||
2432 | assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER); | |
cb323159 | 2433 | assert(req->tr_flags & WORKQ_TR_FLAG_WORKLOOP); |
d9a64523 A |
2434 | workq_lock_held(wq); |
2435 | ||
cb323159 A |
2436 | if (req->tr_state == WORKQ_TR_STATE_BINDING) { |
2437 | kqueue_threadreq_bind(p, req, req->tr_thread, | |
0a7de745 | 2438 | KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE); |
d9a64523 A |
2439 | return; |
2440 | } | |
2441 | ||
2442 | if (_wq_exiting(wq)) { | |
2443 | inheritor = TURNSTILE_INHERITOR_NULL; | |
2444 | } else { | |
cb323159 | 2445 | if (req->tr_state != WORKQ_TR_STATE_QUEUED) { |
d9a64523 A |
2446 | panic("Invalid thread request (%p) state %d", req, req->tr_state); |
2447 | } | |
2448 | ||
2449 | if (owner) { | |
2450 | inheritor = owner; | |
2451 | flags |= TURNSTILE_INHERITOR_THREAD; | |
2452 | } else { | |
2453 | inheritor = wq->wq_turnstile; | |
2454 | flags |= TURNSTILE_INHERITOR_TURNSTILE; | |
2455 | } | |
2456 | } | |
2457 | ||
2458 | workq_perform_turnstile_operation_locked(wq, ^{ | |
2459 | turnstile_update_inheritor(wl_ts, inheritor, flags); | |
2460 | }); | |
2461 | } | |
2462 | ||
2463 | void | |
cb323159 | 2464 | workq_kern_threadreq_redrive(struct proc *p, workq_kern_threadreq_flags_t flags) |
d9a64523 A |
2465 | { |
2466 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
2467 | ||
2468 | workq_lock_spin(wq); | |
2469 | workq_schedule_creator(p, wq, flags); | |
2470 | workq_unlock(wq); | |
2471 | } | |
2472 | ||
2473 | void | |
2474 | workq_schedule_creator_turnstile_redrive(struct workqueue *wq, bool locked) | |
2475 | { | |
cb323159 A |
2476 | if (locked) { |
2477 | workq_schedule_creator(NULL, wq, WORKQ_THREADREQ_NONE); | |
2478 | } else { | |
2479 | workq_schedule_immediate_thread_creation(wq); | |
0a7de745 | 2480 | } |
d9a64523 A |
2481 | } |
2482 | ||
2483 | static int | |
2484 | workq_thread_return(struct proc *p, struct workq_kernreturn_args *uap, | |
0a7de745 | 2485 | struct workqueue *wq) |
d9a64523 A |
2486 | { |
2487 | thread_t th = current_thread(); | |
2488 | struct uthread *uth = get_bsdthread_info(th); | |
cb323159 | 2489 | workq_threadreq_t kqr = uth->uu_kqr_bound; |
d9a64523 A |
2490 | workq_threadreq_param_t trp = { }; |
2491 | int nevents = uap->affinity, error; | |
2492 | user_addr_t eventlist = uap->item; | |
2493 | ||
2494 | if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) || | |
0a7de745 | 2495 | (uth->uu_workq_flags & UT_WORKQ_DYING)) { |
d9a64523 A |
2496 | return EINVAL; |
2497 | } | |
2498 | ||
2499 | if (eventlist && nevents && kqr == NULL) { | |
2500 | return EINVAL; | |
2501 | } | |
2502 | ||
2503 | /* reset signal mask on the workqueue thread to default state */ | |
2504 | if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) { | |
2505 | proc_lock(p); | |
2506 | uth->uu_sigmask = ~workq_threadmask; | |
2507 | proc_unlock(p); | |
2508 | } | |
2509 | ||
cb323159 | 2510 | if (kqr && kqr->tr_flags & WORKQ_TR_FLAG_WL_PARAMS) { |
d9a64523 A |
2511 | /* |
2512 | * Ensure we store the threadreq param before unbinding | |
2513 | * the kqr from this thread. | |
2514 | */ | |
cb323159 | 2515 | trp = kqueue_threadreq_workloop_param(kqr); |
d9a64523 A |
2516 | } |
2517 | ||
cb323159 A |
2518 | /* |
2519 | * Freeze the base pri while we decide the fate of this thread. | |
2520 | * | |
2521 | * Either: | |
2522 | * - we return to user and kevent_cleanup will have unfrozen the base pri, | |
2523 | * - or we proceed to workq_select_threadreq_or_park_and_unlock(), which will. | |
2524 | */ | |
2525 | thread_freeze_base_pri(th); | |
2526 | ||
d9a64523 A |
2527 | if (kqr) { |
2528 | uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI | WQ_FLAG_THREAD_REUSE; | |
cb323159 | 2529 | if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
d9a64523 A |
2530 | upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; |
2531 | } else { | |
2532 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2533 | } | |
2534 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
2535 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
2536 | } else { | |
2537 | if (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) { | |
2538 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
2539 | } | |
2540 | if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) { | |
2541 | upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS; | |
2542 | } else { | |
2543 | upcall_flags |= uth->uu_workq_pri.qos_req | | |
0a7de745 | 2544 | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
2545 | } |
2546 | } | |
2547 | ||
2548 | error = pthread_functions->workq_handle_stack_events(p, th, | |
0a7de745 A |
2549 | get_task_map(p->task), uth->uu_workq_stackaddr, |
2550 | uth->uu_workq_thport, eventlist, nevents, upcall_flags); | |
2551 | if (error) { | |
cb323159 | 2552 | assert(uth->uu_kqr_bound == kqr); |
0a7de745 A |
2553 | return error; |
2554 | } | |
d9a64523 A |
2555 | |
2556 | // pthread is supposed to pass KEVENT_FLAG_PARKING here | |
2557 | // which should cause the above call to either: | |
2558 | // - not return | |
2559 | // - return an error | |
2560 | // - return 0 and have unbound properly | |
2561 | assert(uth->uu_kqr_bound == NULL); | |
2562 | } | |
2563 | ||
2564 | WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, uap->options, 0, 0, 0); | |
2565 | ||
2566 | thread_sched_call(th, NULL); | |
2567 | thread_will_park_or_terminate(th); | |
2568 | #if CONFIG_WORKLOOP_DEBUG | |
2569 | UU_KEVENT_HISTORY_WRITE_ENTRY(uth, { .uu_error = -1, }); | |
2570 | #endif | |
2571 | ||
2572 | workq_lock_spin(wq); | |
2573 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0); | |
2574 | uth->uu_save.uus_workq_park_data.workloop_params = trp.trp_value; | |
cb323159 A |
2575 | workq_select_threadreq_or_park_and_unlock(p, wq, uth, |
2576 | WQ_SETUP_CLEAR_VOUCHER); | |
d9a64523 A |
2577 | __builtin_unreachable(); |
2578 | } | |
2579 | ||
2580 | /** | |
2581 | * Multiplexed call to interact with the workqueue mechanism | |
2582 | */ | |
2583 | int | |
2584 | workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, int32_t *retval) | |
2585 | { | |
2586 | int options = uap->options; | |
2587 | int arg2 = uap->affinity; | |
2588 | int arg3 = uap->prio; | |
2589 | struct workqueue *wq = proc_get_wqptr(p); | |
2590 | int error = 0; | |
2591 | ||
2592 | if ((p->p_lflag & P_LREGISTER) == 0) { | |
2593 | return EINVAL; | |
2594 | } | |
2595 | ||
2596 | switch (options) { | |
2597 | case WQOPS_QUEUE_NEWSPISUPP: { | |
2598 | /* | |
2599 | * arg2 = offset of serialno into dispatch queue | |
2600 | * arg3 = kevent support | |
2601 | */ | |
2602 | int offset = arg2; | |
0a7de745 | 2603 | if (arg3 & 0x01) { |
d9a64523 A |
2604 | // If we get here, then userspace has indicated support for kevent delivery. |
2605 | } | |
2606 | ||
2607 | p->p_dispatchqueue_serialno_offset = (uint64_t)offset; | |
2608 | break; | |
2609 | } | |
2610 | case WQOPS_QUEUE_REQTHREADS: { | |
2611 | /* | |
2612 | * arg2 = number of threads to start | |
2613 | * arg3 = priority | |
2614 | */ | |
2615 | error = workq_reqthreads(p, arg2, arg3); | |
2616 | break; | |
2617 | } | |
2618 | case WQOPS_SET_EVENT_MANAGER_PRIORITY: { | |
2619 | /* | |
2620 | * arg2 = priority for the manager thread | |
2621 | * | |
2622 | * if _PTHREAD_PRIORITY_SCHED_PRI_FLAG is set, | |
2623 | * the low bits of the value contain a scheduling priority | |
2624 | * instead of a QOS value | |
2625 | */ | |
2626 | pthread_priority_t pri = arg2; | |
2627 | ||
2628 | if (wq == NULL) { | |
2629 | error = EINVAL; | |
2630 | break; | |
2631 | } | |
2632 | ||
2633 | /* | |
2634 | * Normalize the incoming priority so that it is ordered numerically. | |
2635 | */ | |
2636 | if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
2637 | pri &= (_PTHREAD_PRIORITY_SCHED_PRI_MASK | | |
0a7de745 | 2638 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG); |
d9a64523 A |
2639 | } else { |
2640 | thread_qos_t qos = _pthread_priority_thread_qos(pri); | |
2641 | int relpri = _pthread_priority_relpri(pri); | |
2642 | if (relpri > 0 || relpri < THREAD_QOS_MIN_TIER_IMPORTANCE || | |
0a7de745 | 2643 | qos == THREAD_QOS_UNSPECIFIED) { |
d9a64523 A |
2644 | error = EINVAL; |
2645 | break; | |
2646 | } | |
2647 | pri &= ~_PTHREAD_PRIORITY_FLAGS_MASK; | |
2648 | } | |
2649 | ||
2650 | /* | |
2651 | * If userspace passes a scheduling priority, that wins over any QoS. | |
2652 | * Userspace should take care not to lower the priority this way. | |
2653 | */ | |
2654 | workq_lock_spin(wq); | |
2655 | if (wq->wq_event_manager_priority < (uint32_t)pri) { | |
2656 | wq->wq_event_manager_priority = (uint32_t)pri; | |
2657 | } | |
2658 | workq_unlock(wq); | |
2659 | break; | |
2660 | } | |
2661 | case WQOPS_THREAD_KEVENT_RETURN: | |
2662 | case WQOPS_THREAD_WORKLOOP_RETURN: | |
2663 | case WQOPS_THREAD_RETURN: { | |
2664 | error = workq_thread_return(p, uap, wq); | |
2665 | break; | |
2666 | } | |
2667 | ||
2668 | case WQOPS_SHOULD_NARROW: { | |
2669 | /* | |
2670 | * arg2 = priority to test | |
2671 | * arg3 = unused | |
2672 | */ | |
2673 | thread_t th = current_thread(); | |
2674 | struct uthread *uth = get_bsdthread_info(th); | |
2675 | if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) || | |
0a7de745 | 2676 | (uth->uu_workq_flags & (UT_WORKQ_DYING | UT_WORKQ_OVERCOMMIT))) { |
d9a64523 A |
2677 | error = EINVAL; |
2678 | break; | |
2679 | } | |
2680 | ||
2681 | thread_qos_t qos = _pthread_priority_thread_qos(arg2); | |
2682 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
2683 | error = EINVAL; | |
2684 | break; | |
2685 | } | |
2686 | workq_lock_spin(wq); | |
2687 | bool should_narrow = !workq_constrained_allowance(wq, qos, uth, false); | |
2688 | workq_unlock(wq); | |
2689 | ||
2690 | *retval = should_narrow; | |
2691 | break; | |
2692 | } | |
cb323159 A |
2693 | case WQOPS_SETUP_DISPATCH: { |
2694 | /* | |
2695 | * item = pointer to workq_dispatch_config structure | |
2696 | * arg2 = sizeof(item) | |
2697 | */ | |
2698 | struct workq_dispatch_config cfg; | |
2699 | bzero(&cfg, sizeof(cfg)); | |
2700 | ||
2701 | error = copyin(uap->item, &cfg, MIN(sizeof(cfg), (unsigned long) arg2)); | |
2702 | if (error) { | |
2703 | break; | |
2704 | } | |
2705 | ||
2706 | if (cfg.wdc_flags & ~WORKQ_DISPATCH_SUPPORTED_FLAGS || | |
2707 | cfg.wdc_version < WORKQ_DISPATCH_MIN_SUPPORTED_VERSION) { | |
2708 | error = ENOTSUP; | |
2709 | break; | |
2710 | } | |
2711 | ||
2712 | /* Load fields from version 1 */ | |
2713 | p->p_dispatchqueue_serialno_offset = cfg.wdc_queue_serialno_offs; | |
2714 | ||
2715 | /* Load fields from version 2 */ | |
2716 | if (cfg.wdc_version >= 2) { | |
2717 | p->p_dispatchqueue_label_offset = cfg.wdc_queue_label_offs; | |
2718 | } | |
2719 | ||
2720 | break; | |
2721 | } | |
d9a64523 A |
2722 | default: |
2723 | error = EINVAL; | |
2724 | break; | |
2725 | } | |
2726 | ||
0a7de745 | 2727 | return error; |
d9a64523 A |
2728 | } |
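The WQOPS_SET_EVENT_MANAGER_PRIORITY case above is a "monotonic maximum" update: under the lock, the stored manager priority is only ever raised. The same invariant can also be kept lock-free with a compare-and-swap loop; a minimal C11 sketch (illustrative, not how this file does it):

#include <stdatomic.h>
#include <stdint.h>

/* Raise *target to at least `candidate`; never lower it. */
static void
store_max_u32(_Atomic uint32_t *target, uint32_t candidate)
{
        uint32_t cur = atomic_load_explicit(target, memory_order_relaxed);

        while (cur < candidate &&
            !atomic_compare_exchange_weak_explicit(target, &cur, candidate,
            memory_order_relaxed, memory_order_relaxed)) {
                /* `cur` was refreshed by the failed CAS; loop while still lower */
        }
}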
2729 | ||
2730 | /* | |
2731 | * We have no work to do; park ourselves on the idle list. | |
2732 | * | |
2733 | * Consumes the workqueue lock and does not return. | |
2734 | */ | |
2735 | __attribute__((noreturn, noinline)) | |
2736 | static void | |
cb323159 A |
2737 | workq_park_and_unlock(proc_t p, struct workqueue *wq, struct uthread *uth, |
2738 | uint32_t setup_flags) | |
d9a64523 A |
2739 | { |
2740 | assert(uth == current_uthread()); | |
2741 | assert(uth->uu_kqr_bound == NULL); | |
cb323159 | 2742 | workq_push_idle_thread(p, wq, uth, setup_flags); // may not return |
d9a64523 A |
2743 | |
2744 | workq_thread_reset_cpupercent(NULL, uth); | |
2745 | ||
cb323159 A |
2746 | if ((uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) && |
2747 | !(uth->uu_workq_flags & UT_WORKQ_DYING)) { | |
d9a64523 A |
2748 | workq_unlock(wq); |
2749 | ||
2750 | /* | |
2751 | * workq_push_idle_thread() will unset `has_stack` | |
2752 | * if it wants us to free the stack before parking. | |
2753 | */ | |
2754 | if (!uth->uu_save.uus_workq_park_data.has_stack) { | |
2755 | pthread_functions->workq_markfree_threadstack(p, uth->uu_thread, | |
0a7de745 | 2756 | get_task_map(p->task), uth->uu_workq_stackaddr); |
d9a64523 A |
2757 | } |
2758 | ||
2759 | /* | |
2760 | * When we remove the voucher from the thread, we may lose our importance | |
2761 | * causing us to get preempted, so we do this after putting the thread on | |
2762 | * the idle list. Then, when we get our importance back we'll be able to | |
2763 | * use this thread from e.g. the kevent call out to deliver a boosting | |
2764 | * message. | |
2765 | */ | |
2766 | __assert_only kern_return_t kr; | |
2767 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
2768 | assert(kr == KERN_SUCCESS); | |
2769 | ||
2770 | workq_lock_spin(wq); | |
2771 | uth->uu_workq_flags &= ~UT_WORKQ_IDLE_CLEANUP; | |
cb323159 | 2772 | setup_flags &= ~WQ_SETUP_CLEAR_VOUCHER; |
d9a64523 A |
2773 | } |
2774 | ||
f427ee49 A |
2775 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0); |
2776 | ||
d9a64523 A |
2777 | if (uth->uu_workq_flags & UT_WORKQ_RUNNING) { |
2778 | /* | |
2779 | * While we'd dropped the lock to unset our voucher, someone came | |
2780 | * around and made us runnable. But because we weren't waiting on the | |
2781 | * event their thread_wakeup() was ineffectual. To correct for that, | |
2782 | * we just run the continuation ourselves. | |
2783 | */ | |
cb323159 | 2784 | workq_unpark_select_threadreq_or_park_and_unlock(p, wq, uth, setup_flags); |
d9a64523 A |
2785 | __builtin_unreachable(); |
2786 | } | |
2787 | ||
2788 | if (uth->uu_workq_flags & UT_WORKQ_DYING) { | |
2789 | workq_unpark_for_death_and_unlock(p, wq, uth, | |
cb323159 | 2790 | WORKQ_UNPARK_FOR_DEATH_WAS_IDLE, setup_flags); |
d9a64523 A |
2791 | __builtin_unreachable(); |
2792 | } | |
2793 | ||
2794 | thread_set_pending_block_hint(uth->uu_thread, kThreadWaitParkedWorkQueue); | |
2795 | assert_wait(workq_parked_wait_event(uth), THREAD_INTERRUPTIBLE); | |
2796 | workq_unlock(wq); | |
d9a64523 A |
2797 | thread_block(workq_unpark_continue); |
2798 | __builtin_unreachable(); | |
2799 | } | |
2800 | ||
2801 | static inline bool | |
2802 | workq_may_start_event_mgr_thread(struct workqueue *wq, struct uthread *uth) | |
2803 | { | |
2804 | /* | |
2805 | * There's an event manager request and either: | |
2806 | * - no event manager currently running | |
2807 | * - we are re-using the event manager | |
2808 | */ | |
2809 | return wq->wq_thscheduled_count[_wq_bucket(WORKQ_THREAD_QOS_MANAGER)] == 0 || | |
0a7de745 | 2810 | (uth && uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER); |
d9a64523 A |
2811 | } |
2812 | ||
2813 | static uint32_t | |
2814 | workq_constrained_allowance(struct workqueue *wq, thread_qos_t at_qos, | |
0a7de745 | 2815 | struct uthread *uth, bool may_start_timer) |
d9a64523 A |
2816 | { |
2817 | assert(at_qos != WORKQ_THREAD_QOS_MANAGER); | |
2818 | uint32_t count = 0; | |
2819 | ||
2820 | uint32_t max_count = wq->wq_constrained_threads_scheduled; | |
2821 | if (uth && (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
2822 | /* | |
2823 | * don't count the current thread as scheduled | |
2824 | */ | |
2825 | assert(max_count > 0); | |
2826 | max_count--; | |
2827 | } | |
2828 | if (max_count >= wq_max_constrained_threads) { | |
2829 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1, | |
0a7de745 A |
2830 | wq->wq_constrained_threads_scheduled, |
2831 | wq_max_constrained_threads, 0); | |
d9a64523 A |
2832 | /* |
2833 | * we need 1 or more constrained threads to return to the kernel before | |
2834 | * we can dispatch additional work | |
2835 | */ | |
2836 | return 0; | |
2837 | } | |
2838 | max_count -= wq_max_constrained_threads; | |
2839 | ||
2840 | /* | |
2841 | * Compute a metric for how many threads are active. We find the | |
c3c9b80d A |
2842 | * highest priority request outstanding and then add up the number of active |
2843 | * threads in that and all higher-priority buckets. We'll also add any | |
2844 | * "busy" threads which are not currently active but blocked recently enough | |
2845 | * that we can't be sure that they won't be unblocked soon and start | |
2846 | * being active again. | |
2847 | * | |
2848 | * We'll then compare this metric to our max concurrency to decide whether | |
2849 | * to add a new thread. | |
d9a64523 A |
2850 | */ |
2851 | ||
2852 | uint32_t busycount, thactive_count; | |
2853 | ||
2854 | thactive_count = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq), | |
0a7de745 | 2855 | at_qos, &busycount, NULL); |
d9a64523 A |
2856 | |
2857 | if (uth && uth->uu_workq_pri.qos_bucket != WORKQ_THREAD_QOS_MANAGER && | |
0a7de745 | 2858 | at_qos <= uth->uu_workq_pri.qos_bucket) { |
d9a64523 A |
2859 | /* |
2860 | * Don't count this thread as currently active, but only if it's not | |
2861 | * a manager thread, as _wq_thactive_aggregate_downto_qos ignores active | |
2862 | * managers. | |
2863 | */ | |
2864 | assert(thactive_count > 0); | |
2865 | thactive_count--; | |
2866 | } | |
2867 | ||
2868 | count = wq_max_parallelism[_wq_bucket(at_qos)]; | |
2869 | if (count > thactive_count + busycount) { | |
2870 | count -= thactive_count + busycount; | |
2871 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2, | |
0a7de745 | 2872 | thactive_count, busycount, 0); |
d9a64523 A |
2873 | return MIN(count, max_count); |
2874 | } else { | |
2875 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3, | |
0a7de745 | 2876 | thactive_count, busycount, 0); |
d9a64523 A |
2877 | } |
2878 | ||
c3c9b80d | 2879 | if (may_start_timer) { |
d9a64523 A |
2880 | /* |
2881 | * If this is called from the add timer, we won't have another timer | |
2882 | * fire when the thread exits the "busy" state, so rearm the timer. | |
2883 | */ | |
2884 | workq_schedule_delayed_thread_creation(wq, 0); | |
2885 | } | |
2886 | ||
2887 | return 0; | |
2888 | } | |
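Stripped of tracing and of the current-thread adjustments, the admission math in workq_constrained_allowance() is two checks: a hard cap on constrained threads overall, and a spare-concurrency computation against active plus recently-busy threads. A simplified standalone sketch (the final clamp against the remaining pool headroom is omitted for brevity; the parameter names mirror the locals above but the helper itself is hypothetical):

#include <stdint.h>

static uint32_t
constrained_allowance(uint32_t scheduled_constrained,
    uint32_t max_constrained, uint32_t parallelism,
    uint32_t thactive_count, uint32_t busycount)
{
        if (scheduled_constrained >= max_constrained) {
                return 0;       /* need a constrained thread to return first */
        }
        if (parallelism <= thactive_count + busycount) {
                return 0;       /* no spare concurrency in this QoS bucket */
        }
        return parallelism - (thactive_count + busycount);
}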
2889 | ||
2890 | static bool | |
2891 | workq_threadreq_admissible(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 2892 | workq_threadreq_t req) |
d9a64523 A |
2893 | { |
2894 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
2895 | return workq_may_start_event_mgr_thread(wq, uth); | |
2896 | } | |
cb323159 | 2897 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2898 | return workq_constrained_allowance(wq, req->tr_qos, uth, true); |
2899 | } | |
2900 | return true; | |
2901 | } | |
2902 | ||
2903 | static workq_threadreq_t | |
2904 | workq_threadreq_select_for_creator(struct workqueue *wq) | |
2905 | { | |
f427ee49 | 2906 | workq_threadreq_t req_qos, req_pri, req_tmp, req_mgr; |
d9a64523 A |
2907 | thread_qos_t qos = THREAD_QOS_UNSPECIFIED; |
2908 | uint8_t pri = 0; | |
2909 | ||
d9a64523 A |
2910 | /* |
2911 | * Compute the best priority request, and ignore the turnstile for now | |
2912 | */ | |
2913 | ||
2914 | req_pri = priority_queue_max(&wq->wq_special_queue, | |
0a7de745 | 2915 | struct workq_threadreq_s, tr_entry); |
d9a64523 | 2916 | if (req_pri) { |
f427ee49 A |
2917 | pri = (uint8_t)priority_queue_entry_sched_pri(&wq->wq_special_queue, |
2918 | &req_pri->tr_entry); | |
2919 | } | |
2920 | ||
2921 | /* | |
2922 | * Handle the manager thread request. The special queue might yield | |
2923 | * a higher priority, but the manager always beats the QoS world. | |
2924 | */ | |
2925 | ||
2926 | req_mgr = wq->wq_event_manager_threadreq; | |
2927 | if (req_mgr && workq_may_start_event_mgr_thread(wq, NULL)) { | |
2928 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
2929 | ||
2930 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
2931 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
2932 | } else { | |
2933 | mgr_pri = thread_workq_pri_for_qos( | |
2934 | _pthread_priority_thread_qos(mgr_pri)); | |
2935 | } | |
2936 | ||
2937 | return mgr_pri >= pri ? req_mgr : req_pri; | |
d9a64523 A |
2938 | } |
2939 | ||
2940 | /* | |
2941 | * Compute the best QoS Request, and check whether it beats the "pri" one | |
2942 | */ | |
2943 | ||
2944 | req_qos = priority_queue_max(&wq->wq_overcommit_queue, | |
0a7de745 | 2945 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
2946 | if (req_qos) { |
2947 | qos = req_qos->tr_qos; | |
2948 | } | |
2949 | ||
2950 | req_tmp = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 2951 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
2952 | |
2953 | if (req_tmp && qos < req_tmp->tr_qos) { | |
2954 | if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) { | |
2955 | return req_pri; | |
2956 | } | |
2957 | ||
2958 | if (workq_constrained_allowance(wq, req_tmp->tr_qos, NULL, true)) { | |
2959 | /* | |
2960 | * If the constrained thread request is the best one and passes | |
2961 | * the admission check, pick it. | |
2962 | */ | |
2963 | return req_tmp; | |
2964 | } | |
2965 | } | |
2966 | ||
2967 | if (pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) { | |
2968 | return req_pri; | |
2969 | } | |
2970 | ||
2971 | if (req_qos) { | |
2972 | return req_qos; | |
2973 | } | |
2974 | ||
2975 | /* | |
2976 | * If we had no eligible request but we have a turnstile push, | |
2977 | * it must be a non-overcommit thread request that failed
2978 | * the admission check.
2979 | *
2980 | * Just fake a BG thread request so that, if the push stops, the creator
2981 | * priority simply drops to 4.
2982 | */ | |
2983 | if (turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile, NULL)) { | |
2984 | static struct workq_threadreq_s workq_sync_push_fake_req = { | |
2985 | .tr_qos = THREAD_QOS_BACKGROUND, | |
2986 | }; | |
2987 | ||
2988 | return &workq_sync_push_fake_req; | |
2989 | } | |
2990 | ||
2991 | return NULL; | |
2992 | } | |
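/*
 * Condensed sketch of the selection precedence implemented above, for readers
 * who want the rules without the queue plumbing. Everything here is an
 * illustrative stand-in (no xnu types): `special_pri` and `manager_pri` are
 * assumed to already be in the scheduler-priority domain (the manager one
 * normalized as in the code above), `qos_to_pri` stands in for
 * thread_workq_pri_for_qos(), `has_manager` implies the manager may start,
 * and `constrained_ok` stands in for the admission check.
 */
#include <stdbool.h>
#include <stdint.h>

enum req_pick {
	PICK_NONE,        /* nothing eligible, no push: no creator needed */
	PICK_FAKE_BG,     /* nothing eligible, but a turnstile push exists */
	PICK_SPECIAL,     /* special (raw sched pri) queue */
	PICK_MANAGER,     /* event manager request */
	PICK_CONSTRAINED, /* non-overcommit QoS queue */
	PICK_OVERCOMMIT,  /* overcommit QoS queue */
};

struct req_candidates {
	bool    has_special, has_manager, has_constrained, has_overcommit;
	uint8_t special_pri;     /* max sched pri in the special queue */
	uint8_t manager_pri;     /* manager priority, already normalized */
	uint8_t constrained_qos; /* max QoS in the constrained queue */
	uint8_t overcommit_qos;  /* max QoS in the overcommit queue */
};

static enum req_pick
select_for_creator_sketch(const struct req_candidates *c,
    bool constrained_ok, bool has_turnstile_push,
    uint8_t (*qos_to_pri)(uint8_t qos))
{
	uint8_t pri = c->has_special ? c->special_pri : 0;

	/* The manager beats the QoS world; only a higher special request wins. */
	if (c->has_manager) {
		return c->manager_pri >= pri ? PICK_MANAGER : PICK_SPECIAL;
	}

	uint8_t oc_qos = c->has_overcommit ? c->overcommit_qos : 0;

	/* A constrained request that outranks the overcommit one is chosen when
	 * it clears admission and isn't itself outranked by the special one. */
	if (c->has_constrained && oc_qos < c->constrained_qos) {
		if (pri && pri >= qos_to_pri(c->constrained_qos)) {
			return PICK_SPECIAL;
		}
		if (constrained_ok) {
			return PICK_CONSTRAINED;
		}
	}

	/* Otherwise the special request wins ties against the overcommit one. */
	if (pri && (!oc_qos || pri >= qos_to_pri(oc_qos))) {
		return PICK_SPECIAL;
	}
	if (c->has_overcommit) {
		return PICK_OVERCOMMIT;
	}

	/* No eligible request: a turnstile push still keeps the creator around
	 * at background priority, otherwise there is nothing to elect for. */
	return has_turnstile_push ? PICK_FAKE_BG : PICK_NONE;
}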
2993 | ||
2994 | static workq_threadreq_t | |
2995 | workq_threadreq_select(struct workqueue *wq, struct uthread *uth) | |
2996 | { | |
f427ee49 | 2997 | workq_threadreq_t req_qos, req_pri, req_tmp, req_mgr; |
d9a64523 A |
2998 | uintptr_t proprietor; |
2999 | thread_qos_t qos = THREAD_QOS_UNSPECIFIED; | |
3000 | uint8_t pri = 0; | |
3001 | ||
0a7de745 A |
3002 | if (uth == wq->wq_creator) { |
3003 | uth = NULL; | |
3004 | } | |
d9a64523 | 3005 | |
d9a64523 A |
3006 | /* |
3007 | * Compute the best priority request (special or turnstile) | |
3008 | */ | |
3009 | ||
f427ee49 | 3010 | pri = (uint8_t)turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile, |
0a7de745 | 3011 | &proprietor); |
d9a64523 A |
3012 | if (pri) { |
3013 | struct kqworkloop *kqwl = (struct kqworkloop *)proprietor; | |
cb323159 A |
3014 | req_pri = &kqwl->kqwl_request; |
3015 | if (req_pri->tr_state != WORKQ_TR_STATE_QUEUED) { | |
d9a64523 | 3016 | panic("Invalid thread request (%p) state %d", |
0a7de745 | 3017 | req_pri, req_pri->tr_state); |
d9a64523 A |
3018 | } |
3019 | } else { | |
3020 | req_pri = NULL; | |
3021 | } | |
3022 | ||
3023 | req_tmp = priority_queue_max(&wq->wq_special_queue, | |
0a7de745 | 3024 | struct workq_threadreq_s, tr_entry); |
f427ee49 | 3025 | if (req_tmp && pri < priority_queue_entry_sched_pri(&wq->wq_special_queue, |
0a7de745 | 3026 | &req_tmp->tr_entry)) { |
d9a64523 | 3027 | req_pri = req_tmp; |
f427ee49 A |
3028 | pri = (uint8_t)priority_queue_entry_sched_pri(&wq->wq_special_queue, |
3029 | &req_tmp->tr_entry); | |
3030 | } | |
3031 | ||
3032 | /* | |
3033 | * Handle the manager thread request. The special queue might yield | |
3034 | * a higher priority, but the manager always beats the QoS world. | |
3035 | */ | |
3036 | ||
3037 | req_mgr = wq->wq_event_manager_threadreq; | |
3038 | if (req_mgr && workq_may_start_event_mgr_thread(wq, uth)) { | |
3039 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
3040 | ||
3041 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
3042 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
3043 | } else { | |
3044 | mgr_pri = thread_workq_pri_for_qos( | |
3045 | _pthread_priority_thread_qos(mgr_pri)); | |
3046 | } | |
3047 | ||
3048 | return mgr_pri >= pri ? req_mgr : req_pri; | |
d9a64523 A |
3049 | } |
3050 | ||
3051 | /* | |
3052 | * Compute the best QoS Request, and check whether it beats the "pri" one | |
3053 | */ | |
3054 | ||
3055 | req_qos = priority_queue_max(&wq->wq_overcommit_queue, | |
0a7de745 | 3056 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
3057 | if (req_qos) { |
3058 | qos = req_qos->tr_qos; | |
3059 | } | |
3060 | ||
3061 | req_tmp = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 3062 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
3063 | |
3064 | if (req_tmp && qos < req_tmp->tr_qos) { | |
3065 | if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) { | |
3066 | return req_pri; | |
3067 | } | |
3068 | ||
3069 | if (workq_constrained_allowance(wq, req_tmp->tr_qos, uth, true)) { | |
3070 | /* | |
3071 | * If the constrained thread request is the best one and passes | |
3072 | * the admission check, pick it. | |
3073 | */ | |
3074 | return req_tmp; | |
3075 | } | |
3076 | } | |
3077 | ||
3078 | if (req_pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) { | |
3079 | return req_pri; | |
3080 | } | |
3081 | ||
3082 | return req_qos; | |
3083 | } | |
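/*
 * Sketch of the manager-priority normalization shared by the two selection
 * functions above. wq_event_manager_priority either carries a raw scheduler
 * priority (when the "sched pri" flag is set) or encodes a QoS class that
 * must be converted. The macro values and callbacks below are illustrative
 * placeholders, not the real _PTHREAD_PRIORITY_SCHED_PRI_FLAG/_MASK values
 * or kernel functions.
 */
#include <stdint.h>

#define SKETCH_SCHED_PRI_FLAG 0x20000000u /* placeholder bit, for illustration */
#define SKETCH_SCHED_PRI_MASK 0x000000ffu /* placeholder mask, for illustration */

static uint32_t
normalize_manager_pri_sketch(uint32_t mgr_pri,
    uint32_t (*pp_to_qos)(uint32_t),        /* _pthread_priority_thread_qos() stand-in */
    uint32_t (*qos_to_workq_pri)(uint32_t)) /* thread_workq_pri_for_qos() stand-in */
{
	if (mgr_pri & SKETCH_SCHED_PRI_FLAG) {
		/* Already a raw scheduler priority: strip the flag bits. */
		return mgr_pri & SKETCH_SCHED_PRI_MASK;
	}
	/* QoS-encoded: map the QoS class to its workqueue scheduler priority. */
	return qos_to_workq_pri(pp_to_qos(mgr_pri));
}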
3084 | ||
3085 | /* | |
3086 | * The creator is an anonymous thread used to make other threads. It is
3087 | * counted as scheduled, but otherwise has no scheduler callback set and
3088 | * is not tracked as active.
3089 | * | |
3090 | * When more requests are added or an existing one is hurried along,
3091 | * a creator is elected and set up, or the existing one is overridden accordingly.
3092 | * | |
3093 | * While this creator is in flight, no request has been dequeued, so threads
3094 | * that are already running have a chance at stealing thread requests, which
3095 | * avoids useless context switches; the creator, once scheduled, may then find
3096 | * no work to do and will simply park again.
3097 | * | |
3098 | * The creator serves the dual purpose of informing the scheduler of work that
3099 | * hasn't been materialized as threads yet, and of acting as a natural pacing
3100 | * mechanism for thread creation.
3101 | * | |
3102 | * Because it is anonymous (and not bound to anything), thread requests
3103 | * can be stolen from this creator by threads already on core, yielding more
3104 | * efficient scheduling and reduced context switches.
3105 | */ | |
3106 | static void | |
cb323159 A |
3107 | workq_schedule_creator(proc_t p, struct workqueue *wq, |
3108 | workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
3109 | { |
3110 | workq_threadreq_t req; | |
3111 | struct uthread *uth; | |
cb323159 | 3112 | bool needs_wakeup; |
d9a64523 A |
3113 | |
3114 | workq_lock_held(wq); | |
3115 | assert(p || (flags & WORKQ_THREADREQ_CAN_CREATE_THREADS) == 0); | |
3116 | ||
3117 | again: | |
3118 | uth = wq->wq_creator; | |
3119 | ||
3120 | if (!wq->wq_reqcount) { | |
cb323159 A |
3121 | /* |
3122 | * There is no thread request left. | |
3123 | * | |
3124 | * If there is a creator, leave everything in place, so that it cleans | |
3125 | * up itself in workq_push_idle_thread(). | |
3126 | * | |
3127 | * Else, make sure the turnstile state is reset to no inheritor. | |
3128 | */ | |
d9a64523 A |
3129 | if (uth == NULL) { |
3130 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
3131 | } | |
3132 | return; | |
3133 | } | |
3134 | ||
3135 | req = workq_threadreq_select_for_creator(wq); | |
3136 | if (req == NULL) { | |
cb323159 A |
3137 | /* |
3138 | * There isn't a thread request that passes the admission check. | |
3139 | * | |
3140 | * If there is a creator, do not touch anything, the creator will sort | |
3141 | * it out when it runs. | |
3142 | * | |
3143 | * Else, set the inheritor to "WORKQ" so that the turnstile propagation | |
3144 | * code calls us if anything changes. | |
3145 | */ | |
3146 | if (uth == NULL) { | |
d9a64523 A |
3147 | workq_turnstile_update_inheritor(wq, wq, TURNSTILE_INHERITOR_WORKQ); |
3148 | } | |
3149 | return; | |
3150 | } | |
3151 | ||
3152 | if (uth) { | |
3153 | /* | |
3154 | * We need to maybe override the creator we already have | |
3155 | */ | |
3156 | if (workq_thread_needs_priority_change(req, uth)) { | |
3157 | WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE, | |
0a7de745 | 3158 | wq, 1, thread_tid(uth->uu_thread), req->tr_qos, 0); |
cb323159 | 3159 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); |
d9a64523 | 3160 | } |
cb323159 | 3161 | assert(wq->wq_inheritor == uth->uu_thread); |
d9a64523 A |
3162 | } else if (wq->wq_thidlecount) { |
3163 | /* | |
3164 | * We need to unpark a creator thread | |
3165 | */ | |
cb323159 A |
3166 | wq->wq_creator = uth = workq_pop_idle_thread(wq, UT_WORKQ_OVERCOMMIT, |
3167 | &needs_wakeup); | |
4ba76501 A |
3168 | /* Always reset the priorities on the newly chosen creator */ |
3169 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); | |
d9a64523 | 3170 | workq_turnstile_update_inheritor(wq, uth->uu_thread, |
0a7de745 | 3171 | TURNSTILE_INHERITOR_THREAD); |
d9a64523 | 3172 | WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE, |
0a7de745 | 3173 | wq, 2, thread_tid(uth->uu_thread), req->tr_qos, 0); |
d9a64523 A |
3174 | uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled; |
3175 | uth->uu_save.uus_workq_park_data.yields = 0; | |
cb323159 A |
3176 | if (needs_wakeup) { |
3177 | workq_thread_wakeup(uth); | |
3178 | } | |
d9a64523 A |
3179 | } else { |
3180 | /* | |
3181 | * We need to allocate a thread... | |
3182 | */ | |
3183 | if (__improbable(wq->wq_nthreads >= wq_max_threads)) { | |
3184 | /* out of threads, just go away */ | |
cb323159 | 3185 | flags = WORKQ_THREADREQ_NONE; |
d9a64523 A |
3186 | } else if (flags & WORKQ_THREADREQ_SET_AST_ON_FAILURE) { |
3187 | act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ); | |
3188 | } else if (!(flags & WORKQ_THREADREQ_CAN_CREATE_THREADS)) { | |
3189 | /* This can drop the workqueue lock, and take it again */ | |
3190 | workq_schedule_immediate_thread_creation(wq); | |
3191 | } else if (workq_add_new_idle_thread(p, wq)) { | |
3192 | goto again; | |
3193 | } else { | |
3194 | workq_schedule_delayed_thread_creation(wq, 0); | |
3195 | } | |
3196 | ||
cb323159 A |
3197 | /* |
3198 | * If the current thread is the inheritor: | |
3199 | * | |
3200 | * If we set the AST, then the thread will stay the inheritor until | |
3201 | * either the AST calls workq_kern_threadreq_redrive(), or it parks | |
3202 | * and calls workq_push_idle_thread(). | |
3203 | * | |
3204 | * Else, the responsibility for thread creation lies with a thread-call,
3205 | * and we need to clear the inheritor. | |
3206 | */ | |
3207 | if ((flags & WORKQ_THREADREQ_SET_AST_ON_FAILURE) == 0 && | |
3208 | wq->wq_inheritor == current_thread()) { | |
3209 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
3210 | } | |
3211 | } | |
3212 | } | |
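/*
 * Condensed sketch of the decision made by workq_schedule_creator() above.
 * The enum and helper are illustrative only; they collapse the kernel's
 * bookkeeping into the four possible outcomes.
 */
#include <stdbool.h>

enum creator_action {
	CREATOR_LEAVE_ALONE,   /* no eligible request, or creator already fine */
	CREATOR_OVERRIDE,      /* re-prioritize the existing creator */
	CREATOR_UNPARK_IDLE,   /* promote an idle thread to be the creator */
	CREATOR_MAKE_THREAD    /* no idle thread: create one (possibly async) */
};

static enum creator_action
schedule_creator_sketch(bool have_eligible_request, bool have_creator,
    bool creator_needs_repri, bool have_idle_thread)
{
	if (!have_eligible_request) {
		/* An existing creator will clean up after itself when it parks. */
		return CREATOR_LEAVE_ALONE;
	}
	if (have_creator) {
		return creator_needs_repri ? CREATOR_OVERRIDE : CREATOR_LEAVE_ALONE;
	}
	if (have_idle_thread) {
		return CREATOR_UNPARK_IDLE;
	}
	return CREATOR_MAKE_THREAD;
}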
3213 | ||
3214 | /** | |
3215 | * Same as workq_unpark_select_threadreq_or_park_and_unlock, | |
3216 | * but do not allow early binds. | |
3217 | * | |
3218 | * Called with the base pri frozen, will unfreeze it. | |
3219 | */ | |
3220 | __attribute__((noreturn, noinline)) | |
3221 | static void | |
3222 | workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
3223 | struct uthread *uth, uint32_t setup_flags) | |
3224 | { | |
3225 | workq_threadreq_t req = NULL; | |
3226 | bool is_creator = (wq->wq_creator == uth); | |
3227 | bool schedule_creator = false; | |
3228 | ||
3229 | if (__improbable(_wq_exiting(wq))) { | |
3230 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
3231 | goto park; | |
3232 | } | |
3233 | ||
3234 | if (wq->wq_reqcount == 0) { | |
3235 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 1, 0, 0, 0); | |
3236 | goto park; | |
3237 | } | |
3238 | ||
3239 | req = workq_threadreq_select(wq, uth); | |
3240 | if (__improbable(req == NULL)) { | |
3241 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 2, 0, 0, 0); | |
3242 | goto park; | |
3243 | } | |
3244 | ||
3245 | uint8_t tr_flags = req->tr_flags; | |
3246 | struct turnstile *req_ts = kqueue_threadreq_get_turnstile(req); | |
3247 | ||
3248 | /* | |
3249 | * Attempt to set ourselves up as the new thing to run, moving all priority
3250 | * pushes to ourselves. | |
3251 | * | |
3252 | * If the current thread is the creator, then the fact that we are presently | |
3253 | * running is proof that we'll do something useful, so keep going. | |
3254 | * | |
3255 | * For other cases, peek at the AST to know whether the scheduler wants | |
3256 | * to preempt us; if so, park instead, and move the thread request
3257 | * turnstile back to the workqueue. | |
3258 | */ | |
3259 | if (req_ts) { | |
3260 | workq_perform_turnstile_operation_locked(wq, ^{ | |
3261 | turnstile_update_inheritor(req_ts, uth->uu_thread, | |
3262 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD); | |
3263 | turnstile_update_inheritor_complete(req_ts, | |
3264 | TURNSTILE_INTERLOCK_HELD); | |
3265 | }); | |
3266 | } | |
3267 | ||
3268 | if (is_creator) { | |
3269 | WQ_TRACE_WQ(TRACE_wq_creator_select, wq, 4, 0, | |
3270 | uth->uu_save.uus_workq_park_data.yields, 0); | |
3271 | wq->wq_creator = NULL; | |
3272 | _wq_thactive_inc(wq, req->tr_qos); | |
3273 | wq->wq_thscheduled_count[_wq_bucket(req->tr_qos)]++; | |
3274 | } else if (uth->uu_workq_pri.qos_bucket != req->tr_qos) { | |
3275 | _wq_thactive_move(wq, uth->uu_workq_pri.qos_bucket, req->tr_qos); | |
3276 | } | |
3277 | ||
3278 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); | |
3279 | ||
3280 | if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) { | |
3281 | if (req_ts) { | |
3282 | workq_perform_turnstile_operation_locked(wq, ^{ | |
3283 | turnstile_update_inheritor(req_ts, wq->wq_turnstile, | |
3284 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); | |
3285 | turnstile_update_inheritor_complete(req_ts, | |
3286 | TURNSTILE_INTERLOCK_HELD); | |
3287 | }); | |
d9a64523 | 3288 | } |
cb323159 A |
3289 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0); |
3290 | goto park_thawed; | |
3291 | } | |
3292 | ||
3293 | /* | |
3294 | * We passed all checks, dequeue the request, bind to it, and set it up | |
3295 | * to return to user. | |
3296 | */ | |
3297 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
3298 | workq_trace_req_id(req), 0, 0, 0); | |
3299 | wq->wq_fulfilled++; | |
3300 | schedule_creator = workq_threadreq_dequeue(wq, req); | |
3301 | ||
3302 | if (tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)) { | |
3303 | kqueue_threadreq_bind_prepost(p, req, uth); | |
3304 | req = NULL; | |
3305 | } else if (req->tr_count > 0) { | |
3306 | req = NULL; | |
3307 | } | |
3308 | ||
3309 | workq_thread_reset_cpupercent(req, uth); | |
3310 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
3311 | uth->uu_workq_flags ^= UT_WORKQ_NEW; | |
3312 | setup_flags |= WQ_SETUP_FIRST_USE; | |
3313 | } | |
3314 | if (tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
3315 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
3316 | uth->uu_workq_flags |= UT_WORKQ_OVERCOMMIT; | |
3317 | wq->wq_constrained_threads_scheduled--; | |
3318 | } | |
3319 | } else { | |
3320 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) != 0) { | |
3321 | uth->uu_workq_flags &= ~UT_WORKQ_OVERCOMMIT; | |
3322 | wq->wq_constrained_threads_scheduled++; | |
3323 | } | |
3324 | } | |
3325 | ||
3326 | if (is_creator || schedule_creator) { | |
3327 | /* This can drop the workqueue lock, and take it again */ | |
3328 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
3329 | } | |
3330 | ||
3331 | workq_unlock(wq); | |
3332 | ||
3333 | if (req) { | |
3334 | zfree(workq_zone_threadreq, req); | |
3335 | } | |
3336 | ||
3337 | /* | |
3338 | * Run Thread, Run! | |
3339 | */ | |
3340 | uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI; | |
3341 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
3342 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
3343 | } else if (tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
3344 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
d9a64523 | 3345 | } |
cb323159 A |
3346 | if (tr_flags & WORKQ_TR_FLAG_KEVENT) { |
3347 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
3348 | } | |
3349 | if (tr_flags & WORKQ_TR_FLAG_WORKLOOP) { | |
3350 | upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; | |
3351 | } | |
3352 | uth->uu_save.uus_workq_park_data.upcall_flags = upcall_flags; | |
3353 | ||
3354 | if (tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)) { | |
3355 | kqueue_threadreq_bind_commit(p, uth->uu_thread); | |
3356 | } | |
3357 | workq_setup_and_run(p, uth, setup_flags); | |
3358 | __builtin_unreachable(); | |
3359 | ||
3360 | park: | |
3361 | thread_unfreeze_base_pri(uth->uu_thread); | |
3362 | park_thawed: | |
3363 | workq_park_and_unlock(p, wq, uth, setup_flags); | |
d9a64523 A |
3364 | } |
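/*
 * Outline of the run-or-park decision above, as a standalone sketch. The
 * parameter names are illustrative; `base_pri_changed` corresponds to
 * thread_unfreeze_base_pri() reporting a change, i.e. the scheduler wants
 * to preempt this thread.
 */
#include <stdbool.h>

enum run_or_park { DO_RUN_REQUEST, DO_PARK };

static enum run_or_park
run_or_park_sketch(bool wq_exiting, bool have_request, bool is_creator,
    bool base_pri_changed)
{
	if (wq_exiting || !have_request) {
		return DO_PARK;
	}
	/* The creator is already on core, which is proof it can do something
	 * useful; other threads park when the scheduler wants them off core. */
	if (!is_creator && base_pri_changed) {
		return DO_PARK;
	}
	return DO_RUN_REQUEST;
}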
3365 | ||
3366 | /** | |
3367 | * Selects a thread request for the current (unparking) thread and runs it,
3368 | * or parks the thread if there is nothing it can run.
3369 | *
3370 | * - if the thread was early-bound to a request, selection is skipped and
3371 | * the thread is set up to run that request directly.
3372 | *
3373 | * - otherwise, the highest priority eligible request is selected. If no
3374 | * request can be run right away, the thread is parked.
3375 | *
3376 | * Either way, the thread request object serviced will be moved to state | |
3377 | * BINDING and attached to the uthread. | |
3378 | * | |
cb323159 A |
3379 | * Should be called with the workqueue lock held. Will drop it. |
3380 | * Should be called with the base pri not frozen. | |
d9a64523 A |
3381 | */ |
3382 | __attribute__((noreturn, noinline)) | |
3383 | static void | |
cb323159 A |
3384 | workq_unpark_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, |
3385 | struct uthread *uth, uint32_t setup_flags) | |
d9a64523 | 3386 | { |
d9a64523 A |
3387 | if (uth->uu_workq_flags & UT_WORKQ_EARLY_BOUND) { |
3388 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
3389 | setup_flags |= WQ_SETUP_FIRST_USE; | |
3390 | } | |
3391 | uth->uu_workq_flags &= ~(UT_WORKQ_NEW | UT_WORKQ_EARLY_BOUND); | |
3392 | /* | |
3393 | * This pointer may already have been freed; it is only used for tracing.
3394 | */ | |
cb323159 | 3395 | workq_threadreq_t req = uth->uu_save.uus_workq_park_data.thread_request; |
d9a64523 A |
3396 | workq_unlock(wq); |
3397 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
0a7de745 | 3398 | VM_KERNEL_ADDRHIDE(req), 0, 0, 0); |
cb323159 | 3399 | (void)req; |
d9a64523 A |
3400 | workq_setup_and_run(p, uth, setup_flags); |
3401 | __builtin_unreachable(); | |
3402 | } | |
3403 | ||
cb323159 A |
3404 | thread_freeze_base_pri(uth->uu_thread); |
3405 | workq_select_threadreq_or_park_and_unlock(p, wq, uth, setup_flags); | |
d9a64523 A |
3406 | } |
3407 | ||
3408 | static bool | |
3409 | workq_creator_should_yield(struct workqueue *wq, struct uthread *uth) | |
3410 | { | |
3411 | thread_qos_t qos = workq_pri_override(uth->uu_workq_pri); | |
3412 | ||
3413 | if (qos >= THREAD_QOS_USER_INTERACTIVE) { | |
3414 | return false; | |
3415 | } | |
3416 | ||
3417 | uint32_t snapshot = uth->uu_save.uus_workq_park_data.fulfilled_snapshot; | |
3418 | if (wq->wq_fulfilled == snapshot) { | |
3419 | return false; | |
3420 | } | |
3421 | ||
3422 | uint32_t cnt = 0, conc = wq_max_parallelism[_wq_bucket(qos)]; | |
3423 | if (wq->wq_fulfilled - snapshot > conc) { | |
3424 | /* we fulfilled more than NCPU requests since being dispatched */ | |
3425 | WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 1, | |
0a7de745 | 3426 | wq->wq_fulfilled, snapshot, 0); |
d9a64523 A |
3427 | return true; |
3428 | } | |
3429 | ||
3430 | for (int i = _wq_bucket(qos); i < WORKQ_NUM_QOS_BUCKETS; i++) { | |
3431 | cnt += wq->wq_thscheduled_count[i]; | |
3432 | } | |
3433 | if (conc <= cnt) { | |
3434 | /* We fulfilled requests and have more than NCPU scheduled threads */ | |
3435 | WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 2, | |
0a7de745 | 3436 | wq->wq_fulfilled, snapshot, 0); |
d9a64523 A |
3437 | return true; |
3438 | } | |
3439 | ||
3440 | return false; | |
3441 | } | |
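/*
 * The creator yield heuristic above as a standalone sketch. The names are
 * illustrative: `max_parallelism` plays the role of
 * wq_max_parallelism[_wq_bucket(qos)] (roughly the CPU count for that QoS)
 * and `scheduled_at_or_above` the summed wq_thscheduled_count[] buckets.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
creator_should_yield_sketch(bool is_user_interactive,
    uint32_t fulfilled_now, uint32_t fulfilled_snapshot,
    uint32_t max_parallelism, uint32_t scheduled_at_or_above)
{
	/* Never yield on behalf of user-interactive work. */
	if (is_user_interactive) {
		return false;
	}
	/* Nothing was fulfilled since this creator was dispatched: keep going. */
	if (fulfilled_now == fulfilled_snapshot) {
		return false;
	}
	/* Yield if more than a CPU's worth of requests were fulfilled... */
	if (fulfilled_now - fulfilled_snapshot > max_parallelism) {
		return true;
	}
	/* ...or if enough threads are already scheduled to cover the CPUs. */
	return scheduled_at_or_above >= max_parallelism;
}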
3442 | ||
3443 | /** | |
3444 | * parked thread wakes up | |
3445 | */ | |
3446 | __attribute__((noreturn, noinline)) | |
3447 | static void | |
3448 | workq_unpark_continue(void *parameter __unused, wait_result_t wr __unused) | |
3449 | { | |
cb323159 A |
3450 | thread_t th = current_thread(); |
3451 | struct uthread *uth = get_bsdthread_info(th); | |
d9a64523 A |
3452 | proc_t p = current_proc(); |
3453 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
3454 | ||
3455 | workq_lock_spin(wq); | |
3456 | ||
3457 | if (wq->wq_creator == uth && workq_creator_should_yield(wq, uth)) { | |
3458 | /* | |
3459 | * If the number of threads we have out is able to keep up with the
3460 | * demand, then we should avoid sending this creator thread to | |
3461 | * userspace. | |
3462 | */ | |
3463 | uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled; | |
3464 | uth->uu_save.uus_workq_park_data.yields++; | |
3465 | workq_unlock(wq); | |
3466 | thread_yield_with_continuation(workq_unpark_continue, NULL); | |
3467 | __builtin_unreachable(); | |
3468 | } | |
3469 | ||
3470 | if (__probable(uth->uu_workq_flags & UT_WORKQ_RUNNING)) { | |
cb323159 | 3471 | workq_unpark_select_threadreq_or_park_and_unlock(p, wq, uth, WQ_SETUP_NONE); |
d9a64523 A |
3472 | __builtin_unreachable(); |
3473 | } | |
3474 | ||
3475 | if (__probable(wr == THREAD_AWAKENED)) { | |
3476 | /* | |
3477 | * We were set running, but for the purposes of dying. | |
3478 | */ | |
3479 | assert(uth->uu_workq_flags & UT_WORKQ_DYING); | |
3480 | assert((uth->uu_workq_flags & UT_WORKQ_NEW) == 0); | |
3481 | } else { | |
3482 | /* | |
3483 | * workaround for <rdar://problem/38647347>:
3484 | * in case we do hit userspace, make sure that calling
3485 | * workq_thread_terminate() does the right thing here,
3486 | * and that, if we never call it, workq_exit() will too, because it sees
3487 | * this thread on the runlist.
3488 | */ | |
3489 | assert(wr == THREAD_INTERRUPTED); | |
3490 | wq->wq_thdying_count++; | |
3491 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
3492 | } | |
3493 | ||
3494 | workq_unpark_for_death_and_unlock(p, wq, uth, | |
cb323159 | 3495 | WORKQ_UNPARK_FOR_DEATH_WAS_IDLE, WQ_SETUP_NONE); |
d9a64523 A |
3496 | __builtin_unreachable(); |
3497 | } | |
3498 | ||
3499 | __attribute__((noreturn, noinline)) | |
3500 | static void | |
3501 | workq_setup_and_run(proc_t p, struct uthread *uth, int setup_flags) | |
3502 | { | |
3503 | thread_t th = uth->uu_thread; | |
3504 | vm_map_t vmap = get_task_map(p->task); | |
3505 | ||
3506 | if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { | |
3507 | /* | |
3508 | * For preemption reasons, we want to reset the voucher as late as | |
3509 | * possible, so we do it in two places: | |
3510 | * - Just before parking (i.e. in workq_park_and_unlock()) | |
3511 | * - Prior to doing the setup for the next workitem (i.e. here) | |
3512 | * | |
3513 | * Those two places are sufficient to ensure we always reset it before | |
3514 | * it goes back out to user space, but be careful to not break that | |
3515 | * guarantee. | |
3516 | */ | |
3517 | __assert_only kern_return_t kr; | |
3518 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
3519 | assert(kr == KERN_SUCCESS); | |
3520 | } | |
3521 | ||
3522 | uint32_t upcall_flags = uth->uu_save.uus_workq_park_data.upcall_flags; | |
3523 | if (!(setup_flags & WQ_SETUP_FIRST_USE)) { | |
3524 | upcall_flags |= WQ_FLAG_THREAD_REUSE; | |
3525 | } | |
3526 | ||
3527 | if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) { | |
3528 | /* | |
3529 | * For threads that have an outside-of-QoS thread priority, indicate | |
3530 | * to userspace that setting QoS should only affect the TSD and not | |
3531 | * change QOS in the kernel. | |
3532 | */ | |
3533 | upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS; | |
3534 | } else { | |
3535 | /* | |
3536 | * Put the QoS class value into the lower bits of the reuse_thread | |
3537 | * register; this is where the thread priority used to be stored
3538 | * anyway. | |
3539 | */ | |
3540 | upcall_flags |= uth->uu_save.uus_workq_park_data.qos | | |
0a7de745 | 3541 | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
3542 | } |
3543 | ||
3544 | if (uth->uu_workq_thport == MACH_PORT_NULL) { | |
c3c9b80d | 3545 | /* convert_thread_to_port_pinned() consumes a reference */ |
d9a64523 | 3546 | thread_reference(th); |
c3c9b80d A |
3547 | /* Convert to immovable/pinned thread port, but port is not pinned yet */ |
3548 | ipc_port_t port = convert_thread_to_port_pinned(th); | |
3549 | /* Atomically, pin and copy out the port */ | |
3550 | uth->uu_workq_thport = ipc_port_copyout_send_pinned(port, get_task_ipcspace(p->task)); | |
d9a64523 A |
3551 | } |
3552 | ||
3553 | /* | |
3554 | * Call out to pthread, this sets up the thread, pulls in kevent structs | |
3555 | * onto the stack, sets up the thread state and then returns to userspace. | |
3556 | */ | |
3557 | WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, | |
0a7de745 | 3558 | proc_get_wqptr_fast(p), 0, 0, 0, 0); |
d9a64523 A |
3559 | thread_sched_call(th, workq_sched_callback); |
3560 | pthread_functions->workq_setup_thread(p, th, vmap, uth->uu_workq_stackaddr, | |
0a7de745 | 3561 | uth->uu_workq_thport, 0, setup_flags, upcall_flags); |
d9a64523 A |
3562 | |
3563 | __builtin_unreachable(); | |
3564 | } | |
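/*
 * Sketch of how the upcall flags handed to pthread's setup path end up being
 * composed, pulling together the pieces set in the selection path and in
 * workq_setup_and_run() above. The WQ_FLAG_* names referenced in the comments
 * are the real ones used in this file, but the bit values below are
 * placeholders and the helper itself is illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>

/* Placeholder bit values for illustration; the real WQ_FLAG_* constants are
 * defined in the workqueue headers and differ from these. High bits are used
 * here so the low bits stay free for the QoS value. */
#define SK_FLAG_THREAD_NEWSPI        0x00010000u
#define SK_FLAG_THREAD_REUSE         0x00020000u
#define SK_FLAG_THREAD_EVENT_MANAGER 0x00040000u
#define SK_FLAG_THREAD_OVERCOMMIT    0x00080000u
#define SK_FLAG_THREAD_KEVENT        0x00100000u
#define SK_FLAG_THREAD_WORKLOOP      0x00200000u
#define SK_FLAG_THREAD_OUTSIDEQOS    0x00400000u
#define SK_FLAG_THREAD_PRIO_QOS      0x00800000u

static uint32_t
compose_upcall_flags_sketch(bool first_use, bool is_manager, bool overcommit,
    bool kevent, bool workloop, bool outside_qos, uint32_t qos_bits)
{
	uint32_t flags = SK_FLAG_THREAD_NEWSPI;

	if (is_manager) {
		flags |= SK_FLAG_THREAD_EVENT_MANAGER;
	} else if (overcommit) {
		flags |= SK_FLAG_THREAD_OVERCOMMIT;
	}
	if (kevent) {
		flags |= SK_FLAG_THREAD_KEVENT;
	}
	if (workloop) {
		flags |= SK_FLAG_THREAD_WORKLOOP | SK_FLAG_THREAD_KEVENT;
	}
	if (!first_use) {
		flags |= SK_FLAG_THREAD_REUSE;
	}
	if (outside_qos) {
		/* QoS changes should only touch the TSD, not the kernel QoS. */
		flags |= SK_FLAG_THREAD_OUTSIDEQOS;
	} else {
		/* The QoS class rides in the low bits of the reuse register. */
		flags |= qos_bits | SK_FLAG_THREAD_PRIO_QOS;
	}
	return flags;
}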
3565 | ||
3566 | #pragma mark misc | |
3567 | ||
3568 | int | |
3569 | fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) | |
3570 | { | |
3571 | struct workqueue *wq = proc_get_wqptr(p); | |
3572 | int error = 0; | |
0a7de745 | 3573 | int activecount; |
d9a64523 A |
3574 | |
3575 | if (wq == NULL) { | |
3576 | return EINVAL; | |
3577 | } | |
3578 | ||
3579 | /* | |
3580 | * This is sometimes called from interrupt context by the kperf sampler. | |
3581 | * In that case, it's not safe to spin trying to take the lock since we | |
3582 | * might already hold it. So, we just try-lock it and error out if it's | |
3583 | * already held. Since this is just a debugging aid, and all our callers | |
3584 | * are able to handle an error, that's fine. | |
3585 | */ | |
3586 | bool locked = workq_lock_try(wq); | |
3587 | if (!locked) { | |
3588 | return EBUSY; | |
3589 | } | |
3590 | ||
3591 | wq_thactive_t act = _wq_thactive(wq); | |
3592 | activecount = _wq_thactive_aggregate_downto_qos(wq, act, | |
0a7de745 | 3593 | WORKQ_THREAD_QOS_MIN, NULL, NULL); |
d9a64523 A |
3594 | if (act & _wq_thactive_offset_for_qos(WORKQ_THREAD_QOS_MANAGER)) { |
3595 | activecount++; | |
3596 | } | |
3597 | pwqinfo->pwq_nthreads = wq->wq_nthreads; | |
3598 | pwqinfo->pwq_runthreads = activecount; | |
3599 | pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; | |
3600 | pwqinfo->pwq_state = 0; | |
3601 | ||
3602 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
3603 | pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; | |
3604 | } | |
3605 | ||
3606 | if (wq->wq_nthreads >= wq_max_threads) { | |
3607 | pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; | |
3608 | } | |
3609 | ||
3610 | workq_unlock(wq); | |
3611 | return error; | |
3612 | } | |
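/*
 * User-space analogue of the try-lock pattern described in the comment above:
 * a sampler that may run in a context where the lock could already be held
 * must not spin, so it try-locks and reports EBUSY instead. This is only an
 * illustration of the pattern with pthreads, not the kernel lock; the struct
 * and helper names are hypothetical.
 */
#include <errno.h>
#include <pthread.h>

struct sampled_stats {
	pthread_mutex_t lock;
	unsigned long   nthreads;
};

static int
sample_stats_nonblocking(struct sampled_stats *s, unsigned long *out)
{
	if (pthread_mutex_trylock(&s->lock) != 0) {
		/* Lock already held (possibly by ourselves): bail out. */
		return EBUSY;
	}
	*out = s->nthreads;
	pthread_mutex_unlock(&s->lock);
	return 0;
}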
3613 | ||
3614 | boolean_t | |
3615 | workqueue_get_pwq_exceeded(void *v, boolean_t *exceeded_total, | |
0a7de745 | 3616 | boolean_t *exceeded_constrained) |
d9a64523 A |
3617 | { |
3618 | proc_t p = v; | |
3619 | struct proc_workqueueinfo pwqinfo; | |
3620 | int err; | |
3621 | ||
3622 | assert(p != NULL); | |
3623 | assert(exceeded_total != NULL); | |
3624 | assert(exceeded_constrained != NULL); | |
3625 | ||
3626 | err = fill_procworkqueue(p, &pwqinfo); | |
3627 | if (err) { | |
3628 | return FALSE; | |
3629 | } | |
3630 | if (!(pwqinfo.pwq_state & WQ_FLAGS_AVAILABLE)) { | |
3631 | return FALSE; | |
3632 | } | |
3633 | ||
3634 | *exceeded_total = (pwqinfo.pwq_state & WQ_EXCEEDED_TOTAL_THREAD_LIMIT); | |
3635 | *exceeded_constrained = (pwqinfo.pwq_state & WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT); | |
3636 | ||
3637 | return TRUE; | |
3638 | } | |
3639 | ||
3640 | uint32_t | |
3641 | workqueue_get_pwq_state_kdp(void * v) | |
3642 | { | |
3643 | static_assert((WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT << 17) == | |
0a7de745 | 3644 | kTaskWqExceededConstrainedThreadLimit); |
d9a64523 | 3645 | static_assert((WQ_EXCEEDED_TOTAL_THREAD_LIMIT << 17) == |
0a7de745 | 3646 | kTaskWqExceededTotalThreadLimit); |
d9a64523 A |
3647 | static_assert((WQ_FLAGS_AVAILABLE << 17) == kTaskWqFlagsAvailable); |
3648 | static_assert((WQ_FLAGS_AVAILABLE | WQ_EXCEEDED_TOTAL_THREAD_LIMIT | | |
0a7de745 | 3649 | WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT) == 0x7); |
d9a64523 A |
3650 | |
3651 | if (v == NULL) { | |
3652 | return 0; | |
3653 | } | |
3654 | ||
3655 | proc_t p = v; | |
3656 | struct workqueue *wq = proc_get_wqptr(p); | |
3657 | ||
3658 | if (wq == NULL || workq_lock_spin_is_acquired_kdp(wq)) { | |
3659 | return 0; | |
3660 | } | |
3661 | ||
3662 | uint32_t pwq_state = WQ_FLAGS_AVAILABLE; | |
3663 | ||
3664 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
3665 | pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; | |
3666 | } | |
3667 | ||
3668 | if (wq->wq_nthreads >= wq_max_threads) { | |
3669 | pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; | |
3670 | } | |
3671 | ||
3672 | return pwq_state; | |
3673 | } | |
3674 | ||
3675 | void | |
3676 | workq_init(void) | |
3677 | { | |
d9a64523 | 3678 | clock_interval_to_absolutetime_interval(wq_stalled_window.usecs, |
0a7de745 | 3679 | NSEC_PER_USEC, &wq_stalled_window.abstime); |
d9a64523 | 3680 | clock_interval_to_absolutetime_interval(wq_reduce_pool_window.usecs, |
0a7de745 | 3681 | NSEC_PER_USEC, &wq_reduce_pool_window.abstime); |
d9a64523 | 3682 | clock_interval_to_absolutetime_interval(wq_max_timer_interval.usecs, |
0a7de745 | 3683 | NSEC_PER_USEC, &wq_max_timer_interval.abstime); |
cb323159 A |
3684 | |
3685 | thread_deallocate_daemon_register_queue(&workq_deallocate_queue, | |
3686 | workq_deallocate_queue_invoke); | |
d9a64523 | 3687 | } |
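/*
 * User-space analogue of the interval conversions performed in workq_init()
 * above: clock_interval_to_absolutetime_interval() turns the wq_*_window
 * microsecond tunables into mach absolute-time units. Outside the kernel the
 * same conversion can be expressed with mach_timebase_info(); the helper name
 * is illustrative.
 */
#include <mach/mach_time.h>
#include <stdint.h>

static uint64_t
usecs_to_mach_abstime(uint64_t usecs)
{
	mach_timebase_info_data_t tb;
	(void)mach_timebase_info(&tb);      /* abstime * numer / denom == ns */
	uint64_t nanos = usecs * 1000ull;   /* NSEC_PER_USEC */
	return nanos * tb.denom / tb.numer; /* ns -> absolute time units */
}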