/*
 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/thread_act.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>

#include <vm/vm_pageout.h>

#include <kern/thread_call.h>
#include <kern/timer_call.h>

#include <libkern/OSAtomic.h>
#include <kern/timer_queue.h>

#include <sys/kdebug.h>
#if CONFIG_DTRACE
#include <mach/sdt.h>
#endif
#include <machine/machine_routines.h>

static ZONE_DECLARE(thread_call_zone, "thread_call",
    sizeof(thread_call_data_t), ZC_NOENCRYPT);

typedef enum {
    TCF_ABSOLUTE = 0,
    TCF_CONTINUOUS = 1,
    TCF_COUNT = 2,
} thread_call_flavor_t;

__options_decl(thread_call_group_flags_t, uint32_t, {
    TCG_NONE = 0x0,
    TCG_PARALLEL = 0x1,
    TCG_DEALLOC_ACTIVE = 0x2,
});

static struct thread_call_group {
    __attribute__((aligned(128))) lck_ticket_t tcg_lock;

    const char * tcg_name;

    queue_head_t pending_queue;
    uint32_t pending_count;

    queue_head_t delayed_queues[TCF_COUNT];
    struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
    timer_call_data_t delayed_timers[TCF_COUNT];

    timer_call_data_t dealloc_timer;

    struct waitq idle_waitq;
    uint64_t idle_timestamp;
    uint32_t idle_count, active_count, blocked_count;

    uint32_t tcg_thread_pri;
    uint32_t target_thread_count;

    thread_call_group_flags_t tcg_flags;

    struct waitq waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
    [THREAD_CALL_INDEX_HIGH] = {
        .tcg_name = "high",
        .tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
        .target_thread_count = 4,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_KERNEL] = {
        .tcg_name = "kernel",
        .tcg_thread_pri = BASEPRI_KERNEL,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_USER] = {
        .tcg_name = "user",
        .tcg_thread_pri = BASEPRI_DEFAULT,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_LOW] = {
        .tcg_name = "low",
        .tcg_thread_pri = MAXPRI_THROTTLE,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_KERNEL_HIGH] = {
        .tcg_name = "kernel-high",
        .tcg_thread_pri = BASEPRI_PREEMPT,
        .target_thread_count = 2,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UI] = {
        .tcg_name = "qos-ui",
        .tcg_thread_pri = BASEPRI_FOREGROUND,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_IN] = {
        .tcg_name = "qos-in",
        .tcg_thread_pri = BASEPRI_USER_INITIATED,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UT] = {
        .tcg_name = "qos-ut",
        .tcg_thread_pri = BASEPRI_UTILITY,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
};
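
/*
 * Illustrative note (not part of the original source): a thread call's
 * tc_index selects one of the groups above, e.g. a call set up with
 * THREAD_CALL_PRIORITY_KERNEL lands in the "kernel" group and is serviced
 * by BASEPRI_KERNEL worker threads; see thread_call_setup_with_options()
 * and thread_call_get_group() below.
 */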

typedef struct thread_call_group *thread_call_group_t;

#define INTERNAL_CALL_COUNT 768
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO 4
#define THREAD_CALL_MACH_FACTOR_CAP 3
#define THREAD_CALL_GROUP_MAX_THREADS 500

struct thread_call_thread_state {
    struct thread_call_group * thc_group;
    struct thread_call * thc_call; /* debug only, may be deallocated */
    uint64_t thc_call_start;
    uint64_t thc_call_soft_deadline;
    uint64_t thc_call_hard_deadline;
    uint64_t thc_call_pending_timestamp;
    uint64_t thc_IOTES_invocation_timestamp;
    thread_call_func_t thc_func;
    thread_call_param_t thc_param0;
    thread_call_param_t thc_param1;
};

static bool thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq daemon_waitq;

static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t thread_call_internal_queue;
int thread_call_internal_queue_count = 0;
static uint64_t thread_call_dealloc_interval_abs;

static void _internal_call_init(void);

static thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool _is_internal_call(thread_call_t call);
static void _internal_call_release(thread_call_t call);
static bool _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool _call_dequeue(thread_call_t call, thread_call_group_t group);
static void thread_call_wake(thread_call_group_t group);
static void thread_call_daemon(void *arg);
static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void thread_call_group_setup(thread_call_group_t group);
static void sched_call_thread(int type, thread_t thread);
static void thread_call_start_deallocate_timer(thread_call_group_t group);
static void thread_call_wait_locked(thread_call_t call, spl_t s);
static bool thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");


static void
thread_call_lock_spin(thread_call_group_t group)
{
    lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}

static void
thread_call_unlock(thread_call_group_t group)
{
    lck_ticket_unlock(&group->tcg_lock);
}

static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
    lck_ticket_assert_owned(&group->tcg_lock);
}


static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
    spl_t s = splsched();
    thread_call_lock_spin(group);

    return s;
}

static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
    thread_call_unlock(group);
    splx(s);
}
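
/*
 * Illustrative sketch of the locking idiom used throughout this file
 * (hypothetical caller):
 *
 *     spl_t s = disable_ints_and_lock(group);
 *     ... examine or modify group and call state ...
 *     enable_ints_and_unlock(group, s);
 */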

/* Lock held */
static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
    thread_call_index_t index = call->tc_index;

    assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);

    return &thread_call_groups[index];
}

/* Lock held */
static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)
{
    return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
}

/* Lock held */
static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
{
    assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
    thread_call_flavor_t old_flavor = thread_call_get_flavor(call);

    if (old_flavor != flavor) {
        if (flavor == TCF_CONTINUOUS) {
            call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
        } else {
            call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
        }
    }

    return old_flavor;
}

/* returns true if it was on a queue */
static bool
thread_call_enqueue_tail(
    thread_call_t call,
    queue_t new_queue)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t group = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue != NULL;
}

static queue_head_t *
thread_call_dequeue(
    thread_call_t call)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t group = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue != NULL) {
        remqueue(&call->tc_qlink);

        call->tc_queue = NULL;
    }
    return old_queue;
}

static queue_head_t *
thread_call_enqueue_deadline(
    thread_call_t call,
    thread_call_group_t group,
    thread_call_flavor_t flavor,
    uint64_t deadline)
{
    queue_t old_queue = call->tc_queue;
    queue_t new_queue = &group->delayed_queues[flavor];

    thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[old_flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == new_queue) {
        /* optimize the same-queue case to avoid a full re-insert */
        uint64_t old_deadline = call->tc_pqlink.deadline;
        call->tc_pqlink.deadline = deadline;

        if (old_deadline < deadline) {
            priority_queue_entry_increased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        } else {
            priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        }
    } else {
        if (old_queue == &group->delayed_queues[old_flavor]) {
            priority_queue_remove(&group->delayed_pqueues[old_flavor],
                &call->tc_pqlink);
        }

        call->tc_pqlink.deadline = deadline;

        priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else if (old_queue != new_queue) {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue;
}

uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
    return call->tc_pqlink.deadline;
}


static bool
group_isparallel(thread_call_group_t group)
{
    return (group->tcg_flags & TCG_PARALLEL) != 0;
}

static bool
thread_call_group_should_add_thread(thread_call_group_t group)
{
    if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
        panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
            group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
            group->active_count, group->blocked_count, group->idle_count);
    }

    if (group_isparallel(group) == false) {
        if (group->pending_count > 0 && group->active_count == 0) {
            return true;
        }

        return false;
    }

    if (group->pending_count > 0) {
        if (group->idle_count > 0) {
            return false;
        }

        uint32_t thread_count = group->active_count;

        /*
         * Add a thread if either there are no threads,
         * the group has fewer than its target number of
         * threads, or the amount of work is large relative
         * to the number of threads. In the last case, pay attention
         * to the total load on the system, and back off if
         * it's high.
         */
        if ((thread_count == 0) ||
            (thread_count < group->target_thread_count) ||
            ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
            (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
            return true;
        }
    }

    return false;
}
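
/*
 * Worked illustration of the policy above (numbers are examples only): a
 * parallel group with 3 active threads, no idle threads, and a target of 1
 * only adds a thread when pending_count > THREAD_CALL_ADD_RATIO * 3 = 12
 * and sched_mach_factor is below THREAD_CALL_MACH_FACTOR_CAP (3). A
 * non-parallel group adds a thread only when work is pending and no thread
 * is active.
 */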

static void
thread_call_group_setup(thread_call_group_t group)
{
    lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

    queue_init(&group->pending_queue);

    for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
        queue_init(&group->delayed_queues[flavor]);
        priority_queue_init(&group->delayed_pqueues[flavor]);
        timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
    }

    timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

    waitq_init(&group->waiters_waitq, SYNC_POLICY_DISABLE_IRQ);

    /* Reverse the wait order so we re-use the most recently parked thread from the pool */
    waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED | SYNC_POLICY_DISABLE_IRQ);
}

/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 */
static void
thread_call_thread_create(
    thread_call_group_t group)
{
    thread_t thread;
    kern_return_t result;

    int thread_pri = group->tcg_thread_pri;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
        group, thread_pri, &thread);
    if (result != KERN_SUCCESS) {
        panic("cannot create new thread call thread %d", result);
    }

    if (thread_pri <= BASEPRI_KERNEL) {
        /*
         * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
         * in kernel if there are higher priority threads available.
         */
        thread_set_eager_preempt(thread);
    }

    char name[MAXTHREADNAMESIZE] = "";

    int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

    snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
    thread_set_thread_name(thread, name);

    thread_deallocate(thread);
}

/*
 * thread_call_initialize:
 *
 * Initialize this module, called
 * early during system initialization.
 */
void
thread_call_initialize(void)
{
    nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
    waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);

    for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
        thread_call_group_setup(&thread_call_groups[i]);
    }

    _internal_call_init();

    thread_t thread;
    kern_return_t result;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
        NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
    if (result != KERN_SUCCESS) {
        panic("thread_call_initialize");
    }

    thread_deallocate(thread);
}

void
thread_call_setup_with_options(
    thread_call_t call,
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri,
    thread_call_options_t options)
{
    bzero(call, sizeof(*call));

    *call = (struct thread_call) {
        .tc_func = func,
        .tc_param0 = param0,
    };

    switch (pri) {
    case THREAD_CALL_PRIORITY_HIGH:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_CALL_PRIORITY_KERNEL:
        call->tc_index = THREAD_CALL_INDEX_KERNEL;
        break;
    case THREAD_CALL_PRIORITY_USER:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_CALL_PRIORITY_LOW:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_CALL_PRIORITY_KERNEL_HIGH:
        call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
        break;
    default:
        panic("Invalid thread call pri value: %d", pri);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }
    if (options & THREAD_CALL_OPTIONS_SIGNAL) {
        call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
    }
}

void
thread_call_setup(
    thread_call_t call,
    thread_call_func_t func,
    thread_call_param_t param0)
{
    thread_call_setup_with_options(call, func, param0,
        THREAD_CALL_PRIORITY_HIGH, 0);
}
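
/*
 * Illustrative example (hypothetical caller with caller-owned storage;
 * my_func and my_context are stand-ins):
 *
 *     static thread_call_data_t my_callout;
 *
 *     thread_call_setup(&my_callout, my_func, my_context);
 *     thread_call_enter(&my_callout);
 *
 * Callers that want heap-backed storage use thread_call_allocate() below
 * instead, which sets THREAD_CALL_ALLOC so the call can later be released
 * with thread_call_free().
 */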

static void
_internal_call_init(void)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    queue_init(&thread_call_internal_queue);

    for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
        enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
        thread_call_internal_queue_count++;
    }

    enable_ints_and_unlock(group, s);
}

/*
 * _internal_call_allocate:
 *
 * Allocate an internal callout entry.
 *
 * Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
        struct thread_call, tc_qlink);

    if (call == NULL) {
        panic("_internal_call_allocate: thread_call_internal_queue empty");
    }

    thread_call_internal_queue_count--;

    thread_call_setup(call, func, param0);
    /* THREAD_CALL_ALLOC not set, do not free back to zone */
    assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
    enable_ints_and_unlock(group, s);

    return call;
}

/* Check if a call is internal and needs to be returned to the internal pool. */
static bool
_is_internal_call(thread_call_t call)
{
    if (call >= internal_call_storage &&
        call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
        assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
        return true;
    }
    return false;
}

/*
 * _internal_call_release:
 *
 * Release an internal callout entry which
 * is no longer pending (or delayed).
 *
 * Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
    assert(_is_internal_call(call));

    thread_call_group_t group = thread_call_get_group(call);

    assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
    thread_call_assert_locked(group);

    enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
    thread_call_internal_queue_count++;
}
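
/*
 * Note on the pool above: the func-only entry points
 * (thread_call_func_delayed() and friends) draw entries from
 * internal_call_storage via _internal_call_allocate() and hand them back
 * with _internal_call_release() once the entry has been dequeued for
 * execution or cancellation, so these entries never pass through
 * thread_call_zone.
 */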

/*
 * _pending_call_enqueue:
 *
 * Place an entry at the end of the
 * pending queue, to be executed soon.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = 0;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    call->tc_pending_timestamp = now;

    bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

    if (!was_on_queue) {
        call->tc_submit_count++;
    }

    group->pending_count++;

    thread_call_wake(group);

    return was_on_queue;
}

/*
 * _delayed_call_enqueue:
 *
 * Place an entry on the delayed queue,
 * after existing entries with an earlier
 * (or identical) deadline.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
    thread_call_t call,
    thread_call_group_t group,
    uint64_t deadline,
    thread_call_flavor_t flavor)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = deadline;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);
        thread_call_set_flavor(call, flavor);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    } else if (old_queue == NULL) {
        call->tc_submit_count++;
    }

    return old_queue != NULL;
}

/*
 * _call_dequeue:
 *
 * Remove an entry from a queue.
 *
 * Returns TRUE if the entry was on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_call_dequeue(
    thread_call_t call,
    thread_call_group_t group)
{
    queue_head_t *old_queue = thread_call_dequeue(call);

    if (old_queue == NULL) {
        return false;
    }

    call->tc_finish_count++;

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    }

    return true;
}

/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t new_call,
    thread_call_group_t group,
    thread_call_flavor_t flavor)
{
    /* No calls implies no timer needed */
    if (queue_empty(&group->delayed_queues[flavor])) {
        return false;
    }

    thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

    /* We only need to change the hard timer if this new call is the first in the list */
    if (new_call != NULL && new_call != call) {
        return false;
    }

    assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

    uint64_t fire_at = call->tc_soft_deadline;

    if (flavor == TCF_CONTINUOUS) {
        assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
        fire_at = continuoustime_to_absolutetime(fire_at);
    } else {
        assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
    }

    /*
     * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
     * which does not take into account later-deadline timers with a larger leeway.
     * This is a valid coalescing behavior, but masks a possible window to
     * fire a timer instead of going idle.
     */
    uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

    timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
        fire_at, leeway,
        TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
        ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

    return true;
}
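
/*
 * Illustrative example of the leeway math above: a call whose
 * tc_soft_deadline is T and whose tc_pqlink.deadline is T + slop arms the
 * hard timer at T (converted from continuous time if needed) with
 * leeway = slop, so the timer subsystem may coalesce the firing anywhere
 * in [T, T + slop].
 */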

/*
 * _cancel_func_from_queue:
 *
 * Remove the first (or all) matching
 * entries from the specified queue.
 *
 * Returns TRUE if any matching entries
 * were found.
 *
 * Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_group_t group,
    boolean_t remove_all,
    queue_head_t *queue)
{
    boolean_t call_removed = FALSE;
    thread_call_t call;

    qe_foreach_element_safe(call, queue, tc_qlink) {
        if (call->tc_func != func ||
            call->tc_param0 != param0) {
            continue;
        }

        _call_dequeue(call, group);

        if (_is_internal_call(call)) {
            _internal_call_release(call);
        }

        call_removed = TRUE;
        if (!remove_all) {
            break;
        }
    }

    return call_removed;
}

/*
 * thread_call_func_delayed:
 *
 * Enqueue a function callout to
 * occur at the stated time.
 */
void
thread_call_func_delayed(
    thread_call_func_t func,
    thread_call_param_t param,
    uint64_t deadline)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}

/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */

void
thread_call_func_delayed_with_leeway(
    thread_call_func_t func,
    thread_call_param_t param,
    uint64_t deadline,
    uint64_t leeway,
    uint32_t flags)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}

/*
 * thread_call_func_cancel:
 *
 * Dequeue a function callout.
 *
 * Removes one (or all) { function, argument }
 * instance(s) from either (or both)
 * the pending and the delayed queue,
 * in that order.
 *
 * Returns TRUE if any calls were cancelled.
 *
 * This iterates all of the pending or delayed thread calls in the group,
 * which is really inefficient. Switch to an allocated thread call instead.
 *
 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
 * affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
    thread_call_func_t func,
    thread_call_param_t param,
    boolean_t cancel_all)
{
    boolean_t result;

    assert(func != NULL);

    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    if (cancel_all) {
        /* exhaustively search every queue, and return true if any search found something */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    } else {
        /* early-exit as soon as we find something, don't search other queues */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    }

    enable_ints_and_unlock(group, s);

    return result;
}
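
/*
 * Illustrative usage of the func-based API above (hypothetical caller;
 * my_func and my_context stand in for real arguments):
 *
 *     uint64_t deadline;
 *     clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
 *     thread_call_func_delayed(my_func, my_context, deadline);
 *     ...
 *     (void) thread_call_func_cancel(my_func, my_context, FALSE);
 *
 * As noted above, cancellation scans entire queues, so frequent users should
 * prefer an allocated thread call.
 */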

/*
 * Allocate a thread call with a given priority. Importances other than
 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 * threads which are not in the normal "urgent" bands).
 */
thread_call_t
thread_call_allocate_with_priority(
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri)
{
    return thread_call_allocate_with_options(func, param0, pri, 0);
}

thread_call_t
thread_call_allocate_with_options(
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri,
    thread_call_options_t options)
{
    thread_call_t call = zalloc(thread_call_zone);

    thread_call_setup_with_options(call, func, param0, pri, options);
    call->tc_refs = 1;
    call->tc_flags |= THREAD_CALL_ALLOC;

    return call;
}

thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,
    thread_call_param_t param0,
    int qos_tier,
    thread_call_options_t options)
{
    thread_call_t call = thread_call_allocate(func, param0);

    switch (qos_tier) {
    case THREAD_QOS_UNSPECIFIED:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_QOS_LEGACY:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_QOS_MAINTENANCE:
    case THREAD_QOS_BACKGROUND:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_QOS_UTILITY:
        call->tc_index = THREAD_CALL_INDEX_QOS_UT;
        break;
    case THREAD_QOS_USER_INITIATED:
        call->tc_index = THREAD_CALL_INDEX_QOS_IN;
        break;
    case THREAD_QOS_USER_INTERACTIVE:
        call->tc_index = THREAD_CALL_INDEX_QOS_UI;
        break;
    default:
        panic("Invalid thread call qos value: %d", qos_tier);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }

    /* does not support THREAD_CALL_OPTIONS_SIGNAL */

    return call;
}


/*
 * thread_call_allocate:
 *
 * Allocate a callout entry.
 */
thread_call_t
thread_call_allocate(
    thread_call_func_t func,
    thread_call_param_t param0)
{
    return thread_call_allocate_with_options(func, param0,
        THREAD_CALL_PRIORITY_HIGH, 0);
}

/*
 * thread_call_free:
 *
 * Release a callout. If the callout is currently
 * executing, it will be freed when all invocations
 * finish.
 *
 * If the callout is currently armed to fire again, then
 * freeing is not allowed and returns FALSE. The
 * client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
    thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != NULL ||
        ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
        thread_call_unlock(group);
        splx(s);

        return FALSE;
    }

    int32_t refs = --call->tc_refs;
    if (refs < 0) {
        panic("Refcount negative: %d\n", refs);
    }

    if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
        == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
        thread_call_wait_once_locked(call, s);
        /* thread call lock has been unlocked */
    } else {
        enable_ints_and_unlock(group, s);
    }

    if (refs == 0) {
        assert(call->tc_finish_count == call->tc_submit_count);
        zfree(thread_call_zone, call);
    }

    return TRUE;
}
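
/*
 * Illustrative lifecycle for an allocated call (hypothetical caller; my_func,
 * my_context and deadline stand in for real values):
 *
 *     thread_call_t tc = thread_call_allocate(my_func, my_context);
 *     thread_call_enter_delayed(tc, deadline);
 *     ...
 *     thread_call_cancel_wait(tc);   (requires THREAD_CALL_ALLOC storage)
 *     thread_call_free(tc);          (returns FALSE if the call is still armed)
 */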

/*
 * thread_call_enter:
 *
 * Enqueue a callout entry to occur "soon".
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter(
    thread_call_t call)
{
    return thread_call_enter1(call, 0);
}

boolean_t
thread_call_enter1(
    thread_call_t call,
    thread_call_param_t param1)
{
    assert(call->tc_func != NULL);
    assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

    thread_call_group_t group = thread_call_get_group(call);
    bool result = true;

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != &group->pending_queue) {
        result = _pending_call_enqueue(call, group, mach_absolute_time());
    }

    call->tc_param1 = param1;

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * thread_call_enter_delayed:
 *
 * Enqueue a callout entry to occur
 * at the stated time.
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter_delayed(
    thread_call_t call,
    uint64_t deadline)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}

boolean_t
thread_call_enter1_delayed(
    thread_call_t call,
    thread_call_param_t param1,
    uint64_t deadline)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}

boolean_t
thread_call_enter_delayed_with_leeway(
    thread_call_t call,
    thread_call_param_t param1,
    uint64_t deadline,
    uint64_t leeway,
    unsigned int flags)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}


/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * params:
 * call - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway - timer slack represented as delta of deadline.
 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 * THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 * than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
    thread_call_t call,
    thread_call_func_t alt_func,
    thread_call_param_t alt_param0,
    thread_call_param_t param1,
    uint64_t deadline,
    uint64_t leeway,
    unsigned int flags)
{
    uint64_t now, sdeadline;

    thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

    /* direct mapping between thread_call, timer_call, and timeout_urgency values */
    uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

    if (call == NULL) {
        /* allocate a structure out of internal storage, as a convenience for BSD callers */
        call = _internal_call_allocate(alt_func, alt_param0);
    }

    assert(call->tc_func != NULL);
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    /*
     * kevent and IOTES let you change flavor for an existing timer, so we have to
     * support flipping flavors for enqueued thread calls.
     */
    if (flavor == TCF_CONTINUOUS) {
        now = mach_continuous_time();
    } else {
        now = mach_absolute_time();
    }

    call->tc_flags |= THREAD_CALL_DELAYED;

    call->tc_soft_deadline = sdeadline = deadline;

    boolean_t ratelimited = FALSE;
    uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

    if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
        slop = leeway;
    }

    if (UINT64_MAX - deadline <= slop) {
        deadline = UINT64_MAX;
    } else {
        deadline += slop;
    }

    if (ratelimited) {
        call->tc_flags |= THREAD_CALL_RATELIMITED;
    } else {
        call->tc_flags &= ~THREAD_CALL_RATELIMITED;
    }

    call->tc_param1 = param1;

    call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

    bool result = _delayed_call_enqueue(call, group, deadline, flavor);

    _arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
    DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
        uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
        (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

    enable_ints_and_unlock(group, s);

    return result;
}
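
/*
 * Illustrative example of the leeway path above (hypothetical caller; tc,
 * deadline and leeway stand in for real values):
 *
 *     thread_call_enter_delayed_with_leeway(tc, NULL, deadline, leeway,
 *         THREAD_CALL_DELAY_LEEWAY);
 *
 * Here leeway is expressed in the same absolute-time units as deadline; the
 * routine keeps tc_soft_deadline at the requested time and widens the hard
 * deadline by the larger of leeway and the computed slop.
 */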

/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
    bool canceled;

    if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
        call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
        canceled = true;

        /* if reschedule was set, it must not have been queued */
        assert(call->tc_queue == NULL);
    } else {
        bool queue_head_changed = false;

        thread_call_flavor_t flavor = thread_call_get_flavor(call);
        thread_call_group_t group = thread_call_get_group(call);

        if (call->tc_pqlink.deadline != 0 &&
            call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
            assert(call->tc_queue == &group->delayed_queues[flavor]);
            queue_head_changed = true;
        }

        canceled = _call_dequeue(call, group);

        if (queue_head_changed) {
            if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
                timer_call_cancel(&group->delayed_timers[flavor]);
            }
        }
    }

#if CONFIG_DTRACE
    DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
        0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

    return canceled;
}

/*
 * thread_call_cancel:
 *
 * Dequeue a callout entry.
 *
 * Returns TRUE if the call was
 * on a queue.
 */
boolean_t
thread_call_cancel(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    boolean_t result = thread_call_cancel_locked(call);

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * Cancel a thread call. If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish. Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
        panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
    }

    if (!ml_get_interrupts_enabled()) {
        panic("unsafe thread_call_cancel_wait");
    }

    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
        self->thc_state && self->thc_state->thc_call == call) {
        panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
            call, call->tc_func);
    }

    spl_t s = disable_ints_and_lock(group);

    boolean_t canceled = thread_call_cancel_locked(call);

    if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
        /*
         * A cancel-wait on a 'once' call will both cancel
         * the pending call and wait for the in-flight call
         */

        thread_call_wait_once_locked(call, s);
        /* thread call lock unlocked */
    } else {
        /*
         * A cancel-wait on a normal call will only wait for the in-flight calls
         * if it did not cancel the pending call.
         *
         * TODO: This seems less than useful - shouldn't it do the wait as well?
         */

        if (canceled == FALSE) {
            thread_call_wait_locked(call, s);
            /* thread call lock unlocked */
        } else {
            enable_ints_and_unlock(group, s);
        }
    }

    return canceled;
}


/*
 * thread_call_wake:
 *
 * Wake a call thread to service
 * pending call entries. May wake
 * the daemon thread in order to
 * create additional call threads.
 *
 * Called with thread_call_lock held.
 *
 * For high-priority group, only does wakeup/creation if there are no threads
 * running.
 */
static void
thread_call_wake(
    thread_call_group_t group)
{
    /*
     * New behavior: use threads if you've got 'em.
     * Traditional behavior: wake only if no threads running.
     */
    if (group_isparallel(group) || group->active_count == 0) {
        if (group->idle_count) {
            __assert_only kern_return_t kr;

            kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
                THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
            assert(kr == KERN_SUCCESS);

            group->idle_count--;
            group->active_count++;

            if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
                if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
                    group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
                }
            }
        } else {
            if (thread_call_group_should_add_thread(group) &&
                os_atomic_cmpxchg(&thread_call_daemon_awake,
                false, true, relaxed)) {
                waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
                    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
            }
        }
    }
}

/*
 * sched_call_thread:
 *
 * Call out invoked by the scheduler.
 */
static void
sched_call_thread(
    int type,
    thread_t thread)
{
    thread_call_group_t group;

    assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
    assert(thread->thc_state != NULL);

    group = thread->thc_state->thc_group;
    assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);

    thread_call_lock_spin(group);

    switch (type) {
    case SCHED_CALL_BLOCK:
        assert(group->active_count);
        --group->active_count;
        group->blocked_count++;
        if (group->pending_count > 0) {
            thread_call_wake(group);
        }
        break;

    case SCHED_CALL_UNBLOCK:
        assert(group->blocked_count);
        --group->blocked_count;
        group->active_count++;
        break;
    }

    thread_call_unlock(group);
}

/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own. Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
    assert(thread_call_get_group(call) == group);

    bool repend = false;
    bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
    bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

    call->tc_finish_count++;

    if (!signal && alloc) {
        /* The thread call thread owns a ref until the call is finished */
        if (call->tc_refs <= 0) {
            panic("thread_call_finish: detected over-released thread call: %p", call);
        }
        call->tc_refs--;
    }

    thread_call_flags_t old_flags = call->tc_flags;
    call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

    if ((!alloc || call->tc_refs != 0) &&
        (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
        assert(old_flags & THREAD_CALL_ONCE);
        thread_call_flavor_t flavor = thread_call_get_flavor(call);

        if (old_flags & THREAD_CALL_DELAYED) {
            uint64_t now = mach_absolute_time();
            if (flavor == TCF_CONTINUOUS) {
                now = absolutetime_to_continuoustime(now);
            }
            if (call->tc_soft_deadline <= now) {
                /* The deadline has already expired, go straight to pending */
                call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
                call->tc_pqlink.deadline = 0;
            }
        }

        if (call->tc_pqlink.deadline) {
            _delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
            if (!signal) {
                _arm_delayed_call_timer(call, group, flavor);
            }
        } else if (signal) {
            call->tc_submit_count++;
            repend = true;
        } else {
            _pending_call_enqueue(call, group, mach_absolute_time());
        }
    }

    if (!signal && alloc && call->tc_refs == 0) {
        if ((old_flags & THREAD_CALL_WAIT) != 0) {
            panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_func);
        }

        assert(call->tc_finish_count == call->tc_submit_count);

        enable_ints_and_unlock(group, *s);

        zfree(thread_call_zone, call);

        *s = disable_ints_and_lock(group);
    }

    if ((old_flags & THREAD_CALL_WAIT) != 0) {
        /*
         * This may wake up a thread with a registered sched_call.
         * That call might need the group lock, so we drop the lock
         * to avoid deadlocking.
         *
         * We also must use a separate waitq from the idle waitq, as
         * this path goes waitq lock->thread lock->group lock, but
         * the idle wait goes group lock->waitq_lock->thread_lock.
         */
        thread_call_unlock(group);

        waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
            THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);

        thread_call_lock_spin(group);
        /* THREAD_CALL_SIGNAL call may have been freed */
    }

    return repend;
}
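
/*
 * Note (summarizing the routine above): when a THREAD_CALL_ONCE call was
 * re-entered while running, THREAD_CALL_RESCHEDULE is consumed here and the
 * call is re-queued: onto the delayed queue if its deadline is still in the
 * future, straight onto the pending queue if the deadline has already passed,
 * or, for signal-based calls, reported back to the caller through the return
 * value so the timer path can re-arm it.
 */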
1588 | ||
1589 | /* | |
1590 | * thread_call_invoke | |
1591 | * | |
1592 | * Invoke the function provided for this thread call | |
1593 | * | |
1594 | * Note that the thread call object can be deallocated by the function if we do not control its storage. | |
1595 | */ | |
1596 | static void __attribute__((noinline)) | |
1597 | thread_call_invoke(thread_call_func_t func, | |
1598 | thread_call_param_t param0, | |
1599 | thread_call_param_t param1, | |
1600 | __unused thread_call_t call) | |
1601 | { | |
1602 | #if DEVELOPMENT || DEBUG | |
1603 | KERNEL_DEBUG_CONSTANT( | |
1604 | MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START, | |
1605 | VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0); | |
1606 | #endif /* DEVELOPMENT || DEBUG */ | |
1607 | ||
1608 | #if CONFIG_DTRACE | |
1609 | uint64_t tc_ttd = call->tc_ttd; | |
1610 | boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED; | |
1611 | DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32), | |
1612 | (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call); | |
1613 | #endif | |
1614 | ||
1615 | (*func)(param0, param1); | |
1616 | ||
1617 | #if CONFIG_DTRACE | |
1618 | DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32), | |
1619 | (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call); | |
1620 | #endif | |
1621 | ||
1622 | #if DEVELOPMENT || DEBUG | |
1623 | KERNEL_DEBUG_CONSTANT( | |
1624 | MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END, | |
1625 | VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0); | |
1626 | #endif /* DEVELOPMENT || DEBUG */ | |
1627 | } | |
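/*
 * Editor's illustrative sketch (not part of the original source; the
 * example_* names are hypothetical, the KPIs are the public ones from
 * thread_call.h): a minimal client callout with the signature that
 * thread_call_invoke() expects, and how it gets queued.
 *
 *	static void
 *	example_callout(thread_call_param_t p0, thread_call_param_t p1)
 *	{
 *		struct example_softc *sc = (struct example_softc *)p0;
 *
 *		example_do_work(sc, p1);
 *	}
 *
 *	thread_call_t tc = thread_call_allocate(example_callout, sc);
 *	thread_call_enter1(tc, (thread_call_param_t)some_arg);
 *
 * For a call allocated this way (THREAD_CALL_ALLOC), the subsystem holds a
 * reference across the invocation; a call initialized with
 * thread_call_setup() into caller-owned storage may be gone as soon as the
 * callout returns, which is why 'call' above may be unsafe to touch after
 * (*func)() runs.
 */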
1628 | ||
1629 | /* | |
1630 | * thread_call_thread: | |
1631 | */ | |
1632 | static void | |
1633 | thread_call_thread( | |
1634 | thread_call_group_t group, | |
1635 | wait_result_t wres) | |
1636 | { | |
1637 | thread_t self = current_thread(); | |
1638 | ||
1639 | if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) { | |
1640 | (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT); | |
1641 | } | |
1642 | ||
1643 | /* | |
1644 | * A wakeup with THREAD_INTERRUPTED indicates that | |
1645 | * we should terminate. | |
1646 | */ | |
1647 | if (wres == THREAD_INTERRUPTED) { | |
1648 | thread_terminate(self); | |
1649 | ||
1650 | /* NOTREACHED */ | |
1651 | panic("thread_terminate() returned?"); | |
1652 | } | |
1653 | ||
1654 | spl_t s = disable_ints_and_lock(group); | |
1655 | ||
1656 | struct thread_call_thread_state thc_state = { .thc_group = group }; | |
1657 | self->thc_state = &thc_state; | |
1658 | ||
1659 | thread_sched_call(self, sched_call_thread); | |
1660 | ||
1661 | while (group->pending_count > 0) { | |
1662 | thread_call_t call = qe_dequeue_head(&group->pending_queue, | |
1663 | struct thread_call, tc_qlink); | |
1664 | assert(call != NULL); | |
1665 | ||
1666 | group->pending_count--; | |
1667 | if (group->pending_count == 0) { | |
1668 | assert(queue_empty(&group->pending_queue)); | |
1669 | } | |
1670 | ||
1671 | thread_call_func_t func = call->tc_func; | |
1672 | thread_call_param_t param0 = call->tc_param0; | |
1673 | thread_call_param_t param1 = call->tc_param1; | |
1674 | ||
1675 | call->tc_queue = NULL; | |
1676 | ||
1677 | if (_is_internal_call(call)) { | |
1678 | _internal_call_release(call); | |
1679 | } | |
1680 | ||
1681 | /* | |
1682 | * Can only do wakeups for thread calls whose storage | |
1683 | * we control. | |
1684 | */ | |
1685 | bool needs_finish = false; | |
1686 | if (call->tc_flags & THREAD_CALL_ALLOC) { | |
1687 | call->tc_refs++; /* Delay free until we're done */ | |
1688 | } | |
1689 | if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) { | |
1690 | /* | |
1691 | * If THREAD_CALL_ONCE is used and the call isn't | |
1692 | * THREAD_CALL_ALLOC, then clients promise to use | |
1693 | * thread_call_cancel_wait() before destroying | |
1694 | * the thread call. | |
1695 | * | |
1696 | * Otherwise, the storage for the thread call might | |
1697 | * already be gone by the time thread_call_invoke() returns. | |
1698 | */ | |
1699 | needs_finish = true; | |
1700 | call->tc_flags |= THREAD_CALL_RUNNING; | |
1701 | } | |
1702 | ||
1703 | thc_state.thc_call = call; | |
1704 | thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp; | |
1705 | thc_state.thc_call_soft_deadline = call->tc_soft_deadline; | |
1706 | thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline; | |
1707 | thc_state.thc_func = func; | |
1708 | thc_state.thc_param0 = param0; | |
1709 | thc_state.thc_param1 = param1; | |
1710 | thc_state.thc_IOTES_invocation_timestamp = 0; | |
1711 | ||
1712 | enable_ints_and_unlock(group, s); | |
1713 | ||
1714 | thc_state.thc_call_start = mach_absolute_time(); | |
1715 | ||
1716 | thread_call_invoke(func, param0, param1, call); | |
1717 | ||
1718 | thc_state.thc_call = NULL; | |
1719 | ||
1720 | if (get_preemption_level() != 0) { | |
1721 | int pl = get_preemption_level(); | |
1722 | panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)", | |
1723 | pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1); | |
1724 | } | |
1725 | ||
1726 | s = disable_ints_and_lock(group); | |
1727 | ||
1728 | if (needs_finish) { | |
1729 | /* Release refcount, may free, may temporarily drop lock */ | |
1730 | thread_call_finish(call, group, &s); | |
1731 | } | |
1732 | } | |
1733 | ||
1734 | thread_sched_call(self, NULL); | |
1735 | group->active_count--; | |
1736 | ||
1737 | if (self->callout_woken_from_icontext && !self->callout_woke_thread) { | |
1738 | ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1); | |
1739 | if (self->callout_woken_from_platform_idle) { | |
1740 | ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1); | |
1741 | } | |
1742 | } | |
1743 | ||
1744 | self->callout_woken_from_icontext = FALSE; | |
1745 | self->callout_woken_from_platform_idle = FALSE; | |
1746 | self->callout_woke_thread = FALSE; | |
1747 | ||
1748 | self->thc_state = NULL; | |
1749 | ||
1750 | if (group_isparallel(group)) { | |
1751 | /* | |
1752 | * For the new (parallel) style of thread call group, the thread always blocks. | |
1753 | * The first thread to go idle records the group's idle timestamp. | |
1754 | * If we have more than the target number of threads and the | |
1755 | * deallocation timer isn't already armed, arm it so that a | |
1756 | * thread is deallocated if the surplus persists. | |
1757 | */ | |
1758 | group->idle_count++; | |
1759 | ||
1760 | if (group->idle_count == 1) { | |
1761 | group->idle_timestamp = mach_absolute_time(); | |
1762 | } | |
1763 | ||
1764 | if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) && | |
1765 | ((group->active_count + group->idle_count) > group->target_thread_count)) { | |
1766 | thread_call_start_deallocate_timer(group); | |
1767 | } | |
1768 | ||
1769 | /* Wait for more work (or termination) */ | |
1770 | wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0); | |
1771 | if (wres != THREAD_WAITING) { | |
1772 | panic("kcall worker unable to assert wait?"); | |
1773 | } | |
1774 | ||
1775 | enable_ints_and_unlock(group, s); | |
1776 | ||
1777 | thread_block_parameter((thread_continue_t)thread_call_thread, group); | |
1778 | } else { | |
1779 | if (group->idle_count < group->target_thread_count) { | |
1780 | group->idle_count++; | |
1781 | ||
1782 | waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */ | |
1783 | ||
1784 | enable_ints_and_unlock(group, s); | |
1785 | ||
1786 | thread_block_parameter((thread_continue_t)thread_call_thread, group); | |
1787 | /* NOTREACHED */ | |
1788 | } | |
1789 | } | |
1790 | ||
1791 | enable_ints_and_unlock(group, s); | |
1792 | ||
1793 | thread_terminate(self); | |
1794 | /* NOTREACHED */ | |
1795 | } | |
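/*
 * Editor's summary sketch of the worker loop above (illustrative only,
 * not compilable as written):
 *
 *	lock group;
 *	while (pending_count > 0) {
 *		call = dequeue(pending_queue);
 *		capture func/param0/param1;      (call may be freed before return)
 *		unlock group;
 *		thread_call_invoke(func, param0, param1, call);
 *		lock group;
 *		if (needs_finish)
 *			thread_call_finish(call, group, &s);  (may drop the lock)
 *	}
 *	then either park on idle_waitq (parallel groups always park; legacy
 *	groups keep at most target_thread_count idlers) or terminate.
 */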
1796 | ||
1797 | void | |
1798 | thread_call_start_iotes_invocation(__assert_only thread_call_t call) | |
1799 | { | |
1800 | thread_t self = current_thread(); | |
1801 | ||
1802 | if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) { | |
1803 | /* not a thread call thread, might be a workloop IOTES */ | |
1804 | return; | |
1805 | } | |
1806 | ||
1807 | assert(self->thc_state); | |
1808 | assert(self->thc_state->thc_call == call); | |
1809 | ||
1810 | self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time(); | |
1811 | } | |
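/*
 * Editor's note (assumed context): "IOTES" refers to IOKit's
 * IOTimerEventSource, whose timeout actions are dispatched on thread call
 * threads.  The event source calls this from inside its callout, e.g.:
 *
 *	thread_call_start_iotes_invocation(call);
 *
 * so that the recorded timestamp can separate time spent in the thread-call
 * machinery from time spent in the event source's own action.
 */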
1812 | ||
1813 | ||
1814 | /* | |
1815 | * thread_call_daemon: walk list of groups, allocating | |
1816 | * threads if appropriate (as determined by | |
1817 | * thread_call_group_should_add_thread()). | |
1818 | */ | |
1819 | static void | |
1820 | thread_call_daemon_continue(__unused void *arg) | |
1821 | { | |
1822 | do { | |
1823 | os_atomic_store(&thread_call_daemon_awake, false, relaxed); | |
1824 | ||
1825 | /* Starting at zero happens to be high-priority first. */ | |
1826 | for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) { | |
1827 | thread_call_group_t group = &thread_call_groups[i]; | |
1828 | ||
1829 | spl_t s = disable_ints_and_lock(group); | |
1830 | ||
1831 | while (thread_call_group_should_add_thread(group)) { | |
1832 | group->active_count++; | |
1833 | ||
1834 | enable_ints_and_unlock(group, s); | |
1835 | ||
1836 | thread_call_thread_create(group); | |
1837 | ||
1838 | s = disable_ints_and_lock(group); | |
1839 | } | |
1840 | ||
1841 | enable_ints_and_unlock(group, s); | |
1842 | } | |
1843 | } while (os_atomic_load(&thread_call_daemon_awake, relaxed)); | |
1844 | ||
1845 | waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0); | |
1846 | ||
1847 | if (os_atomic_load(&thread_call_daemon_awake, relaxed)) { | |
1848 | clear_wait(current_thread(), THREAD_AWAKENED); | |
1849 | } | |
1850 | ||
1851 | thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL); | |
1852 | /* NOTREACHED */ | |
1853 | } | |
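/*
 * Editor's note (illustrative; the producer side is assumed, not shown in
 * this excerpt): the assert_wait / re-check / clear_wait sequence above only
 * avoids a lost wakeup because wakers store the flag before waking the
 * waitq, roughly:
 *
 *	os_atomic_store(&thread_call_daemon_awake, true, relaxed);
 *	waitq_wakeup64_one(&daemon_waitq,
 *	    CAST_EVENT64_T(&thread_call_daemon_awake),
 *	    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
 *
 * If that store lands between the daemon's last loop check and its
 * assert_wait, the re-check sees it and clear_wait() turns the upcoming
 * thread_block into a no-op.
 */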
1854 | ||
1855 | static void | |
1856 | thread_call_daemon( | |
1857 | __unused void *arg) | |
1858 | { | |
1859 | thread_t self = current_thread(); | |
1860 | ||
1861 | self->options |= TH_OPT_VMPRIV; | |
1862 | vm_page_free_reserve(2); /* XXX */ | |
1863 | ||
1864 | thread_set_thread_name(self, "thread_call_daemon"); | |
1865 | ||
1866 | thread_call_daemon_continue(NULL); | |
1867 | /* NOTREACHED */ | |
1868 | } | |
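/*
 * Editor's note (assumed context): this is the body of the single daemon
 * thread created during subsystem initialization; it never returns, looping
 * via the thread_call_daemon_continue() continuation.  TH_OPT_VMPRIV and the
 * vm_page_free_reserve() bump are presumably there so that worker-thread
 * creation can still make progress under memory pressure.
 */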
1869 | ||
1870 | /* | |
1871 | * Schedule a timer to deallocate a worker thread if we have a surplus | |
1872 | * of threads (in excess of the group's target) and at least one thread | |
1873 | * stays idle for the entire deallocation interval. | |
1874 | */ | |
1875 | static void | |
1876 | thread_call_start_deallocate_timer(thread_call_group_t group) | |
1877 | { | |
1878 | __assert_only bool already_enqueued; | |
1879 | ||
1880 | assert(group->idle_count > 0); | |
1881 | assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0); | |
1882 | ||
1883 | group->tcg_flags |= TCG_DEALLOC_ACTIVE; | |
1884 | ||
1885 | uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs; | |
1886 | ||
1887 | already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0); | |
1888 | ||
1889 | assert(already_enqueued == false); | |
1890 | } | |
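/*
 * Editor's illustrative sketch (assumed setup): thread_call_dealloc_interval_abs
 * is an interval in mach absolute-time units, so the deadline above is simply
 * "went idle at" + interval.  An interval of, say, 5 seconds would be
 * produced at init time with something like:
 *
 *	uint64_t interval_abs;
 *	nanoseconds_to_absolutetime(5 * NSEC_PER_SEC, &interval_abs);
 *	thread_call_dealloc_interval_abs = interval_abs;
 *
 * (The interval actually chosen at initialization may differ.)
 */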
1891 | ||
1892 | /* non-static so dtrace can find it rdar://problem/31156135&31379348 */ | |
1893 | void | |
1894 | thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1) | |
1895 | { | |
1896 | thread_call_group_t group = (thread_call_group_t) p0; | |
1897 | thread_call_flavor_t flavor = (thread_call_flavor_t) p1; | |
1898 | ||
1899 | thread_call_t call; | |
1900 | uint64_t now; | |
1901 | ||
1902 | thread_call_lock_spin(group); | |
1903 | ||
1904 | if (flavor == TCF_CONTINUOUS) { | |
1905 | now = mach_continuous_time(); | |
1906 | } else if (flavor == TCF_ABSOLUTE) { | |
1907 | now = mach_absolute_time(); | |
1908 | } else { | |
1909 | panic("invalid timer flavor: %d", flavor); | |
1910 | } | |
1911 | ||
1912 | while ((call = priority_queue_min(&group->delayed_pqueues[flavor], | |
1913 | struct thread_call, tc_pqlink)) != NULL) { | |
1914 | assert(thread_call_get_group(call) == group); | |
1915 | assert(thread_call_get_flavor(call) == flavor); | |
1916 | ||
1917 | /* | |
1918 | * if we hit a call that isn't yet ready to expire, | |
1919 | * then we're done for now | |
1920 | * TODO: The next timer in the list could have a larger leeway | |
1921 | * and therefore be ready to expire. | |
1922 | */ | |
1923 | if (call->tc_soft_deadline > now) { | |
1924 | break; | |
1925 | } | |
1926 | ||
1927 | /* | |
1928 | * If we hit a rate-limited timer, don't eagerly wake it up. | |
1929 | * Wait until it reaches the end of the leeway window. | |
1930 | * | |
1931 | * TODO: What if the next timer is not rate-limited? | |
1932 | * Have a separate rate-limited queue to avoid this | |
1933 | */ | |
1934 | if ((call->tc_flags & THREAD_CALL_RATELIMITED) && | |
1935 | (call->tc_pqlink.deadline > now) && | |
1936 | (ml_timer_forced_evaluation() == FALSE)) { | |
1937 | break; | |
1938 | } | |
1939 | ||
1940 | if (THREAD_CALL_SIGNAL & call->tc_flags) { | |
1941 | __assert_only queue_head_t *old_queue; | |
1942 | old_queue = thread_call_dequeue(call); | |
1943 | assert(old_queue == &group->delayed_queues[flavor]); | |
1944 | ||
1945 | do { | |
1946 | thread_call_func_t func = call->tc_func; | |
1947 | thread_call_param_t param0 = call->tc_param0; | |
1948 | thread_call_param_t param1 = call->tc_param1; | |
1949 | ||
1950 | call->tc_flags |= THREAD_CALL_RUNNING; | |
1951 | ||
1952 | thread_call_unlock(group); | |
1953 | thread_call_invoke(func, param0, param1, call); | |
1954 | thread_call_lock_spin(group); | |
1955 | ||
1956 | /* finish may detect that the call has been re-pended */ | |
1957 | } while (thread_call_finish(call, group, NULL)); | |
1958 | /* call may have been freed by the finish */ | |
1959 | } else { | |
1960 | _pending_call_enqueue(call, group, now); | |
1961 | } | |
1962 | } | |
1963 | ||
1964 | _arm_delayed_call_timer(call, group, flavor); | |
1965 | ||
1966 | thread_call_unlock(group); | |
1967 | } | |
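/*
 * Editor's illustrative sketch (hypothetical values, real KPIs): the
 * soft/hard deadline pair walked above reflects timer-coalescing slop, and
 * clients can also supply their own leeway, roughly:
 *
 *	uint64_t deadline, leeway;
 *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
 *	thread_call_enter_delayed_with_leeway(call, NULL, deadline, leeway,
 *	    THREAD_CALL_DELAY_LEEWAY);
 *
 * tc_soft_deadline is the earliest the call is eligible to fire;
 * tc_pqlink.deadline additionally includes the slop, and for
 * THREAD_CALL_RATELIMITED calls the loop above keeps deferring until that
 * later point (unless a forced evaluation is in progress).
 */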
1968 | ||
1969 | static void | |
1970 | thread_call_delayed_timer_rescan(thread_call_group_t group, | |
1971 | thread_call_flavor_t flavor) | |
1972 | { | |
1973 | thread_call_t call; | |
1974 | uint64_t now; | |
1975 | ||
1976 | spl_t s = disable_ints_and_lock(group); | |
1977 | ||
1978 | assert(ml_timer_forced_evaluation() == TRUE); | |
1979 | ||
1980 | if (flavor == TCF_CONTINUOUS) { | |
1981 | now = mach_continuous_time(); | |
1982 | } else { | |
1983 | now = mach_absolute_time(); | |
1984 | } | |
1985 | ||
1986 | qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) { | |
1987 | if (call->tc_soft_deadline <= now) { | |
1988 | _pending_call_enqueue(call, group, now); | |
1989 | } else { | |
1990 | uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline; | |
1991 | assert(call->tc_pqlink.deadline >= call->tc_soft_deadline); | |
1992 | /* | |
1993 | * On a latency quality-of-service level change, | |
1994 | * re-sort potentially rate-limited callouts. The platform | |
1995 | * layer determines which timers require this. | |
1996 | * | |
1997 | * This trick works by updating the deadline value to | |
1998 | * equal soft-deadline, effectively crushing away | |
1999 | * timer coalescing slop values for any armed | |
2000 | * timer in the queue. | |
2001 | * | |
2002 | * TODO: keep a hint on the timer to tell whether its inputs changed, so we | |
2003 | * only have to crush coalescing for timers that need it. | |
2004 | * | |
2005 | * TODO: Keep a separate queue of timers above the re-sort | |
2006 | * threshold, so we only have to look at those. | |
2007 | */ | |
2008 | if (timer_resort_threshold(skew)) { | |
2009 | _call_dequeue(call, group); | |
2010 | _delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor); | |
2011 | } | |
2012 | } | |
2013 | } | |
2014 | ||
2015 | _arm_delayed_call_timer(NULL, group, flavor); | |
2016 | ||
2017 | enable_ints_and_unlock(group, s); | |
2018 | } | |
2019 | ||
2020 | void | |
2021 | thread_call_delayed_timer_rescan_all(void) | |
2022 | { | |
2023 | for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) { | |
2024 | for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) { | |
2025 | thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor); | |
2026 | } | |
2027 | } | |
2028 | } | |
2029 | ||
2030 | /* | |
2031 | * Timer callback to tell a thread to terminate if | |
2032 | * we have an excess of threads and at least one has been | |
2033 | * idle for a long time. | |
2034 | */ | |
2035 | static void | |
2036 | thread_call_dealloc_timer( | |
2037 | timer_call_param_t p0, | |
2038 | __unused timer_call_param_t p1) | |
2039 | { | |
2040 | thread_call_group_t group = (thread_call_group_t)p0; | |
2041 | uint64_t now; | |
2042 | kern_return_t res; | |
2043 | bool terminated = false; | |
2044 | ||
2045 | thread_call_lock_spin(group); | |
2046 | ||
2047 | assert(group->tcg_flags & TCG_DEALLOC_ACTIVE); | |
2048 | ||
2049 | now = mach_absolute_time(); | |
2050 | ||
2051 | if (group->idle_count > 0) { | |
2052 | if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) { | |
2053 | terminated = true; | |
2054 | group->idle_count--; | |
2055 | res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group), | |
2056 | THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES); | |
2057 | if (res != KERN_SUCCESS) { | |
2058 | panic("Unable to wake up idle thread for termination?"); | |
2059 | } | |
2060 | } | |
2061 | } | |
2062 | ||
2063 | group->tcg_flags &= ~TCG_DEALLOC_ACTIVE; | |
2064 | ||
2065 | /* | |
2066 | * If we still have an excess of threads, schedule another | |
2067 | * invocation of this function. | |
2068 | */ | |
2069 | if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) { | |
2070 | /* | |
2071 | * If we killed someone just now, push out the | |
2072 | * next deadline. | |
2073 | */ | |
2074 | if (terminated) { | |
2075 | group->idle_timestamp = now; | |
2076 | } | |
2077 | ||
2078 | thread_call_start_deallocate_timer(group); | |
2079 | } | |
2080 | ||
2081 | thread_call_unlock(group); | |
2082 | } | |
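/*
 * Editor's note: the THREAD_INTERRUPTED wakeup issued here is what the idle
 * path of thread_call_thread() interprets as a request to terminate (see the
 * wres == THREAD_INTERRUPTED check at the top of that function).  Because
 * waitq_wakeup64_one() wakes a single idle thread and the timer is then
 * conditionally re-armed, a surplus drains at most one thread per
 * deallocation interval.
 */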
2083 | ||
2084 | /* | |
2085 | * Wait for the invocation of the thread call to complete. | |
2086 | * We know there's only one in flight because of the 'once' flag. | |
2087 | * | |
2088 | * If a subsequent invocation comes in before we wake up, that's OK. | |
2089 | * | |
2090 | * TODO: Here is where we will add priority inheritance to the thread executing | |
2091 | * the thread call in case it's lower priority than the current thread | |
2092 | * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once | |
2093 | * | |
2094 | * Takes the thread call lock locked, returns unlocked | |
2095 | * This lets us avoid a spurious take/drop after waking up from thread_block | |
2096 | * | |
2097 | * This thread could be a thread call thread itself, blocking and therefore making a | |
2098 | * sched_call upcall into the thread call subsystem, needing the group lock. | |
2099 | * However, we're saved from deadlock because the 'block' upcall is made in | |
2100 | * thread_block, not in assert_wait. | |
2101 | */ | |
2102 | static bool | |
2103 | thread_call_wait_once_locked(thread_call_t call, spl_t s) | |
2104 | { | |
2105 | assert(call->tc_flags & THREAD_CALL_ALLOC); | |
2106 | assert(call->tc_flags & THREAD_CALL_ONCE); | |
2107 | ||
2108 | thread_call_group_t group = thread_call_get_group(call); | |
2109 | ||
2110 | if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) { | |
2111 | enable_ints_and_unlock(group, s); | |
2112 | return false; | |
2113 | } | |
2114 | ||
2115 | /* call is running, so we have to wait for it */ | |
2116 | call->tc_flags |= THREAD_CALL_WAIT; | |
2117 | ||
2118 | wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0); | |
2119 | if (res != THREAD_WAITING) { | |
2120 | panic("Unable to assert wait: %d", res); | |
2121 | } | |
2122 | ||
2123 | enable_ints_and_unlock(group, s); | |
2124 | ||
2125 | res = thread_block(THREAD_CONTINUE_NULL); | |
2126 | if (res != THREAD_AWAKENED) { | |
2127 | panic("Awoken with %d?", res); | |
2128 | } | |
2129 | ||
2130 | /* returns unlocked */ | |
2131 | return true; | |
2132 | } | |
2133 | ||
2134 | /* | |
2135 | * Wait for an in-flight invocation to complete | |
2136 | * Does NOT try to cancel, so the client doesn't need to hold their | |
2137 | * lock while calling this function. | |
2138 | * | |
2139 | * Returns whether or not it had to wait. | |
2140 | * | |
2141 | * Only works for THREAD_CALL_ONCE calls. | |
2142 | */ | |
2143 | boolean_t | |
2144 | thread_call_wait_once(thread_call_t call) | |
2145 | { | |
2146 | if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) { | |
2147 | panic("thread_call_wait_once: can't wait on thread call whose storage I don't own"); | |
2148 | } | |
2149 | ||
2150 | if ((call->tc_flags & THREAD_CALL_ONCE) == 0) { | |
2151 | panic("thread_call_wait_once: can't wait_once on a non-once call"); | |
2152 | } | |
2153 | ||
2154 | if (!ml_get_interrupts_enabled()) { | |
2155 | panic("unsafe thread_call_wait_once"); | |
2156 | } | |
2157 | ||
2158 | thread_t self = current_thread(); | |
2159 | ||
2160 | if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) && | |
2161 | self->thc_state && self->thc_state->thc_call == call) { | |
2162 | panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p", | |
2163 | call, call->tc_func); | |
2164 | } | |
2165 | ||
2166 | thread_call_group_t group = thread_call_get_group(call); | |
2167 | ||
2168 | spl_t s = disable_ints_and_lock(group); | |
2169 | ||
2170 | bool waited = thread_call_wait_once_locked(call, s); | |
2171 | /* thread call lock unlocked */ | |
2172 | ||
2173 | return waited; | |
2174 | } | |
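/*
 * Editor's illustrative sketch (hypothetical client, real KPIs): the
 * intended teardown pattern for a THREAD_CALL_OPTIONS_ONCE + allocated call
 * is roughly:
 *
 *	thread_call_t tc = thread_call_allocate_with_options(example_fn, ctx,
 *	    THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
 *	...
 *	thread_call_enter_delayed(tc, deadline);
 *	...
 *	if (!thread_call_cancel(tc)) {
 *		thread_call_wait_once(tc);
 *	}
 *	thread_call_free(tc);
 *
 * If thread_call_cancel() fails to dequeue the call it may already be
 * running, so thread_call_wait_once() waits out any in-flight invocation
 * before the storage is freed.  thread_call_cancel_wait() bundles the two
 * steps for callers that prefer a single call.
 */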
2175 | ||
2176 | ||
2177 | /* | |
2178 | * Wait for all requested invocations of a thread call prior to now | |
2179 | * to finish. Can only be invoked on thread calls whose storage we manage. | |
2180 | * Just waits for the finish count to catch up to the submit count we find | |
2181 | * at the beginning of our wait. | |
2182 | * | |
2183 | * Called with thread_call_lock held. Returns with lock released. | |
2184 | */ | |
2185 | static void | |
2186 | thread_call_wait_locked(thread_call_t call, spl_t s) | |
2187 | { | |
2188 | thread_call_group_t group = thread_call_get_group(call); | |
2189 | ||
2190 | assert(call->tc_flags & THREAD_CALL_ALLOC); | |
2191 | ||
2192 | uint64_t submit_count = call->tc_submit_count; | |
2193 | ||
2194 | while (call->tc_finish_count < submit_count) { | |
2195 | call->tc_flags |= THREAD_CALL_WAIT; | |
2196 | ||
2197 | wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, | |
2198 | CAST_EVENT64_T(call), THREAD_UNINT, 0); | |
2199 | ||
2200 | if (res != THREAD_WAITING) { | |
2201 | panic("Unable to assert wait: %d", res); | |
2202 | } | |
2203 | ||
2204 | enable_ints_and_unlock(group, s); | |
2205 | ||
2206 | res = thread_block(THREAD_CONTINUE_NULL); | |
2207 | if (res != THREAD_AWAKENED) { | |
2208 | panic("Awoken with %d?", res); | |
2209 | } | |
2210 | ||
2211 | s = disable_ints_and_lock(group); | |
2212 | } | |
2213 | ||
2214 | enable_ints_and_unlock(group, s); | |
2215 | } | |
2216 | ||
2217 | /* | |
2218 | * Determine whether a thread call is either on a queue or | |
2219 | * currently being executed. | |
2220 | */ | |
2221 | boolean_t | |
2222 | thread_call_isactive(thread_call_t call) | |
2223 | { | |
2224 | thread_call_group_t group = thread_call_get_group(call); | |
2225 | ||
2226 | spl_t s = disable_ints_and_lock(group); | |
2227 | boolean_t active = (call->tc_submit_count > call->tc_finish_count); | |
2228 | enable_ints_and_unlock(group, s); | |
2229 | ||
2230 | return active; | |
2231 | } | |
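/*
 * Editor's note: tc_submit_count is incremented each time the call is
 * (re)queued and tc_finish_count each time an invocation completes, so
 * "active" here means tc_submit_count > tc_finish_count, which is also the
 * condition thread_call_wait_locked() above waits to become false for the
 * submissions it observed.
 */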
2232 | ||
2233 | /* | |
2234 | * adjust_cont_time_thread_calls | |
2235 | * on wake, reenqueue delayed call timer for continuous time thread call groups | |
2236 | */ | |
2237 | void | |
2238 | adjust_cont_time_thread_calls(void) | |
2239 | { | |
2240 | for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) { | |
2241 | thread_call_group_t group = &thread_call_groups[i]; | |
2242 | spl_t s = disable_ints_and_lock(group); | |
2243 | ||
2244 | /* only the continuous timers need to be re-armed */ | |
2245 | ||
2246 | _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS); | |
2247 | enable_ints_and_unlock(group, s); | |
2248 | } | |
2249 | } |
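/*
 * Editor's note (illustrative): TCF_CONTINUOUS deadlines are measured against
 * mach_continuous_time(), which includes time spent asleep, while
 * TCF_ABSOLUTE deadlines use mach_absolute_time(), which on many platforms
 * does not advance across sleep.  A client that wants a delay to keep
 * counting through system sleep would arm it roughly as:
 *
 *	uint64_t deadline = mach_continuous_time() + interval_abs;
 *	thread_call_enter_delayed_with_leeway(call, NULL, deadline, 0,
 *	    THREAD_CALL_CONTINUOUS);
 *
 * (interval_abs is a hypothetical absolute-time interval.)  This function
 * re-arms only the continuous-flavor group timers because the continuous
 * clock may have jumped forward while the system was asleep.
 */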