git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* @OSF_FREE_COPYRIGHT@
	30	*/
	31	/*
	32	* Mach Operating System
	33	* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
	34	* All Rights Reserved.
	35	*
	36	* Permission to use, copy, modify and distribute this software and its
	37	* documentation is hereby granted, provided that both the copyright
	38	* notice and this permission notice appear in all copies of the
	39	* software, derivative works or modified versions, and any portions
	40	* thereof, and that both notices appear in supporting documentation.
	41	*
	42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
	44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	45	*
	46	* Carnegie Mellon requests users of this software to return to
	47	*
	48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	49	* School of Computer Science
	50	* Carnegie Mellon University
	51	* Pittsburgh PA 15213-3890
	52	*
	53	* any improvements or extensions that they make and grant Carnegie Mellon
	54	* the rights to redistribute these changes.
	55	*/
	56	/*
	57	*/
	58	/*
	59	* File: kern/thread.c
	60	* Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
	61	* Date: 1986
	62	*
	63	* Thread management primitives implementation.
	64	*/
	65	/*
	66	* Copyright (c) 1993 The University of Utah and
	67	* the Computer Systems Laboratory (CSL). All rights reserved.
	68	*
	69	* Permission to use, copy, modify and distribute this software and its
	70	* documentation is hereby granted, provided that both the copyright
	71	* notice and this permission notice appear in all copies of the
	72	* software, derivative works or modified versions, and any portions
	73	* thereof, and that both notices appear in supporting documentation.
	74	*
	75	* THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
	76	* IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
	77	* ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	78	*
	79	* CSL requests users of this software to return to csl-dist@cs.utah.edu any
	80	* improvements that they make and grant CSL redistribution rights.
	81	*
	82	*/
	83
	84	#include <mach/mach_types.h>
	85	#include <mach/boolean.h>
	86	#include <mach/policy.h>
	87	#include <mach/thread_info.h>
	88	#include <mach/thread_special_ports.h>
	89	#include <mach/thread_status.h>
	90	#include <mach/time_value.h>
	91	#include <mach/vm_param.h>
	92
	93	#include <machine/thread.h>
	94	#include <machine/pal_routines.h>
	95	#include <machine/limits.h>
	96
	97	#include <kern/kern_types.h>
	98	#include <kern/kalloc.h>
	99	#include <kern/cpu_data.h>
	100	#include <kern/counters.h>
	101	#include <kern/extmod_statistics.h>
	102	#include <kern/ipc_mig.h>
	103	#include <kern/ipc_tt.h>
	104	#include <kern/mach_param.h>
	105	#include <kern/machine.h>
	106	#include <kern/misc_protos.h>
	107	#include <kern/processor.h>
	108	#include <kern/queue.h>
	109	#include <kern/sched.h>
	110	#include <kern/sched_prim.h>
	111	#include <kern/sync_lock.h>
	112	#include <kern/syscall_subr.h>
	113	#include <kern/task.h>
	114	#include <kern/thread.h>
	115	#include <kern/thread_group.h>
	116	#include <kern/coalition.h>
	117	#include <kern/host.h>
	118	#include <kern/zalloc.h>
	119	#include <kern/assert.h>
	120	#include <kern/exc_resource.h>
	121	#include <kern/exc_guard.h>
	122	#include <kern/telemetry.h>
	123	#include <kern/policy_internal.h>
	124	#include <kern/turnstile.h>
	125	#include <kern/sched_clutch.h>
	126
	127	#include <corpses/task_corpse.h>
	128	#if KPC
	129	#include <kern/kpc.h>
	130	#endif
	131
	132	#if MONOTONIC
	133	#include <kern/monotonic.h>
	134	#include <machine/monotonic.h>
	135	#endif /* MONOTONIC */
	136
	137	#include <ipc/ipc_kmsg.h>
	138	#include <ipc/ipc_port.h>
	139	#include <bank/bank_types.h>
	140
	141	#include <vm/vm_kern.h>
	142	#include <vm/vm_pageout.h>
	143
	144	#include <sys/kdebug.h>
	145	#include <sys/bsdtask_info.h>
	146	#include <mach/sdt.h>
	147	#include <san/kasan.h>
	148	#if CONFIG_KSANCOV
	149	#include <san/ksancov.h>
	150	#endif
	151
	152	#include <stdatomic.h>
	153
	154	#if defined(HAS_APPLE_PAC)
	155	#include <ptrauth.h>
	156	#include <arm64/proc_reg.h>
	157	#endif /* defined(HAS_APPLE_PAC) */
	158
	159	/*
	160	* Exported interfaces
	161	*/
	162	#include <mach/task_server.h>
	163	#include <mach/thread_act_server.h>
	164	#include <mach/mach_host_server.h>
	165	#include <mach/host_priv_server.h>
	166	#include <mach/mach_voucher_server.h>
	167	#include <kern/policy_internal.h>
	168
	169	#if CONFIG_MACF
	170	#include <security/mac_mach_internal.h>
	171	#endif
	172
	173	LCK_GRP_DECLARE(thread_lck_grp, "thread");
	174
	175	ZONE_DECLARE(thread_zone, "threads", sizeof(struct thread), ZC_ZFREE_CLEARMEM);
	176
	177	ZONE_DECLARE(thread_qos_override_zone, "thread qos override",
	178	sizeof(struct thread_qos_override), ZC_NOENCRYPT);
	179
	180	static struct mpsc_daemon_queue thread_stack_queue;
	181	static struct mpsc_daemon_queue thread_terminate_queue;
	182	static struct mpsc_daemon_queue thread_deallocate_queue;
	183	static struct mpsc_daemon_queue thread_exception_queue;
	184
	185	decl_simple_lock_data(static, crashed_threads_lock);
	186	static queue_head_t crashed_threads_queue;
	187
	188	struct thread_exception_elt {
	189	struct mpsc_queue_chain link;
	190	exception_type_t exception_type;
	191	task_t exception_task;
	192	thread_t exception_thread;
	193	};
	194
	195	static SECURITY_READ_ONLY_LATE(struct thread) thread_template = {
	196	#if MACH_ASSERT
	197	.thread_magic = THREAD_MAGIC,
	198	#endif /* MACH_ASSERT */
	199	.wait_result = THREAD_WAITING,
	200	.options = THREAD_ABORTSAFE,
	201	.state = TH_WAIT \| TH_UNINT,
	202	.th_sched_bucket = TH_BUCKET_RUN,
	203	.base_pri = BASEPRI_DEFAULT,
	204	.realtime.deadline = UINT64_MAX,
	205	.last_made_runnable_time = THREAD_NOT_RUNNABLE,
	206	.last_basepri_change_time = THREAD_NOT_RUNNABLE,
	207	#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	208	.pri_shift = INT8_MAX,
	209	#endif
	210	/* timers are initialized in thread_bootstrap */
	211	};
	212
	213	static struct thread init_thread;
	214	static void thread_deallocate_enqueue(thread_t thread);
	215	static void thread_deallocate_complete(thread_t thread);
	216
	217	#ifdef MACH_BSD
	218	extern void proc_exit(void *);
	219	extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
	220	extern uint64_t get_dispatchqueue_offset_from_proc(void *);
	221	extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
	222	extern int proc_selfpid(void);
	223	extern void proc_name(int, char*, int);
	224	extern char * proc_name_address(void *p);
	225	#endif /* MACH_BSD */
	226
	227	extern int disable_exc_resource;
	228	extern int audio_active;
	229	extern int debug_task;
	230	int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
	231	int task_threadmax = CONFIG_THREAD_MAX;
	232
	233	static uint64_t thread_unique_id = 100;
	234
	235	struct _thread_ledger_indices thread_ledgers = { .cpu_time = -1 };
	236	static ledger_template_t thread_ledger_template = NULL;
	237	static void init_thread_ledgers(void);
	238
	239	#if CONFIG_JETSAM
	240	void jetsam_on_ledger_cpulimit_exceeded(void);
	241	#endif
	242
	243	extern int task_thread_soft_limit;
	244	extern int exc_via_corpse_forking;
	245
	246	#if DEVELOPMENT \|\| DEBUG
	247	extern int exc_resource_threads_enabled;
	248	#endif /* DEVELOPMENT \|\| DEBUG */
	249
	250	/*
	251	* Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
	252	*
	253	* (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
	254	* stacktraces, aka micro-stackshots)
	255	*/
	256	#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
	257
	258	/* Percentage. Level at which we start gathering telemetry. */
	259	static TUNABLE(uint8_t, cpumon_ustackshots_trigger_pct,
	260	"cpumon_ustackshots_trigger_pct", CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT);
	261	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
	262	#if DEVELOPMENT \|\| DEBUG
	263	void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
	264	#endif /* DEVELOPMENT \|\| DEBUG */
	265
	266	/*
	267	* The smallest interval over which we support limiting CPU consumption is 1ms
	268	*/
	269	#define MINIMUM_CPULIMIT_INTERVAL_MS 1
	270
	271	os_refgrp_decl(static, thread_refgrp, "thread", NULL);
	272
	273	static inline void
	274	init_thread_from_template(thread_t thread)
	275	{
	276	/*
	277	* In general, struct thread isn't trivially-copyable, since it may
	278	* contain pointers to thread-specific state. This may be enforced at
	279	* compile time on architectures that store authed + diversified
	280	* pointers in machine_thread.
	281	*
	282	* In this specific case, where we're initializing a new thread from a
	283	* thread_template, we know all diversified pointers are NULL; these are
	284	* safe to bitwise copy.
	285	*/
	286	#pragma clang diagnostic push
	287	#pragma clang diagnostic ignored "-Wnontrivial-memaccess"
	288	memcpy(thread, &thread_template, sizeof(*thread));
	289	#pragma clang diagnostic pop
	290	}
	291
	292	thread_t
	293	thread_bootstrap(void)
	294	{
	295	/*
	296	* Fill in a template thread for fast initialization.
	297	*/
	298	timer_init(&thread_template.user_timer);
	299	timer_init(&thread_template.system_timer);
	300	timer_init(&thread_template.ptime);
	301	timer_init(&thread_template.runnable_timer);
	302
	303	init_thread_from_template(&init_thread);
	304	/* fiddle with init thread to skip asserts in set_sched_pri */
	305	init_thread.sched_pri = MAXPRI_KERNEL;
	306	#if DEBUG \|\| DEVELOPMENT
	307	queue_init(&init_thread.t_temp_alloc_list);
	308	#endif /* DEBUG \|\| DEVELOPMENT */
	309
	310	return &init_thread;
	311	}
	312
	313	void
	314	thread_machine_init_template(void)
	315	{
	316	machine_thread_template_init(&thread_template);
	317	}
	318
	/*
	 * thread_init:
	 *
	 * One-time initialization of the thread subsystem: kernel stacks,
	 * thread policy, machine-dependent thread state, then thread ledgers.
	 */
	void
	thread_init(void)
	{
		stack_init();
		thread_policy_init();

		/* Set up whatever per-thread structures the machine layer needs. */
		machine_thread_init();

		init_thread_ledgers();
	}
	334
	335	boolean_t
	336	thread_is_active(thread_t thread)
	337	{
	338	return thread->active;
	339	}
	340
	341	void
	342	thread_corpse_continue(void)
	343	{
	344	thread_t thread = current_thread();
	345
	346	thread_terminate_internal(thread);
	347
	348	/*
	349	* Handle the thread termination directly
	350	* here instead of returning to userspace.
	351	*/
	352	assert(thread->active == FALSE);
	353	thread_ast_clear(thread, AST_APC);
	354	thread_apc_ast(thread);
	355
	356	panic("thread_corpse_continue");
	357	/NOTREACHED/
	358	}
	359
	360	__dead2
	361	static void
	362	thread_terminate_continue(void)
	363	{
	364	panic("thread_terminate_continue");
	365	/NOTREACHED/
	366	}
	367
	368	/*
	369	* thread_terminate_self:
	370	*/
	371	void
	372	thread_terminate_self(void)
	373	{
	374	thread_t thread = current_thread();
	375	task_t task;
	376	int threadcnt;
	377
	378	if (thread->t_temp_alloc_count) {
	379	kheap_temp_leak_panic(thread);
	380	}
	381
	382	pal_thread_terminate_self(thread);
	383
	384	DTRACE_PROC(lwp__exit);
	385
	386	thread_mtx_lock(thread);
	387
	388	ipc_thread_disable(thread);
	389
	390	thread_mtx_unlock(thread);
	391
	392	thread_sched_call(thread, NULL);
	393
	394	spl_t s = splsched();
	395	thread_lock(thread);
	396
	397	thread_depress_abort_locked(thread);
	398
	399	thread_unlock(thread);
	400	splx(s);
	401
	402	#if CONFIG_TASKWATCH
	403	thead_remove_taskwatch(thread);
	404	#endif /* CONFIG_TASKWATCH */
	405
	406	work_interval_thread_terminate(thread);
	407
	408	thread_mtx_lock(thread);
	409
	410	thread_policy_reset(thread);
	411
	412	thread_mtx_unlock(thread);
	413
	414	assert(thread->th_work_interval == NULL);
	415
	416	bank_swap_thread_bank_ledger(thread, NULL);
	417
	418	if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
	419	char threadname[MAXTHREADNAMESIZE];
	420	bsd_getthreadname(thread->uthread, threadname);
	421	kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
	422	}
	423
	424	task = thread->task;
	425	uthread_cleanup(task, thread->uthread, task->bsd_info);
	426
	427	if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
	428	/* trace out pid before we sign off */
	429	long dbg_arg1 = 0;
	430	long dbg_arg2 = 0;
	431
	432	kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
	433	#if MONOTONIC
	434	if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_THR_EXIT)) {
	435	uint64_t counts[MT_CORE_NFIXED];
	436	uint64_t thread_user_time;
	437	uint64_t thread_system_time;
	438	thread_user_time = timer_grab(&thread->user_timer);
	439	thread_system_time = timer_grab(&thread->system_timer);
	440	mt_fixed_thread_counts(thread, counts);
	441	KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_THR_EXIT,
	442	#ifdef MT_CORE_INSTRS
	443	counts[MT_CORE_INSTRS],
	444	#else /* defined(MT_CORE_INSTRS) */
	445	0,
	446	#endif/* !defined(MT_CORE_INSTRS) */
	447	counts[MT_CORE_CYCLES],
	448	thread_system_time, thread_user_time);
	449	}
	450	#endif/* MONOTONIC */
	451	KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
	452	}
	453
	454	/*
	455	* After this subtraction, this thread should never access
	456	* task->bsd_info unless it got 0 back from the os_atomic_dec. It
	457	* could be racing with other threads to be the last thread in the
	458	* process, and the last thread in the process will tear down the proc
	459	* structure and zero-out task->bsd_info.
	460	*/
	461	threadcnt = os_atomic_dec(&task->active_thread_count, relaxed);
	462
	463	/*
	464	* If we are the last thread to terminate and the task is
	465	* associated with a BSD process, perform BSD process exit.
	466	*/
	467	if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
	468	mach_exception_data_type_t subcode = 0;
	469	if (kdebug_enable) {
	470	/* since we're the last thread in this process, trace out the command name too */
	471	long args[4] = {};
	472	kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
	473	#if MONOTONIC
	474	if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_PROC_EXIT)) {
	475	uint64_t counts[MT_CORE_NFIXED];
	476	uint64_t task_user_time;
	477	uint64_t task_system_time;
	478	mt_fixed_task_counts(task, counts);
	479	/* since the thread time is not yet added to the task */
	480	task_user_time = task->total_user_time + timer_grab(&thread->user_timer);
	481	task_system_time = task->total_system_time + timer_grab(&thread->system_timer);
	482	KDBG_RELEASE((DBG_MT_INSTRS_CYCLES_PROC_EXIT),
	483	#ifdef MT_CORE_INSTRS
	484	counts[MT_CORE_INSTRS],
	485	#else /* defined(MT_CORE_INSTRS) */
	486	0,
	487	#endif/* !defined(MT_CORE_INSTRS) */
	488	counts[MT_CORE_CYCLES],
	489	task_system_time, task_user_time);
	490	}
	491	#endif/* MONOTONIC */
	492	KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
	493	}
	494
	495	/* Get the exit reason before proc_exit */
	496	subcode = proc_encode_exit_exception_code(task->bsd_info);
	497	proc_exit(task->bsd_info);
	498	/*
	499	* if there is crash info in task
	500	* then do the deliver action since this is
	501	* last thread for this task.
	502	*/
	503	if (task->corpse_info) {
	504	task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
	505	}
	506	}
	507
	508	if (threadcnt == 0) {
	509	task_lock(task);
	510	if (task_is_a_corpse_fork(task)) {
	511	thread_wakeup((event_t)&task->active_thread_count);
	512	}
	513	task_unlock(task);
	514	}
	515
	516	uthread_cred_free(thread->uthread);
	517
	518	s = splsched();
	519	thread_lock(thread);
	520
	521	/*
	522	* Ensure that the depress timer is no longer enqueued,
	523	* so the timer (stored in the thread) can be safely deallocated
	524	*
	525	* TODO: build timer_call_cancel_wait
	526	*/
	527
	528	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
	529
	530	uint32_t delay_us = 1;
	531
	532	while (thread->depress_timer_active > 0) {
	533	thread_unlock(thread);
	534	splx(s);
	535
	536	delay(delay_us++);
	537
	538	if (delay_us > USEC_PER_SEC) {
	539	panic("depress timer failed to inactivate!"
	540	"thread: %p depress_timer_active: %d",
	541	thread, thread->depress_timer_active);
	542	}
	543
	544	s = splsched();
	545	thread_lock(thread);
	546	}
	547
	548	/*
	549	* Cancel wait timer, and wait for
	550	* concurrent expirations.
	551	*/
	552	if (thread->wait_timer_is_set) {
	553	thread->wait_timer_is_set = FALSE;
	554
	555	if (timer_call_cancel(&thread->wait_timer)) {
	556	thread->wait_timer_active--;
	557	}
	558	}
	559
	560	delay_us = 1;
	561
	562	while (thread->wait_timer_active > 0) {
	563	thread_unlock(thread);
	564	splx(s);
	565
	566	delay(delay_us++);
	567
	568	if (delay_us > USEC_PER_SEC) {
	569	panic("wait timer failed to inactivate!"
	570	"thread: %p wait_timer_active: %d",
	571	thread, thread->wait_timer_active);
	572	}
	573
	574	s = splsched();
	575	thread_lock(thread);
	576	}
	577
	578	/*
	579	* If there is a reserved stack, release it.
	580	*/
	581	if (thread->reserved_stack != 0) {
	582	stack_free_reserved(thread);
	583	thread->reserved_stack = 0;
	584	}
	585
	586	/*
	587	* Mark thread as terminating, and block.
	588	*/
	589	thread->state \|= TH_TERMINATE;
	590	thread_mark_wait_locked(thread, THREAD_UNINT);
	591
	592	assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
	593	assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
	594	assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
	595	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	596	assert((thread->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) == 0);
	597	assert(thread->th_work_interval_flags == TH_WORK_INTERVAL_FLAGS_NONE);
	598	assert(thread->kern_promotion_schedpri == 0);
	599	assert(thread->waiting_for_mutex == NULL);
	600	assert(thread->rwlock_count == 0);
	601	assert(thread->handoff_thread == THREAD_NULL);
	602	assert(thread->th_work_interval == NULL);
	603
	604	thread_unlock(thread);
	605	/* splsched */
	606
	607	thread_block((thread_continue_t)thread_terminate_continue);
	608	/NOTREACHED/
	609	}
	610
	611	static bool
	612	thread_ref_release(thread_t thread)
	613	{
	614	if (thread == THREAD_NULL) {
	615	return false;
	616	}
	617
	618	assert_thread_magic(thread);
	619
	620	return os_ref_release(&thread->ref_count) == 0;
	621	}
	622
	623	/* Drop a thread refcount safely without triggering a zfree */
	624	void
	625	thread_deallocate_safe(thread_t thread)
	626	{
	627	if (__improbable(thread_ref_release(thread))) {
	628	/* enqueue the thread for thread deallocate deamon to call thread_deallocate_complete */
	629	thread_deallocate_enqueue(thread);
	630	}
	631	}
	632
	633	void
	634	thread_deallocate(thread_t thread)
	635	{
	636	if (__improbable(thread_ref_release(thread))) {
	637	thread_deallocate_complete(thread);
	638	}
	639	}
	640
	641	void
	642	thread_deallocate_complete(
	643	thread_t thread)
	644	{
	645	task_t task;
	646
	647	assert_thread_magic(thread);
	648
	649	assert(os_ref_get_count(&thread->ref_count) == 0);
	650
	651	if (!(thread->state & TH_TERMINATE2)) {
	652	panic("thread_deallocate: thread not properly terminated\n");
	653	}
	654
	655	assert(thread->runq == PROCESSOR_NULL);
	656
	657	#if KPC
	658	kpc_thread_destroy(thread);
	659	#endif
	660
	661	ipc_thread_terminate(thread);
	662
	663	proc_thread_qos_deallocate(thread);
	664
	665	task = thread->task;
	666
	667	#ifdef MACH_BSD
	668	{
	669	void *ut = thread->uthread;
	670
	671	thread->uthread = NULL;
	672	uthread_zone_free(ut);
	673	}
	674	#endif /* MACH_BSD */
	675
	676	if (thread->t_ledger) {
	677	ledger_dereference(thread->t_ledger);
	678	}
	679	if (thread->t_threadledger) {
	680	ledger_dereference(thread->t_threadledger);
	681	}
	682
	683	assert(thread->turnstile != TURNSTILE_NULL);
	684	if (thread->turnstile) {
	685	turnstile_deallocate(thread->turnstile);
	686	}
	687
	688	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
	689	ipc_voucher_release(thread->ith_voucher);
	690	}
	691
	692	if (thread->thread_io_stats) {
	693	kheap_free(KHEAP_DATA_BUFFERS, thread->thread_io_stats,
	694	sizeof(struct io_stat_info));
	695	}
	696
	697	if (thread->kernel_stack != 0) {
	698	stack_free(thread);
	699	}
	700
	701	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	702	machine_thread_destroy(thread);
	703
	704	task_deallocate(task);
	705
	706	#if MACH_ASSERT
	707	assert_thread_magic(thread);
	708	thread->thread_magic = 0;
	709	#endif /* MACH_ASSERT */
	710
	711	zfree(thread_zone, thread);
	712	}
	713
	714	/*
	715	* thread_inspect_deallocate:
	716	*
	717	* Drop a thread inspection reference.
	718	*/
	719	void
	720	thread_inspect_deallocate(
	721	thread_inspect_t thread_inspect)
	722	{
	723	return thread_deallocate((thread_t)thread_inspect);
	724	}
	725
	726	/*
	727	* thread_read_deallocate:
	728	*
	729	* Drop a reference on thread read port.
	730	*/
	731	void
	732	thread_read_deallocate(
	733	thread_read_t thread_read)
	734	{
	735	return thread_deallocate((thread_t)thread_read);
	736	}
	737
	738
	739	/*
	740	* thread_exception_queue_invoke:
	741	*
	742	* Deliver EXC_{RESOURCE,GUARD} exception
	743	*/
	744	static void
	745	thread_exception_queue_invoke(mpsc_queue_chain_t elm,
	746	__assert_only mpsc_daemon_queue_t dq)
	747	{
	748	struct thread_exception_elt *elt;
	749	task_t task;
	750	thread_t thread;
	751	exception_type_t etype;
	752
	753	assert(dq == &thread_exception_queue);
	754	elt = mpsc_queue_element(elm, struct thread_exception_elt, link);
	755
	756	etype = elt->exception_type;
	757	task = elt->exception_task;
	758	thread = elt->exception_thread;
	759	assert_thread_magic(thread);
	760
	761	kfree(elt, sizeof(*elt));
	762
	763	/* wait for all the threads in the task to terminate */
	764	task_lock(task);
	765	task_wait_till_threads_terminate_locked(task);
	766	task_unlock(task);
	767
	768	/* Consumes the task ref returned by task_generate_corpse_internal */
	769	task_deallocate(task);
	770	/* Consumes the thread ref returned by task_generate_corpse_internal */
	771	thread_deallocate(thread);
	772
	773	/* Deliver the notification, also clears the corpse. */
	774	task_deliver_crash_notification(task, thread, etype, 0);
	775	}
	776
	777	/*
	778	* thread_exception_enqueue:
	779	*
	780	* Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
	781	*/
	782	void
	783	thread_exception_enqueue(
	784	task_t task,
	785	thread_t thread,
	786	exception_type_t etype)
	787	{
	788	assert(EXC_RESOURCE == etype \|\| EXC_GUARD == etype);
	789	struct thread_exception_elt elt = kalloc(sizeof(elt));
	790	elt->exception_type = etype;
	791	elt->exception_task = task;
	792	elt->exception_thread = thread;
	793
	794	mpsc_daemon_enqueue(&thread_exception_queue, &elt->link,
	795	MPSC_QUEUE_DISABLE_PREEMPTION);
	796	}
	797
	798	/*
	799	* thread_copy_resource_info
	800	*
	801	* Copy the resource info counters from source
	802	* thread to destination thread.
	803	*/
	804	void
	805	thread_copy_resource_info(
	806	thread_t dst_thread,
	807	thread_t src_thread)
	808	{
	809	dst_thread->c_switch = src_thread->c_switch;
	810	dst_thread->p_switch = src_thread->p_switch;
	811	dst_thread->ps_switch = src_thread->ps_switch;
	812	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	813	dst_thread->user_timer = src_thread->user_timer;
	814	dst_thread->user_timer_save = src_thread->user_timer_save;
	815	dst_thread->system_timer = src_thread->system_timer;
	816	dst_thread->system_timer_save = src_thread->system_timer_save;
	817	dst_thread->runnable_timer = src_thread->runnable_timer;
	818	dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
	819	dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
	820	dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
	821	dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
	822	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	823	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	824	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	825	dst_thread->thread_io_stats = src_thread->thread_io_stats;
	826	}
	827
	828	static void
	829	thread_terminate_queue_invoke(mpsc_queue_chain_t e,
	830	__assert_only mpsc_daemon_queue_t dq)
	831	{
	832	thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
	833	task_t task = thread->task;
	834
	835	assert(dq == &thread_terminate_queue);
	836
	837	task_lock(task);
	838
	839	/*
	840	* if marked for crash reporting, skip reaping.
	841	* The corpse delivery thread will clear bit and enqueue
	842	* for reaping when done
	843	*
	844	* Note: the inspection field is set under the task lock
	845	*
	846	* FIXME[mad]: why enqueue for termination before `inspection` is false ?
	847	*/
	848	if (__improbable(thread->inspection)) {
	849	simple_lock(&crashed_threads_lock, &thread_lck_grp);
	850	task_unlock(task);
	851
	852	enqueue_tail(&crashed_threads_queue, &thread->runq_links);
	853	simple_unlock(&crashed_threads_lock);
	854	return;
	855	}
	856
	857
	858	task->total_user_time += timer_grab(&thread->user_timer);
	859	task->total_ptime += timer_grab(&thread->ptime);
	860	task->total_runnable_time += timer_grab(&thread->runnable_timer);
	861	if (thread->precise_user_kernel_time) {
	862	task->total_system_time += timer_grab(&thread->system_timer);
	863	} else {
	864	task->total_user_time += timer_grab(&thread->system_timer);
	865	}
	866
	867	task->c_switch += thread->c_switch;
	868	task->p_switch += thread->p_switch;
	869	task->ps_switch += thread->ps_switch;
	870
	871	task->syscalls_unix += thread->syscalls_unix;
	872	task->syscalls_mach += thread->syscalls_mach;
	873
	874	task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
	875	task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
	876	task->task_gpu_ns += ml_gpu_stat(thread);
	877	task->task_energy += ml_energy_stat(thread);
	878	task->decompressions += thread->decompressions;
	879
	880	#if MONOTONIC
	881	mt_terminate_update(task, thread);
	882	#endif /* MONOTONIC */
	883
	884	thread_update_qos_cpu_time(thread);
	885
	886	queue_remove(&task->threads, thread, thread_t, task_threads);
	887	task->thread_count--;
	888
	889	/*
	890	* If the task is being halted, and there is only one thread
	891	* left in the task after this one, then wakeup that thread.
	892	*/
	893	if (task->thread_count == 1 && task->halting) {
	894	thread_wakeup((event_t)&task->halting);
	895	}
	896
	897	task_unlock(task);
	898
	899	lck_mtx_lock(&tasks_threads_lock);
	900	queue_remove(&threads, thread, thread_t, threads);
	901	threads_count--;
	902	lck_mtx_unlock(&tasks_threads_lock);
	903
	904	thread_deallocate(thread);
	905	}
	906
	907	static void
	908	thread_deallocate_queue_invoke(mpsc_queue_chain_t e,
	909	__assert_only mpsc_daemon_queue_t dq)
	910	{
	911	thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
	912
	913	assert(dq == &thread_deallocate_queue);
	914
	915	thread_deallocate_complete(thread);
	916	}
	917
	918	/*
	919	* thread_terminate_enqueue:
	920	*
	921	* Enqueue a terminating thread for final disposition.
	922	*
	923	* Called at splsched.
	924	*/
	925	void
	926	thread_terminate_enqueue(
	927	thread_t thread)
	928	{
	929	KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
	930
	931	mpsc_daemon_enqueue(&thread_terminate_queue, &thread->mpsc_links,
	932	MPSC_QUEUE_DISABLE_PREEMPTION);
	933	}
	934
	935	/*
	936	* thread_deallocate_enqueue:
	937	*
	938	* Enqueue a thread for final deallocation.
	939	*/
	940	static void
	941	thread_deallocate_enqueue(
	942	thread_t thread)
	943	{
	944	mpsc_daemon_enqueue(&thread_deallocate_queue, &thread->mpsc_links,
	945	MPSC_QUEUE_DISABLE_PREEMPTION);
	946	}
	947
	948	/*
	949	* thread_terminate_crashed_threads:
	950	* walk the list of crashed threads and put back set of threads
	951	* who are no longer being inspected.
	952	*/
	953	void
	954	thread_terminate_crashed_threads(void)
	955	{
	956	thread_t th_remove;
	957
	958	simple_lock(&crashed_threads_lock, &thread_lck_grp);
	959	/*
	960	* loop through the crashed threads queue
	961	* to put any threads that are not being inspected anymore
	962	*/
	963
	964	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
	965	/* make sure current_thread is never in crashed queue */
	966	assert(th_remove != current_thread());
	967
	968	if (th_remove->inspection == FALSE) {
	969	remqueue(&th_remove->runq_links);
	970	mpsc_daemon_enqueue(&thread_terminate_queue, &th_remove->mpsc_links,
	971	MPSC_QUEUE_NONE);
	972	}
	973	}
	974
	975	simple_unlock(&crashed_threads_lock);
	976	}
	977
	978	/*
	979	* thread_stack_queue_invoke:
	980	*
	981	* Perform stack allocation as required due to
	982	* invoke failures.
	983	*/
	984	static void
	985	thread_stack_queue_invoke(mpsc_queue_chain_t elm,
	986	__assert_only mpsc_daemon_queue_t dq)
	987	{
	988	thread_t thread = mpsc_queue_element(elm, struct thread, mpsc_links);
	989
	990	assert(dq == &thread_stack_queue);
	991
	992	/* allocate stack with interrupts enabled so that we can call into VM */
	993	stack_alloc(thread);
	994
	995	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) \| DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
	996
	997	spl_t s = splsched();
	998	thread_lock(thread);
	999	thread_setrun(thread, SCHED_PREEMPT \| SCHED_TAILQ);
	1000	thread_unlock(thread);
	1001	splx(s);
	1002	}
	1003
	1004	/*
	1005	* thread_stack_enqueue:
	1006	*
	1007	* Enqueue a thread for stack allocation.
	1008	*
	1009	* Called at splsched.
	1010	*/
	1011	void
	1012	thread_stack_enqueue(
	1013	thread_t thread)
	1014	{
	1015	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) \| DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	1016	assert_thread_magic(thread);
	1017
	1018	mpsc_daemon_enqueue(&thread_stack_queue, &thread->mpsc_links,
	1019	MPSC_QUEUE_DISABLE_PREEMPTION);
	1020	}
	1021
	1022	void
	1023	thread_daemon_init(void)
	1024	{
	1025	kern_return_t result;
	1026
	1027	thread_deallocate_daemon_init();
	1028
	1029	thread_deallocate_daemon_register_queue(&thread_terminate_queue,
	1030	thread_terminate_queue_invoke);
	1031
	1032	thread_deallocate_daemon_register_queue(&thread_deallocate_queue,
	1033	thread_deallocate_queue_invoke);
	1034
	1035	simple_lock_init(&crashed_threads_lock, 0);
	1036	queue_init(&crashed_threads_queue);
	1037
	1038	result = mpsc_daemon_queue_init_with_thread(&thread_stack_queue,
	1039	thread_stack_queue_invoke, BASEPRI_PREEMPT_HIGH,
	1040	"daemon.thread-stack");
	1041	if (result != KERN_SUCCESS) {
	1042	panic("thread_daemon_init: thread_stack_daemon");
	1043	}
	1044
	1045	result = mpsc_daemon_queue_init_with_thread(&thread_exception_queue,
	1046	thread_exception_queue_invoke, MINPRI_KERNEL,
	1047	"daemon.thread-exception");
	1048	if (result != KERN_SUCCESS) {
	1049	panic("thread_daemon_init: thread_exception_daemon");
	1050	}
	1051	}
	1052
	1053	#define TH_OPTION_NONE 0x00
	1054	#define TH_OPTION_NOCRED 0x01
	1055	#define TH_OPTION_NOSUSP 0x02
	1056	#define TH_OPTION_WORKQ 0x04
	1057
	1058	/*
	1059	* Create a new thread.
	1060	* Doesn't start the thread running.
	1061	*
	1062	* Task and tasks_threads_lock are returned locked on success.
	1063	*/
	1064	static kern_return_t
	1065	thread_create_internal(
	1066	task_t parent_task,
	1067	integer_t priority,
	1068	thread_continue_t continuation,
	1069	void *parameter,
	1070	int options,
	1071	thread_t *out_thread)
	1072	{
	1073	thread_t new_thread;
	1074	static thread_t first_thread;
	1075
	1076	/*
	1077	* Allocate a thread and initialize static fields
	1078	*/
	1079	if (first_thread == THREAD_NULL) {
	1080	new_thread = first_thread = current_thread();
	1081	} else {
	1082	new_thread = (thread_t)zalloc(thread_zone);
	1083	}
	1084	if (new_thread == THREAD_NULL) {
	1085	return KERN_RESOURCE_SHORTAGE;
	1086	}
	1087
	1088	if (new_thread != first_thread) {
	1089	init_thread_from_template(new_thread);
	1090	}
	1091
	1092	os_ref_init_count(&new_thread->ref_count, &thread_refgrp, 2);
	1093	#if DEBUG \|\| DEVELOPMENT
	1094	queue_init(&new_thread->t_temp_alloc_list);
	1095	#endif /* DEBUG \|\| DEVELOPMENT */
	1096
	1097	#ifdef MACH_BSD
	1098	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	1099	if (new_thread->uthread == NULL) {
	1100	#if MACH_ASSERT
	1101	new_thread->thread_magic = 0;
	1102	#endif /* MACH_ASSERT */
	1103
	1104	zfree(thread_zone, new_thread);
	1105	return KERN_RESOURCE_SHORTAGE;
	1106	}
	1107	#endif /* MACH_BSD */
	1108
	1109	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
	1110	#ifdef MACH_BSD
	1111	void *ut = new_thread->uthread;
	1112
	1113	new_thread->uthread = NULL;
	1114	/* cred free may not be necessary */
	1115	uthread_cleanup(parent_task, ut, parent_task->bsd_info);
	1116	uthread_cred_free(ut);
	1117	uthread_zone_free(ut);
	1118	#endif /* MACH_BSD */
	1119
	1120	#if MACH_ASSERT
	1121	new_thread->thread_magic = 0;
	1122	#endif /* MACH_ASSERT */
	1123
	1124	zfree(thread_zone, new_thread);
	1125	return KERN_FAILURE;
	1126	}
	1127
	1128	new_thread->task = parent_task;
	1129
	1130	thread_lock_init(new_thread);
	1131	wake_lock_init(new_thread);
	1132
	1133	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, LCK_ATTR_NULL);
	1134
	1135	ipc_thread_init(new_thread);
	1136
	1137	new_thread->continuation = continuation;
	1138	new_thread->parameter = parameter;
	1139	new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
	1140	priority_queue_init(&new_thread->sched_inheritor_queue);
	1141	priority_queue_init(&new_thread->base_inheritor_queue);
	1142	#if CONFIG_SCHED_CLUTCH
	1143	priority_queue_entry_init(&new_thread->th_clutch_runq_link);
	1144	priority_queue_entry_init(&new_thread->th_clutch_pri_link);
	1145	#endif /* CONFIG_SCHED_CLUTCH */
	1146
	1147	#if CONFIG_SCHED_EDGE
	1148	new_thread->th_bound_cluster_enqueued = false;
	1149	#endif /* CONFIG_SCHED_EDGE */
	1150
	1151	/* Allocate I/O Statistics structure */
	1152	new_thread->thread_io_stats = kheap_alloc(KHEAP_DATA_BUFFERS,
	1153	sizeof(struct io_stat_info), Z_WAITOK \| Z_ZERO);
	1154	assert(new_thread->thread_io_stats != NULL);
	1155
	1156	#if KASAN
	1157	kasan_init_thread(&new_thread->kasan_data);
	1158	#endif
	1159
	1160	#if CONFIG_KSANCOV
	1161	new_thread->ksancov_data = NULL;
	1162	#endif
	1163
	1164	#if CONFIG_IOSCHED
	1165	/* Clear out the I/O Scheduling info for AppleFSCompression */
	1166	new_thread->decmp_upl = NULL;
	1167	#endif /* CONFIG_IOSCHED */
	1168
	1169	new_thread->thread_region_page_shift = 0;
	1170
	1171	#if DEVELOPMENT \|\| DEBUG
	1172	task_lock(parent_task);
	1173	uint16_t thread_limit = parent_task->task_thread_limit;
	1174	if (exc_resource_threads_enabled &&
	1175	thread_limit > 0 &&
	1176	parent_task->thread_count >= thread_limit &&
	1177	!parent_task->task_has_crossed_thread_limit &&
	1178	!(parent_task->t_flags & TF_CORPSE)) {
	1179	int thread_count = parent_task->thread_count;
	1180	parent_task->task_has_crossed_thread_limit = TRUE;
	1181	task_unlock(parent_task);
	1182	SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
	1183	} else {
	1184	task_unlock(parent_task);
	1185	}
	1186	#endif
	1187
	1188	lck_mtx_lock(&tasks_threads_lock);
	1189	task_lock(parent_task);
	1190
	1191	/*
	1192	* Fail thread creation if parent task is being torn down or has too many threads
	1193	* If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
	1194	*/
	1195	if (parent_task->active == 0 \|\| parent_task->halting \|\|
	1196	(parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) \|\|
	1197	(parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
	1198	task_unlock(parent_task);
	1199	lck_mtx_unlock(&tasks_threads_lock);
	1200
	1201	#ifdef MACH_BSD
	1202	{
	1203	void *ut = new_thread->uthread;
	1204
	1205	new_thread->uthread = NULL;
	1206	uthread_cleanup(parent_task, ut, parent_task->bsd_info);
	1207	/* cred free may not be necessary */
	1208	uthread_cred_free(ut);
	1209	uthread_zone_free(ut);
	1210	}
	1211	#endif /* MACH_BSD */
	1212	ipc_thread_disable(new_thread);
	1213	ipc_thread_terminate(new_thread);
	1214	kheap_free(KHEAP_DATA_BUFFERS, new_thread->thread_io_stats,
	1215	sizeof(struct io_stat_info));
	1216	lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
	1217	machine_thread_destroy(new_thread);
	1218	zfree(thread_zone, new_thread);
	1219	return KERN_FAILURE;
	1220	}
	1221
	1222	/* Protected by the tasks_threads_lock */
	1223	new_thread->thread_id = ++thread_unique_id;
	1224
	1225	/* New threads inherit any default state on the task */
	1226	machine_thread_inherit_taskwide(new_thread, parent_task);
	1227
	1228	task_reference_internal(parent_task);
	1229
	1230	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
	1231	/*
	1232	* This task has a per-thread CPU limit; make sure this new thread
	1233	* gets its limit set too, before it gets out of the kernel.
	1234	*/
	1235	act_set_astledger(new_thread);
	1236	}
	1237
	1238	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	1239	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
	1240	LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
	1241	ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	1242	}
	1243
	1244	new_thread->t_bankledger = LEDGER_NULL;
	1245	new_thread->t_deduct_bank_ledger_time = 0;
	1246	new_thread->t_deduct_bank_ledger_energy = 0;
	1247
	1248	new_thread->t_ledger = new_thread->task->ledger;
	1249	if (new_thread->t_ledger) {
	1250	ledger_reference(new_thread->t_ledger);
	1251	}
	1252
	1253	#if defined(CONFIG_SCHED_MULTIQ)
	1254	/* Cache the task's sched_group */
	1255	new_thread->sched_group = parent_task->sched_group;
	1256	#endif /* defined(CONFIG_SCHED_MULTIQ) */
	1257
	1258	/* Cache the task's map */
	1259	new_thread->map = parent_task->map;
	1260
	1261	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	1262	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
	1263
	1264	#if KPC
	1265	kpc_thread_create(new_thread);
	1266	#endif
	1267
	1268	/* Set the thread's scheduling parameters */
	1269	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	1270	new_thread->max_priority = parent_task->max_priority;
	1271	new_thread->task_priority = parent_task->priority;
	1272
	1273	#if CONFIG_THREAD_GROUPS
	1274	thread_group_init_thread(new_thread, parent_task);
	1275	#endif /* CONFIG_THREAD_GROUPS */
	1276
	1277	int new_priority = (priority < 0) ? parent_task->priority: priority;
	1278	new_priority = (priority < 0)? parent_task->priority: priority;
	1279	if (new_priority > new_thread->max_priority) {
	1280	new_priority = new_thread->max_priority;
	1281	}
	1282	#if !defined(XNU_TARGET_OS_OSX)
	1283	if (new_priority < MAXPRI_THROTTLE) {
	1284	new_priority = MAXPRI_THROTTLE;
	1285	}
	1286	#endif /* !defined(XNU_TARGET_OS_OSX) */
	1287
	1288	new_thread->importance = new_priority - new_thread->task_priority;
	1289
	1290	sched_set_thread_base_priority(new_thread, new_priority);
	1291
	1292	#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	1293	new_thread->sched_stamp = sched_tick;
	1294	#if CONFIG_SCHED_CLUTCH
	1295	new_thread->pri_shift = sched_clutch_thread_pri_shift(new_thread, new_thread->th_sched_bucket);
	1296	#else /* CONFIG_SCHED_CLUTCH */
	1297	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
	1298	#endif /* CONFIG_SCHED_CLUTCH */
	1299	#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
	1300
	1301	if (parent_task->max_priority <= MAXPRI_THROTTLE) {
	1302	sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
	1303	}
	1304
	1305	thread_policy_create(new_thread);
	1306
	1307	/* Chain the thread onto the task's list */
	1308	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	1309	parent_task->thread_count++;
	1310
	1311	/* So terminating threads don't need to take the task lock to decrement */
	1312	os_atomic_inc(&parent_task->active_thread_count, relaxed);
	1313
	1314	queue_enter(&threads, new_thread, thread_t, threads);
	1315	threads_count++;
	1316
	1317	new_thread->active = TRUE;
	1318	if (task_is_a_corpse_fork(parent_task)) {
	1319	/* Set the inspection bit if the task is a corpse fork */
	1320	new_thread->inspection = TRUE;
	1321	} else {
	1322	new_thread->inspection = FALSE;
	1323	}
	1324	new_thread->corpse_dup = FALSE;
	1325	new_thread->turnstile = turnstile_alloc();
	1326
	1327
	1328	*out_thread = new_thread;
	1329
	1330	if (kdebug_enable) {
	1331	long args[4] = {};
	1332
	1333	kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
	1334
	1335	/*
	1336	* Starting with 26604425, exec'ing creates a new task/thread.
	1337	*
	1338	* NEWTHREAD in the current process has two possible meanings:
	1339	*
	1340	* 1) Create a new thread for this process.
	1341	* 2) Create a new thread for the future process this will become in an
	1342	* exec.
	1343	*
	1344	* To disambiguate these, arg3 will be set to TRUE for case #2.
	1345	*
	1346	* The value we need to find (TPF_EXEC_COPY) is stable in the case of a
	1347	* task exec'ing. The read of t_procflags does not take the proc_lock.
	1348	*/
	1349	args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
	1350
	1351	KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
	1352	args[1], args[2], args[3]);
	1353
	1354	kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
	1355	&args[2], &args[3]);
	1356	KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
	1357	args[3]);
	1358	}
	1359
	1360	DTRACE_PROC1(lwp__create, thread_t, *out_thread);
	1361
	1362	return KERN_SUCCESS;
	1363	}
	1364
	1365	static kern_return_t
	1366	thread_create_internal2(
	1367	task_t task,
	1368	thread_t *new_thread,
	1369	boolean_t from_user,
	1370	thread_continue_t continuation)
	1371	{
	1372	kern_return_t result;
	1373	thread_t thread;
	1374
	1375	if (task == TASK_NULL \|\| task == kernel_task) {
	1376	return KERN_INVALID_ARGUMENT;
	1377	}
	1378
	1379	#if CONFIG_MACF
	1380	if (from_user && current_task() != task &&
	1381	mac_proc_check_remote_thread_create(task, -1, NULL, 0) != 0) {
	1382	return KERN_DENIED;
	1383	}
	1384	#endif
	1385
	1386	result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
	1387	if (result != KERN_SUCCESS) {
	1388	return result;
	1389	}
	1390
	1391	thread->user_stop_count = 1;
	1392	thread_hold(thread);
	1393	if (task->suspend_count > 0) {
	1394	thread_hold(thread);
	1395	}
	1396
	1397	if (from_user) {
	1398	extmod_statistics_incr_thread_create(task);
	1399	}
	1400
	1401	task_unlock(task);
	1402	lck_mtx_unlock(&tasks_threads_lock);
	1403
	1404	*new_thread = thread;
	1405
	1406	return KERN_SUCCESS;
	1407	}
	1408
	1409	/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
	1410	kern_return_t
	1411	thread_create(
	1412	task_t task,
	1413	thread_t *new_thread);
	1414
	1415	kern_return_t
	1416	thread_create(
	1417	task_t task,
	1418	thread_t *new_thread)
	1419	{
	1420	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
	1421	}
	1422
	1423	kern_return_t
	1424	thread_create_from_user(
	1425	task_t task,
	1426	thread_t *new_thread)
	1427	{
	1428	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
	1429	}
	1430
	1431	kern_return_t
	1432	thread_create_with_continuation(
	1433	task_t task,
	1434	thread_t *new_thread,
	1435	thread_continue_t continuation)
	1436	{
	1437	return thread_create_internal2(task, new_thread, FALSE, continuation);
	1438	}
	1439
	1440	/*
	1441	* Create a thread that is already started, but is waiting on an event
	1442	*/
	1443	static kern_return_t
	1444	thread_create_waiting_internal(
	1445	task_t task,
	1446	thread_continue_t continuation,
	1447	event_t event,
	1448	block_hint_t block_hint,
	1449	int options,
	1450	thread_t *new_thread)
	1451	{
	1452	kern_return_t result;
	1453	thread_t thread;
	1454
	1455	if (task == TASK_NULL \|\| task == kernel_task) {
	1456	return KERN_INVALID_ARGUMENT;
	1457	}
	1458
	1459	result = thread_create_internal(task, -1, continuation, NULL,
	1460	options, &thread);
	1461	if (result != KERN_SUCCESS) {
	1462	return result;
	1463	}
	1464
	1465	/* note no user_stop_count or thread_hold here */
	1466
	1467	if (task->suspend_count > 0) {
	1468	thread_hold(thread);
	1469	}
	1470
	1471	thread_mtx_lock(thread);
	1472	thread_set_pending_block_hint(thread, block_hint);
	1473	if (options & TH_OPTION_WORKQ) {
	1474	thread->static_param = true;
	1475	event = workq_thread_init_and_wq_lock(task, thread);
	1476	}
	1477	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	1478	thread_mtx_unlock(thread);
	1479
	1480	task_unlock(task);
	1481	lck_mtx_unlock(&tasks_threads_lock);
	1482
	1483	*new_thread = thread;
	1484
	1485	return KERN_SUCCESS;
	1486	}
	1487
	1488	kern_return_t
	1489	thread_create_waiting(
	1490	task_t task,
	1491	thread_continue_t continuation,
	1492	event_t event,
	1493	thread_t *new_thread)
	1494	{
	1495	return thread_create_waiting_internal(task, continuation, event,
	1496	kThreadWaitNone, TH_OPTION_NONE, new_thread);
	1497	}
	1498
	1499
	1500	static kern_return_t
	1501	thread_create_running_internal2(
	1502	task_t task,
	1503	int flavor,
	1504	thread_state_t new_state,
	1505	mach_msg_type_number_t new_state_count,
	1506	thread_t *new_thread,
	1507	boolean_t from_user)
	1508	{
	1509	kern_return_t result;
	1510	thread_t thread;
	1511
	1512	if (task == TASK_NULL \|\| task == kernel_task) {
	1513	return KERN_INVALID_ARGUMENT;
	1514	}
	1515
	1516	#if CONFIG_MACF
	1517	if (from_user && current_task() != task &&
	1518	mac_proc_check_remote_thread_create(task, flavor, new_state, new_state_count) != 0) {
	1519	return KERN_DENIED;
	1520	}
	1521	#endif
	1522
	1523	result = thread_create_internal(task, -1,
	1524	(thread_continue_t)thread_bootstrap_return, NULL,
	1525	TH_OPTION_NONE, &thread);
	1526	if (result != KERN_SUCCESS) {
	1527	return result;
	1528	}
	1529
	1530	if (task->suspend_count > 0) {
	1531	thread_hold(thread);
	1532	}
	1533
	1534	if (from_user) {
	1535	result = machine_thread_state_convert_from_user(thread, flavor,
	1536	new_state, new_state_count);
	1537	}
	1538	if (result == KERN_SUCCESS) {
	1539	result = machine_thread_set_state(thread, flavor, new_state,
	1540	new_state_count);
	1541	}
	1542	if (result != KERN_SUCCESS) {
	1543	task_unlock(task);
	1544	lck_mtx_unlock(&tasks_threads_lock);
	1545
	1546	thread_terminate(thread);
	1547	thread_deallocate(thread);
	1548	return result;
	1549	}
	1550
	1551	thread_mtx_lock(thread);
	1552	thread_start(thread);
	1553	thread_mtx_unlock(thread);
	1554
	1555	if (from_user) {
	1556	extmod_statistics_incr_thread_create(task);
	1557	}
	1558
	1559	task_unlock(task);
	1560	lck_mtx_unlock(&tasks_threads_lock);
	1561
	1562	*new_thread = thread;
	1563
	1564	return result;
	1565	}
	1566
	1567	/* Prototype, see justification above */
	1568	kern_return_t
	1569	thread_create_running(
	1570	task_t task,
	1571	int flavor,
	1572	thread_state_t new_state,
	1573	mach_msg_type_number_t new_state_count,
	1574	thread_t *new_thread);
	1575
	1576	kern_return_t
	1577	thread_create_running(
	1578	task_t task,
	1579	int flavor,
	1580	thread_state_t new_state,
	1581	mach_msg_type_number_t new_state_count,
	1582	thread_t *new_thread)
	1583	{
	1584	return thread_create_running_internal2(
	1585	task, flavor, new_state, new_state_count,
	1586	new_thread, FALSE);
	1587	}
	1588
	1589	kern_return_t
	1590	thread_create_running_from_user(
	1591	task_t task,
	1592	int flavor,
	1593	thread_state_t new_state,
	1594	mach_msg_type_number_t new_state_count,
	1595	thread_t *new_thread)
	1596	{
	1597	return thread_create_running_internal2(
	1598	task, flavor, new_state, new_state_count,
	1599	new_thread, TRUE);
	1600	}
	1601
	1602	kern_return_t
	1603	thread_create_workq_waiting(
	1604	task_t task,
	1605	thread_continue_t continuation,
	1606	thread_t *new_thread)
	1607	{
	1608	int options = TH_OPTION_NOCRED \| TH_OPTION_NOSUSP \| TH_OPTION_WORKQ;
	1609	return thread_create_waiting_internal(task, continuation, NULL,
	1610	kThreadWaitParkedWorkQueue, options, new_thread);
	1611	}
	1612
	1613	/*
	1614	* kernel_thread_create:
	1615	*
	1616	* Create a thread in the kernel task
	1617	* to execute in kernel context.
	1618	*/
	1619	kern_return_t
	1620	kernel_thread_create(
	1621	thread_continue_t continuation,
	1622	void *parameter,
	1623	integer_t priority,
	1624	thread_t *new_thread)
	1625	{
	1626	kern_return_t result;
	1627	thread_t thread;
	1628	task_t task = kernel_task;
	1629
	1630	result = thread_create_internal(task, priority, continuation, parameter,
	1631	TH_OPTION_NOCRED \| TH_OPTION_NONE, &thread);
	1632	if (result != KERN_SUCCESS) {
	1633	return result;
	1634	}
	1635
	1636	task_unlock(task);
	1637	lck_mtx_unlock(&tasks_threads_lock);
	1638
	1639	stack_alloc(thread);
	1640	assert(thread->kernel_stack != 0);
	1641	#if !defined(XNU_TARGET_OS_OSX)
	1642	if (priority > BASEPRI_KERNEL)
	1643	#endif
	1644	thread->reserved_stack = thread->kernel_stack;
	1645
	1646	if (debug_task & 1) {
	1647	kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	1648	}
	1649	*new_thread = thread;
	1650
	1651	return result;
	1652	}
	1653
	1654	kern_return_t
	1655	kernel_thread_start_priority(
	1656	thread_continue_t continuation,
	1657	void *parameter,
	1658	integer_t priority,
	1659	thread_t *new_thread)
	1660	{
	1661	kern_return_t result;
	1662	thread_t thread;
	1663
	1664	result = kernel_thread_create(continuation, parameter, priority, &thread);
	1665	if (result != KERN_SUCCESS) {
	1666	return result;
	1667	}
	1668
	1669	*new_thread = thread;
	1670
	1671	thread_mtx_lock(thread);
	1672	thread_start(thread);
	1673	thread_mtx_unlock(thread);
	1674
	1675	return result;
	1676	}
	1677
	1678	kern_return_t
	1679	kernel_thread_start(
	1680	thread_continue_t continuation,
	1681	void *parameter,
	1682	thread_t *new_thread)
	1683	{
	1684	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
	1685	}
	1686
	1687	/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
	1688	/* it is assumed that the thread is locked by the caller */
	1689	static void
	1690	retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
	1691	{
	1692	int state, flags;
	1693
	1694	/* fill in info */
	1695
	1696	thread_read_times(thread, &basic_info->user_time,
	1697	&basic_info->system_time, NULL);
	1698
	1699	/*
	1700	* Update lazy-evaluated scheduler info because someone wants it.
	1701	*/
	1702	if (SCHED(can_update_priority)(thread)) {
	1703	SCHED(update_priority)(thread);
	1704	}
	1705
	1706	basic_info->sleep_time = 0;
	1707
	1708	/*
	1709	* To calculate cpu_usage, first correct for timer rate,
	1710	* then for 5/8 ageing. The correction factor [3/5] is
	1711	* (1/(5/8) - 1).
	1712	*/
	1713	basic_info->cpu_usage = 0;
	1714	#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	1715	if (sched_tick_interval) {
	1716	basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
	1717	* TH_USAGE_SCALE) / sched_tick_interval);
	1718	basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	1719	}
	1720	#endif
	1721
	1722	if (basic_info->cpu_usage > TH_USAGE_SCALE) {
	1723	basic_info->cpu_usage = TH_USAGE_SCALE;
	1724	}
	1725
	1726	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
	1727	POLICY_TIMESHARE: POLICY_RR);
	1728
	1729	flags = 0;
	1730	if (thread->options & TH_OPT_IDLE_THREAD) {
	1731	flags \|= TH_FLAGS_IDLE;
	1732	}
	1733
	1734	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
	1735	flags \|= TH_FLAGS_GLOBAL_FORCED_IDLE;
	1736	}
	1737
	1738	if (!thread->kernel_stack) {
	1739	flags \|= TH_FLAGS_SWAPPED;
	1740	}
	1741
	1742	state = 0;
	1743	if (thread->state & TH_TERMINATE) {
	1744	state = TH_STATE_HALTED;
	1745	} else if (thread->state & TH_RUN) {
	1746	state = TH_STATE_RUNNING;
	1747	} else if (thread->state & TH_UNINT) {
	1748	state = TH_STATE_UNINTERRUPTIBLE;
	1749	} else if (thread->state & TH_SUSP) {
	1750	state = TH_STATE_STOPPED;
	1751	} else if (thread->state & TH_WAIT) {
	1752	state = TH_STATE_WAITING;
	1753	}
	1754
	1755	basic_info->run_state = state;
	1756	basic_info->flags = flags;
	1757
	1758	basic_info->suspend_count = thread->user_stop_count;
	1759
	1760	return;
	1761	}
	1762
	1763	kern_return_t
	1764	thread_info_internal(
	1765	thread_t thread,
	1766	thread_flavor_t flavor,
	1767	thread_info_t thread_info_out, /* ptr to OUT array */
	1768	mach_msg_type_number_t thread_info_count) /IN/OUT*/
	1769	{
	1770	spl_t s;
	1771
	1772	if (thread == THREAD_NULL) {
	1773	return KERN_INVALID_ARGUMENT;
	1774	}
	1775
	1776	if (flavor == THREAD_BASIC_INFO) {
	1777	if (*thread_info_count < THREAD_BASIC_INFO_COUNT) {
	1778	return KERN_INVALID_ARGUMENT;
	1779	}
	1780
	1781	s = splsched();
	1782	thread_lock(thread);
	1783
	1784	retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
	1785
	1786	thread_unlock(thread);
	1787	splx(s);
	1788
	1789	*thread_info_count = THREAD_BASIC_INFO_COUNT;
	1790
	1791	return KERN_SUCCESS;
	1792	} else if (flavor == THREAD_IDENTIFIER_INFO) {
	1793	thread_identifier_info_t identifier_info;
	1794
	1795	if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT) {
	1796	return KERN_INVALID_ARGUMENT;
	1797	}
	1798
	1799	identifier_info = __IGNORE_WCASTALIGN((thread_identifier_info_t)thread_info_out);
	1800
	1801	s = splsched();
	1802	thread_lock(thread);
	1803
	1804	identifier_info->thread_id = thread->thread_id;
	1805	identifier_info->thread_handle = thread->machine.cthread_self;
	1806	identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
	1807
	1808	thread_unlock(thread);
	1809	splx(s);
	1810	return KERN_SUCCESS;
	1811	} else if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
	1812	policy_timeshare_info_t ts_info;
	1813
	1814	if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT) {
	1815	return KERN_INVALID_ARGUMENT;
	1816	}
	1817
	1818	ts_info = (policy_timeshare_info_t)thread_info_out;
	1819
	1820	s = splsched();
	1821	thread_lock(thread);
	1822
	1823	if (thread->sched_mode != TH_MODE_TIMESHARE) {
	1824	thread_unlock(thread);
	1825	splx(s);
	1826	return KERN_INVALID_POLICY;
	1827	}
	1828
	1829	ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
	1830	if (ts_info->depressed) {
	1831	ts_info->base_priority = DEPRESSPRI;
	1832	ts_info->depress_priority = thread->base_pri;
	1833	} else {
	1834	ts_info->base_priority = thread->base_pri;
	1835	ts_info->depress_priority = -1;
	1836	}
	1837
	1838	ts_info->cur_priority = thread->sched_pri;
	1839	ts_info->max_priority = thread->max_priority;
	1840
	1841	thread_unlock(thread);
	1842	splx(s);
	1843
	1844	*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
	1845
	1846	return KERN_SUCCESS;
	1847	} else if (flavor == THREAD_SCHED_FIFO_INFO) {
	1848	if (*thread_info_count < POLICY_FIFO_INFO_COUNT) {
	1849	return KERN_INVALID_ARGUMENT;
	1850	}
	1851
	1852	return KERN_INVALID_POLICY;
	1853	} else if (flavor == THREAD_SCHED_RR_INFO) {
	1854	policy_rr_info_t rr_info;
	1855	uint32_t quantum_time;
	1856	uint64_t quantum_ns;
	1857
	1858	if (*thread_info_count < POLICY_RR_INFO_COUNT) {
	1859	return KERN_INVALID_ARGUMENT;
	1860	}
	1861
	1862	rr_info = (policy_rr_info_t) thread_info_out;
	1863
	1864	s = splsched();
	1865	thread_lock(thread);
	1866
	1867	if (thread->sched_mode == TH_MODE_TIMESHARE) {
	1868	thread_unlock(thread);
	1869	splx(s);
	1870
	1871	return KERN_INVALID_POLICY;
	1872	}
	1873
	1874	rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
	1875	if (rr_info->depressed) {
	1876	rr_info->base_priority = DEPRESSPRI;
	1877	rr_info->depress_priority = thread->base_pri;
	1878	} else {
	1879	rr_info->base_priority = thread->base_pri;
	1880	rr_info->depress_priority = -1;
	1881	}
	1882
	1883	quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
	1884	absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
	1885
	1886	rr_info->max_priority = thread->max_priority;
	1887	rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
	1888
	1889	thread_unlock(thread);
	1890	splx(s);
	1891
	1892	*thread_info_count = POLICY_RR_INFO_COUNT;
	1893
	1894	return KERN_SUCCESS;
	1895	} else if (flavor == THREAD_EXTENDED_INFO) {
	1896	thread_basic_info_data_t basic_info;
	1897	thread_extended_info_t extended_info = __IGNORE_WCASTALIGN((thread_extended_info_t)thread_info_out);
	1898
	1899	if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
	1900	return KERN_INVALID_ARGUMENT;
	1901	}
	1902
	1903	s = splsched();
	1904	thread_lock(thread);
	1905
	1906	/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
	1907	* the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
	1908	*/
	1909	retrieve_thread_basic_info(thread, &basic_info);
	1910	extended_info->pth_user_time = (((uint64_t)basic_info.user_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.user_time.microseconds * NSEC_PER_USEC));
	1911	extended_info->pth_system_time = (((uint64_t)basic_info.system_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.system_time.microseconds * NSEC_PER_USEC));
	1912
	1913	extended_info->pth_cpu_usage = basic_info.cpu_usage;
	1914	extended_info->pth_policy = basic_info.policy;
	1915	extended_info->pth_run_state = basic_info.run_state;
	1916	extended_info->pth_flags = basic_info.flags;
	1917	extended_info->pth_sleep_time = basic_info.sleep_time;
	1918	extended_info->pth_curpri = thread->sched_pri;
	1919	extended_info->pth_priority = thread->base_pri;
	1920	extended_info->pth_maxpriority = thread->max_priority;
	1921
	1922	bsd_getthreadname(thread->uthread, extended_info->pth_name);
	1923
	1924	thread_unlock(thread);
	1925	splx(s);
	1926
	1927	*thread_info_count = THREAD_EXTENDED_INFO_COUNT;
	1928
	1929	return KERN_SUCCESS;
	1930	} else if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
	1931	#if DEVELOPMENT \|\| DEBUG
	1932	thread_debug_info_internal_t dbg_info;
	1933	if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT) {
	1934	return KERN_NOT_SUPPORTED;
	1935	}
	1936
	1937	if (thread_info_out == NULL) {
	1938	return KERN_INVALID_ARGUMENT;
	1939	}
	1940
	1941	dbg_info = __IGNORE_WCASTALIGN((thread_debug_info_internal_t)thread_info_out);
	1942	dbg_info->page_creation_count = thread->t_page_creation_count;
	1943
	1944	*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
	1945	return KERN_SUCCESS;
	1946	#endif /* DEVELOPMENT \|\| DEBUG */
	1947	return KERN_NOT_SUPPORTED;
	1948	}
	1949
	1950	return KERN_INVALID_ARGUMENT;
	1951	}
	1952
	1953	void
	1954	thread_read_times(
	1955	thread_t thread,
	1956	time_value_t *user_time,
	1957	time_value_t *system_time,
	1958	time_value_t *runnable_time)
	1959	{
	1960	clock_sec_t secs;
	1961	clock_usec_t usecs;
	1962	uint64_t tval_user, tval_system;
	1963
	1964	tval_user = timer_grab(&thread->user_timer);
	1965	tval_system = timer_grab(&thread->system_timer);
	1966
	1967	if (thread->precise_user_kernel_time) {
	1968	absolutetime_to_microtime(tval_user, &secs, &usecs);
	1969	user_time->seconds = (typeof(user_time->seconds))secs;
	1970	user_time->microseconds = usecs;
	1971
	1972	absolutetime_to_microtime(tval_system, &secs, &usecs);
	1973	system_time->seconds = (typeof(system_time->seconds))secs;
	1974	system_time->microseconds = usecs;
	1975	} else {
	1976	/* system_timer may represent either sys or user */
	1977	tval_user += tval_system;
	1978	absolutetime_to_microtime(tval_user, &secs, &usecs);
	1979	user_time->seconds = (typeof(user_time->seconds))secs;
	1980	user_time->microseconds = usecs;
	1981
	1982	system_time->seconds = 0;
	1983	system_time->microseconds = 0;
	1984	}
	1985
	1986	if (runnable_time) {
	1987	uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
	1988	absolutetime_to_microtime(tval_runnable, &secs, &usecs);
	1989	runnable_time->seconds = (typeof(runnable_time->seconds))secs;
	1990	runnable_time->microseconds = usecs;
	1991	}
	1992	}
	1993
	1994	uint64_t
	1995	thread_get_runtime_self(void)
	1996	{
	1997	boolean_t interrupt_state;
	1998	uint64_t runtime;
	1999	thread_t thread = NULL;
	2000	processor_t processor = NULL;
	2001
	2002	thread = current_thread();
	2003
	2004	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	2005	interrupt_state = ml_set_interrupts_enabled(FALSE);
	2006	processor = current_processor();
	2007	timer_update(processor->thread_timer, mach_absolute_time());
	2008	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	2009	ml_set_interrupts_enabled(interrupt_state);
	2010
	2011	return runtime;
	2012	}
	2013
	2014	kern_return_t
	2015	thread_assign(
	2016	__unused thread_t thread,
	2017	__unused processor_set_t new_pset)
	2018	{
	2019	return KERN_FAILURE;
	2020	}
	2021
	2022	/*
	2023	* thread_assign_default:
	2024	*
	2025	* Special version of thread_assign for assigning threads to default
	2026	* processor set.
	2027	*/
	2028	kern_return_t
	2029	thread_assign_default(
	2030	thread_t thread)
	2031	{
	2032	return thread_assign(thread, &pset0);
	2033	}
	2034
	2035	/*
	2036	* thread_get_assignment
	2037	*
	2038	* Return current assignment for this thread.
	2039	*/
	2040	kern_return_t
	2041	thread_get_assignment(
	2042	thread_t thread,
	2043	processor_set_t *pset)
	2044	{
	2045	if (thread == NULL) {
	2046	return KERN_INVALID_ARGUMENT;
	2047	}
	2048
	2049	*pset = &pset0;
	2050
	2051	return KERN_SUCCESS;
	2052	}
	2053
	2054	/*
	2055	* thread_wire_internal:
	2056	*
	2057	* Specify that the target thread must always be able
	2058	* to run and to allocate memory.
	2059	*/
	2060	kern_return_t
	2061	thread_wire_internal(
	2062	host_priv_t host_priv,
	2063	thread_t thread,
	2064	boolean_t wired,
	2065	boolean_t *prev_state)
	2066	{
	2067	if (host_priv == NULL \|\| thread != current_thread()) {
	2068	return KERN_INVALID_ARGUMENT;
	2069	}
	2070
	2071	assert(host_priv == &realhost);
	2072
	2073	if (prev_state) {
	2074	*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
	2075	}
	2076
	2077	if (wired) {
	2078	if (!(thread->options & TH_OPT_VMPRIV)) {
	2079	vm_page_free_reserve(1); /* XXX */
	2080	}
	2081	thread->options \|= TH_OPT_VMPRIV;
	2082	} else {
	2083	if (thread->options & TH_OPT_VMPRIV) {
	2084	vm_page_free_reserve(-1); /* XXX */
	2085	}
	2086	thread->options &= ~TH_OPT_VMPRIV;
	2087	}
	2088
	2089	return KERN_SUCCESS;
	2090	}
	2091
	2092
	2093	/*
	2094	* thread_wire:
	2095	*
	2096	* User-api wrapper for thread_wire_internal()
	2097	*/
	2098	kern_return_t
	2099	thread_wire(
	2100	host_priv_t host_priv,
	2101	thread_t thread,
	2102	boolean_t wired)
	2103	{
	2104	return thread_wire_internal(host_priv, thread, wired, NULL);
	2105	}
	2106
	2107
	2108	boolean_t
	2109	is_vm_privileged(void)
	2110	{
	2111	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
	2112	}
	2113
	2114	boolean_t
	2115	set_vm_privilege(boolean_t privileged)
	2116	{
	2117	boolean_t was_vmpriv;
	2118
	2119	if (current_thread()->options & TH_OPT_VMPRIV) {
	2120	was_vmpriv = TRUE;
	2121	} else {
	2122	was_vmpriv = FALSE;
	2123	}
	2124
	2125	if (privileged != FALSE) {
	2126	current_thread()->options \|= TH_OPT_VMPRIV;
	2127	} else {
	2128	current_thread()->options &= ~TH_OPT_VMPRIV;
	2129	}
	2130
	2131	return was_vmpriv;
	2132	}
	2133
	2134	void
	2135	set_thread_rwlock_boost(void)
	2136	{
	2137	current_thread()->rwlock_count++;
	2138	}
	2139
	2140	void
	2141	clear_thread_rwlock_boost(void)
	2142	{
	2143	thread_t thread = current_thread();
	2144
	2145	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
	2146	lck_rw_clear_promotion(thread, 0);
	2147	}
	2148	}
	2149
	2150	/*
	2151	* XXX assuming current thread only, for now...
	2152	*/
	2153	void
	2154	thread_guard_violation(thread_t thread,
	2155	mach_exception_data_type_t code, mach_exception_data_type_t subcode, boolean_t fatal)
	2156	{
	2157	assert(thread == current_thread());
	2158
	2159	/* Don't set up the AST for kernel threads; this check is needed to ensure
	2160	* that the guard_exc_* fields in the thread structure are set only by the
	2161	* current thread and therefore, don't require a lock.
	2162	*/
	2163	if (thread->task == kernel_task) {
	2164	return;
	2165	}
	2166
	2167	assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
	2168
	2169	/*
	2170	* Use the saved state area of the thread structure
	2171	* to store all info required to handle the AST when
	2172	* returning to userspace. It's possible that there is
	2173	* already a pending guard exception. If it's non-fatal,
	2174	* it can only be over-written by a fatal exception code.
	2175	*/
	2176	if (thread->guard_exc_info.code && (thread->guard_exc_fatal \|\| !fatal)) {
	2177	return;
	2178	}
	2179
	2180	thread->guard_exc_info.code = code;
	2181	thread->guard_exc_info.subcode = subcode;
	2182	thread->guard_exc_fatal = fatal ? 1 : 0;
	2183
	2184	spl_t s = splsched();
	2185	thread_ast_set(thread, AST_GUARD);
	2186	ast_propagate(thread);
	2187	splx(s);
	2188	}
	2189
	2190	/*
	2191	* guard_ast:
	2192	*
	2193	* Handle AST_GUARD for a thread. This routine looks at the
	2194	* state saved in the thread structure to determine the cause
	2195	* of this exception. Based on this value, it invokes the
	2196	* appropriate routine which determines other exception related
	2197	* info and raises the exception.
	2198	*/
	2199	void
	2200	guard_ast(thread_t t)
	2201	{
	2202	const mach_exception_data_type_t
	2203	code = t->guard_exc_info.code,
	2204	subcode = t->guard_exc_info.subcode;
	2205
	2206	t->guard_exc_info.code = 0;
	2207	t->guard_exc_info.subcode = 0;
	2208	t->guard_exc_fatal = 0;
	2209
	2210	switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
	2211	case GUARD_TYPE_NONE:
	2212	/* lingering AST_GUARD on the processor? */
	2213	break;
	2214	case GUARD_TYPE_MACH_PORT:
	2215	mach_port_guard_ast(t, code, subcode);
	2216	break;
	2217	case GUARD_TYPE_FD:
	2218	fd_guard_ast(t, code, subcode);
	2219	break;
	2220	#if CONFIG_VNGUARD
	2221	case GUARD_TYPE_VN:
	2222	vn_guard_ast(t, code, subcode);
	2223	break;
	2224	#endif
	2225	case GUARD_TYPE_VIRT_MEMORY:
	2226	virt_memory_guard_ast(t, code, subcode);
	2227	break;
	2228	default:
	2229	panic("guard_exc_info %llx %llx", code, subcode);
	2230	}
	2231	}
	2232
	2233	static void
	2234	thread_cputime_callback(int warning, __unused const void arg0, __unused const void arg1)
	2235	{
	2236	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
	2237	#if CONFIG_TELEMETRY
	2238	/*
	2239	* This thread is in danger of violating the CPU usage monitor. Enable telemetry
	2240	* on the entire task so there are micro-stackshots available if and when
	2241	* EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
	2242	* for this thread only; but now that this task is suspect, knowing what all of
	2243	* its threads are up to will be useful.
	2244	*/
	2245	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
	2246	#endif
	2247	return;
	2248	}
	2249
	2250	#if CONFIG_TELEMETRY
	2251	/*
	2252	* If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	2253	* exceeded the limit, turn telemetry off for the task.
	2254	*/
	2255	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
	2256	#endif
	2257
	2258	if (warning == 0) {
	2259	SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	2260	}
	2261	}
	2262
	2263	void __attribute__((noinline))
	2264	SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
	2265	{
	2266	int pid = 0;
	2267	task_t task = current_task();
	2268	thread_t thread = current_thread();
	2269	uint64_t tid = thread->thread_id;
	2270	const char *procname = "unknown";
	2271	time_value_t thread_total_time = {0, 0};
	2272	time_value_t thread_system_time;
	2273	time_value_t thread_user_time;
	2274	int action;
	2275	uint8_t percentage;
	2276	uint32_t usage_percent = 0;
	2277	uint32_t interval_sec;
	2278	uint64_t interval_ns;
	2279	uint64_t balance_ns;
	2280	boolean_t fatal = FALSE;
	2281	boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	2282	kern_return_t kr;
	2283
	2284	#ifdef EXC_RESOURCE_MONITORS
	2285	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	2286	#endif /* EXC_RESOURCE_MONITORS */
	2287	struct ledger_entry_info lei;
	2288
	2289	assert(thread->t_threadledger != LEDGER_NULL);
	2290
	2291	/*
	2292	* Extract the fatal bit and suspend the monitor (which clears the bit).
	2293	*/
	2294	task_lock(task);
	2295	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
	2296	fatal = TRUE;
	2297	send_exc_resource = TRUE;
	2298	}
	2299	/* Only one thread can be here at a time. Whichever makes it through
	2300	* first will successfully suspend the monitor and proceed to send the
	2301	* notification. Other threads will get an error trying to suspend the
	2302	* monitor and give up on sending the notification. In the first release,
	2303	* the monitor won't be resumed for a number of seconds, but we may
	2304	* eventually need to handle low-latency resume.
	2305	*/
	2306	kr = task_suspend_cpumon(task);
	2307	task_unlock(task);
	2308	if (kr == KERN_INVALID_ARGUMENT) {
	2309	return;
	2310	}
	2311
	2312	#ifdef MACH_BSD
	2313	pid = proc_selfpid();
	2314	if (task->bsd_info != NULL) {
	2315	procname = proc_name_address(task->bsd_info);
	2316	}
	2317	#endif
	2318
	2319	thread_get_cpulimit(&action, &percentage, &interval_ns);
	2320
	2321	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
	2322
	2323	thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
	2324	time_value_add(&thread_total_time, &thread_user_time);
	2325	time_value_add(&thread_total_time, &thread_system_time);
	2326	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
	2327
	2328	/* credit/debit/balance/limit are in absolute time units;
	2329	* the refill info is in nanoseconds. */
	2330	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	2331	if (lei.lei_last_refill > 0) {
	2332	usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	2333	}
	2334
	2335	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	2336	printf("process %s[%d] thread %llu caught burning CPU! It used more than %d%% CPU over %u seconds\n",
	2337	procname, pid, tid, percentage, interval_sec);
	2338	printf(" (actual recent usage: %d%% over ~%llu seconds)\n",
	2339	usage_percent, (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC);
	2340	printf(" Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys)\n",
	2341	thread_total_time.seconds, thread_total_time.microseconds,
	2342	thread_user_time.seconds, thread_user_time.microseconds,
	2343	thread_system_time.seconds, thread_system_time.microseconds);
	2344	printf(" Ledger balance: %lld; mabs credit: %lld; mabs debit: %lld\n",
	2345	lei.lei_balance, lei.lei_credit, lei.lei_debit);
	2346	printf(" mabs limit: %llu; mabs period: %llu ns; last refill: %llu ns%s.\n",
	2347	lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	2348	(fatal ? " [fatal violation]" : ""));
	2349
	2350	/*
	2351	* For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
	2352	* we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	2353	*/
	2354
	2355	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	2356	lei.lei_balance = balance_ns;
	2357	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	2358	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	2359	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	2360	fatal ? kRNFatalLimitFlag : 0);
	2361	if (kr) {
	2362	printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	2363	}
	2364
	2365	#ifdef EXC_RESOURCE_MONITORS
	2366	if (send_exc_resource) {
	2367	if (disable_exc_resource) {
	2368	printf("process %s[%d] thread %llu caught burning CPU! "
	2369	"EXC_RESOURCE%s supressed by a boot-arg\n",
	2370	procname, pid, tid, fatal ? " (and termination)" : "");
	2371	return;
	2372	}
	2373
	2374	if (audio_active) {
	2375	printf("process %s[%d] thread %llu caught burning CPU! "
	2376	"EXC_RESOURCE & termination supressed due to audio playback\n",
	2377	procname, pid, tid);
	2378	return;
	2379	}
	2380	}
	2381
	2382
	2383	if (send_exc_resource) {
	2384	code[0] = code[1] = 0;
	2385	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
	2386	if (fatal) {
	2387	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
	2388	} else {
	2389	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
	2390	}
	2391	EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
	2392	EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
	2393	EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
	2394	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	2395	}
	2396	#endif /* EXC_RESOURCE_MONITORS */
	2397
	2398	if (fatal) {
	2399	#if CONFIG_JETSAM
	2400	jetsam_on_ledger_cpulimit_exceeded();
	2401	#else
	2402	task_terminate_internal(task);
	2403	#endif
	2404	}
	2405	}
	2406
	2407	#if DEVELOPMENT \|\| DEBUG
	2408	void __attribute__((noinline))
	2409	SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
	2410	{
	2411	mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
	2412	int pid = task_pid(task);
	2413	char procname[MAXCOMLEN + 1] = "unknown";
	2414
	2415	if (pid == 1) {
	2416	/*
	2417	* Cannot suspend launchd
	2418	*/
	2419	return;
	2420	}
	2421
	2422	proc_name(pid, procname, sizeof(procname));
	2423
	2424	if (disable_exc_resource) {
	2425	printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
	2426	"supressed by a boot-arg. \n", procname, pid, thread_count);
	2427	return;
	2428	}
	2429
	2430	if (audio_active) {
	2431	printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
	2432	"supressed due to audio playback.\n", procname, pid, thread_count);
	2433	return;
	2434	}
	2435
	2436	if (exc_via_corpse_forking == 0) {
	2437	printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
	2438	"supressed due to corpse forking being disabled.\n", procname, pid,
	2439	thread_count);
	2440	return;
	2441	}
	2442
	2443	printf("process %s[%d] crossed thread count high watermark (%d), sending "
	2444	"EXC_RESOURCE\n", procname, pid, thread_count);
	2445
	2446	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
	2447	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
	2448	EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
	2449
	2450	task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
	2451	}
	2452	#endif /* DEVELOPMENT \|\| DEBUG */
	2453
	2454	void
	2455	thread_update_io_stats(thread_t thread, int size, int io_flags)
	2456	{
	2457	int io_tier;
	2458
	2459	if (thread->thread_io_stats == NULL \|\| thread->task->task_io_stats == NULL) {
	2460	return;
	2461	}
	2462
	2463	if (io_flags & DKIO_READ) {
	2464	UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
	2465	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	2466	}
	2467
	2468	if (io_flags & DKIO_META) {
	2469	UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
	2470	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	2471	}
	2472
	2473	if (io_flags & DKIO_PAGING) {
	2474	UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
	2475	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	2476	}
	2477
	2478	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	2479	assert(io_tier < IO_NUM_PRIORITIES);
	2480
	2481	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	2482	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
	2483
	2484	/* Update Total I/O Counts */
	2485	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	2486	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
	2487
	2488	if (!(io_flags & DKIO_READ)) {
	2489	DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
	2490	ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	2491	}
	2492	}
	2493
	2494	static void
	2495	init_thread_ledgers(void)
	2496	{
	2497	ledger_template_t t;
	2498	int idx;
	2499
	2500	assert(thread_ledger_template == NULL);
	2501
	2502	if ((t = ledger_template_create("Per-thread ledger")) == NULL) {
	2503	panic("couldn't create thread ledger template");
	2504	}
	2505
	2506	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
	2507	panic("couldn't create cpu_time entry for thread ledger template");
	2508	}
	2509
	2510	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
	2511	panic("couldn't set thread ledger callback for cpu_time entry");
	2512	}
	2513
	2514	thread_ledgers.cpu_time = idx;
	2515
	2516	ledger_template_complete(t);
	2517	thread_ledger_template = t;
	2518	}
	2519
	2520	/*
	2521	* Returns currently applied CPU usage limit, or 0/0 if none is applied.
	2522	*/
	2523	int
	2524	thread_get_cpulimit(int action, uint8_t percentage, uint64_t *interval_ns)
	2525	{
	2526	int64_t abstime = 0;
	2527	uint64_t limittime = 0;
	2528	thread_t thread = current_thread();
	2529
	2530	*percentage = 0;
	2531	*interval_ns = 0;
	2532	*action = 0;
	2533
	2534	if (thread->t_threadledger == LEDGER_NULL) {
	2535	/*
	2536	* This thread has no per-thread ledger, so it can't possibly
	2537	* have a CPU limit applied.
	2538	*/
	2539	return KERN_SUCCESS;
	2540	}
	2541
	2542	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	2543	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
	2544
	2545	if ((abstime == LEDGER_LIMIT_INFINITY) \|\| (*interval_ns == 0)) {
	2546	/*
	2547	* This thread's CPU time ledger has no period or limit; so it
	2548	* doesn't have a CPU limit applied.
	2549	*/
	2550	return KERN_SUCCESS;
	2551	}
	2552
	2553	/*
	2554	* This calculation is the converse to the one in thread_set_cpulimit().
	2555	*/
	2556	absolutetime_to_nanoseconds(abstime, &limittime);
	2557	percentage = (uint8_t)((limittime 100ULL) / *interval_ns);
	2558	assert(*percentage <= 100);
	2559
	2560	if (thread->options & TH_OPT_PROC_CPULIMIT) {
	2561	assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
	2562
	2563	*action = THREAD_CPULIMIT_BLOCK;
	2564	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
	2565	assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
	2566
	2567	*action = THREAD_CPULIMIT_EXCEPTION;
	2568	} else {
	2569	*action = THREAD_CPULIMIT_DISABLE;
	2570	}
	2571
	2572	return KERN_SUCCESS;
	2573	}
	2574
	2575	/*
	2576	* Set CPU usage limit on a thread.
	2577	*
	2578	* Calling with percentage of 0 will unset the limit for this thread.
	2579	*/
	2580	int
	2581	thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
	2582	{
	2583	thread_t thread = current_thread();
	2584	ledger_t l;
	2585	uint64_t limittime = 0;
	2586	uint64_t abstime = 0;
	2587
	2588	assert(percentage <= 100);
	2589
	2590	if (action == THREAD_CPULIMIT_DISABLE) {
	2591	/*
	2592	* Remove CPU limit, if any exists.
	2593	*/
	2594	if (thread->t_threadledger != LEDGER_NULL) {
	2595	l = thread->t_threadledger;
	2596	ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
	2597	ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
	2598	thread->options &= ~(TH_OPT_PROC_CPULIMIT \| TH_OPT_PRVT_CPULIMIT);
	2599	}
	2600
	2601	return 0;
	2602	}
	2603
	2604	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
	2605	return KERN_INVALID_ARGUMENT;
	2606	}
	2607
	2608	l = thread->t_threadledger;
	2609	if (l == LEDGER_NULL) {
	2610	/*
	2611	* This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
	2612	*/
	2613	if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL) {
	2614	return KERN_RESOURCE_SHORTAGE;
	2615	}
	2616
	2617	/*
	2618	* We are the first to create this thread's ledger, so only activate our entry.
	2619	*/
	2620	ledger_entry_setactive(l, thread_ledgers.cpu_time);
	2621	thread->t_threadledger = l;
	2622	}
	2623
	2624	/*
	2625	* The limit is specified as a percentage of CPU over an interval in nanoseconds.
	2626	* Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	2627	*/
	2628	limittime = (interval_ns * percentage) / 100;
	2629	nanoseconds_to_absolutetime(limittime, &abstime);
	2630	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	2631	/*
	2632	* Refill the thread's allotted CPU time every interval_ns nanoseconds.
	2633	*/
	2634	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
	2635
	2636	if (action == THREAD_CPULIMIT_EXCEPTION) {
	2637	/*
	2638	* We don't support programming the CPU usage monitor on a task if any of its
	2639	* threads have a per-thread blocking CPU limit configured.
	2640	*/
	2641	if (thread->options & TH_OPT_PRVT_CPULIMIT) {
	2642	panic("CPU usage monitor activated, but blocking thread limit exists");
	2643	}
	2644
	2645	/*
	2646	* Make a note that this thread's CPU limit is being used for the task-wide CPU
	2647	* usage monitor. We don't have to arm the callback which will trigger the
	2648	* exception, because that was done for us in ledger_instantiate (because the
	2649	* ledger template used has a default callback).
	2650	*/
	2651	thread->options \|= TH_OPT_PROC_CPULIMIT;
	2652	} else {
	2653	/*
	2654	* We deliberately override any CPU limit imposed by a task-wide limit (eg
	2655	* CPU usage monitor).
	2656	*/
	2657	thread->options &= ~TH_OPT_PROC_CPULIMIT;
	2658
	2659	thread->options \|= TH_OPT_PRVT_CPULIMIT;
	2660	/* The per-thread ledger template by default has a callback for CPU time */
	2661	ledger_disable_callback(l, thread_ledgers.cpu_time);
	2662	ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	2663	}
	2664
	2665	return 0;
	2666	}
	2667
	2668	void
	2669	thread_sched_call(
	2670	thread_t thread,
	2671	sched_call_t call)
	2672	{
	2673	assert((thread->state & TH_WAIT_REPORT) == 0);
	2674	thread->sched_call = call;
	2675	}
	2676
	2677	uint64_t
	2678	thread_tid(
	2679	thread_t thread)
	2680	{
	2681	return thread != THREAD_NULL? thread->thread_id: 0;
	2682	}
	2683
	2684	uint16_t
	2685	thread_set_tag(thread_t th, uint16_t tag)
	2686	{
	2687	return thread_set_tag_internal(th, tag);
	2688	}
	2689
	2690	uint16_t
	2691	thread_get_tag(thread_t th)
	2692	{
	2693	return thread_get_tag_internal(th);
	2694	}
	2695
	2696	uint64_t
	2697	thread_last_run_time(thread_t th)
	2698	{
	2699	return th->last_run_time;
	2700	}
	2701
	2702	uint64_t
	2703	thread_dispatchqaddr(
	2704	thread_t thread)
	2705	{
	2706	uint64_t dispatchqueue_addr;
	2707	uint64_t thread_handle;
	2708
	2709	if (thread == THREAD_NULL) {
	2710	return 0;
	2711	}
	2712
	2713	thread_handle = thread->machine.cthread_self;
	2714	if (thread_handle == 0) {
	2715	return 0;
	2716	}
	2717
	2718	if (thread->inspection == TRUE) {
	2719	dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	2720	} else if (thread->task->bsd_info) {
	2721	dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	2722	} else {
	2723	dispatchqueue_addr = 0;
	2724	}
	2725
	2726	return dispatchqueue_addr;
	2727	}
	2728
	2729	uint64_t
	2730	thread_rettokern_addr(
	2731	thread_t thread)
	2732	{
	2733	uint64_t rettokern_addr;
	2734	uint64_t rettokern_offset;
	2735	uint64_t thread_handle;
	2736
	2737	if (thread == THREAD_NULL) {
	2738	return 0;
	2739	}
	2740
	2741	thread_handle = thread->machine.cthread_self;
	2742	if (thread_handle == 0) {
	2743	return 0;
	2744	}
	2745
	2746	if (thread->task->bsd_info) {
	2747	rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
	2748
	2749	/* Return 0 if return to kernel offset is not initialized. */
	2750	if (rettokern_offset == 0) {
	2751	rettokern_addr = 0;
	2752	} else {
	2753	rettokern_addr = thread_handle + rettokern_offset;
	2754	}
	2755	} else {
	2756	rettokern_addr = 0;
	2757	}
	2758
	2759	return rettokern_addr;
	2760	}
	2761
	2762	/*
	2763	* Export routines to other components for things that are done as macros
	2764	* within the osfmk component.
	2765	*/
	2766
	2767	#undef thread_mtx_lock
	2768	void thread_mtx_lock(thread_t thread);
	2769	void
	2770	thread_mtx_lock(thread_t thread)
	2771	{
	2772	lck_mtx_lock(&thread->mutex);
	2773	}
	2774
	2775	#undef thread_mtx_unlock
	2776	void thread_mtx_unlock(thread_t thread);
	2777	void
	2778	thread_mtx_unlock(thread_t thread)
	2779	{
	2780	lck_mtx_unlock(&thread->mutex);
	2781	}
	2782
	2783	#undef thread_reference
	2784	void thread_reference(thread_t thread);
	2785	void
	2786	thread_reference(
	2787	thread_t thread)
	2788	{
	2789	if (thread != THREAD_NULL) {
	2790	thread_reference_internal(thread);
	2791	}
	2792	}
	2793
	2794	#undef thread_should_halt
	2795
	2796	boolean_t
	2797	thread_should_halt(
	2798	thread_t th)
	2799	{
	2800	return thread_should_halt_fast(th);
	2801	}
	2802
	2803	/*
	2804	* thread_set_voucher_name - reset the voucher port name bound to this thread
	2805	*
	2806	* Conditions: nothing locked
	2807	*/
	2808
	2809	kern_return_t
	2810	thread_set_voucher_name(mach_port_name_t voucher_name)
	2811	{
	2812	thread_t thread = current_thread();
	2813	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	2814	ipc_voucher_t voucher;
	2815	ledger_t bankledger = NULL;
	2816	struct thread_group *banktg = NULL;
	2817	uint32_t persona_id = 0;
	2818
	2819	if (MACH_PORT_DEAD == voucher_name) {
	2820	return KERN_INVALID_RIGHT;
	2821	}
	2822
	2823	/*
	2824	* agressively convert to voucher reference
	2825	*/
	2826	if (MACH_PORT_VALID(voucher_name)) {
	2827	new_voucher = convert_port_name_to_voucher(voucher_name);
	2828	if (IPC_VOUCHER_NULL == new_voucher) {
	2829	return KERN_INVALID_ARGUMENT;
	2830	}
	2831	}
	2832	bank_get_bank_ledger_thread_group_and_persona(new_voucher, &bankledger, &banktg, &persona_id);
	2833
	2834	thread_mtx_lock(thread);
	2835	voucher = thread->ith_voucher;
	2836	thread->ith_voucher_name = voucher_name;
	2837	thread->ith_voucher = new_voucher;
	2838	thread_mtx_unlock(thread);
	2839
	2840	bank_swap_thread_bank_ledger(thread, bankledger);
	2841	#if CONFIG_THREAD_GROUPS
	2842	thread_group_set_bank(thread, banktg);
	2843	#endif /* CONFIG_THREAD_GROUPS */
	2844
	2845	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	2846	MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) \| DBG_FUNC_NONE,
	2847	(uintptr_t)thread_tid(thread),
	2848	(uintptr_t)voucher_name,
	2849	VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
	2850	persona_id, 0);
	2851
	2852	if (IPC_VOUCHER_NULL != voucher) {
	2853	ipc_voucher_release(voucher);
	2854	}
	2855
	2856	return KERN_SUCCESS;
	2857	}
	2858
	2859	/*
	2860	* thread_get_mach_voucher - return a voucher reference for the specified thread voucher
	2861	*
	2862	* Conditions: nothing locked
	2863	*
	2864	* NOTE: At the moment, there is no distinction between the current and effective
	2865	* vouchers because we only set them at the thread level currently.
	2866	*/
	2867	kern_return_t
	2868	thread_get_mach_voucher(
	2869	thread_act_t thread,
	2870	mach_voucher_selector_t __unused which,
	2871	ipc_voucher_t *voucherp)
	2872	{
	2873	ipc_voucher_t voucher;
	2874
	2875	if (THREAD_NULL == thread) {
	2876	return KERN_INVALID_ARGUMENT;
	2877	}
	2878
	2879	thread_mtx_lock(thread);
	2880	voucher = thread->ith_voucher;
	2881
	2882	if (IPC_VOUCHER_NULL != voucher) {
	2883	ipc_voucher_reference(voucher);
	2884	thread_mtx_unlock(thread);
	2885	*voucherp = voucher;
	2886	return KERN_SUCCESS;
	2887	}
	2888
	2889	thread_mtx_unlock(thread);
	2890
	2891	*voucherp = IPC_VOUCHER_NULL;
	2892	return KERN_SUCCESS;
	2893	}
	2894
	2895	/*
	2896	* thread_set_mach_voucher - set a voucher reference for the specified thread voucher
	2897	*
	2898	* Conditions: callers holds a reference on the voucher.
	2899	* nothing locked.
	2900	*
	2901	* We grab another reference to the voucher and bind it to the thread.
	2902	* The old voucher reference associated with the thread is
	2903	* discarded.
	2904	*/
	2905	kern_return_t
	2906	thread_set_mach_voucher(
	2907	thread_t thread,
	2908	ipc_voucher_t voucher)
	2909	{
	2910	ipc_voucher_t old_voucher;
	2911	ledger_t bankledger = NULL;
	2912	struct thread_group *banktg = NULL;
	2913	uint32_t persona_id = 0;
	2914
	2915	if (THREAD_NULL == thread) {
	2916	return KERN_INVALID_ARGUMENT;
	2917	}
	2918
	2919	bank_get_bank_ledger_thread_group_and_persona(voucher, &bankledger, &banktg, &persona_id);
	2920
	2921	thread_mtx_lock(thread);
	2922	/*
	2923	* Once the thread is started, we will look at `ith_voucher` without
	2924	* holding any lock.
	2925	*
	2926	* Setting the voucher hence can only be done by current_thread() or
	2927	* before it started. "started" flips under the thread mutex and must be
	2928	* tested under it too.
	2929	*/
	2930	if (thread != current_thread() && thread->started) {
	2931	thread_mtx_unlock(thread);
	2932	return KERN_INVALID_ARGUMENT;
	2933	}
	2934
	2935	ipc_voucher_reference(voucher);
	2936	old_voucher = thread->ith_voucher;
	2937	thread->ith_voucher = voucher;
	2938	thread->ith_voucher_name = MACH_PORT_NULL;
	2939	thread_mtx_unlock(thread);
	2940
	2941	bank_swap_thread_bank_ledger(thread, bankledger);
	2942	#if CONFIG_THREAD_GROUPS
	2943	thread_group_set_bank(thread, banktg);
	2944	#endif /* CONFIG_THREAD_GROUPS */
	2945
	2946	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	2947	MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) \| DBG_FUNC_NONE,
	2948	(uintptr_t)thread_tid(thread),
	2949	(uintptr_t)MACH_PORT_NULL,
	2950	VM_KERNEL_ADDRPERM((uintptr_t)voucher),
	2951	persona_id, 0);
	2952
	2953	ipc_voucher_release(old_voucher);
	2954
	2955	return KERN_SUCCESS;
	2956	}
	2957
	2958	/*
	2959	* thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
	2960	*
	2961	* Conditions: callers holds a reference on the new and presumed old voucher(s).
	2962	* nothing locked.
	2963	*
	2964	* This function is no longer supported.
	2965	*/
	2966	kern_return_t
	2967	thread_swap_mach_voucher(
	2968	__unused thread_t thread,
	2969	__unused ipc_voucher_t new_voucher,
	2970	ipc_voucher_t *in_out_old_voucher)
	2971	{
	2972	/*
	2973	* Currently this function is only called from a MIG generated
	2974	* routine which doesn't release the reference on the voucher
	2975	* addressed by in_out_old_voucher. To avoid leaking this reference,
	2976	* a call to release it has been added here.
	2977	*/
	2978	ipc_voucher_release(*in_out_old_voucher);
	2979	return KERN_NOT_SUPPORTED;
	2980	}
	2981
	2982	/*
	2983	* thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
	2984	*/
	2985	kern_return_t
	2986	thread_get_current_voucher_origin_pid(
	2987	int32_t *pid)
	2988	{
	2989	uint32_t buf_size;
	2990	kern_return_t kr;
	2991	thread_t thread = current_thread();
	2992
	2993	buf_size = sizeof(*pid);
	2994	kr = mach_voucher_attr_command(thread->ith_voucher,
	2995	MACH_VOUCHER_ATTR_KEY_BANK,
	2996	BANK_ORIGINATOR_PID,
	2997	NULL,
	2998	0,
	2999	(mach_voucher_attr_content_t)pid,
	3000	&buf_size);
	3001
	3002	return kr;
	3003	}
	3004
	3005	#if CONFIG_THREAD_GROUPS
	3006	/*
	3007	* Returns the current thread's voucher-carried thread group
	3008	*
	3009	* Reference is borrowed from this being the current voucher, so it does NOT
	3010	* return a reference to the group.
	3011	*/
	3012	struct thread_group *
	3013	thread_get_current_voucher_thread_group(thread_t thread)
	3014	{
	3015	assert(thread == current_thread());
	3016
	3017	if (thread->ith_voucher == NULL) {
	3018	return NULL;
	3019	}
	3020
	3021	ledger_t bankledger = NULL;
	3022	struct thread_group *banktg = NULL;
	3023
	3024	bank_get_bank_ledger_thread_group_and_persona(thread->ith_voucher, &bankledger, &banktg, NULL);
	3025
	3026	return banktg;
	3027	}
	3028
	3029	#endif /* CONFIG_THREAD_GROUPS */
	3030
	3031	boolean_t
	3032	thread_has_thread_name(thread_t th)
	3033	{
	3034	if ((th) && (th->uthread)) {
	3035	return bsd_hasthreadname(th->uthread);
	3036	}
	3037
	3038	/*
	3039	* This is an odd case; clients may set the thread name based on the lack of
	3040	* a name, but in this context there is no uthread to attach the name to.
	3041	*/
	3042	return FALSE;
	3043	}
	3044
	3045	void
	3046	thread_set_thread_name(thread_t th, const char* name)
	3047	{
	3048	if ((th) && (th->uthread) && name) {
	3049	bsd_setthreadname(th->uthread, name);
	3050	}
	3051	}
	3052
	3053	void
	3054	thread_get_thread_name(thread_t th, char* name)
	3055	{
	3056	if (!name) {
	3057	return;
	3058	}
	3059	if ((th) && (th->uthread)) {
	3060	bsd_getthreadname(th->uthread, name);
	3061	} else {
	3062	name[0] = '\0';
	3063	}
	3064	}
	3065
	3066	void
	3067	thread_set_honor_qlimit(thread_t thread)
	3068	{
	3069	thread->options \|= TH_OPT_HONOR_QLIMIT;
	3070	}
	3071
	3072	void
	3073	thread_clear_honor_qlimit(thread_t thread)
	3074	{
	3075	thread->options &= (~TH_OPT_HONOR_QLIMIT);
	3076	}
	3077
	3078	/*
	3079	* thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
	3080	*/
	3081	void
	3082	thread_enable_send_importance(thread_t thread, boolean_t enable)
	3083	{
	3084	if (enable == TRUE) {
	3085	thread->options \|= TH_OPT_SEND_IMPORTANCE;
	3086	} else {
	3087	thread->options &= ~TH_OPT_SEND_IMPORTANCE;
	3088	}
	3089	}
	3090
	3091	/*
	3092	* thread_set_allocation_name - .
	3093	*/
	3094
	3095	kern_allocation_name_t
	3096	thread_set_allocation_name(kern_allocation_name_t new_name)
	3097	{
	3098	kern_allocation_name_t ret;
	3099	thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
	3100	ret = kstate->allocation_name;
	3101	// fifo
	3102	if (!new_name \|\| !kstate->allocation_name) {
	3103	kstate->allocation_name = new_name;
	3104	}
	3105	return ret;
	3106	}
	3107
	3108	void *
	3109	thread_iokit_tls_get(uint32_t index)
	3110	{
	3111	assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
	3112	return current_thread()->saved.iokit.tls[index];
	3113	}
	3114
	3115	void
	3116	thread_iokit_tls_set(uint32_t index, void * data)
	3117	{
	3118	assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
	3119	current_thread()->saved.iokit.tls[index] = data;
	3120	}
	3121
	3122	uint64_t
	3123	thread_get_last_wait_duration(thread_t thread)
	3124	{
	3125	return thread->last_made_runnable_time - thread->last_run_time;
	3126	}
	3127
	3128	integer_t
	3129	thread_kern_get_pri(thread_t thr)
	3130	{
	3131	return thr->base_pri;
	3132	}
	3133
	3134	void
	3135	thread_kern_set_pri(thread_t thr, integer_t pri)
	3136	{
	3137	sched_set_kernel_thread_priority(thr, pri);
	3138	}
	3139
	3140	integer_t
	3141	thread_kern_get_kernel_maxpri(void)
	3142	{
	3143	return MAXPRI_KERNEL;
	3144	}
	3145	/*
	3146	* thread_port_with_flavor_notify
	3147	*
	3148	* Called whenever the Mach port system detects no-senders on
	3149	* the thread inspect or read port. These ports are allocated lazily and
	3150	* should be deallocated here when there are no senders remaining.
	3151	*/
	3152	void
	3153	thread_port_with_flavor_notify(mach_msg_header_t *msg)
	3154	{
	3155	mach_no_senders_notification_t notification = (void )msg;
	3156	ipc_port_t port = notification->not_header.msgh_remote_port;
	3157	thread_t thread;
	3158	mach_thread_flavor_t flavor;
	3159	ipc_kobject_type_t kotype;
	3160
	3161	ip_lock(port);
	3162	if (port->ip_srights > 0) {
	3163	ip_unlock(port);
	3164	return;
	3165	}
	3166	thread = (thread_t)port->ip_kobject;
	3167	kotype = ip_kotype(port);
	3168	if (thread != THREAD_NULL) {
	3169	assert((IKOT_THREAD_READ == kotype) \|\| (IKOT_THREAD_INSPECT == kotype));
	3170	thread_reference_internal(thread);
	3171	}
	3172	ip_unlock(port);
	3173
	3174	if (thread == THREAD_NULL) {
	3175	/* The thread is exiting or disabled; it will eventually deallocate the port */
	3176	return;
	3177	}
	3178
	3179	thread_mtx_lock(thread);
	3180	ip_lock(port);
	3181	require_ip_active(port);
	3182	/*
	3183	* Check for a stale no-senders notification. A call to any function
	3184	* that vends out send rights to this port could resurrect it between
	3185	* this notification being generated and actually being handled here.
	3186	*/
	3187	if (port->ip_srights > 0) {
	3188	ip_unlock(port);
	3189	thread_mtx_unlock(thread);
	3190	thread_deallocate(thread);
	3191	return;
	3192	}
	3193	if (kotype == IKOT_THREAD_READ) {
	3194	flavor = THREAD_FLAVOR_READ;
	3195	} else {
	3196	flavor = THREAD_FLAVOR_INSPECT;
	3197	}
	3198	assert(thread->ith_self[flavor] == port);
	3199	thread->ith_self[flavor] = IP_NULL;
	3200	port->ip_kobject = IKOT_NONE;
	3201	ip_unlock(port);
	3202	thread_mtx_unlock(thread);
	3203	thread_deallocate(thread);
	3204
	3205	ipc_port_dealloc_kernel(port);
	3206	}
	3207
	3208	/*
	3209	* The 'thread_region_page_shift' is used by footprint
	3210	* to specify the page size that it will use to
	3211	* accomplish its accounting work on the task being
	3212	* inspected. Since footprint uses a thread for each
	3213	* task that it works on, we need to keep the page_shift
	3214	* on a per-thread basis.
	3215	*/
	3216
	3217	int
	3218	thread_self_region_page_shift(void)
	3219	{
	3220	/*
	3221	* Return the page shift that this thread
	3222	* would like to use for its accounting work.
	3223	*/
	3224	return current_thread()->thread_region_page_shift;
	3225	}
	3226
	3227	void
	3228	thread_self_region_page_shift_set(
	3229	int pgshift)
	3230	{
	3231	/*
	3232	* Set the page shift that this thread
	3233	* would like to use for its accounting work
	3234	* when dealing with a task.
	3235	*/
	3236	current_thread()->thread_region_page_shift = pgshift;
	3237	}
	3238
	3239	#if CONFIG_DTRACE
	3240	uint32_t
	3241	dtrace_get_thread_predcache(thread_t thread)
	3242	{
	3243	if (thread != THREAD_NULL) {
	3244	return thread->t_dtrace_predcache;
	3245	} else {
	3246	return 0;
	3247	}
	3248	}
	3249
	3250	int64_t
	3251	dtrace_get_thread_vtime(thread_t thread)
	3252	{
	3253	if (thread != THREAD_NULL) {
	3254	return thread->t_dtrace_vtime;
	3255	} else {
	3256	return 0;
	3257	}
	3258	}
	3259
	3260	int
	3261	dtrace_get_thread_last_cpu_id(thread_t thread)
	3262	{
	3263	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
	3264	return thread->last_processor->cpu_id;
	3265	} else {
	3266	return -1;
	3267	}
	3268	}
	3269
	3270	int64_t
	3271	dtrace_get_thread_tracing(thread_t thread)
	3272	{
	3273	if (thread != THREAD_NULL) {
	3274	return thread->t_dtrace_tracing;
	3275	} else {
	3276	return 0;
	3277	}
	3278	}
	3279
	3280	uint16_t
	3281	dtrace_get_thread_inprobe(thread_t thread)
	3282	{
	3283	if (thread != THREAD_NULL) {
	3284	return thread->t_dtrace_inprobe;
	3285	} else {
	3286	return 0;
	3287	}
	3288	}
	3289
	3290	vm_offset_t
	3291	dtrace_get_kernel_stack(thread_t thread)
	3292	{
	3293	if (thread != THREAD_NULL) {
	3294	return thread->kernel_stack;
	3295	} else {
	3296	return 0;
	3297	}
	3298	}
	3299
	3300	#if KASAN
	3301	struct kasan_thread_data *
	3302	kasan_get_thread_data(thread_t thread)
	3303	{
	3304	return &thread->kasan_data;
	3305	}
	3306	#endif
	3307
	3308	#if CONFIG_KSANCOV
	3309	void **
	3310	__sanitizer_get_thread_data(thread_t thread)
	3311	{
	3312	return &thread->ksancov_data;
	3313	}
	3314	#endif
	3315
	3316	int64_t
	3317	dtrace_calc_thread_recent_vtime(thread_t thread)
	3318	{
	3319	if (thread != THREAD_NULL) {
	3320	processor_t processor = current_processor();
	3321	uint64_t abstime = mach_absolute_time();
	3322	timer_t timer;
	3323
	3324	timer = processor->thread_timer;
	3325
	3326	return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
	3327	(abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	3328	} else {
	3329	return 0;
	3330	}
	3331	}
	3332
	3333	void
	3334	dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
	3335	{
	3336	if (thread != THREAD_NULL) {
	3337	thread->t_dtrace_predcache = predcache;
	3338	}
	3339	}
	3340
	3341	void
	3342	dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
	3343	{
	3344	if (thread != THREAD_NULL) {
	3345	thread->t_dtrace_vtime = vtime;
	3346	}
	3347	}
	3348
	3349	void
	3350	dtrace_set_thread_tracing(thread_t thread, int64_t accum)
	3351	{
	3352	if (thread != THREAD_NULL) {
	3353	thread->t_dtrace_tracing = accum;
	3354	}
	3355	}
	3356
	3357	void
	3358	dtrace_set_thread_inprobe(thread_t thread, uint16_t inprobe)
	3359	{
	3360	if (thread != THREAD_NULL) {
	3361	thread->t_dtrace_inprobe = inprobe;
	3362	}
	3363	}
	3364
	3365	vm_offset_t
	3366	dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
	3367	{
	3368	vm_offset_t prev = 0;
	3369
	3370	if (thread != THREAD_NULL) {
	3371	prev = thread->recover;
	3372	thread->recover = recover;
	3373	}
	3374	return prev;
	3375	}
	3376
	3377	vm_offset_t
	3378	dtrace_sign_and_set_thread_recover(thread_t thread, vm_offset_t recover)
	3379	{
	3380	#if defined(HAS_APPLE_PAC)
	3381	return dtrace_set_thread_recover(thread,
	3382	(vm_address_t)ptrauth_sign_unauthenticated((void *)recover,
	3383	ptrauth_key_function_pointer,
	3384	ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER)));
	3385	#else /* defined(HAS_APPLE_PAC) */
	3386	return dtrace_set_thread_recover(thread, recover);
	3387	#endif /* defined(HAS_APPLE_PAC) */
	3388	}
	3389
	3390	void
	3391	dtrace_thread_bootstrap(void)
	3392	{
	3393	task_t task = current_task();
	3394
	3395	if (task->thread_count == 1) {
	3396	thread_t thread = current_thread();
	3397	if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
	3398	thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
	3399	DTRACE_PROC(exec__success);
	3400	KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
	3401	task_pid(task));
	3402	}
	3403	DTRACE_PROC(start);
	3404	}
	3405	DTRACE_PROC(lwp__start);
	3406	}
	3407
	3408	void
	3409	dtrace_thread_didexec(thread_t thread)
	3410	{
	3411	thread->t_dtrace_flags \|= TH_DTRACE_EXECSUCCESS;
	3412	}
	3413	#endif /* CONFIG_DTRACE */