/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/thread.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *	Date:	1986
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 *
 */
#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/telemetry.h>
#include <kern/policy_internal.h>

#include <corpses/task_corpse.h>
#if KPC
#include <kern/kpc.h>
#endif

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <bank/bank_types.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>
#include <mach/sdt.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>
#include <mach/mach_voucher_server.h>

static struct zone		*thread_zone;
static lck_grp_attr_t		thread_lck_grp_attr;
lck_attr_t			thread_lck_attr;
lck_grp_t			thread_lck_grp;

struct zone			*thread_qos_override_zone;

decl_simple_lock_data(static,thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static,thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static queue_head_t		crashed_threads_queue;

decl_simple_lock_data(static,thread_exception_lock)
static queue_head_t		thread_exception_queue;

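/*
 * Element queued on thread_exception_queue: it carries one corpse task and
 * the offending thread.  thread_exception_daemon() below dequeues each
 * element, delivers the EXC_RESOURCE notification, and consumes both
 * references.
 */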
struct thread_exception_elt {
	queue_chain_t		elt;
	task_t			exception_task;
	thread_t		exception_thread;
};

static struct thread	thread_template, init_thread;

static void		sched_call_null(
				int		type,
				thread_t	thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
#endif /* MACH_BSD */

extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 100;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;
static void init_thread_ledgers(void);

#if CONFIG_JETSAM
void jetsam_on_ledger_cpulimit_exceeded(void);
#endif

/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1

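/*
 * New threads are initialized by structure assignment from thread_template
 * (see "*new_thread = thread_template" in thread_create_internal() below);
 * thread_bootstrap() fills the template in once at boot.
 */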
void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
	 */

#if MACH_ASSERT
	thread_template.thread_magic = THREAD_MAGIC;
#endif /* MACH_ASSERT */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.waitq = NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;
	thread_template.th_sched_bucket = TH_BUCKET_RUN;

	thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
	thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;

	thread_template.active = 0;
	thread_template.started = 0;
	thread_template.static_param = 0;
	thread_template.policy_reset = 0;

	thread_template.base_pri = BASEPRI_DEFAULT;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.pending_promoter_index = 0;
	thread_template.pending_promoter[0] = NULL;
	thread_template.pending_promoter[1] = NULL;
	thread_template.rwlock_count = 0;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.quantum_remaining = 0;
	thread_template.last_run_time = 0;
	thread_template.last_made_runnable_time = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = sched_call_null;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;
	thread_template.vtimer_qos_save = 0;

#if CONFIG_SCHED_SFI
	thread_template.wait_sfi_begin_time = 0;
#endif

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPERF
	thread_template.kperf_flags = 0;
	thread_template.kperf_pet_gen = 0;
	thread_template.kperf_c_switch = 0;
	thread_template.kperf_pet_cnt = 0;
#endif

#if KPC
	thread_template.kpc_buf = NULL;
#endif

#if HYPERVISOR
	thread_template.hv_thread_target = NULL;
#endif /* HYPERVISOR */

#if (DEVELOPMENT || DEBUG)
	thread_template.t_page_creation_throttled_hard = 0;
	thread_template.t_page_creation_throttled_soft = 0;
#endif /* DEVELOPMENT || DEBUG */
	thread_template.t_page_creation_throttled = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;
#ifdef CONFIG_BANK
	thread_template.t_bankledger = LEDGER_NULL;
	thread_template.t_deduct_bank_ledger_time = 0;
#endif

	thread_template.requested_policy = (struct thread_requested_policy) {};
	thread_template.effective_policy = (struct thread_effective_policy) {};

	bzero(&thread_template.overrides, sizeof(thread_template.overrides));

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;
	thread_template.thread_io_stats = NULL;
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	thread_template.ith_voucher_name = MACH_PORT_NULL;
	thread_template.ith_voucher = IPC_VOUCHER_NULL;

	thread_template.work_interval_id = 0;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}

extern boolean_t allow_qos_policy_set;

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	thread_qos_override_zone = zinit(
		sizeof(struct thread_qos_override),
		4 * thread_max * sizeof(struct thread_qos_override),
		PAGE_SIZE,
		"thread qos override");
	zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
	zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
	zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	thread_policy_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
		sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));

	init_thread_ledgers();
}

boolean_t
thread_is_active(thread_t thread)
{
	return (thread->active);
}

void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);
	ml_set_interrupts_enabled(FALSE);
	ast_taken(AST_APC, TRUE);

	panic("thread_corpse_continue");
	/*NOTREACHED*/
}

static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}

/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t		thread = current_thread();
	task_t			task;
	spl_t			s;
	int			threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel priority depression, wait for concurrent expirations
	 *	on other processors.
	 */
	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		/* If our priority was low because of a depressed yield, restore it in case we block below */
		thread_recompute_sched_pri(thread, FALSE);

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	thread_sched_call(thread, NULL);

	thread_unlock(thread);
	splx(s);

	thread_mtx_lock(thread);

	thread_policy_reset(thread);

	thread_mtx_unlock(thread);

	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	if (task->bsd_info && !task_is_exec_copy(task)) {
		/* trace out pid before we sign off */
		long	dbg_arg1 = 0;

		kdbg_trace_data(thread->task->bsd_info, &dbg_arg1);

		KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE_PID | DBG_FUNC_NONE,
			dbg_arg1, 0, 0, 0, 0);
	}

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
		mach_exception_data_type_t subcode = 0;
		{
			/* since we're the last thread in this process, trace out the command name too */
			long	dbg_arg1 = 0, dbg_arg2 = 0, dbg_arg3 = 0, dbg_arg4 = 0;

			kdbg_trace_string(thread->task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

			KERNEL_DEBUG_CONSTANT(TRACE_STRING_PROC_EXIT | DBG_FUNC_NONE,
				dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
		}

		/* Get the exit reason before proc_exit */
		subcode = proc_encode_exit_exception_code(task->bsd_info);
		proc_exit(task->bsd_info);

		/*
		 * If there is crash info in the task, deliver the crash
		 * notification now, since this is the last thread for
		 * this task.
		 */
		if (task->corpse_info) {
			task_deliver_crash_notification(task, current_thread(), subcode);
		}
	}

	if (threadcnt == 0) {
		task_lock(task);
		if (task_is_a_corpse_fork(task)) {
			thread_wakeup((event_t)&task->active_thread_count);
		}
		task_unlock(task);
	}

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	assert(thread->promotions == 0);
	assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
	assert(thread->rwlock_count == 0);
	thread_unlock(thread);
	/* splsched */

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}

/* Drop a thread refcount that definitely isn't the last one. */
void
thread_deallocate_safe(thread_t thread)
{
	assert_thread_magic(thread);

	uint32_t old_refcount = hw_atomic_sub(&(thread)->ref_count, 1) + 1;

	if (__improbable(old_refcount <= 1))
		panic("bad thread refcount: %d", old_refcount);
}

void
thread_deallocate(
	thread_t			thread)
{
	task_t				task;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);
	assert(thread->ref_count > 0);

	if (__probable(hw_atomic_sub(&(thread)->ref_count, 1) > 0))
		return;

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

	assert(thread->runq == PROCESSOR_NULL);

	assert(thread->user_promotions == 0);

#if KPC
	kpc_thread_destroy(thread);
#endif

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	if (IPC_VOUCHER_NULL != thread->ith_voucher)
		ipc_voucher_release(thread->ith_voucher);

	if (thread->thread_io_stats)
		kfree(thread->thread_io_stats, sizeof(struct io_stat_info));

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

#if MACH_ASSERT
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	zfree(thread_zone, thread);
}

/*
 *	thread_inspect_deallocate:
 *
 *	Drop a thread inspection reference.
 */
void
thread_inspect_deallocate(
	thread_inspect_t		thread_inspect)
{
	return(thread_deallocate((thread_t)thread_inspect));
}

/*
 *	thread_exception_daemon:
 *
 *	Deliver EXC_RESOURCE exceptions.
 */
static void
thread_exception_daemon(void)
{
	struct thread_exception_elt *elt;
	task_t task;
	thread_t thread;

	simple_lock(&thread_exception_lock);
	while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
		simple_unlock(&thread_exception_lock);

		task = elt->exception_task;
		thread = elt->exception_thread;
		assert_thread_magic(thread);

		kfree(elt, sizeof(struct thread_exception_elt));

		/* wait for all the threads in the task to terminate */
		task_lock(task);
		task_wait_till_threads_terminate_locked(task);
		task_unlock(task);

		/* Consumes the task ref returned by task_generate_corpse_internal */
		task_deallocate(task);
		/* Consumes the thread ref returned by task_generate_corpse_internal */
		thread_deallocate(thread);

		/* Deliver the EXC_RESOURCE notification, also clears the corpse. */
		task_deliver_crash_notification(task, thread, 0);

		simple_lock(&thread_exception_lock);
	}

	assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
	simple_unlock(&thread_exception_lock);

	thread_block((thread_continue_t)thread_exception_daemon);
}

/*
 *	thread_exception_enqueue:
 *
 *	Enqueue a corpse port to be delivered an EXC_RESOURCE.
 */
void
thread_exception_enqueue(
	task_t		task,
	thread_t	thread)
{
	struct thread_exception_elt *elt = (struct thread_exception_elt *)kalloc(
			sizeof(struct thread_exception_elt));

	elt->exception_task = task;
	elt->exception_thread = thread;

	simple_lock(&thread_exception_lock);
	enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
	simple_unlock(&thread_exception_lock);

	thread_wakeup((event_t)&thread_exception_queue);
}

/*
 *	thread_copy_resource_info:
 *
 *	Copy the resource info counters from the source
 *	thread to the destination thread.
 */
void
thread_copy_resource_info(
	thread_t dst_thread,
	thread_t src_thread)
{
	dst_thread->thread_tag = src_thread->thread_tag;
	dst_thread->c_switch = src_thread->c_switch;
	dst_thread->p_switch = src_thread->p_switch;
	dst_thread->ps_switch = src_thread->ps_switch;
	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	dst_thread->user_timer = src_thread->user_timer;
	dst_thread->user_timer_save = src_thread->user_timer_save;
	dst_thread->system_timer_save = src_thread->system_timer_save;
	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	*dst_thread->thread_io_stats = *src_thread->thread_io_stats;
}

/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

	while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		/*
		 * If marked for crash reporting, skip reaping.
		 * The corpse delivery thread will clear the bit and
		 * enqueue the thread for reaping when done.
		 */
		if (thread->inspection) {
			enqueue_tail(&crashed_threads_queue, &thread->runq_links);
			continue;
		}

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		task->task_gpu_ns += ml_gpu_stat(thread);
		task->task_energy += ml_energy_stat(thread);

		thread_update_qos_cpu_time(thread);

		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 *	If the task is being halted, and there is only one thread
		 *	left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}

/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0);

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}

/*
 *	thread_terminate_crashed_threads:
 *
 *	Walk the list of crashed threads and move any that are no longer
 *	being inspected back onto the terminate queue.
 */
void
thread_terminate_crashed_threads(void)
{
	thread_t th_remove;
	boolean_t should_wake_terminate_queue = FALSE;

	simple_lock(&thread_terminate_lock);
	/*
	 * Loop through the crashed threads queue and put back any threads
	 * that are not being inspected anymore.
	 */
	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
		/* make sure current_thread is never in crashed queue */
		assert(th_remove != current_thread());

		if (th_remove->inspection == FALSE) {
			re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
			should_wake_terminate_queue = TRUE;
		}
	}

	simple_unlock(&thread_terminate_lock);
	if (should_wake_terminate_queue == TRUE) {
		thread_wakeup((event_t)&thread_terminate_queue);
	}
}

/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	invoke failures.
 */
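/*
 * (Stack allocation is deferred to this daemon because stack_alloc() may
 * call into the VM and block, while the enqueueing context runs at
 * splsched; see the "interrupts enabled" note in the loop below.)
 */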
static void
thread_stack_daemon(void)
{
	thread_t		thread;
	spl_t			s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
	/*NOTREACHED*/
}

/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	assert_thread_magic(thread);

	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, &thread->runq_links);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}

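/*
 *	thread_daemon_init:
 *
 *	Start the three daemons above: the terminate reaper and the
 *	EXC_RESOURCE corpse deliverer (both at MINPRI_KERNEL), and the
 *	stack allocator (at BASEPRI_PREEMPT).
 */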
void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t	thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);
	queue_init(&crashed_threads_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_exception_lock, 0);
	queue_init(&thread_exception_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_exception_daemon");

	thread_deallocate(thread);
}

#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02

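/*
 * TH_OPTION_NOCRED is forwarded as the third argument to uthread_alloc()
 * in thread_create_internal(); TH_OPTION_NOSUSP makes thread creation fail
 * while the parent task is suspended.  The thread_create_workq*() variants
 * below pass both.
 */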
/*
 * Create a new thread.
 * Doesn't start the thread running.
 *
 * Task and tasks_threads_lock are returned locked on success.
 */
static kern_return_t
thread_create_internal(
	task_t				parent_task,
	integer_t			priority,
	thread_continue_t		continuation,
	int				options,
	thread_t			*out_thread)
{
	thread_t			new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;

	/* Allocate I/O Statistics structure */
	new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
	assert(new_thread->thread_io_stats != NULL);
	bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));

#if CONFIG_IOSCHED
	/* Clear out the I/O Scheduling info for AppleFSCompression */
	new_thread->decmp_upl = NULL;
#endif /* CONFIG_IOSCHED */

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	/*
	 * Fail thread creation if parent task is being torn down or has too many threads.
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended.
	 */
	if (parent_task->active == 0 || parent_task->halting ||
	    (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
	    (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}

	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
				LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {

		ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	}

#ifdef CONFIG_BANK
	new_thread->t_bankledger = LEDGER_NULL;
	new_thread->t_deduct_bank_ledger_time = 0;
#endif

	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

#if defined(CONFIG_SCHED_MULTIQ)
	/* Cache the task's sched_group */
	new_thread->sched_group = parent_task->sched_group;
#endif /* defined(CONFIG_SCHED_MULTIQ) */

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if KPC
	kpc_thread_create(new_thread);
#endif

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;

	int new_priority = (priority < 0) ? parent_task->priority : priority;
	if (new_priority > new_thread->max_priority)
		new_priority = new_thread->max_priority;

	new_thread->importance = new_priority - new_thread->task_priority;

	sched_set_thread_base_priority(new_thread, new_priority);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */

	thread_policy_create(new_thread);

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	new_thread->active = TRUE;
	if (task_is_a_corpse_fork(parent_task)) {
		/* Set the inspection bit if the task is a corpse fork */
		new_thread->inspection = TRUE;
	} else {
		new_thread->inspection = FALSE;
	}
	new_thread->corpse_dup = FALSE;
	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		/*
		 * Starting with 26604425, exec'ing creates a new task/thread.
		 *
		 * NEWTHREAD in the current process has two possible meanings:
		 *
		 * 1) Create a new thread for this process.
		 * 2) Create a new thread for the future process this will become in an exec.
		 *
		 * To disambiguate these, arg3 will be set to TRUE for case #2.
		 *
		 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
		 * task exec'ing. The read of t_procflags does not take the proc_lock.
		 */
		dbg_arg3 = (task_is_exec_copy(parent_task)) ? TRUE : 0;

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE,
			(vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, dbg_arg3, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
			&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_STRING_NEWTHREAD | DBG_FUNC_NONE,
			dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}

static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user,
	thread_continue_t		continuation)
{
	kern_return_t			result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_with_continuation(
	task_t				task,
	thread_t			*new_thread,
	thread_continue_t		continuation)
{
	return thread_create_internal2(task, new_thread, FALSE, continuation);
}

/*
 * Create a thread that is already started, but is waiting on an event
 */
static kern_return_t
thread_create_waiting_internal(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	int			options,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, options, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	/* note no user_stop_count or thread_hold here */

	if (task->suspend_count > 0)
		thread_hold(thread);

	thread_mtx_lock(thread);
	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	thread_mtx_unlock(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NONE, new_thread);
}

static kern_return_t
thread_create_running_internal2(
	task_t			task,
	int			flavor,
	thread_state_t		new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t		*new_thread,
	boolean_t		from_user)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	if (task->suspend_count > 0)
		thread_hold(thread);

	result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (result);
}

/* Prototype, see justification above */
kern_return_t
thread_create_running(
	task_t			task,
	int			flavor,
	thread_state_t		new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t		*new_thread);

kern_return_t
thread_create_running(
	task_t			task,
	int			flavor,
	thread_state_t		new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t		*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	task_t			task,
	int			flavor,
	thread_state_t		new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t		*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, TRUE);
}

kern_return_t
thread_create_workq(
	task_t				task,
	thread_continue_t		thread_return,
	thread_t			*new_thread)
{
	kern_return_t			result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_workq_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NOCRED | TH_OPTION_NOSUSP,
					      new_thread);
}

/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;
	task_t			task = kernel_task;

	result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
	thread->reserved_stack = thread->kernel_stack;

	thread->parameter = parameter;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}

kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	return (result);
}

kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void			*parameter,
	thread_t		*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}

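/*
 * Illustrative usage sketch (not part of this file): the caller receives a
 * reference on the new thread and should drop it once the handle is no
 * longer needed, as thread_daemon_init() does above.  "my_continuation" is
 * a hypothetical thread_continue_t.
 *
 *	thread_t thread;
 *	if (kernel_thread_start(my_continuation, NULL, &thread) == KERN_SUCCESS)
 *		thread_deallocate(thread);
 */
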
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int	state, flags;

	/* fill in info */

	thread_read_times(thread, &basic_info->user_time,
				  &basic_info->system_time);

	/*
	 *	Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	basic_info->sleep_time = 0;

	/*
	 *	To calculate cpu_usage, first correct for timer rate,
	 *	then for 5/8 ageing.  The correction factor [3/5] is
	 *	(1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
						* TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif
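	/*
	 * Worked example (illustrative): a thread whose accumulated usage
	 * equals the full sched_tick_interval first scales to TH_USAGE_SCALE,
	 * then ages to (TH_USAGE_SCALE * 3) / 5; the clamp below caps the
	 * reported value at TH_USAGE_SCALE, i.e. 100% of one CPU.
	 */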
1620
1621 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1622 basic_info->cpu_usage = TH_USAGE_SCALE;
1623
1624 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1625 POLICY_TIMESHARE: POLICY_RR);
1626
1627 flags = 0;
1628 if (thread->options & TH_OPT_IDLE_THREAD)
1629 flags |= TH_FLAGS_IDLE;
1630
1631 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1632 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1633 }
1634
1635 if (!thread->kernel_stack)
1636 flags |= TH_FLAGS_SWAPPED;
1637
1638 state = 0;
1639 if (thread->state & TH_TERMINATE)
1640 state = TH_STATE_HALTED;
1641 else
1642 if (thread->state & TH_RUN)
1643 state = TH_STATE_RUNNING;
1644 else
1645 if (thread->state & TH_UNINT)
1646 state = TH_STATE_UNINTERRUPTIBLE;
1647 else
1648 if (thread->state & TH_SUSP)
1649 state = TH_STATE_STOPPED;
1650 else
1651 if (thread->state & TH_WAIT)
1652 state = TH_STATE_WAITING;
1653
1654 basic_info->run_state = state;
1655 basic_info->flags = flags;
1656
1657 basic_info->suspend_count = thread->user_stop_count;
1658
1659 return;
1660}
b0d623f7 1661
1c79356b 1662kern_return_t
91447636 1663thread_info_internal(
39037602 1664 thread_t thread,
1c79356b
A
1665 thread_flavor_t flavor,
1666 thread_info_t thread_info_out, /* ptr to OUT array */
1667 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1668{
3e170ce0 1669 spl_t s;
1c79356b
A
1670
1671 if (thread == THREAD_NULL)
1672 return (KERN_INVALID_ARGUMENT);
1673
1674 if (flavor == THREAD_BASIC_INFO) {
1c79356b 1675
3e170ce0 1676 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1c79356b
A
1677 return (KERN_INVALID_ARGUMENT);
1678
3e170ce0
A
1679 s = splsched();
1680 thread_lock(thread);
1c79356b 1681
3e170ce0 1682 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1c79356b 1683
3e170ce0
A
1684 thread_unlock(thread);
1685 splx(s);
0b4e3aa0 1686
3e170ce0 1687 *thread_info_count = THREAD_BASIC_INFO_COUNT;
0b4e3aa0 1688
3e170ce0 1689 return (KERN_SUCCESS);
1c79356b
A
1690 }
1691 else
b0d623f7 1692 if (flavor == THREAD_IDENTIFIER_INFO) {
39037602 1693 thread_identifier_info_t identifier_info;
b0d623f7 1694
3e170ce0 1695 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
b0d623f7
A
1696 return (KERN_INVALID_ARGUMENT);
1697
3e170ce0 1698 identifier_info = (thread_identifier_info_t) thread_info_out;
b0d623f7 1699
3e170ce0
A
1700 s = splsched();
1701 thread_lock(thread);
b0d623f7 1702
3e170ce0
A
1703 identifier_info->thread_id = thread->thread_id;
1704 identifier_info->thread_handle = thread->machine.cthread_self;
1705 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
b0d623f7 1706
3e170ce0
A
1707 thread_unlock(thread);
1708 splx(s);
1709 return KERN_SUCCESS;
b0d623f7
A
1710 }
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t	ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return (KERN_INVALID_POLICY);
		}

		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		}
		else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t		quantum_time;
		uint64_t		quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		}
		else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t	basic_info;
		thread_extended_info_t		extended_info = (thread_extended_info_t) thread_info_out;

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return (KERN_INVALID_ARGUMENT);
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
		extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(thread->uthread, extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
			return (KERN_NOT_SUPPORTED);

		if (thread_info_out == NULL)
			return (KERN_INVALID_ARGUMENT);

		dbg_info = (thread_debug_info_internal_t) thread_info_out;
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return (KERN_SUCCESS);
#endif /* DEVELOPMENT || DEBUG */
		return (KERN_NOT_SUPPORTED);
	}

	return (KERN_INVALID_ARGUMENT);
}
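
/*
 * Illustration only (not part of the original source): the count-check /
 * fill / count-update pattern above is what a userspace caller of the
 * thread_info() MIG routine drives.  A minimal sketch, assuming the
 * caller queries its own thread port:
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	kern_return_t kr = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 */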

void
thread_read_times(
	thread_t	thread,
	time_value_t	*user_time,
	time_value_t	*system_time)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t	tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}
}
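
/*
 * Illustration only (not part of the original source): a kernel-side
 * caller obtains the user/system split like so; when
 * precise_user_kernel_time is off, the fallback branch above reports
 * the combined total as user time and zero system time.
 *
 *	time_value_t ut, st;
 *	thread_read_times(current_thread(), &ut, &st);
 */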

uint64_t thread_get_runtime_self(void)
{
	boolean_t interrupt_state;
	uint64_t runtime;
	thread_t thread = NULL;
	processor_t processor = NULL;

	thread = current_thread();

	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	interrupt_state = ml_set_interrupts_enabled(FALSE);
	processor = current_processor();
	timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	ml_set_interrupts_enabled(interrupt_state);

	return runtime;
}
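
/*
 * Illustration only (not part of the original source): the returned
 * value is in mach absolute time units and accumulates monotonically,
 * so a caller can bracket a section to measure the CPU time it consumed
 * on the current thread:
 *
 *	uint64_t start = thread_get_runtime_self();
 *	... work ...
 *	uint64_t delta_abs = thread_get_runtime_self() - start;
 *	uint64_t delta_ns;
 *	absolutetime_to_nanoseconds(delta_abs, &delta_ns);
 */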

kern_return_t
thread_assign(
	__unused thread_t		thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
 */
kern_return_t
thread_assign_default(
	thread_t	thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t	thread,
	processor_set_t	*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}

/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired,
	boolean_t	*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	}
	else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}


/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}
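
/*
 * Illustration only (not part of the original source): a kernel caller
 * that needs the wired state only temporarily can use the prev_state
 * out-parameter to restore whatever was in effect before; per the
 * assert above, the host-privilege argument is &realhost in-kernel.
 *
 *	boolean_t prev;
 *	thread_wire_internal(&realhost, current_thread(), TRUE, &prev);
 *	... memory-critical section ...
 *	thread_wire_internal(&realhost, current_thread(), prev, NULL);
 */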

boolean_t
is_vm_privileged(void)
{
	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
}

boolean_t
set_vm_privilege(boolean_t privileged)
{
	boolean_t was_vmpriv;

	if (current_thread()->options & TH_OPT_VMPRIV)
		was_vmpriv = TRUE;
	else
		was_vmpriv = FALSE;

	if (privileged != FALSE)
		current_thread()->options |= TH_OPT_VMPRIV;
	else
		current_thread()->options &= ~TH_OPT_VMPRIV;

	return (was_vmpriv);
}
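
/*
 * Illustration only (not part of the original source): set_vm_privilege()
 * returns the previous setting precisely so callers can open a
 * privileged window and then restore the prior state:
 *
 *	boolean_t was = set_vm_privilege(TRUE);
 *	... allocation that may draw on the reserved page pool ...
 *	set_vm_privilege(was);
 */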

void
set_thread_rwlock_boost(void)
{
	current_thread()->rwlock_count++;
}

void
clear_thread_rwlock_boost(void)
{
	thread_t thread = current_thread();

	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		lck_rw_clear_promotion(thread);
	}
}
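
/*
 * Illustration only (not part of the original source): the two routines
 * above are intended to be called as a balanced pair around a region
 * that takes rw locks, so any priority promotion is only torn down when
 * the outermost boost is released:
 *
 *	set_thread_rwlock_boost();
 *	... acquire and release rw locks ...
 *	clear_thread_rwlock_boost();
 */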

/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread, unsigned type)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace
	 */
	thread->guard_exc_info.type = type;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread->ast);

	splx(s);
}

/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread.  This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception.  Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t thread)
{
	if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
		mach_port_guard_ast(thread);
	else
		fd_guard_ast(thread);
}

static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor.  Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered.  We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}

void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int		pid		= 0;
	task_t		task		= current_task();
	thread_t	thread		= current_thread();
	uint64_t	tid		= thread->thread_id;
	const char	*procname	= "unknown";
	time_value_t	thread_total_time = {0, 0};
	time_value_t	thread_system_time;
	time_value_t	thread_user_time;
	int		action;
	uint8_t		percentage;
	uint32_t	usage_percent = 0;
	uint32_t	interval_sec;
	uint64_t	interval_ns;
	uint64_t	balance_ns;
	boolean_t	fatal = FALSE;
	boolean_t	send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t	kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info	lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time.  Whichever makes it through
	   first will successfully suspend the monitor and proceed to send the
	   notification.  Other threads will get an error trying to suspend the
	   monitor and give up on sending the notification.  In the first release,
	   the monitor won't be resumed for a number of seconds, but we may
	   eventually need to handle low-latency resume.
	*/
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT) return;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	   the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
	}
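	/*
	 * Worked example (illustration only, not from the original source):
	 * if the ledger balance converts to 750 ms of CPU and the last
	 * refill period was 1.5 s, then
	 * usage_percent = (750000000 * 100) / 1500000000 = 50.
	 */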

	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	printf("process %s[%d] thread %llu caught burning CPU! "
	       "It used more than %d%% CPU over %u seconds "
	       "(actual recent usage: %d%% over ~%llu seconds).  "
	       "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
	       "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
	       "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
	       procname, pid, tid,
	       percentage, interval_sec,
	       usage_percent,
	       (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
	       thread_total_time.seconds, thread_total_time.microseconds,
	       thread_user_time.seconds, thread_user_time.microseconds,
	       thread_system_time.seconds, thread_system_time.microseconds,
	       lei.lei_balance, lei.lei_credit, lei.lei_debit,
	       lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	       (fatal ? " [fatal violation]" : ""));

	/*
	   For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE.  Once
	   we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	*/

	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	lei.lei_balance = balance_ns;
	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	    fatal ? kRNFatalLimitFlag : 0);
	if (kr) {
		printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	if (send_exc_resource) {
		if (disable_exc_resource) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE%s suppressed by a boot-arg\n",
			       procname, pid, tid, fatal ? " (and termination)" : "");
			return;
		}

		if (audio_active) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE & termination suppressed due to audio playback\n",
			       procname, pid, tid);
			return;
		}
	}

	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}

void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
	int io_tier;

	if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
		return;

	if (io_flags & DKIO_READ) {
		UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	}

	if (io_flags & DKIO_META) {
		UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	}

	if (io_flags & DKIO_PAGING) {
		UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	}

	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);

	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);

	/* Update Total I/O Counts */
	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);

	if (!(io_flags & DKIO_READ)) {
		DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
		ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	}
}

static void
init_thread_ledgers(void) {
	ledger_template_t t;
	int idx;

	assert(thread_ledger_template == NULL);

	if ((t = ledger_template_create("Per-thread ledger")) == NULL)
		panic("couldn't create thread ledger template");

	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
		panic("couldn't create cpu_time entry for thread ledger template");
	}

	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
		panic("couldn't set thread ledger callback for cpu_time entry");
	}

	thread_ledgers.cpu_time = idx;

	thread_ledger_template = t;
}

/*
 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
 */
int
thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
{
	int64_t		abstime = 0;
	uint64_t	limittime = 0;
	thread_t	thread = current_thread();

	*percentage  = 0;
	*interval_ns = 0;
	*action      = 0;

	if (thread->t_threadledger == LEDGER_NULL) {
		/*
		 * This thread has no per-thread ledger, so it can't possibly
		 * have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);

	if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
		/*
		 * This thread's CPU time ledger has no period or limit; so it
		 * doesn't have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * This calculation is the converse to the one in thread_set_cpulimit().
	 */
	absolutetime_to_nanoseconds(abstime, &limittime);
	*percentage = (limittime * 100ULL) / *interval_ns;
	assert(*percentage <= 100);

	if (thread->options & TH_OPT_PROC_CPULIMIT) {
		assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_BLOCK;
	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
		assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_EXCEPTION;
	} else {
		*action = THREAD_CPULIMIT_DISABLE;
	}

	return (KERN_SUCCESS);
}
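
/*
 * Illustration only (not part of the original source): querying the
 * limit currently applied to the calling thread.
 *
 *	int action;
 *	uint8_t pct;
 *	uint64_t interval_ns;
 *	thread_get_cpulimit(&action, &pct, &interval_ns);
 *	... pct == 0 and interval_ns == 0 mean no limit is applied ...
 */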

/*
 * Set CPU usage limit on a thread.
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t	thread = current_thread();
	ledger_t	l;
	uint64_t	limittime = 0;
	uint64_t	abstime = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor.  We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}
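
/*
 * Illustration only (not part of the original source): limiting the
 * calling thread to 50% of a CPU over a 1-second window.  The ledger
 * limit computed above is limittime = (1e9 * 50) / 100, i.e. 500 ms of
 * CPU time per refill period.
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 1 * NSEC_PER_SEC);
 *	...
 *	thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0);
 */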

static void
sched_call_null(
__unused	int		type,
__unused	thread_t	thread)
{
	return;
}

void
thread_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	thread->sched_call = (call != NULL)? call: sched_call_null;
}

sched_call_t
thread_disable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		if (thread->sched_call == call) {
			thread->sched_call = sched_call_null;
		} else {
			call = NULL;
		}
		thread_unlock(thread);
		splx(s);
	}
	return call;
}

void
thread_reenable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		thread_sched_call(thread, call);
		thread_unlock(thread);
		splx(s);
	}
}
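
/*
 * Illustration only (not part of the original source): the disable /
 * reenable pair nests safely.  thread_disable_sched_call() returns the
 * call it actually removed (or NULL if it was already gone), and passing
 * that result back makes the reenable a no-op in the NULL case.
 * 'my_sched_call' is a hypothetical callback.
 *
 *	sched_call_t removed = thread_disable_sched_call(thread, my_sched_call);
 *	... region where the callout must not fire ...
 *	thread_reenable_sched_call(thread, removed);
 */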

void
thread_static_param(
	thread_t	thread,
	boolean_t	state)
{
	thread_mtx_lock(thread);
	thread->static_param = state;
	thread_mtx_unlock(thread);
}

uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL? thread->thread_id: 0);
}

uint16_t thread_set_tag(thread_t th, uint16_t tag) {
	return thread_set_tag_internal(th, tag);
}

uint16_t thread_get_tag(thread_t th) {
	return thread_get_tag_internal(th);
}

uint64_t
thread_dispatchqaddr(
	thread_t	thread)
{
	uint64_t	dispatchqueue_addr;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->inspection == TRUE)
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	else if (thread->task->bsd_info)
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	else
		dispatchqueue_addr = 0;

	return dispatchqueue_addr;
}

/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t	th)
{
	return (thread_should_halt_fast(th));
}

/*
 *	thread_set_voucher_name - reset the voucher port name bound to this thread
 *
 *	Conditions:  nothing locked
 *
 *	If we already converted the previous name to a cached voucher
 *	reference, then we discard that reference here.  The next lookup
 *	will cache it again.
 */

kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (MACH_PORT_DEAD == voucher_name)
		return KERN_INVALID_RIGHT;

	/*
	 * aggressively convert to voucher reference
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher)
			return KERN_INVALID_ARGUMENT;
	}
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
	    (uintptr_t)thread_tid(thread),
	    (uintptr_t)voucher_name,
	    VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
	    1, 0);

	if (IPC_VOUCHER_NULL != voucher)
		ipc_voucher_release(voucher);

	return KERN_SUCCESS;
}

/*
 *	thread_get_mach_voucher - return a voucher reference for the specified thread voucher
 *
 *	Conditions:  nothing locked
 *
 *	A reference to the voucher may be lazily pending, if someone set the voucher name
 *	but nobody has done a lookup yet.  In that case, we'll have to do the equivalent
 *	lookup here.
 *
 *	NOTE:	At the moment, there is no distinction between the current and effective
 *		vouchers because we only set them at the thread level currently.
 */
kern_return_t
thread_get_mach_voucher(
	thread_act_t		thread,
	mach_voucher_selector_t __unused which,
	ipc_voucher_t		*voucherp)
{
	ipc_voucher_t		voucher;
	mach_port_name_t	voucher_name;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;

	/* if already cached, just return a ref */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_reference(voucher);
		thread_mtx_unlock(thread);
		*voucherp = voucher;
		return KERN_SUCCESS;
	}

	voucher_name = thread->ith_voucher_name;

	/* convert the name to a port, then voucher reference */
	if (MACH_PORT_VALID(voucher_name)) {
		ipc_port_t port;

		if (KERN_SUCCESS !=
		    ipc_object_copyin(thread->task->itk_space, voucher_name,
			MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread_mtx_unlock(thread);
			*voucherp = IPC_VOUCHER_NULL;
			return KERN_SUCCESS;
		}

		/* convert to a voucher ref to return, and cache a ref on thread */
		voucher = convert_port_to_voucher(port);
		ipc_voucher_reference(voucher);
		thread->ith_voucher = voucher;
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
		    (uintptr_t)thread_tid(thread),
		    (uintptr_t)port,
		    VM_KERNEL_ADDRPERM((uintptr_t)voucher),
		    2, 0);

		ipc_port_release_send(port);
	} else
		thread_mtx_unlock(thread);

	*voucherp = voucher;
	return KERN_SUCCESS;
}

/*
 *	thread_set_mach_voucher - set a voucher reference for the specified thread voucher
 *
 *	Conditions: callers holds a reference on the voucher.
 *		nothing locked.
 *
 *	We grab another reference to the voucher and bind it to the thread.  Any lazy
 *	binding is erased.  The old voucher reference associated with the thread is
 *	discarded.
 */
kern_return_t
thread_set_mach_voucher(
	thread_t	thread,
	ipc_voucher_t	voucher)
{
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

	ipc_voucher_reference(voucher);
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(voucher);
#endif
	thread_mtx_lock(thread);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
	    (uintptr_t)thread_tid(thread),
	    (uintptr_t)MACH_PORT_NULL,
	    VM_KERNEL_ADDRPERM((uintptr_t)voucher),
	    3, 0);

	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}

/*
 *	thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
 *
 *	Conditions: callers holds a reference on the new and presumed old voucher(s).
 *		nothing locked.
 *
 *	If the old voucher is still the same as passed in, replace it with new voucher
 *	and discard the old (and the reference passed in).  Otherwise, discard the new
 *	and return an updated old voucher.
 */
kern_return_t
thread_swap_mach_voucher(
	thread_t	thread,
	ipc_voucher_t	new_voucher,
	ipc_voucher_t	*in_out_old_voucher)
{
	mach_port_name_t old_voucher_name;
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_TASK;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);

	old_voucher = thread->ith_voucher;

	if (IPC_VOUCHER_NULL == old_voucher) {
		old_voucher_name = thread->ith_voucher_name;

		/* perform lazy binding if needed */
		if (MACH_PORT_VALID(old_voucher_name)) {
			old_voucher = convert_port_name_to_voucher(old_voucher_name);
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread->ith_voucher = old_voucher;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread),
			    (uintptr_t)old_voucher_name,
			    VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
			    4, 0);
		}
	}

	/* swap in new voucher, if old voucher matches the one supplied */
	if (old_voucher == *in_out_old_voucher) {
		ipc_voucher_reference(new_voucher);
		thread->ith_voucher = new_voucher;
		thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
		bank_swap_thread_bank_ledger(thread, bankledger);
#endif
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
		    (uintptr_t)thread_tid(thread),
		    (uintptr_t)MACH_PORT_NULL,
		    VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
		    5, 0);

		ipc_voucher_release(old_voucher);

		*in_out_old_voucher = IPC_VOUCHER_NULL;
		return KERN_SUCCESS;
	}

	/* Otherwise, just return old voucher reference */
	ipc_voucher_reference(old_voucher);
	thread_mtx_unlock(thread);
	*in_out_old_voucher = old_voucher;
	return KERN_SUCCESS;
}
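
/*
 * Illustration only (not part of the original source): the in/out
 * parameter gives this routine compare-and-swap semantics.  A caller
 * holding references on both vouchers can detect whether the swap took:
 *
 *	ipc_voucher_t expected = previously_read_voucher;   hypothetical
 *	kr = thread_swap_mach_voucher(thread, new_voucher, &expected);
 *	... expected == IPC_VOUCHER_NULL on return means the swap
 *	    happened; otherwise it holds a reference to the voucher
 *	    currently bound to the thread ...
 */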

/*
 *	thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
 */
kern_return_t
thread_get_current_voucher_origin_pid(
	int32_t	*pid)
{
	uint32_t buf_size;
	kern_return_t kr;
	thread_t thread = current_thread();

	buf_size = sizeof(*pid);
	kr = mach_voucher_attr_command(thread->ith_voucher,
	    MACH_VOUCHER_ATTR_KEY_BANK,
	    BANK_ORIGINATOR_PID,
	    NULL,
	    0,
	    (mach_voucher_attr_content_t)pid,
	    &buf_size);

	return kr;
}
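
/*
 * Illustration only (not part of the original source): a caller simply
 * checks the return code, since the attr command fails if the current
 * voucher carries no bank attribute.
 *
 *	int32_t origin_pid;
 *	if (thread_get_current_voucher_origin_pid(&origin_pid) == KERN_SUCCESS)
 *		... origin_pid identifies the voucher's originating process ...
 */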

boolean_t
thread_has_thread_name(thread_t th)
{
	if ((th) && (th->uthread)) {
		return bsd_hasthreadname(th->uthread);
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if ((th) && (th->uthread) && name) {
		bsd_setthreadname(th->uthread, name);
	}
}

/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE)
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	else
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
}

#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}

boolean_t dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return 0;
}

vm_offset_t dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}

int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t processor = current_processor();
		uint64_t abstime = mach_absolute_time();
		timer_t timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
		    (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}

void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}

void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			    task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */